clausehelper.h

00001 /*
00002  * All of the documentation and software included in the
00003  * Alchemy Software is copyrighted by Stanley Kok, Parag
00004  * Singla, Matthew Richardson, Pedro Domingos, Marc
00005  * Sumner and Hoifung Poon.
00006  * 
00007  * Copyright [2004-07] Stanley Kok, Parag Singla, Matthew
00008  * Richardson, Pedro Domingos, Marc Sumner and Hoifung
00009  * Poon. All rights reserved.
00010  * 
00011  * Contact: Pedro Domingos, University of Washington
00012  * (pedrod@cs.washington.edu).
00013  * 
00014  * Redistribution and use in source and binary forms, with
00015  * or without modification, are permitted provided that
00016  * the following conditions are met:
00017  * 
00018  * 1. Redistributions of source code must retain the above
00019  * copyright notice, this list of conditions and the
00020  * following disclaimer.
00021  * 
00022  * 2. Redistributions in binary form must reproduce the
00023  * above copyright notice, this list of conditions and the
00024  * following disclaimer in the documentation and/or other
00025  * materials provided with the distribution.
00026  * 
00027  * 3. All advertising materials mentioning features or use
00028  * of this software must display the following
00029  * acknowledgment: "This product includes software
00030  * developed by Stanley Kok, Parag Singla, Matthew
00031  * Richardson, Pedro Domingos, Marc Sumner and Hoifung
00032  * Poon in the Department of Computer Science and
00033  * Engineering at the University of Washington".
00034  * 
00035  * 4. Your publications acknowledge the use or
00036  * contribution made by the Software to your research
00037  * using the following citation(s): 
00038  * Stanley Kok, Parag Singla, Matthew Richardson and
00039  * Pedro Domingos (2005). "The Alchemy System for
00040  * Statistical Relational AI", Technical Report,
00041  * Department of Computer Science and Engineering,
00042  * University of Washington, Seattle, WA.
00043  * http://www.cs.washington.edu/ai/alchemy.
00044  * 
00045  * 5. Neither the name of the University of Washington nor
00046  * the names of its contributors may be used to endorse or
00047  * promote products derived from this software without
00048  * specific prior written permission.
00049  * 
00050  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON
00051  * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
00052  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00053  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00054  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
00055  * OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
00056  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00057  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00058  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00059  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00060  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00061  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00062  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
00063  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00064  * 
00065  */
00066 #ifndef CLAUSEHELPER_H_JUN_26_2005
00067 #define CLAUSEHELPER_H_JUN_26_2005
00068 
00069 struct VarsGroundedType
00070 {
00071   VarsGroundedType() : isGrounded(false), typeId(-1), numGndings(-1) {}
00072   VarsGroundedType(const VarsGroundedType& vgt)
00073   {
00074     vars.copyFrom(vgt.vars);
00075     isGrounded = vgt.isGrounded;
00076     typeId = vgt.typeId;
00077     numGndings = vgt.numGndings;
00078   }
00079 
00080   Array<Term*> vars;  //addresses of variables with the same id
00081   bool isGrounded;   //indicate whether the variable has been grounded
00082   int typeId;        //type of the variable
00083   int numGndings;    //number of groundings of the variable
00084 };
00085 
00086 
00087 struct LitIdxVarIdsGndings
00088 {
00089   unsigned int litIdx; //index of a literal in a clause
00090   Array<int> varIds;   //vars in literal (preceding pred's vars are grounded)
00091   ArraysAccessor<int> varGndings; //groundings of vars
00092   Array<Predicate*> subseqGndLits; //subsequent literals that are grounded
00093   bool litUnseen;  //this literal has not been seen
00094   Array<Predicate*> bannedPreds; // used when obtaining unknown clauses
00095 };
00096 
00097 
00098 typedef hash_set<string, HashString, EqualString> FormulaStringSet;
00099 
00100 
00101 struct CacheCount
00102 {
00103   CacheCount(const int& gg, const int& cc, const double& ccnt) :
00104     g(gg), c(cc), cnt(ccnt) {}
00105   ~CacheCount() {}
00106   int g; // gth grounding of predicate
00107   int c; // cth flip combination of the grounding (when in a block)
00108   double cnt; //counts
00109 };
00110 
00111 
00112   // Operations that can be applied to a clause. Used to set AuxClauseData.op.
00113 enum { OP_NONE = 0, OP_ADD = 1, OP_REMOVE = 2, OP_REPLACE = 3, 
00114        OP_REPLACE_ADDPRED = 4, OP_REPLACE_REMPRED = 5 };
00115 
00116 
00117   //NOTE: ensure that the AuxClauseData's member are copied in Clause's 
00118   //      copy constructor and at the beginning of StructLearn::run()
00119 struct AuxClauseData
00120 {
00121   AuxClauseData() : gain(0), constTermPtrs(NULL), op(OP_NONE), 
00122                     removedClauseIdx(-1), cache(NULL), removedPredIdx(-1),
00123                     hasBeenExpanded(false), lastStepExpanded(-1),
00124                     lastStepOverMinWeight(-1) {}
00125 
00126   AuxClauseData(const double& ggain, const int& oop, 
00127                 const int& rremovedClauseIdx)
00128     : gain(ggain), constTermPtrs(NULL), op(oop), 
00129       removedClauseIdx(rremovedClauseIdx), cache(NULL), removedPredIdx(-1),
00130       hasBeenExpanded(false), lastStepExpanded(-1),
00131       lastStepOverMinWeight(-1) {}
00132 
00133   AuxClauseData(const double& ggain, const int& oop, 
00134                 const int& rremovedClauseIdx,
00135                 const bool& hhasBeenExpanded,
00136                 const int& llastStepExpanded,
00137                 const int& llastStepOverMinWeight)
00138     : gain(ggain), constTermPtrs(NULL), op(oop), 
00139       removedClauseIdx(rremovedClauseIdx), cache(NULL), removedPredIdx(-1),
00140       hasBeenExpanded(hhasBeenExpanded), lastStepExpanded(llastStepExpanded),
00141       lastStepOverMinWeight(llastStepOverMinWeight) {}
00142 
00143   AuxClauseData(const double& ggain, const int& oop, 
00144                 const int& rremovedClauseIdx, const string& prevClause,
00145                 const string& addedPred, const int& remPredIdx)
00146     : gain(ggain), constTermPtrs(NULL), op(oop), 
00147       removedClauseIdx(rremovedClauseIdx), cache(NULL), 
00148       prevClauseStr(prevClause), addedPredStr(addedPred), 
00149       removedPredIdx(remPredIdx),
00150       hasBeenExpanded(false), lastStepExpanded(-1),
00151       lastStepOverMinWeight(-1) {}
00152 
00153   ~AuxClauseData() 
00154   { 
00155     if (constTermPtrs) delete constTermPtrs; 
00156     if (cache) deleteCache();
00157   }
00158 
00159 
00160   void deleteCache()
00161   {
00162     if (cache)
00163     {
00164       for (int i = 0; i < cache->size(); i++) //for each domain
00165       {
00166         Array<Array<CacheCount*>*>* aacc = (*cache)[i];
00167         for (int j = 0; j < aacc->size(); j++) //for each predicate
00168         {
00169           Array<CacheCount*>* ccArr = (*aacc)[j];
00170           if (ccArr == NULL) continue;
00171           ccArr->deleteItemsAndClear();
00172         }
00173         aacc->deleteItemsAndClear();
00174       }
00175       cache->deleteItemsAndClear(); 
00176       delete cache; 
00177       cache = NULL;
00178     }
00179   }
00180 
00181   
00182   void compress()
00183   {
00184     if (constTermPtrs) constTermPtrs->compress();
00185     if (cache)
00186     {
00187       for (int i = 0; i < cache->size(); i++) //for each domain
00188       {
00189         Array<Array<CacheCount*>*>* aacc = (*cache)[i];
00190         for (int j = 0; j < aacc->size(); j++) //for each predicate
00191         {
00192           Array<CacheCount*>* ccArr = (*aacc)[j];
00193           if (ccArr == NULL) continue;
00194           ccArr->compress();
00195         }
00196         aacc->compress();
00197       }
00198       cache->compress();
00199     }
00200     
00201   }
00202 
00203     //approximate size in MB, mainly due to cache
00204   double sizeMB() const
00205   {
00206     double sz = fixedSizeB_;
00207     if (cache)
00208     {
00209       for (int i = 0; i < cache->size(); i++)
00210         for (int j = 0; j < (*cache)[i]->size(); j++)
00211         {
00212           if((*(*cache)[i])[j] == NULL) continue;
00213           sz += (*(*cache)[i])[j]->size() * (sizeof(CacheCount) +
00214                                              sizeof(CacheCount*));
00215         }
00216     }
00217     if (constTermPtrs) sz += constTermPtrs->size() * sizeof(Term*);
00218     return sz/1000000.0;
00219   }
00220 
00221 
00222   static void computeFixedSizeB() { fixedSizeB_ = sizeof(AuxClauseData); }
00223 
00224 
00225   void reset()
00226   {
00227     gain = 0;
00228     if (constTermPtrs) constTermPtrs->clear();
00229     op = OP_NONE;
00230     removedClauseIdx = -1;
00231     deleteCache(); cache = NULL;
00232     prevClauseStr = "";
00233     addedPredStr = "";
00234     removedPredIdx = -1;
00235   }
00236 
00237   double gain;
00238   Array<Term*>* constTermPtrs;
00239   int op;
00240   int removedClauseIdx;
00241 
00242     //cache[d][p] is an array of CacheCounts for predicate p in domain d
00243   Array<Array<Array<CacheCount*>*>*>* cache; 
00244 
00245   string prevClauseStr;
00246   string addedPredStr;
00247   int removedPredIdx;
00248 
00249     // Variables for structural gradient descent
00250   bool hasBeenExpanded;
00251   int lastStepExpanded;
00252   int lastStepOverMinWeight;
00253 
00254   static double fixedSizeB_;
00255 };
00256 
00257 
00258 #endif

Generated on Tue Jan 16 05:30:04 2007 for Alchemy by  doxygen 1.5.1