Alchemy: src/logic/clausesampler.h Source File

00001 /*
00002  * All of the documentation and software included in the
00003  * Alchemy Software is copyrighted by Stanley Kok, Parag
00004  * Singla, Matthew Richardson, Pedro Domingos, Marc
00005  * Sumner, Hoifung Poon, and Daniel Lowd.
00006  * 
00007  * Copyright [2004-07] Stanley Kok, Parag Singla, Matthew
00008  * Richardson, Pedro Domingos, Marc Sumner, Hoifung
00009  * Poon, and Daniel Lowd. All rights reserved.
00010  * 
00011  * Contact: Pedro Domingos, University of Washington
00012  * (pedrod@cs.washington.edu).
00013  * 
00014  * Redistribution and use in source and binary forms, with
00015  * or without modification, are permitted provided that
00016  * the following conditions are met:
00017  * 
00018  * 1. Redistributions of source code must retain the above
00019  * copyright notice, this list of conditions and the
00020  * following disclaimer.
00021  * 
00022  * 2. Redistributions in binary form must reproduce the
00023  * above copyright notice, this list of conditions and the
00024  * following disclaimer in the documentation and/or other
00025  * materials provided with the distribution.
00026  * 
00027  * 3. All advertising materials mentioning features or use
00028  * of this software must display the following
00029  * acknowledgment: "This product includes software
00030  * developed by Stanley Kok, Parag Singla, Matthew
00031  * Richardson, Pedro Domingos, Marc Sumner, Hoifung
00032  * Poon, and Daniel Lowd in the Department of Computer Science and
00033  * Engineering at the University of Washington".
00034  * 
00035  * 4. Your publications acknowledge the use or
00036  * contribution made by the Software to your research
00037  * using the following citation(s): 
00038  * Stanley Kok, Parag Singla, Matthew Richardson and
00039  * Pedro Domingos (2005). "The Alchemy System for
00040  * Statistical Relational AI", Technical Report,
00041  * Department of Computer Science and Engineering,
00042  * University of Washington, Seattle, WA.
00043  * http://www.cs.washington.edu/ai/alchemy.
00044  * 
00045  * 5. Neither the name of the University of Washington nor
00046  * the names of its contributors may be used to endorse or
00047  * promote products derived from this software without
00048  * specific prior written permission.
00049  * 
00050  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON
00051  * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
00052  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00053  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00054  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
00055  * OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
00056  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00057  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00058  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00059  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00060  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00061  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00062  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
00063  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00064  * 
00065  */
00066 #ifndef CLAUSESAMPLER_H_NOV_16_2005
00067 #define CLAUSESAMPLER_H_NOV_16_2005
00068 
00069 #include <cmath>
00070 #include "database.h"
00071 #include "truefalsegroundingsstore.h"
00072 
00073   // expl and logl aren't available in cygwin / windows, so use exp and log
00074 #ifndef expl
00075 # define expl exp
00076 # define logl log
00077 #endif
00078 
00079   // test for convergence once every this many samples
00080 const int NUM_SAMPLES_TEST_CONV = 200; 
00081 
00082 class Clause;
00083 class VarsGroundedType;
00084 
00085 
00086 class ClauseSampler
00087 {
00088  public:
00089   ClauseSampler(const double& delta, const double& epsilon,
00090                 const int& minSamples, const int& maxSamples) 
00091     : delta_(delta), epsilon_(epsilon), minSamples_(minSamples),
00092       maxSamples_(maxSamples) { random_.init(-1); }
00093   
00094   ~ClauseSampler() {}
00095   
00096   void setEpsilon(const double& epsilon) { epsilon_ = epsilon; }
00097   double getEpsilon() const { return epsilon_; }
00098 
00099   void setDelta(const double& delta) { delta_ = delta; }
00100   double getDelta() const { return delta_; }
00101 
00102   void setMinSamples(const int& min) { minSamples_ = min; }
00103   int getMinSamples() const { return minSamples_; }
00104 
00105   void setMaxSamples(const int& max) { maxSamples_ = max; }
00106   int getMaxSamples() const { return maxSamples_; }
00107 
00108 
00109   double computeNumSamples(const int& numPreds) const
00110   { 
00111     assert(numPreds > 1);
00112     double n =  9/2.0 * (numPreds-1) * 1/(epsilon_*epsilon_) * logl(2.0/delta_);
00113     if (minSamples_ >= 0 && n < minSamples_)     n = minSamples_;
00114     else if (maxSamples_ >= 0 && n > maxSamples_) n = maxSamples_;
00115     return n;
00116   }
00117 
00118     //clause must have more than one predicate and been canonicalized
00119   double estimateNumTrueGroundings(Clause* const& clause, 
00120                                    const Predicate* const & flippedGndPred,
00121                                    const Domain* const& domain,
00122                                    double numSamples=-1);
00123 
00124  private:
00125   void getNumGroundingWithEachPredGrounded(const Clause* const & clause,
00126                                           Array<double>& gndingsWithPredGnded,
00127                                         const Array<VarsGroundedType*>& vgtArr);
00128   
00129   void getProbInfo(const Clause* const & clause, const Database* const & db, 
00130                    const Predicate* const & flippedGndPred,
00131                    const Array<VarsGroundedType*>* const & vgtArr, 
00132                    TrueFalseGroundingsStore* const & tfGndingsStore,
00133                    Array<float>& corrProbLimit, double& sumTrueGnds, 
00134                    Array<double>& numTrueGndsPerPos);  
00135 
00136 
00137   int choosePredPos(const Array<float>& corrProbLimit)
00138   {
00139     float r = random_.random();    
00140     for (int predPos = 0; predPos < corrProbLimit.size(); predPos++)
00141       if (r < corrProbLimit[predPos]) return predPos;
00142     return -1;
00143   }
00144 
00145 
00146   Array<int>* chooseSample(const Clause* const & clause, 
00147                            const Array<VarsGroundedType*>* const & vgtArr,
00148                            const Domain* const & domain, const int& predPos,
00149                            const Predicate* const & flippedGndPred);
00150 
00151 
00152   void groundClause(const Array<VarsGroundedType*>& vgtArr, 
00153                     const Array<int>& samp);
00154 
00155   
00156   int testSampleMembership(Clause* const & clause, 
00157                            Array<VarsGroundedType*>* const & vgtArr,
00158                            const Database* const & db, 
00159                            const Array<int>& samp,
00160                            const double& sumTrueGnds);
00161 
00162 
00163   double getNumSamplesNeeded(const double& sigma, const double& gamma,
00164                              const double& epsilon)
00165   {
00166     double p = (gamma+1)/2.0;
00167     
00168       // inverse of normal function
00169     double a[] = {-3.969683028665376e+01,  2.209460984245205e+02,
00170                   -2.759285104469687e+02,  1.383577518672690e+02,
00171                   -3.066479806614716e+01,  2.506628277459239e+00};
00172     double b[] = {-5.447609879822406e+01,  1.615858368580409e+02,
00173                   -1.556989798598866e+02,  6.680131188771972e+01,
00174                   -1.328068155288572e+01 };
00175     
00176     double c[] = {-7.784894002430293e-03, -3.223964580411365e-01,
00177                   -2.400758277161838e+00, -2.549732539343734e+00,
00178                   4.374664141464968e+00,  2.938163982698783e+00};
00179     
00180     double d[] = {7.784695709041462e-03, 3.224671290700398e-01,
00181                   2.445134137142996e+00, 3.754408661907416e+00};
00182     
00183       // Define break-points.
00184     double plow  = 0.02425;
00185     double phigh = 1 - plow;
00186     double invNorm;
00187     
00188     if (p < plow)  // Rational approximation for lower region:
00189     {
00190       double q = sqrt(-2*log(p));
00191       invNorm = (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
00192                  ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
00193     }
00194     else
00195     if (phigh < p) // Rational approximation for upper region:
00196     {
00197       double q  = sqrt(-2*log(1-p));
00198       invNorm =  -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
00199                   ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
00200     }
00201     else     // Rational approximation for central region:
00202     {
00203       double q = p - 0.5;
00204       double r = q*q;
00205       invNorm = (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
00206                 (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1);
00207     }
00208       // compute number of samples needed
00209     double val = invNorm * sigma / epsilon;
00210     return val*val;
00211   }
00212 
00213 
00214   
00215  private:
00216   double delta_;
00217   double epsilon_;
00218 
00219   int minSamples_;
00220   int maxSamples_;
00221 
00222   Random random_;
00223 };
00224 
00225 
00226 #endif