00001 #include "gfeature.h"
00002 #include "rrf.h"
00003
00004 GroundRRF::GroundRRF(RRF* rrf, Database* db)
00005 : rrf_(rrf), db_(db)
00006 {
00007 numCounts_ = 0;
00008 int numFeatures = rrf_->getNumFeatures();
00009 for (int i = 0; i < numFeatures; i++) {
00010 allFeatures_.append(Array<GroundFeature*>());
00011 numCounts_ += rrf_->getFeature(i)->getNumWeights();
00012 }
00013
00014 Array<int> nullGrounding;
00015 root_ = rrf->getRoot()->constructGroundFeature(this, nullGrounding, db);
00016 allFeatures_[rrf->getRoot()->getId()].append(root_);
00017 }
00018
00019
00020 void GroundRRF::getCounts(Array<double>& counts)
00021 {
00022 counts.growToSize(numCounts_);
00023
00024
00025 int countIdx = 0;
00026 for (int i = 0; i < allFeatures_.size(); i++) {
00027 for (int j = 0; j < rrf_->getFeature(i)->getNumWeights(); j++) {
00028
00029 double totalCounts = 0.0;
00030 for (int k = 0; k < allFeatures_[i].size(); k++) {
00031 if (allFeatures_[i][k] == NULL) {
00032 continue;
00033 }
00034 totalCounts += allFeatures_[i][k]->getDeriv()
00035 * allFeatures_[i][k]->computePartialDeriv(i,j);
00036
00037 #if 0
00038 cout << "Deriv " << i << " " << k << ": ";
00039 cout << allFeatures_[i][k]->getDeriv() << endl;
00040 cout << allFeatures_[i][k]->computePartialDeriv(i,j) << endl;
00041 #endif
00042 }
00043 #if 0
00044 double oldCounts = root_->getCounts(i,j);
00045 if (2.0*fabs((oldCounts - totalCounts)/(oldCounts + totalCounts))
00046 > 0.01) {
00047 cout << "Counts differ " << i << " " << j << ": \n";
00048 cout << "Groundings: " << allFeatures_[i].size() << endl;
00049 cout << "Deriv: " << allFeatures_[i][0]->getDeriv() << endl;
00050 cout << "Partial: " << allFeatures_[i][0]->computePartialDeriv(i,j) << endl;
00051 cout << oldCounts << endl;
00052 cout << totalCounts << endl;
00053 }
00054
00055 counts.append(root_->getCounts(i, j));
00056 #endif
00057 counts[countIdx++] = totalCounts;
00058 }
00059 }
00060 }
00061
00062
00063 void GroundRRF::getPseudoCounts(Array<double>& counts,
00064 const Array<int>& queryPreds, double samplingFrac)
00065 {
00066
00067 counts.growToSize(numCounts_);
00068 for (int i = 0; i < numCounts_; i++) {
00069 counts[i] = 0.0;
00070 }
00071
00072
00073 double trueSum = getLogValue();
00074
00075 if (isnan(trueSum) || isinf(trueSum)) {
00076 for (int i = 0; i < numCounts_; i++) {
00077 counts[i] = trueSum;
00078 }
00079 return;
00080 }
00081
00082 Array<double> trueCounts;
00083 getCounts(trueCounts);
00084
00085 double falseSum;
00086 Array<double> falseCounts;
00087
00088
00089 for (int q = 0; q < queryPreds.size(); q++) {
00090
00091 int i = queryPreds[q];
00092 for (int j = 0; j < getNumPredicateGroundings(i); j++) {
00093
00094 if (frand() < samplingFrac) {
00095
00096
00097 bool origValue = getPredicateValue(i,j);
00098 setPredicateAndUpdate(i,j,!origValue);
00099
00100
00101 falseSum = getLogValue();
00102 getCounts(falseCounts);
00103
00104 double prob_false = sigmoid(falseSum - trueSum);
00105 for (int c = 0; c < numCounts_; c++) {
00106 counts[c] += prob_false * (trueCounts[c] - falseCounts[c]);
00107 }
00108
00109
00110 setPredicateAndUpdate(i,j,origValue);
00111 }
00112 }
00113 }
00114
00115 #if 0
00116 for (int c = 0; c < numCounts_; c++) {
00117 cout << "lpl counts[" << c << "] = " << counts[c] << endl;
00118 }
00119 #endif
00120
00121
00122 for (int i = 0; i < numCounts_; i++) {
00123 counts[i] /= samplingFrac;
00124 }
00125
00126
00127 double numQueryPreds = 0.0;
00128 for (int q = 0; q < queryPreds.size(); q++) {
00129 numQueryPreds += getNumPredicateGroundings(queryPreds[q]);
00130 }
00131 for (int i = 0; i < numCounts_; i++) {
00132 counts[i] /= numQueryPreds;
00133 }
00134 }
00135
00136
00137 void GroundRRF::getPseudoCountsFast(Array<double>& counts,
00138 const Array<int>& queryPreds, double samplingFrac)
00139 {
00140 #if 0 // THIS IS BROKEN -- don't use it!
00141
00142 counts.growToSize(numCounts_);
00143 for (int i = 0; i < numCounts_; i++) {
00144 counts[i] = 0.0;
00145 }
00146
00147
00148 double trueSum = getLogValue();
00149 Array<double> trueCounts;
00150 getCounts(trueCounts);
00151
00152 double falseSum;
00153 Array<double> falseCounts;
00154 Array<double> falseCounts2;
00155 #if 0
00156 Array<double> counts1;
00157 Array<double> counts2;
00158 counts1.growToSize(numCounts_);
00159 counts2.growToSize(numCounts_);
00160 for (int i = 0; i < numCounts_; i++) {
00161 counts1[i] = 0.0;
00162 counts2[i] = 0.0;
00163 }
00164 #endif
00165
00166
00167 for (int q = 0; q < queryPreds.size(); q++) {
00168
00169 int i = queryPreds[q];
00170 for (int j = 0; j < getNumPredicateGroundings(i); j++) {
00171
00172 if (frand() < samplingFrac) {
00173
00174 int c = 0;
00175 #if 0
00176 for (int f = 0; f < allFeatures_.size(); f++) {
00177 Feature* feat = rrf_->getFeature(f);
00178 for (int w = 0; w < feat->getNumWeights(); w++) {
00179 cout << "pre[" << c << "] = "
00180 << feat->getCount(w) << endl;
00181 feat->setCount(w,0.0);
00182 c++;
00183 }
00184 }
00185 #endif
00186
00187
00188 bool origValue = getPredicateValue(i,j);
00189 setPredicateAndUpdateCounts(i,j,!origValue);
00190
00191
00192 falseSum = getLogValue();
00193 double prob_false = sigmoid(falseSum - trueSum);
00194
00195
00196 c = 0;
00197 for (int f = 0; f < allFeatures_.size(); f++) {
00198 Feature* feat = rrf_->getFeature(f);
00199 for (int w = 0; w < feat->getNumWeights(); w++) {
00200
00201 if (c == 2) {
00202 cout << "pfalse = " << prob_false << endl;
00203 cout << "-featCount = " << -feat->getCount(w) << endl;
00204 }
00205 counts[c] += prob_false * -feat->getCount(w);
00206 #if 0
00207
00208
00209
00210 cout << prob_false*(trueCounts[c] - falseCounts[c]) << " "
00211 << prob_false*(trueCounts[c] - falseCounts2[c]) << endl;
00212 #endif
00213 c++;
00214 }
00215 }
00216
00217 #if 0
00218
00219 c = 0;
00220 for (int f = 0; f < allFeatures_.size(); f++) {
00221 Feature* feat = rrf_->getFeature(f);
00222 for (int w = 0; w < feat->getNumWeights(); w++) {
00223 cout << "mid[" << c << "] = "
00224 << feat->getCount(w) << endl;
00225 c++;
00226 }
00227 }
00228 #endif
00229
00230
00231
00232 setPredicateAndUpdateCounts(i,j,origValue);
00233 #if 0
00234 c = 0;
00235 for (int f = 0; f < allFeatures_.size(); f++) {
00236 Feature* feat = rrf_->getFeature(f);
00237 for (int w = 0; w < feat->getNumWeights(); w++) {
00238 cout << "post[" << c << "] = "
00239 << feat->getCount(w) << endl;
00240
00241 c++;
00242 }
00243 }
00244 #endif
00245 }
00246 }
00247 }
00248
00249 for (int c = 0; c < numCounts_; c++) {
00250 cout << "lpl1 counts[" << c << "] = " << counts[c] << endl;
00251 }
00252
00253 #if 0
00254 for (int c = 0; c < numCounts_; c++) {
00255 cout << c << ": " << counts1[c] << " ; " << counts2[c] << endl;
00256 }
00257 #endif
00258
00259
00260 for (int i = 0; i < queryPreds.size(); i++) {
00261 counts[i] /= samplingFrac;
00262 }
00263 #endif
00264 }
00265
00266
00267 double GroundRRF::getLogPseudoLikelihood(const Array<Predicate*>& queryPreds)
00268 {
00269
00270
00271
00272 double lpl = 0.0;
00273 double logTruthProb = getLogValue();
00274
00275 for (int q = 0; q < queryPreds.size(); q++) {
00276
00277 int i = queryPreds[q]->getId();
00278 Array<int> grounding;
00279 for (int ti = 0; ti < queryPreds[q]->getNumTerms(); ti++) {
00280 grounding.append(queryPreds[q]->getTerm(ti)->getId());
00281 }
00282 int j = rrf_->getFeature(i)->getGroundingIndex(grounding, db_);
00283
00284 if (allFeatures_[i][j] == NULL) {
00285 lpl += log(0.5);
00286 } else {
00287 double currValue = allFeatures_[i][j]->getValue();
00288 double newValue = (currValue == 0.0) ? 1.0 : 0.0;
00289
00290 setPredicateAndUpdate(i, j, newValue);
00291 double logUntruthProb = getLogValue();
00292
00293 setPredicateAndUpdate(i, j, currValue);
00294
00295 if (fabs(logUntruthProb - logTruthProb) > 100) {
00296 lpl += (logUntruthProb - logTruthProb);
00297 } else {
00298 lpl += -log(1.0 + exp(logUntruthProb - logTruthProb));
00299 }
00300
00301 }
00302 }
00303
00304 return lpl/queryPreds.size();
00305 }
00306
00307
00308 double GroundRRF::getLogPseudoLikelihood(const Array<int>& queryPreds)
00309 {
00310
00311
00312
00313 double lpl = 0.0;
00314 dirtyAll();
00315 double logTruthProb = getLogValue();
00316
00317
00318
00319
00320 int totalPreds = 0;
00321 for (int q = 0; q < queryPreds.size(); q++) {
00322
00323 int i = queryPreds[q];
00324 for (int j = 0; j < allFeatures_[i-1].size(); j++) {
00325 totalPreds++;
00326 if (allFeatures_[i-1][j] == NULL) {
00327 lpl += log(0.5);
00328 } else {
00329 double currValue = allFeatures_[i-1][j]->getValue();
00330 double newValue = (currValue == 0.0) ? 1.0 : 0.0;
00331
00332 setPredicateAndUpdate(i, j, newValue);
00333 double logUntruthProb = getLogValue();
00334 #if 0
00335
00336 if (isnan(logUntruthProb)) {
00337 cout << "\nuntruthProb = " << logUntruthProb;
00338 } else {
00339 cout << ".";
00340 }
00341 #endif
00342
00343 setPredicateAndUpdate(i, j, currValue);
00344
00345
00346 double curr_lpl;
00347 if (logUntruthProb - logTruthProb > 100) {
00348 curr_lpl = logTruthProb - logUntruthProb;
00349 } else if (logTruthProb - logUntruthProb > 100) {
00350 curr_lpl = 0.0;
00351 } else {
00352 curr_lpl = -log(1.0 + exp(logUntruthProb - logTruthProb));
00353 }
00354 lpl += curr_lpl;
00355 #if 0
00356
00357
00358
00359 cout << " = " << curr_lpl << endl;
00360 cout << logUntruthProb << " ; " << logTruthProb << endl;
00361 lpl += logTruthProb - log(exp(logTruthProb) + exp(logUntruthProb));
00362 #endif
00363 }
00364 }
00365 }
00366
00367
00368
00369
00370 return lpl/totalPreds;
00371 }