00001 #include "structlearn.h"
00002
00004
00005 struct ExistFormula
00006 {
00007 ExistFormula(const string& fformula)
00008 : formula(fformula), gain(0), wt(0), newScore(0), numPreds(0) {}
00009 ~ExistFormula()
00010 {
00011 for (int i = 0; i < cnfClausesForDomains.size(); i++)
00012 cnfClausesForDomains[i].deleteItemsAndClear();
00013 }
00014
00015 string formula;
00016
00017
00018 Array<Array<Clause*> > cnfClausesForDomains;
00019 double gain;
00020 double wt;
00021 Array<double> wts;
00022 double newScore;
00023 int numPreds;
00024 };
00025
00026
00027 void StructLearn::deleteExistFormulas(Array<ExistFormula*>& existFormulas)
00028 { existFormulas.deleteItemsAndClear(); }
00029
00030
00031 void StructLearn::getMLNClauses(Array<Clause*>& initialMLNClauses,
00032 Array<ExistFormula*>& existFormulas)
00033 {
00034 for (int i = 0; i < mln0_->getNumClauses(); i++)
00035 if (isModifiableClause(i))
00036 {
00037 Clause* c = (Clause*) mln0_->getClause(i);
00038 initialMLNClauses.append(new Clause(*c));
00039 }
00040
00041 const FormulaAndClausesArray* fnca = mln0_->getFormulaAndClausesArray();
00042 for (int i = 0; i < fnca->size(); i++)
00043 if (isNonModifiableFormula((*fnca)[i]))
00044 existFormulas.append(new ExistFormula((*fnca)[i]->formula));
00045
00046 for (int i = 0; i < existFormulas.size(); i++)
00047 {
00048 string formula = existFormulas[i]->formula;
00049 FormulaAndClauses tmp(formula, 0, false);
00050 const FormulaAndClausesArray* fnca
00051 = (*mlns_)[0]->getFormulaAndClausesArray();
00052 int a = fnca->find(&tmp);
00053 existFormulas[i]->numPreds = (*fnca)[a]->numPreds;
00054
00055 Array<Array<Clause*> >& cnfClausesForDomains
00056 = existFormulas[i]->cnfClausesForDomains;
00057 cnfClausesForDomains.growToSize(mlns_->size());
00058 for (int j = 0; j < mlns_->size(); j++)
00059 {
00060 Array<Clause*>& cnfClauses = cnfClausesForDomains[j];
00061 fnca = (*mlns_)[j]->getFormulaAndClausesArray();
00062 a = fnca->find(&tmp);
00063 assert(a >= 0);
00064 IndexClauseHashArray* indexClauses = (*fnca)[a]->indexClauses;
00065 for (int k = 0; k < indexClauses->size(); k++)
00066 {
00067 Clause* c = new Clause(*((*indexClauses)[k]->clause));
00068 c->newAuxClauseData();
00069 cnfClauses.append(c);
00070 }
00071 cnfClauses.compress();
00072 }
00073 }
00074 }
00075
00076
00077
00078 inline
00079 void StructLearn::pllCountsForExistFormula(Clause* cnfClause,
00080 const int& domainIdx,
00081 int* clauseIdxInMln,
00082 Array<UndoInfo*>* const & undoInfos)
00083 {
00084 assert(cnfClause->getAuxClauseData()->cache == NULL);
00085 bool inCache = false;
00086 bool hasDomainCounts = false;
00087 double prevCNFClauseSize = 0;
00088
00089 if (cacheClauses_)
00090 {
00091 int i;
00092 if ((i = cachedClauses_->find(cnfClause)) >= 0)
00093 {
00094 inCache = true;
00095 Array<Array<Array<CacheCount*>*>*>* cache =
00096 (*cachedClauses_)[i]->getAuxClauseData()->cache;
00097 Array<Array<CacheCount*>*>* domainCache = (*cache)[domainIdx];
00098 for (int j = 0; j < domainCache->size(); j++)
00099 if ((*domainCache)[j] != NULL) { hasDomainCounts = true; break; }
00100 }
00101
00102 if (hasDomainCounts)
00103 {
00104 pll_->insertCounts(clauseIdxInMln, undoInfos,
00105 (*cachedClauses_)[i]->getAuxClauseData()->cache,
00106 domainIdx);
00107 return;
00108 }
00109 else
00110 {
00111 if (cacheSizeMB_ < maxCacheSizeMB_)
00112 {
00113 if (inCache)
00114 {
00115 cnfClause->getAuxClauseData()->cache
00116 = (*cachedClauses_)[i]->getAuxClauseData()->cache;
00117 prevCNFClauseSize = cnfClause->sizeMB();
00118 }
00119 else
00120 cnfClause->newCache(domains_->size(),
00121 (*domains_)[0]->getNumPredicates());
00122 }
00123 else
00124 {
00125 static bool printCacheFull = true;
00126 if (printCacheFull)
00127 {
00128 cout << "Cache is full, approximate size = " << cacheSizeMB_
00129 <<" MB" << endl;
00130 printCacheFull = false;
00131 }
00132 }
00133 }
00134 }
00135
00136
00137
00138 pll_->computeCountsForNewAppendedClause(cnfClause,clauseIdxInMln,domainIdx,
00139 undoInfos, sampleClauses_,
00140 cnfClause->getAuxClauseData()->cache);
00141
00142 if (cnfClause->getAuxClauseData()->cache)
00143 {
00144 if (inCache)
00145 {
00146 cacheSizeMB_ += cnfClause->sizeMB() - prevCNFClauseSize;
00147 cnfClause->getAuxClauseData()->cache = NULL;
00148 }
00149 else
00150 {
00151 if (cacheSizeMB_ < maxCacheSizeMB_)
00152 {
00153 cacheSizeMB_ += cnfClause->sizeMB();
00154 Array<Array<Array<CacheCount*>*>*>* cache
00155 = cnfClause->getAuxClauseData()->cache;
00156 cnfClause->getAuxClauseData()->cache = NULL;
00157 Clause* copyClause = new Clause(*cnfClause);
00158 copyClause->getAuxClauseData()->cache = cache;
00159 copyClause->compress();
00160 cachedClauses_->append(copyClause);
00161 }
00162 else
00163 {
00164 cnfClause->getAuxClauseData()->deleteCache();
00165 cnfClause->getAuxClauseData()->cache = NULL;
00166 }
00167 }
00168 }
00169 }
00170
00171
00172 inline
00173 void StructLearn::printNewScore(const string existFormStr,const int& lbfgsbIter,
00174 const double& lbfgsbSec, const double& newScore,
00175 const double& gain, const double& penalty,
00176 const double& wt)
00177 {
00178 cout << "*************************** " << candCnt_++ << ", iter " << iter_
00179 << ", beam search iter " << bsiter_ << endl;
00180
00181 cout << "exist formula : " << existFormStr << endl;
00182 cout << "op : OP_ADD" << endl;
00183 cout << "score : " << newScore << endl;
00184 cout << "gain : " << gain << endl;
00185 cout << "penalty : " << penalty << endl;
00186 cout << "net gain : " << gain - penalty << endl;
00187 cout << "wt : " << wt << endl;
00188 cout << "num LBFGSB iter = " << lbfgsbIter << endl;
00189 cout << "time taken by LBFGSB = "; Timer::printTime(cout, lbfgsbSec);
00190 cout << endl;
00191 cout << "*************************** " << endl << endl;;
00192 }
00193
00194
00195 inline
00196 void StructLearn::rquicksort(Array<ExistFormula*>& existFormulas, const int& l,
00197 const int& r)
00198 {
00199 ExistFormula** items = (ExistFormula**) existFormulas.getItems();
00200 if (l >= r) return;
00201 ExistFormula* tmp = items[l];
00202 items[l] = items[(l+r)/2];
00203 items[(l+r)/2] = tmp;
00204
00205 int last = l;
00206 for (int i = l+1; i <= r; i++)
00207 if (items[i]->gain > items[l]->gain)
00208 {
00209 ++last;
00210 ExistFormula* tmp = items[last];
00211 items[last] = items[i];
00212 items[i] = tmp;
00213 }
00214
00215 tmp = items[l];
00216 items[l] = items[last];
00217 items[last] = tmp;
00218 rquicksort(existFormulas, l, last-1);
00219 rquicksort(existFormulas, last+1, r);
00220 }
00221
00222
00223 inline
00224 void StructLearn::rquicksort(Array<ExistFormula*>& ef)
00225 { rquicksort(ef, 0, ef.size()-1); }
00226
00227
00228
00229
00230
00231 inline
00232 void StructLearn::evaluateExistFormula(ExistFormula* const& ef,
00233 const bool& computeCountsOnly,
00234 Array<Array<Array<IndexAndCount*> > >* const & iacArraysPerDomain,
00235 const double& prevScore)
00236 {
00237 bool undo = !computeCountsOnly;
00238 bool evalGainLearnWts = !computeCountsOnly;
00239
00240 Array<int*> idxPtrs;
00241 Array<UndoInfo*>* undoInfos = (undo) ? new Array<UndoInfo*> : NULL;
00242
00243
00244
00245 Array<Array<Clause*> >& cnfClausesForDomains = ef->cnfClausesForDomains;
00246 for (int d = 0; d < cnfClausesForDomains.size(); d++)
00247 {
00248 Array<Clause*>& cnfClauses = cnfClausesForDomains[d];
00249 MLN* mln = (*mlns_)[d];
00250 for (int k = 0; k < cnfClauses.size(); k++)
00251 {
00252 Clause* c = cnfClauses[k];
00253
00254
00255
00256 if (!undo && mln->containsClause(c)) continue;
00257 int* tmpClauseIdxInMLN = new int(mln->getNumClauses() + k);
00258 idxPtrs.append(tmpClauseIdxInMLN);
00259 if (iacArraysPerDomain)
00260 {
00261 Array<Array<IndexAndCount*> >& iacArrays = (*iacArraysPerDomain)[d];
00262 assert(iacArrays.size() == cnfClauses.size());
00263 Array<IndexAndCount*>& iacArray = iacArrays[k];
00264
00265 Array<UndoInfo*> tmpUndoInfos;
00266 pllCountsForExistFormula(c, d, tmpClauseIdxInMLN, &tmpUndoInfos);
00267 for (int i = 0; i < tmpUndoInfos.size(); i++)
00268 iacArray.append(tmpUndoInfos[i]->affectedArr->lastItem());
00269 if (undoInfos) undoInfos->append(tmpUndoInfos);
00270 else tmpUndoInfos.deleteItemsAndClear();
00271 }
00272 else
00273 pllCountsForExistFormula(c, d, tmpClauseIdxInMLN, undoInfos);
00274 }
00275 }
00276
00277
00278
00279
00280 if (evalGainLearnWts)
00281 {
00282 Array<double> priorMeans, priorStdDevs;
00283
00284
00285
00286 int numAdded = (indexTrans_) ? 1 : cnfClausesForDomains[0].size();
00287 setPriorMeansStdDevs(priorMeans, priorStdDevs, numAdded, NULL);
00288
00289 if (hasPrior_)
00290 pll_->setMeansStdDevs(priorMeans.size(), priorMeans.getItems(),
00291 priorStdDevs.getItems());
00292 else
00293 pll_->setMeansStdDevs(-1, NULL, NULL);
00294
00295 if (indexTrans_)
00296 {
00297 for (int d = 0; d < cnfClausesForDomains.size(); d++)
00298 {
00299 int numCNFClauses = cnfClausesForDomains[d].size();
00300 indexTrans_->appendClauseIdxToClauseFormulaIdxs(1, numCNFClauses);
00301 }
00302 }
00303
00304 int iter; bool error; double elapsedSec;
00305 int numClausesFormulas = getNumClausesFormulas();
00306 Array<double>* wts = &(ef->wts);
00307 wts->growToSize(numClausesFormulas + numAdded + 1);
00308
00309 double newScore = maximizeScore(numClausesFormulas, numAdded, wts,
00310 NULL, NULL, iter, error, elapsedSec);
00311
00312 if (indexTrans_)
00313 {
00314 for (int d = 0; d < cnfClausesForDomains.size(); d++)
00315 {
00316 int numCNFClauses = cnfClausesForDomains[d].size();
00317 indexTrans_->removeClauseIdxToClauseFormulaIdxs(1, numCNFClauses);
00318 }
00319 }
00320
00321 double totalWt = 0;
00322 for (int i = 0; i < numAdded; i++)
00323 totalWt += (*wts)[numClausesFormulas+i+1];
00324
00325 double penalty = ef->numPreds * penalty_;
00326
00327 ef->gain = newScore-prevScore-penalty;
00328 ef->wt = totalWt;
00329 ef->newScore = newScore;
00330
00331 if (error) {newScore=prevScore;cout<<"LBFGSB failed to find wts"<<endl;}
00332 printNewScore(ef->formula, iter, elapsedSec, newScore,
00333 newScore-prevScore, penalty, totalWt);
00334 }
00335
00336 if (undo) { pll_->undoAppendRemoveCounts(undoInfos); delete undoInfos; }
00337 idxPtrs.deleteItemsAndClear();
00338 }
00339
00340
00341
00342 double StructLearn::evaluateExistFormulas(Array<ExistFormula*>& existFormulas,
00343 Array<ExistFormula*>& highGainWtFormulas,
00344 const double& prevScore)
00345 {
00346 if (existFormulas.size() == 0) return 0;
00347 for (int i = 0; i < existFormulas.size(); i++)
00348 evaluateExistFormula(existFormulas[i], false, NULL, prevScore);
00349
00350 Array<ExistFormula*> tmp(existFormulas);
00351 rquicksort(tmp);
00352 double minGain = DBL_MAX;
00353 cout << "evaluated existential formulas " << endl;
00354 cout << "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" << endl;
00355 for (int i = 0; i < tmp.size(); i++)
00356 {
00357 existFormulas[i] = tmp[i];
00358 double gain = existFormulas[i]->gain;
00359 double wt = existFormulas[i]->wt;
00360 cout << i << "\t" << existFormulas[i]->formula << endl
00361 << "\tgain = " << gain << ", wt = " << wt << ", op = OP_ADD"
00362 << endl;
00363 if (gain > minGain_ && wt >= minWt_)
00364 {
00365 highGainWtFormulas.append(tmp[i]);
00366 if (gain < minGain) minGain = gain;
00367 }
00368 }
00369 cout << "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" << endl << endl;
00370
00371 if (minGain == DBL_MAX) minGain = 0;
00372 return minGain;
00373 }
00374
00375
00376 inline
00377 void StructLearn::appendExistFormulaToMLNs(ExistFormula* const & ef)
00378 {
00379 Array<Array<Array<IndexAndCount*> > > iacsPerDomain;
00380 Array<Array<Clause*> >& cnfClausesForDomains = ef->cnfClausesForDomains;
00381 iacsPerDomain.growToSize(cnfClausesForDomains.size());
00382 for (int d = 0; d < cnfClausesForDomains.size(); d++)
00383 {
00384 Array<Array<IndexAndCount*> >& iacArrays = iacsPerDomain[d];
00385 iacArrays.growToSize( cnfClausesForDomains[d].size() );
00386 }
00387 evaluateExistFormula(ef, true, &iacsPerDomain, 0);
00388
00389 Array<double>& wts = ef->wts;
00390 int numClausesFormulas = getNumClausesFormulas();
00391
00392
00393
00394 if (indexTrans_ == NULL) updateWts(wts, NULL, NULL);
00395
00396
00397 for (int d = 0; d < cnfClausesForDomains.size(); d++)
00398 {
00399 MLN* mln = (*mlns_)[d];
00400 Array<Array<IndexAndCount*> >& iacArrays = iacsPerDomain[d];
00401 Array<Clause*>& cnfClauses = cnfClausesForDomains[d];
00402
00403 for (int i = 0; i < cnfClauses.size(); i++)
00404 {
00405 int idx;
00406
00407
00408 double wt = (indexTrans_) ? 0 : wts[numClausesFormulas+i+1];
00409
00410
00411 mln->appendClause(ef->formula, true, new Clause(*cnfClauses[i]),
00412 wt, false, idx);
00413 mln->setFormulaPriorMean(ef->formula, priorMean_);
00414 ((MLNClauseInfo*)mln->getMLNClauseInfo(idx))->priorMean
00415 += priorMean_/cnfClauses.size();
00416
00417 int* idxPtr = mln->getMLNClauseInfoIndexPtr(idx);
00418 Array<IndexAndCount*>& iacs = iacArrays[i];
00419 for (int j = 0; j < iacs.size(); j++) iacs[j]->index = idxPtr;
00420 }
00421 }
00422 assert(pll_->checkNoRepeatedIndex());
00423
00424
00425 if (indexTrans_)
00426 {
00427
00428 Array<string> appendedFormula;
00429 appendedFormula.append(ef->formula);
00430 updateWts(wts, NULL, &appendedFormula);
00431
00432 indexTrans_->createClauseIdxToClauseFormulaIdxsMap();
00433 }
00434
00435
00436 cout << "Modified MLN: Appended formula to MLN: " << ef->formula << endl;
00437 }
00438
00439
00440 inline
00441 bool StructLearn::effectExistFormulaOnMLNs(ExistFormula* ef,
00442 Array<ExistFormula*>& existFormulas,
00443 double& score)
00444 {
00445 cout << "effecting existentially quantified formula " << ef->formula
00446 << " on MLN..." << endl;
00447 appendExistFormulaToMLNs(ef);
00448 score = ef->newScore;
00449 printMLNClausesWithWeightsAndScore(score, iter_);
00450 printMLNToFile(NULL, iter_);
00451
00452 int r = existFormulas.find(ef);
00453 assert(r >= 0);
00454 ExistFormula* rf = existFormulas.removeItemFastDisorder(r);
00455 assert(rf == ef);
00456 delete rf;
00457 return true;
00458 }
00459
00460
00461 bool StructLearn::effectBestCandidateOnMLNs(Array<Clause*>& bestCandidates,
00462 Array<ExistFormula*>& existFormulas,
00463 Array<ExistFormula*>& highGainWtFormulas,
00464 double& score)
00465 {
00466 cout << "effecting best candidate among existential formulas and "
00467 << "best candidates on MLN..." << endl << endl;
00468
00469 int a = 0, b = 0;
00470 bool ok = false;
00471 int numCands = bestCandidates.size() + highGainWtFormulas.size();
00472 for (int i = 0; i < numCands; i++)
00473 {
00474 if (a >= bestCandidates.size())
00475 {
00476 if (ok=effectExistFormulaOnMLNs(highGainWtFormulas[b++],
00477 existFormulas, score)) break;
00478 }
00479 else
00480 if (b >= highGainWtFormulas.size())
00481 {
00482 cout << "effecting best candidate " << a << " on MLN..." << endl;
00483 if (ok=effectBestCandidateOnMLNs(bestCandidates[a++], score)) break;
00484 cout << "failed to effect candidate on MLN." << endl;
00485 delete bestCandidates[a-1];
00486 }
00487 else
00488 if (highGainWtFormulas[b]->gain >
00489 bestCandidates[a]->getAuxClauseData()->gain)
00490 {
00491 if (ok=effectExistFormulaOnMLNs(highGainWtFormulas[b++],
00492 existFormulas, score)) break;
00493 }
00494 else
00495 {
00496 cout << "effecting best candidate " << a << " on MLN..." << endl;
00497 if (ok=effectBestCandidateOnMLNs(bestCandidates[a++], score)) break;
00498 cout << "failed to effect candidate on MLN." << endl;
00499 delete bestCandidates[a-1];
00500 }
00501 }
00502
00503 for (int i = a; i < bestCandidates.size(); i++) delete bestCandidates[i];
00504 return ok;
00505 }