hasharray.h

00001 /*
00002  * All of the documentation and software included in the
00003  * Alchemy Software is copyrighted by Stanley Kok, Parag
00004  * Singla, Matthew Richardson, Pedro Domingos, Marc
00005  * Sumner, Hoifung Poon, and Daniel Lowd.
00006  * 
00007  * Copyright [2004-07] Stanley Kok, Parag Singla, Matthew
00008  * Richardson, Pedro Domingos, Marc Sumner, Hoifung
00009  * Poon, and Daniel Lowd. All rights reserved.
00010  * 
00011  * Contact: Pedro Domingos, University of Washington
00012  * (pedrod@cs.washington.edu).
00013  * 
00014  * Redistribution and use in source and binary forms, with
00015  * or without modification, are permitted provided that
00016  * the following conditions are met:
00017  * 
00018  * 1. Redistributions of source code must retain the above
00019  * copyright notice, this list of conditions and the
00020  * following disclaimer.
00021  * 
00022  * 2. Redistributions in binary form must reproduce the
00023  * above copyright notice, this list of conditions and the
00024  * following disclaimer in the documentation and/or other
00025  * materials provided with the distribution.
00026  * 
00027  * 3. All advertising materials mentioning features or use
00028  * of this software must display the following
00029  * acknowledgment: "This product includes software
00030  * developed by Stanley Kok, Parag Singla, Matthew
00031  * Richardson, Pedro Domingos, Marc Sumner, Hoifung
00032  * Poon, and Daniel Lowd in the Department of Computer Science and
00033  * Engineering at the University of Washington".
00034  * 
00035  * 4. Your publications acknowledge the use or
00036  * contribution made by the Software to your research
00037  * using the following citation(s): 
00038  * Stanley Kok, Parag Singla, Matthew Richardson and
00039  * Pedro Domingos (2005). "The Alchemy System for
00040  * Statistical Relational AI", Technical Report,
00041  * Department of Computer Science and Engineering,
00042  * University of Washington, Seattle, WA.
00043  * http://www.cs.washington.edu/ai/alchemy.
00044  * 
00045  * 5. Neither the name of the University of Washington nor
00046  * the names of its contributors may be used to endorse or
00047  * promote products derived from this software without
00048  * specific prior written permission.
00049  * 
00050  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON
00051  * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
00052  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00053  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00054  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
00055  * OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
00056  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00057  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00058  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00059  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00060  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00061  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00062  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
00063  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00064  * 
00065  */
00066 #ifndef HASHARRAY_H_JUL_19_2005
00067 #define HASHARRAY_H_JUL_19_2005
00068 
00069 #include <stdlib.h>
00070 #include <assert.h>
00071 #include <iostream>
00072 #include <string>
00073 #include "random.h"
00074 using namespace std;
00075 #include <ext/hash_map>
00076 using namespace __gnu_cxx;
00077 
00078 
00079   //An Array of that does not contain duplicates. It is backed up by a hash_set
00080 template <typename Type, class HashFn, class EqualFn> 
00081 class HashArray
00082 {
00083  public:
00084   HashArray(const int& initSize=0) : items_(NULL), maxItems_(0), numItems_(0),
00085                                      map_(new hash_map<Type,int,HashFn,EqualFn>)
00086   { allocateMemory(initSize); }
00087     
00088   
00089   HashArray(const HashArray<Type, HashFn, EqualFn>& x)
00090   { 
00091     if (x.numItems_ == 0) 
00092     {
00093       items_    = NULL;
00094       maxItems_ = 0;
00095       numItems_ = 0;
00096       allocateMemory(0);
00097       map_ = new hash_map<Type, int, HashFn, EqualFn>;
00098       return;
00099     }
00100 
00101     items_ = new Type[x.numItems_];      
00102     //commented out: Type may not be a basic type and thus need deep copying
00103     //memcpy(items_, x.items_, x.numItems_*sizeof(Type));
00104     for (int i = 0; i < x.numItems_; i++) items_[i] = x.items_[i];
00105     numItems_ = x.numItems_;
00106     maxItems_ = numItems_;
00107     map_ = new hash_map<Type, int, HashFn, EqualFn>;
00108     for (int i = 0; i < numItems_; i++)
00109       (*map_)[items_[i]] = i;
00110     assert(map_->size() == (unsigned int) numItems_);
00111   }
00112 
00113 
00114     // does not delete each individual item
00115   ~HashArray() { if (items_) delete [] items_; delete map_; }
00116 
00117 
00118     // returns index of appended item or -1 if item is already in the array
00119   int append(Type item)
00120   {
00121     if (map_->find(item) != map_->end()) return -1;
00122 
00123     if (numItems_ == maxItems_) 
00124       allocateMemory(maxItems_*2+1);
00125     items_[numItems_++] = item;
00126     (*map_)[item] = numItems_-1;
00127 
00128     assert(map_->size() == (unsigned int) numItems_);
00129     return numItems_-1;
00130   }
00131 
00132 
00133   int append(Type item, const int& max)
00134   {
00135     if (map_->find(item) != map_->end()) return -1;
00136 
00137     if (numItems_ == maxItems_) 
00138     {
00139       int newNumItems = maxItems_*2+1;
00140       if (newNumItems > max) newNumItems = max;
00141       allocateMemory(newNumItems);
00142     }
00143     items_[numItems_++] = item;
00144     (*map_)[item] = numItems_-1;
00145     assert(map_->size() == (unsigned int) numItems_);
00146     return numItems_-1;
00147   }
00148 
00149 
00150 
00151     // returns the number of items appended
00152   int append(const HashArray<Type, HashFn, EqualFn>& newItems)
00153   {
00154     int n = 0;
00155     for (int i = 0; i < newItems.size(); i++)
00156       if (append(newItems[i]) >= 0) n++;
00157     return n;
00158   }
00159 
00160 
00161   int append(const HashArray<Type, HashFn, EqualFn>* const & items) 
00162   { return append(*items); }
00163 
00164 
00165     // the contents of the array are cleared (but not deleted), and populated
00166     // with those in items
00167     // returns the number of items copied
00168   int copyFrom(const HashArray<Type, HashFn, EqualFn>& items) 
00169   { 
00170     clear(); 
00171     return append(items); 
00172   }
00173 
00174    // Assignment operator
00175   HashArray<Type, HashFn, EqualFn>& 
00176       operator=(const HashArray<Type, HashFn, EqualFn>& x)
00177   { 
00178       copyFrom(x);
00179       return *this;
00180   }
00181 
00182 
00183   bool growToSize(const int& newSize)
00184   {
00185     if (newSize <= numItems_) return false;
00186     
00187     if (newSize <= maxItems_)
00188       numItems_ = newSize;
00189     else
00190     {
00191       allocateMemory(newSize);
00192       numItems_ = newSize;
00193     }
00194     return true;
00195   }
00196 
00197 
00198   bool growToSize(const int& newSize, Type fill)
00199   {
00200     int origNumItems = numItems_;
00201     if(!growToSize(newSize)) return false;
00202     for (int i = origNumItems; i < numItems_; i++)
00203       items_[i] = fill;
00204     return true;
00205   }
00206 
00207 
00208     // the contents in the array are not deleted
00209   void clear()  { numItems_ = 0; map_->clear(); }
00210 
00211     // the contents in the array are not deleted
00212   void clearAndCompress()  { numItems_ = 0; map_->clear(); compress(); }
00213 
00214 
00215     // delete the contents of items_ array and set numItems = 0;
00216     // items_ is not deleted,and maxItems_ stay the same
00217   void deleteItemsAndClear() 
00218   { 
00219     for (int i = 0; i < numItems_; i++)
00220       if (items_[i]) delete items_[i];
00221     clear();
00222     assert(map_->size() == (unsigned int) numItems_);
00223   }
00224 
00225     // delete the contents of items_ array and set numItems = 0;
00226     // items_ is not deleted,and maxItems_ stay the same
00227   void deleteItemsAndClearCompress() 
00228   { 
00229     for (int i = 0; i < numItems_; i++)
00230       if (items_[i]) delete items_[i];
00231     clear();
00232     assert(map_->size() == (unsigned int) numItems_);
00233     compress();
00234   }
00235 
00236 
00237   int size() const { return numItems_; }
00238 
00239   bool empty() const { return size()==0; } 
00240 
00241     // important for the return value to be const so that the array cannot
00242     // be written via [] (e.g. array[0] = anotherObject). This prevents
00243     // the indexes in map_ from being inconsistent.
00244   const Type& operator[](const int& index) const { return item(index); } 
00245 
00246 
00247   Type& lastItem() const { return items_[numItems_-1]; }
00248 
00249 
00250     // caller should not delete the returned pointer nor modify the object
00251   const Type* getItems() const { return items_; }
00252 
00259   int find(Type const & item) const
00260   {
00261     typename hash_map<Type, int, HashFn, EqualFn>::iterator it=map_->find(item);
00262     if (it == map_->end()) return -1;
00263     assert((*it).second >= 0);
00264     return (*it).second;
00265   }
00266 
00267   
00268   bool contains(Type const & item) const { return find(item) >= 0;}
00269 
00270 
00271   Type removeItem(const int& index)
00272   {
00273       // move everything past the item back
00274     Type removedItem = items_[index];
00275     map_->erase(map_->find(items_[index]));
00276     for (int i = index+1; i < numItems_; i++) items_[i-1] = items_[i];
00277 
00278     //commented out: Type may not be a basic type and thus need deep copying
00279     //int numItemsToMove = numItems_ - index - 1;
00280     //if (numItemsToMove > 0)
00281     //  memmove(&items_[index], &items_[index+1], numItemsToMove*sizeof(Type));
00282 
00283     numItems_--;
00284     
00285       // subtract the indexes of the moved items by 1
00286     typename hash_map<Type, int, HashFn, EqualFn>::iterator it;
00287     for (int i = index; i < numItems_; i++)
00288     {
00289       it = map_->find(items_[i]);
00290       assert(it != map_->end());
00291       assert((*it).second-1==i);
00292       (*it).second = i;
00293     }
00294 
00295     assert(map_->size() == (unsigned int)numItems_);
00296     return removedItem;
00297   }
00298 
00299 
00300     //modified to remove the ambiguity from removeItem(const int & index)
00301     //when type is also an int - also need to make sure that idx >= 0
00302   Type removeInputItem(Type & item)
00303   {
00304     int idx = find(item);
00305     if (idx < 0) return NULL;
00306     return removeItem(idx);
00307   }
00308   
00309   Type removeLastItem() { return removeItem(numItems_-1); }
00310 
00311   // removes the item but doesn't leave the array in the same order as before
00312   Type removeItemFastDisorder(const int& index) 
00313   {
00314     Type removedItem = items_[index];
00315     map_->erase(map_->find(items_[index]));
00316 
00317     if (numItems_ > 1 && index != numItems_-1)
00318     {
00319       map_->erase(map_->find(items_[numItems_-1]));
00320       (*map_)[items_[numItems_-1]] = index;
00321       item(index) = item(numItems_-1);   
00322     }
00323 
00324     numItems_--;
00325     assert(map_->size() == (unsigned int)numItems_);
00326     return removedItem;
00327   }
00328 
00329     //modified to remove the ambiguity from removeItem(const int & index)
00330     //when type is also an int
00331   Type removeInputItemFastDisorder(Type & item)
00332   {
00333     int idx = find(item);
00334     if (idx < 0) return NULL;
00335     return removeItemFastDisorder(idx);
00336   } 
00337 
00338 
00339   // resizes the array to be exactly the number of elements in it
00340   void compress()  { if (maxItems_ > numItems_)  allocateMemory(numItems_); }
00341 
00342 
00343     // reorder randomly
00344   void shuffle() 
00345   {
00346     for (int i = 0; i < numItems_; i++) 
00347     {
00348         // swap item i with a random item
00349       int swapwith = Random::randomOneOf(numItems_);
00350       Type tmp;
00351       tmp = items_[i];
00352       items_[i] = items_[swapwith];
00353       items_[swapwith] = tmp;
00354     }
00355   }
00356 
00357 
00358     // returns the index of largest item
00359   int getMaxIndex() 
00360   {
00361     assert(numItems_ > 0);
00362     int max = 0;
00363     for (int i = 1; i < numItems_; i++) 
00364     {
00365       if (items_[i] > items_[max])
00366         max = i;
00367     }
00368     return max;
00369   }
00370 
00371 
00372     // returns the value of largest item
00373   Type getMaxValue() 
00374   {
00375     assert(numItems_ > 0);
00376     Type max = items_[0];
00377     for (int i = 1; i < numItems_; i++) 
00378     {
00379       if (items_[i] > max)
00380         max = items_[i];
00381     }
00382     return max;
00383   }
00384 
00385 
00386  private:
00387   Type& item(const int& index) const 
00388   {
00389     assert(index < numItems_);
00390     return items_[index];
00391   }
00392  
00393     // if newSize < numItems, numItems will be shrunk to newSize
00394   void allocateMemory(const int& newSize)
00395   {
00396     if (newSize > 0)
00397     {
00398       if (newSize >= numItems_)
00399       {
00400         Type* tempItems = new Type[newSize];
00401         if (numItems_ > 0)
00402         {
00403           //commented out: Type may not be a basic type & thus need deep copying
00404           //memcpy(tempItems, items_, numItems_*sizeof(Type));
00405           for (int i = 0; i < numItems_; i++) tempItems[i] = items_[i];
00406         }
00407         delete [] items_;
00408         items_ = tempItems;
00409         maxItems_ = newSize;
00410       }
00411       else  // newSize < numItems_
00412       {
00413         Type* tempItems = new Type[newSize];
00414         if (numItems_ > 0) 
00415         {
00416           //commented out: Type may not be a basic type & thus need deep copying
00417           //memcpy(tempItems, items_, newSize*sizeof(Type));
00418           for (int i = 0; i < newSize; i++) tempItems[i] = items_[i];
00419         }
00420         delete [] items_;
00421         items_ = tempItems;
00422         maxItems_ = newSize;
00423         numItems_ = newSize;
00424       }
00425     }
00426     else // newSize==0
00427     {
00428       delete [] items_;
00429       items_ = NULL;
00430       maxItems_ = 0;
00431       numItems_ = 0;
00432     }
00433   }
00434 
00435   
00436  private:
00437   Type* items_;
00438   int maxItems_;
00439   int numItems_;
00440 
00441   hash_map<Type, int, HashFn, EqualFn>* map_;
00442 };
00443 
00444 
00445 // Useful functions
00446 
00447   // between is the filler string to be used between items
00448 template <typename T, class HashFn, class EqualFn> 
00449 void writeHashArray(const HashArray<T*,HashFn,EqualFn>& array, ostream& out, 
00450                     char* between, const char& delimiter)
00451 {
00452   out << array.size() << delimiter;
00453   for (int i = 0; i < array.size(); i++)
00454   {
00455     array.item(i)->save(out);
00456     if (between) out << between;
00457   }
00458 }
00459 
00460 
00461 template <typename T, class HashFn, class EqualFn>
00462 void writeArray(HashArray<T*,HashFn,EqualFn>&  array, ostream& out, 
00463                 char* between=NULL)
00464 { writeArray(array, out, between, '\n'); }
00465 
00466 
00467 template <typename T, class HashFn, class EqualFn>
00468 void readArray(HashArray<T*,HashFn,EqualFn>& array, istream& in)
00469 {
00470   int numItems;
00471   in >> numItems;
00472   for (int i = 0; i < numItems && in.good(); i++)
00473   {
00474     T* x = new T;
00475     x->load(in);
00476     array.append(x);
00477   }
00478 }
00479 
00480 
00481 template <typename T, class HashFn, class EqualFn>
00482 void writeArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00483                 char* between, const char& delimiter)
00484 {
00485   out << array.Size() << delimiter;
00486   for (int i = 0; i < array.Size(); i++)  
00487   {
00488     array.item(i).save(out);
00489     if (between) out << between;
00490   }  
00491 }
00492 
00493 
00494 template <typename T, class HashFn, class EqualFn>
00495 void writeArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00496                 char* between=NULL)
00497 { writeArray(array, out, between, '\n'); }
00498 
00499 
00500 template <typename T, class HashFn, class EqualFn>
00501 void readArray(HashArray<T,HashFn,EqualFn>& array, istream& in) 
00502 {
00503   int numItems;
00504   in >> numItems;
00505   for (int i = 0; i < numItems && in.good(); i++) 
00506   {
00507     T x;
00508     x.load(in);
00509     array.append(x);
00510   }
00511 }
00512 
00513 
00514 template <typename T, class HashFn, class EqualFn>
00515 void writeBasicArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00516                      char* between, const char& delimiter)
00517 {
00518   out << array.Size() << delimiter;
00519   for (int i = 0; i < array.Size(); i++)
00520   {
00521     out << array.item(i) << " ";
00522     if (between) out << between;
00523   }
00524 }
00525 
00526 
00527 template <typename T, class HashFn, class EqualFn>
00528 void writeBasicArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00529                      char* between=NULL)
00530 { writeBasicArray(array, out, between, '\n'); }
00531 
00532 
00533 template <typename T, class HashFn, class EqualFn>
00534 void readBasicArray(HashArray<T,HashFn,EqualFn>& array, istream& in)
00535 {
00536   int numItems=0;
00537   in >> numItems;
00538   for (int i = 0; i < numItems && in.good(); i++)
00539   {
00540     T x;
00541     in >> x;
00542     array.append(x);
00543   }
00544   if (!in.good())
00545     cout << "ERROR: readBasicArray(). Input not good." << endl;
00546 }
00547 
00548 
00549 #endif

Generated on Sun Jun 7 11:55:21 2009 for Alchemy by  doxygen 1.5.1