hasharray.h

00001 /*
00002  * All of the documentation and software included in the
00003  * Alchemy Software is copyrighted by Stanley Kok, Parag
00004  * Singla, Matthew Richardson, Pedro Domingos, Marc
00005  * Sumner and Hoifung Poon.
00006  * 
00007  * Copyright [2004-07] Stanley Kok, Parag Singla, Matthew
00008  * Richardson, Pedro Domingos, Marc Sumner and Hoifung
00009  * Poon. All rights reserved.
00010  * 
00011  * Contact: Pedro Domingos, University of Washington
00012  * (pedrod@cs.washington.edu).
00013  * 
00014  * Redistribution and use in source and binary forms, with
00015  * or without modification, are permitted provided that
00016  * the following conditions are met:
00017  * 
00018  * 1. Redistributions of source code must retain the above
00019  * copyright notice, this list of conditions and the
00020  * following disclaimer.
00021  * 
00022  * 2. Redistributions in binary form must reproduce the
00023  * above copyright notice, this list of conditions and the
00024  * following disclaimer in the documentation and/or other
00025  * materials provided with the distribution.
00026  * 
00027  * 3. All advertising materials mentioning features or use
00028  * of this software must display the following
00029  * acknowledgment: "This product includes software
00030  * developed by Stanley Kok, Parag Singla, Matthew
00031  * Richardson, Pedro Domingos, Marc Sumner and Hoifung
00032  * Poon in the Department of Computer Science and
00033  * Engineering at the University of Washington".
00034  * 
00035  * 4. Your publications acknowledge the use or
00036  * contribution made by the Software to your research
00037  * using the following citation(s): 
00038  * Stanley Kok, Parag Singla, Matthew Richardson and
00039  * Pedro Domingos (2005). "The Alchemy System for
00040  * Statistical Relational AI", Technical Report,
00041  * Department of Computer Science and Engineering,
00042  * University of Washington, Seattle, WA.
00043  * http://www.cs.washington.edu/ai/alchemy.
00044  * 
00045  * 5. Neither the name of the University of Washington nor
00046  * the names of its contributors may be used to endorse or
00047  * promote products derived from this software without
00048  * specific prior written permission.
00049  * 
00050  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON
00051  * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
00052  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00053  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00054  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
00055  * OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
00056  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00057  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00058  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00059  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00060  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00061  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00062  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
00063  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00064  * 
00065  */
00066 #ifndef HASHARRAY_H_JUL_19_2005
00067 #define HASHARRAY_H_JUL_19_2005
00068 
00069 #include <stdlib.h>
00070 #include <assert.h>
00071 #include <iostream>
00072 #include <string>
00073 #include "random.h"
00074 using namespace std;
00075 #include <ext/hash_map>
00076 using namespace __gnu_cxx;
00077 
00078 
00079   //An Array of that does not contain duplicates. It is backed up by a hash_set
00080 template <typename Type, class HashFn, class EqualFn> 
00081 class HashArray
00082 {
00083  public:
00084   HashArray(const int& initSize=0) : items_(NULL), maxItems_(0), numItems_(0),
00085                                      map_(new hash_map<Type,int,HashFn,EqualFn>)
00086   { allocateMemory(initSize); }
00087     
00088   
00089   HashArray(const HashArray<Type, HashFn, EqualFn>& x)
00090   { 
00091     if (x.numItems_ == 0) 
00092     {
00093       items_    = NULL;
00094       maxItems_ = 0;
00095       numItems_ = 0;
00096       allocateMemory(0);
00097       map_ = new hash_map<Type, int, HashFn, EqualFn>;
00098       return;
00099     }
00100 
00101     items_ = new Type[x.numItems_];      
00102     //commented out: Type may not be a basic type and thus need deep copying
00103     //memcpy(items_, x.items_, x.numItems_*sizeof(Type));
00104     for (int i = 0; i < x.numItems_; i++) items_[i] = x.items_[i];
00105     numItems_ = x.numItems_;
00106     maxItems_ = numItems_;
00107     map_ = new hash_map<Type, int, HashFn, EqualFn>;
00108     for (int i = 0; i < numItems_; i++)
00109       (*map_)[items_[i]] = i;
00110     assert(map_->size() == (unsigned int) numItems_);
00111   }
00112 
00113 
00114     // does not delete each individual item
00115   ~HashArray() { if (items_) delete [] items_; delete map_; }
00116 
00117 
00118     // returns index of appended item or -1 if item is already in the array
00119   int append(Type item)
00120   {
00121     if (map_->find(item) != map_->end()) return -1;
00122 
00123     if (numItems_ == maxItems_) 
00124       allocateMemory(maxItems_*2+1);
00125     items_[numItems_++] = item;
00126     (*map_)[item] = numItems_-1;
00127 
00128     assert(map_->size() == (unsigned int) numItems_);
00129     return numItems_-1;
00130   }
00131 
00132 
00133   int append(Type item, const int& max)
00134   {
00135     if (map_->find(item) != map_->end()) return -1;
00136 
00137     if (numItems_ == maxItems_) 
00138     {
00139       int newNumItems = maxItems_*2+1;
00140       if (newNumItems > max) newNumItems = max;
00141       allocateMemory(newNumItems);
00142     }
00143     items_[numItems_++] = item;
00144     (*map_)[item] = numItems_-1;
00145     assert(map_->size() == (unsigned int) numItems_);
00146     return numItems_-1;
00147   }
00148 
00149 
00150 
00151     // returns the number of items appended
00152   int append(const HashArray<Type, HashFn, EqualFn>& newItems)
00153   {
00154     int n = 0;
00155     for (int i = 0; i < newItems.size(); i++)
00156       if (append(newItems[i]) >= 0) n++;
00157     return n;
00158   }
00159 
00160 
00161   int append(const HashArray<Type, HashFn, EqualFn>* const & items) 
00162   { return append(*items); }
00163 
00164 
00165     // the contents of the array are cleared (but not deleted), and populated
00166     // with those in items
00167     // returns the number of items copied
00168   int copyFrom(const HashArray<Type, HashFn, EqualFn>& items) 
00169   { 
00170     clear(); 
00171     return append(items); 
00172   }
00173 
00174 
00175   bool growToSize(const int& newSize)
00176   {
00177     if (newSize <= numItems_) return false;
00178     
00179     if (newSize <= maxItems_)
00180       numItems_ = newSize;
00181     else
00182     {
00183       allocateMemory(newSize);
00184       numItems_ = newSize;
00185     }
00186     return true;
00187   }
00188 
00189 
00190   bool growToSize(const int& newSize, Type fill)
00191   {
00192     int origNumItems = numItems_;
00193     if(!growToSize(newSize)) return false;
00194     for (int i = origNumItems; i < numItems_; i++)
00195       items_[i] = fill;
00196     return true;
00197   }
00198 
00199 
00200     // the contents in the array are not deleted
00201   void clear()  { numItems_ = 0; map_->clear(); }
00202 
00203     // the contents in the array are not deleted
00204   void clearAndCompress()  { numItems_ = 0; map_->clear(); compress(); }
00205 
00206 
00207     // delete the contents of items_ array and set numItems = 0;
00208     // items_ is not deleted,and maxItems_ stay the same
00209   void deleteItemsAndClear() 
00210   { 
00211     for (int i = 0; i < numItems_; i++)
00212       if (items_[i]) delete items_[i];
00213     clear();
00214     assert(map_->size() == (unsigned int) numItems_);
00215   }
00216 
00217     // delete the contents of items_ array and set numItems = 0;
00218     // items_ is not deleted,and maxItems_ stay the same
00219   void deleteItemsAndClearCompress() 
00220   { 
00221     for (int i = 0; i < numItems_; i++)
00222       if (items_[i]) delete items_[i];
00223     clear();
00224     assert(map_->size() == (unsigned int) numItems_);
00225     compress();
00226   }
00227 
00228 
00229   int size() const { return numItems_; }
00230 
00231   bool empty() const { return size()==0; } 
00232 
00233     // important for the return value to be const so that the array cannot
00234     // be written via [] (e.g. array[0] = anotherObject). This prevents
00235     // the indexes in map_ from being inconsistent.
00236   const Type& operator[](const int& index) const { return item(index); } 
00237 
00238 
00239   Type& lastItem() const { return items_[numItems_-1]; }
00240 
00241 
00242     // caller should not delete the returned pointer nor modify the object
00243   const Type* getItems() const { return items_; }
00244 
00245 
00246   //comment out: randomOneOf no longer a static function
00247   //Type& randomItem() const 
00248   //{
00249   //  assert(numItems_>0); 
00250   //  return items_[Random::randomOneOf(numItems_)];
00251   //}
00252   
00253 
00260   int find(Type const & item) const
00261   {
00262     typename hash_map<Type, int, HashFn, EqualFn>::iterator it=map_->find(item);
00263     if (it == map_->end()) return -1;
00264     assert((*it).second >= 0);
00265     return (*it).second;
00266   }
00267 
00268   
00269   bool contains(Type const & item) const { return find(item) >= 0;}
00270 
00271 
00272   Type removeItem(const int& index)
00273   {
00274       // move everything past the item back
00275     Type removedItem = items_[index];
00276     map_->erase(map_->find(items_[index]));
00277     for (int i = index+1; i < numItems_; i++) items_[i-1] = items_[i];
00278 
00279     //commented out: Type may not be a basic type and thus need deep copying
00280     //int numItemsToMove = numItems_ - index - 1;
00281     //if (numItemsToMove > 0)
00282     //  memmove(&items_[index], &items_[index+1], numItemsToMove*sizeof(Type));
00283 
00284     numItems_--;
00285     
00286       // subtract the indexes of the moved items by 1
00287     typename hash_map<Type, int, HashFn, EqualFn>::iterator it;
00288     for (int i = index; i < numItems_; i++)
00289     {
00290       it = map_->find(items_[i]);
00291       assert(it != map_->end());
00292       assert((*it).second-1==i);
00293       (*it).second = i;
00294     }
00295 
00296     assert(map_->size() == (unsigned int)numItems_);
00297     return removedItem;
00298   }
00299 
00300 
00301   Type removeItem(Type& item)
00302   {
00303     int idx = find(item);
00304     return removeItem(idx);
00305   }
00306   
00307 
00308   Type removeLastItem() { return removeItem(numItems_-1); }
00309 
00310 
00311     // removes the item but doesn't leave the array in the same order as before
00312   Type removeItemFastDisorder(const int& index) 
00313   {
00314     Type removedItem = items_[index];
00315     map_->erase(map_->find(items_[index]));
00316 
00317     if (numItems_ > 1 && index != numItems_-1)
00318     {
00319       map_->erase(map_->find(items_[numItems_-1]));
00320       (*map_)[items_[numItems_-1]] = index;
00321       item(index) = item(numItems_-1);   
00322     }
00323 
00324     numItems_--;
00325     assert(map_->size() == (unsigned int)numItems_);
00326     return removedItem;
00327   }
00328 
00329 
00330   Type removeItemFastDisorder(Type& item)
00331   {
00332     int idx = find(item);
00333     if (idx < 0) return NULL;
00334     return removeItemFastDisorder(idx);
00335   }
00336 
00337 
00338     // resizes the array to be exactly the number of elements in it
00339   void compress()  { if (maxItems_ > numItems_)  allocateMemory(numItems_); }
00340 
00341 
00342     // reorder randomly
00343   void shuffle() 
00344   {
00345     for (int i = 0; i < numItems_; i++) 
00346     {
00347         // swap item i with a random item
00348       int swapwith = Random::randomOneOf(numItems_);
00349       Type tmp;
00350       tmp = items_[i];
00351       items_[i] = items_[swapwith];
00352       items_[swapwith] = tmp;
00353     }
00354   }
00355 
00356 
00357     // returns the index of largest item
00358   int getMaxIndex() 
00359   {
00360     assert(numItems_ > 0);
00361     int max = 0;
00362     for (int i = 1; i < numItems_; i++) 
00363     {
00364       if (items_[i] > items_[max])
00365         max = i;
00366     }
00367     return max;
00368   }
00369 
00370 
00371     // returns the value of largest item
00372   Type getMaxValue() 
00373   {
00374     assert(numItems_ > 0);
00375     Type max = items_[0];
00376     for (int i = 1; i < numItems_; i++) 
00377     {
00378       if (items_[i] > max)
00379         max = items_[i];
00380     }
00381     return max;
00382   }
00383 
00384 
00385  private:
00386   Type& item(const int& index) const 
00387   {
00388     assert(index < numItems_);
00389     return items_[index];
00390   }
00391  
00392     // if newSize < numItems, numItems will be shrunk to newSize
00393   void allocateMemory(const int& newSize)
00394   {
00395     if (newSize > 0)
00396     {
00397       if (newSize >= numItems_)
00398       {
00399         Type* tempItems = new Type[newSize];
00400         if (numItems_ > 0)
00401         {
00402           //commented out: Type may not be a basic type & thus need deep copying
00403           //memcpy(tempItems, items_, numItems_*sizeof(Type));
00404           for (int i = 0; i < numItems_; i++) tempItems[i] = items_[i];
00405         }
00406         delete [] items_;
00407         items_ = tempItems;
00408         maxItems_ = newSize;
00409       }
00410       else  // newSize < numItems_
00411       {
00412         Type* tempItems = new Type[newSize];
00413         if (numItems_ > 0) 
00414         {
00415           //commented out: Type may not be a basic type & thus need deep copying
00416           //memcpy(tempItems, items_, newSize*sizeof(Type));
00417           for (int i = 0; i < newSize; i++) tempItems[i] = items_[i];
00418         }
00419         delete [] items_;
00420         items_ = tempItems;
00421         maxItems_ = newSize;
00422         numItems_ = newSize;
00423       }
00424     }
00425     else // newSize==0
00426     {
00427       delete [] items_;
00428       items_ = NULL;
00429       maxItems_ = 0;
00430       numItems_ = 0;
00431     }
00432   }
00433 
00434   
00435  private:
00436   Type* items_;
00437   int maxItems_;
00438   int numItems_;
00439 
00440   hash_map<Type, int, HashFn, EqualFn>* map_;
00441 };
00442 
00443 
00444 // Useful functions
00445 
00446   // between is the filler string to be used between items
00447 template <typename T, class HashFn, class EqualFn> 
00448 void writeHashArray(const HashArray<T*,HashFn,EqualFn>& array, ostream& out, 
00449                     char* between, const char& delimiter)
00450 {
00451   out << array.size() << delimiter;
00452   for (int i = 0; i < array.size(); i++)
00453   {
00454     array.item(i)->save(out);
00455     if (between) out << between;
00456   }
00457 }
00458 
00459 
00460 template <typename T, class HashFn, class EqualFn>
00461 void writeArray(HashArray<T*,HashFn,EqualFn>&  array, ostream& out, 
00462                 char* between=NULL)
00463 { writeArray(array, out, between, '\n'); }
00464 
00465 
00466 template <typename T, class HashFn, class EqualFn>
00467 void readArray(HashArray<T*,HashFn,EqualFn>& array, istream& in)
00468 {
00469   int numItems;
00470   in >> numItems;
00471   for (int i = 0; i < numItems && in.good(); i++)
00472   {
00473     T* x = new T;
00474     x->load(in);
00475     array.append(x);
00476   }
00477 }
00478 
00479 
00480 template <typename T, class HashFn, class EqualFn>
00481 void writeArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00482                 char* between, const char& delimiter)
00483 {
00484   out << array.Size() << delimiter;
00485   for (int i = 0; i < array.Size(); i++)  
00486   {
00487     array.item(i).save(out);
00488     if (between) out << between;
00489   }  
00490 }
00491 
00492 
00493 template <typename T, class HashFn, class EqualFn>
00494 void writeArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00495                 char* between=NULL)
00496 { writeArray(array, out, between, '\n'); }
00497 
00498 
00499 template <typename T, class HashFn, class EqualFn>
00500 void readArray(HashArray<T,HashFn,EqualFn>& array, istream& in) 
00501 {
00502   int numItems;
00503   in >> numItems;
00504   for (int i = 0; i < numItems && in.good(); i++) 
00505   {
00506     T x;
00507     x.load(in);
00508     array.append(x);
00509   }
00510 }
00511 
00512 
00513 template <typename T, class HashFn, class EqualFn>
00514 void writeBasicArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00515                      char* between, const char& delimiter)
00516 {
00517   out << array.Size() << delimiter;
00518   for (int i = 0; i < array.Size(); i++)
00519   {
00520     out << array.item(i) << " ";
00521     if (between) out << between;
00522   }
00523 }
00524 
00525 
00526 template <typename T, class HashFn, class EqualFn>
00527 void writeBasicArray(HashArray<T,HashFn,EqualFn>& array, ostream& out, 
00528                      char* between=NULL)
00529 { writeBasicArray(array, out, between, '\n'); }
00530 
00531 
00532 template <typename T, class HashFn, class EqualFn>
00533 void readBasicArray(HashArray<T,HashFn,EqualFn>& array, istream& in)
00534 {
00535   int numItems=0;
00536   in >> numItems;
00537   for (int i = 0; i < numItems && in.good(); i++)
00538   {
00539     T x;
00540     in >> x;
00541     array.append(x);
00542   }
00543   if (!in.good())
00544     cout << "ERROR: readBasicArray(). Input not good." << endl;
00545 }
00546 
00547 
00548 #endif

Generated on Tue Jan 16 05:30:05 2007 for Alchemy by  doxygen 1.5.1