Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

buildfptree.hpp

Go to the documentation of this file.
00001 
00008 #ifndef BUILDFPTREE_HPP_050413
00009 #define BUILDFPTREE_HPP_050413
00010 
00011 #include "common/linmap.cpp"
00012 #include "common/allocators.hpp"
00013 
00014   // we must define it this way because of template circular references. :(
00015 #define NONORDFP_CHILDMAP linmap
00016 
00017 
00018 namespace bracz {
00019   
00020   class nullTreeAnn {
00021   public:
00022     inline void DINLINE initTree(item_t maxitem) {
00023     }
00024     
00025     inline void DINLINE onNewTreeNode(item_t curritem,counter_t /*count*/) {
00026     }
00027 
00028     nullTreeAnn() {
00029     }
00030   }; //class nullTreeAnn
00031 
00032 
00033 
00034 /*
00035   Shows the interface which a build fp tree must comply to
00036 */
00037 class AbstractBuildTree {
00038 public:
00039   class nodeiter_t;
00040   class nodeptr_t;
00041   class node_t;
00042 
00043   class nodeptr_t {
00044   public:
00045     node_t * operator->();
00046     node_t & operator*();
00047   };
00048 
00049   class nodeiter_t {
00050   public:
00051     const std::pair<item_t,nodeptr_t> & operator*();
00052     const std::pair<item_t,nodeptr_t> * operator->();
00053     void operator++();
00054     bool operator==(const nodeiter_t &o);
00055     bool operator!=(const nodeiter_t &o);
00056   };
00057 
00058 
00059   class node_t {
00060   public:
00061     counter_t getCounter();
00062     void setCounter(counter_t c);
00063     nodeiter_t childrenBegin();
00064     nodeiter_t childrenEnd();
00065   };
00066 
00067   typedef node_t root_t;
00068 
00069   /* 
00070      example within:
00071      allocator
00072      root node
00073   */
00074   class buildtree_t {
00075   public:
00076     nodeptr_t getRoot();
00077   };
00078 
00079   /*root_t *&root,U& nodesperitem, BUILDTREEALLOC &allocator,*/
00080    void initBuildTree(buildtree_t &tree, item_t maxitem);
00081   
00082   /*
00083     CALLBACK has the following functions:
00084     void onNewTreeNode(item_t item, counter_t count);
00085     //void onIncraseCounter(item_t item, counter_t oldcount, counter_t count)
00086   */
00087   template<class T, class CALLBACK>  void addTransToTree(T &trans,size_t length,buildtree_t &tree, counter_t count, CALLBACK & cb);
00088 }; //class abstractbuildtree
00089 
00090 
00091 template <bool ENDONLY = false> class TrieBuildTree {
00092 public:
00093 
00094   class node_t;
00095 
00096       typedef NONORDFP_CHILDMAP<item_t,node_t*> childmap_t;
00097       typedef typename childmap_t::iterator nodeiter_t;
00098 
00099 
00100 
00102   class node_t {
00104     counter_t counter;
00106     NONORDFP_CHILDMAP<item_t,node_t*> children;
00107     friend class TrieBuildTree;
00108   public:
00109 
00110     inline counter_t DINLINE getCounter() {
00111       return counter;
00112     } 
00113     inline void DINLINE setCounter(counter_t c) {
00114       counter=c;
00115     }
00116     inline nodeiter_t DINLINE childrenBegin() {
00117       return children.begin();
00118     }
00119     inline nodeiter_t DINLINE childrenEnd() {
00120       return children.end();
00121     }
00122 
00123     childmap_t* getChildren() {
00124       return &children;
00125     }
00126   };//class node_t 
00127 
00128 
00129   class nodeptr_t {
00130   private:
00131     node_t *data;
00132   public:
00133     inline node_t & operator*() {
00134       return *data;
00135     }
00136     inline node_t * operator->() {
00137       return data;
00138     }
00139     inline bool operator==(const nodeptr_t &o) {
00140       return data==o.data;
00141     }
00142     inline bool operator!=(const nodeptr_t &o) {
00143       return data!=o.data;
00144     }
00145     nodeptr_t(node_t *d): data(d){}
00146   }; //class nodeptr_t
00147 
00148   class buildtree_t {
00149     node_t *root;
00150     friend class TrieBuildTree;
00151   public:
00152     inline nodeptr_t DINLINE getRoot() {
00153       return nodeptr_t(root);
00154     }
00155   };
00156 
00157 protected:
00158   typedef bracz::blockalloc<node_t,10000> nodealloc_t;
00159 
00160   nodealloc_t allocator;
00161 
00162 public:
00163 
00164   /*root_t *&root,U& nodesperitem, BUILDTREEALLOC &allocator,*/
00165   template <class CALLBACK> void initBuildTree(buildtree_t &tree, item_t maxitem, CALLBACK &cb) {
00166     tree.root = allocator.allocate();
00167     tree.root->counter=0;
00168     cb.initTree(maxitem);
00169   }
00170 
00171   template<class T, class CALLBACK>  void addTransToTree(T &trans,size_t length,buildtree_t &tree, counter_t count, CALLBACK & cb) {
00172     node_t *bnode=tree.root;
00173     if (!ENDONLY) {
00174       bnode->counter+=count;
00175     }
00176     for(size_t i=0;i<length;++i) {
00177       register item_t curritem=trans[i];
00178       node_t*& nnode=bnode->children[curritem];
00179       if (!nnode) {
00180         //we need to allocate a new node
00181         nnode=allocator.allocate();
00182         if (!ENDONLY)
00183           nnode->counter=count;
00184         else
00185           nnode->counter=0;
00186         //nodesperitem[curritem]++;
00187         cb.onNewTreeNode(curritem,count);
00188       } else {
00189         // **** remove this in production code
00190         /*if(nnode->counter==1)
00191           singlec++;*/
00192         // ****
00193         if (!ENDONLY)
00194           nnode->counter+=count;
00195       }
00196       //traverse to next node
00197       bnode=nnode;
00198     }
00199     if (ENDONLY) {
00200       bnode->counter+=count;
00201     }
00202   }
00203   
00204   
00205 
00206 }; //class TrieBuildTree ===================================================
00207 
00208 
00209 template <bool ENDONLY=false> class EndPatriciaBuildTree {
00210 protected:
00211   static const counter_t leafbit=1<<30;
00212 
00213   inline static bool DINLINE isLeaf(counter_t cnt){
00214     return (cnt & leafbit);
00215   }
00216   inline static counter_t DINLINE  origCounter(counter_t cnt) {
00217     return cnt & (~leafbit);
00218   }
00219 
00220   inline static counter_t DINLINE  counterToLeaf(counter_t cnt) {
00221     return cnt | (leafbit);
00222   }
00223 
00224   inline static counter_t DINLINE  counterToNode(counter_t cnt) {
00225     return cnt;
00226   }
00227 
00228 
00229   typedef NONORDFP_CHILDMAP<item_t,void*> childmap_t;
00230 
00231   static const int internal_node_allocsize = (sizeof(childmap_t)+sizeof(counter_t) + sizeof(uint32_t)-1 )/sizeof(uint32_t);
00232   
00233   inline static int DINLINE leafAllocSize(int itemcnt) {
00234     //assumes sizeof(item_t)==sizeof(uint32_t)
00235     if (sizeof(item_t)==sizeof(uint32_t)) {
00236       //faster calculation
00237       return ((sizeof(counter_t)+sizeof(uint32_t)-1)/sizeof(uint32_t))+1+itemcnt;
00238     }else {
00239       return ((sizeof(counter_t)+itemcnt*sizeof(item_t) + sizeof(uint32_t)-1)/sizeof(uint32_t))+1;
00240     }
00241   }
00242 
00243   class internalnode_t {
00244   public:
00245     counter_t counter;
00246     childmap_t children;
00247     void clear() {
00248       children.clear();
00249       counter=0;
00250     }
00251   }; //class internalnode_t
00252 
00253   class endnode_t {
00254   public:
00255     counter_t counter;
00256     uint32_t leafsize;
00257     item_t leafs;
00258   }; //class endnode_t
00259 
00260   //allocator for internal nodes
00261   blockalloc<internalnode_t,10*1024> nodeallocator;
00262   //allocator for leafs
00263   blockalloc<uint32_t,100*1024> rawallocator;
00264 
00265 public:
00266 
00267 
00269   class storenode_t {
00270   protected:
00272     void * data;
00273     friend class EndPatriciaBuildTree;
00274   protected:
00275     inline bool DINLINE isLeaf() const {
00276       return EndPatriciaBuildTree::isLeaf(getRawCounter());
00277     }
00278     inline counter_t DINLINE getRawCounter() const {
00279       return *reinterpret_cast<counter_t*>(data);
00280     }
00281     inline counter_t & DINLINE getRawCounter() {
00282       return *reinterpret_cast<counter_t*>(data);
00283     }
00284     inline const item_t* DINLINE getLeafsPtr() const {
00285       return &(reinterpret_cast<endnode_t*>(data)->leafs);
00286     }
00287     inline item_t* DINLINE getLeafsPtr() {
00288       return &(reinterpret_cast<endnode_t*>(data)->leafs);
00289     }
00290     inline uint32_t& DINLINE getLeafsSize() {
00291       return reinterpret_cast<endnode_t*>(data)->leafsize;
00292     }
00293 
00294     inline uint32_t DINLINE getLeafsSize() const {
00295       return reinterpret_cast<endnode_t*>(data)->leafsize;
00296     }
00297 
00298     inline childmap_t& DINLINE getChildMap() const {
00299       return reinterpret_cast<internalnode_t*>(data)->children;
00300     }
00301 
00302 
00303 
00304 
00305   public:
00306     inline counter_t DINLINE getCounter() {
00307       return origCounter(getRawCounter());
00308     }
00309 
00310     inline childmap_t * getChildren() {
00311       if (isLeaf()) {
00312         return NULL;
00313       } else {
00314         return &(getChildMap());
00315       }
00316     }
00317 
00318     explicit storenode_t(void* d): data(d){}
00319   };//class storenode_t 
00320 
00321   class nodeiter_t;
00322 
00323   class node_t: public storenode_t {
00324   protected:
00325     uint32_t idx;
00326   public:
00327     static const uint32_t npos= (uint32_t)(-1);
00328     
00329     inline counter_t DINLINE getCounter() {
00330       if (!ENDONLY) {
00331         return origCounter(this->getRawCounter());
00332       } else {
00333         if (this->isLeaf()) {
00334           if (idx>=this->getLeafsSize()) {
00335             return origCounter(this->getRawCounter());
00336           } else {
00337             return 0;
00338           }
00339         } else {
00340           return origCounter(this->getRawCounter());          
00341         }
00342       }
00343     }
00344 
00345     inline void DINLINE setCounter(counter_t c) {
00346       if (this->isLeaf()) {
00347         this->getRawCounter()=EndPatriciaBuildTree::counterToLeaf(c); 
00348       } else {
00349         this->getRawCounter()=EndPatriciaBuildTree::counterToNode(c); 
00350       }
00351     }
00352 
00353 
00354     inline nodeiter_t DINLINE childrenBegin() {
00355       if (this->isLeaf()) {
00356         if (idx<this->getLeafsSize())
00357           return nodeiter_t(this->data,idx);
00358         else
00359           return nodeiter_t(this->data,npos);
00360       } else {
00361         return nodeiter_t(this->getChildMap().begin());
00362       }
00363     }
00364     inline const nodeiter_t DINLINE childrenEnd() const {
00365       if (this->isLeaf()) {
00366         return nodeiter_t(this->data,npos);
00367       } else {
00368         return nodeiter_t(this->getChildMap().end());
00369       }
00370     }
00371     explicit node_t(void*d):storenode_t(d),idx(0) {}
00372     explicit node_t(void*d,uint32_t i):storenode_t(d),idx(i) {}
00373     //static const int internal_node_allocsize = (siz
00374   }; //class ndoe_t
00375 
00376   class nodeptr_t: public node_t {
00377   public:
00378     inline node_t & DINLINE operator*() {
00379       return *this;
00380     }
00381     inline node_t * DINLINE operator->() {
00382       return this;
00383     }
00384     explicit nodeptr_t(void *d):node_t(d) {}
00385     explicit nodeptr_t(void *d,uint32_t i):node_t(d,i) {}
00386     nodeptr_t(const node_t & n): node_t(n) {}
00387   }; //class nodeptr_t
00388   
00389 
00390   class nodeiter_t: protected nodeptr_t {
00391   protected:
00392     using node_t::npos;
00393     using node_t::idx;
00394     using storenode_t::data;
00395     childmap_t::iterator mapiter;
00396     inline bool isMapIter() const {
00397       return (!this->data);
00398     }
00399     inline bool isEndIter() const {
00400       return (this->idx==npos);
00401     }
00402   public:
00403     inline nodeiter_t& DINLINE operator++() {
00404       if (isMapIter()) {
00405         ++mapiter;
00406         return *this;
00407       } else {
00408         this->idx=npos;//set to enditer
00409         return *this;
00410       }
00411     }
00412     inline std::pair<item_t,nodeptr_t> DINLINE operator*() const {
00413       if (isMapIter()) {
00414         return std::make_pair(mapiter->first,nodeptr_t(mapiter->second));
00415       } else {
00416         return std::make_pair(this->getLeafsPtr()[idx],nodeptr_t(this->data,this->idx+1));
00417       }
00418     }
00419     inline bool DINLINE operator==(const nodeiter_t &o) {
00420       if (isMapIter()&&o.isMapIter()) {
00421         return mapiter==o.mapiter;
00422       } else {
00423         return (this->idx==o.idx) && (this->data==o.data);
00424       }
00425     }
00426     inline bool DINLINE operator!=(const nodeiter_t &o) {
00427       if (isMapIter()&&o.isMapIter()) {
00428         return mapiter!=o.mapiter;
00429       } else {
00430         return (idx!=o.idx) || (data!=o.data);
00431       }
00432     }
00433     nodeiter_t(void* d,uint32_t i):nodeptr_t(d,i){}
00434     nodeiter_t(const childmap_t::iterator & it): nodeptr_t(NULL,npos),mapiter(it){}
00435   }; 
00436 
00437 
00438 
00439   class buildtree_t {
00440     node_t root;
00441     friend class EndPatriciaBuildTree;
00442   public:
00443     inline nodeptr_t DINLINE getRoot() {
00444       return nodeptr_t(root);
00445     }
00446     buildtree_t():root(NULL) {
00447     }
00448   };
00449 
00450 protected:
00451 
00452   void* allocateInternalNode(counter_t cnt) {
00453     internalnode_t *nn=nodeallocator.allocate();
00454     nn->clear();
00455     nn->counter=counterToNode(cnt);
00456     return nn;
00457   }
00458 
00459   storenode_t allocateLeafNode(counter_t cnt,uint32_t len) {
00460     storenode_t nn(rawallocator.allocate(leafAllocSize(len)));
00461     nn.getRawCounter()=counterToLeaf(cnt);
00462     nn.getLeafsSize()=len;
00463     return nn;
00464   }
00465 
00466 public:
00467 
00468   template <class CALLBACK> void initBuildTree(buildtree_t &tree, item_t maxitem, CALLBACK &cb) {
00469     tree.root = node_t(allocateInternalNode(0));
00470     cb.initTree(maxitem);
00471   }
00472 
00473   template<class T, class CALLBACK>  void addTransToTree(T &trans,size_t length,buildtree_t &tree, counter_t count, CALLBACK & cb) {
00474     storenode_t bnode=tree.root;
00475     size_t i=0;
00476     void **nnode=NULL;//save the previous pointer
00477     //the root may not be a leaf node
00478     while ((i<length) && (!bnode.isLeaf())) {
00479       if (!ENDONLY)
00480         bnode.getRawCounter()+=count;
00481       register item_t curritem=trans[i];
00482       nnode=&(bnode.getChildMap()[curritem]);
00483       if (!*nnode) {
00484         //we need to add a new (leaf) node
00485         bnode=allocateLeafNode(count,length-i-1);
00486         *nnode=bnode.data;
00487         //nnode->counter=count;
00488         //nodesperitem[curritem]++;
00489         cb.onNewTreeNode(trans[i],count);
00490         //if (trans[i]==5) {
00491         //  log_dbg(0,"5|a|%d|%d",i,length);
00492         //}
00493         ++i;
00494         item_t *lptr=bnode.getLeafsPtr();
00495         while(i<length) {
00496           *lptr=trans[i];
00497           lptr++;
00498           cb.onNewTreeNode(trans[i],count);
00499 
00500           //if (trans[i]==5) {
00501           //  log_dbg(0,"5|b|%d|%d",i,length);
00502           //}
00503           i++;
00504         }
00505         return;
00506       } 
00507       //traverse to next node
00508       bnode=storenode_t(*nnode);
00509       ++i;
00510     }//while internal nodes
00511     if ((i==length)&&((!bnode.isLeaf())||(bnode.getLeafsSize()==0))) {
00512       //if (!bnode.isLeaf()) {
00513       //increase last counter
00514       bnode.getRawCounter()+=count;
00515     } else {
00516       //now: either i<length && bnode.isleaf
00517       // or: bnode.isleaf && bnode.leafsize>0
00518       //in both cases we have to split the current leaf node
00519       internalnode_t *newnode = nodeallocator.allocate();
00520       newnode->clear();
00521       counter_t nodectr;
00522       if (ENDONLY)
00523         nodectr=counterToNode(0);
00524       else
00525         nodectr=counterToNode(bnode.getCounter()+count);
00526       newnode->counter=nodectr;
00527       *nnode = newnode;
00528       //create common elements
00529       uint32_t lc=0;//index in the existing leaf
00530       while ((i<length)&&(lc<bnode.getLeafsSize())&&(trans[i]==bnode.getLeafsPtr()[lc])) {
00531         newnode=(internalnode_t*)((newnode->children[trans[i]])=nodeallocator.allocate());
00532         newnode->clear();
00533         newnode->counter=nodectr;
00534         ++lc;
00535         ++i;
00536       }
00537       //we did the common path. now create the distinct path
00538       if (lc<bnode.getLeafsSize()) {
00539         uint32_t ls=bnode.getLeafsSize()-lc-1;
00540         item_t li=bnode.getLeafsPtr()[lc];
00541         //shorten and attach the original leaf
00542         endnode_t *newleaf = reinterpret_cast<endnode_t*>((item_t*)bnode.data + lc+1);
00543         newleaf->counter=bnode.getRawCounter();
00544         newleaf->leafsize=ls;
00545         newnode->children[li]=newleaf;
00546       } else if (ENDONLY) {
00547         newnode->counter+=bnode.getCounter();
00548       }
00549       if (i<length) {
00550         uint32_t ls=length-i-1;
00551         item_t li=trans[i];
00552         storenode_t newleaf=allocateLeafNode(count,ls);
00553         newnode->children[li]=newleaf.data;
00554         //if (trans[i]==5) {
00555         // log_dbg(0,"5|c|%d|%d",i,length);
00556         //}
00557         ++i;
00558         cb.onNewTreeNode(li,count);
00559         uint32_t ofs=0;
00560         while(ofs<ls) {
00561           newleaf.getLeafsPtr()[ofs]=trans[i+ofs];
00562           cb.onNewTreeNode(trans[i+ofs],count);
00563           //if (trans[i+ofs]==5) {
00564           //  log_dbg(0,"5|d|%d|%d",i+ofs,length);
00565           //}
00566           ofs++;
00567         }
00568       } else if (ENDONLY) {
00569         newnode->counter+=count;
00570       }        //add new leaf node
00571     }//finish splitting
00572   }//addTransToTree
00573   
00574 
00575 }; //class EndPatriciaBuildTree
00576 
00577 
00578 }//namespace bracz
00579 
00580 
00581 #endif
00582 

Generated on Sun Sep 17 17:50:37 2006 for FIM environment by  doxygen 1.4.4