00001
00009 #include "common.hpp"
00010 #include "common/log.h"
00011 #include "io/input/transaction_reader/TransactionReader.hpp"
00012 #include "fpgrowth/buildfptree.hpp"
00013
00014
00015 namespace bracz {
00016
00018 typedef struct bnode_t {
00019 bnode_t *parent;
00020 item_t item;
00021 counter_t counter;
00022 bnode_t *headerlink;
00023 bnode_t *firstchild;
00024 bnode_t *nextsibling;
00025 } bnode_t;
00026
00027
00028 template<class INPUT, FirstLevel FIRSTLEVEL, bool SINGLE> class ClassicRebuildFPStructure {
00029 private:
00030
00031 public:
00033 typedef struct fptree_t {
00034 counter_t *freqs;
00035 bnode_t **headertable;
00036 bnode_t *root;
00037 counter_t DINLINE getFrequency(item_t i) {
00038 return freqs[i];
00039 }
00040
00041 counter_t DINLINE getRootFrequency() {
00042 return root->counter;
00043 }
00044 } fptree_t;
00045
00046
00047 counter_t getTransactionCount() {
00048 return transaction_count;
00049 }
00050
00051 private:
00052
00054 INPUT *inp;
00055
00056 protected:
00057
00058 counter_t minsupp;
00059 fptree_t fulltree;
00060
00061 std::vector<fptree_t> l1trees;
00062
00063
00064 counter_t transaction_count;
00065
00067 singleualloc<bnode_t, 10*1024> nodeallocator;
00069 singlesalloc<fptree_t,100> treealloc;
00070
00071
00072 typedef blockstack<stackmultiblock<bnode_t*,false,stacksingleblock<counter_t,false> > > treecontentalloc_t;
00073
00075 treecontentalloc_t treecontentalloc;
00076
00077
00078 template<class ARR> void addTransToTree(const ARR &trans, size_t len, fptree_t *tree, counter_t count) {
00079 bnode_t *bnode=tree->root;
00080 bnode->counter+=count;
00081 for(size_t i=0;i<len;++i) {
00082 register item_t curritem=trans[i];
00083 bnode_t** rnode=&(bnode->firstchild);
00084 bnode_t * nnode=*rnode;
00085 tree->freqs[curritem]+=count;
00086 while (nnode && nnode->item<curritem) {
00087 rnode=&(nnode->nextsibling);
00088 nnode=nnode->nextsibling;
00089 }
00090 if (!nnode || (nnode->item>curritem)) {
00091
00092 nnode=nodeallocator.allocate();
00093 nnode->parent=i?bnode:NULL;
00094 nnode->item=curritem;
00095 nnode->counter=count;
00096 nnode->headerlink=tree->headertable[curritem];
00097 tree->headertable[curritem]=nnode;
00098
00099 nnode->nextsibling=*rnode;
00100 *rnode=nnode;
00101 nnode->firstchild=NULL;
00102
00103 } else {
00104 nnode->counter+=count;
00105 }
00106
00107 bnode=nnode;
00108 }
00109 }
00110
00111 void DINLINE inittree(fptree_t &fulltree) {
00112 fulltree.root=nodeallocator.allocate();
00113 fulltree.root->item=-1;
00114 fulltree.root->parent=NULL;
00115 fulltree.root->firstchild=NULL;
00116 fulltree.root->counter=0;
00117 fulltree.root->headerlink=NULL;
00118 fulltree.root->nextsibling=NULL;
00119 }
00120
00121 void DINLINE allocatetree(fptree_t &fulltree, item_t maxitem) {
00122 alignalloc::allocz(fulltree.freqs,maxitem);
00123 alignalloc::allocz(fulltree.headertable,maxitem);
00124 inittree(fulltree);
00125 }
00126
00128 void buildTree(item_t maxitem) {
00129 log_status(0,"Building unconditional FP tree.");
00130
00131 std::vector<item_t> trans;
00132 inp->rewind();
00133
00134 transaction_count=0;
00135 allocatetree(fulltree,maxitem);
00136 while (counter_t count=inp->nextTransactionBIS(trans)) {
00137 transaction_count+=count;
00138 addTransToTree(trans,trans.size(),&fulltree,count);
00139 }
00140 }
00141
00142
00144 void buildAllL1Trees(item_t maxitem) {
00145 log_status(0,"Initializing FP tree.");
00146 l1trees.resize(maxitem);
00147 for(item_t i=0;i<maxitem;++i) {
00148 allocatetree(l1trees[i],i);
00149 }
00150 log_status(0,"Building 1st conditional FP trees.");
00151
00152 std::vector<item_t> trans;
00153 inp->rewind();
00154
00155 transaction_count=0;
00156 while (counter_t count=inp->nextTransactionBIS(trans)) {
00157 transaction_count+=count;
00158 for (size_t itemidx=0;itemidx<trans.size();++itemidx) {
00159 addTransToTree(trans,itemidx,&l1trees[trans[itemidx]],count);
00160 }
00161 }
00162 }
00163
00164 public:
00165 class SinglePathIterator {
00166 private:
00167 bnode_t *c;
00168 public:
00169 void DINLINE increment(fptree_t *) {
00170 c=c->parent;
00171 }
00172 bool DINLINE atEnd(fptree_t *) {
00173 return !c;
00174 }
00175 item_t DINLINE getCurrItem(fptree_t *) {
00176 return c->item;
00177 }
00178 SinglePathIterator(bnode_t *_c): c(_c){}
00179 };
00180
00181 SinglePathIterator getSinglePathIterator(fptree_t *t,item_t curritem) {
00182 return SinglePathIterator(t->headertable[curritem]);
00183 }
00184
00192 ClassicRebuildFPStructure(INPUT *_inp, item_t maxitem, counter_t _minsupp):inp(_inp),minsupp(_minsupp) {
00193 temptransaction=new item_t[maxitem];
00194 switch(FIRSTLEVEL) {
00195 case FLBuildSingleTree: {
00196 buildTree(maxitem);
00197 break;
00198 }
00199 case FLBuildAllL1Trees: {
00200 buildAllL1Trees(maxitem);
00201 break;
00202 }
00203 case FLSimultProject: {
00204 log_err(0,"Simultaneous projection is not implemented in ClassicFpTree yet.");
00205 exit(1);
00206 break;
00207 }
00208 }
00209 }
00210
00211 ~ClassicRebuildFPStructure() {
00212 delete [] temptransaction;
00213 }
00214
00215 fptree_t * getFullTree() {
00216 if (FIRSTLEVEL != FLBuildSingleTree) {
00217 log_err(0,"Requested full tree when per-item trees are calculated!");
00218 return NULL;
00219 } else
00220 return &fulltree;
00221 }
00222
00223 fptree_t * getProjTree(item_t item) {
00224 if (FIRSTLEVEL == FLBuildSingleTree) {
00225 log_err(0,"Requested per-item tree when only unconditional tree is calculated!");
00226 return NULL;
00227 } else
00228 return &(l1trees[item]);
00229 }
00230
00231
00232 item_t DINLINE checkSinglePath(fptree_t *t, item_t curritem, item_t spdepth) {
00233 if (!SINGLE) {
00234 return 0;
00235 } else {
00236 while ((spdepth<curritem) && ((!t->headertable[spdepth]) || (!t->headertable[spdepth]->headerlink)))
00237 spdepth++;
00238 return spdepth;
00239 }
00240 }
00241
00242 template <class O_M> void DINLINE handleSinglePath(fptree_t *t, item_t curritem, O_M *out) {
00243
00244 }
00245
00246 void DINLINE zeroDataDense(fptree_t *intr, item_t curritem) {
00247
00248 alignalloc::zerola(intr->freqs,curritem);
00249
00250 for (item_t i=0;i<curritem;++i) {
00251 for(bnode_t *cnode=intr->headertable[i];cnode;cnode=cnode->headerlink) {
00252 cnode->counter=0;
00253 }
00254 }
00255 }
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271 item_t* temptransaction;
00272
00273 fptree_t * DINLINE projectTree(fptree_t *intr,item_t curritem) {
00274
00275
00276
00277
00278
00279 fptree_t *nftree = treealloc.allocate();
00280 pair<bnode_t**, counter_t*> als;
00281 treecontentalloc.pushAndGetBlock(curritem,als);
00282 nftree->freqs=als.second;
00283 alignalloc::zero(nftree->freqs,curritem);
00284 nftree->headertable=als.first;
00285 alignalloc::zero(nftree->headertable,curritem);
00286 inittree(*nftree);
00287
00288 for (bnode_t *cnode=intr->headertable[curritem];cnode;cnode=cnode->headerlink) {
00289 counter_t c=cnode->counter;
00290 bnode_t *nnode=cnode->parent;
00291 item_t *ptr=temptransaction+curritem;
00292 while (nnode) {
00293 if (intr->freqs[nnode->item]>=minsupp) {
00294 --ptr;
00295 *ptr=nnode->item;
00296 }
00297 nnode=nnode->parent;
00298 }
00299 addTransToTree(ptr,temptransaction+curritem-ptr,nftree,c);
00300 }
00301
00302 return nftree;
00303 }
00304
00305
00306 void DINLINE deallocTree(fptree_t *t, fptree_t *parent, item_t projitem) {
00307 for (item_t i=0;i<projitem;++i) {
00308 for(bnode_t *cnode=t->headertable[i];cnode;cnode=cnode->headerlink) {
00309 nodeallocator.free(cnode);
00310 }
00311 }
00312 nodeallocator.free(t->root);
00313 treecontentalloc.freeBlock();
00314 treealloc.free(t);
00315 }
00316
00317 };
00318
00319
00320 }