00001
00009 #include "common.hpp"
00010 #include "common/log.h"
00011 #include "io/input/transaction_reader/TransactionReader.hpp"
00012 #include "fpgrowth/buildfptree.hpp"
00013
00014
00015 static const int PREFETCHDIST=64;
00016
00017 namespace bracz {
00018
00019
00020
00021
00031 template<class INPUT,class BUILDTREE, FirstLevel FIRSTLEVEL> class NonOrdFPStructure {
00032 public:
00034 typedef uint32_t index_t;
00035
00036 typedef struct fptree_t {
00038 index_t *itemstarts;
00040 index_t *parents;
00041
00043 counter_t *freqs;
00045 counter_t *counters;
00046
00047 enum freeelement_t {
00048 free_none=0,
00049 free_data=1,
00050 free_structure=2
00051 };
00053 int freeelements;
00054
00055
00056 fptree_t() {
00057 itemstarts=NULL;
00058 counters=NULL;
00059 parents=NULL;
00060 freqs=NULL;
00061 freeelements=free_none;
00062 }
00063
00064 counter_t DINLINE getFrequency(item_t i) {
00065 return freqs[i];
00066 }
00067
00068 counter_t DINLINE getRootFrequency() {
00069 return counters[0];
00070 }
00071 } fptree_t;
00072
00073 counter_t getTransactionCount() {
00074 return transaction_count;
00075 }
00076
00077 private:
00078
00079
00081 INPUT *inp;
00082
00083 protected:
00084
00085 fptree_t fulltree;
00086
00087 std::vector<fptree_t> l1trees;
00088
00089
00090 counter_t transaction_count;
00091
00092
00094 class TreeCopy {
00095 public:
00097
00101 std::vector<index_t> nextfreenode;
00103
00110 fptree_t *target;
00111
00112 void reccopytree(TYPENAME BUILDTREE::nodeptr_t node, index_t nodeidx) {
00113 target->counters[nodeidx]=node->getCounter();
00114 for(TYPENAME BUILDTREE::nodeiter_t i=node->childrenBegin();
00115 i!=node->childrenEnd();
00116 ++i) {
00117 register index_t newidx=nextfreenode[(*i).first]++;
00118 target->parents[newidx]=nodeidx;
00119 target->freqs[(*i).first]+=(*i).second->getCounter();
00120 reccopytree(TYPENAME BUILDTREE::nodeptr_t((*i).second),newidx);
00121 }
00122 }
00123
00124 };
00125
00132 class TreeAnnotation {
00133 protected:
00134 std::vector<counter_t> nodesperitem;
00135
00136 public:
00137 inline void DINLINE initTree(item_t maxitem) {
00138 nodesperitem.resize(maxitem,0);
00139 }
00140
00141 inline void DINLINE onNewTreeNode(item_t curritem,counter_t ) {
00142 nodesperitem[curritem]++;
00143 }
00144
00145
00146 template<class BUILDTREE_> inline void DINLINE buildTreeToFinalTree(BUILDTREE_ & btr, typename BUILDTREE_::buildtree_t &tree, fptree_t &fulltree, item_t maxitem) {
00147
00148
00149 alignalloc::alloc(fulltree.itemstarts,maxitem+1);
00150 alignalloc::allocz(fulltree.freqs,maxitem);
00151 index_t last=1;
00152 for(size_t i=0;i<maxitem;++i) {
00153 fulltree.itemstarts[i]=last;
00154
00155 last+=nodesperitem[i];
00156 }
00157
00158 fulltree.itemstarts[maxitem]=last;
00159
00160
00161
00162
00163 alignalloc::alloc(fulltree.parents,last);
00164 alignalloc::alloc(fulltree.counters,last);
00165 TreeCopy tc;
00166 tc.nextfreenode.assign(fulltree.itemstarts, fulltree.itemstarts+maxitem);
00167 tc.target=&fulltree;
00168 tc.reccopytree((tree.getRoot()),0);
00169 fprintf(stderr,"[%d %d %d] ",maxitem,fulltree.counters[0],last);
00170
00171 }
00172 };
00174 void buildTree(item_t maxitem) {
00175 TreeAnnotation treeann;
00176 BUILDTREE btr;
00177 typename BUILDTREE::buildtree_t buildtree;
00178 btr.initBuildTree(buildtree,maxitem,treeann);
00179
00180 log_status(0,"Building temporary FP tree.");
00181
00182 std::vector<item_t> trans;
00183 inp->rewind();
00184
00185 transaction_count=0;
00186 while (counter_t count=inp->nextTransactionBIS(trans)) {
00187 transaction_count+=count;
00188 btr.addTransToTree(trans,trans.size(),buildtree,count,treeann);
00189 }
00190 treeann.buildTreeToFinalTree(btr,buildtree,fulltree,maxitem);
00191 }
00192
00194 void buildAllL1Trees(item_t maxitem) {
00195 BUILDTREE btr;
00196
00197 std::vector<TreeAnnotation> treeannotations;
00198 std::vector<typename BUILDTREE::buildtree_t> trees;
00199 trees.resize(maxitem);
00200 treeannotations.resize(maxitem);
00201 log_status(0,"Initializing FP tree.");
00202 for(item_t i=0;i<maxitem;++i) {
00203 btr.initBuildTree(trees[i],i,treeannotations[i]);
00204 }
00205 log_status(0,"Building temporary FP trees.");
00206
00207 std::vector<item_t> trans;
00208 inp->rewind();
00209
00210 transaction_count=0;
00211 while (counter_t count=inp->nextTransactionBIS(trans)) {
00212 transaction_count+=count;
00213 for (size_t itemidx=0;itemidx<trans.size();++itemidx) {
00214 btr.addTransToTree(trans,itemidx,trees[trans[itemidx]],count,treeannotations[trans[itemidx]]);
00215 }
00216 }
00217 l1trees.resize(maxitem);
00218 for(item_t i=0;i<maxitem;++i) {
00219 treeannotations[i].buildTreeToFinalTree(btr,trees[i],l1trees[i],i);
00220 }
00221 fprintf(stderr,"\n");
00222 }
00223
00224
00225
00226 class SimultProject {
00227
00228 typedef struct simultprojnode_t {
00229 item_t parentitem;
00230 index_t parentoffset;
00231 counter_t counter;
00232 simultprojnode_t *next;
00233 } simultprojnode_t __attribute__ ((aligned (16)));;
00234
00235 bracz::blockalloc<simultprojnode_t,100000> tempnodealloc;
00236
00237
00238 typedef struct temptrie_t {
00239 public:
00240 bracz::maxvector<simultprojnode_t*> nodes;
00241 } temptrie_t;
00242
00243
00244 std::vector<item_t> curritemset;
00245
00246
00247 bracz::maxvector<temptrie_t> temptrees;
00248
00249
00250 NonOrdFPStructure *parent;
00251
00252 class counterAddCombine {
00253 public:
00254 inline static void DINLINE combine (counter_t &dest, const counter_t &src) {
00255 dest+=src;
00256 }
00257 };
00258
00259 template <typename T, class CombineFn> class itemlist_t {
00260 private:
00261 std::vector<item_t> itemlist;
00262 T* elements;
00263 uint32_t maxitem;
00264
00265 public:
00266 itemlist_t(): elements(NULL){}
00267
00268 inline void DINLINE init(uint32_t _maxitem){
00269 maxitem=_maxitem;
00270 itemlist.reserve(maxitem);
00271 alignalloc::allocz(elements,maxitem);
00272 }
00273
00274 ~itemlist_t() {
00275 alignalloc::freeifnonnull(elements);
00276 }
00277
00278 inline T& DINLINE getItem(item_t i) {
00279 return elements[i];
00280 }
00281
00282 inline void DINLINE setItem(item_t i,T element) {
00283 if (!getItem(i)) {
00284 itemlist.push_back(i);
00285 }
00286 CombineFn::combine(elements[i],element);
00287 }
00288
00289 inline void DINLINE combine(const itemlist_t &o) {
00290 for(index_t idx=0;idx<o.itemlist.size();++idx) {
00291 item_t i=o.itemlist[idx];
00292 setItem(i,o.elements[i]);
00293 }
00294 }
00295
00296 inline void DINLINE clear() {
00297
00298 if (true) {
00299 for(index_t idx=0;idx<itemlist.size();++idx) {
00300 item_t i=itemlist[idx];
00301 elements[i]=T();
00302 }
00303 } else {
00304 alignalloc::zero(elements,maxitem);
00305 }
00306 itemlist.clear();
00307 }
00308
00309 class iterator {
00310 private:
00311 itemlist_t *parent;
00312 uint32_t idx;
00313 public:
00314 iterator(itemlist_t *p,uint32_t i):parent(p),idx(i){
00315 }
00316 iterator(const iterator &o):parent(o.parent),idx(o.idx) {
00317 }
00318 inline bool DINLINE operator==(const iterator&o) const {
00319 return (idx==o.idx)&&(parent==o.parent);
00320 }
00321 inline bool DINLINE operator!=(const iterator&o) const {
00322 return (idx!=o.idx)||(parent!=o.parent);
00323 }
00324 inline iterator& operator++() {
00325 ++idx;
00326 return *this;
00327 }
00328 inline std::pair<item_t, T> operator*() const {
00329 item_t i=parent->itemlist[idx];
00330 return std::make_pair(i,parent->elements[i]);
00331 }
00332
00333 };
00334 friend class iterator;
00335 iterator begin() {
00336 return iterator(this,0);
00337 }
00338 iterator end() {
00339 return iterator(this,itemlist.size());
00340 }
00341 };
00342
00343 typedef itemlist_t<counter_t,counterAddCombine> myitemlist_t;
00344
00345 bracz::maxvector<myitemlist_t> itemlists;
00346
00347 bracz::maxvector<simultprojnode_t**> recursenodes;
00348
00349 item_t maxitem;
00350
00351 size_t totnodecount;
00352
00353 void simultRecurse(TYPENAME BUILDTREE::nodeptr_t node) {
00354 index_t idx=curritemset.size()-1;
00355 itemlists[idx].clear();
00356
00357
00358 for(TYPENAME BUILDTREE::nodeiter_t i=node->childrenBegin();
00359 i!=node->childrenEnd();
00360 ++i) {
00361 item_t childitem = (*i).first;
00362 TYPENAME BUILDTREE::nodeptr_t childp((*i).second);
00363
00364 curritemset.push_back(childitem+1);
00365
00366
00367
00368 index_t pidx=idx;
00369 while (!recursenodes[pidx][childitem])
00370 --pidx;
00371 ++pidx;
00372 while(pidx<=idx) {
00373
00374 simultprojnode_t *newnode = tempnodealloc.allocate();
00375 totnodecount++;
00376
00377 newnode->parentitem=curritemset[pidx-1];
00378
00379 newnode->parentoffset=parent->l1trees[childitem].itemstarts[curritemset[pidx-1]]-1;
00380
00381 parent->l1trees[childitem].itemstarts[curritemset[pidx]]++;
00382
00383
00384 newnode->next=temptrees[childitem].nodes[curritemset[pidx]];
00385 temptrees[childitem].nodes[curritemset[pidx]]=newnode;
00386
00387 recursenodes[pidx][childitem]=newnode;
00388 pidx++;
00389 }
00390
00391
00392
00393 simultRecurse(childp);
00394
00395 itemlists[idx].combine(itemlists[idx+1]);
00396 itemlists[idx].setItem(childitem,(*childp).getCounter());
00397
00398 curritemset.pop_back();
00399 }
00400
00401
00402 for (typename myitemlist_t::iterator i=itemlists[idx].begin();
00403 i!=itemlists[idx].end();
00404 ++i) {
00405
00406 recursenodes[idx][(*i).first]->counter=(*i).second;
00407 recursenodes[idx][(*i).first]=NULL;
00408
00409 }
00410
00411 }
00412
00413 public:
00414 void doSimultProject(NonOrdFPStructure *par,item_t maxitem, uint32_t maxdepth, BUILDTREE &btr,typename BUILDTREE::buildtree_t &tree) {
00415 log_status(0,"Preparing simultaneous projection");
00416 parent=par;
00417 totnodecount=0;
00418 parent->l1trees.resize(maxitem);
00419 this->maxitem=maxitem;
00420 curritemset.reserve(maxdepth+1);
00421 curritemset.resize(0);
00422 curritemset.push_back(0);
00423 temptrees.resize(maxitem);
00424 itemlists.resize(maxdepth+1);
00425 recursenodes.resize(maxdepth+1);
00426 for(uint32_t i=0;i<=maxdepth;++i) {
00427 itemlists[i].init(maxitem);
00428 alignalloc::allocz(recursenodes[i],maxitem);
00429 }
00430 for (item_t i=0;i<maxitem;++i) {
00431 temptrees[i].nodes.resize(i+1,NULL);
00432
00433
00434 alignalloc::allocz(parent->l1trees[i].itemstarts,i+1);
00435 parent->l1trees[i].itemstarts[0]=1;
00436 recursenodes[0][i]=tempnodealloc.allocate();
00437 totnodecount++;
00438 recursenodes[0][i]->parentitem=0;
00439 recursenodes[0][i]->parentoffset=0;
00440 recursenodes[0][i]->next=NULL;
00441 recursenodes[0][i]->counter=0;
00442 temptrees[i].nodes[0]=recursenodes[0][i];
00443 }
00444 log_status(0,"Doing simultaneous projection");
00445 simultRecurse(tree.getRoot());
00446
00447 log_info(0,"total node count %u",totnodecount);
00448 log_status(0,"Copying temporary tries into the final containers.");
00449 for(uint32_t i=0;i<=maxdepth;++i) {
00450 alignalloc::free(recursenodes[i]);
00451 }
00452 bracz::maxvector<index_t> nodeoffsets;
00453 nodeoffsets.reserve(maxitem+2);
00454 bracz::maxvector<index_t> nextnode;
00455 nextnode.reserve(maxitem+1);
00456 for (item_t curritem=0;curritem<maxitem;++curritem) {
00457
00458 fptree_t &t=parent->l1trees[curritem];
00459 nodeoffsets.resize(curritem+2);
00460 nextnode.resize(curritem+1);
00461 nodeoffsets[0]=0;
00462 for(item_t i=1;i<=curritem+1;++i) {
00463 nodeoffsets[i]=nodeoffsets[i-1]+t.itemstarts[i-1];
00464 nextnode[i-1]=t.itemstarts[i-1]=nodeoffsets[i];
00465 }
00466
00467 size_t nodecount=t.itemstarts[curritem];
00468 alignalloc::alloc(t.parents,nodecount);
00469 alignalloc::alloc(t.counters,nodecount);
00470 alignalloc::alloc(t.freqs,curritem);
00471 for(item_t i=0;i<=curritem;++i) {
00472 counter_t allc=0;
00473 for (simultprojnode_t *cnode=temptrees[curritem].nodes[i];cnode;cnode=cnode->next) {
00474 __builtin_prefetch(cnode->next);
00475 index_t idx=--nextnode[i];
00476
00477 t.parents[idx]=nodeoffsets[cnode->parentitem]+cnode->parentoffset;
00478 allc+=(t.counters[idx]=cnode->counter);
00479 }
00480 if (i) {
00481 t.freqs[i-1]=allc;
00482
00483 } else {
00484
00485 }
00486 }
00487 }
00488 log_status(0,"Finished. Starting main recursion.");
00489 }
00490
00491 };
00492
00493 friend class SimultProject;
00494
00495 void simultProject(item_t maxitem) {
00496 bracz::nullTreeAnn treeann;
00497 BUILDTREE btr;
00498 typename BUILDTREE::buildtree_t buildtree;
00499 btr.initBuildTree(buildtree,maxitem,treeann);
00500
00501 log_status(0,"Building temporary FP tree.");
00502
00503 std::vector<item_t> trans;
00504 inp->rewind();
00505
00506 transaction_count=0;
00507 uint32_t maxdepth = 0;
00508 while (counter_t count=inp->nextTransactionBIS(trans)) {
00509 transaction_count+=count;
00510 if (trans.size()>maxdepth)
00511 maxdepth=trans.size();
00512 btr.addTransToTree(trans,trans.size(),buildtree,count,treeann);
00513 }
00514 SimultProject projctr;
00515 projctr.doSimultProject(this,maxitem,maxdepth,btr,buildtree);
00516 }
00517
00518 public:
00519
00526 NonOrdFPStructure(INPUT *_inp, item_t maxitem) {
00527 inp=_inp;
00528 switch(FIRSTLEVEL) {
00529 case FLBuildSingleTree: {
00530 buildTree(maxitem);
00531 break;
00532 }
00533 case FLBuildAllL1Trees: {
00534 buildAllL1Trees(maxitem);
00535 break;
00536 }
00537 case FLSimultProject: {
00538 simultProject(maxitem);
00539 break;
00540 }
00541 }
00542 }
00543
00544 fptree_t * getFullTree() {
00545 if (FIRSTLEVEL != FLBuildSingleTree) {
00546 log_err(0,"Requested full tree when per-item trees are calculated!");
00547 return NULL;
00548 } else
00549 return &fulltree;
00550 }
00551
00552 fptree_t * getProjTree(item_t item) {
00553 if (FIRSTLEVEL == FLBuildSingleTree) {
00554 log_err(0,"Requested per-item tree when only unconditional tree is calculated!");
00555 return NULL;
00556 } else
00557 return &(l1trees[item]);
00558 }
00559
00560
00561 ~NonOrdFPStructure() {
00562 alignalloc::freeifnonnull(fulltree.parents);
00563 alignalloc::freeifnonnull(fulltree.itemstarts);
00564 alignalloc::freeifnonnull(fulltree.counters);
00565 alignalloc::freeifnonnull(fulltree.freqs);
00566 }
00567
00568 bool DINLINE checkSinglePath(fptree_t *t, item_t curritem) {
00569 return false;
00570 }
00571
00572 template <class O_M> void DINLINE handleSinglePath(fptree_t *t, item_t curritem, O_M *out) {
00573
00574 }
00575
00576 };
00577
00597 template<class INPUT,class BUILDTREEALLOC, bool SINGLE, bool TD,
00598 bool PROJECT, bool PROJECTDELETECLOSED, bool PROJMERGENODES,
00599 FirstLevel FIRSTLEVEL, bool SPARSEAGGR> class TDNonOrdFPStructure : public NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL> {
00600 public:
00601 typedef TYPENAME NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::fptree_t fptree_t;
00602 typedef TYPENAME NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::index_t index_t;
00603
00604 private:
00605 counter_t minsupp;
00606
00607
00608 counter_t *multiples;
00609 index_t *last;
00610
00612 index_t *projmap;
00614 index_t *projlastidx;
00615
00617 index_t *sparsenodes;
00619 index_t **nextfreesparsenode;
00620
00621
00623 blockstack<stacksingleblock<counter_t,true> > nodealloc;
00625 blockstack<stacksingleblock<counter_t,false> > headeralloc;
00627 blockstack<stacksingleblock<index_t,false> > projheaderalloc;
00629 blockstack<stacksingleblock<index_t,false> > projnodealloc;
00631 singlesalloc<fptree_t,100> treealloc;
00632
00633 using NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::l1trees;
00634
00635 public:
00636
00637
00638 TDNonOrdFPStructure(INPUT *_inp, item_t maxitem, counter_t _minsupp) : NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>(_inp,maxitem),minsupp(_minsupp) {
00639 if (SINGLE) {
00640 alignalloc::alloc(multiples,maxitem);
00641 alignalloc::alloc(last,maxitem);
00642 }
00643 index_t maxtreesize=0;
00644 if (PROJECT||SPARSEAGGR) {
00645 if (FIRSTLEVEL!=FLBuildSingleTree) {
00646 for (item_t i=0;i<maxitem;++i) {
00647 if (l1trees[i].itemstarts[i]>maxtreesize) {
00648 maxtreesize=l1trees[i].itemstarts[i];
00649 }
00650 }
00651 } else {
00652 maxtreesize=this->fulltree.itemstarts[maxitem];
00653 }
00654 }
00655 if (PROJECT) {
00656 alignalloc::alloc(projmap,maxtreesize);
00657 alignalloc::alloc(projlastidx,maxtreesize);
00658 }
00659 if (SPARSEAGGR) {
00660 alignalloc::allocz(sparsenodes,maxtreesize);
00661 alignalloc::alloc(nextfreesparsenode,maxitem);
00662 }
00663 }
00664
00665 ~TDNonOrdFPStructure() {
00666 if (SINGLE) {
00667 alignalloc::free(multiples);
00668 alignalloc::free(last);
00669 }
00670 }
00671
00672 class SinglePathIterator {
00673 private:
00674 index_t nodeidx;
00675 item_t curritem;
00676 public:
00677 SinglePathIterator () {}
00678 SinglePathIterator(index_t _n, item_t _i):
00679 nodeidx(_n), curritem(_i) {}
00680
00681 void DINLINE increment (fptree_t *tree) {
00682 nodeidx=tree->parents[nodeidx];
00683 while (nodeidx && (tree->itemstarts[curritem]>nodeidx))
00684 curritem--;
00685 }
00686
00687 bool DINLINE atEnd(fptree_t *tree) {
00688 return (!nodeidx);
00689 }
00690
00691 item_t DINLINE getCurrItem(fptree_t *tree) {
00692 return curritem;
00693 }
00694
00695 };
00696
00698
00703 item_t DINLINE checkSinglePath(fptree_t *t, item_t curritem, item_t spdepth) {
00704 if (!SINGLE) {
00705 return 0;
00706 } else {
00707 while ((spdepth<curritem) && (multiples[spdepth]<=1))
00708 spdepth++;
00709 return spdepth;
00710 }
00711 }
00712
00713 SinglePathIterator DINLINE getSinglePathIterator(fptree_t *tree, item_t curritem) {
00714 if (!SINGLE) {
00715 log_err(0,"Error: requested SinglePathIterator when singlepath optimization is not enabled");
00716 }
00717 return SinglePathIterator(last[curritem],curritem);
00718 }
00719
00720
00721 void DINLINE zeroDataDense(fptree_t *intr, item_t curritem) {
00722
00723 alignalloc::zerola(intr->counters,intr->itemstarts[curritem]);
00724 alignalloc::zerola(intr->freqs,curritem);
00725 if (SINGLE)
00726 alignalloc::zero(multiples,curritem+1);
00727 }
00728
00729 void DINLINE zeroDataADense(fptree_t *intr, item_t curritem) {
00730
00731 if (!SPARSEAGGR)
00732 alignalloc::zero(intr->counters,intr->itemstarts[curritem]);
00733 alignalloc::zero(intr->freqs,curritem);
00734 if (SINGLE)
00735 alignalloc::zero(multiples,curritem+1);
00736 }
00737
00738 void DINLINE aggregateSparse(index_t* itemstarts,index_t *parents, counter_t *freqs, counter_t *ocounters, counter_t *ncounters, item_t curritem) {
00739 for (item_t i=0;i<curritem;++i) {
00740 nextfreesparsenode[i]=sparsenodes+itemstarts[i];
00741 }
00742 item_t paritem=curritem;
00743 for(index_t nodeidx=itemstarts[curritem];nodeidx<itemstarts[curritem+1];++nodeidx) {
00744 if (ocounters[nodeidx]) {
00745 register index_t par=parents[nodeidx];
00746 if (par && !ncounters[parents[nodeidx]]) {
00747 if (itemstarts[paritem]>par) {
00748 do {
00749 --paritem;
00750 } while (itemstarts[paritem]>par);
00751 } else {
00752 while (itemstarts[paritem+1]<=par) ++paritem;
00753 }
00754 *(nextfreesparsenode[paritem]++)=par;
00755 }
00756 ncounters[parents[nodeidx]]+=ocounters[nodeidx];
00757 }
00758 }
00759 for (item_t nextitem=curritem;nextitem>0;) {
00760 --nextitem;
00761 if (SINGLE) {
00762 multiples[nextitem]=nextfreesparsenode[nextitem]-(sparsenodes+itemstarts[nextitem]);
00763 last[nextitem]=*(nextfreesparsenode[nextitem]-1);
00764 }
00765 for(index_t *pnodeidx=sparsenodes+itemstarts[nextitem];pnodeidx!=nextfreesparsenode[nextitem];++pnodeidx) {
00766 index_t nodeidx=*pnodeidx;
00767 counter_t cnt=ncounters[nodeidx];
00768 index_t par=parents[nodeidx];
00769 freqs[nextitem]+=cnt;
00770 if (par && !ncounters[par]) {
00771 if (itemstarts[paritem]>par) {
00772 do {
00773 --paritem;
00774 } while (itemstarts[paritem]>par);
00775 } else {
00776 while (itemstarts[paritem+1]<=par) ++paritem;
00777 }
00778 *(nextfreesparsenode[paritem]++)=par;
00779 }
00780 ncounters[par]+=cnt;
00781 }
00782 }
00783 }
00784
00785 void DINLINE aggregateDense(index_t* itemstarts,index_t *parents, counter_t *freqs, counter_t *ocounters, counter_t *ncounters, item_t curritem) {
00786 #ifdef PREFETCH
00787 for(index_t nodeidx=itemstarts[curritem+1];nodeidx>itemstarts[curritem];) {
00788 --nodeidx;
00789 if (nodeidx>PREFETCHDIST)
00790 __builtin_prefetch(ncounters+parents[nodeidx-PREFETCHDIST]);
00791 ncounters[parents[nodeidx]]+=ocounters[nodeidx];
00792 }
00793 #else
00794 for(index_t nodeidx=itemstarts[curritem];nodeidx<itemstarts[curritem+1];++nodeidx) {
00795 ncounters[parents[nodeidx]]+=ocounters[nodeidx];
00796 }
00797 #endif
00798 for (item_t nextitem=curritem;nextitem>0;) {
00799 --nextitem;
00800 #ifdef PREFETCH
00801 for(index_t nodeidx=itemstarts[nextitem+1]-1;nodeidx>=itemstarts[nextitem];--nodeidx) {
00802 if (nodeidx>PREFETCHDIST)
00803 __builtin_prefetch(ncounters+parents[nodeidx-PREFETCHDIST]);
00804 #else
00805 for(index_t nodeidx=itemstarts[nextitem];nodeidx<itemstarts[nextitem+1];++nodeidx) {
00806 #endif
00807 register counter_t cnt=ncounters[nodeidx];
00808 if (cnt) {
00809 {
00810 register int par=parents[nodeidx];
00811 ncounters[par]+=cnt;
00812 }
00813 freqs[nextitem]+=cnt;
00814 if (SINGLE) {
00815 multiples[nextitem]++;
00816 last[nextitem]=nodeidx;
00817 }
00818 }
00819 }
00820 }
00821 }
00822
00823 fptree_t * DINLINE projectTree(fptree_t *intr,item_t curritem) {
00824 if (TD) {
00825 zeroDataDense(intr,curritem);
00826 aggregateDense(intr->itemstarts,intr->parents,intr->freqs,intr->counters,intr->counters,curritem);
00827 return intr;
00828 } else {
00829 fptree_t *nftree = treealloc.allocate();
00830 nftree->parents=intr->parents;
00831 nftree->itemstarts=intr->itemstarts;
00832 nftree->counters=NULL;
00833 nodealloc.pushAndGetBlock(nftree->itemstarts[curritem],nftree->counters);
00834 nftree->freqs=NULL;
00835 headeralloc.pushAndGetBlock(curritem,nftree->freqs);
00836 zeroDataADense(nftree,curritem);
00837 if (SPARSEAGGR) {
00838 aggregateSparse(nftree->itemstarts,nftree->parents,nftree->freqs,intr->counters,nftree->counters,curritem);
00839
00840 } else {
00841 aggregateDense(nftree->itemstarts,nftree->parents,nftree->freqs,intr->counters,nftree->counters,curritem);
00842 }
00843 if (PROJECT) {
00844 if (SPARSEAGGR)
00845 compactTreeSparse(nftree,curritem);
00846 else
00847 compactTree(nftree,curritem);
00848
00849 }
00850 return nftree;
00851 }
00852 }
00853
00854 inline bool DINLINE dropNode(fptree_t *t, item_t curritem, index_t nodeidx) {
00855 return (t->counters[nodeidx]==0) || (t->freqs[curritem]<minsupp) || (PROJECTDELETECLOSED&&(t->freqs[curritem]==t->getRootFrequency()));
00856 }
00857
00858
00859 inline void DINLINE compactTree(fptree_t *t, item_t projitem) {
00860 index_t nn=1;
00861 index_t *newparents;
00862 index_t *newitemstarts;
00863 projheaderalloc.pushAndGetBlock(projitem+1,newitemstarts);
00864 projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newparents);
00865 projmap[0]=0;
00866 newparents[0]=0;
00867 projlastidx[0]=0;
00868 for (item_t curritem=0; curritem<projitem; ++curritem) {
00869 newitemstarts[curritem]=nn;
00870 for (index_t nodeidx=t->itemstarts[curritem];nodeidx<t->itemstarts[curritem+1];++nodeidx) {
00871 if (dropNode(t,curritem,nodeidx)) {
00872 projmap[nodeidx]=projmap[t->parents[nodeidx]];
00873 } else {
00874 index_t newidx;
00875 if (PROJMERGENODES &&
00876 ((newidx=projlastidx[projmap[t->parents[nodeidx]]])
00877 >=newitemstarts[curritem])) {
00878
00879 t->counters[newidx]+=t->counters[nodeidx];
00880 } else {
00881
00882 newidx=nn++;
00883 t->counters[newidx]=t->counters[nodeidx];
00884 newparents[newidx]=projmap[t->parents[nodeidx]];
00885 projlastidx[newparents[newidx]]=newidx;
00886 projlastidx[newidx]=0;
00887 }
00888 projmap[nodeidx]=newidx;
00889 }
00890 }
00891 if (SINGLE) {
00892 multiples[curritem]=nn-newitemstarts[curritem];
00893 last[curritem]=newitemstarts[curritem];
00894 }
00895 }
00896 newitemstarts[projitem]=nn;
00897 t->parents=newparents;
00898 t->itemstarts=newitemstarts;
00899 t->freeelements|=fptree_t::free_structure;
00900 }
00901
00902
00903 inline void DINLINE compactTreeSparse(fptree_t *t, item_t projitem) {
00904 index_t nn=1;
00905 index_t *newparents;
00906 index_t *newitemstarts;
00907 projheaderalloc.pushAndGetBlock(projitem+1,newitemstarts);
00908 projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newparents);
00909 counter_t *newcounters;
00910 projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newcounters);
00911 projmap[0]=0;
00912 newparents[0]=0;
00913 projlastidx[0]=0;
00914 newcounters[0]=t->counters[0];
00915 for (item_t curritem=0; curritem<projitem; ++curritem) {
00916 newitemstarts[curritem]=nn;
00917 for(index_t *pnodeidx=sparsenodes+t->itemstarts[curritem];pnodeidx!=nextfreesparsenode[curritem];++pnodeidx) {
00918 index_t nodeidx=*pnodeidx;
00919
00920 if (dropNode(t,curritem,nodeidx)) {
00921 projmap[nodeidx]=projmap[t->parents[nodeidx]];
00922 } else {
00923 index_t newidx;
00924 if (PROJMERGENODES &&
00925 ((newidx=projlastidx[projmap[t->parents[nodeidx]]])
00926 >=newitemstarts[curritem])) {
00927
00928 newcounters[newidx]+=t->counters[nodeidx];
00929 } else {
00930
00931 newidx=nn++;
00932 newcounters[newidx]=t->counters[nodeidx];
00933 newparents[newidx]=projmap[t->parents[nodeidx]];
00934 projlastidx[newparents[newidx]]=newidx;
00935 projlastidx[newidx]=0;
00936 }
00937 projmap[nodeidx]=newidx;
00938 }
00939 t->counters[nodeidx]=0;
00940 }
00941 if (SINGLE) {
00942 multiples[curritem]=nn-newitemstarts[curritem];
00943 last[curritem]=newitemstarts[curritem];
00944 }
00945 }
00946 newitemstarts[projitem]=nn;
00947 t->parents=newparents;
00948 t->itemstarts=newitemstarts;
00949 t->freeelements|=fptree_t::free_structure;
00950 t->counters[0]=0;
00951 t->counters=newcounters;
00952 nodealloc.freeBlock();
00953 }
00954
00955 void DINLINE deallocTree(fptree_t *t, fptree_t *parent, item_t projitem) {
00956
00957 if (!TD) {
00958 if (PROJECT) {
00959 projheaderalloc.freeBlock();
00960 projnodealloc.freeBlock();
00961 if (SPARSEAGGR) {
00962 projnodealloc.freeBlock();
00963 }
00964 }
00965 if (!PROJECT || !SPARSEAGGR) {
00966 nodealloc.freeBlock();
00967 }
00968 headeralloc.freeBlock();
00969 treealloc.free(t);
00970 }
00971 }
00972 };
00973
00974
00975 }