00001
00009 #include "common.hpp"
00010
00011 #include <stack>
00012 #include <vector>
00013
00014 #include "common/log.h"
00015 #include "common/maxvector.cpp"
00016
00017 namespace bracz {
00018
00034 template<class STRUCTURE, class OUTPUT>
00035 class FPGrowthBase {
00036 protected:
00037
00038 typedef TYPENAME STRUCTURE::fptree_t fptree_t;
00039
00041 typedef struct {
00043 item_t nextitem;
00045 item_t lastitem;
00047 fptree_t *fptree;
00049 item_t singlepdepth;
00050 } tdrecurse_t;
00051
00052 public:
00064 void findFrequentItems(const counter_t minsupp, STRUCTURE &str, OUTPUT &out, item_t maxitem) {}
00065
00066 };
00067
00068
00069 template<class STRUCTURE, class OUTPUT, bool SINGLEPATH, NEELevel NEE, FirstLevel FIRSTLEVEL> class FPGrowthBaseNoNEE : public FPGrowthBase<STRUCTURE,OUTPUT> {
00070 protected:
00071 typedef TYPENAME FPGrowthBase<STRUCTURE,OUTPUT>::tdrecurse_t tdrecurse_t;
00072 public:
00073 #define VECTOR bracz::maxvector
00074
00075 FPGrowthBaseNoNEE(){}
00076
00077
00078 VECTOR<item_t> curritemset;
00079 VECTOR<item_t> neelist;
00080 VECTOR<counter_t> neecounts;
00081
00082
00083
00084 size_t numfreq;
00085
00086 size_t numfreqrec;
00087
00088 size_t singleheader;
00089 size_t singlepart;
00090 size_t singlebody;
00091
00092 counter_t minsupp;
00093
00094 item_t maxitem;
00095
00096
00097 OUTPUT *out;
00098
00099 STRUCTURE *str;
00100
00102 TYPENAME STRUCTURE::fptree_t *singleptree;
00104 counter_t cfreq;
00105
00106 private:
00107 inline void DINLINE outputPush(item_t item) {
00108 if (OUTPUT::isDFO()) {
00109 out->pushItem(item);
00110 } else {
00111 curritemset.push_back(item);
00112 }
00113 }
00114
00115 inline void DINLINE outputPop() {
00116 if (OUTPUT::isDFO()) {
00117 out->popItem();
00118 } else {
00119 curritemset.pop_back();
00120 }
00121 }
00122
00123 inline void DINLINE outputWriteLow(counter_t supp) {
00124 numfreq++;
00125 if (OUTPUT::isDFO()) {
00126 out->write(supp);
00127 } else {
00128 out->writeItemsetAndCounter(curritemset.begin(),curritemset.end(),supp);
00129 }
00130 }
00131
00133 counter_t neesupp;
00134
00135 void neeRecurse(size_t neeidx) {
00136
00137 while(neeidx<neelist.size()) {
00138 if (OUTPUT::isDFO()) {
00139 numfreq++;
00140 out->pushItemWithPrevSupport(neelist[neeidx]);
00141 } else {
00142 outputPush(neelist[neeidx]);
00143 outputWriteLow(neesupp);
00144 }
00145 neeRecurse(++neeidx);
00146 outputPop();
00147 }
00148 }
00149
00150
00151 inline void DINLINE outputWrite(counter_t supp) {
00152 outputWriteLow(supp);
00153 if (NEE==NEE_Full) {
00154 neesupp=supp;
00155 #ifdef STATS_NEECOUNT
00156 neecounts[neelist.size()]++;
00157 #endif
00158 neeRecurse(0);
00159 }
00160 }
00161
00162
00163 inline void DINLINE outputPushAndWrite(item_t item, counter_t supp) {
00164 outputPush(item);
00165 outputWrite(supp);
00166 }
00167
00168 public:
00169
00170 void singlePRecurse(TYPENAME STRUCTURE::SinglePathIterator itop) {
00171 while(itop.increment(singleptree),!itop.atEnd(singleptree)) {
00172 outputPushAndWrite(itop.getCurrItem(singleptree),cfreq);
00173
00174 singlebody++;
00175 singlePRecurse(itop);
00176 }
00177 outputPop();
00178 }
00179
00180 void freqRecurse(tdrecurse_t stop) {
00181 counter_t rootfreq=stop.fptree->getRootFrequency();
00182 if (NEE==NEE_Full) {
00183 for (item_t curritem=stop.lastitem;curritem>(SINGLEPATH?stop.singlepdepth:0);) {
00184 --curritem;
00185 if (stop.fptree->getFrequency(curritem)==rootfreq) {
00186
00187 neelist.push_back(curritem);
00188 }
00189 }
00190 }
00191 outputWrite(rootfreq);
00192 for (item_t curritem=0;curritem<stop.lastitem;++curritem) {
00193 if (stop.lastitem==maxitem) {
00194 fprintf(stderr,"%d ",curritem);
00195 }
00196 counter_t cfreq=stop.fptree->getFrequency(curritem);
00197 if (cfreq < minsupp) {
00198 continue;
00199 } else if (SINGLEPATH && curritem < stop.singlepdepth) {
00200 singlepart++;
00201 outputPushAndWrite(curritem,cfreq);
00202
00203 this->cfreq=cfreq;
00204 this->singleptree=stop.fptree;
00205 singlePRecurse(str->getSinglePathIterator(stop.fptree,curritem));
00206
00207 continue;
00208 } else if ((NEE==NEE_Full) && (cfreq==rootfreq)) {
00209
00210 neelist.pop_back();
00211 continue;
00212 }
00213 numfreqrec++;
00214 outputPush(curritem);
00215 tdrecurse_t nr;
00216 nr.nextitem=0;
00217 nr.lastitem=curritem;
00218 if ((NEE==NEE_Level1) && (cfreq==rootfreq)) {
00219 nr.fptree=stop.fptree;
00220 nr.singlepdepth=stop.singlepdepth;
00221 freqRecurse(nr);
00222 } else {
00223 nr.fptree=str->projectTree(stop.fptree,curritem);
00224
00225 if (cfreq!=nr.fptree->getRootFrequency()) {
00226 log_err(0,"Inconsistency: cfreq %u cond tree freq %u",cfreq,nr.fptree->getRootFrequency());
00227 }
00228 nr.singlepdepth=str->checkSinglePath(nr.fptree,curritem,stop.singlepdepth);
00229 freqRecurse(nr);
00230 str->deallocTree(nr.fptree, stop.fptree, curritem);
00231 }
00232 outputPop();
00233 }
00234 }
00235
00247 void NOINLINE findFrequentItems(const counter_t minsupp, STRUCTURE &str, OUTPUT &out, item_t maxitem) {
00248 curritemset.reserve(maxitem);
00249 neelist.reserve(maxitem);
00250 #ifdef STATS_NEECOUNT
00251 neecounts.reserve(maxitem);
00252 for (item_t i=0;i<maxitem;++i)
00253 neecounts[i]=0;
00254 #endif
00255 this->minsupp=minsupp;
00256 this->out=&out;
00257 this->str=&str;
00258
00259 numfreq=0;
00260 numfreqrec=0;
00261
00262 singleheader=0;
00263 singlepart=0;
00264 singlebody=0;
00265 this->maxitem=maxitem;
00266 tdrecurse_t globtree;
00267
00268 if (FIRSTLEVEL!=FLBuildSingleTree) {
00269 out.write(str.getTransactionCount());
00270 for (item_t curritem=0;curritem<maxitem;++curritem) {
00271 fprintf(stderr,"%d ",curritem);
00272 globtree.nextitem=0;
00273 globtree.singlepdepth=0;
00274 globtree.lastitem=curritem;
00275 globtree.fptree=str.getProjTree(curritem);
00276 outputPush(curritem);
00277 freqRecurse(globtree);
00278 outputPop();
00279 }
00280 } else {
00281 globtree.nextitem=0;
00282 globtree.singlepdepth=0;
00283 globtree.lastitem=maxitem;
00284 globtree.fptree=str.getFullTree();
00285 freqRecurse(globtree);
00286 }
00287 fprintf(stderr,"\n");
00288 log_info(0,"core recursion ended. numfreq rec=%zu, single header=%zu, part=%zu, body=%zu",numfreqrec,singleheader,singlepart,singlebody);
00289 log_info(0,"Total frequent itemsets returned: %zu",numfreq);
00290 }
00291
00292 ~FPGrowthBaseNoNEE() {
00293 #ifdef STATS_NEECOUNT
00294 for (item_t i=0;i<maxitem;++i) {
00295 fprintf(stderr,"length %d, count %u\n",i,neecounts[i]);
00296 }
00297 #endif
00298 }
00299
00300 };
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333 }