00001 #ifndef FPGrowthSelector_HPP
00002 #define FPGrowthSelector_HPP
00003
00008
00009
00010 #include "common.hpp"
00011
00012 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00013 #include "io/codec/coder/Coder.hpp"
00014 #include "io/output/NoOutput.hpp"
00015 #include "io/output/StatOutput.hpp"
00016
00017 #include "fpgrowth/core.cpp"
00018 #include "fpgrowth/nonordfp.cpp"
00019 #include "fpgrowth/classicfp.cpp"
00020 #include "fpgrowth/classicrebuildfp.cpp"
00021 #include "fpgrowth/buildfptree.hpp"
00022
00023 #include "common/allocators.hpp"
00024 #include "common/linmap.cpp"
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 template<NEELevel NEE> struct NEE_to_String {
00045 static char* getString() {
00046 return "unknown";
00047 }
00048 };
00049
00050 template<> struct NEE_to_String<NEE_Off> {
00051 static char* getString() {
00052 return "Off";
00053 }
00054 };
00055 template<> struct NEE_to_String<NEE_Level1>{
00056 static char* getString() {
00057 return "Level1";
00058 }
00059 };
00060 template<> struct NEE_to_String<NEE_Full>{
00061 static char* getString() {
00062 return "Full";
00063 }
00064 };
00065
00066
00067 template<FirstLevel First> struct FirstLevel_to_String {
00068 static char* getString() {
00069 return "unknown";
00070 }
00071 };
00072
00073 template<> struct FirstLevel_to_String<FLBuildSingleTree> {
00074 static char* getString() {
00075 return "FL_BuildSingleTree";
00076 }
00077 };
00078 template<> struct FirstLevel_to_String<FLBuildAllL1Trees>{
00079 static char* getString() {
00080 return "FL_BuildAllL1Trees";
00081 }
00082 };
00083 template<> struct FirstLevel_to_String<FLSimultProject>{
00084 static char* getString() {
00085 return "FL_SimultProject";
00086 }
00087 };
00088
00089
00090 struct type_nonordfp_basic {};
00091 struct type_nonordfp_dense {};
00092 struct type_nonordfp_sparse {};
00093 struct type_nonordfp_nsparse {};
00094 struct type_nonordfp_classictd {};
00095 struct type_nonordfp_classicrebuild {};
00096 struct type_nonordfp_classictd_alll1trees {};
00097 struct type_nonordfp_classicrebuild_alll1trees {};
00098
00099
00100 template<class S_C, class O_M, class B_T, bool SINGLE, NEELevel NEE, bool TD,
00101 bool PROJECT, bool PROJECTMERGENODES, bool PROJECTDELETECLOSED,
00102 FirstLevel PERITEMTREE, bool SPARSEAGGR> void
00103 doFpGrowth(S_C *sorted_coder, item_t maxitem,counter_t min_supp,O_M &o_m) {
00104 log_perf(0,"Fp-growth configuration: +nonordfp (compact), %ssinglepath, %std, %sproject, %sProjMergeNodes, %sProjDeleteClosed NEE_%s, %s, BT_EndPatricia",SINGLE?"+":"-",TD?"+":"-",PROJECT?"+":"-",PROJECTMERGENODES?"+":"-",PROJECTDELETECLOSED?"+":"-",NEE_to_String<NEE>::getString(),FirstLevel_to_String<PERITEMTREE>::getString());
00105
00106 typedef bracz::TDNonOrdFPStructure<S_C, B_T ,SINGLE,TD,
00107 PROJECT,PROJECTDELETECLOSED,PROJECTMERGENODES,
00108 PERITEMTREE,SPARSEAGGR> NonOrdFpStructure;
00109
00110 NonOrdFpStructure fpstr(sorted_coder,maxitem,min_supp);
00111
00112 bracz::FPGrowthBaseNoNEE<NonOrdFpStructure,O_M,SINGLE,NEE,PERITEMTREE> fpgr;
00113
00114 fpgr.findFrequentItems(min_supp,fpstr,o_m,maxitem);
00115 }
00116
00117
00118 template<class S_C, class O_M, class B_T, bool SINGLE, NEELevel NEE, bool TD,
00119 FirstLevel PERITEMTREE> void doFpGrowthClassic
00120 (S_C *sorted_coder, item_t maxitem,counter_t min_supp,O_M &o_m) {
00121 log_perf(0,"Fp-growth configuration: +classic (node-based), %ssinglepath, %std, -project, -ProjMergeNodes, -ProjDeleteClosed NEE_%s, %s, BT_EndPatricia",SINGLE?"+":"-",TD?"+":"-",NEE_to_String<NEE>::getString(),FirstLevel_to_String<PERITEMTREE>::getString());
00122
00123 typedef bracz::ClassicFPStructure<S_C, B_T ,PERITEMTREE,TD,SINGLE
00124 > NonOrdFpStructure;
00125
00126 NonOrdFpStructure fpstr(sorted_coder,maxitem);
00127
00128 bracz::FPGrowthBaseNoNEE<NonOrdFpStructure,O_M,SINGLE,NEE,PERITEMTREE> fpgr;
00129
00130 fpgr.findFrequentItems(min_supp,fpstr,o_m,maxitem);
00131 }
00132
00133
00134 template<class S_C, class O_M, class B_T, bool SINGLE, NEELevel NEE,
00135 FirstLevel PERITEMTREE> void doFpGrowthClassicRebuild
00136 (S_C *sorted_coder, item_t maxitem,counter_t min_supp,O_M &o_m) {
00137 log_perf(0,"Fp-growth configuration: +classic (node-based), %ssinglepath, -td, +project, +ProjMergeNodes, -ProjDeleteClosed NEE_%s, %s, BT_EndPatricia",SINGLE?"+":"-",NEE_to_String<NEE>::getString(),FirstLevel_to_String<PERITEMTREE>::getString());
00138
00139 typedef bracz::ClassicRebuildFPStructure<S_C, PERITEMTREE,SINGLE
00140 > NonOrdFpStructure;
00141
00142 NonOrdFpStructure fpstr(sorted_coder,maxitem,min_supp);
00143
00144 bracz::FPGrowthBaseNoNEE<NonOrdFpStructure,O_M,SINGLE,NEE,PERITEMTREE> fpgr;
00145
00146 fpgr.findFrequentItems(min_supp,fpstr,o_m,maxitem);
00147 }
00148
00149
00150 template <class T_R, class DF_D>
00151 class FPGrowthSelector
00152 {
00153 private:
00154 typedef bracz::EndPatriciaBuildTree<false> B_T;
00155 enum EModify {
00156 EModifyNone,
00157 EModifySingleOff,
00158 EModifyNeeOff
00159 };
00160 public:
00161 FPGrowthSelector(
00162 counter_t min_supp, bool relative, double relminsupp,
00163 char* algorithm, char* input_file, counter_t nr_of_transactions,
00164 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00165 T_R& tr_reader, DF_D& df_decoder);
00166
00167 template<class S_C,class O_M, bool SINGLE, NEELevel NEE> class Phase2 {
00168 public:
00169
00170
00171
00172 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00173 counter_t min_supp,O_M &o_m,
00174 type_nonordfp_basic) {
00175 doFpGrowth<S_C,O_M,B_T,SINGLE,NEE,false,false,
00176 false, false,
00177 FLBuildSingleTree,false>(sorted_coder,maxitem,min_supp,o_m);
00178 }
00179
00180 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00181 counter_t min_supp,O_M &o_m,
00182 type_nonordfp_dense) {
00183 doFpGrowth<S_C,O_M,B_T,SINGLE,NEE,true,false,
00184 false, false,
00185 FLBuildSingleTree,false>(sorted_coder,maxitem,min_supp,o_m);
00186 }
00187
00188 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00189 counter_t min_supp,O_M &o_m,
00190 type_nonordfp_sparse) {
00191 doFpGrowth<S_C,O_M,B_T,SINGLE,NEE,false,true,
00192 true, false,
00193 FLSimultProject,false>(sorted_coder,maxitem,min_supp,o_m);
00194 }
00195 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00196 counter_t min_supp,O_M &o_m, type_nonordfp_nsparse) {
00197 doFpGrowth<S_C,O_M,B_T,SINGLE,NEE,false,true,
00198 true, false,
00199 FLSimultProject,true>(sorted_coder,maxitem,min_supp,o_m);
00200 }
00201 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00202 counter_t min_supp,O_M &o_m,
00203 type_nonordfp_classictd) {
00204 doFpGrowthClassic<S_C,O_M,B_T,SINGLE,NEE,true,
00205 FLBuildSingleTree>(sorted_coder,maxitem,min_supp,o_m);
00206 }
00207
00208 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00209 counter_t min_supp,O_M &o_m,
00210 type_nonordfp_classictd_alll1trees) {
00211 doFpGrowthClassic<S_C,O_M,B_T,SINGLE,NEE,true,
00212 FLBuildAllL1Trees>(sorted_coder,maxitem,min_supp,o_m);
00213 }
00214
00215 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00216 counter_t min_supp,O_M &o_m,
00217 type_nonordfp_classicrebuild) {
00218 doFpGrowthClassicRebuild<S_C,O_M,B_T,SINGLE,NEE,
00219 FLBuildSingleTree>(sorted_coder,maxitem,min_supp,o_m);
00220 }
00221 static void doPhase2(S_C *sorted_coder, item_t maxitem,
00222 counter_t min_supp,O_M &o_m,
00223 type_nonordfp_classicrebuild_alll1trees) {
00224 doFpGrowthClassicRebuild<S_C,O_M,B_T,SINGLE,NEE,
00225 FLBuildAllL1Trees>(sorted_coder,maxitem,min_supp,o_m);
00226 }
00227
00228
00229 };
00230
00231 template<class S_C,class O_M, class Type>
00232 void Phase1(S_C *sorted_coder, item_t maxitem,counter_t min_supp,
00233 O_M &o_m, Type t, EModify mod) {
00234 switch(mod) {
00235 case EModifyNone: {
00236 Phase2<S_C,O_M,true,NEE_Full>::doPhase2(sorted_coder,maxitem,
00237 min_supp,o_m,t);
00238 break;
00239 }
00240 #if 1
00241 case EModifySingleOff: {
00242 Phase2<S_C,O_M,false,NEE_Full>::doPhase2(sorted_coder,maxitem,
00243 min_supp,o_m,t);
00244 break;
00245 }
00246 case EModifyNeeOff: {
00247 Phase2<S_C,O_M,true,NEE_Off>::doPhase2(sorted_coder,maxitem,
00248 min_supp,o_m,t);
00249 break;
00250 }
00251 #endif
00252 default:
00253 log_err(0,"Unknown (or disabled) modification requested.");
00254 }
00255 }
00256
00257
00258 };
00259
00260
00261
00262
00263
00264
00265
00266
00267 template <class T_R, class DF_D>
00268 FPGrowthSelector<T_R, DF_D> ::FPGrowthSelector(
00269 counter_t min_supp, bool relative, double relminsupp,
00270 char* algorithm, char* input_file, counter_t nr_of_transactions,
00271 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00272 T_R& tr_reader, DF_D& df_decoder)
00273 {
00274 typedef SortedTransactionReader<Coder<T_R, DF_D>, true> S_C;
00275 typename S_C::params_t par_c;
00276 par_c.file_name = input_file;
00277 par_c.mode=FileReprBase::READ;
00278 par_c.largest_item = tr_reader.getLargestItem();
00279 par_c.decoder = &df_decoder;
00280 par_c.freq_items_with_counters = &freq_items_with_counters;
00281 par_c.codemode = DESC;
00282 S_C sorted_coder(&par_c);
00283 item_t maxitem=freq_items_with_counters.size();
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294 #if 0
00295 #if 0
00296 typedef bracz::NonOptDFOutput NEWOM;
00297 #else
00298 typedef NoOutput<true> NEWOM;
00299 #endif
00300 NEWOM::params_t par2;
00301 NEWOM o_m(&par2);
00302 #else
00303 typedef DF_D NEWOM;
00304 #define o_m df_decoder
00305 #endif
00306
00307 log_info(0,"Min supp %d, Freq item count %d, trans count %d",
00308 min_supp,maxitem,nr_of_transactions);
00309
00310
00311
00312
00313 char *kind=algorithm+9;
00314 char *sub=strchr(kind,'_');
00315 if (!sub) sub=kind+strlen(kind);
00316 std::string skind(kind, sub-kind);
00317 std::string ssub(sub);
00318
00319 log_info(0,"kind >>%s<< sub >>%s<<",skind.c_str(),ssub.c_str());
00320
00321 EModify mod=EModifyNone;
00322
00323 if (ssub=="") {
00324 mod=EModifyNone;
00325 } else if (ssub=="_neeoff") {
00326 mod=EModifyNeeOff;
00327 log_info(0,"requested modify nee off");
00328 } else if (ssub=="_-single") {
00329 mod=EModifySingleOff;
00330 log_info(0,"requested modify singlepath off");
00331 }
00332
00333 if ((skind=="")||(skind=="-nonordfp-basic")) {
00334 log_info(0,"Using standard nonordfp.");
00335 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_basic(),mod);
00336 } else if (skind=="-nonordfp-dense") {
00337 log_info(0,"requested dense nonordfp.");
00338 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_dense(),mod);
00339 } else if (skind=="-nonordfp-sparse") {
00340 log_info(0,"requested sparse nonordfp.");
00341 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_sparse(),mod);
00342 } else if (skind=="-nonordfp-nsparse") {
00343 log_info(0,"requested sparse nonordfp.");
00344 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_nsparse(),mod);
00345 } else if (skind=="-classic-td") {
00346 log_info(0,"requested classic td.");
00347 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_classictd(),mod);
00348 } else if (skind=="-classic-td-l1") {
00349 log_info(0,"requested classic td fp-growth with all l1 trees built.");
00350 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_classictd_alll1trees(),mod);
00351 } else if (skind=="-classic-rebuild") {
00352 log_info(0,"requested classic rebuild fp-growth (note: buildtree is ignored).");
00353 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_classicrebuild(),mod);
00354 } else if (skind=="-classic-rebuild-l1") {
00355 log_info(0,"requested classic rebuild fp-growth with all l1 trees built (note: buildtree is ignored).");
00356 Phase1(&sorted_coder,maxitem,min_supp,o_m,type_nonordfp_classicrebuild_alll1trees(),mod);
00357 }else {
00358 log_err(0,"unknown basic type >>%s<< requested.",skind.c_str());
00359 }
00360
00361
00362 }
00363
00364 #endif