00001 #ifndef RoutingSelector_HPP
00002 #define RoutingSelector_HPP
00003
00004 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00005 #include "io/input/transaction_reader/OrderReverser.hpp"
00006 #include "io/db_cache/BuildTreeDBCache.hpp"
00007 #include "io/codec/coder/Coder.hpp"
00008 #include "io/db_cache/BuildTreeDBCache.hpp"
00009 #include "util/Frequent2Filter.cpp"
00010
00011 #include "util/StreamParser.hpp"
00012 #include "common/allocators.hpp"
00013
00014 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp"
00015 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserterNoprune.hpp"
00016
00017 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/CandGenInfreqRemoveNopruneMerge.hpp"
00018
00019 #include "apriori/OneByOneSupportCounter.hpp"
00020 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterMerge.hpp"
00021
00022
00023 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupTrans.hpp"
00024 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupTransLinBin.hpp"
00025 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupTransOffsetBitVec.hpp"
00026 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupTransOffsetIndex.hpp"
00027 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupEdge.hpp"
00028 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterLookupEdgePrevMem.hpp"
00029 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterHybridSimple.hpp"
00030 #include "apriori/Apriori.hpp"
00031
00032 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentItemInserter.hpp"
00033 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserterNoprune.hpp"
00034 #include "apriori/bodon/dynamic_trie/trie_manipulators/SupportCounter.hpp"
00035 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandGenInfreqRemoveNopruneMerge.hpp"
00036
00037 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00038 class RoutingSelector
00039 {
00040 public:
00041 RoutingSelector(
00042 counter_t min_supp, char* routing, char* input_file,
00043 counter_t nr_of_transactions,
00044 std::vector< std::pair<counter_t, item_t> >&
00045 freq_items_with_counters,
00046 T_R& tr_reader, DF_D& df_decoder);
00047
00048 private:
00049 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00050 freq_pairs_with_counters;
00051 };
00052
00053 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00054 class RoutingSelectorOffset
00055 {
00056
00057 public:
00058 RoutingSelectorOffset(
00059 counter_t min_supp, char* routing, char* input_file,
00060 counter_t nr_of_transactions,
00061 std::vector< std::pair<counter_t, item_t> >&
00062 freq_items_with_counters,
00063 T_R& tr_reader, DF_D& df_decoder);
00064
00065 private:
00066 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00067 freq_pairs_with_counters;
00068 };
00069 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00070 class RoutingSelectorHybrid
00071 {
00072
00073 public:
00074 RoutingSelectorHybrid(
00075 counter_t min_supp, char* routing, char* input_file,
00076 counter_t nr_of_transactions,
00077 std::vector< std::pair<counter_t, item_t> >&
00078 freq_items_with_counters,
00079 T_R& tr_reader, DF_D& df_decoder);
00080
00081 private:
00082 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00083 freq_pairs_with_counters;
00084 };
00085 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00086 class RoutingSelectorDouble
00087 {
00088
00089
00090 public:
00091 RoutingSelectorDouble(
00092 counter_t min_supp, char* routing, char* input_file,
00093 counter_t nr_of_transactions,
00094 std::vector< std::pair<counter_t, item_t> >&
00095 freq_items_with_counters,
00096 T_R& tr_reader, DF_D& df_decoder);
00097 private:
00098
00099 template <unsigned int THRESHOLD, class S_C,
00100 class FII, class FPI, class CG, class IR> void
00101 selectHybrid(
00102 S_C& sorted_coder, DF_D& df_decoder, counter_t nr_of_transactions,
00103 std::vector<counter_t>& freq_counters, counter_t min_supp);
00104
00105 private:
00106 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00107 freq_pairs_with_counters;
00108 };
00109 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00110 RoutingSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>::RoutingSelector(
00111 counter_t min_supp, char* routing, char* input_file,
00112 counter_t nr_of_transactions,
00113 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00114 T_R& tr_reader, DF_D& df_decoder)
00115 {
00116 typedef SortedTransactionReader< Coder<T_R, DF_D>, false, false > S_C_T_R;
00117 typedef OrderReverser< typename bracz::BuildTreeDBCache<
00118 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > >S_C;
00119
00120
00121 typename S_C::params_t par_c;
00122 par_c.file_name = input_file;
00123 par_c.mode=FileReprBase::READ;
00124 par_c.largest_item = tr_reader.getLargestItem();
00125 par_c.decoder = &df_decoder;
00126 par_c.freq_items_with_counters = &freq_items_with_counters;
00127 par_c.codemode = ASC;
00128 log_status(0,"Doing sorted codec.");
00129 S_C sorted_coder(&par_c);
00130
00131
00132 Frequent2Filter<S_C> fr_2_filter(
00133 &sorted_coder );
00134 log_status(0,"Finding frequent pairs.");
00135 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00136
00137 TRIE main_trie;
00138 LEAF_ALLOCATOR s_alloc;
00139 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE_Off> FII;
00140 FII fii(main_trie, df_decoder);
00141 typedef Bodon::FrequentPairInserterNoprune<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off> FPI;
00142
00143 typedef Bodon::inhomogeneous_trie::CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off > CGIR;
00144 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00145 if( strcmp(routing,"merge") == 0 || strcmp(routing,"default") == 0 )
00146 {
00147 log_status(0,"merge is selected for routing.")
00148 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00149 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00150 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00151 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00152 log_status(0,"Finding frequent itemsets.");
00153 apriori.findFrequentItemsets(
00154 nr_of_transactions, *par_c.freq_counters,
00155 freq_pairs_with_counters, min_supp );
00156 }
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183 else if( strcmp(routing,"lookup_edge") == 0 )
00184 {
00185 log_status(0,"Lookup on edges is selected for routing.");
00186 typedef Bodon::SupportCounterLookupEdge<TRIE> SUPP_C_BASE;
00187 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00188 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00189 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00190 log_status(0,"Finding frequent itemsets.");
00191 apriori.findFrequentItemsets(
00192 nr_of_transactions, *par_c.freq_counters,
00193 freq_pairs_with_counters, min_supp );
00194 }
00195
00196 else if( strcmp(routing,"lookup_edge_prev_mem") == 0 )
00197 {
00198 log_status(0,"Lookup on edges with previously found memorization is selected for routing.");
00199 typedef Bodon::SupportCounterLookupEdgePrevMem<TRIE> SUPP_C_BASE;
00200 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00201 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00202 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00203 log_status(0,"Finding frequent itemsets.");
00204 apriori.findFrequentItemsets(
00205 nr_of_transactions, *par_c.freq_counters,
00206 freq_pairs_with_counters, min_supp );
00207 }
00208 else if( strstr(routing,"bitvector") )
00209 {
00210 log_status(0,"Bitvector-based lookup on transaction is selected for routing.");
00211 typedef Bodon::SupportCounterLookupTransOffsetBitVec<TRIE> SUPP_C_BASE;
00212 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00213 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00214 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00215 log_status(0,"Finding frequent itemsets.");
00216 apriori.findFrequentItemsets(
00217 nr_of_transactions, *par_c.freq_counters,
00218 freq_pairs_with_counters, min_supp );
00219 }
00220 else if( strstr(routing,"indexvector") )
00221 {
00222 log_status(0,"Indexvector-based lookup on transaction is selected for routing.");
00223 typedef Bodon::SupportCounterLookupTransOffsetIndex<TRIE> SUPP_C_BASE;
00224 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00225 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00226 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00227 log_status(0,"Finding frequent itemsets.");
00228 apriori.findFrequentItemsets(
00229 nr_of_transactions, *par_c.freq_counters,
00230 freq_pairs_with_counters, min_supp );
00231 }
00232 else if( !strcmp(routing,"lookup_tr") )
00233 {
00234 log_status(0,"Lookup on transaction is selected for routing.");
00235 typedef Bodon::SupportCounterLookupTrans<TRIE> SUPP_C_BASE;
00236 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00237 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00238 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00239 log_status(0,"Finding frequent itemsets.");
00240 apriori.findFrequentItemsets(
00241 nr_of_transactions, *par_c.freq_counters,
00242 freq_pairs_with_counters, min_supp );
00243 }
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349 else
00350 log_warn(0,"Unknown routing type requested '%s'.",routing);
00351 }
00352 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00353 RoutingSelectorOffset<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>::RoutingSelectorOffset(
00354 counter_t min_supp, char* routing, char* input_file,
00355 counter_t nr_of_transactions,
00356 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00357 T_R& tr_reader, DF_D& df_decoder)
00358 {
00359 typedef SortedTransactionReader< Coder<T_R, DF_D>, false, false > S_C_T_R;
00360 typedef OrderReverser< typename bracz::BuildTreeDBCache<
00361 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > >S_C;
00362
00363
00364 typename S_C::params_t par_c;
00365 par_c.file_name = input_file;
00366 par_c.mode=FileReprBase::READ;
00367 par_c.largest_item = tr_reader.getLargestItem();
00368 par_c.decoder = &df_decoder;
00369 par_c.freq_items_with_counters = &freq_items_with_counters;
00370 par_c.codemode = ASC;
00371 log_status(0,"Doing sorted codec.");
00372 S_C sorted_coder(&par_c);
00373
00374
00375 Frequent2Filter<S_C> fr_2_filter(
00376 &sorted_coder );
00377 log_status(0,"Finding frequent pairs.")
00378 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00379
00380 TRIE main_trie;
00381 LEAF_ALLOCATOR s_alloc;
00382 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE_Off> FII;
00383 FII fii(main_trie, df_decoder);
00384 typedef Bodon::FrequentPairInserterNoprune<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off> FPI;
00385 typedef Bodon::inhomogeneous_trie::CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off > CGIR;
00386 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00387
00388 if( strcmp(routing,"lookup_edge") == 0 || strcmp(routing,"default") == 0 )
00389 {
00390 log_status(0,"Lookup on edges is selected for routing.");
00391 typedef Bodon::SupportCounterLookupEdge<TRIE> SUPP_C_BASE;
00392 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00393 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00394 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00395 log_status(0,"Finding frequent itemsets.");
00396 apriori.findFrequentItemsets(
00397 nr_of_transactions, *par_c.freq_counters,
00398 freq_pairs_with_counters, min_supp );
00399 }
00400 else
00401 {
00402 log_warn(0,"Only 'lookup_edge' routing strategy");
00403 log_warn(0,"can be selected for offsetindex edge representation.");
00404 }
00405 }
00406
00407 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00408 RoutingSelectorHybrid<TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>::RoutingSelectorHybrid(
00409 counter_t min_supp, char* routing, char* input_file,
00410 counter_t nr_of_transactions,
00411 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00412 T_R& tr_reader, DF_D& df_decoder)
00413 {
00414 typedef SortedTransactionReader< Coder<T_R, DF_D>, false, false > S_C_T_R;
00415 typedef OrderReverser< typename bracz::BuildTreeDBCache<
00416 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > >S_C;
00417
00418
00419
00420 typename S_C::params_t par_c;
00421 par_c.file_name = input_file;
00422 par_c.mode=FileReprBase::READ;
00423 par_c.largest_item = tr_reader.getLargestItem();
00424 par_c.decoder = &df_decoder;
00425 par_c.freq_items_with_counters = &freq_items_with_counters;
00426 par_c.codemode = ASC;
00427 log_status(0,"Doing sorted codec.");
00428 S_C sorted_coder(&par_c);
00429
00430
00431 Frequent2Filter<S_C> fr_2_filter(
00432 &sorted_coder );
00433 log_status(0,"Finding frequent pairs.")
00434 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00435
00436 TRIE_OEL main_trie;
00437 LEAF_ALLOCATOR s_alloc;
00438 typedef Bodon::dynamic_trie::FrequentItemInserter<DF_D, TRIE_OEL, NEE_Off> FII;
00439 FII fii(main_trie, df_decoder);
00440 typedef Bodon::dynamic_trie::FrequentPairInserterNoprune<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE_Off> FPI;
00441
00442 typedef Bodon::dynamic_trie::CandGenInfreqRemoveNopruneMerge<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE_Off > CGIR;
00443 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00444
00445 if( strcmp(routing,"hybrid") == 0 || strcmp(routing,"default") == 0)
00446 {
00447 log_status(0,"Hybrid strategy is selected for routing.");
00448 typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI> SUPP_C_BASE;
00449 typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00450 typedef Apriori<S_C, DF_D, TRIE_OEL, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00451 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00452 log_status(0,"Finding frequent itemsets.");
00453 apriori.findFrequentItemsets(
00454 nr_of_transactions, *par_c.freq_counters,
00455 freq_pairs_with_counters, min_supp );
00456 }
00457 else
00458 {
00459 log_warn(0,"Only 'hybrid' routing strategy");
00460 log_warn(0,"can be selected for hybrid edge representation.");
00461 }
00462 }
00463
00464 template <class TRIE, class LEAF_WC, class LEAF_ALLOCATOR, class T_R, class DF_D>
00465 RoutingSelectorDouble<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>::RoutingSelectorDouble(
00466 counter_t min_supp, char* routing, char* input_file,
00467 counter_t nr_of_transactions,
00468 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00469 T_R& tr_reader, DF_D& df_decoder)
00470 {
00471 typedef typename bracz::BuildTreeDBCache< SortedTransactionReader<Coder<T_R, DF_D>, false>,
00472 std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > S_C;
00473
00474 typename S_C::params_t par_c;
00475 par_c.file_name = input_file;
00476 par_c.mode=FileReprBase::READ;
00477 par_c.largest_item = tr_reader.getLargestItem();
00478 par_c.decoder = &df_decoder;
00479 par_c.freq_items_with_counters = &freq_items_with_counters;
00480 par_c.codemode = ASC;
00481 log_status(0,"Doing sorted codec.");
00482 S_C sorted_coder(&par_c);
00483
00484
00485 Frequent2Filter<S_C> fr_2_filter(
00486 &sorted_coder );
00487 log_status(0,"Finding frequent pairs.")
00488 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00489
00490 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE_Off> FII;
00491 typedef Bodon::FrequentPairInserterNoprune<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off> FPI;
00492
00493 typedef Bodon::inhomogeneous_trie::
00494 CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE_Off > CGIR;
00495 if( strstr(routing,"hybrid_simple") )
00496 {
00497 log_status(0,"Hybrid solution is selected for routing.");
00498 char* threshold = routing + strlen("hybrid_simple_");
00499 if(strcmp(threshold,"6") ==0)
00500 {
00501 log_status(0,"Threshold is set to: 6.");
00502 selectHybrid<6, S_C, FII, FPI, CGIR, CGIR>(
00503 sorted_coder, df_decoder, nr_of_transactions,
00504 *par_c.freq_counters, min_supp);
00505 }
00506 else if(strcmp(threshold,"10") ==0)
00507 {
00508 log_status(0,"Threshold is set to: 10.");
00509 selectHybrid<10, S_C, FII, FPI, CGIR, CGIR>(
00510 sorted_coder, df_decoder, nr_of_transactions,
00511 *par_c.freq_counters, min_supp);
00512 }
00513 else if(strcmp(threshold,"30") ==0)
00514 {
00515 log_status(0,"Threshold is set to: 30.");
00516 selectHybrid<30, S_C, FII, FPI, CGIR, CGIR>(
00517 sorted_coder, df_decoder, nr_of_transactions,
00518 *par_c.freq_counters, min_supp);
00519 }
00520 else if(strcmp(threshold,"100") ==0)
00521 {
00522 log_status(0,"Threshold is set to: 100.");
00523 selectHybrid<100, S_C, FII, FPI, CGIR, CGIR>(
00524 sorted_coder, df_decoder, nr_of_transactions,
00525 *par_c.freq_counters, min_supp);
00526 }
00527 else
00528 {
00529 log_warn(0,"Bad threshold value was selected!");
00530 log_warn(0,"Please select 6, 10, 20, 30, 40, 100!");
00531 }
00532 }
00533 else
00534 {
00535 log_warn(0,"For 'double edge representation' only 'hybrid_simple'");
00536 log_warn(0,"routing strategy can be selected.");
00537 }
00538 }
00539
00540 #endif