00001 #ifndef AprioriSelector_intest_HPP
00002 #define AprioriSelector_intest_HPP
00003
00004 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00005 #include "io/input/transaction_reader/OrderReverser.hpp"
00006 #include "io/codec/coder/Coder.hpp"
00007 #include "io/db_cache/BuildTreeDBCache.hpp"
00008 #include "util/Frequent2Filter.cpp"
00009
00010
00011 #include "common/allocators.hpp"
00012
00013
00014 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentItemInserter.hpp"
00015 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserter.hpp"
00016 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserterNoprune.hpp"
00017
00018 #include "apriori/bodon/dynamic_trie/trie_manipulators/SupportCounter.hpp"
00019 #include "apriori/OneByOneSupportCounter.hpp"
00020
00021 #include "apriori/bodon/dynamic_trie/trie_manipulators/SimplePruner.hpp"
00022 #include "apriori/bodon/dynamic_trie/trie_manipulators/IntersectProPruner.hpp"
00023 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00024 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandGenInfreqRemoveNopruneMerge.hpp"
00025 #include "apriori/bodon/dynamic_trie/trie_manipulators/InfreqRemover.hpp"
00026
00027 #include "apriori/Apriori.hpp"
00028
00029 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC, class T_R,
00030 class DF_D, NEELevel NEE>
00031 class AprioriSelector
00032 {
00033 public:
00034 AprioriSelector(
00035 counter_t min_supp, char* algorithm, char* input_file,
00036 counter_t nr_of_transactions,
00037 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00038 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize = largest_itemsetsize);
00039 };
00040
00041 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC, class T_R,
00042 class DF_D, NEELevel NEE>
00043 AprioriSelector<TRIE_OEL, TRIE_OI, LEAF_WC, T_R, DF_D, NEE>::AprioriSelector(
00044 counter_t min_supp, char* algorithm, char* input_file,
00045 counter_t nr_of_transactions,
00046 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00047 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize )
00048 {
00049
00050 typedef SortedTransactionReader< Coder<T_R, DF_D>, false, false > S_C_T_R;
00051 typedef OrderReverser< typename bracz::BuildTreeDBCache<
00052 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > >S_C;
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063 typename S_C::params_t par_c;
00064 par_c.file_name = input_file;
00065 par_c.mode=FileReprBase::READ;
00066 par_c.largest_item = tr_reader.getLargestItem();
00067 par_c.decoder = &df_decoder;
00068 par_c.freq_items_with_counters = &freq_items_with_counters;
00069 par_c.codemode = ASC;
00070
00071 log_status(0,"Doing sorted codec.");
00072 S_C sorted_coder(&par_c);
00073
00074 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00075 freq_pairs_with_counters;
00076 Frequent2Filter<S_C> fr_2_filter(
00077 &sorted_coder );
00078
00079
00080 log_status(0,"Finding frequent pairs.")
00081 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00082
00083 TRIE_OEL main_trie;
00084 typedef Bodon::dynamic_trie::FrequentItemInserter<DF_D, TRIE_OEL, NEE> FII;
00085 FII fii(main_trie, df_decoder);
00086 typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI> SUPP_C_BASE;
00087 typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00088 typedef bracz::singleualloc<LEAF_WC, 64 * 1024> LEAF_ALLOCATOR;
00089 LEAF_ALLOCATOR s_alloc;
00090 char *kind=algorithm+7;
00091
00092 if( strstr(kind,"-noprune") )
00093 {
00094 log_info(0,"Complete pruning is disabled.");
00095 typedef Bodon::dynamic_trie::FrequentPairInserterNoprune<
00096 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00097 typedef Bodon::dynamic_trie::CandGenInfreqRemoveNopruneMerge<
00098 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE > CGIR;
00099 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00100 typedef Apriori<S_C, DF_D, TRIE_OEL, LEAF_ALLOCATOR, FII,
00101 FPI, CGIR, CGIR, SUPP_C> A;
00102 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00103 log_status(0,"Finding frequent itemsets.")
00104 apriori.findFrequentItemsets(
00105 nr_of_transactions, *par_c.freq_counters,
00106 freq_pairs_with_counters, min_supp, maxsize );
00107 }
00108 else if( strstr(kind,"-intersectprune") )
00109 {
00110 log_info(0,"Intersection-based pruning is selected.");
00111 typedef Bodon::dynamic_trie::FrequentPairInserter<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC,
00112 LEAF_ALLOCATOR, NEE> FPI;
00113 typedef Bodon::dynamic_trie::IntersectProPruner<
00114 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00115 typedef Bodon::dynamic_trie::CandidateGeneratorPrune<PRUNER, DF_D,
00116 TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> CG;
00117 typedef Bodon::dynamic_trie::InfreqRemover<
00118 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00119 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00120 typedef Apriori<S_C, DF_D, TRIE_OEL, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00121 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00122 log_status(0,"Finding frequent itemsets.");
00123 apriori.findFrequentItemsets(
00124 nr_of_transactions, *par_c.freq_counters,
00125 freq_pairs_with_counters, min_supp, maxsize );
00126 }
00127 else if( strstr(kind,"-simpleprune") )
00128 {
00129 log_info(0,"Simple pruning is selected.");
00130 typedef Bodon::dynamic_trie::FrequentPairInserter<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC,
00131 LEAF_ALLOCATOR, NEE> FPI;
00132 typedef Bodon::dynamic_trie::SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC,
00133 LEAF_ALLOCATOR, NEE> PRUNER;
00134 typedef Bodon::dynamic_trie::CandidateGeneratorPrune<PRUNER, DF_D,
00135 TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> CG;
00136 typedef Bodon::dynamic_trie::InfreqRemover<DF_D, TRIE_OEL, TRIE_OI, LEAF_WC,
00137 LEAF_ALLOCATOR, NEE> IR;
00138 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00139 typedef Apriori<S_C, DF_D, TRIE_OEL, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00140 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00141 log_status(0,"Finding frequent itemsets.")
00142 apriori.findFrequentItemsets(
00143 nr_of_transactions, *par_c.freq_counters,
00144 freq_pairs_with_counters, min_supp, maxsize );
00145 }
00146 else
00147 log_warn(0,"Unknown apriori type requested '%s'.",kind);
00148 }
00149
00150 #endif