00001 #ifndef ThresholdSelector_HPP 00002 #define ThresholdSelector_HPP 00003 00004 #include "io/input/transaction_reader/SortedTransactionReader.hpp" 00005 #include "io/codec/coder/Coder.hpp" 00006 #include "io/db_cache/BuildTreeDBCache.hpp" 00007 #include "util/Frequent2Filter.cpp" 00008 00009 #include "util/StreamParser.hpp" 00010 00011 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp" 00012 #include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp" 00013 #include "apriori/bodon/Trie.hpp" 00014 00015 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp" 00016 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserter.hpp" 00017 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterMerge.hpp" 00018 #include "apriori/OneByOneSupportCounter.hpp" 00019 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/CandidateGeneratorPrune.hpp" 00020 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/IntersectProPruner.hpp" 00021 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/InfreqRemover.hpp" 00022 #include "apriori/Apriori.hpp" 00023 00024 00025 template <class VECTOR, class T_R, class DF_D> 00026 class ThresholdSelector 00027 { 00028 public: 00029 ThresholdSelector( char* threshold, 00030 counter_t min_supp, char* input_file, 00031 counter_t nr_of_transactions, 00032 std::vector< std::pair<counter_t, item_t> >& 00033 freq_items_with_counters, 00034 T_R& tr_reader, DF_D& df_decoder); 00035 00036 private: 00037 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > > 00038 freq_pairs_with_counters; 00039 }; 00040 00041 template <class VECTOR, class T_R, class DF_D> 00042 ThresholdSelector<VECTOR, T_R, DF_D>::ThresholdSelector( char* threshold, 00043 counter_t min_supp, char* input_file, 00044 counter_t nr_of_transactions, 00045 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters, 00046 T_R& tr_reader, DF_D& df_decoder) 00047 { 00048 typedef typename bracz::BuildTreeDBCache< SortedTransactionReader<Coder<T_R, DF_D>, false>, 00049 std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > S_C; 00050 00051 typename S_C::params_t par_c; 00052 par_c.file_name = input_file; 00053 par_c.mode=FileReprBase::READ; 00054 par_c.largest_item = tr_reader.getLargestItem(); 00055 par_c.decoder = &df_decoder; 00056 par_c.freq_items_with_counters = &freq_items_with_counters; 00057 par_c.codemode = ASC; 00058 log_status(0,"Doing sorted codec."); 00059 S_C sorted_coder(&par_c); 00060 00061 00062 Frequent2Filter<S_C> fr_2_filter( 00063 &sorted_coder ); 00064 log_status(0,"Finding frequent pairs.") 00065 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp); 00066 00067 typedef Bodon::LeafWithoutConstructor LEAF_WC; 00068 typedef Bodon::Leaf LEAF; 00069 typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR; 00070 LEAF_ALLOCATOR s_alloc; 00071 const NEELevel NEE = NEE_Off; 00072 00073 if( strcmp(threshold,"3") == 0 ) 00074 { 00075 log_info(0,"Threshol is set to 3."); 00076 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 3> > TRIE; 00077 TRIE main_trie; 00078 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00079 FII fii(main_trie, df_decoder); 00080 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00081 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00082 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00083 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00084 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00085 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00086 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00087 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00088 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00089 log_status(0,"Finding frequent itemsets."); 00090 apriori.findFrequentItemsets( 00091 nr_of_transactions, *par_c.freq_counters, 00092 freq_pairs_with_counters, min_supp ); 00093 } 00094 else if( strcmp(threshold,"6") == 0 ) 00095 { 00096 log_info(0,"Threshol is set to 6."); 00097 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 6> > TRIE; 00098 TRIE main_trie; 00099 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00100 FII fii(main_trie, df_decoder); 00101 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00102 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00103 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00104 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00105 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00106 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00107 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00108 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00109 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00110 log_status(0,"Finding frequent itemsets."); 00111 apriori.findFrequentItemsets( 00112 nr_of_transactions, *par_c.freq_counters, 00113 freq_pairs_with_counters, min_supp ); 00114 } 00115 else if( strcmp(threshold,"10") == 0 ) 00116 { 00117 log_info(0,"Threshol is set to 10."); 00118 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 10> > TRIE; 00119 TRIE main_trie; 00120 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00121 FII fii(main_trie, df_decoder); 00122 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00123 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00124 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00125 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00126 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00127 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00128 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00129 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00130 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00131 log_status(0,"Finding frequent itemsets."); 00132 apriori.findFrequentItemsets( 00133 nr_of_transactions, *par_c.freq_counters, 00134 freq_pairs_with_counters, min_supp ); 00135 } 00136 else if( strcmp(threshold,"20") == 0 ) 00137 { 00138 log_info(0,"Threshol is set to 20."); 00139 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 20> > TRIE; 00140 TRIE main_trie; 00141 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00142 FII fii(main_trie, df_decoder); 00143 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00144 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00145 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00146 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00147 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00148 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00149 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00150 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00151 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00152 log_status(0,"Finding frequent itemsets."); 00153 apriori.findFrequentItemsets( 00154 nr_of_transactions, *par_c.freq_counters, 00155 freq_pairs_with_counters, min_supp ); 00156 } 00157 else if( strcmp(threshold,"40") == 0 ) 00158 { 00159 log_info(0,"Threshol is set to 40."); 00160 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 40> > TRIE; 00161 TRIE main_trie; 00162 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00163 FII fii(main_trie, df_decoder); 00164 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00165 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00166 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00167 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00168 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00169 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00170 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00171 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00172 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00173 log_status(0,"Finding frequent itemsets."); 00174 apriori.findFrequentItemsets( 00175 nr_of_transactions, *par_c.freq_counters, 00176 freq_pairs_with_counters, min_supp ); 00177 } 00178 else if( strcmp(threshold,"100") == 0 ) 00179 { 00180 log_info(0,"Threshol is set to 100."); 00181 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 100> > TRIE; 00182 TRIE main_trie; 00183 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII; 00184 FII fii(main_trie, df_decoder); 00185 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI; 00186 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER; 00187 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG; 00188 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR; 00189 IR infrequent_remover(main_trie, df_decoder, s_alloc); 00190 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE; 00191 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C; 00192 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A; 00193 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii); 00194 log_status(0,"Finding frequent itemsets."); 00195 apriori.findFrequentItemsets( 00196 nr_of_transactions, *par_c.freq_counters, 00197 freq_pairs_with_counters, min_supp ); 00198 } 00199 else 00200 log_warn(0,"Unknown threshold value is requested '%s'.", threshold); 00201 } 00202 #endif