00001 #ifndef ThresholdSelector_HPP
00002 #define ThresholdSelector_HPP
00003
00004 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00005 #include "io/codec/coder/Coder.hpp"
00006 #include "io/db_cache/BuildTreeDBCache.hpp"
00007 #include "util/Frequent2Filter.cpp"
00008
00009 #include "util/StreamParser.hpp"
00010
00011 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00012 #include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp"
00013 #include "apriori/bodon/Trie.hpp"
00014
00015 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp"
00016 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserter.hpp"
00017 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterMerge.hpp"
00018 #include "apriori/OneByOneSupportCounter.hpp"
00019 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00020 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/IntersectProPruner.hpp"
00021 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/InfreqRemover.hpp"
00022 #include "apriori/Apriori.hpp"
00023
00024
00025 template <class VECTOR, class T_R, class DF_D>
00026 class ThresholdSelector
00027 {
00028 public:
00029 ThresholdSelector( char* threshold,
00030 counter_t min_supp, char* input_file,
00031 counter_t nr_of_transactions,
00032 std::vector< std::pair<counter_t, item_t> >&
00033 freq_items_with_counters,
00034 T_R& tr_reader, DF_D& df_decoder);
00035
00036 private:
00037 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00038 freq_pairs_with_counters;
00039 };
00040
00041 template <class VECTOR, class T_R, class DF_D>
00042 ThresholdSelector<VECTOR, T_R, DF_D>::ThresholdSelector( char* threshold,
00043 counter_t min_supp, char* input_file,
00044 counter_t nr_of_transactions,
00045 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00046 T_R& tr_reader, DF_D& df_decoder)
00047 {
00048 typedef typename bracz::BuildTreeDBCache< SortedTransactionReader<Coder<T_R, DF_D>, false>,
00049 std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > S_C;
00050
00051 typename S_C::params_t par_c;
00052 par_c.file_name = input_file;
00053 par_c.mode=FileReprBase::READ;
00054 par_c.largest_item = tr_reader.getLargestItem();
00055 par_c.decoder = &df_decoder;
00056 par_c.freq_items_with_counters = &freq_items_with_counters;
00057 par_c.codemode = ASC;
00058 log_status(0,"Doing sorted codec.");
00059 S_C sorted_coder(&par_c);
00060
00061
00062 Frequent2Filter<S_C> fr_2_filter(
00063 &sorted_coder );
00064 log_status(0,"Finding frequent pairs.")
00065 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00066
00067 typedef Bodon::LeafWithoutConstructor LEAF_WC;
00068 typedef Bodon::Leaf LEAF;
00069 typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR;
00070 LEAF_ALLOCATOR s_alloc;
00071 const NEELevel NEE = NEE_Off;
00072
00073 if( strcmp(threshold,"3") == 0 )
00074 {
00075 log_info(0,"Threshol is set to 3.");
00076 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 3> > TRIE;
00077 TRIE main_trie;
00078 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00079 FII fii(main_trie, df_decoder);
00080 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00081 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00082 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00083 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00084 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00085 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00086 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00087 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00088 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00089 log_status(0,"Finding frequent itemsets.");
00090 apriori.findFrequentItemsets(
00091 nr_of_transactions, *par_c.freq_counters,
00092 freq_pairs_with_counters, min_supp );
00093 }
00094 else if( strcmp(threshold,"6") == 0 )
00095 {
00096 log_info(0,"Threshol is set to 6.");
00097 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 6> > TRIE;
00098 TRIE main_trie;
00099 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00100 FII fii(main_trie, df_decoder);
00101 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00102 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00103 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00104 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00105 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00106 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00107 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00108 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00109 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00110 log_status(0,"Finding frequent itemsets.");
00111 apriori.findFrequentItemsets(
00112 nr_of_transactions, *par_c.freq_counters,
00113 freq_pairs_with_counters, min_supp );
00114 }
00115 else if( strcmp(threshold,"10") == 0 )
00116 {
00117 log_info(0,"Threshol is set to 10.");
00118 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 10> > TRIE;
00119 TRIE main_trie;
00120 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00121 FII fii(main_trie, df_decoder);
00122 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00123 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00124 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00125 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00126 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00127 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00128 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00129 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00130 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00131 log_status(0,"Finding frequent itemsets.");
00132 apriori.findFrequentItemsets(
00133 nr_of_transactions, *par_c.freq_counters,
00134 freq_pairs_with_counters, min_supp );
00135 }
00136 else if( strcmp(threshold,"20") == 0 )
00137 {
00138 log_info(0,"Threshol is set to 20.");
00139 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 20> > TRIE;
00140 TRIE main_trie;
00141 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00142 FII fii(main_trie, df_decoder);
00143 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00144 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00145 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00146 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00147 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00148 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00149 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00150 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00151 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00152 log_status(0,"Finding frequent itemsets.");
00153 apriori.findFrequentItemsets(
00154 nr_of_transactions, *par_c.freq_counters,
00155 freq_pairs_with_counters, min_supp );
00156 }
00157 else if( strcmp(threshold,"40") == 0 )
00158 {
00159 log_info(0,"Threshol is set to 40.");
00160 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 40> > TRIE;
00161 TRIE main_trie;
00162 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00163 FII fii(main_trie, df_decoder);
00164 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00165 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00166 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00167 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00168 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00169 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00170 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00171 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00172 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00173 log_status(0,"Finding frequent itemsets.");
00174 apriori.findFrequentItemsets(
00175 nr_of_transactions, *par_c.freq_counters,
00176 freq_pairs_with_counters, min_supp );
00177 }
00178 else if( strcmp(threshold,"100") == 0 )
00179 {
00180 log_info(0,"Threshol is set to 100.");
00181 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 100> > TRIE;
00182 TRIE main_trie;
00183 typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00184 FII fii(main_trie, df_decoder);
00185 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00186 typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00187 typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00188 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00189 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00190 typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00191 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00192 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00193 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00194 log_status(0,"Finding frequent itemsets.");
00195 apriori.findFrequentItemsets(
00196 nr_of_transactions, *par_c.freq_counters,
00197 freq_pairs_with_counters, min_supp );
00198 }
00199 else
00200 log_warn(0,"Unknown threshold value is requested '%s'.", threshold);
00201 }
00202 #endif