00001 #ifndef AprioriSelector2_HPP
00002 #define AprioriSelector2_HPP
00003
00004 #include "common/allocators.hpp"
00005 #include "util/Frequent2Filter.cpp"
00006
00007 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00008 #include "io/input/transaction_reader/OrderReverser.hpp"
00009 #include "io/db_cache/transaction_shrinker/RBTreeDBCacheSimultaneous.hpp"
00010 #include "io/db_cache/transaction_shrinker/RBTreeDBCacheInsertThenDestroySimple.hpp"
00011 #include "io/db_cache/transaction_shrinker/RBTreeDBCacheInsertThenDestroyPro.hpp"
00012 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentItemInserter.hpp"
00013 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserter.hpp"
00014 #include "apriori/bodon/dynamic_trie/trie_manipulators/IntersectProPruner.hpp"
00015 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00016 #include "apriori/bodon/dynamic_trie/trie_manipulators/InfreqRemover.hpp"
00017 #include "apriori/bodon/dynamic_trie/trie_manipulators/SupportCounter.hpp"
00018 #include "apriori/bodon/dynamic_trie/trie_manipulators/SupportCounterFilter.hpp"
00019 #include "apriori/OneByOneSupportCounter.hpp"
00020 #include "apriori/OneByOneSupportCounterANDTransactionFiltering.hpp"
00021 #include "io/db_cache/BuildTreeDBCache.hpp"
00022
00023 #include "apriori/Apriori.hpp"
00024
00025 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC,
00026 class T_R, class DF_D, NEELevel NEE>
00027 class AprioriSelector2
00028 {
00029 public:
00030 AprioriSelector2( counter_t min_supp, char* input_file,
00031 counter_t nr_of_transactions,
00032 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00033 T_R& tr_reader, DF_D& df_decoder,
00034 TRIE_OEL& main_trie, char *trcache_option, char* ordering_option,
00035 unsigned int maxsize );
00036 private:
00037 template <class S_C, class SUPP_C, class IR, class FII,
00038 class FPI, class CG, class LEAF_ALLOCATOR>
00039 void StartApriori(
00040 counter_t min_supp, char* input_file, counter_t nr_of_transactions,
00041 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00042 T_R& tr_reader, DF_D& df_decoder, typename S_C::params_t& par_c,
00043 TRIE_OEL& main_trie, FII& fii, unsigned int maxsize);
00044 };
00045
00046 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC,
00047 class T_R, class DF_D, NEELevel NEE>
00048 AprioriSelector2<TRIE_OEL, TRIE_OI, LEAF_WC, T_R, DF_D, NEE>::
00049 AprioriSelector2(
00050 counter_t min_supp, char* input_file, counter_t nr_of_transactions,
00051 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00052 T_R& tr_reader, DF_D& df_decoder, TRIE_OEL& main_trie,
00053 char *trcache_option, char* ordering_option, unsigned int maxsize)
00054 {
00055 typedef bracz::singleualloc<LEAF_WC, 64 * 1024> LEAF_ALLOCATOR;
00056 typedef Bodon::dynamic_trie::FrequentPairInserter<DF_D, TRIE_OEL,
00057 TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00058 typedef Bodon::dynamic_trie::IntersectProPruner<
00059 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00060 typedef Bodon::dynamic_trie::CandidateGeneratorPrune<PRUNER, DF_D,
00061 TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> CG;
00062 log_status(0,"Intersectpro pruning is selected.");
00063 typedef Bodon::dynamic_trie::FrequentItemInserter<DF_D, TRIE_OEL, NEE>
00064 FII;
00065 FII fii(main_trie, df_decoder);
00066 typedef Bodon::dynamic_trie::InfreqRemover<
00067 DF_D, TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00068 if(strstr(trcache_option,"patricia"))
00069 {
00070 log_status(0,"Patricia caching option is on.");
00071 typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI>
00072 SUPP_C_BASE;
00073 if(strstr(ordering_option, "ASC"))
00074 {
00075 log_status(0,"Ordering mode is ASC.");
00076 typedef SortedTransactionReader<
00077 Coder<T_R, DF_D>, false, false > S_C_T_R;
00078 const bool ENDONLY = true;
00079 typedef OrderReverser< bracz::BuildTreeDBCache<
00080 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<
00081 ENDONLY>, ENDONLY > >S_C;
00082 typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00083
00084 typedef Frequent2Filter<S_C> F2F;
00085
00086 typename S_C::params_t par_c;
00087 par_c.codemode = ASC;
00088 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00089 min_supp, input_file, nr_of_transactions,
00090 freq_items_with_counters, tr_reader, df_decoder, par_c,
00091 main_trie, fii, maxsize);
00092 }
00093 else
00094 {
00095 log_status(0,"Ordering mode is DESC.");
00096 typedef SortedTransactionReader< Coder<T_R, DF_D>, false, true >
00097 S_C_T_R;
00098 const bool ENDONLY = true;
00099 typedef bracz::BuildTreeDBCache<
00100 S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<
00101 ENDONLY>, ENDONLY > S_C;
00102 typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE>
00103 SUPP_C;
00104
00105 typedef Frequent2Filter<S_C> F2F;
00106 typename S_C::params_t par_c;
00107 par_c.codemode = DESC;
00108 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00109 min_supp, input_file, nr_of_transactions,
00110 freq_items_with_counters, tr_reader, df_decoder, par_c,
00111 main_trie, fii, maxsize);
00112
00113 }
00114 }
00115 else if(strstr(trcache_option, "rb-tree-simul"))
00116 {
00117 log_status(0,"RB-tree caching with transaction filtering (simultaneous insert and delete) option is on.");
00118 typedef Bodon::dynamic_trie::SupportCounterFilter<TRIE_OEL, TRIE_OI>
00119 SUPP_C_BASE;
00120 if(strstr(ordering_option, "ASC"))
00121 {
00122 log_status(0,"Ordering mode is ASC.");
00123 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00124 typedef Bodon::RBTreeDBCacheSimultaneous<S_C_T_R> S_C;
00125 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00126 S_C, SUPP_C_BASE> SUPP_C;
00127
00128 typedef Frequent2Filter<S_C> F2F;
00129
00130 typename S_C::params_t par_c;
00131 par_c.codemode = ASC;
00132 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00133 min_supp, input_file, nr_of_transactions,
00134 freq_items_with_counters, tr_reader, df_decoder, par_c,
00135 main_trie, fii, maxsize);
00136 }
00137 else
00138 {
00139 log_status(0,"Ordering mode is DESC.");
00140 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00141 typedef Bodon::RBTreeDBCacheSimultaneous<S_C_T_R> S_C;
00142 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00143 S_C, SUPP_C_BASE> SUPP_C;
00144
00145 typedef Frequent2Filter<S_C> F2F;
00146
00147 typename S_C::params_t par_c;
00148 par_c.codemode = DESC;
00149 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00150 min_supp, input_file, nr_of_transactions,
00151 freq_items_with_counters, tr_reader, df_decoder, par_c,
00152 main_trie, fii, maxsize);
00153 }
00154 }
00155 else if(strstr(trcache_option, "rb-tree-insert-clear-simple"))
00156 {
00157 log_status(0,"RB-tree caching with transaction filtering (with insert then clear, simple) option is on.");
00158 typedef Bodon::dynamic_trie::SupportCounterFilter<TRIE_OEL, TRIE_OI>
00159 SUPP_C_BASE;
00160 if(strstr(ordering_option, "ASC"))
00161 {
00162 log_status(0,"Ordering mode is ASC.");
00163 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00164 typedef Bodon::RBTreeDBCacheInsertThenDestroySimple<S_C_T_R> S_C;
00165 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00166 S_C, SUPP_C_BASE> SUPP_C;
00167
00168 typedef Frequent2Filter<S_C> F2F;
00169
00170 typename S_C::params_t par_c;
00171 par_c.codemode = ASC;
00172 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00173 min_supp, input_file, nr_of_transactions,
00174 freq_items_with_counters, tr_reader, df_decoder, par_c,
00175 main_trie, fii, maxsize);
00176 }
00177 else
00178 {
00179 log_status(0,"Ordering mode is DESC.");
00180 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00181 typedef Bodon::RBTreeDBCacheInsertThenDestroySimple<S_C_T_R> S_C;
00182 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00183 S_C, SUPP_C_BASE> SUPP_C;
00184
00185 typedef Frequent2Filter<S_C> F2F;
00186
00187 typename S_C::params_t par_c;
00188 par_c.codemode = DESC;
00189 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00190 min_supp, input_file, nr_of_transactions,
00191 freq_items_with_counters, tr_reader, df_decoder, par_c,
00192 main_trie, fii, maxsize);
00193 }
00194 }
00195 else if(strstr(trcache_option, "rb-tree-insert-clear-pro"))
00196 {
00197 log_status(0,"RB-tree caching with transaction filtering (with insert then clear, pro) option is on.");
00198 typedef Bodon::dynamic_trie::SupportCounterFilter<TRIE_OEL, TRIE_OI>
00199 SUPP_C_BASE;
00200 if(strstr(ordering_option, "ASC"))
00201 {
00202 log_status(0,"Ordering mode is ASC.");
00203 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00204 typedef Bodon::RBTreeDBCacheInsertThenDestroyPro<S_C_T_R> S_C;
00205 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00206 S_C, SUPP_C_BASE> SUPP_C;
00207
00208 typedef Frequent2Filter<S_C> F2F;
00209
00210 typename S_C::params_t par_c;
00211 par_c.codemode = ASC;
00212 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00213 min_supp, input_file, nr_of_transactions,
00214 freq_items_with_counters, tr_reader, df_decoder, par_c,
00215 main_trie, fii, maxsize);
00216 }
00217 else
00218 {
00219 log_status(0,"Ordering mode is DESC.");
00220 typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00221 typedef Bodon::RBTreeDBCacheInsertThenDestroyPro<S_C_T_R> S_C;
00222 typedef OneByOneSupportCounterANDTransactionFiltering<TRIE_OEL,
00223 S_C, SUPP_C_BASE> SUPP_C;
00224
00225 typedef Frequent2Filter<S_C> F2F;
00226
00227 typename S_C::params_t par_c;
00228 par_c.codemode = DESC;
00229 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00230 min_supp, input_file, nr_of_transactions,
00231 freq_items_with_counters, tr_reader, df_decoder, par_c,
00232 main_trie, fii, maxsize);
00233 }
00234 }
00235 else
00236 {
00237 log_status(0,"Transaction caching option is off.");
00238 typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI>
00239 SUPP_C_BASE;
00240 typedef SortedTransactionReader<Coder<T_R, DF_D> > S_C;
00241 typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00242
00243 typedef Frequent2Filter<S_C> F2F;
00244
00245 typename S_C::params_t par_c;
00246 if(strstr(ordering_option, "ASC"))
00247 {
00248 log_status(0,"Ordering mode is ASC.");
00249 par_c.codemode = ASC;
00250 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00251 min_supp, input_file, nr_of_transactions,
00252 freq_items_with_counters, tr_reader, df_decoder, par_c,
00253 main_trie, fii, maxsize);
00254 }
00255 else
00256 {
00257 log_status(0,"Ordering mode is DESC.");
00258 par_c.codemode = DESC;
00259 StartApriori<S_C, SUPP_C, IR, FII, FPI, CG, LEAF_ALLOCATOR>(
00260 min_supp, input_file, nr_of_transactions,
00261 freq_items_with_counters, tr_reader, df_decoder, par_c,
00262 main_trie, fii, maxsize);
00263 }
00264 }
00265 }
00266
00267
00268
00269 template <class TRIE_OEL, class TRIE_OI, class LEAF_WC,
00270 class T_R, class DF_D, NEELevel NEE>
00271 template <class S_C, class SUPP_C, class IR, class FII,
00272 class FPI, class CG, class LEAF_ALLOCATOR>
00273 void AprioriSelector2<TRIE_OEL, TRIE_OI, LEAF_WC, T_R, DF_D, NEE>::
00274 StartApriori( counter_t min_supp, char* input_file,
00275 counter_t nr_of_transactions,
00276 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00277 T_R& tr_reader, DF_D& df_decoder, typename S_C::params_t& par_c,
00278 TRIE_OEL& main_trie, FII& fii, unsigned int maxsize )
00279 {
00280
00281 typedef Frequent2Filter<S_C> F2F;
00282
00283 par_c.file_name = input_file;
00284 par_c.mode=FileReprBase::READ;
00285 par_c.largest_item = tr_reader.getLargestItem();
00286 par_c.decoder = &df_decoder;
00287 par_c.freq_items_with_counters = &freq_items_with_counters;
00288 log_status(0,"Doing sorted codec.");
00289 S_C sorted_coder(&par_c);
00290 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00291 freq_pairs_with_counters;
00292 F2F fr_2_filter(&sorted_coder );
00293 log_status(0,"Finding frequent pairs.")
00294 fr_2_filter.findFrequentPairs(freq_pairs_with_counters,
00295 min_supp);
00296 LEAF_ALLOCATOR s_alloc;
00297 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00298 typedef Apriori<S_C, DF_D, TRIE_OEL, LEAF_ALLOCATOR, FII,
00299 FPI, CG, IR, SUPP_C> A;
00300 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00301 log_status(0,"Finding frequent itemsets.");
00302 apriori.findFrequentItemsets(
00303 nr_of_transactions, *par_c.freq_counters,
00304 freq_pairs_with_counters, min_supp, maxsize );
00305 }
00306
00307 #endif