00001
00011 #ifndef AprioriSelector_HPP
00012 #define AprioriSelector_HPP
00013
00014 #include "common/allocators.hpp"
00015
00016
00017 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentItemInserter.hpp"
00018 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserter.hpp"
00019 #include "apriori/bodon/dynamic_trie/trie_manipulators/FrequentPairInserterNoprune.hpp"
00020
00021 #include "apriori/bodon/dynamic_trie/trie_manipulators/IntersectProPruner.hpp"
00022 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00023 #include "apriori/bodon/dynamic_trie/trie_manipulators/CandGenInfreqRemoveNopruneMerge.hpp"
00024 #include "apriori/bodon/dynamic_trie/trie_manipulators/InfreqRemover.hpp"
00025
00026 #include "apriori/Apriori.hpp"
00027
00043 template <class MAIN_DATA_STRUCTURE, class SEC_DATA_STRUCTURE, class LEAF_WC,
00044 class CODER, class F2F, class SUPP_C, class T_R,
00045 class DF_D, NEELevel NEE = NEE_Full> class AprioriSelector
00046 {
00047 public:
00048 AprioriSelector(
00049 counter_t min_supp, char* algorithm, char* input_file,
00050 counter_t nr_of_transactions,
00051 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00052 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize = largest_itemsetsize);
00053 };
00054
00055 template <class MAIN_DATA_STRUCTURE, class SEC_DATA_STRUCTURE, class LEAF_WC,
00056 class CODER, class F2F, class SUPP_C, class T_R, class DF_D,
00057 NEELevel NEE>
00058 AprioriSelector<MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, CODER, F2F,
00059 SUPP_C, T_R, DF_D, NEE>::AprioriSelector(
00060 counter_t min_supp, char* algorithm, char* input_file,
00061 counter_t nr_of_transactions,
00062 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00063 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize )
00064 {
00065 typename CODER::params_t par_c;
00066 par_c.file_name = input_file;
00067 par_c.mode=FileReprBase::READ;
00068 par_c.largest_item = tr_reader.getLargestItem();
00069 par_c.decoder = &df_decoder;
00070 par_c.freq_items_with_counters = &freq_items_with_counters;
00071 par_c.codemode = ASC;
00072
00073 log_status(0,"Doing sorted codec.");
00074 CODER sorted_coder(&par_c);
00075
00076 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00077 freq_pairs_with_counters;
00078 F2F fr_2_filter(&sorted_coder );
00079 log_status(0,"Finding frequent pairs.")
00080 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00081
00082 MAIN_DATA_STRUCTURE main_trie;
00083 typedef bracz::singleualloc<LEAF_WC, 64 * 1024> LEAF_ALLOCATOR;
00084 LEAF_ALLOCATOR s_alloc;
00085 typedef Bodon::dynamic_trie::FrequentItemInserter<DF_D, MAIN_DATA_STRUCTURE, NEE> FII;
00086 FII fii(main_trie, df_decoder);
00087 char *kind=algorithm+6;
00088
00089
00090 if( strstr(kind,"-noprune") )
00091 {
00092 log_status(0,"Complete pruning is disabled.");
00093 typedef Bodon::dynamic_trie::FrequentPairInserterNoprune<
00094 DF_D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00095 typedef Bodon::dynamic_trie::CandGenInfreqRemoveNopruneMerge<
00096 DF_D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, LEAF_ALLOCATOR, NEE > CGIR;
00097 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00098
00099 typedef Apriori<CODER, DF_D, MAIN_DATA_STRUCTURE, LEAF_ALLOCATOR, FII,
00100 FPI, CGIR, CGIR, SUPP_C> A;
00101 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00102 log_status(0,"Finding frequent itemsets.")
00103 apriori.findFrequentItemsets(
00104 nr_of_transactions, *par_c.freq_counters,
00105 freq_pairs_with_counters, min_supp, maxsize );
00106 }
00107 else
00108 {
00109 log_status(0,"Intersection-based pruning is selected.");
00110 typedef Bodon::dynamic_trie::FrequentPairInserter<DF_D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC,
00111 LEAF_ALLOCATOR, NEE> FPI;
00112 typedef Bodon::dynamic_trie::IntersectProPruner<
00113 DF_D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00114 typedef Bodon::dynamic_trie::CandidateGeneratorPrune<PRUNER, DF_D,
00115 MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, LEAF_ALLOCATOR, NEE> CG;
00116 typedef Bodon::dynamic_trie::InfreqRemover<
00117 DF_D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00118 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00119 typedef Apriori<CODER, DF_D, MAIN_DATA_STRUCTURE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00120 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00121 log_status(0,"Finding frequent itemsets.");
00122 apriori.findFrequentItemsets(
00123 nr_of_transactions, *par_c.freq_counters,
00124 freq_pairs_with_counters, min_supp, maxsize );
00125 }
00126 }
00127
00128 #endif