00001
00009 #ifndef SeqAprioriSelector_HPP
00010 #define SeqAprioriSelector_HPP
00011
00012 #include "io/codec/coder/Coder.hpp"
00013 #include "io/db_cache/BuildTreeDBCache.hpp"
00014 #include "util/SeqFrequent2Filter.cpp"
00015
00016
00017 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp"
00018 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserter.hpp"
00019 #include "apriori/bodon/trie/trie_manipulators/sequence/FrequentPairInserterNoprune.hpp"
00020
00021 #include "apriori/bodon/trie/trie_manipulators/sequence/support_counter/SupportCounterLookupSeq.hpp"
00022 #include "apriori/OneByOneSupportCounter.hpp"
00023
00024 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/sequence/CandidateGeneratorPrune.hpp"
00025 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/sequence/SimplePruner.hpp"
00026 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/sequence/IntersectProPruner.hpp"
00027 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/sequence/CandGenInfreqRemoveNopruneMerge.hpp"
00028 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/InfreqRemover.hpp"
00029 #include "apriori/bodon/trie/trie_manipulators/InfreqRemover.hpp"
00030 #include "apriori/Apriori.hpp"
00031
00032
00033 template <class TRIE, class LEAF, class T_R, class DF_D>
00034 class SeqAprioriSelector
00035 {
00036 public:
00037 SeqAprioriSelector(
00038 counter_t min_supp, char* algorithm, char* input_file,
00039 counter_t nr_of_transactions,
00040 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00041 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize = largest_itemsetsize);
00042 };
00043
00044 template <class TRIE, class LEAF, class T_R, class DF_D>
00045 SeqAprioriSelector<TRIE, LEAF, T_R, DF_D>::SeqAprioriSelector(
00046 counter_t min_supp, char* algorithm, char* input_file,
00047 counter_t nr_of_transactions,
00048 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00049 T_R& tr_reader, DF_D& df_decoder, unsigned int maxsize)
00050 {
00051 typedef typename bracz::BuildTreeDBCache< Coder<T_R, DF_D>,
00052 std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > S_C;
00053
00054
00055 typename S_C::params_t par_c;
00056 par_c.file_name = input_file;
00057 par_c.mode=FileReprBase::READ;
00058 par_c.largest_item = tr_reader.getLargestItem();
00059 par_c.decoder = &df_decoder;
00060 par_c.freq_items_with_counters = &freq_items_with_counters;
00061 par_c.codemode = ASC;
00062 log_status(0,"Doing sorted codec.");
00063 S_C sorted_coder(&par_c);
00064
00065 std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00066 freq_pairs_with_counters;
00067 SeqFrequent2Filter<S_C> fr_2_filter(
00068 &sorted_coder );
00069 log_status(0,"Finding frequent pairs.")
00070 fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00071
00072 TRIE main_trie;
00073 typedef bracz::singleualloc<LEAF, 64 * 1024> LEAF_ALLOCATOR;
00074 LEAF_ALLOCATOR s_alloc;
00075 typedef Bodon::FrequentItemInserter<DF_D, TRIE, false> FII;
00076 FII fii(main_trie, df_decoder);
00077 typedef Bodon::sequence::SupportCounterLookupSeq<TRIE> SUPP_C_BASE;
00078
00079
00080
00081
00082
00083 typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00084
00085 char *kind=algorithm+7;
00086 const NEELevel NEE = NEE_Off;
00087
00088 if( strstr(kind,"-noprune") )
00089 {
00090 log_status(0,"Complete pruning is disabled.");
00091 typedef Bodon::sequence::FrequentPairInserterNoprune<DF_D, TRIE, LEAF, LEAF_ALLOCATOR> FPI;
00092 typedef Bodon::sequence::inhomogeneous_trie::CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR> CGIR;
00093 CGIR infrequent_remover(main_trie, df_decoder, s_alloc);
00094 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CGIR, CGIR, SUPP_C> A;
00095 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00096 log_status(0,"Finding frequent itemsets.")
00097 apriori.findFrequentItemsets(
00098 nr_of_transactions, *par_c.freq_counters,
00099 freq_pairs_with_counters, min_supp, maxsize );
00100 }
00101 else if( strstr(kind,"-intersectprune") )
00102 {
00103 log_status(0,"Intersection-based pruning is selected.");
00104 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE> FPI;
00105 typedef Bodon::sequence::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR> PRUNER;
00106 typedef Bodon::sequence::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF, LEAF_ALLOCATOR> CG;
00107 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE> IR;
00108 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00109 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00110 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00111 log_status(0,"Finding frequent itemsets.");
00112 apriori.findFrequentItemsets(
00113 nr_of_transactions, *par_c.freq_counters,
00114 freq_pairs_with_counters, min_supp, maxsize );
00115 }
00116 else
00117 {
00118 log_status(0,"Simple pruning is selected.");
00119 typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE> FPI;
00120 typedef Bodon::sequence::inhomogeneous_trie::SimplePruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR> PRUNER;
00121 typedef Bodon::sequence::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF, LEAF_ALLOCATOR> CG;
00122 typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE> IR;
00123 IR infrequent_remover(main_trie, df_decoder, s_alloc);
00124 typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00125 A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00126 log_status(0,"Finding frequent itemsets.")
00127 apriori.findFrequentItemsets(
00128 nr_of_transactions, *par_c.freq_counters,
00129 freq_pairs_with_counters, min_supp, maxsize );
00130 }
00131 }
00132
00133 #endif