00001
00011 #ifndef Apriori_HPP
00012 #define Apriori_HPP
00013
00014 #include "common.hpp"
00015 #include "common/log.h"
00016 #include "common/allocators.hpp"
00017 #include <vector>
00018
00081 template <class C, class D, class MAIN_DATA_STRUCTURE,
00082 class SEC_DATA_STRUCTURE, class FII, class FPI,
00083 class CG, class IR, class SUPP_C>
00084 class Apriori
00085 {
00086 private:
00088 MAIN_DATA_STRUCTURE& data_structure;
00090 SEC_DATA_STRUCTURE& s_alloc;
00092 IR& infrequent_remover;
00094 C& coder;
00096 D& decoder;
00097
00098 FII& fii;
00099
00100 unsigned int candidate_size;
00101
00102 public:
00103
00104 Apriori(MAIN_DATA_STRUCTURE& data_structure, SEC_DATA_STRUCTURE& s_alloc,
00105 IR& infrequent_remover, C& coder, D& decoder, FII& fii ) :
00106 data_structure(data_structure), s_alloc(s_alloc),
00107 infrequent_remover(infrequent_remover), coder(coder), decoder(decoder), fii(fii){}
00108
00110 void findFrequentItemsets(
00111 const counter_t emptyset_support,
00112 std::vector<counter_t>& freq_counters,
00113 std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >&
00114 freq_pairs_with_counters,
00115 const counter_t min_supp,
00116 unsigned int maxsize = largest_itemsetsize);
00117
00118 };
00119
00133 template <class C, class D, class MAIN_DATA_STRUCTURE,
00134 class SEC_DATA_STRUCTURE, class FII, class FPI,
00135 class CG, class IR, class SUPP_C>
00136 void Apriori<C, D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE,
00137 FII, FPI, CG, IR, SUPP_C>::
00138 findFrequentItemsets(
00139 const counter_t emptyset_support,
00140 std::vector<counter_t>& freq_counters,
00141 std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >&
00142 freq_pairs_with_counters,
00143 const counter_t min_supp,
00144 unsigned int maxsize )
00145 {
00146 if(maxsize > 2)
00147 {
00148 FPI fpi(data_structure, decoder, s_alloc);
00149 log_status(0,"Setting the support of the emptyset in the trie.");
00150 fii.setEmptysetSupport(emptyset_support);
00151 log_status(0,"Inserting the support of the frequent items.");
00152 fii.insertFrequentItems(freq_counters);
00153 freq_counters.clear();
00154 std::vector<counter_t>().swap(freq_counters);
00155 log_status(0,"Inserting the frequent pairs and they supports.");
00156 fpi.insertFrequentPairs(freq_pairs_with_counters);
00157 freq_pairs_with_counters.clear();
00158 std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >().
00159 swap(freq_pairs_with_counters);
00160
00161 CG candidate_generator(data_structure, decoder, s_alloc);
00162 SUPP_C support_counter(data_structure, coder);
00163 log_status(0,"Setting the size of the candidate to 3");
00164 candidate_size = 3;
00165
00166 log_status(0,"Generating candidates of size 3");
00167 candidate_generator.generateCandidate(candidate_size);
00168
00169 while( candidate_generator.isThereAnyCandidate() )
00170 {
00171 log_status(0,"Determining the supports");
00172 support_counter.countSupport(candidate_size);
00173 log_status(0,"Deleting infrequent candidates");
00174 infrequent_remover.deleteInfrequent(min_supp, candidate_size);
00175 ++candidate_size;
00176 log_status(0,"Setting the candidatesize counter to %d",
00177 candidate_size);
00178 if(maxsize < candidate_size)
00179 {
00180 log_status(0,"Maximal size threshold is reached.");
00181 break;
00182 }
00183 log_status(0,"Generating new candidates");
00184 candidate_generator.generateCandidate(candidate_size);
00185 }
00186 log_status(0,"No new candidates were generated.");
00187 candidate_generator.afterWorkCandGen();
00188 infrequent_remover.afterWorkDel();
00189 }
00190 else
00191 {
00192 for(std::vector<counter_t>::size_type index = 0;
00193 index != freq_counters.size(); ++index)
00194 {
00195 decoder.pushItemWithWrite( index, freq_counters[index] );
00196 decoder.popItem();
00197 }
00198 for(std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >::iterator it
00199 = freq_pairs_with_counters.begin(); it != freq_pairs_with_counters.end(); ++it)
00200 {
00201
00202 decoder.pushItem((*it).second.first);
00203 decoder.pushItemWithWrite((*it).second.second, (*it).first);
00204 decoder.popItem();
00205 decoder.popItem();
00206 }
00207 }
00208 }
00209
00210 #endif