Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

Apriori.hpp

Go to the documentation of this file.
00001 
00011 #ifndef Apriori_HPP
00012 #define Apriori_HPP
00013 
00014 #include "common.hpp"
00015 #include "common/log.h"
00016 #include "common/allocators.hpp"
00017 #include <vector>
00018 
00081 template <class C, class D, class MAIN_DATA_STRUCTURE, 
00082           class SEC_DATA_STRUCTURE, class FII, class FPI, 
00083           class CG, class IR, class SUPP_C>
00084 class Apriori
00085 {
00086    private:
00088       MAIN_DATA_STRUCTURE& data_structure;
00090       SEC_DATA_STRUCTURE& s_alloc;
00092       IR& infrequent_remover;
00094       C& coder;
00096       D& decoder;
00097       //
00098       FII& fii;
00099 
00100       unsigned int candidate_size;
00101 
00102    public:
00103 
00104       Apriori(MAIN_DATA_STRUCTURE& data_structure, SEC_DATA_STRUCTURE& s_alloc, 
00105               IR& infrequent_remover, C& coder, D& decoder, FII& fii ) : 
00106          data_structure(data_structure), s_alloc(s_alloc), 
00107          infrequent_remover(infrequent_remover), coder(coder), decoder(decoder), fii(fii){}
00108 
00110       void findFrequentItemsets( 
00111          const counter_t emptyset_support,
00112          std::vector<counter_t>& freq_counters,
00113          std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >& 
00114          freq_pairs_with_counters,
00115          const counter_t min_supp,
00116          unsigned int maxsize = largest_itemsetsize);
00117 
00118 };
00119 
00133 template <class C, class D, class MAIN_DATA_STRUCTURE, 
00134           class SEC_DATA_STRUCTURE, class FII, class FPI, 
00135           class CG, class IR, class SUPP_C>
00136 void Apriori<C, D, MAIN_DATA_STRUCTURE, SEC_DATA_STRUCTURE, 
00137              FII, FPI, CG, IR, SUPP_C>::
00138 findFrequentItemsets( 
00139    const counter_t emptyset_support,
00140    std::vector<counter_t>& freq_counters,
00141    std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >& 
00142    freq_pairs_with_counters,
00143    const counter_t min_supp,
00144    unsigned int maxsize )
00145 {
00146    if(maxsize > 2)
00147    {
00148       FPI fpi(data_structure, decoder, s_alloc);
00149       log_status(0,"Setting the support of the emptyset in the trie.");
00150       fii.setEmptysetSupport(emptyset_support);
00151       log_status(0,"Inserting the support of the frequent items.");
00152       fii.insertFrequentItems(freq_counters);
00153       freq_counters.clear();
00154       std::vector<counter_t>().swap(freq_counters);
00155       log_status(0,"Inserting the frequent pairs and they supports.");
00156       fpi.insertFrequentPairs(freq_pairs_with_counters);
00157       freq_pairs_with_counters.clear();  
00158       std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >().
00159          swap(freq_pairs_with_counters);
00160 
00161       CG candidate_generator(data_structure, decoder, s_alloc);
00162       SUPP_C support_counter(data_structure, coder);
00163       log_status(0,"Setting the size of the candidate to 3");
00164       candidate_size = 3;
00165 
00166       log_status(0,"Generating candidates of size 3");
00167       candidate_generator.generateCandidate(candidate_size);
00168    
00169       while( candidate_generator.isThereAnyCandidate()  )
00170       {
00171          log_status(0,"Determining the supports");
00172          support_counter.countSupport(candidate_size);
00173          log_status(0,"Deleting infrequent candidates");
00174          infrequent_remover.deleteInfrequent(min_supp, candidate_size);
00175          ++candidate_size;
00176          log_status(0,"Setting the candidatesize counter to %d", 
00177                     candidate_size);
00178          if(maxsize < candidate_size)
00179          {
00180             log_status(0,"Maximal size threshold is reached.");
00181             break;
00182          }
00183          log_status(0,"Generating new candidates");
00184          candidate_generator.generateCandidate(candidate_size);
00185       }
00186       log_status(0,"No new candidates were generated.");
00187       candidate_generator.afterWorkCandGen();
00188       infrequent_remover.afterWorkDel();
00189    }
00190    else
00191    {
00192       for(std::vector<counter_t>::size_type index = 0;
00193           index != freq_counters.size(); ++index)
00194       {
00195          decoder.pushItemWithWrite( index, freq_counters[index] );
00196          decoder.popItem();      
00197       }
00198       for(std::vector< std::pair< counter_t, std::pair<item_t, item_t> > >::iterator it 
00199              = freq_pairs_with_counters.begin(); it != freq_pairs_with_counters.end(); ++it)
00200       {
00201          // it could be solved more efficient... but I don't care ;)
00202          decoder.pushItem((*it).second.first);
00203          decoder.pushItemWithWrite((*it).second.second, (*it).first);
00204          decoder.popItem();
00205          decoder.popItem();
00206       }      
00207    }
00208 }
00209 
00210 #endif

Generated on Sun Sep 17 17:50:37 2006 for FIM environment by  doxygen 1.4.4