00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "Apriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013 #include <cmath>
00014
00015 using namespace std;
00016
00021 void Apriori::support( const itemtype& candidate_size )
00022 {
00023 set<itemtype> basket;
00024 vector<itemtype> basket_v;
00025 if( store_input )
00026 {
00027 if (candidate_size == 2)
00028 {
00029 while( input_output_manager.read_in_a_line( basket ) )
00030 {
00031 input_output_manager.basket_recode( basket, basket_v );
00032 if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033 }
00034 }
00035 for (map<vector<itemtype>, countertype>::iterator it =
00036 reduced_baskets.begin(); it!=reduced_baskets.end();it++)
00037 apriori_trie->find_candidate(it->first,candidate_size,it->second);
00038 }
00039 else while( input_output_manager.read_in_a_line( basket ) )
00040 {
00041 input_output_manager.basket_recode(basket, basket_v);
00042 apriori_trie->find_candidate(basket_v,candidate_size);
00043 }
00044 }
00045
00055 void Apriori::APRIORI_alg( const double min_supp,
00056 const bool quiet,
00057 const unsigned int size_threshold )
00058 {
00059 countertype basket_number;
00060 if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00061 itemtype candidate_size=1;
00062 if(!quiet)
00063 {
00064 cout<<endl<<"Determining the support of the items";
00065 cout<<" and deleting infrequent ones!"<<endl;
00066 }
00067 vector<countertype> support_of_items;
00068 basket_number = input_output_manager.find_frequent_items(
00069 min_supp, support_of_items );
00070 input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00071 input_output_manager<< "{} ("<< basket_number << ")\n";
00072 input_output_manager<< "Frequent " << 1;
00073 input_output_manager<< "-itemsets:\nitemset (occurrence)\n";
00074 set<itemtype> temp_set;
00075 for(vector<countertype>::size_type index = 0;
00076 index < support_of_items.size(); index++)
00077 {
00078 temp_set.insert(index);
00079 input_output_manager.write_out_basket_and_counter(
00080 temp_set, support_of_items[index] );
00081 temp_set.erase(temp_set.begin());
00082 }
00083
00084 apriori_trie = new Apriori_Trie( basket_number );
00085 apriori_trie->insert_frequent_items( support_of_items );
00086
00087
00088
00089 double min_supp_abs = min_supp * (basket_number - 0.5);
00090
00091
00092 candidate_size++;
00093 if(!quiet)
00094 {
00095 cout<<endl<<"Genarating "<<candidate_size;
00096 cout<<"-itemset candidates!"<<endl;
00097 }
00098 apriori_trie->candidate_generation(candidate_size-1,
00099 input_output_manager);
00100
00101
00102 while( apriori_trie->is_there_any_candidate() )
00103 {
00104 input_output_manager.rewind();
00105 if(!quiet)
00106 {
00107 cout<<"Determining the support of the "<<candidate_size;
00108 cout<<"-itemset candidates!"<<endl;
00109 }
00110 support( candidate_size );
00111
00112
00113 if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00114 apriori_trie->delete_infrequent(min_supp_abs, candidate_size);
00115
00116
00117 if (candidate_size == size_threshold )
00118 {
00119 if(!quiet) cout<<"Size threshold is reached!"<<endl;
00120 break;
00121 }
00122 candidate_size++;
00123 if( !quiet )
00124 {
00125 cout<<endl<<"Genarating "<<candidate_size;
00126 cout<<"-itemset candidates!"<<endl;
00127 }
00128 input_output_manager<< "Frequent " << candidate_size-1;
00129 input_output_manager<< "-itemsets:\nitemset (occurrence)\n";
00130 apriori_trie->candidate_generation(candidate_size-1,
00131 input_output_manager);
00132
00133
00134 }
00135 if(!quiet) cout<<endl<<"Mining is done!"<<endl;
00136 }