00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "MSApriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013
00014
00015 using namespace std;
00016
00021 void MSApriori::support( const itemtype& candidate_size )
00022 {
00023 set<itemtype> basket;
00024 vector<itemtype> basket_v;
00025 if( store_input )
00026 {
00027 if (candidate_size == 2)
00028 {
00029 while( input_output_manager.read_in_a_line( basket ) )
00030 {
00031 input_output_manager.basket_recode( basket, basket_v );
00032 if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033 }
00034 }
00035 for (map<vector<itemtype>,unsigned long>::iterator it = reduced_baskets.begin();
00036 it!=reduced_baskets.end();it++)
00037 msapriori_trie->find_candidate(it->first,candidate_size,it->second);
00038 }
00039 else while( input_output_manager.read_in_a_line( basket ) )
00040 {
00041 input_output_manager.basket_recode(basket, basket_v);
00042 msapriori_trie->find_candidate(basket_v,candidate_size);
00043 }
00044 }
00045
00054 MSApriori::MSApriori( ifstream& basket_file, const char* output_file_name,
00055 const bool store_input):
00056 input_output_manager(basket_file, output_file_name ),
00057 store_input(store_input)
00058 {
00059 }
00060
00068 void MSApriori::MSAPRIORI_alg( ifstream& mis_file, const double min_conf,
00069 const bool quiet, const unsigned long size_threshold )
00070 {
00071 vector<double> mis_abs;
00072 if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00073 itemtype candidate_size=1;
00074 itemtype longest_path,longest_path_after_delete=1;
00075 if(!quiet)
00076 cout<<endl<<"Determining the support of the items and deleting infrequent ones!"<<endl;
00077 set< pair<itemtype, unsigned long> > support_of_items;
00078 unsigned long basket_number = input_output_manager.find_frequent_items( mis_file,
00079 support_of_items, mis_abs );
00080 msapriori_trie = new MSApriori_Trie( basket_number, mis_abs );
00081 msapriori_trie->insert_frequent_items( support_of_items );
00082
00083
00084
00085 longest_path_after_delete = msapriori_trie->longest_path();
00086
00087
00088 longest_path = msapriori_trie->longest_path();
00089 candidate_size++;
00090 if(!quiet) cout<<endl<<"Genarating "<<candidate_size<<"-itemset candidates!"<<endl;
00091 msapriori_trie->candidate_generation( candidate_size-1 );
00092
00093
00094 while( longest_path < msapriori_trie->longest_path() )
00095 {
00096 input_output_manager.rewind();
00097 if(!quiet) cout<<"Determining the support of the "<<candidate_size<<"-itemset candidates!"<<endl;
00098 support( candidate_size );
00099
00100
00101 if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00102 msapriori_trie->delete_infrequent( candidate_size);
00103 longest_path_after_delete = msapriori_trie->longest_path();
00104
00105
00106 if (candidate_size == size_threshold )
00107 {
00108 if(!quiet) cout<<"Size threshold is reached!"<<endl;
00109 break;
00110 }
00111 longest_path = msapriori_trie->longest_path();
00112 candidate_size++;
00113 if( !quiet ) cout<<endl<<"Genarating "<<candidate_size<<"-itemset candidates!"<<endl;
00114 msapriori_trie->candidate_generation(candidate_size-1);
00115
00116
00117 }
00118 msapriori_trie->write_content_to_file( input_output_manager );
00119 if (min_conf)
00120 {
00121 if(!quiet) cout<<"\nGenerating association rules...!\n";
00122 msapriori_trie->association( min_conf, input_output_manager );
00123 }
00124 if(!quiet) cout<<"\nMining is done!\n";
00125 }
00126
00127 MSApriori::~MSApriori()
00128 {
00129 delete msapriori_trie;
00130 }
00131