00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "MSApriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014
00015
00020 MSApriori_Trie::MSApriori_Trie(const unsigned long counter_of_emptyset, const vector<double>& mis_abs ):
00021 main_trie( NULL, counter_of_emptyset),mis_abs(mis_abs)
00022 {
00023 }
00024
00030 void MSApriori_Trie::insert_frequent_items( const set< pair<itemtype, unsigned long> >& counters )
00031 {
00032 for(set< pair<itemtype, unsigned long> >::iterator it = counters.begin(); it != counters.end(); it++)
00033 main_trie.add_empty_state( (*it).first, (*it).second );
00034 if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;
00035 }
00036
00040 void MSApriori_Trie::candidate_generation( const itemtype& frequent_size )
00041 {
00042 if( frequent_size == 1 ) candidate_generation_two();
00043 else if( main_trie.maxpath == frequent_size )
00044 {
00045 set<itemtype> maybe_candidate;
00046 candidate_generation_assist( &main_trie, frequent_size-1, maybe_candidate );
00047 }
00048 }
00049
00057 void MSApriori_Trie::find_candidate( const vector<itemtype>& basket, const itemtype candidate_size,
00058 const unsigned long counter_incr)
00059 {
00060 if( candidate_size != 2 )
00061 main_trie.find_candidate( basket, candidate_size, basket.begin(), counter_incr );
00062 else find_candidate_two( basket, counter_incr );
00063 }
00064
00068 void MSApriori_Trie::delete_infrequent( const itemtype candidate_size )
00069 {
00070 if( candidate_size != 2 )
00071 {
00072 for( vector<Edge>::iterator itEdge = main_trie.edgevector.begin();
00073 itEdge != main_trie.edgevector.end(); itEdge++ )
00074 if( (*itEdge).subtrie->maxpath == candidate_size - 1 )
00075 (*itEdge).subtrie->delete_infrequent( mis_abs[(*itEdge).label], candidate_size - 2 );
00076 }
00077 else delete_infrequent_two( );
00078 }
00079
00084 void MSApriori_Trie::association( const double min_conf, Input_Output_Manager& input_output_manager ) const
00085 {
00086 input_output_manager << "\nAssociation rules:\ncondition ==> consequence (confidence, occurrence)\n";
00087 set<itemtype> consequence_part;
00088 assoc_rule_assist( min_conf, &main_trie, consequence_part, input_output_manager );
00089 }
00090
00091 itemtype MSApriori_Trie::longest_path() const
00092 {
00093 return main_trie.maxpath;
00094 }
00095
00096 void MSApriori_Trie::write_content_to_file( Input_Output_Manager& input_output_manager ) const
00097 {
00098 input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00099 input_output_manager<< "{} ("<< main_trie.counter << ')' << endl;
00100 for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ )
00101 {
00102 input_output_manager<< "Frequent " << item_size << "-itemsets:\nitemset (occurrence)\n";
00103 set<itemtype> frequent_itemset;
00104 write_content_to_file_assist( input_output_manager, &main_trie, item_size, frequent_itemset );
00105 }
00106 }
00107
00108 void MSApriori_Trie::show_content_preorder( ) const
00109 {
00110 main_trie.show_content_preorder( );
00111 }
00112
00113
00114 MSApriori_Trie::~MSApriori_Trie()
00115 {
00116 }
00117
00122 bool MSApriori_Trie::is_all_subset_frequent( const set<itemtype>& maybe_candidate ) const
00123 {
00124 if( maybe_candidate.size() < 3) return true;
00125 else
00126 {
00127 set<itemtype> temp_itemset(maybe_candidate);
00128 set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00129 set<itemtype>::const_iterator it_lower_bound = ++maybe_candidate.begin();
00130 while ( item_it != it_lower_bound )
00131 {
00132 item_it--;
00133 temp_itemset.erase( *item_it );
00134 if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return false;
00135 temp_itemset.insert( *item_it );
00136 }
00137 if( mis_abs[*it_lower_bound] == mis_abs[*maybe_candidate.begin()] )
00138 {
00139 temp_itemset.erase( *maybe_candidate.begin() );
00140 if( main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return true;
00141 else return false;
00142 }
00143 else return true;
00144 }
00145 }
00146
00152 void MSApriori_Trie::find_candidate_two( const vector<itemtype>& basket, const unsigned long counter )
00153 {
00154 if(basket.size() > 1)
00155 {
00156 vector<itemtype>::const_iterator it1_basket,
00157 it2_basket;
00158
00159 for( it1_basket = basket.begin(); it1_basket != basket.end()-1; it1_basket++)
00160 for( it2_basket = it1_basket+1; it2_basket != basket.end(); it2_basket++)
00161 temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] += counter;
00162 }
00163 }
00164
00165 void MSApriori_Trie::candidate_generation_two( )
00166 {
00167 if( !mis_abs.empty() )
00168 {
00169 main_trie.maxpath = 2;
00170 temp_counter_array.reserve( mis_abs.size() - 1 );
00171 temp_counter_array.resize( mis_abs.size() - 1 );
00172 for( vector<Edge>::size_type stateIndex = 0; stateIndex < mis_abs.size() - 1; stateIndex++ )
00173 {
00174 temp_counter_array[stateIndex].reserve( mis_abs.size() - 1 - stateIndex );
00175 temp_counter_array[stateIndex].resize( mis_abs.size() - 1 - stateIndex, 0);
00176 }
00177 }
00178 }
00179
00180
00181 void MSApriori_Trie::candidate_generation_assist( Trie* trie, const itemtype distance_from_generator,
00182 set<itemtype>& maybe_candidate)
00183 {
00184 vector<Edge>::iterator itEdge = trie->edgevector.begin();
00185 if( distance_from_generator )
00186 {
00187 for( ; itEdge != trie->edgevector.end(); itEdge++ )
00188 if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator )
00189 {
00190 maybe_candidate.insert((*itEdge).label);
00191 candidate_generation_assist((*itEdge).subtrie, distance_from_generator - 1, maybe_candidate );
00192 maybe_candidate.erase((*itEdge).label);
00193 }
00194 }
00195 else
00196 {
00197 vector<Edge>::iterator itEdge2;
00198 Trie* toExtend;
00199 for( ; itEdge != trie->edgevector.end(); itEdge++ )
00200 {
00201 maybe_candidate.insert((*itEdge).label);
00202 toExtend = (*itEdge).subtrie;
00203 for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); itEdge2++ )
00204 {
00205 maybe_candidate.insert( (*itEdge2).label );
00206 if( is_all_subset_frequent( maybe_candidate) )
00207 toExtend->add_empty_state( (*itEdge2).label );
00208 maybe_candidate.erase( (*itEdge2).label );
00209 }
00210 if( !toExtend->edgevector.empty()) toExtend->maxpath = 1;
00211 (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);
00212 maybe_candidate.erase((*itEdge).label);
00213 }
00214 trie->max_path_set();
00215 }
00216 }
00217
00218 void MSApriori_Trie::delete_infrequent_two( )
00219 {
00220 vector<Edge>::size_type stateIndex_1,
00221 stateIndex_2;
00222 vector<Edge>::iterator it;
00223 for( stateIndex_1 = 0; stateIndex_1 < mis_abs.size()-1; stateIndex_1++ )
00224 {
00225 it = lower_bound(main_trie.edgevector.begin(),
00226 main_trie.edgevector.end(), stateIndex_1, Edge_point_less);
00227 for( stateIndex_2 = 0; stateIndex_2 < mis_abs.size() - 1 - stateIndex_1; stateIndex_2++ )
00228 if( temp_counter_array[stateIndex_1][stateIndex_2] > mis_abs[stateIndex_1] )
00229 (*it).subtrie->add_empty_state( stateIndex_1 + stateIndex_2 + 1,
00230 temp_counter_array[stateIndex_1][stateIndex_2] );
00231 temp_counter_array[stateIndex_1].clear();
00232 vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);
00233 if( !(*it).subtrie->edgevector.empty() ) (*it).subtrie->maxpath = 1;
00234 }
00235 temp_counter_array.clear();
00236 vector< vector<unsigned long> >().swap(temp_counter_array);
00237 main_trie.max_path_set();
00238 }
00239
00240 void MSApriori_Trie::assoc_rule_find( const double min_conf, set<itemtype>& condition_part, set<itemtype>& consequence_part,
00241 const unsigned long union_support, Input_Output_Manager& input_output_manager ) const
00242 {
00243 itemtype item;
00244 for( set<itemtype>::const_iterator item_it = consequence_part.begin(); item_it != consequence_part.end(); item_it++)
00245 if( condition_part.empty() || *(--condition_part.end()) < *item_it)
00246 {
00247 item = *item_it;
00248 consequence_part.erase( item );
00249 condition_part.insert( item );
00250 if( union_support > main_trie.is_included(condition_part, condition_part.begin() )->counter * min_conf)
00251 {
00252 input_output_manager<< endl;
00253 input_output_manager.write_out_basket(condition_part);
00254 input_output_manager<< "==> ";
00255 input_output_manager.write_out_basket(consequence_part);
00256 input_output_manager<< "("<<((double) union_support) / main_trie.is_included(condition_part, condition_part.begin())->counter << ", " << union_support << ')';
00257 }
00258 else if( consequence_part.size() > 1 ) assoc_rule_find( min_conf, condition_part, consequence_part, union_support, input_output_manager );
00259 item_it = (consequence_part.insert( item )).first;
00260 condition_part.erase( item );
00261 }
00262 }
00263
00264 void MSApriori_Trie::assoc_rule_assist( const double min_conf, const Trie* trie, set<itemtype>& consequence_part, Input_Output_Manager& input_output_manager) const
00265 {
00266 if( consequence_part.size() > 1 )
00267 {
00268 set<itemtype> condition_part;
00269 assoc_rule_find( min_conf, condition_part, consequence_part, trie->counter, input_output_manager );
00270 }
00271 for( vector<Edge>::const_iterator it_item = trie->edgevector.begin(); it_item != trie->edgevector.end(); it_item++)
00272 {
00273 consequence_part.insert( (*it_item).label );
00274 assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part, input_output_manager);
00275 consequence_part.erase( (*it_item).label );
00276 }
00277 }
00278 void MSApriori_Trie::write_content_to_file_assist( Input_Output_Manager& input_output_manager, const Trie* trie, const itemtype distance_from_frequent, set<itemtype>& frequent_itemset ) const
00279 {
00280 if( distance_from_frequent )
00281 {
00282 for( vector<Edge>::const_iterator it = trie->edgevector.begin(); it != trie->edgevector.end(); it++ )
00283 if( (*it).subtrie->maxpath + 1 >= distance_from_frequent )
00284 {
00285 frequent_itemset.insert( (*it).label );
00286 write_content_to_file_assist( input_output_manager, (*it).subtrie, distance_from_frequent -1, frequent_itemset );
00287 frequent_itemset.erase( (*it).label );
00288 }
00289 }
00290 else input_output_manager.write_out_basket_and_counter( frequent_itemset, trie->counter );
00291 }
00292