00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "Apriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014
00015
00020 void Apriori_Trie::insert_frequent_items(
00021 const vector<countertype>& counters )
00022 {
00023 for(vector<countertype>::size_type item_index = 0;
00024 item_index < counters.size(); item_index++)
00025 main_trie.add_empty_state( item_index, counters[item_index] );
00026 }
00027
00032 void Apriori_Trie::candidate_generation(
00033 const itemtype& frequent_size,
00034 Input_Output_Manager& input_output_manager )
00035 {
00036 if( frequent_size == 1 ) candidate_generation_two();
00037 else
00038 {
00039 set<itemtype> maybe_candidate;
00040 candidate_generation_assist( &main_trie,
00041 maybe_candidate,
00042 input_output_manager );
00043 }
00044 }
00045
00053 void Apriori_Trie::find_candidate( const vector<itemtype>& basket,
00054 const itemtype candidate_size,
00055 const countertype counter_incr)
00056 {
00057 if( candidate_size != 2 )
00058 if ( candidate_size<basket.size()+1 )
00059 main_trie.find_candidate( basket.end()-candidate_size+1,
00060 basket.begin(), counter_incr );
00061 else;
00062 else find_candidate_two( basket, counter_incr );
00063 }
00064
00069 void Apriori_Trie::delete_infrequent( const double min_occurrence,
00070 const itemtype candidate_size )
00071 {
00072 if( candidate_size != 2 )
00073 main_trie.delete_infrequent( min_occurrence );
00074 else delete_infrequent_two( min_occurrence );
00075 }
00076
00077
00082 bool Apriori_Trie::is_all_subset_frequent(
00083 const set<itemtype>& maybe_candidate ) const
00084 {
00085 if( maybe_candidate.size() < 3) return true;
00086
00087 else
00088 {
00089 set<itemtype> temp_itemset(maybe_candidate);
00090 set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00091 do
00092 {
00093 item_it--;
00094 temp_itemset.erase( *item_it );
00095 if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) )
00096 return false;
00097 temp_itemset.insert( *item_it );
00098 }
00099 while ( item_it != maybe_candidate.begin() );
00100 return true;
00101 }
00102 }
00103
00104 void Apriori_Trie::candidate_generation_two()
00105 {
00106 if( !main_trie.edgevector.empty() )
00107 {
00108 temp_counter_array.reserve(main_trie.edgevector.size()-1);
00109 temp_counter_array.resize(main_trie.edgevector.size()-1);
00110 for( vector<Edge>::size_type stateIndex = 0;
00111 stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00112 {
00113 temp_counter_array[stateIndex].reserve(
00114 main_trie.edgevector.size()-1-stateIndex );
00115 temp_counter_array[stateIndex].resize(
00116 main_trie.edgevector.size()-1-stateIndex, 0);
00117 }
00118 }
00119 }
00120
00121 void Apriori_Trie::candidate_generation_assist(
00122 Trie* trie,
00123 set<itemtype>& maybe_candidate,
00124 Input_Output_Manager& input_output_manager)
00125 {
00126 vector<Edge>::iterator itEdge = trie->edgevector.begin();
00127 if( (*itEdge).subtrie->edgevector.empty() )
00128 {
00129 vector<Edge>::iterator itEdge2;
00130 Trie* toExtend;
00131 while( itEdge != trie->edgevector.end() )
00132 {
00133 maybe_candidate.insert((*itEdge).label);
00134 toExtend = (*itEdge).subtrie;
00135 input_output_manager.write_out_basket_and_counter(
00136 maybe_candidate, toExtend->counter );
00137 for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end();
00138 itEdge2++ )
00139 {
00140 maybe_candidate.insert( (*itEdge2).label );
00141 if( is_all_subset_frequent( maybe_candidate) )
00142 toExtend->add_empty_state( (*itEdge2).label );
00143 maybe_candidate.erase( (*itEdge2).label );
00144 }
00145
00146
00147 (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);
00148
00149 maybe_candidate.erase((*itEdge).label);
00150 if( toExtend->edgevector.empty() )
00151 {
00152 delete (*itEdge).subtrie;
00153 itEdge = trie->edgevector.erase(itEdge);
00154 }
00155 else itEdge++;
00156 }
00157 }
00158 else
00159 {
00160 while( itEdge != trie->edgevector.end() )
00161 {
00162
00163 maybe_candidate.insert((*itEdge).label);
00164 candidate_generation_assist((*itEdge).subtrie,
00165 maybe_candidate, input_output_manager );
00166 maybe_candidate.erase((*itEdge).label);
00167 if((*itEdge).subtrie->edgevector.empty())
00168 {
00169 delete (*itEdge).subtrie;
00170 itEdge = trie->edgevector.erase(itEdge);
00171 }
00172 else itEdge++;
00173 }
00174 }
00175 }
00176
00183 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket,
00184 const countertype counter )
00185 {
00186 if( basket.size() > 1)
00187 {
00188 vector<itemtype>::const_iterator it1_basket,
00189 it2_basket;
00190
00191 for( it1_basket = basket.begin(); it1_basket != basket.end()-1;
00192 it1_basket++)
00193 for( it2_basket = it1_basket+1; it2_basket != basket.end();
00194 it2_basket++)
00195 temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1]
00196 += counter;
00197 }
00198 }
00199
00203 void Apriori_Trie::delete_infrequent_two( const double min_occurrence )
00204 {
00205 vector<Edge>::size_type stateIndex_1,
00206 stateIndex_2;
00207 for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1;
00208 stateIndex_1++ )
00209 {
00210 for( stateIndex_2 = 0;
00211 stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1;
00212 stateIndex_2++ )
00213 {
00214 if( temp_counter_array[stateIndex_1][stateIndex_2] > min_occurrence )
00215 main_trie.edgevector[stateIndex_1].subtrie->add_empty_state(
00216 stateIndex_1 + stateIndex_2 + 1,
00217 temp_counter_array[stateIndex_1][stateIndex_2] );
00218 }
00219 temp_counter_array[stateIndex_1].clear();
00220 vector<countertype>().swap(temp_counter_array[stateIndex_1]);
00222 }
00223 temp_counter_array.clear();
00224
00225 vector< vector<countertype> >().swap(temp_counter_array);
00227 vector<Edge>::iterator it= main_trie.edgevector.begin();
00228 while( it!=main_trie.edgevector.end() )
00229 if((*it).subtrie->edgevector.empty())
00230 {
00231 delete (*it).subtrie;
00232 it = main_trie.edgevector.erase(it);
00233 }
00234 else it++;
00235 }