00001
00002
00003
00004
00005
00006
00007
00008
00009
00014 #include "Input_Output_Manager.hpp"
00015 #include <algorithm>
00016
00017 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file,
00018 const char* output_file_name ):
00019 ofstream(output_file_name), basket_file(basket_file)
00020 {
00021 }
00022
00027 int Input_Output_Manager::read_in_a_line( set<itemtype>& basket )
00028 {
00029 if( basket_file.eof() ) return 0;
00030 char c;
00031 itemtype pos;
00032
00033 basket.clear();
00034 do
00035 {
00036 int item = 0;
00037 pos = 0;
00038 basket_file.get(c);
00039 while(basket_file.good() && (c >= '0') && (c <= '9'))
00040 {
00041 item *= 10;
00042 item += int(c)-int('0');
00043 basket_file.get(c);
00044 pos++;
00045 }
00046 if( pos ) basket.insert( (itemtype) item );
00047 }
00048 while( !basket_file.eof() && c != '\n' );
00049 return 1;
00050 }
00051
00059 countertype Input_Output_Manager::find_frequent_items(
00060 const double min_supp, vector<countertype>& support_of_items )
00061 {
00062 countertype basket_number = 0;
00063 set<itemtype> basket;
00064 vector< countertype > temp_counter_vector;
00065
00067 set<itemtype>::iterator it_basket;
00068 while( read_in_a_line( basket ) )
00069 {
00070 if( !basket.empty() )
00071 {
00072 basket_number++;
00073 for( it_basket = basket.begin(); it_basket != basket.end();
00074 it_basket++ )
00075 {
00076 if( *it_basket + 1 > temp_counter_vector.size() )
00077 temp_counter_vector.resize( *it_basket + 1, 0 );
00078 temp_counter_vector[*it_basket]++;
00079 }
00080 }
00081 }
00082
00084 double min_occurrence = min_supp * (basket_number - 0.5);
00085 vector<countertype>::size_type edgeIndex;
00086
00087 set< pair<countertype, itemtype> > temp_set;
00088 for( edgeIndex = 0; edgeIndex < temp_counter_vector.size();
00089 edgeIndex++ )
00090 if( temp_counter_vector[edgeIndex] > min_occurrence )
00091 temp_set.insert(
00092 pair<countertype, itemtype>(temp_counter_vector[edgeIndex],
00093 edgeIndex));
00094
00095 new_code_inverse.clear();
00096 support_of_items.clear();
00097 for(set< pair<countertype, itemtype> >::iterator it = temp_set.begin();
00098 it != temp_set.end(); it++)
00099 {
00100 new_code_inverse.push_back((*it).second);
00101 support_of_items.push_back((*it).first);
00102 }
00103
00104
00105 vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00106 vector<countertype>(support_of_items).swap(support_of_items);
00107
00108 new_code.reserve( temp_counter_vector.size() + 1 );
00109 new_code.resize( temp_counter_vector.size() + 1, 0 );
00110 for( edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00111 new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00112 return basket_number;
00113 }
00118 void Input_Output_Manager::basket_recode(
00119 const set<itemtype>& original_basket, vector<itemtype>& new_basket )
00120 {
00121 new_basket.clear();
00122 for( set<itemtype>::const_iterator it_basket = original_basket.begin();
00123 it_basket != original_basket.end(); it_basket++ )
00124 if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00125 sort( new_basket.begin(), new_basket.end() );
00126 }
00127
00128 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00129 {
00130 for( set<itemtype>::const_iterator it_item = basket.begin();
00131 it_item != basket.end(); it_item++)
00132 {
00133 operator<<( new_code_inverse[*it_item] );
00134 put(' ');
00135 }
00136 }
00137
00138 void Input_Output_Manager::write_out_basket_and_counter(
00139 const set<itemtype>& itemset, const countertype counter)
00140 {
00141 for( set<itemtype>::const_iterator it_item = itemset.begin();
00142 it_item != itemset.end(); it_item++)
00143 {
00144 operator<<( new_code_inverse[*it_item] );
00145 put(' ');
00146 }
00147 put('(');
00148 operator<<(counter);
00149 write(")\n",2);
00150 }
00151
00152 void Input_Output_Manager::rewind()
00153 {
00154 basket_file.clear();
00155 basket_file.seekg(0, ios::beg);
00156 }
00157
00158 Input_Output_Manager::~Input_Output_Manager()
00159 {
00160 close();
00161 }