00001 #include "common.hpp"
00002 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00003 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00004
00005 #include "io/output/BufferedOutput.hpp"
00006 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00007
00008 #include "util/StreamParser.hpp"
00009 #include "util/FrequentFilter.cpp"
00010
00011 #include "datastructures/maxvector.hpp"
00012
00013 #include "test/apriori/bodon/LeafRepresentationSelector.hpp"
00014
00015 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00016 #include "apriori/bodon/Trie.hpp"
00017 #include "apriori/bodon/TrieNEE.hpp"
00018
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
00022
00023
/// Help text describing the expected transaction file layout.
/// Filled in by init() and printed by usage().
std::string file_format;

/**
 * Builds the global file_format help text.
 *
 * The text is assigned (not appended), so calling init() more than once
 * always leaves the same content in file_format.
 */
void init()
{
   // Adjacent string literals are concatenated by the compiler, so the whole
   // message is assembled in a single assignment.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plan text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items seperated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00037 void usage()
00038 {
00039 std::cerr << "\nUsage: findLeafRepresentation leaf_type transactionfile min_supp ";
00040 std::cerr << "outcomefile [options]\n";
00041 std::cerr << "\n leaf_tyep\t the type of the leaf, i.e: homo,";
00042 std::cerr << " inhomo_new, inhomo_alloc_1K, inhomo_alloc_8K, inhomo_alloc_64K, inhomo_alloc_512K \n ";
00043 std::cerr << "\n transactionfile file, that contains the transactions of items";
00044 std::cerr << "\n outcomefile\t file to write the outcome";
00045 std::cerr << "\n min_supp\t absolute support threshold";
00046
00047 std::cerr << file_format;
00048 std::cerr << "\n\nHave a succesful mining ;-)"<<std::endl<<std::endl;
00049 }
00050
00062 int process_arguments( int argc, char *argv[],
00063 counter_t& min_supp, bool &isrel, double &relminsupp )
00064 {
00065 if ( argc < 5 )
00066 {
00067 usage();
00068 log_err(0,"There are 5 mandatory arguments.");
00069 return 2;
00070 }
00071 std::string mins=argv[3];
00072 if (mins[mins.size()-1]=='%') {
00073 mins.erase(mins.size()-1);
00074 isrel=true;
00075 relminsupp=atof(mins.c_str());
00076 relminsupp/=100;
00077 log_info(0,"Using relative minimum support of %lg",relminsupp);
00078 return 0;
00079 }
00080 isrel=false;
00081
00082 int min_supp_i;
00083 try
00084 {
00085 convert(argv[3], min_supp_i);
00086 if ( min_supp_i <= 0 )
00087 {
00088 log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00089 return 3;
00090 }
00091 }
00092 catch(BadConversion e)
00093 {
00094 log_err(0,"min_supp conversion problem.");
00095 return 3;
00096 }
00097 min_supp = static_cast<counter_t>(min_supp_i);
00098 log_info(0,"min_supp is set to %d", min_supp);
00099 return 0;
00100 }
00101
00102 int main( int argc, char *argv[] )
00103 {
00104 init();
00105 counter_t min_supp;
00106 bool relative;
00107 double relminsupp;
00108
00109 {
00110 int return_val =
00111 process_arguments( argc, argv, min_supp, relative, relminsupp );
00112 if(return_val)
00113 return return_val;
00114 }
00115
00116 char* leaf_repr = argv[1];
00117 char* input_file = argv[2];
00118 char* output_file = argv[4];
00119
00120 try
00121 {
00122 typedef brBufferedTransactionReader< > T_R;
00123 T_R::params_t par_i;
00124 par_i.file_name = input_file;
00125 par_i.mode=FileReprBase::READ;
00126 par_i.file_buffer_size = 16 * 1024;
00127 T_R tr_reader(&par_i);
00128 std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00129 counter_t nr_of_transactions;
00130 FrequentFilter<T_R>
00131 fr_filter(tr_reader);
00132 log_status(0,"Finding frequent items.");
00133 fr_filter.findFrequentItems( freq_items_with_counters,
00134 nr_of_transactions, min_supp);
00135
00136 log_status(0,"Doing decoder.");
00137 typedef DFDecoderWithEEManagement< > DF_D;
00138
00139 DF_D::params_t par_d;
00140 par_d.file_name = output_file;
00141 par_d.mode=FileReprBase::WRITE;
00142 DF_D df_decoder(&par_d);
00143
00144 typedef Bodon::LeafWithoutConstructor LEAF_WC;
00145 typedef Bodon::Leaf LEAF;
00146 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<> > TRIE_BASE;
00147 typedef Bodon::TrieNEE< TRIE_BASE > TRIE;
00148
00149
00150
00151
00152
00153
00154
00155
00156 if(!strcmp(leaf_repr,"homo"))
00157 {
00158 log_info(0,"homo representation is selected pruning is selected.");
00159 typedef NewWrapperAlloc<TRIE> LEAF_ALLOCATOR;
00160 LeafRepresentationSelector<TRIE, TRIE, LEAF_ALLOCATOR, T_R, DF_D>(
00161 min_supp, input_file, nr_of_transactions,
00162 freq_items_with_counters, tr_reader, df_decoder);
00163 }
00164 else if(!strcmp(leaf_repr,"inhomo_new"))
00165 {
00166 log_info(0,"inhomo_new representation is selected pruning is selected.");
00167 typedef NewWrapperAlloc<LEAF> LEAF_ALLOCATOR;
00168 LeafRepresentationSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>(
00169 min_supp, input_file, nr_of_transactions,
00170 freq_items_with_counters, tr_reader, df_decoder);
00171 }
00172 else if(!strcmp(leaf_repr,"inhomo_alloc_1K"))
00173 {
00174 log_info(0,"inhomo_alloc_1K representation is selected pruning is selected.");
00175 typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR;
00176 LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>(
00177 min_supp, input_file, nr_of_transactions,
00178 freq_items_with_counters, tr_reader, df_decoder);
00179 }
00180 else if(!strcmp(leaf_repr,"inhomo_alloc_8K"))
00181 {
00182 log_info(0,"inhomo_alloc_8K representation is selected pruning is selected.");
00183 typedef bracz::singleualloc<LEAF_WC, 8196> LEAF_ALLOCATOR;
00184 LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>(
00185 min_supp, input_file, nr_of_transactions,
00186 freq_items_with_counters, tr_reader, df_decoder);
00187 }
00188 else if(!strcmp(leaf_repr,"inhomo_alloc_64K"))
00189 {
00190 log_info(0,"inhomo_alloc_64K representation is selected pruning is selected.");
00191 typedef bracz::singleualloc<LEAF_WC, 65536> LEAF_ALLOCATOR;
00192 LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>(
00193 min_supp, input_file, nr_of_transactions,
00194 freq_items_with_counters, tr_reader, df_decoder);
00195 }
00196 else if(!strcmp(leaf_repr,"inhomo_alloc_512K"))
00197 {
00198 log_info(0,"inhomo_alloc_512K representation is selected pruning is selected.");
00199 typedef bracz::singleualloc<LEAF_WC, 524288> LEAF_ALLOCATOR;
00200 LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>(
00201 min_supp, input_file, nr_of_transactions,
00202 freq_items_with_counters, tr_reader, df_decoder);
00203 }
00204 else
00205 {
00206 usage();
00207 log_err(0,"leaf_type should be either homo, inhomo_new, inhomo_alloc_1K, , inhomo_alloc_8K, inhomo_alloc_64K, inhomo_alloc_512K!");
00208 return 1;
00209 }
00210 }
00211 catch (std::ios_base::failure e)
00212 {
00213 log_err(0,"Exiting the program due to IO exception");
00214 return 1;
00215 }
00216 }
00217
00218