00001
00002 #include "common.hpp"
00003 #include "common/log.h"
00004
00005 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00006
00007 #include "io/codec/decoder/df/SimpleDFDecoder.hpp"
00008
00009 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00010
00011 #include "io/output/normal/SortOutput.hpp"
00012
00013
00014 #include "util/StreamParser.hpp"
00015 #include "util/FrequentFilter.cpp"
00016
00017 #include "datastructures/maxvector.hpp"
00018 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00019
00020 #include "datastructures/trie/edgelist/OffsetIndexVector.hpp"
00021
00022
00023 #include "apriori/bodon/Leaf.hpp"
00024 #include "apriori/bodon/Trie.hpp"
00025 #include "apriori/bodon/TrieNEE.hpp"
00026
00027 #include "io/codec/coder/Coder.hpp"
00028
00029 #include "test/apriori/bodon/AprioriSelector2.hpp"
00030
00031 #include <vector>
00032 #include <iostream>
00033 #include <string>
00034
/// Description of the transaction file format; filled in by init() and
/// printed by usage().
std::string file_format;

/**
 * Fills file_format with the help text describing the input file layout.
 * Calling it more than once is harmless: the text is assigned, not appended.
 */
void init()
{
   // Adjacent string literals are joined by the compiler into one constant.
   static const char* const format_help =
      "File format:"
      "\n\nThe transactionfile is a plan text file. Each row "
      "represents a transaction. "
      "A transaction is a set of items seperated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";

   file_format.assign(format_help);
}
00048 void usage()
00049 {
00050 cerr<< "Usage: tr_cache tr_cache_option transactionfile min_supp outcomefile ordering_option nee_option [options]\n\n";
00051 cerr<<" tr_cache_option option of transaction caching, \n\t\t i.e: off, patricia, rb-tree-simul,";
00052 cerr<<" \n\t\t rb-tree-insert-clear-simple, rb-tree-insert-clear-pro \n";
00053 cerr<<" transactionfile file, that contains the tranasctions of items\n";
00054 cerr<<" min_supp\t absolute support threshold\n";
00055 cerr<<" outcomefile\t file to write the outcome\n";
00056 cerr<<" ordering_option the ordering to be used, i.e: ASC, DESC\n";
00057 cerr<<" nee_option\t equisupport pruning option, i.e: off, level3\n";
00058
00059 std::cerr << file_format;
00060 cerr<<"\t\t\tHave a succesful mining ;-)\n\n";
00061 }
00062
00074 int process_arguments( int argc, char *argv[], counter_t& min_supp,
00075 bool &isrel, double &relminsupp, unsigned int& maxsize )
00076 {
00077 if ( argc < 7 )
00078 {
00079 log_err(0,"There are 6 mandatory arguments.");
00080 usage();
00081 return 2;
00082 }
00083 std::string mins=argv[3];
00084 if (mins[mins.size()-1]=='%') {
00085 mins.erase(mins.size()-1);
00086 isrel=true;
00087 relminsupp=atof(mins.c_str());
00088 relminsupp/=100;
00089 log_info(0,"Using relative minimum support of %lg",relminsupp);
00090 return 0;
00091 }
00092 isrel=false; relminsupp=0;
00093 int min_supp_i;
00094 try
00095 {
00096 convert(argv[3], min_supp_i);
00097 if ( min_supp_i <= 0 )
00098 {
00099 log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00100 return 3;
00101 }
00102 }
00103 catch(BadConversion e)
00104 {
00105 log_err(0,"min_supp conversion problem.");
00106 return 3;
00107 }
00108 min_supp = static_cast<counter_t>(min_supp_i);
00109 log_info(0,"min_supp is set to %d", min_supp);
00110 if(argc == 8)
00111 {
00112 int maxsize_i;
00113 try
00114 {
00115 convert(argv[7], maxsize_i);
00116 if ( maxsize_i <= 0 )
00117 {
00118 log_err(0,"%s cannot be converted to a positive integer.",argv[6]);
00119 return 4;
00120 }
00121 }
00122 catch(BadConversion e)
00123 {
00124 log_err(0,"max_size conversion problem.");
00125 return 4;
00126 }
00127 maxsize = static_cast<unsigned int>(maxsize_i);
00128 log_status(0,"maxsize is set to %d", maxsize);
00129 }
00130 else
00131 maxsize = largest_itemsetsize;
00132 return 0;
00133 }
00134
00135 int main( int argc, char *argv[] )
00136 {
00137 init();
00138 counter_t min_supp;
00139 unsigned int maxsize;
00140 bool relative;
00141 double relminsupp;
00142
00143 {
00144 int return_val =
00145 process_arguments( argc, argv, min_supp,
00146 relative, relminsupp, maxsize );
00147 if(return_val)
00148 return return_val;
00149 }
00150
00151 char *trcache_option=argv[1];
00152 char* input_file = argv[2];
00153 char* output_file = argv[4];
00154 char* ordering_option = argv[5];
00155 char* nee_option = argv[6];
00156
00157 try
00158 {
00159
00160 typedef brBufferedTransactionReader< > T_R;
00161
00162
00163
00164 T_R::params_t par_i;
00165 par_i.file_name = input_file;
00166 par_i.mode=FileReprBase::READ;
00167 par_i.file_buffer_size = 16 * 1024;
00168 T_R tr_reader(&par_i);
00169 std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00170 counter_t nr_of_transactions;
00171
00172 FrequentFilter<T_R>
00173 fr_filter(tr_reader);
00174 log_status(0,"Finding frequent items.");
00175 fr_filter.findFrequentItems( freq_items_with_counters,
00176 nr_of_transactions, min_supp);
00177
00178 if(!freq_items_with_counters.empty())
00179 {
00180 log_status(0,"Doing decoder.");
00181 typedef DFDecoderWithEEManagement< > DF_D;
00182 DF_D::params_t par_d;
00183 par_d.file_name = output_file;
00184 par_d.mode=FileReprBase::WRITE;
00185
00186 DF_D df_decoder(&par_d);
00187
00188 log_status(0,"APRIORI is selected");
00189 log_status(0,"deadend pruning option is on.");
00190 typedef Bodon::LeafWithoutConstructor LEAF_WC;
00191 typedef Bodon::Leaf LEAF;
00192
00193 typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE_OEL;
00194 typedef Bodon::Trie< LEAF, Bodon::OffsetIndexVector< std::vector<void*> > > TRIE_OI;
00195
00196 if(strstr( nee_option,"off"))
00197 {
00198 const NEELevel NEE = NEE_Off;
00199 log_status(0,"Equisupport pruning is disabled");
00200 TRIE_OEL main_trie;
00201 AprioriSelector2<TRIE_OEL, TRIE_OI, LEAF_WC, T_R, DF_D, NEE>(
00202 min_supp, input_file, nr_of_transactions,
00203 freq_items_with_counters, tr_reader, df_decoder,
00204 main_trie, trcache_option, ordering_option, maxsize);
00205 }
00206 else
00207 {
00208 const NEELevel NEE = NEE_Full;
00209 log_status(0,"Level 3 equisupport pruning is enabled");
00210 typedef Bodon::TrieNEE<TRIE_OEL> TRIENEE_OEL;
00211 typedef Bodon::TrieNEE<TRIE_OI> TRIENEE_OI;
00212 TRIENEE_OEL main_trie;
00213 AprioriSelector2<TRIENEE_OEL, TRIENEE_OI, LEAF_WC, T_R, DF_D, NEE>(
00214 min_supp, input_file, nr_of_transactions,
00215 freq_items_with_counters, tr_reader, df_decoder,
00216 main_trie, trcache_option, ordering_option, maxsize);
00217 }
00218 }
00219 }
00220 catch (std::ios_base::failure e)
00221 {
00222 log_err(0,"Exiting the program due to IO exception");
00223 return 1;
00224 }
00225 }
00226
00227