00001 #ifndef SeqFrequentFilter_CPP 00002 #define SeqFrequentFilter_CPP 00003 00008 #include "common.hpp" 00009 #include "common/log.h" 00010 #include <vector> 00011 00012 00018 template <class IT_R> 00019 class SeqFrequentFilter 00020 { 00021 public: 00022 SeqFrequentFilter<IT_R>(IT_R& it_r):it_r(it_r){} 00023 00029 void findFrequentItems( 00030 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters, 00031 counter_t& nr_of_transactions, counter_t& min_supp, 00032 bool relative=false, double relminsupp=0.0); 00033 private: 00034 IT_R& it_r; 00035 00036 00037 }; 00038 00047 template <class IT_R> void SeqFrequentFilter<IT_R>::findFrequentItems( 00048 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters, 00049 counter_t& nr_of_transactions, counter_t& min_supp, 00050 bool relative, double relminsupp) 00051 { 00052 freq_items_with_counters.clear(); 00053 nr_of_transactions = 0; 00054 it_r.rewind(); 00055 00056 std::vector<item_t> transaction; 00057 std::vector<bool> pattern; 00058 std::vector< counter_t > temp_counter_vector; 00059 00061 std::vector<item_t>::iterator it_transaction; 00062 item_t nr_of_items = 0; 00063 item_t sum_of_lengths = 0; 00064 while( it_r.nextTransactionBIS( transaction ) ) 00065 { 00066 if( !transaction.empty() ) 00067 { 00068 nr_of_transactions++; 00069 for( it_transaction = transaction.begin(); 00070 it_transaction != transaction.end(); ++it_transaction ) 00071 { 00072 if( *it_transaction + 1 > temp_counter_vector.size() ) 00073 { 00074 temp_counter_vector.resize( *it_transaction + 1, 0 ); 00075 pattern.resize( *it_transaction + 1, 1 ); 00076 } 00077 if(pattern[*it_transaction]) 00078 { 00079 ++temp_counter_vector[*it_transaction]; 00080 pattern[*it_transaction] = false; 00081 } 00082 } 00083 for( it_transaction = transaction.begin(); 00084 it_transaction != transaction.end(); ++it_transaction ) 00085 pattern[*it_transaction] = true; 00086 #if DEBUG_LEVEL >= LEVEL_DBG 00087 sum_of_lengths += transaction.size(); 00088 #endif 00089 } 00090 } 00091 it_r.setLargestItem( temp_counter_vector.size() - 1 ); 00092 log_info(0,"Largest itemcode: %d", temp_counter_vector.size() - 1); 00093 00094 if(relative) 00095 min_supp = static_cast<unsigned int>(relminsupp * nr_of_transactions); 00097 for( std::vector< counter_t >::size_type index = 0; 00098 index < temp_counter_vector.size(); ++index ) 00099 { 00100 if(temp_counter_vector[index] >= min_supp) 00101 { 00102 std::pair<counter_t, item_t> temp_pair( 00103 temp_counter_vector[index],index); 00104 freq_items_with_counters.push_back(temp_pair); 00105 } 00106 #if DEBUG_LEVEL >= LEVEL_DBG 00107 if(temp_counter_vector[index] ) 00108 ++nr_of_items; 00109 #endif 00110 } 00111 log_dbg(0,"Number of items: %d", nr_of_items); 00112 log_dbg(0,"Average of the transactions' sizes: %d", 00113 sum_of_lengths / nr_of_transactions); 00114 } 00115 00116 #endif