00001 #ifndef CandGenInfreqRemoveNopruneMerge_HPP
00002 #define CandGenInfreqRemoveNopruneMerge_HPP
00003
00004
00005 #include "common.hpp"
00006 #include "common/Edge.hpp"
00007 #include "common/log.h"
00008 #include "apriori/bodon/trie/trie_manipulators/CandidateGeneratorNoprune.hpp"
00009 #include <vector>
00010
00011
00012
00013
00014
00015
00016 namespace Bodon
00017 {
00018 namespace trie
00019 {
00020 template <class DF_D, class TRIE, bool NEE>
00021 class CandGenInfreqRemoveNopruneMerge :
00022 public CandidateGeneratorNoprune<DF_D, TRIE, NEE>
00023 {
00024 protected:
00025 unsigned int nr_of_deleted;
00026 unsigned int total_nr_of_deleted;
00027 typedef CandidateGeneratorNoprune<DF_D, TRIE, NEE> PARENT;
00028 public:
00029 CandGenInfreqRemoveNopruneMerge( TRIE& main_trie, DF_D& df_decoder ) :
00030 PARENT(main_trie, df_decoder),
00031 total_nr_of_deleted(0){}
00032
00033
00039 void generateCandidate(unsigned int candidate_size);
00040
00041 void deleteInfrequent( const counter_t min_supp,
00042 unsigned int candidate_size );
00043 void afterWorkDel();
00044 protected:
00045
00046 void delete_infrequent_subtrie( TRIE* subtrie,
00047 const counter_t min_supp,
00048 unsigned int step_to_cand_par );
00049
00050 void delete_infrequent_subtrieNEE( TRIE* subtrie,
00051 const counter_t min_supp,
00052 std::vector<item_t>& NEEsum,
00053 unsigned int step_to_cand_par );
00054
00055
00056
00057 public:
00058
00059 };
00060
00061
00062 template <class DF_D, class TRIE, bool NEE>
00063 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, NEE>::
00064 generateCandidate(const unsigned int candidate_size)
00065 {
00066 if(candidate_size == 3)
00067 {
00068 PARENT::nr_of_new_candidates = 0;
00069 if(NEE > NEE_Off)
00070 {
00071 std::vector<item_t> NEEsum;
00072 generateCandidateFindParentNEE( &this->main_trie, NEEsum,
00073 candidate_size -2 );
00074 }
00075 else
00076 generateCandidateFindParent( &this->main_trie,
00077 candidate_size -2 );
00078 log_dbg(0,"Number of newly generated candidates: %d", PARENT::nr_of_new_candidates);
00079 #if DEBUG_LEVEL >= LEVEL_DBG
00080 nr_of_all_candidates += nr_of_new_candidates;
00081 #endif
00082 }
00083 else
00084 log_dbg(0,"This algorithm does not generate any candidate in this phase.");
00085
00086 }
00087
00088 template <class DF_D, class TRIE, bool NEE>
00089 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, NEE>::
00090 deleteInfrequent( const counter_t min_supp, unsigned int candidate_size )
00091 {
00092 assert(candidate_size > 1);
00093 nr_of_deleted = 0;
00094 PARENT::nr_of_new_candidates = 0;
00095 if(NEE > NEE_Off)
00096 {
00097 std::vector<item_t> NEEsum;
00098 delete_infrequent_subtrieNEE( &this->main_trie, min_supp, NEEsum,
00099 candidate_size - 1 );
00100 }
00101 else
00102 delete_infrequent_subtrie( &this->main_trie, min_supp,
00103 candidate_size - 1 );
00104 log_dbg(0,"Number of infrequent candidates: %d", nr_of_deleted);
00105 log_dbg(0,"Number of newly generated candidates (in del. phase!): %d",
00106 PARENT::nr_of_new_candidates);
00107 #if DEBUG_LEVEL >= LEVEL_DBG
00108 nr_of_all_candidates += nr_of_new_candidates;
00109 total_nr_of_deleted += nr_of_deleted;
00110 #endif
00111 }
00112
00113 template <class DF_D, class TRIE, bool NEE>
00114 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, NEE>::
00115 afterWorkDel()
00116 {
00117 log_dbg(0,"The total number of generated candidates (in del. p.): %d",
00118 PARENT::nr_of_all_candidates);
00119 log_dbg(0,"The total number of infrequent candidates: %d",
00120 total_nr_of_deleted);
00121 }
00122
00123 template <class DF_D, class TRIE, bool NEE>
00124 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, NEE>::
00125 delete_infrequent_subtrie( TRIE* subtrie, const counter_t min_supp,
00126 unsigned int step_to_cand_par)
00127 {
00128 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00129 if(step_to_cand_par)
00130 {
00131 --step_to_cand_par;
00132 while( itEdge != subtrie->edgelist.end() )
00133 {
00134 PARENT::df_decoder.pushItem( (*itEdge).first );
00135 delete_infrequent_subtrie( static_cast<TRIE*>((*itEdge).second), min_supp,
00136 step_to_cand_par);
00137 PARENT::df_decoder.popItem();
00138 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00139 {
00140 delete static_cast<TRIE*>((*itEdge).second);
00141 itEdge = subtrie->edgelist.erase(itEdge);
00142 }
00143 else ++itEdge;
00144 }
00145 }
00146 else
00147 {
00148 while( itEdge != subtrie->edgelist.end() )
00149 {
00150 if( static_cast<TRIE*>((*itEdge).second)->getCounter() < min_supp )
00151 {
00152 #if DEBUG_LEVEL >= LEVEL_DBG
00153 ++nr_of_deleted;
00154 #endif
00155 delete static_cast<TRIE*>((*itEdge).second);
00156 itEdge = subtrie->edgelist.erase(itEdge);
00157 }
00158 else
00159 ++itEdge;
00160 }
00161 generateCandidateAtParent(subtrie);
00162 #if DEBUG_LEVEL >= LEVEL_DBG
00163 for( typename TRIE::iterator it( subtrie->edgelist.begin() );
00164 it != subtrie->edgelist.end(); ++it)
00165 nr_of_new_candidates +=
00166 static_cast<TRIE*>((*it).second)->edgelist.size();
00167 #endif
00168 }
00169 }
00170 template <class DF_D, class TRIE, bool NEE>
00171 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, NEE>::
00172 delete_infrequent_subtrieNEE( TRIE* subtrie, const counter_t min_supp,
00173 std::vector<item_t>& NEEsum,
00174 unsigned int step_to_cand_par)
00175 {
00176 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00177 if(step_to_cand_par)
00178 {
00179 --step_to_cand_par;
00180 NEEsum.insert( NEEsum.end(),
00181 subtrie->neelist.begin(), subtrie->neelist.end() );
00182 while( itEdge != subtrie->edgelist.end() )
00183 {
00184 PARENT::df_decoder.pushItem( (*itEdge).first );
00185 delete_infrequent_subtrieNEE( static_cast<TRIE*>((*itEdge).second), min_supp,
00186 NEEsum, step_to_cand_par);
00187 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00188 {
00189 NEEsum.insert( NEEsum.end(),
00190 static_cast<TRIE*>((*itEdge).second)->neelist.begin(),
00191 static_cast<TRIE*>((*itEdge).second)->neelist.end() );
00192 PARENT::df_decoder.writeNEE( static_cast<TRIE*>((*itEdge).second)->getCounter(),
00193 NEEsum);
00194 NEEsum.resize( NEEsum.size() -
00195 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00196 delete static_cast<TRIE*>((*itEdge).second);
00197 itEdge = subtrie->edgelist.erase(itEdge);
00198 }
00199 else ++itEdge;
00200 PARENT::df_decoder.popItem();
00201 }
00202 NEEsum.resize( NEEsum.size() - subtrie->neelist.size() );
00203 }
00204 else
00205 {
00206 while( itEdge != subtrie->edgelist.end() )
00207 {
00208 if( static_cast<TRIE*>((*itEdge).second)->getCounter() < min_supp )
00209 {
00210 #if DEBUG_LEVEL >= LEVEL_DBG
00211 ++nr_of_deleted;
00212 #endif
00213 delete static_cast<TRIE*>((*itEdge).second);
00214 itEdge = subtrie->edgelist.erase(itEdge);
00215 }
00216 else if(static_cast<TRIE*>((*itEdge).second)->getCounter() ==
00217 subtrie->getCounter())
00218 {
00219 subtrie->neelist.push_back((*itEdge).first);
00220 delete static_cast<TRIE*>((*itEdge).second);
00221 itEdge = subtrie->edgelist.erase(itEdge);
00222 }
00223 else
00224 ++itEdge;
00225 }
00226 generateCandidateAtParentNEE(subtrie, NEEsum);
00227 #if DEBUG_LEVEL >= LEVEL_DBG
00228 for( typename TRIE::iterator it( subtrie->edgelist.begin() );
00229 it != subtrie->edgelist.end(); ++it)
00230 nr_of_new_candidates +=
00231 static_cast<TRIE*>((*it).second)->edgelist.size();
00232 #endif
00233 }
00234 }
00235 }
00236 }
00237 #endif