00001 #ifndef CandGenInfreqRemoveNopruneMergeInhomo_HPP
00002 #define CandGenInfreqRemoveNopruneMergeInhomo_HPP
00003
00004
00005 #include "common.hpp"
00006 #include "common/Edge.hpp"
00007 #include "common/log.h"
00008 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/CandidateGeneratorNoprune.hpp"
00009 #include <vector>
00010
00011
00012
00013
00014
00015
00016 namespace Bodon
00017 {
00018 namespace inhomogeneous_trie
00019 {
00020 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00021 class CandGenInfreqRemoveNopruneMerge :
00022 public CandidateGeneratorNoprune<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>
00023 {
00024 private:
00025 typedef CandidateGeneratorNoprune<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE> PARENT;
00026 protected:
00027 unsigned int nr_of_deleted;
00028 unsigned int total_nr_of_deleted;
00029 public:
00030 CandGenInfreqRemoveNopruneMerge( TRIE& main_trie, DF_D& df_decoder,
00031 LEAF_ALLOCATOR& s_alloc ) :
00032 CandidateGeneratorNoprune<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>(
00033 main_trie, df_decoder, s_alloc),
00034 total_nr_of_deleted(0){}
00035
00036
00042 void generateCandidate(unsigned int candidate_size);
00043
00044 void deleteInfrequent( const counter_t min_supp,
00045 unsigned int candidate_size );
00046 void afterWorkDel();
00047 protected:
00048
00050 void delete_infrequent_subtrie( TRIE* subtrie,
00051 const counter_t min_supp,
00052 unsigned int step_to_cand_par );
00053
00054 void delete_infrequent_subtrieNEE( TRIE* subtrie,
00055 const counter_t min_supp,
00056 std::vector<item_t>& NEEsum,
00057 unsigned int step_to_cand_par );
00058
00059
00060
00061 public:
00062
00063 };
00064
00065
00066 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00067 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>::
00068 generateCandidate(const unsigned int candidate_size)
00069 {
00070 if(candidate_size == 3)
00071 {
00072 PARENT::nr_of_new_candidates = 0;
00073 if(NEE)
00074 {
00075 std::vector<item_t> NEEsum;
00076 generateCandidateFindParentNEE( &this->main_trie, NEEsum,
00077 candidate_size -2 );
00078 }
00079 else
00080 generateCandidateFindParent( &this->main_trie,
00081 candidate_size -2 );
00082 log_dbg(0,"Number of newly generated candidates: %d", PARENT::nr_of_new_candidates);
00083 #if DEBUG_LEVEL >= LEVEL_DBG
00084 nr_of_all_candidates += PARENT::nr_of_new_candidates;
00085 #endif
00086 }
00087 else
00088 log_dbg(0,"This algorithm does not generate any candidate in this phase.");
00089
00090 }
00091
00092 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00093 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>::
00094 deleteInfrequent( const counter_t min_supp, unsigned int candidate_size )
00095 {
00096 assert(candidate_size > 1);
00097 nr_of_deleted = 0;
00098 PARENT::nr_of_new_candidates = 0;
00099 if(NEE)
00100 {
00101 std::vector<item_t> NEEsum;
00102 delete_infrequent_subtrieNEE( &this->main_trie, min_supp, NEEsum,
00103 candidate_size - 1 );
00104 }
00105 else
00106 delete_infrequent_subtrie( &this->main_trie, min_supp,
00107 candidate_size - 1 );
00108 log_dbg(0,"Number of infrequent candidates: %d", nr_of_deleted);
00109 log_dbg(0,"Number of newly generated candidates (in del. phase!): %d",
00110 PARENT::nr_of_new_candidates);
00111 #if DEBUG_LEVEL >= LEVEL_DBG
00112 nr_of_all_candidates += PARENT::nr_of_new_candidates;
00113 total_nr_of_deleted += nr_of_deleted;
00114 #endif
00115 }
00116
00117 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00118 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>::
00119 afterWorkDel()
00120 {
00121 log_dbg(0,"The total number of generated candidates (in del. p.): %d",
00122 PARENT::nr_of_all_candidates);
00123 log_dbg(0,"The total number of infrequent candidates: %d",
00124 total_nr_of_deleted);
00125 }
00126
00127 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00128 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>::
00129 delete_infrequent_subtrie( TRIE* subtrie, const counter_t min_supp,
00130 unsigned int step_to_cand_par)
00131 {
00132 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00133 if(step_to_cand_par)
00134 {
00135 --step_to_cand_par;
00136 while( itEdge != subtrie->edgelist.end() )
00137 {
00138 PARENT::df_decoder.pushItem( (*itEdge).first );
00139 delete_infrequent_subtrie( static_cast<TRIE*>((*itEdge).second), min_supp,
00140 step_to_cand_par);
00141 PARENT::df_decoder.popItem();
00142 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00143 {
00144 delete static_cast<TRIE*>((*itEdge).second);
00145 itEdge = subtrie->edgelist.erase(itEdge);
00146 }
00147 else ++itEdge;
00148 }
00149 }
00150 else
00151 {
00152 while( itEdge != subtrie->edgelist.end() )
00153 {
00154 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00155 {
00156 #if DEBUG_LEVEL >= LEVEL_DBG
00157 ++nr_of_deleted;
00158 #endif
00159 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00160 itEdge = subtrie->edgelist.erase(itEdge);
00161 }
00162 else
00163 ++itEdge;
00164 }
00165 generateCandidateAtParent(subtrie);
00166 #if DEBUG_LEVEL >= LEVEL_DBG
00167 for( typename TRIE::iterator it( subtrie->edgelist.begin() );
00168 it != subtrie->edgelist.end(); ++it)
00169 nr_of_new_candidates +=
00170 static_cast<TRIE*>((*it).second)->edgelist.size();
00171 #endif
00172 }
00173 }
00174 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, bool NEE>
00175 void CandGenInfreqRemoveNopruneMerge<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE>::
00176 delete_infrequent_subtrieNEE( TRIE* subtrie, const counter_t min_supp,
00177 std::vector<item_t>& NEEsum,
00178 unsigned int step_to_cand_par)
00179 {
00180 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00181 if(step_to_cand_par)
00182 {
00183 --step_to_cand_par;
00184 NEEsum.insert( NEEsum.end(),
00185 subtrie->neelist.begin(), subtrie->neelist.end() );
00186 while( itEdge != subtrie->edgelist.end() )
00187 {
00188 PARENT::df_decoder.pushItem( (*itEdge).first );
00189 delete_infrequent_subtrieNEE( static_cast<TRIE*>((*itEdge).second), min_supp,
00190 NEEsum, step_to_cand_par);
00191 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00192 {
00193 NEEsum.insert( NEEsum.end(),
00194 static_cast<TRIE*>((*itEdge).second)->neelist.begin(),
00195 static_cast<TRIE*>((*itEdge).second)->neelist.end() );
00196 PARENT::df_decoder.writeNEE( static_cast<TRIE*>((*itEdge).second)->getCounter(),
00197 NEEsum);
00198 NEEsum.resize( NEEsum.size() -
00199 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00200 delete static_cast<TRIE*>((*itEdge).second);
00201 itEdge = subtrie->edgelist.erase(itEdge);
00202 }
00203 else ++itEdge;
00204 PARENT::df_decoder.popItem();
00205 }
00206 NEEsum.resize( NEEsum.size() - subtrie->neelist.size() );
00207 }
00208 else
00209 {
00210 while( itEdge != subtrie->edgelist.end() )
00211 {
00212 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00213 {
00214 #if DEBUG_LEVEL >= LEVEL_DBG
00215 ++nr_of_deleted;
00216 #endif
00217 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00218 itEdge = subtrie->edgelist.erase(itEdge);
00219 }
00220 else if(static_cast<LEAF*>((*itEdge).second)->getCounter() ==
00221 subtrie->getCounter())
00222 {
00223 subtrie->neelist.push_back((*itEdge).first);
00224 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00225 itEdge = subtrie->edgelist.erase(itEdge);
00226 }
00227 else
00228 ++itEdge;
00229 }
00230 generateCandidateAtParentNEE(subtrie, NEEsum);
00231 #if DEBUG_LEVEL >= LEVEL_DBG
00232 for( typename TRIE::iterator it( subtrie->edgelist.begin() );
00233 it != subtrie->edgelist.end(); ++it)
00234 nr_of_new_candidates +=
00235 static_cast<TRIE*>((*it).second)->edgelist.size();
00236 #endif
00237 }
00238 }
00239 }
00240 }
00241 #endif