00001 #ifndef InfreqRemover_HPP
00002 #define InfreqRemover_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace inhomogeneous_trie
00017 {
00018 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00019 NEELevel NEE, bool DEADENDPRUNE = true>
00020 class InfreqRemover : public ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR>
00021 {
00022 private:
00023 typedef ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR> PARENT;
00024 protected:
00025 std::vector<Edge> extenders;
00026 unsigned int nr_of_deleted;
00027 unsigned int total_nr_of_deleted;
00028
00029 public:
00030 InfreqRemover( TRIE& main_trie, DF_D& df_decoder,
00031 LEAF_ALLOCATOR& s_alloc ) :
00032 ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR>(main_trie, df_decoder, s_alloc),
00033 total_nr_of_deleted(0){}
00034
00036 void deleteInfrequent( const counter_t min_supp,
00037 unsigned int candidate_size );
00038 void afterWorkDel();
00039 protected:
00040
00042 void delete_infrequent_subtrie(
00043 TRIE* subtrie, const counter_t min_supp,
00044 unsigned int step_to_cand_par );
00045 void delete_infrequent_subtrieNEE(
00046 TRIE* subtrie, const counter_t min_supp,
00047 std::vector<item_t>& NEEsum, unsigned int step_to_cand_par);
00048
00049 void destroy(TRIE* subtrie);
00050 void destroyAndWriteNEE(TRIE* subtrie, std::vector<item_t>& NEEsum);
00051 };
00052
00053 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00054 NEELevel NEE, bool DEADENDPRUNE>
00055 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00056 deleteInfrequent( const counter_t min_supp,
00057 unsigned int candidate_size )
00058 {
00059 assert(candidate_size > 2);
00060 nr_of_deleted = 0;
00061 if(NEE > NEE_Off)
00062 {
00063 std::vector<item_t> NEEsum;
00064 delete_infrequent_subtrieNEE( &this->main_trie, min_supp,
00065 NEEsum, candidate_size - 1 );
00066
00067 }
00068 else
00069 delete_infrequent_subtrie( &this->main_trie, min_supp,
00070 candidate_size - 1 );
00071 log_dbg(0,"Number of infrequent candidates: %d", nr_of_deleted);
00072 #if DEBUG_LEVEL >= LEVEL_DBG
00073 total_nr_of_deleted += nr_of_deleted;
00074 #endif
00075 }
00076
00077 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00078 NEELevel NEE, bool DEADENDPRUNE>
00079 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00080 afterWorkDel()
00081 {
00082 log_dbg(0,"The total number of infrequent candidates: %d",
00083 total_nr_of_deleted);
00084 if(!DEADENDPRUNE)
00085 if(NEE > NEE_Off)
00086 {
00087 std::vector<item_t> NEEsum;
00088 destroyAndWriteNEE(&this->main_trie, NEEsum);
00089 }
00090 else
00091 destroy(&this->main_trie);
00092 }
00093
00094 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00095 NEELevel NEE, bool DEADENDPRUNE>
00096 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00097 delete_infrequent_subtrie( TRIE* subtrie, const counter_t min_supp,
00098 unsigned int step_to_cand_par)
00099 {
00100 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00101 if(step_to_cand_par)
00102 {
00103 --step_to_cand_par;
00104 while( itEdge != subtrie->edgelist.end() )
00105 {
00106 delete_infrequent_subtrie( static_cast<TRIE*>((*itEdge).second), min_supp,
00107 step_to_cand_par);
00108 if( DEADENDPRUNE &&
00109 static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00110 {
00111 delete static_cast<TRIE*>((*itEdge).second);
00112 itEdge = subtrie->edgelist.erase(itEdge);
00113 }
00114 else ++itEdge;
00115 }
00116 }
00117 else
00118 {
00119 while( itEdge != subtrie->edgelist.end() )
00120 {
00121 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00122 {
00123 #if DEBUG_LEVEL >= LEVEL_DBG
00124 ++nr_of_deleted;
00125 #endif
00126 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00127 itEdge = subtrie->edgelist.erase(itEdge);
00128 }
00129 else
00130 ++itEdge;
00131 }
00132 }
00133 }
00134 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00135 NEELevel NEE, bool DEADENDPRUNE>
00136 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00137 delete_infrequent_subtrieNEE( TRIE* subtrie, const counter_t min_supp,
00138 std::vector<item_t>& NEEsum,
00139 unsigned int step_to_cand_par)
00140 {
00141 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00142 if(step_to_cand_par)
00143 {
00144 --step_to_cand_par;
00145 NEEsum.insert( NEEsum.end(),
00146 subtrie->neelist.begin(), subtrie->neelist.end() );
00147 while( itEdge != subtrie->edgelist.end() )
00148 {
00149 PARENT::df_decoder.pushItem((*itEdge).first);
00150 delete_infrequent_subtrieNEE( static_cast<TRIE*>((*itEdge).second), min_supp,
00151 NEEsum, step_to_cand_par);
00152 if( DEADENDPRUNE &&
00153 static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00154 {
00155 NEEsum.insert( NEEsum.end(),
00156 static_cast<TRIE*>((*itEdge).second)->neelist.begin(),
00157 static_cast<TRIE*>((*itEdge).second)->neelist.end() );
00158 if( NEEsum.size() < step_to_cand_par + 1)
00159 {
00160
00161 PARENT::df_decoder.writeNEE(
00162 static_cast<TRIE*>((*itEdge).second)->getCounter(), NEEsum);
00163 NEEsum.resize( NEEsum.size() -
00164 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00165 delete static_cast<TRIE*>((*itEdge).second);
00166 itEdge = subtrie->edgelist.erase(itEdge);
00167 }
00168 else
00169 {
00170 NEEsum.resize( NEEsum.size() -
00171 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00172 ++itEdge;
00173 }
00174 }
00175 else ++itEdge;
00176 PARENT::df_decoder.popItem();
00177 }
00178 NEEsum.resize( NEEsum.size() - subtrie->neelist.size() );
00179 }
00180 else
00181 {
00182 while( itEdge != subtrie->edgelist.end() )
00183 {
00184 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00185 {
00186 #if DEBUG_LEVEL >= LEVEL_DBG
00187 ++nr_of_deleted;
00188 #endif
00189 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00190 itEdge = subtrie->edgelist.erase(itEdge);
00191 }
00192 else if(static_cast<LEAF*>((*itEdge).second)->getCounter() ==
00193 subtrie->getCounter())
00194 {
00195 subtrie->neelist.push_back((*itEdge).first);
00196 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00197 itEdge = subtrie->edgelist.erase(itEdge);
00198 }
00199 else
00200 ++itEdge;
00201 }
00202 }
00203 }
00204 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00205 NEELevel NEE, bool DEADENDPRUNE>
00206 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00207 destroy(TRIE* trie)
00208 {
00209 for(typename TRIE::iterator itEdge( trie->edgelist.begin());
00210 itEdge != trie->edgelist.end(); ++itEdge )
00211 {
00212 destroy( static_cast<TRIE*>((*itEdge).second) );
00213 delete static_cast<TRIE*>((*itEdge).second);
00214 }
00215 trie->edgelist.clear();
00216 }
00217 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00218 NEELevel NEE, bool DEADENDPRUNE>
00219 void InfreqRemover<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00220 destroyAndWriteNEE(TRIE* trie, std::vector<item_t>& NEEsum)
00221 {
00222 NEEsum.insert( NEEsum.end(),
00223 trie->neelist.begin(), trie->neelist.end() );
00224 PARENT::df_decoder.writeNEE( trie->getCounter(), NEEsum);
00225 for(typename TRIE::iterator itEdge( trie->edgelist.begin());
00226 itEdge != trie->edgelist.end(); ++itEdge )
00227 {
00228 PARENT::df_decoder.pushItem((*itEdge).first);
00229 destroyAndWriteNEE( static_cast<TRIE*>((*itEdge).second), NEEsum );
00230 delete static_cast<TRIE*>((*itEdge).second);
00231 PARENT::df_decoder.popItem();
00232 }
00233 trie->edgelist.clear();
00234 NEEsum.resize( NEEsum.size() - trie->neelist.size() );
00235 }
00236 }
00237 }
00238 #endif