00001 #ifndef InfreqRemover_HPP
00002 #define InfreqRemover_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace trie
00017 {
00018 template <class DF_D, class TRIE, class TRIE_ALLOCATOR, NEELevel NEE>
00019 class InfreqRemover : public Bodon::inhomogeneous_trie::
00020 ManipulatorBase<DF_D, TRIE, TRIE_ALLOCATOR>
00021 {
00022 private:
00023 typedef Bodon::inhomogeneous_trie::ManipulatorBase<DF_D, TRIE, TRIE_ALLOCATOR> PARENT;
00024 protected:
00025 std::vector<Edge> extenders;
00026 unsigned int nr_of_deleted;
00027 unsigned int total_nr_of_deleted;
00028
00029 public:
00030 InfreqRemover( TRIE& main_trie, DF_D& df_decoder,
00031 TRIE_ALLOCATOR& s_alloc ) :
00032 Bodon::inhomogeneous_trie::ManipulatorBase<DF_D, TRIE, TRIE_ALLOCATOR>(
00033 main_trie, df_decoder, s_alloc),
00034 total_nr_of_deleted(0){}
00035
00037 void deleteInfrequent( const counter_t min_supp,
00038 unsigned int candidate_size );
00039 void afterWorkDel();
00040 protected:
00041
00043 void delete_infrequent_subtrie(
00044 TRIE* subtrie, const counter_t min_supp,
00045 unsigned int step_to_cand_par );
00046 void delete_infrequent_subtrieNEE(
00047 TRIE* subtrie, const counter_t min_supp,
00048 std::vector<item_t>& NEEsum, unsigned int step_to_cand_par);
00049 };
00050
00055 template <class DF_D, class TRIE, class TRIE_ALLOCATOR, NEELevel NEE>
00056 void InfreqRemover<DF_D, TRIE, TRIE_ALLOCATOR, NEE>::
00057 deleteInfrequent( const counter_t min_supp,
00058 unsigned int candidate_size )
00059 {
00060 assert(candidate_size > 2);
00061 nr_of_deleted = 0;
00062 if(NEE > NEE_Off)
00063 {
00064 std::vector<item_t> NEEsum;
00065 delete_infrequent_subtrieNEE( &this->main_trie, min_supp,
00066 NEEsum, candidate_size - 1 );
00067
00068 }
00069 else
00070 delete_infrequent_subtrie( &this->main_trie, min_supp,
00071 candidate_size - 1 );
00072 log_dbg(0,"Number of infrequent candidates: %d", nr_of_deleted);
00073 #if DEBUG_LEVEL >= LEVEL_DBG
00074 total_nr_of_deleted += nr_of_deleted;
00075 #endif
00076 }
00077
00078 template <class DF_D, class TRIE, class TRIE_ALLOCATOR, NEELevel NEE>
00079 void InfreqRemover<DF_D, TRIE, TRIE_ALLOCATOR, NEE>::
00080 afterWorkDel()
00081 {
00082 log_dbg(0,"The total number of infrequent candidates: %d",
00083 total_nr_of_deleted);
00084 }
00085
00092 template <class DF_D, class TRIE, class TRIE_ALLOCATOR, NEELevel NEE>
00093 void InfreqRemover<DF_D, TRIE, TRIE_ALLOCATOR, NEE>::
00094 delete_infrequent_subtrie( TRIE* subtrie, const counter_t min_supp,
00095 unsigned int step_to_cand_par)
00096 {
00097 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00098 if(step_to_cand_par)
00099 {
00100 --step_to_cand_par;
00101 while( itEdge != subtrie->edgelist.end() )
00102 {
00103 delete_infrequent_subtrie( static_cast<TRIE*>((*itEdge).second), min_supp,
00104 step_to_cand_par);
00105 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00106 {
00107 PARENT::s_alloc.free(static_cast<TRIE*>((*itEdge).second));
00108 itEdge = subtrie->edgelist.erase(itEdge);
00109 }
00110 else ++itEdge;
00111 }
00112 }
00113 else
00114 {
00115 while( itEdge != subtrie->edgelist.end() )
00116 {
00117 if( static_cast<TRIE*>((*itEdge).second)->getCounter() < min_supp )
00118 {
00119 #if DEBUG_LEVEL >= LEVEL_DBG
00120 ++nr_of_deleted;
00121 #endif
00122 PARENT::s_alloc.free(static_cast<TRIE*>((*itEdge).second));
00123 itEdge = subtrie->edgelist.erase(itEdge);
00124 }
00125 else
00126 ++itEdge;
00127 }
00128 }
00129 }
00130 template <class DF_D, class TRIE, class TRIE_ALLOCATOR, NEELevel NEE>
00131 void InfreqRemover<DF_D, TRIE, TRIE_ALLOCATOR, NEE>::
00132 delete_infrequent_subtrieNEE( TRIE* subtrie, const counter_t min_supp,
00133 std::vector<item_t>& NEEsum,
00134 unsigned int step_to_cand_par)
00135 {
00136 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00137 if(step_to_cand_par)
00138 {
00139 --step_to_cand_par;
00140 NEEsum.insert( NEEsum.end(),
00141 subtrie->neelist.begin(), subtrie->neelist.end() );
00142 while( itEdge != subtrie->edgelist.end() )
00143 {
00144 PARENT::df_decoder.pushItem((*itEdge).first);
00145 delete_infrequent_subtrieNEE( static_cast<TRIE*>((*itEdge).second), min_supp,
00146 NEEsum, step_to_cand_par);
00147 if( static_cast<TRIE*>((*itEdge).second)->edgelist.empty() )
00148 {
00149 NEEsum.insert( NEEsum.end(),
00150 static_cast<TRIE*>((*itEdge).second)->neelist.begin(),
00151 static_cast<TRIE*>((*itEdge).second)->neelist.end() );
00152 if( NEEsum.size() < step_to_cand_par + 1)
00153 {
00154
00155 PARENT::df_decoder.writeNEE(
00156 static_cast<TRIE*>((*itEdge).second)->getCounter(), NEEsum);
00157 NEEsum.resize( NEEsum.size() -
00158 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00159 PARENT::s_alloc.free(static_cast<TRIE*>((*itEdge).second));
00160 itEdge = subtrie->edgelist.erase(itEdge);
00161 }
00162 else
00163 {
00164 NEEsum.resize( NEEsum.size() -
00165 static_cast<TRIE*>((*itEdge).second)->neelist.size() );
00166 ++itEdge;
00167 }
00168 }
00169 else ++itEdge;
00170 PARENT::df_decoder.popItem();
00171 }
00172 NEEsum.resize( NEEsum.size() - subtrie->neelist.size() );
00173 }
00174 else
00175 {
00176 while( itEdge != subtrie->edgelist.end() )
00177 {
00178 if( static_cast<TRIE*>((*itEdge).second)->getCounter() < min_supp )
00179 {
00180 #if DEBUG_LEVEL >= LEVEL_DBG
00181 ++nr_of_deleted;
00182 #endif
00183 PARENT::s_alloc.free(static_cast<TRIE*>((*itEdge).second));
00184 itEdge = subtrie->edgelist.erase(itEdge);
00185 }
00186 else if(static_cast<TRIE*>((*itEdge).second)->getCounter() == subtrie->getCounter())
00187 {
00188 subtrie->neelist.push_back((*itEdge).first);
00189 PARENT::s_alloc.free(static_cast<TRIE*>((*itEdge).second));
00190 itEdge = subtrie->edgelist.erase(itEdge);
00191 }
00192 else
00193 ++itEdge;
00194 }
00195 }
00196 }
00197 }
00198 }
00199 #endif