00001 #ifndef InfreqRemover_HPP
00002 #define InfreqRemover_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace dynamic_trie
00017 {
00018
00019 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00020 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00021 class InfreqRemover : public Bodon::inhomogeneous_trie::
00022 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR>
00023 {
00024 private:
00025 typedef Bodon::inhomogeneous_trie::
00026 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR> PARENT;
00027 protected:
00028 std::vector<Edge> extenders;
00029 std::vector<item_t> neelist;
00030 unsigned int nr_of_deleted;
00031 unsigned int total_nr_of_deleted;
00032 unsigned int nr_of_prefix_equiitem;
00033 unsigned int total_nr_of_prefix_equiitem;
00034
00035 public:
00036 InfreqRemover( TRIE_OEL& main_trie, DF_D& df_decoder,
00037 LEAF_ALLOCATOR& s_alloc ) :
00038 PARENT(main_trie, df_decoder, s_alloc),
00039 total_nr_of_deleted(0), total_nr_of_prefix_equiitem(0){}
00040
00042 void deleteInfrequent( const counter_t min_supp,
00043 unsigned int candidate_size );
00044 void afterWorkDel();
00045 protected:
00046
00048 template <class TRIE> void delete_infrequent_subtrie(
00049 TRIE* subtrie, const counter_t min_supp,
00050 unsigned int step_to_cand_par );
00051 template <class TRIE> void delete_infrequent_subtrieNEE(
00052 TRIE* subtrie, const counter_t min_supp,
00053 unsigned int step_to_cand_par);
00054 };
00055
00056 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, class LEAF_ALLOCATOR,
00057 NEELevel NEE>
00058 void InfreqRemover<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00059 deleteInfrequent( const counter_t min_supp,
00060 unsigned int candidate_size )
00061 {
00062 assert(candidate_size > 2);
00063 nr_of_deleted = 0;
00064 if(NEE > NEE_Off)
00065 {
00066 nr_of_prefix_equiitem = 0;
00067 PARENT::df_decoder.pushEquisupportItemSet(
00068 PARENT::main_trie.neelist);
00069 delete_infrequent_subtrieNEE<TRIE_OEL>(
00070 &this->main_trie, min_supp, candidate_size - 1 );
00071 log_info_det(0,"Number of prefix equisupport items: %d", nr_of_prefix_equiitem);
00072 #if DEBUG_LEVEL >= LEVEL_INFO
00073 total_nr_of_prefix_equiitem += nr_of_prefix_equiitem;
00074 #endif
00075
00076 }
00077 else
00078 delete_infrequent_subtrie<TRIE_OEL>(
00079 &this->main_trie, min_supp, candidate_size - 1 );
00080 log_info_det(0,"Number of infrequent candidates: %d", nr_of_deleted);
00081 #if DEBUG_LEVEL >= LEVEL_INFO
00082 total_nr_of_deleted += nr_of_deleted;
00083 #endif
00084 }
00085
00086 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, class LEAF_ALLOCATOR,
00087 NEELevel NEE>
00088 void InfreqRemover<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00089 afterWorkDel()
00090 {
00091 log_info(0,"The total number of infrequent candidates: %d",
00092 total_nr_of_deleted);
00093 if(NEE > NEE_Off)
00094 log_info(0,"The total number of prefix equisupport item: %d",
00095 total_nr_of_prefix_equiitem);
00096 }
00097
00098 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, class LEAF_ALLOCATOR,
00099 NEELevel NEE>
00100 template <class TRIE>
00101 void InfreqRemover<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00102 delete_infrequent_subtrie( TRIE* subtrie, const counter_t min_supp,
00103 unsigned int step_to_cand_par)
00104 {
00105 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00106 if(step_to_cand_par)
00107 {
00108 --step_to_cand_par;
00109 while( itEdge != subtrie->edgelist.end() )
00110 {
00111 if( static_cast<LEAF*>((*itEdge).second)->getCounter() & TWO_POW31 )
00112 {
00113 delete_infrequent_subtrie<TRIE_OEL>(
00114 static_cast<TRIE_OEL*>((*itEdge).second), min_supp, step_to_cand_par);
00115 if( static_cast<TRIE_OEL*>((*itEdge).second)->edgelist.empty() )
00116 {
00117 delete static_cast<TRIE_OEL*>((*itEdge).second);
00118 itEdge = subtrie->edgelist.erase(itEdge);
00119 }
00120 else ++itEdge;
00121 }
00122 else
00123 {
00124 delete_infrequent_subtrie<TRIE_OI>(
00125 static_cast<TRIE_OI*>((*itEdge).second), min_supp, step_to_cand_par);
00126 if( static_cast<TRIE_OI*>((*itEdge).second)->edgelist.empty() )
00127 {
00128 delete static_cast<TRIE_OI*>((*itEdge).second);
00129 itEdge = subtrie->edgelist.erase(itEdge);
00130 }
00131 else ++itEdge;
00132 }
00133 }
00134 }
00135 else
00136 {
00137 while( itEdge != subtrie->edgelist.end() )
00138 {
00139 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00140 {
00141 #if DEBUG_LEVEL >= LEVEL_INFO
00142 ++nr_of_deleted;
00143 #endif
00144 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00145 itEdge = subtrie->edgelist.erase(itEdge);
00146 }
00147 else
00148 ++itEdge;
00149 }
00150 }
00151 }
00152 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00153 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00154 template <class TRIE>
00155 void InfreqRemover<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00156 delete_infrequent_subtrieNEE( TRIE* subtrie, const counter_t min_supp,
00157 unsigned int step_to_cand_par)
00158 {
00159 typename TRIE::iterator itEdge( subtrie->edgelist.begin() );
00160 if(step_to_cand_par)
00161 {
00162 --step_to_cand_par;
00163 while( itEdge != subtrie->edgelist.end() )
00164 {
00165 PARENT::df_decoder.pushItem((*itEdge).first);
00166 if( static_cast<LEAF*>((*itEdge).second)->getCounter() & TWO_POW31 )
00167 {
00168 PARENT::df_decoder.pushEquisupportItemSet(
00169 static_cast<TRIE_OEL*>((*itEdge).second)->neelist);
00170 delete_infrequent_subtrieNEE<TRIE_OEL>(
00171 static_cast<TRIE_OEL*>((*itEdge).second), min_supp,
00172 step_to_cand_par);
00173 if( static_cast<TRIE_OEL*>((*itEdge).second)->edgelist.empty() &&
00174 ( NEE == NEE_Level1 && PARENT::df_decoder.nrOfEEItems() < step_to_cand_par + 1 ||
00175 NEE ==NEE_Full && step_to_cand_par != 0 ) )
00176 {
00177 PARENT::df_decoder.write(
00178 static_cast<TRIE_OEL*>((*itEdge).second)->getCounter() & TWO_POW31_1);
00179 delete static_cast<TRIE_OEL*>((*itEdge).second);
00180 itEdge = subtrie->edgelist.erase(itEdge);
00181 }
00182 else
00183 ++itEdge;
00184 }
00185 else
00186 {
00187 PARENT::df_decoder.pushEquisupportItemSet(
00188 static_cast<TRIE_OI*>((*itEdge).second)->neelist);
00189 delete_infrequent_subtrieNEE<TRIE_OI>(
00190 static_cast<TRIE_OI*>((*itEdge).second), min_supp,
00191 step_to_cand_par);
00192 if( static_cast<TRIE_OI*>((*itEdge).second)->edgelist.empty() &&
00193 (NEE == NEE_Level1 && PARENT::df_decoder.nrOfEEItems() < step_to_cand_par + 1 ||
00194 NEE ==NEE_Full && step_to_cand_par != 0 ) )
00195 {
00196 PARENT::df_decoder.write(
00197 static_cast<TRIE_OI*>((*itEdge).second)->getCounter());
00198 delete static_cast<TRIE_OI*>((*itEdge).second);
00199 itEdge = subtrie->edgelist.erase(itEdge);
00200 }
00201 else
00202 ++itEdge;
00203 }
00204 PARENT::df_decoder.popItem();
00205 }
00206 }
00207 else
00208 {
00209 neelist.clear();
00210 while( itEdge != subtrie->edgelist.end() )
00211 {
00212 if( static_cast<LEAF*>((*itEdge).second)->getCounter() < min_supp )
00213 {
00214 #if DEBUG_LEVEL >= LEVEL_INFO
00215 ++nr_of_deleted;
00216 #endif
00217 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00218 itEdge = subtrie->edgelist.erase(itEdge);
00219 }
00220 else if(static_cast<LEAF*>((*itEdge).second)->getCounter() ==
00221 (subtrie->getCounter() & TWO_POW31_1) )
00222 {
00223 neelist.push_back((*itEdge).first);
00224 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00225 itEdge = subtrie->edgelist.erase(itEdge);
00226 #if DEBUG_LEVEL >= LEVEL_INFO
00227 ++nr_of_prefix_equiitem;
00228 #endif
00229 }
00230 else
00231 ++itEdge;
00232 }
00233 subtrie->neeAddSorted(neelist);
00234 PARENT::df_decoder.pushEquisupportItemSet(neelist);
00235 }
00236 }
00237 }
00238 }
00239 #endif