00001 #ifndef SimplePruner_HPP
00002 #define SimplePruner_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012 namespace Bodon
00013 {
00014 namespace dynamic_trie
00015 {
00016 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00017 class LEAF_ALLOCATOR, NEELevel NEE>
00018 class SimplePruner : public Bodon::inhomogeneous_trie::
00019 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR>
00020 {
00021 protected:
00022 std::vector<Edge> extenders;
00023 std::vector<item_t> leaf_neelist;
00024 std::vector<Edge> replace_list;
00025 unsigned int nr_of_new_candidates;
00026 unsigned int nr_of_nonprefix_equiitem;
00027 typedef Bodon::inhomogeneous_trie::
00028 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR> PARENT;
00029
00030 public:
00031 SimplePruner( TRIE_OEL& main_trie, DF_D& df_decoder,
00032 LEAF_ALLOCATOR& s_alloc ) :
00033 PARENT(main_trie, df_decoder, s_alloc){}
00034
00035
00036 protected:
00037 template <class TRIE> const void* isIncluded(
00038 const TRIE* trie, const std::vector<item_t>& an_itemset,
00039 std::vector<item_t>::const_iterator item_it ) const;
00040
00041 template <class TRIE> int helper_func(const TRIE* trie, const item_t new_item) const;
00042
00044 int isAllSubsetFrequent( std::vector<item_t>& maybe_candidate,
00045 const item_t new_item ) const;
00046
00047
00048 template <class TRIE> void generateCandidateAtParent(
00049 TRIE* trie, std::vector<item_t>& maybe_candidate );
00050 template <class TRIE> void generateCandidateAtParentNEE(
00051 TRIE* trie, std::vector<item_t>& maybe_candidate );
00052 };
00053 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00054 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00055 template <class TRIE> const void*
00056 SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00057 isIncluded( const TRIE* trie, const std::vector<item_t>& an_itemset,
00058 std::vector<item_t>::const_iterator item_it ) const
00059 {
00060 if( item_it == an_itemset.end() )
00061 return static_cast<const void*>(trie);
00062 else
00063 {
00064 void* subtrie;
00065 subtrie = trie->edgelist.find(*item_it);
00066 if(subtrie)
00067 {
00068 if( static_cast<LEAF*>(subtrie)->getCounter() & TWO_POW31 )
00069 return isIncluded(static_cast<TRIE_OEL*>(subtrie), an_itemset, ++item_it);
00070 else
00071 return isIncluded(static_cast<TRIE_OI*>(subtrie), an_itemset, ++item_it);
00072 }
00073 else
00074 if(NEE == NEE_Level1 && trie->neeFind(*item_it) )
00075 return isIncluded(trie, an_itemset, ++item_it);
00076 else
00077 return NULL;
00078 }
00079 }
00080
00081 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00082 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00083 template <class TRIE> inline int
00084 SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00085 helper_func( const TRIE* temp_trie, const item_t new_item) const
00086 {
00087 if(NEE > NEE_Off && temp_trie->neeFind(new_item))
00088 {
00089 if(NEE == NEE_Level1)
00090 return 1;
00091 else
00092 {
00093
00094 return 2;
00095 }
00096 }
00097 else
00098 return 0;
00099 }
00100
00101
00102 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00103 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE >
00104 inline int SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00105 isAllSubsetFrequent( std::vector<item_t>& generator, const item_t new_item ) const
00106 {
00107 assert(itemset.size() > 1);
00108 std::vector<item_t>::iterator item_it = generator.end()-2;
00109 register item_t deleted_item = *item_it, temp_item;
00110 const void* temp_trie;
00111
00112 generator.erase( item_it );
00113 temp_trie = isIncluded( &this->main_trie, generator, generator.begin() );
00114 if( temp_trie)
00115 {
00116 if( static_cast<const LEAF*>(temp_trie)->getCounter() & TWO_POW31 )
00117 {
00118 if( !static_cast<const TRIE_OEL*>(temp_trie)->edgelist.find(new_item) )
00119 {
00120 generator.insert( item_it, deleted_item );
00121 return helper_func(static_cast<const TRIE_OEL*>(temp_trie), new_item);
00122 }
00123 }
00124 else
00125 {
00126 if( !static_cast<const TRIE_OI*>(temp_trie)->edgelist.find(new_item) )
00127 {
00128 generator.insert( item_it, deleted_item );
00129 return helper_func(static_cast<const TRIE_OI*>(temp_trie), new_item);
00130 }
00131 }
00132 }
00133 else
00134 {
00135 generator.insert( item_it, deleted_item );
00136 return 0;
00137 }
00138 while(item_it != generator.begin())
00139 {
00140 --item_it;
00141 temp_item = *item_it;
00142 *item_it = deleted_item;
00143 deleted_item = temp_item;
00144 temp_trie = isIncluded( &this->main_trie, generator, generator.begin() );
00145 if( temp_trie)
00146 {
00147 if( static_cast<const LEAF*>(temp_trie)->getCounter() & TWO_POW31 )
00148 {
00149 if( !static_cast<const TRIE_OEL*>(temp_trie)->edgelist.find(new_item) )
00150 {
00151 generator.insert( item_it, deleted_item );
00152 return helper_func(static_cast<const TRIE_OEL*>(temp_trie), new_item);
00153 }
00154 }
00155 else
00156 {
00157 if( !static_cast<const TRIE_OI*>(temp_trie)->edgelist.find(new_item) )
00158 {
00159 generator.insert( item_it, deleted_item );
00160 return helper_func(static_cast<const TRIE_OI*>(temp_trie), new_item);
00161 }
00162 }
00163 }
00164 else
00165 {
00166 generator.insert( item_it, deleted_item );
00167 return 0;
00168 }
00169 }
00170 generator.insert( item_it, deleted_item );
00171 return 1;
00172 }
00173
00174
00175 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00176 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00177 template <class TRIE> inline void
00178 SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00179 generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00180 {
00181 typename TRIE::iterator itEdge2;
00182 LEAF* toExtendAsLeaf;
00183 replace_list.clear();
00184 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00185 itEdge != trie->edgelist.end(); ++itEdge )
00186 {
00187 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00188 PARENT::df_decoder.pushItemWithWrite(
00189 (*itEdge).first, toExtendAsLeaf->getCounter() );
00190 PARENT::df_decoder.popItem();
00191 maybe_candidate.push_back((*itEdge).first);
00192 itEdge2 = itEdge;
00193 ++itEdge2;
00194 extenders.clear();
00195 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00196 {
00197 if( isAllSubsetFrequent( maybe_candidate, (*itEdge2).first ) == 1 )
00198 {
00199 extenders.push_back(Edge((*itEdge2).first, PARENT::s_alloc.allocate()));
00200 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00201 }
00202 }
00203 maybe_candidate.pop_back();
00204
00205 if(!extenders.empty())
00206 {
00207 if( 2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00208 {
00209 TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00210 replace_list.push_back(Edge((*itEdge).first, toExtend));
00211 toExtend->edgelist.insert(extenders);
00212 }
00213 else
00214 {
00215 TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter() );
00216 replace_list.push_back(Edge((*itEdge).first, toExtend));
00217 toExtend->edgelist.insert(extenders);
00218 }
00219 #if DEBUG_LEVEL >= LEVEL_INFO
00220 nr_of_new_candidates +=extenders.size();
00221 #endif
00222 }
00223 PARENT::s_alloc.free(toExtendAsLeaf);
00224 }
00225 trie->edgelist.clear();
00226 trie->edgelist.insert(replace_list);
00227 }
00228
00229 template <class DF_D, class TRIE_OEL, class TRIE_OI,
00230 class LEAF, class LEAF_ALLOCATOR, NEELevel NEE>
00231 template <class TRIE> inline void
00232 SimplePruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00233 generateCandidateAtParentNEE( TRIE* trie, std::vector<item_t>& maybe_candidate )
00234 {
00235 typename TRIE::iterator itEdge2;
00236 LEAF* toExtendAsLeaf;
00237 replace_list.clear();
00238 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00239 itEdge != trie->edgelist.end(); ++itEdge )
00240 {
00241 maybe_candidate.push_back((*itEdge).first);
00242 itEdge2 = itEdge;
00243 ++itEdge2;
00244 extenders.clear();
00245 leaf_neelist.clear();
00246 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00247 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00248 {
00249 switch( isAllSubsetFrequent( maybe_candidate, (*itEdge2).first ))
00250 {
00251 case 1:
00252 extenders.push_back(Edge((*itEdge2).first, PARENT::s_alloc.allocate()));
00253 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00254 break;
00255 case 2:
00256 leaf_neelist.push_back((*itEdge2).first);
00257 break;
00258 }
00259 }
00260 maybe_candidate.pop_back();
00261 if(extenders.empty() && leaf_neelist.empty() &&
00262 ( NEE == NEE_Full ||
00263 NEE == NEE_Level1 && PARENT::df_decoder.EEListEmpty()) )
00264 {
00265 PARENT::df_decoder.pushItemWithWrite(
00266 (*itEdge).first, toExtendAsLeaf->getCounter() );
00267 PARENT::df_decoder.popItem();
00268 }
00269 else
00270 {
00271 if( extenders.empty() ||
00272 2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00273 {
00274 TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00275 toExtend->edgelist.insert(extenders);
00276 toExtend->neePushBackSorted( leaf_neelist );
00277 replace_list.push_back(Edge((*itEdge).first, toExtend));
00278 }
00279 else
00280 {
00281 TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter());
00282 toExtend->edgelist.insert(extenders);
00283 toExtend->neePushBackSorted( leaf_neelist );
00284 replace_list.push_back(Edge((*itEdge).first, toExtend));
00285 }
00286 #if DEBUG_LEVEL >= LEVEL_INFO
00287 nr_of_new_candidates +=extenders.size();
00288 nr_of_nonprefix_equiitem += leaf_neelist.size();
00289 #endif
00290
00291 }
00292 PARENT::s_alloc.free(toExtendAsLeaf);
00293 }
00294 trie->edgelist.clear();
00295 trie->edgelist.insert(replace_list);
00296 }
00297 }
00298 }
00299 #endif