00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace inhomogeneous_trie
00017 {
00018 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00019 bool NEE, bool DEADENDPRUNE = true>
00020 class IntersectProPruner : public ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR>
00021 {
00022 protected:
00023 std::vector<Edge> extenders;
00024 mutable std::vector<item_t> ext_items;
00025 mutable std::vector<item_t> leaf_neelist;
00026 std::vector<Edge> replace_list;
00027 bool is_there_any_new_candidate;
00028 typedef ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR> PARENT;
00029 public:
00030 IntersectProPruner( TRIE& main_trie, DF_D& df_decoder,
00031 LEAF_ALLOCATOR& s_alloc ) :
00032 PARENT(main_trie, df_decoder, s_alloc){}
00033
00034 protected:
00035
00036 void intersect( const TRIE* subset_trie ) const;
00037
00038 void intersectNEE( const TRIE* subset_trie ) const;
00039
00040 void filterNonExtenders(
00041 const std::vector<const TRIE*>& subset_tries,
00042 const item_t leaf_item ) const;
00043
00044 void filterNonExtendersNEE(
00045 const std::vector<const TRIE*>& subset_tries,
00046 const item_t leaf_item ) const;
00047
00048
00049 bool findSubsetTries(
00050 std::vector<item_t>& itemset,
00051 std::vector<const TRIE*>& subset_trie) const;
00052
00053 void generateCandidateAtParent(
00054 TRIE* trie, std::vector<item_t>& maybe_candidate);
00055 void generateCandidateAtParentNEE(
00056 TRIE* trie, std::vector<item_t>& maybe_candidate,
00057 std::vector<item_t>& NEEsum);
00058 void generateCandidateAtParentNEENoDeadend(
00059 TRIE* trie, std::vector<item_t>& maybe_candidate);
00060
00061
00062 };
00063
00064 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00065 bool NEE, bool DEADENDPRUNE> inline void
00066 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00067 intersect( const TRIE* subset_trie) const
00068 {
00069 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00070 std::vector<item_t>::iterator it_i(ext_items.begin());
00071 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00072 {
00073 if(*it_e < *it_i)
00074 ++it_e;
00075 else if(*it_e > *it_i)
00076 it_i = ext_items.erase(it_i);
00077 else
00078 {
00079 ++it_e;
00080 ++it_i;
00081 }
00082 }
00083 ext_items.erase(it_i, ext_items.end());
00084 }
00085
00086 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00087 bool NEE, bool DEADENDPRUNE> inline void
00088 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00089 intersectNEE( const TRIE* subset_trie ) const
00090 {
00091 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00092 std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00093 std::vector<item_t>::iterator it_i(ext_items.begin());
00094 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00095 {
00096 if(*it_e < *it_i)
00097 ++it_e;
00098 else if(*it_e > *it_i)
00099 {
00100 while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00101 ++it_nee;
00102 if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00103 leaf_neelist.push_back(*it_i);
00104 it_i = ext_items.erase(it_i);
00105 }
00106 else
00107 {
00108 ++it_e;
00109 ++it_i;
00110 }
00111 }
00112 if(it_i != ext_items.end())
00113 {
00114 std::vector<item_t>::iterator it_2(it_i);
00115 do
00116 {
00117 while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00118 ++it_nee;
00119 if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00120 leaf_neelist.push_back(*it_i);
00121 ++it_i;
00122 }
00123 while(it_i != ext_items.end());
00124 ext_items.erase(it_2, ext_items.end());
00125 }
00126 }
00127
00128 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00129 bool NEE, bool DEADENDPRUNE> inline void
00130 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00131 filterNonExtenders( const std::vector<const TRIE*>& subset_tries,
00132 const item_t leaf_item ) const
00133 {
00134 std::vector<typename TRIE::const_iterator> subset_child_iters;
00135 subset_child_iters.reserve(subset_tries.size());
00136 for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin();
00137 it != subset_tries.end(); ++it)
00138 subset_child_iters.push_back((*it)->edgelist.begin());
00139
00140 for(std::vector<item_t>::size_type index = 0;
00141 index < subset_tries.size(); ++index)
00142 {
00143 while( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00144 (*(subset_child_iters[index])).first < leaf_item)
00145 ++subset_child_iters[index];
00146 if( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00147 (*(subset_child_iters[index])).first == leaf_item)
00148 intersect(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00149 else
00150 {
00151 ext_items.clear();
00152 break;
00153 }
00154 }
00155 }
00156
00157
00158 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00159 bool NEE, bool DEADENDPRUNE> inline void
00160 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00161 filterNonExtendersNEE( const std::vector<const TRIE*>& subset_tries,
00162 const item_t leaf_item ) const
00163 {
00164 std::vector<typename TRIE::const_iterator> subset_child_iters;
00165 std::vector<vector<item_t>::const_iterator> subset_nee_iters;
00166 subset_child_iters.reserve(subset_tries.size());
00167 subset_nee_iters.reserve(subset_tries.size());
00168 for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin();
00169 it != subset_tries.end(); ++it)
00170 {
00171 subset_child_iters.push_back((*it)->edgelist.begin());
00172 subset_nee_iters.push_back((*it)->neelist.begin());
00173 }
00174
00175 for(std::vector<item_t>::size_type index = 0;
00176 index < subset_tries.size(); ++index)
00177 {
00178 while( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00179 (*(subset_child_iters[index])).first < leaf_item)
00180 ++subset_child_iters[index];
00181 if( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00182 (*(subset_child_iters[index])).first == leaf_item)
00183 intersectNEE(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00184 else
00185 {
00186 while( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00187 *subset_nee_iters[index] < leaf_item)
00188 ++subset_nee_iters[index];
00189 if( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00190 *subset_nee_iters[index] == leaf_item)
00191 intersectNEE( subset_tries[index] );
00192 else
00193 {
00194 ext_items.clear();
00195 break;
00196 }
00197 }
00198 }
00199 }
00200 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00201 bool NEE, bool DEADENDPRUNE> inline bool
00202 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00203 findSubsetTries( std::vector<item_t>& itemset,
00204 std::vector<const TRIE*>& subset_tries ) const
00205 {
00206 assert(!itemset.empty());
00207 std::vector<item_t>::iterator item_it = itemset.end()-1;
00208 register item_t deleted_item = *item_it, temp_item;
00209 const TRIE* a_subset_trie;
00210
00211 itemset.pop_back();
00212 a_subset_trie = this->main_trie.isIncluded( itemset, itemset.begin() );
00213 if(a_subset_trie)
00214 subset_tries.push_back(a_subset_trie);
00215 else
00216 {
00217 itemset.push_back(deleted_item);
00218 return false;
00219 }
00220 while(item_it != itemset.begin() )
00221 {
00222 --item_it;
00223 temp_item = *item_it;
00224 *item_it = deleted_item;
00225 deleted_item = temp_item;
00226 a_subset_trie = this->main_trie.isIncluded( itemset, itemset.begin() );
00227 if(a_subset_trie)
00228 subset_tries.push_back(a_subset_trie);
00229 else
00230 {
00231 itemset.insert( item_it, deleted_item );
00232 return false;
00233 }
00234 }
00235 itemset.insert( item_it, deleted_item );
00236 return true;
00237 }
00238
00239 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00240 bool NEE, bool DEADENDPRUNE> inline void
00241 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00242 generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00243 {
00244 std::vector<const TRIE*> subset_tries;
00245 const bool all_subset_included = findSubsetTries(
00246 maybe_candidate, subset_tries);
00247 typename TRIE::iterator itEdge2;
00248 LEAF* toExtendAsLeaf;
00249 TRIE* toExtend;
00250 replace_list.clear();
00251 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00252 itEdge != trie->edgelist.end(); ++itEdge )
00253 {
00254 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00255 PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00256 toExtendAsLeaf->getCounter() );
00257 PARENT::df_decoder.popItem();
00258 if(all_subset_included)
00259 {
00260 itEdge2 = itEdge;
00261 ++itEdge2;
00262 ext_items.clear();
00263 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00264 ext_items.push_back((*itEdge2).first);
00265 filterNonExtenders( subset_tries, (*itEdge).first );
00266 extenders.clear();
00267 for(std::vector<item_t>::iterator it = ext_items.begin();
00268 it != ext_items.end(); ++it)
00269 {
00270 extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00271 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00272 }
00273 if(!extenders.empty())
00274 {
00275 toExtend = new TRIE(toExtendAsLeaf->getCounter());
00276 toExtend->edgelist.insert(extenders);
00277 replace_list.push_back(Edge((*itEdge).first, toExtend));
00278 if( !DEADENDPRUNE)
00279 is_there_any_new_candidate = true;
00280 }
00281 }
00282 PARENT::s_alloc.free(toExtendAsLeaf);
00283 }
00284 trie->edgelist.clear();
00285 trie->edgelist.insert(replace_list);
00286 }
00287 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00288 bool NEE, bool DEADENDPRUNE> inline void
00289 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00290 generateCandidateAtParentNEE(TRIE* trie, std::vector<item_t>& maybe_candidate,
00291 std::vector<item_t>& NEEsum)
00292 {
00293 std::vector<const TRIE*> subset_tries;
00294 NEEsum.insert( NEEsum.end(),
00295 trie->neelist.begin(), trie->neelist.end() );
00296 replace_list.clear();
00297 if( findSubsetTries( maybe_candidate, subset_tries ) )
00298 {
00299 typename TRIE::iterator itEdge2;
00300 LEAF* toExtendAsLeaf;
00301 TRIE* toExtend;
00302 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00303 itEdge != trie->edgelist.end(); ++itEdge )
00304 {
00305 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00306 itEdge2 = itEdge;
00307 ++itEdge2;
00308 ext_items.clear();
00309 leaf_neelist.clear();
00310 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00311 ext_items.push_back((*itEdge2).first);
00312 filterNonExtendersNEE( subset_tries, (*itEdge).first );
00313 extenders.clear();
00314 for(std::vector<item_t>::iterator it = ext_items.begin();
00315 it != ext_items.end(); ++it)
00316 {
00317 extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00318 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00319 }
00320 if(DEADENDPRUNE && extenders.empty() && leaf_neelist.empty() && NEEsum.empty() )
00321 {
00322 PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00323 toExtendAsLeaf->getCounter() );
00324 PARENT::df_decoder.popItem();
00325 }
00326 else
00327 {
00328 toExtend = new TRIE(toExtendAsLeaf->getCounter());
00329 toExtend->neePushBackSorted( leaf_neelist );
00330 replace_list.push_back(Edge((*itEdge).first, toExtend));
00331 toExtend->edgelist.insert(extenders);
00332 if( !DEADENDPRUNE)
00333 is_there_any_new_candidate = true;
00334 }
00335 PARENT::s_alloc.free(toExtendAsLeaf);
00336 }
00337 trie->edgelist.clear();
00338 trie->edgelist.insert(replace_list);
00339 }
00340 else if(DEADENDPRUNE && NEEsum.empty())
00341 {
00342 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00343 itEdge != trie->edgelist.end(); ++itEdge )
00344 {
00345 PARENT::df_decoder.pushItemWithWrite(
00346 (*itEdge).first,
00347 static_cast<LEAF*>((*itEdge).second)->getCounter() );
00348 PARENT::df_decoder.popItem();
00349 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00350 }
00351 trie->edgelist.clear();
00352 }
00353 else
00354 {
00355 TRIE* toExtend;
00356 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00357 itEdge != trie->edgelist.end(); ++itEdge )
00358 {
00359 toExtend = new TRIE(
00360 static_cast<LEAF*>((*itEdge).second)->getCounter());
00361 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00362 replace_list.push_back(Edge((*itEdge).first, toExtend));
00363 }
00364 trie->edgelist.clear();
00365 trie->edgelist.insert(replace_list);
00366 }
00367 NEEsum.resize( NEEsum.size() - trie->neelist.size() );
00368 }
00369 template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR,
00370 bool NEE, bool DEADENDPRUNE> inline void
00371 IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00372 generateCandidateAtParentNEENoDeadend(TRIE* trie, std::vector<item_t>& maybe_candidate)
00373 {
00374 std::vector<const TRIE*> subset_tries;
00375 replace_list.clear();
00376 if( findSubsetTries( maybe_candidate, subset_tries ) )
00377 {
00378 typename TRIE::iterator itEdge2;
00379 LEAF* toExtendAsLeaf;
00380 TRIE* toExtend;
00381 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00382 itEdge != trie->edgelist.end(); ++itEdge )
00383 {
00384 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00385 itEdge2 = itEdge;
00386 ++itEdge2;
00387 ext_items.clear();
00388 leaf_neelist.clear();
00389 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00390 ext_items.push_back((*itEdge2).first);
00391 filterNonExtendersNEE( subset_tries, (*itEdge).first );
00392 extenders.clear();
00393 for(std::vector<item_t>::iterator it = ext_items.begin();
00394 it != ext_items.end(); ++it)
00395 {
00396 extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00397 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00398 }
00399 toExtend = new TRIE(toExtendAsLeaf->getCounter());
00400 toExtend->neePushBackSorted( leaf_neelist );
00401 replace_list.push_back(Edge((*itEdge).first, toExtend));
00402 toExtend->edgelist.insert(extenders);
00403 is_there_any_new_candidate = true;
00404 PARENT::s_alloc.free(toExtendAsLeaf);
00405 }
00406 trie->edgelist.clear();
00407 trie->edgelist.insert(replace_list);
00408 }
00409 else
00410 {
00411 TRIE* toExtend;
00412 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00413 itEdge != trie->edgelist.end(); ++itEdge )
00414 {
00415 toExtend = new TRIE(
00416 static_cast<LEAF*>((*itEdge).second)->getCounter());
00417 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00418 replace_list.push_back(Edge((*itEdge).first, toExtend));
00419 }
00420 trie->edgelist.clear();
00421 trie->edgelist.insert(replace_list);
00422 }
00423 }
00424 }
00425 }
00426 #endif