00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace dynamic_trie
00017 {
00018 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00019 class LEAF_ALLOCATOR, NEELevel NEE>
00020 class IntersectProPruner : public Bodon::inhomogeneous_trie::
00021 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR>
00022 {
00023 private:
00024 typedef Bodon::inhomogeneous_trie::
00025 ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR> PARENT;
00026 protected:
00027 std::vector<Edge> extenders;
00028 mutable std::vector<item_t> ext_items;
00029 mutable std::vector<item_t> leaf_neelist;
00030 std::vector<Edge> replace_list;
00031
00032 unsigned int nr_of_new_candidates;
00033 unsigned int nr_of_nonprefix_equiitem;
00034 public:
00035 IntersectProPruner( TRIE_OEL& main_trie, DF_D& df_decoder,
00036 LEAF_ALLOCATOR& s_alloc ) :
00037 PARENT(main_trie, df_decoder, s_alloc){}
00038
00039 protected:
00040
00041 template <class TRIE> void intersect( const TRIE* subset_trie ) const;
00042
00043 template <class TRIE> void intersectNEE(
00044 const TRIE* subset_trie ) const;
00045
00046 void filterNonExtenders(
00047 const std::vector<const TRIE_OEL*>& subset_oel_tries,
00048 const std::vector<const TRIE_OI*>& subset_oi_tries,
00049 std::vector<typename TRIE_OEL::const_iterator>&
00050 subset_oel_child_iters,
00051 std::vector<typename TRIE_OI::const_iterator>&
00052 subset_oi_child_iters,
00053 const item_t leaf_item ) const;
00054
00055 template <class TRIE> const void* isIncluded(
00056 const TRIE* trie, const std::vector<item_t>& an_itemset,
00057 std::vector<item_t>::const_iterator item_it ) const;
00058
00059 bool findSubsetTries(
00060 std::vector<item_t>& itemset,
00061 std::vector<const TRIE_OEL*>& subset_oel_trie,
00062 std::vector<const TRIE_OI*>& subset_oi_trie) const;
00063
00064 template <class TRIE> void generateCandidateAtParent(
00065 TRIE* trie, std::vector<item_t>& maybe_candidate);
00066 template <class TRIE> void generateCandidateAtParentNEE(
00067 TRIE* trie, std::vector<item_t>& maybe_candidate);
00068 };
00069
00070 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00071 class LEAF_ALLOCATOR, NEELevel NEE>
00072 template <class TRIE> inline void
00073 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00074 intersect( const TRIE* subset_trie) const
00075 {
00076 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00077 std::vector<item_t>::iterator it_i(ext_items.begin());
00078 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00079 {
00080 if(*it_e < *it_i)
00081 ++it_e;
00082 else if(*it_e > *it_i)
00083 it_i = ext_items.erase(it_i);
00084 else
00085 {
00086 ++it_e;
00087 ++it_i;
00088 }
00089 }
00090 ext_items.erase(it_i, ext_items.end());
00091 }
00092
00093 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00094 class LEAF_ALLOCATOR, NEELevel NEE>
00095 template <class TRIE> inline void
00096 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00097 intersectNEE( const TRIE* subset_trie ) const
00098 {
00099 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00100 std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00101 std::vector<item_t>::iterator it_i(ext_items.begin());
00102 while(it_i != ext_items.end())
00103 {
00104 while (it_e != subset_trie->edgelist.end() && *it_e < *it_i)
00105 ++it_e;
00106 if(it_e != subset_trie->edgelist.end() && *it_e == *it_i)
00107 {
00108 ++it_e;
00109 ++it_i;
00110 }
00111 else
00112 {
00113 while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00114 ++it_nee;
00115 if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00116 {
00117 if(NEE == NEE_Full)
00118 {
00119 leaf_neelist.push_back(*it_i);
00120 it_i = ext_items.erase(it_i);
00121 }
00122 else
00123 {
00124 ++it_i;
00125 }
00126 ++it_nee;
00127 }
00128 else
00129 it_i = ext_items.erase(it_i);
00130 }
00131 }
00132 }
00133
00134 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00135 class LEAF_ALLOCATOR, NEELevel NEE> inline void
00136 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00137 filterNonExtenders(
00138 const std::vector<const TRIE_OEL*>& subset_oel_tries,
00139 const std::vector<const TRIE_OI*>& subset_oi_tries,
00140 std::vector<typename TRIE_OEL::const_iterator>& subset_oel_child_iters,
00141 std::vector<typename TRIE_OI::const_iterator>& subset_oi_child_iters,
00142 const item_t leaf_item ) const
00143 {
00144
00145 std::vector<item_t>::size_type index = 0;
00146 for( ; index < subset_oel_tries.size(); ++index)
00147 {
00148 while( subset_oel_child_iters[index] !=
00149 subset_oel_tries[index]->edgelist.end()
00150 && (*(subset_oel_child_iters[index])).first < leaf_item)
00151 ++subset_oel_child_iters[index];
00152 if( subset_oel_child_iters[index] !=
00153 subset_oel_tries[index]->edgelist.end() &&
00154 (*(subset_oel_child_iters[index])).first == leaf_item)
00155 if( static_cast<LEAF*>((*(subset_oel_child_iters[index])).second)->
00156 getCounter() & TWO_POW31 )
00157 if(NEE != NEE_Off && !static_cast<const TRIE_OEL*>(
00158 (*(subset_oel_child_iters[index])).second)->neelist.empty())
00159 intersectNEE(static_cast<const TRIE_OEL*>(
00160 (*(subset_oel_child_iters[index])).second));
00161 else
00162 intersect(static_cast<const TRIE_OEL*>(
00163 (*(subset_oel_child_iters[index])).second));
00164 else
00165 if(NEE != NEE_Off && !static_cast<const TRIE_OI*>(
00166 (*(subset_oel_child_iters[index])).second)->neelist.empty())
00167 intersectNEE(static_cast<const TRIE_OI*>(
00168 (*(subset_oel_child_iters[index])).second));
00169 else
00170 intersect(static_cast<const TRIE_OI*>(
00171 (*(subset_oel_child_iters[index])).second));
00172 else
00173 {
00174 ext_items.clear();
00175 break;
00176 }
00177 }
00178 if( index == subset_oel_tries.size())
00179 {
00180 index = 0;
00181 for( ; index < subset_oi_tries.size(); ++index)
00182 {
00183 while( subset_oi_child_iters[index] !=
00184 subset_oi_tries[index]->edgelist.end()
00185 && (*(subset_oi_child_iters[index])).first < leaf_item)
00186 ++subset_oi_child_iters[index];
00187 if( subset_oi_child_iters[index] !=
00188 subset_oi_tries[index]->edgelist.end() &&
00189 (*(subset_oi_child_iters[index])).first == leaf_item)
00190 if( static_cast<LEAF*>((*(subset_oi_child_iters[index])).second)->
00191 getCounter() & TWO_POW31 )
00192 if(NEE != NEE_Off && !static_cast<const TRIE_OEL*>(
00193 (*(subset_oi_child_iters[index])).second)->neelist.empty())
00194 intersectNEE(static_cast<const TRIE_OEL*>(
00195 (*(subset_oi_child_iters[index])).second));
00196 else
00197 intersect(static_cast<const TRIE_OEL*>(
00198 (*(subset_oi_child_iters[index])).second));
00199 else
00200 if(NEE != NEE_Off && !static_cast<const TRIE_OI*>(
00201 (*(subset_oi_child_iters[index])).second)->neelist.empty())
00202 intersectNEE(static_cast<const TRIE_OI*>(
00203 (*(subset_oi_child_iters[index])).second));
00204 else
00205 intersect( static_cast<const TRIE_OI*>(
00206 (*(subset_oi_child_iters[index])).second) );
00207 else
00208 {
00209 ext_items.clear();
00210 break;
00211 }
00212 }
00213 }
00214 }
00215
00216 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00217 class LEAF_ALLOCATOR, NEELevel NEE>
00218 template <class TRIE> const void*
00219 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00220 isIncluded( const TRIE* trie, const std::vector<item_t>& an_itemset,
00221 std::vector<item_t>::const_iterator item_it ) const
00222 {
00223 if( item_it == an_itemset.end() )
00224 return static_cast<const void*>(trie);
00225 else
00226 {
00227 void* subtrie;
00228 subtrie = trie->edgelist.find(*item_it);
00229 if(subtrie)
00230 {
00231 if( static_cast<LEAF*>(subtrie)->getCounter() & TWO_POW31 )
00232 return isIncluded(static_cast<TRIE_OEL*>(subtrie), an_itemset, ++item_it);
00233 else
00234 return isIncluded(static_cast<TRIE_OI*>(subtrie), an_itemset, ++item_it);
00235 }
00236 else
00237 if(NEE == NEE_Level1 && trie->neeFind(*item_it) )
00238 return isIncluded(trie, an_itemset, ++item_it);
00239 else
00240 return NULL;
00241 }
00242 }
00243
00244 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00245 class LEAF_ALLOCATOR, NEELevel NEE> inline bool
00246 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00247 findSubsetTries( std::vector<item_t>& itemset,
00248 std::vector<const TRIE_OEL*>& subset_oel_tries,
00249 std::vector<const TRIE_OI*>& subset_oi_tries ) const
00250 {
00251 assert(!itemset.empty());
00252 std::vector<item_t>::iterator item_it = itemset.end()-1;
00253 register item_t deleted_item = *item_it, temp_item;
00254 const void* a_subset_trie;
00255
00256 itemset.pop_back();
00257 a_subset_trie = isIncluded( &this->main_trie, itemset, itemset.begin() );
00258 if(a_subset_trie)
00259 if( static_cast<const LEAF*>(a_subset_trie)->
00260 getCounter() & TWO_POW31 )
00261 subset_oel_tries.push_back(static_cast<const TRIE_OEL*>(a_subset_trie));
00262 else
00263 subset_oi_tries.push_back(static_cast<const TRIE_OI*>(a_subset_trie));
00264 else
00265 {
00266 itemset.push_back(deleted_item);
00267 return false;
00268 }
00269 while(item_it != itemset.begin() )
00270 {
00271 --item_it;
00272 temp_item = *item_it;
00273 *item_it = deleted_item;
00274 deleted_item = temp_item;
00275 a_subset_trie = isIncluded( &this->main_trie, itemset, itemset.begin() );
00276 if(a_subset_trie)
00277 if( static_cast<const LEAF*>(a_subset_trie)->
00278 getCounter() & TWO_POW31 )
00279 subset_oel_tries.push_back(static_cast<const TRIE_OEL*>(a_subset_trie));
00280 else
00281 subset_oi_tries.push_back(static_cast<const TRIE_OI*>(a_subset_trie));
00282 else
00283 {
00284 itemset.insert( item_it, deleted_item );
00285 return false;
00286 }
00287 }
00288 itemset.insert( item_it, deleted_item );
00289 return true;
00290 }
00291
00292 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00293 class LEAF_ALLOCATOR, NEELevel NEE>
00294 template <class TRIE> inline void
00295 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00296 generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00297 {
00298 std::vector<const TRIE_OEL*> subset_oel_tries;
00299 std::vector<const TRIE_OI*> subset_oi_tries;
00300 const bool all_subset_included = findSubsetTries(
00301 maybe_candidate, subset_oel_tries, subset_oi_tries);
00302
00303 std::vector<typename TRIE_OEL::const_iterator> subset_oel_child_iters;
00304 std::vector<typename TRIE_OI::const_iterator> subset_oi_child_iters;
00305 subset_oel_child_iters.reserve(subset_oel_tries.size());
00306 subset_oi_child_iters.reserve(subset_oi_tries.size());
00307
00308 for(typename std::vector<const TRIE_OEL*>::const_iterator it =
00309 subset_oel_tries.begin(); it != subset_oel_tries.end(); ++it)
00310 subset_oel_child_iters.push_back((*it)->edgelist.begin());
00311
00312 for(typename std::vector<const TRIE_OI*>::const_iterator it =
00313 subset_oi_tries.begin(); it != subset_oi_tries.end(); ++it)
00314 subset_oi_child_iters.push_back((*it)->edgelist.begin());
00315
00316 typename TRIE::iterator itEdge2;
00317 LEAF* toExtendAsLeaf;
00318 replace_list.clear();
00319 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00320 itEdge != trie->edgelist.end(); ++itEdge )
00321 {
00322 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00323 PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00324 toExtendAsLeaf->getCounter() );
00325 PARENT::df_decoder.popItem();
00326 if(all_subset_included)
00327 {
00328 itEdge2 = itEdge;
00329 ++itEdge2;
00330 ext_items.clear();
00331 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00332 ext_items.push_back((*itEdge2).first);
00333 filterNonExtenders( subset_oel_tries, subset_oi_tries,
00334 subset_oel_child_iters, subset_oi_child_iters,
00335 (*itEdge).first );
00336 extenders.clear();
00337 for(std::vector<item_t>::iterator it = ext_items.begin();
00338 it != ext_items.end(); ++it)
00339 {
00340 extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00341 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00342 }
00343 if(!extenders.empty())
00344 {
00345 if( 2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00346 {
00347 TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00348 replace_list.push_back(Edge((*itEdge).first, toExtend));
00349 toExtend->edgelist.insert(extenders);
00350 }
00351 else
00352 {
00353 TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter());
00354 replace_list.push_back(Edge((*itEdge).first, toExtend));
00355 toExtend->edgelist.insert(extenders);
00356 }
00357 #if DEBUG_LEVEL >= LEVEL_INFO
00358 nr_of_new_candidates +=extenders.size();
00359 #endif
00360 }
00361 }
00362 PARENT::s_alloc.free(toExtendAsLeaf);
00363 }
00364 trie->edgelist.clear();
00365 trie->edgelist.insert(replace_list);
00366 }
00367 template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF,
00368 class LEAF_ALLOCATOR, NEELevel NEE>
00369 template <class TRIE> inline void
00370 IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00371 generateCandidateAtParentNEE( TRIE* trie,
00372 std::vector<item_t>& maybe_candidate )
00373 {
00374 std::vector<const TRIE_OEL*> subset_oel_tries;
00375 std::vector<const TRIE_OI*> subset_oi_tries;
00376 replace_list.clear();
00377 if( findSubsetTries( maybe_candidate, subset_oel_tries, subset_oi_tries ) )
00378 {
00379 std::vector<typename TRIE_OEL::const_iterator> subset_oel_child_iters;
00380 std::vector<typename TRIE_OI::const_iterator> subset_oi_child_iters;
00381 subset_oel_child_iters.reserve(subset_oel_tries.size());
00382 subset_oi_child_iters.reserve(subset_oi_tries.size());
00383
00384 for(typename std::vector<const TRIE_OEL*>::const_iterator it =
00385 subset_oel_tries.begin(); it != subset_oel_tries.end(); ++it)
00386 subset_oel_child_iters.push_back((*it)->edgelist.begin());
00387
00388 for(typename std::vector<const TRIE_OI*>::const_iterator it =
00389 subset_oi_tries.begin(); it != subset_oi_tries.end(); ++it)
00390 subset_oi_child_iters.push_back((*it)->edgelist.begin());
00391
00392 typename TRIE::iterator itEdge2;
00393 LEAF* toExtendAsLeaf;
00394 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00395 itEdge != trie->edgelist.end(); ++itEdge )
00396 {
00397 toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00398 itEdge2 = itEdge;
00399 ++itEdge2;
00400 ext_items.clear();
00401 leaf_neelist.clear();
00402 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00403 ext_items.push_back((*itEdge2).first);
00404 filterNonExtenders( subset_oel_tries, subset_oi_tries,
00405 subset_oel_child_iters, subset_oi_child_iters,
00406 (*itEdge).first );
00407 std::sort(leaf_neelist.begin(), leaf_neelist.end());
00408 extenders.clear();
00409 for(std::vector<item_t>::iterator it = ext_items.begin();
00410 it != ext_items.end(); ++it)
00411 {
00412 extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00413 static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00414 }
00415 if(extenders.empty() && leaf_neelist.empty() &&
00416 ( NEE == NEE_Full ||
00417 NEE == NEE_Level1 && PARENT::df_decoder.EEListEmpty() )
00418 )
00419 {
00420 PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00421 toExtendAsLeaf->getCounter() );
00422 PARENT::df_decoder.popItem();
00423 }
00424 else
00425 {
00426 if( extenders.empty() ||
00427 2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00428 {
00429 TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00430 toExtend->edgelist.insert(extenders);
00431 toExtend->neePushBackSorted( leaf_neelist );
00432 replace_list.push_back(Edge((*itEdge).first, toExtend));
00433 }
00434 else
00435 {
00436 TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter());
00437 toExtend->edgelist.insert(extenders);
00438 toExtend->neePushBackSorted( leaf_neelist );
00439 replace_list.push_back(Edge((*itEdge).first, toExtend));
00440 }
00441 #if DEBUG_LEVEL >= LEVEL_INFO
00442 nr_of_new_candidates +=extenders.size();
00443 nr_of_nonprefix_equiitem += leaf_neelist.size();
00444 #endif
00445 }
00446 PARENT::s_alloc.free(toExtendAsLeaf);
00447 }
00448 trie->edgelist.clear();
00449 trie->edgelist.insert(replace_list);
00450 }
00451 else if( NEE == NEE_Full || PARENT::df_decoder.EEListEmpty())
00452 {
00453 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00454 itEdge != trie->edgelist.end(); ++itEdge )
00455 {
00456 PARENT::df_decoder.pushItemWithWrite(
00457 (*itEdge).first,
00458 static_cast<LEAF*>((*itEdge).second)->getCounter() );
00459 PARENT::df_decoder.popItem();
00460 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00461 }
00462 trie->edgelist.clear();
00463 }
00464 else
00465 {
00466 TRIE_OEL* toExtend;
00467 for( typename TRIE::iterator itEdge( trie->edgelist.begin() );
00468 itEdge != trie->edgelist.end(); ++itEdge )
00469 {
00470 toExtend = new TRIE_OEL(
00471 static_cast<LEAF*>((*itEdge).second)->getCounter() | TWO_POW31);
00472 PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00473 replace_list.push_back(Edge((*itEdge).first, toExtend));
00474 }
00475 trie->edgelist.clear();
00476 trie->edgelist.insert(replace_list);
00477 }
00478 }
00479 }
00480 }
00481 #endif