Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

dynamic_trie/trie_manipulators/IntersectProPruner.hpp

Go to the documentation of this file.
00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003 
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008 //#include <cstdio>
00009 //#include <iterator>   //for test
00010 
00011 
00012 
00013 
00014 namespace Bodon
00015 {
00016 namespace dynamic_trie
00017 {
00018    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00019              class LEAF_ALLOCATOR, NEELevel NEE>
00020    class IntersectProPruner : public Bodon::inhomogeneous_trie::
00021          ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR>
00022    {
00023       private:
00024          typedef Bodon::inhomogeneous_trie::
00025          ManipulatorBase<DF_D, TRIE_OEL, LEAF_ALLOCATOR> PARENT;
00026       protected:
00027          std::vector<Edge> extenders;
00028          mutable std::vector<item_t> ext_items;
00029          mutable std::vector<item_t> leaf_neelist;
00030          std::vector<Edge> replace_list;
00031 
00032          unsigned int nr_of_new_candidates;
00033          unsigned int nr_of_nonprefix_equiitem;
00034       public:
00035          IntersectProPruner( TRIE_OEL& main_trie, DF_D& df_decoder,
00036                              LEAF_ALLOCATOR& s_alloc ) : 
00037             PARENT(main_trie, df_decoder, s_alloc){}
00038 
00039       protected:
00040          
00041          template <class TRIE> void intersect( const TRIE* subset_trie ) const;
00042 
00043          template <class TRIE> void intersectNEE( 
00044             const TRIE* subset_trie ) const;
00045 
00046          void filterNonExtenders( 
00047             const std::vector<const TRIE_OEL*>& subset_oel_tries,
00048             const std::vector<const TRIE_OI*>& subset_oi_tries,
00049             std::vector<typename TRIE_OEL::const_iterator>& 
00050             subset_oel_child_iters,
00051             std::vector<typename TRIE_OI::const_iterator>& 
00052             subset_oi_child_iters,
00053             const item_t leaf_item ) const;
00054 
00055          template <class TRIE> const void* isIncluded( 
00056             const TRIE* trie, const std::vector<item_t>& an_itemset, 
00057             std::vector<item_t>::const_iterator item_it ) const;
00058 
00059          bool findSubsetTries( 
00060             std::vector<item_t>& itemset, 
00061             std::vector<const TRIE_OEL*>& subset_oel_trie,
00062             std::vector<const TRIE_OI*>& subset_oi_trie) const;
00063 
00064          template <class TRIE> void generateCandidateAtParent(
00065             TRIE* trie, std::vector<item_t>& maybe_candidate);
00066          template <class TRIE> void generateCandidateAtParentNEE(
00067             TRIE* trie, std::vector<item_t>& maybe_candidate);
00068    };
00069 
00070    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00071              class LEAF_ALLOCATOR, NEELevel NEE> 
00072    template <class TRIE> inline void 
00073    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00074    intersect( const TRIE* subset_trie) const
00075    {
00076       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00077       std::vector<item_t>::iterator it_i(ext_items.begin());
00078       while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00079       {
00080          if(*it_e < *it_i)
00081             ++it_e;
00082          else if(*it_e > *it_i)
00083             it_i = ext_items.erase(it_i);
00084          else
00085          {
00086             ++it_e;
00087             ++it_i;
00088          }
00089       }
00090       ext_items.erase(it_i, ext_items.end());      
00091    }
00092 
00093    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00094              class LEAF_ALLOCATOR, NEELevel NEE> 
00095    template <class TRIE> inline void 
00096    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00097    intersectNEE( const TRIE* subset_trie ) const
00098    {
00099       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00100       std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00101       std::vector<item_t>::iterator it_i(ext_items.begin());
00102       while(it_i != ext_items.end())
00103       {
00104          while (it_e != subset_trie->edgelist.end() && *it_e < *it_i)
00105             ++it_e;
00106          if(it_e != subset_trie->edgelist.end() && *it_e == *it_i)
00107          {
00108             ++it_e;
00109             ++it_i;
00110          }
00111          else
00112          {
00113             while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00114                ++it_nee;
00115             if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00116             {
00117                if(NEE == NEE_Full)
00118                {
00119                   leaf_neelist.push_back(*it_i);
00120                   it_i = ext_items.erase(it_i);
00121                }
00122                else
00123                {
00124                   ++it_i;
00125                }
00126                ++it_nee;
00127             }
00128             else
00129                it_i = ext_items.erase(it_i);
00130          }
00131       }
00132    }
00133 
00134    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00135              class LEAF_ALLOCATOR, NEELevel NEE> inline void 
00136    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00137    filterNonExtenders( 
00138       const std::vector<const TRIE_OEL*>& subset_oel_tries,  
00139       const std::vector<const TRIE_OI*>& subset_oi_tries,  
00140       std::vector<typename TRIE_OEL::const_iterator>& subset_oel_child_iters,
00141       std::vector<typename TRIE_OI::const_iterator>& subset_oi_child_iters,
00142       const item_t leaf_item ) const
00143    {
00144 
00145       std::vector<item_t>::size_type index = 0;
00146       for( ; index < subset_oel_tries.size(); ++index)
00147       {
00148          while( subset_oel_child_iters[index] !=  
00149                 subset_oel_tries[index]->edgelist.end() 
00150                 && (*(subset_oel_child_iters[index])).first < leaf_item)
00151             ++subset_oel_child_iters[index];
00152          if( subset_oel_child_iters[index] !=  
00153              subset_oel_tries[index]->edgelist.end() &&
00154              (*(subset_oel_child_iters[index])).first == leaf_item)
00155             if( static_cast<LEAF*>((*(subset_oel_child_iters[index])).second)->
00156                 getCounter() & TWO_POW31 )
00157                if(NEE != NEE_Off && !static_cast<const TRIE_OEL*>(
00158                             (*(subset_oel_child_iters[index])).second)->neelist.empty())
00159                   intersectNEE(static_cast<const TRIE_OEL*>(
00160                             (*(subset_oel_child_iters[index])).second));
00161                else
00162                   intersect(static_cast<const TRIE_OEL*>(
00163                                (*(subset_oel_child_iters[index])).second));
00164             else
00165                if(NEE != NEE_Off && !static_cast<const TRIE_OI*>(
00166                             (*(subset_oel_child_iters[index])).second)->neelist.empty())
00167                   intersectNEE(static_cast<const TRIE_OI*>(
00168                                   (*(subset_oel_child_iters[index])).second));
00169                else
00170                   intersect(static_cast<const TRIE_OI*>(
00171                                (*(subset_oel_child_iters[index])).second));
00172          else
00173          {
00174             ext_items.clear();
00175             break;
00176          }
00177       }
00178       if( index == subset_oel_tries.size())
00179       {
00180          index = 0;
00181          for( ; index < subset_oi_tries.size(); ++index)
00182          {
00183             while( subset_oi_child_iters[index] !=  
00184                 subset_oi_tries[index]->edgelist.end() 
00185                    && (*(subset_oi_child_iters[index])).first < leaf_item)
00186                ++subset_oi_child_iters[index];
00187             if( subset_oi_child_iters[index] !=  
00188                 subset_oi_tries[index]->edgelist.end() &&
00189                 (*(subset_oi_child_iters[index])).first == leaf_item)
00190                if( static_cast<LEAF*>((*(subset_oi_child_iters[index])).second)->
00191                    getCounter() & TWO_POW31 )
00192                   if(NEE != NEE_Off && !static_cast<const TRIE_OEL*>(
00193                             (*(subset_oi_child_iters[index])).second)->neelist.empty())
00194                      intersectNEE(static_cast<const TRIE_OEL*>(
00195                                      (*(subset_oi_child_iters[index])).second));
00196                   else
00197                      intersect(static_cast<const TRIE_OEL*>(
00198                                   (*(subset_oi_child_iters[index])).second));
00199                else
00200                   if(NEE != NEE_Off && !static_cast<const TRIE_OI*>(
00201                         (*(subset_oi_child_iters[index])).second)->neelist.empty())
00202                      intersectNEE(static_cast<const TRIE_OI*>(
00203                                      (*(subset_oi_child_iters[index])).second));
00204                   else
00205                      intersect( static_cast<const TRIE_OI*>(
00206                                 (*(subset_oi_child_iters[index])).second) );
00207             else
00208             {
00209                ext_items.clear();
00210                break;
00211             }
00212          }
00213       }
00214    }
00215 
00216    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00217              class LEAF_ALLOCATOR, NEELevel NEE> 
00218    template <class TRIE> const void*
00219    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00220    isIncluded( const TRIE* trie, const std::vector<item_t>& an_itemset, 
00221                std::vector<item_t>::const_iterator item_it ) const
00222    {
00223       if( item_it == an_itemset.end() )
00224          return static_cast<const void*>(trie);
00225       else
00226       {
00227          void* subtrie;
00228          subtrie = trie->edgelist.find(*item_it);
00229          if(subtrie)
00230          {
00231             if( static_cast<LEAF*>(subtrie)->getCounter() & TWO_POW31 )
00232                return isIncluded(static_cast<TRIE_OEL*>(subtrie), an_itemset, ++item_it);
00233             else
00234                return isIncluded(static_cast<TRIE_OI*>(subtrie), an_itemset, ++item_it);          
00235          }
00236          else 
00237             if(NEE == NEE_Level1 && trie->neeFind(*item_it) )
00238                return isIncluded(trie, an_itemset, ++item_it);    
00239             else
00240                return NULL;
00241       }
00242    }
00243 
00244    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00245              class LEAF_ALLOCATOR, NEELevel NEE> inline bool
00246    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00247    findSubsetTries( std::vector<item_t>& itemset, 
00248                     std::vector<const TRIE_OEL*>& subset_oel_tries,
00249                     std::vector<const TRIE_OI*>& subset_oi_tries ) const
00250    {
00251       assert(!itemset.empty());
00252       std::vector<item_t>::iterator item_it = itemset.end()-1;
00253       register item_t deleted_item = *item_it, temp_item;
00254       const void* a_subset_trie;
00255 
00256       itemset.pop_back();
00257       a_subset_trie = isIncluded( &this->main_trie, itemset, itemset.begin() );
00258       if(a_subset_trie)
00259          if( static_cast<const LEAF*>(a_subset_trie)->
00260              getCounter() & TWO_POW31 )
00261             subset_oel_tries.push_back(static_cast<const TRIE_OEL*>(a_subset_trie));
00262          else
00263             subset_oi_tries.push_back(static_cast<const TRIE_OI*>(a_subset_trie));
00264       else 
00265       {
00266          itemset.push_back(deleted_item);
00267          return false;
00268       }
00269       while(item_it != itemset.begin() )
00270       {
00271          --item_it;
00272          temp_item = *item_it;
00273          *item_it = deleted_item;
00274          deleted_item = temp_item;
00275          a_subset_trie = isIncluded( &this->main_trie, itemset, itemset.begin() );
00276          if(a_subset_trie)
00277             if( static_cast<const LEAF*>(a_subset_trie)->
00278                 getCounter() & TWO_POW31 )
00279                subset_oel_tries.push_back(static_cast<const TRIE_OEL*>(a_subset_trie));
00280             else
00281                subset_oi_tries.push_back(static_cast<const TRIE_OI*>(a_subset_trie));
00282          else
00283          {
00284             itemset.insert( item_it, deleted_item );
00285             return false;
00286          }
00287       }
00288       itemset.insert( item_it, deleted_item );
00289       return true;
00290    }
00291 
00292    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00293              class LEAF_ALLOCATOR, NEELevel NEE> 
00294    template <class TRIE> inline void 
00295    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00296    generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00297    {
00298       std::vector<const TRIE_OEL*> subset_oel_tries;
00299       std::vector<const TRIE_OI*> subset_oi_tries;
00300       const bool all_subset_included = findSubsetTries( 
00301          maybe_candidate, subset_oel_tries, subset_oi_tries);
00302 
00303       std::vector<typename TRIE_OEL::const_iterator> subset_oel_child_iters;
00304       std::vector<typename TRIE_OI::const_iterator> subset_oi_child_iters;
00305       subset_oel_child_iters.reserve(subset_oel_tries.size());
00306       subset_oi_child_iters.reserve(subset_oi_tries.size());
00307 
00308       for(typename std::vector<const TRIE_OEL*>::const_iterator it = 
00309              subset_oel_tries.begin(); it !=  subset_oel_tries.end(); ++it)
00310          subset_oel_child_iters.push_back((*it)->edgelist.begin());
00311 
00312       for(typename std::vector<const TRIE_OI*>::const_iterator it = 
00313              subset_oi_tries.begin(); it !=  subset_oi_tries.end(); ++it)
00314          subset_oi_child_iters.push_back((*it)->edgelist.begin());
00315 
00316       typename TRIE::iterator itEdge2;
00317       LEAF* toExtendAsLeaf;
00318       replace_list.clear();
00319       for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00320            itEdge != trie->edgelist.end(); ++itEdge )
00321       {
00322          toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00323          PARENT::df_decoder.pushItemWithWrite( (*itEdge).first, 
00324                                        toExtendAsLeaf->getCounter() );
00325          PARENT::df_decoder.popItem();
00326          if(all_subset_included)
00327          {
00328             itEdge2 = itEdge;
00329             ++itEdge2;
00330             ext_items.clear();
00331             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00332                ext_items.push_back((*itEdge2).first);
00333             filterNonExtenders( subset_oel_tries, subset_oi_tries, 
00334                                 subset_oel_child_iters, subset_oi_child_iters,
00335                                 (*itEdge).first );
00336             extenders.clear();
00337             for(std::vector<item_t>::iterator it = ext_items.begin();
00338                 it != ext_items.end(); ++it)
00339             {
00340                extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00341                static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00342             }
00343             if(!extenders.empty())
00344             {
00345                if( 2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00346                {
00347                   TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00348                   replace_list.push_back(Edge((*itEdge).first, toExtend));
00349                   toExtend->edgelist.insert(extenders);
00350                }
00351                else
00352                {
00353                   TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter());
00354                   replace_list.push_back(Edge((*itEdge).first, toExtend));
00355                   toExtend->edgelist.insert(extenders);
00356                }
00357 #if DEBUG_LEVEL >= LEVEL_INFO
00358                nr_of_new_candidates +=extenders.size();
00359 #endif
00360             }
00361          }
00362          PARENT::s_alloc.free(toExtendAsLeaf);
00363       }
00364       trie->edgelist.clear();
00365       trie->edgelist.insert(replace_list);
00366    }
00367    template <class DF_D, class TRIE_OEL, class TRIE_OI, class LEAF, 
00368              class LEAF_ALLOCATOR, NEELevel NEE> 
00369    template <class TRIE> inline void 
00370    IntersectProPruner<DF_D, TRIE_OEL, TRIE_OI, LEAF, LEAF_ALLOCATOR, NEE>::
00371    generateCandidateAtParentNEE( TRIE* trie, 
00372                                  std::vector<item_t>& maybe_candidate )
00373    {
00374       std::vector<const TRIE_OEL*> subset_oel_tries;
00375       std::vector<const TRIE_OI*> subset_oi_tries;
00376       replace_list.clear();
00377       if( findSubsetTries( maybe_candidate, subset_oel_tries, subset_oi_tries ) )
00378       {
00379          std::vector<typename TRIE_OEL::const_iterator> subset_oel_child_iters;
00380          std::vector<typename TRIE_OI::const_iterator> subset_oi_child_iters;
00381          subset_oel_child_iters.reserve(subset_oel_tries.size());
00382          subset_oi_child_iters.reserve(subset_oi_tries.size());
00383          
00384          for(typename std::vector<const TRIE_OEL*>::const_iterator it = 
00385                 subset_oel_tries.begin(); it !=  subset_oel_tries.end(); ++it)
00386             subset_oel_child_iters.push_back((*it)->edgelist.begin());
00387          
00388          for(typename std::vector<const TRIE_OI*>::const_iterator it = 
00389                 subset_oi_tries.begin(); it !=  subset_oi_tries.end(); ++it)
00390             subset_oi_child_iters.push_back((*it)->edgelist.begin());
00391 
00392          typename TRIE::iterator itEdge2;
00393          LEAF* toExtendAsLeaf;
00394          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00395               itEdge != trie->edgelist.end(); ++itEdge )
00396          {
00397             toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00398             itEdge2 = itEdge;
00399             ++itEdge2;
00400             ext_items.clear();
00401             leaf_neelist.clear();
00402             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00403                ext_items.push_back((*itEdge2).first);
00404             filterNonExtenders( subset_oel_tries, subset_oi_tries, 
00405                                 subset_oel_child_iters, subset_oi_child_iters,
00406                                 (*itEdge).first );
00407             std::sort(leaf_neelist.begin(), leaf_neelist.end());
00408             extenders.clear();
00409             for(std::vector<item_t>::iterator it = ext_items.begin();
00410                 it != ext_items.end(); ++it)
00411             {
00412                extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00413                static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00414             }
00415             if(extenders.empty() && leaf_neelist.empty() && 
00416                ( NEE == NEE_Full  || 
00417                  NEE == NEE_Level1 && PARENT::df_decoder.EEListEmpty() )
00418                )
00419             {
00420                PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00421                                              toExtendAsLeaf->getCounter() );
00422                PARENT::df_decoder.popItem();
00423             }
00424             else
00425             {
00426                if( extenders.empty() || 
00427                    2 * extenders.size() < extenders.back().first - extenders.front().first + 3 )
00428                {
00429                   TRIE_OEL* toExtend = new TRIE_OEL(toExtendAsLeaf->getCounter() | TWO_POW31);
00430                   toExtend->edgelist.insert(extenders);
00431                   toExtend->neePushBackSorted( leaf_neelist );
00432                   replace_list.push_back(Edge((*itEdge).first, toExtend));
00433                }
00434                else
00435                {
00436                   TRIE_OI* toExtend = new TRIE_OI(toExtendAsLeaf->getCounter());
00437                   toExtend->edgelist.insert(extenders);
00438                   toExtend->neePushBackSorted( leaf_neelist );
00439                   replace_list.push_back(Edge((*itEdge).first, toExtend));
00440                }
00441 #if DEBUG_LEVEL >= LEVEL_INFO
00442                nr_of_new_candidates +=extenders.size();
00443                nr_of_nonprefix_equiitem += leaf_neelist.size();
00444 #endif
00445             }
00446             PARENT::s_alloc.free(toExtendAsLeaf);
00447          }
00448          trie->edgelist.clear();
00449          trie->edgelist.insert(replace_list);
00450       }
00451       else if( NEE == NEE_Full || PARENT::df_decoder.EEListEmpty())
00452       {
00453          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00454               itEdge != trie->edgelist.end(); ++itEdge )
00455          {
00456             PARENT::df_decoder.pushItemWithWrite( 
00457                (*itEdge).first, 
00458                static_cast<LEAF*>((*itEdge).second)->getCounter() );
00459             PARENT::df_decoder.popItem();
00460             PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00461          }
00462          trie->edgelist.clear();
00463       }
00464       else
00465       {
00466          TRIE_OEL* toExtend;
00467          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00468            itEdge != trie->edgelist.end(); ++itEdge )
00469          {
00470             toExtend = new TRIE_OEL( 
00471                static_cast<LEAF*>((*itEdge).second)->getCounter() | TWO_POW31);
00472             PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00473             replace_list.push_back(Edge((*itEdge).first, toExtend));
00474          }
00475          trie->edgelist.clear();
00476          trie->edgelist.insert(replace_list);
00477       }
00478    }
00479 }
00480 }
00481 #endif

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4