00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 namespace sequence
00017 {
00018 template <class DF_D, class TRIE>
00019 class IntersectProPruner : public ManipulatorBase<DF_D, TRIE>
00020 {
00021 protected:
00022 std::vector<Edge> extenders;
00023 mutable std::vector<item_t> ext_items;
00024 mutable std::vector<item_t> ext_nee;
00025
00026 public:
00027 IntersectProPruner( TRIE& main_trie, DF_D& df_decoder ) :
00028 ManipulatorBase<DF_D, TRIE>(main_trie, df_decoder){}
00029
00030 protected:
00031
00032 void intersect( const TRIE* subset_trie ) const;
00033
00034 void filterNonExtenders(
00035 const std::vector<const TRIE*>& subset_tries,
00036 const item_t leaf_item ) const;
00037
00038 bool findSubsetTries(
00039 std::vector<item_t>& itemset,
00040 std::vector<const TRIE*>& subset_trie) const;
00041
00042 void generateCandidateAtParent(
00043 TRIE* trie, std::vector<item_t>& maybe_candidate);
00044
00045
00046 };
00047
00048 template <class DF_D, class TRIE> inline void
00049 IntersectProPruner<DF_D, TRIE>::
00050 intersect( const TRIE* subset_trie) const
00051 {
00052 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00053 std::vector<item_t>::iterator it_i(ext_items.begin());
00054 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00055 {
00056 if(*it_e < *it_i)
00057 ++it_e;
00058 else if(*it_e > *it_i)
00059 it_i = ext_items.erase(it_i);
00060 else
00061 {
00062 ++it_e;
00063 ++it_i;
00064 }
00065 }
00066 ext_items.erase(it_i, ext_items.end());
00067 }
00068
00069
00070 template <class DF_D, class TRIE> inline void
00071 IntersectProPruner<DF_D, TRIE>::
00072 filterNonExtenders( const std::vector<const TRIE*>& subset_tries,
00073 const item_t leaf_item ) const
00074 {
00075 std::vector<typename TRIE::const_iterator> subset_child_iters;
00076 subset_child_iters.reserve(subset_tries.size());
00077 for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin();
00078 it != subset_tries.end(); ++it)
00079 subset_child_iters.push_back((*it)->edgelist.begin());
00080
00081 for(std::vector<item_t>::size_type index = 0;
00082 index < subset_tries.size(); ++index)
00083 {
00084 while( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00085 (*(subset_child_iters[index])).first < leaf_item)
00086 ++subset_child_iters[index];
00087 if( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00088 (*(subset_child_iters[index])).first == leaf_item)
00089 intersect(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00090 else
00091 {
00092 ext_items.clear();
00093 break;
00094 }
00095 }
00096 }
00097
00098
00099 template <class DF_D, class TRIE> inline bool
00100 IntersectProPruner<DF_D, TRIE>::
00101 findSubsetTries( std::vector<item_t>& itemset,
00102 std::vector<const TRIE*>& subset_tries ) const
00103 {
00104 assert(itemset.size() > 0);
00105 std::vector<item_t>::iterator item_it = itemset.end()-1;
00106 register item_t deleted_item = *item_it, temp_item;
00107 const TRIE* a_subset_trie;
00108
00109 itemset.pop_back();
00110 a_subset_trie = main_trie.isIncluded( itemset, itemset.begin() );
00111 if(a_subset_trie)
00112 subset_tries.push_back(a_subset_trie);
00113 else
00114 {
00115 itemset.push_back(deleted_item);
00116 return false;
00117 }
00118 while(item_it != itemset.begin() )
00119 {
00120 --item_it;
00121 temp_item = *item_it;
00122 *item_it = deleted_item;
00123 deleted_item = temp_item;
00124 a_subset_trie = main_trie.isIncluded( itemset, itemset.begin() );
00125 if(a_subset_trie)
00126 subset_tries.push_back(a_subset_trie);
00127 else
00128 {
00129 itemset.insert( item_it, deleted_item );
00130 return false;
00131 }
00132 }
00133 itemset.insert( item_it, deleted_item );
00134 return true;
00135 }
00136
00137 template <class DF_D, class TRIE> inline void
00138 IntersectProPruner<DF_D, TRIE>::
00139 generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00140 {
00141 std::vector<const TRIE*> subset_tries;
00142 const bool all_subset_included = findSubsetTries(
00143 maybe_candidate, subset_tries);
00144 typename TRIE::iterator itEdge( trie->edgelist.begin() ), itEdge2;
00145 TRIE* toExtend;
00146 while( itEdge != trie->edgelist.end() )
00147 {
00148 toExtend = static_cast<TRIE*>((*itEdge).second);
00149 df_decoder.pushItemWithWrite( (*itEdge).first, toExtend->getCounter() );
00150 df_decoder.popItem();
00151 if(all_subset_included)
00152 {
00153 ext_items.clear();
00154 for( itEdge2 = trie->edgelist.begin(); itEdge2 != trie->edgelist.end(); ++itEdge2 )
00155 ext_items.push_back((*itEdge2).first);
00156 filterNonExtenders( subset_tries, (*itEdge).first );
00157 extenders.clear();
00158 for(std::vector<item_t>::iterator it = ext_items.begin();
00159 it != ext_items.end(); ++it)
00160 extenders.push_back(Edge(*it, new TRIE(0)));
00161 toExtend->edgelist.insert(extenders);
00162 }
00163 ++itEdge;
00164 }
00165 }
00166 }
00167 }
00168 #endif