00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014 namespace Bodon
00015 {
00016 template <class DF_D, class TRIE, bool NEE>
00017 class IntersectProPruner : public ManipulatorBase<DF_D, TRIE>
00018 {
00019 protected:
00020 std::vector<Edge> extenders;
00021 mutable std::vector<item_t> ext_items;
00022 mutable std::vector<item_t> ext_nee;
00023 typedef ManipulatorBase<DF_D, TRIE> PARENT;
00024
00025 public:
00026 IntersectProPruner( TRIE& main_trie, DF_D& df_decoder ) :
00027 ManipulatorBase<DF_D, TRIE>(main_trie, df_decoder){}
00028
00029 protected:
00030
00031 void intersect( const TRIE* subset_trie ) const;
00032
00033 void intersectNEE( const TRIE* subset_trie ) const;
00034
00035 void filterNonExtenders(
00036 const std::vector<const TRIE*>& subset_tries,
00037 const item_t leaf_item ) const;
00038
00039 void filterNonExtendersNEE(
00040 const std::vector<const TRIE*>& subset_tries,
00041 const item_t leaf_item ) const;
00042
00043
00044 bool findSubsetTries(
00045 std::vector<item_t>& itemset,
00046 std::vector<const TRIE*>& subset_trie) const;
00047
00048 void generateCandidateAtParent(
00049 TRIE* trie, std::vector<item_t>& maybe_candidate);
00050 void generateCandidateAtParentNEE(
00051 TRIE* trie, std::vector<item_t>& maybe_candidate,
00052 std::vector<item_t>& NEEsum);
00053
00054
00055 };
00056
00057 template <class DF_D, class TRIE, bool NEE> inline void
00058 IntersectProPruner<DF_D, TRIE, NEE>::
00059 intersect( const TRIE* subset_trie) const
00060 {
00061 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00062 std::vector<item_t>::iterator it_i(ext_items.begin());
00063 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00064 {
00065 if(*it_e < *it_i)
00066 ++it_e;
00067 else if(*it_e > *it_i)
00068 it_i = ext_items.erase(it_i);
00069 else
00070 {
00071 ++it_e;
00072 ++it_i;
00073 }
00074 }
00075 ext_items.erase(it_i, ext_items.end());
00076 }
00077
00078 template <class DF_D, class TRIE, bool NEE> inline void
00079 IntersectProPruner<DF_D, TRIE, NEE>::
00080 intersectNEE( const TRIE* subset_trie ) const
00081 {
00082 typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00083 std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00084 std::vector<item_t>::iterator it_i(ext_items.begin());
00085 while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00086 {
00087 if(*it_e < *it_i)
00088 ++it_e;
00089 else if(*it_e > *it_i)
00090 {
00091 while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00092 ++it_nee;
00093 if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00094 ext_nee.push_back(*it_i);
00095 it_i = ext_items.erase(it_i);
00096 }
00097 else
00098 {
00099 ++it_e;
00100 ++it_i;
00101 }
00102 }
00103 if(it_i != ext_items.end())
00104 {
00105 std::vector<item_t>::iterator it_2(it_i);
00106 do
00107 {
00108 while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00109 ++it_nee;
00110 if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00111 ext_nee.push_back(*it_i);
00112 ++it_i;
00113 }
00114 while(it_i != ext_items.end());
00115 ext_items.erase(it_2, ext_items.end());
00116 }
00117 }
00118
00119 template <class DF_D, class TRIE, bool NEE> inline void
00120 IntersectProPruner<DF_D, TRIE, NEE>::
00121 filterNonExtenders( const std::vector<const TRIE*>& subset_tries,
00122 const item_t leaf_item ) const
00123 {
00124 std::vector<typename TRIE::const_iterator> subset_child_iters;
00125 subset_child_iters.reserve(subset_tries.size());
00126 for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin();
00127 it != subset_tries.end(); ++it)
00128 subset_child_iters.push_back((*it)->edgelist.begin());
00129
00130 for(std::vector<item_t>::size_type index = 0;
00131 index < subset_tries.size(); ++index)
00132 {
00133 while( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00134 (*(subset_child_iters[index])).first < leaf_item)
00135 ++subset_child_iters[index];
00136 if( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00137 (*(subset_child_iters[index])).first == leaf_item)
00138 intersect(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00139 else
00140 {
00141 ext_items.clear();
00142 break;
00143 }
00144 }
00145 }
00146
00147
00148 template <class DF_D, class TRIE, bool NEE> inline void
00149 IntersectProPruner<DF_D, TRIE, NEE>::
00150 filterNonExtendersNEE( const std::vector<const TRIE*>& subset_tries,
00151 const item_t leaf_item ) const
00152 {
00153 std::vector<typename TRIE::const_iterator> subset_child_iters;
00154 std::vector<vector<item_t>::const_iterator> subset_nee_iters;
00155 subset_child_iters.reserve(subset_tries.size());
00156 subset_nee_iters.reserve(subset_tries.size());
00157 for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin();
00158 it != subset_tries.end(); ++it)
00159 {
00160 subset_child_iters.push_back((*it)->edgelist.begin());
00161 subset_nee_iters.push_back((*it)->neelist.begin());
00162 }
00163
00164 for(std::vector<item_t>::size_type index = 0;
00165 index < subset_tries.size(); ++index)
00166 {
00167 while( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00168 (*(subset_child_iters[index])).first < leaf_item)
00169 ++subset_child_iters[index];
00170 if( subset_child_iters[index] != subset_tries[index]->edgelist.end() &&
00171 (*(subset_child_iters[index])).first == leaf_item)
00172 intersectNEE(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00173 else
00174 {
00175 while( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00176 *subset_nee_iters[index] < leaf_item)
00177 ++subset_nee_iters[index];
00178 if( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00179 *subset_nee_iters[index] == leaf_item)
00180 intersectNEE( subset_tries[index] );
00181 else
00182 {
00183 ext_items.clear();
00184 break;
00185 }
00186 }
00187 }
00188 }
00189 template <class DF_D, class TRIE, bool NEE> inline bool
00190 IntersectProPruner<DF_D, TRIE, NEE>::
00191 findSubsetTries( std::vector<item_t>& itemset,
00192 std::vector<const TRIE*>& subset_tries ) const
00193 {
00194 assert(itemset.size() > 0);
00195 std::vector<item_t>::iterator item_it = itemset.end()-1;
00196 register item_t deleted_item = *item_it, temp_item;
00197 const TRIE* a_subset_trie;
00198
00199 itemset.pop_back();
00200 a_subset_trie = PARENT::main_trie.isIncluded( itemset, itemset.begin() );
00201 if(a_subset_trie)
00202 subset_tries.push_back(a_subset_trie);
00203 else
00204 {
00205 itemset.push_back(deleted_item);
00206 return false;
00207 }
00208 while(item_it != itemset.begin() )
00209 {
00210 --item_it;
00211 temp_item = *item_it;
00212 *item_it = deleted_item;
00213 deleted_item = temp_item;
00214 a_subset_trie = PARENT::main_trie.isIncluded( itemset, itemset.begin() );
00215 if(a_subset_trie)
00216 subset_tries.push_back(a_subset_trie);
00217 else
00218 {
00219 itemset.insert( item_it, deleted_item );
00220 return false;
00221 }
00222 }
00223 itemset.insert( item_it, deleted_item );
00224 return true;
00225 }
00226
00227 template <class DF_D, class TRIE, bool NEE> inline void
00228 IntersectProPruner<DF_D, TRIE, NEE>::
00229 generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00230 {
00231 std::vector<const TRIE*> subset_tries;
00232 const bool all_subset_included = findSubsetTries(
00233 maybe_candidate, subset_tries);
00234 typename TRIE::iterator itEdge( trie->edgelist.begin() ), itEdge2;
00235 TRIE* toExtend;
00236 while( itEdge != trie->edgelist.end() )
00237 {
00238 toExtend = static_cast<TRIE*>((*itEdge).second);
00239 PARENT::df_decoder.pushItemWithWrite( (*itEdge).first, toExtend->getCounter() );
00240 PARENT::df_decoder.popItem();
00241 if(all_subset_included)
00242 {
00243 itEdge2 = itEdge;
00244 ++itEdge2;
00245 ext_items.clear();
00246 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00247 ext_items.push_back((*itEdge2).first);
00248 filterNonExtenders( subset_tries, (*itEdge).first );
00249 extenders.clear();
00250 for(std::vector<item_t>::iterator it = ext_items.begin();
00251 it != ext_items.end(); ++it)
00252 extenders.push_back(Edge(*it, new TRIE(0)));
00253 if(extenders.empty())
00254 {
00255 delete toExtend;
00256 itEdge = trie->edgelist.erase(itEdge);
00257 }
00258 else
00259 {
00260 toExtend->edgelist.insert(extenders);
00261 ++itEdge;
00262 }
00263 }
00264 else
00265 {
00266 delete toExtend;
00267 itEdge = trie->edgelist.erase(itEdge);
00268 }
00269 }
00270 }
00271 template <class DF_D, class TRIE, bool NEE> inline void
00272 IntersectProPruner<DF_D, TRIE, NEE>::
00273 generateCandidateAtParentNEE(TRIE* trie, std::vector<item_t>& maybe_candidate,
00274 std::vector<item_t>& NEEsum)
00275 {
00276 std::vector<const TRIE*> subset_tries;
00277 NEEsum.insert( NEEsum.end(),
00278 trie->neelist.begin(), trie->neelist.end() );
00279
00280 if( findSubsetTries( maybe_candidate, subset_tries ) )
00281 {
00282 typename TRIE::iterator itEdge( trie->edgelist.begin() ), itEdge2;
00283 TRIE* toExtend;
00284 while( itEdge != trie->edgelist.end() )
00285 {
00286 toExtend = static_cast<TRIE*>((*itEdge).second);
00287 itEdge2 = itEdge;
00288 ++itEdge2;
00289 ext_items.clear();
00290 ext_nee.clear();
00291 for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00292 ext_items.push_back((*itEdge2).first);
00293 filterNonExtendersNEE( subset_tries, (*itEdge).first );
00294 extenders.clear();
00295 for(std::vector<item_t>::iterator it = ext_items.begin();
00296 it != ext_items.end(); ++it)
00297 extenders.push_back(Edge(*it, new TRIE(0)));
00298 if(extenders.empty() && ext_nee.empty() && NEEsum.size() < 1)
00299 {
00300 PARENT::df_decoder.pushItemWithWrite(
00301 (*itEdge).first,
00302 static_cast<TRIE*>(toExtend)->getCounter() );
00303 PARENT::df_decoder.popItem();
00304 delete toExtend;
00305 itEdge = trie->edgelist.erase(itEdge);
00306 }
00307 else
00308 {
00309 toExtend->edgelist.insert(extenders);
00310 toExtend->neelist.reserve(ext_nee.size());
00311 toExtend->neelist.insert( toExtend->neelist.end(),
00312 ext_nee.begin(), ext_nee.end() );
00313 ++itEdge;
00314 }
00315 }
00316 }
00317 else if(NEEsum.size() < 1)
00318 {
00319 typename TRIE::iterator itEdge( trie->edgelist.begin() );
00320 while( itEdge != trie->edgelist.end() )
00321 {
00322 PARENT::df_decoder.pushItemWithWrite(
00323 (*itEdge).first,
00324 static_cast<TRIE*>((*itEdge).second)->getCounter() );
00325 PARENT::df_decoder.popItem();
00326 delete static_cast<TRIE*>((*itEdge).second);
00327 itEdge = trie->edgelist.erase(itEdge);
00328 }
00329 }
00330 NEEsum.resize( NEEsum.size() - trie->neelist.size() );
00331 }
00332 }
00333 #endif