00001
00006 #ifndef __ECLAT_HPP_
00007 #define __ECLAT_HPP_ t
00008
00009
00010
00011
00012 #ifdef ECLAT_DEFECT
00013 static const bool EXTENSION_DESCRIPTION_DEFECTS = true;
00014 static const bool OMIT_FINAL_INCIDENCE_MATRIX = true;
00015 static const bool PRUNE_EQUISUPPORT_EXTENSIONS = true;
00016 static const bool EARLY_FILTERING = true;
00017
00018 static const bool INTERSECTION_ROWSWAPPING = false;
00019 #endif
00020
00021 #ifdef ECLAT_COVER
00022 static const bool EXTENSION_DESCRIPTION_DEFECTS = false;
00023 static const bool OMIT_FINAL_INCIDENCE_MATRIX = true;
00024 static const bool PRUNE_EQUISUPPORT_EXTENSIONS = true;
00025 static const bool EARLY_FILTERING = true;
00026
00027 static const bool INTERSECTION_ROWSWAPPING = false;
00028 #endif
00029
00030
00031
00032 #include "SparseBitmatrix_create.hpp"
00033 #include "SparseBitmatrix_setdifference.hpp"
00034 #include "SparseBitmatrix_intersection.hpp"
00035
00036
00037 #include "common/debug.hpp"
00038 #include "io/input/transaction_reader/TransactionReader.hpp"
00039 #include "common.hpp"
00040
00041 #include <vector>
00042
00043
00044 static const bool OUTPUT_HINTS = false;
00045
00046
00047
00048 template <class TRANSACTION_READER, class PATTERN_WRITER>
00049 class Eclat
00050 {
00051 public:
00052 Eclat(TRANSACTION_READER& tdb, CounterItemPairs& freqItems, int minSup, int numTransactions, PATTERN_WRITER& out) :
00053 tdb(tdb), freqItems(freqItems), minSup(minSup), numTransactions(numTransactions), out(out)
00054 {
00055 MAX_PATTERN_LENGTH = 1000;
00056 ptis = new (SparseBitmatrix*)[MAX_PATTERN_LENGTH];
00057 eeIterator = new typename PATTERN_WRITER::EquisupportItemOutputIterator(out);
00058 }
00059 ~Eclat() {};
00060
00061 void findFrequentPatterns();
00062
00063 private:
00064 TRANSACTION_READER& tdb;
00065 CounterItemPairs& freqItems;
00066 int minSup;
00067 int numTransactions;
00068 PATTERN_WRITER& out;
00069
00073 typename PATTERN_WRITER::EquisupportItemOutputIterator* eeIterator;
00074
00075
00076 int MAX_PATTERN_LENGTH;
00077
00081 int depth;
00082
00086 int maxdefect;
00087
00091 SparseBitmatrix** ptis;
00092
00096 void checkUpToTwoRows(SparseBitmatrix* iti, counter_t minSup, counter_t maxdefect);
00097
00101 void depthFirstWalk();
00102
00103
00104 };
00105
00106
00107 template <class TRANSACTION_READER, class PATTERN_WRITER>
00108 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::findFrequentPatterns() {
00109
00110 if (numTransactions >= minSup)
00111 out.write(numTransactions);
00112
00113
00114 std::cout << "build item/transaction incidence matrix..." << std::flush;
00115 SparseBitmatrix* iti = createSparseItemTransactionIncidenceMatrix(tdb, freqItems);
00116 int numItems = freqItems.size();
00117 std::cout << " - done" << std::endl;
00118
00119 #if DEBUG_LEVEL >= 20
00120 std::cout << "iti:" << std::endl;
00121 iti->dump();
00122 #endif
00123
00124 #if DEBUG_LEVEL >= 20
00125 std::cout << "item freqs: ";
00126 for(CounterItemPairs::iterator it_freqItems = freqItems.begin(); it_freqItems != freqItems.end(); ++it_freqItems )
00127 std::cout << it_freqItems->first << " ";
00128 std::cout << endl;
00129 #endif
00130
00131
00132 if (! EXTENSION_DESCRIPTION_DEFECTS) {
00133 std::cout << "compute covers..." << std::flush;
00134 for (int i = 0; i < MAX_PATTERN_LENGTH; i++)
00135 ptis[i] = NULL;
00136 ptis[0] = iti;
00137 depth = 0;
00138 depthFirstWalk();
00139 std::cout << " - done" << std::endl;
00140 return;
00141 }
00142
00143
00144 int rowCapacityIti2 = iti->computeMaxRowLength();
00145 if (EARLY_FILTERING)
00146 rowCapacityIti2 -= minSup;
00147 SparseBitmatrix* iti2 = new SparseBitmatrix(numItems-1, rowCapacityIti2);
00148 depth = 1;
00149
00150
00151 ptis[0] = iti;
00152 ptis[1] = iti2;
00153
00154
00155 std::cout << "compute defects..." << std::flush;
00156
00157
00158 for (int i = 0; i < numItems-1; i++) {
00159
00160 assert(i >= 0 && i < iti->nRows());
00161 out.pushItem(iti->rowLabel(i));
00162 int itiRowLengthi = iti->rowLength(i);
00163
00164 maxdefect = itiRowLengthi - minSup;
00165 int numPairs = setdifferenceOfSparseBitmatrix<EARLY_FILTERING,PRUNE_EQUISUPPORT_EXTENSIONS>(*iti, i, *iti2, maxdefect, *eeIterator);
00166
00167 if (numPairs > 0)
00168 depthFirstWalk();
00169
00170 out.write(itiRowLengthi);
00171 out.popItem();
00172 }
00173
00174 out.pushItem(iti->rowLabel(numItems-1));
00175 out.write(iti->rowLength(numItems-1));
00176 std::cout << " - done" << std::endl;
00177 }
00178
00179
00180
00181 template <class TRANSACTION_READER, class PATTERN_WRITER>
00182 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::checkUpToTwoRows(SparseBitmatrix* iti,
00183 counter_t minSup, counter_t maxdefect) {
00184 assert(iti->nRows() <= 2);
00185
00186
00187 out.pushItem(iti->rowLabel(0));
00188 if (OUTPUT_HINTS) out.hint(string("a"));
00189 out.write(EXTENSION_DESCRIPTION_DEFECTS
00190 ? minSup+maxdefect-iti->rowLength(0)
00191 : iti->rowLength(0));
00192 out.popItem();
00193
00194 if (iti->nRows() == 2) {
00195
00196 out.pushItem(iti->rowLabel(1));
00197 if (OUTPUT_HINTS) out.hint(string("b"));
00198 out.write(EXTENSION_DESCRIPTION_DEFECTS
00199 ? minSup+maxdefect-iti->rowLength(1)
00200 : iti->rowLength(1));
00201 out.popItem();
00202
00203
00204 int maxdef0 = maxdefect - iti->rowLength(0);
00205 int rowlen01 = EXTENSION_DESCRIPTION_DEFECTS
00206 ? cardinalitySetdifferenceBySparseBitmatrix_TwoRows<EARLY_FILTERING>(*iti, maxdef0)
00207 : cardinalityIntersectionSparseBitmatrix_TwoRows<EARLY_FILTERING>(*iti, minSup);
00208 if ((EXTENSION_DESCRIPTION_DEFECTS
00209 ? rowlen01 <= maxdef0
00210 : rowlen01 >= minSup)) {
00211
00212 out.pushItem(iti->rowLabel(0));
00213 out.pushItem(iti->rowLabel(1));
00214 if (OUTPUT_HINTS) out.hint(string("c"));
00215 out.write(EXTENSION_DESCRIPTION_DEFECTS
00216 ? minSup+maxdef0-rowlen01
00217 : rowlen01);
00218 out.popItem();
00219 out.popItem();
00220 }
00221 }
00222 }
00223
00224 template <class TRANSACTION_READER, class PATTERN_WRITER>
00225 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::depthFirstWalk() {
00226 #if DEBUG_LEVEL >= 10
00227 cout << endl
00228 << out.getCurrentPatternAsString() << " "
00229 << " (maxdefect=" << maxdefect << ", depth = " << depth <<", nrows = " << ptis[depth]->nRows() << "):" << endl;
00230 #endif
00231 #if DEBUG_LEVEL >= 30
00232 ptis[depth]->dump();
00233 #endif
00234
00235
00236 assert(ptis[depth] != NULL);
00237 SparseBitmatrix* pti = ptis[depth];
00238
00239 SparseBitmatrix* bmOut;
00240 if (OMIT_FINAL_INCIDENCE_MATRIX || pti->nRows() >= 2) {
00241 if (EXTENSION_DESCRIPTION_DEFECTS) {
00242 int rowCapacity = pti->maxRowLength();
00243 if (EARLY_FILTERING && maxdefect < rowCapacity)
00244 rowCapacity = maxdefect;
00245 bmOut = new SparseBitmatrix(pti->nRows()-1, rowCapacity);
00246 } else {
00247 bmOut = ptis[depth+1];
00248 if (bmOut == NULL)
00249 bmOut = new SparseBitmatrix(ptis[0]->nRows()-depth-1, &(ptis[0]->capacities()[depth+1]));
00250
00251
00252 }
00253 ptis[depth+1] = bmOut;
00254 } else
00255 bmOut = NULL;
00256
00257 int numSiblings = pti->nRows();
00258
00259
00260 depth++;
00261 for (int i = 0; i < numSiblings-1; i++) {
00262 assert(i < pti->capacity());
00263
00264 out.pushItem(pti->rowLabel(i));
00265
00266
00267 int numSiblingsNext;
00268 if (EXTENSION_DESCRIPTION_DEFECTS) {
00269 numSiblingsNext = setdifferenceBySparseBitmatrix<EARLY_FILTERING,PRUNE_EQUISUPPORT_EXTENSIONS>(*pti, i, *bmOut, maxdefect-pti->rowLength(i), *eeIterator);
00270 if (numSiblingsNext > 0) {
00271 maxdefect -= pti->rowLength(i);
00272 if (OMIT_FINAL_INCIDENCE_MATRIX && numSiblingsNext <= 2)
00273 checkUpToTwoRows(bmOut, minSup, maxdefect);
00274 else
00275 depthFirstWalk();
00276 maxdefect += pti->rowLength(i);
00277 }
00278 } else {
00279 numSiblingsNext = intersectSparseBitmatrix<EARLY_FILTERING, INTERSECTION_ROWSWAPPING, PRUNE_EQUISUPPORT_EXTENSIONS>(*pti, i, *bmOut, minSup, *eeIterator);
00280 if (numSiblingsNext > 0)
00281 depthFirstWalk();
00282 }
00283
00284
00285 assert(i >= 0 && i < pti->numRows);
00286 if (OUTPUT_HINTS) out.hint(string("d"));
00287 out.write(EXTENSION_DESCRIPTION_DEFECTS
00288 ? minSup+maxdefect-pti->rowLength(i)
00289 : pti->rowLength(i));
00290 out.popItem();
00291 }
00292
00293
00294 int i = numSiblings-1;
00295 out.pushItem(pti->rowLabel(i));
00296 if (OUTPUT_HINTS) out.hint(string("e"));
00297 out.write(EXTENSION_DESCRIPTION_DEFECTS
00298 ? minSup+maxdefect-pti->rowLength(i)
00299 : pti->rowLength(i));
00300 out.popItem();
00301
00302
00303 if (EXTENSION_DESCRIPTION_DEFECTS && bmOut != NULL) {
00304 delete bmOut;
00305 ptis[depth] = NULL;
00306 }
00307
00308 depth--;
00309 }
00310
00311
00312 #endif