00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <stdio.h>
00027 #include <getopt.h>
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <unistd.h>
00032 #include <inttypes.h>
00033
/* Short aliases for the fixed-width unsigned types used by the MD5 code in this file. */
00034 typedef uint8_t u8;
00035 typedef uint32_t u32;
00036 typedef uint64_t u64;
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
/*
 * In-place conversion of a single 32-bit word between little-endian byte
 * order and the host's native order.  `buf` is a pointer to the word and is
 * evaluated exactly once.
 *
 * These used to expand to nothing, which is only correct on little-endian
 * hosts.  The versions below go through the word's individual bytes, so they
 * are the identity on little-endian machines and a byte swap elsewhere.
 */

/* Reinterpret the four bytes at buf as little-endian and store the native value. */
#define __le32_to_cpus(buf) \
    do { \
        uint32_t *le32_word_ = (buf); \
        const unsigned char *le32_bytes_ = (const unsigned char *)le32_word_; \
        *le32_word_ = (uint32_t)le32_bytes_[0] | \
                      ((uint32_t)le32_bytes_[1] << 8) | \
                      ((uint32_t)le32_bytes_[2] << 16) | \
                      ((uint32_t)le32_bytes_[3] << 24); \
    } while (0)

/* Rewrite the native value at buf so that its bytes are in little-endian order. */
#define __cpu_to_le32s(buf) \
    do { \
        uint32_t *le32_word_ = (buf); \
        const uint32_t le32_value_ = *le32_word_; \
        unsigned char *le32_bytes_ = (unsigned char *)le32_word_; \
        le32_bytes_[0] = (unsigned char)(le32_value_ & 0xffu); \
        le32_bytes_[1] = (unsigned char)((le32_value_ >> 8) & 0xffu); \
        le32_bytes_[2] = (unsigned char)((le32_value_ >> 16) & 0xffu); \
        le32_bytes_[3] = (unsigned char)((le32_value_ >> 24) & 0xffu); \
    } while (0)
00050
00051
00052
/* MD5 geometry: digest and block sizes in bytes, block and state sizes in 32-bit words. */
#define MD5_DIGEST_SIZE 16
#define MD5_HMAC_BLOCK_SIZE 64
#define MD5_BLOCK_WORDS 16
#define MD5_HASH_WORDS 4

/*
 * The four bitwise mixing functions, one per group of sixteen steps in
 * md5_transform().  Every argument is parenthesized so that an argument
 * written as an expression (e.g. `a | b`) is combined as a unit instead of
 * being regrouped by operator precedence.
 */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))

/*
 * One MD5 step: add f(x, y, z) and `in` to w, rotate w left by s bits, then
 * add x.  w must be a modifiable 32-bit unsigned lvalue; it is evaluated
 * several times, so it must be free of side effects.  s must be in 1..31.
 */
#define MD5STEP(f, w, x, y, z, in, s) \
    ((w) += f(x, y, z) + (in), \
     (w) = ((w) << (s) | (w) >> (32 - (s))) + (x))
00065
/*
 * Running state of one MD5 computation, shared by md5_init(), md5_update()
 * and md5_final().
 */
00066 struct md5_ctx {
00067 u32 hash[MD5_HASH_WORDS]; /* four state words; seeded by md5_init(), updated by md5_transform() */
00068 u32 block[MD5_BLOCK_WORDS]; /* 64-byte staging buffer for the block currently being filled */
00069 u64 byte_count; /* total number of message bytes passed to md5_update() so far */
00070 };
00071
/*
 * md5_transform - fold one 16-word input block into the 4-word MD5 state.
 * @hash: four state words; read on entry and updated in place.
 * @in:   sixteen 32-bit message words (indices 0..15 are read); not modified.
 *
 * Copies the state into a, b, c and d, applies 64 MD5STEP operations in four
 * groups of sixteen (using F1, F2, F3 and F4 in turn), and finally adds the
 * four working values back onto the state words.
 */
00072 static void md5_transform(u32 *hash, u32 const *in)
00073 {
00074 u32 a, b, c, d;
00075
/* Work on copies so the original state can be added back at the end. */
00076 a = hash[0];
00077 b = hash[1];
00078 c = hash[2];
00079 d = hash[3];
00080
/* Group 1: F1, message words taken in order 0..15, rotations 7/12/17/22. */
00081 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
00082 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
00083 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
00084 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
00085 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
00086 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
00087 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
00088 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
00089 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
00090 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
00091 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
00092 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
00093 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
00094 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
00095 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
00096 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
00097
/* Group 2: F2, message index advances by 5 (mod 16) starting at 1, rotations 5/9/14/20. */
00098 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
00099 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
00100 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
00101 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
00102 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
00103 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
00104 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
00105 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
00106 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
00107 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
00108 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
00109 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
00110 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
00111 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
00112 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
00113 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
00114
/* Group 3: F3, message index advances by 3 (mod 16) starting at 5, rotations 4/11/16/23. */
00115 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
00116 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
00117 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
00118 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
00119 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
00120 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
00121 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
00122 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
00123 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
00124 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
00125 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
00126 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
00127 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
00128 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
00129 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
00130 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
00131
/* Group 4: F4, message index advances by 7 (mod 16) starting at 0, rotations 6/10/15/21. */
00132 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
00133 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
00134 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
00135 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
00136 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
00137 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
00138 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
00139 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
00140 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
00141 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
00142 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
00143 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
00144 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
00145 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
00146 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
00147 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
00148
/* Add the mixed working values back onto the state (wraps modulo 2^32). */
00149 hash[0] += a;
00150 hash[1] += b;
00151 hash[2] += c;
00152 hash[3] += d;
00153 }
00154
00155
00156 static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
00157 {
00158 while (words--) {
00159 __le32_to_cpus(buf);
00160 buf++;
00161 }
00162 }
00163
00164 static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
00165 {
00166 while (words--) {
00167 __cpu_to_le32s(buf);
00168 buf++;
00169 }
00170 }
00171
00172 static inline void md5_transform_helper(struct md5_ctx *ctx)
00173 {
00174 le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
00175 md5_transform(ctx->hash, ctx->block);
00176 }
00177
00178 static void md5_init(void *ctx)
00179 {
00180 struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00181
00182 mctx->hash[0] = 0x67452301;
00183 mctx->hash[1] = 0xefcdab89;
00184 mctx->hash[2] = 0x98badcfe;
00185 mctx->hash[3] = 0x10325476;
00186 mctx->byte_count = 0;
00187 }
00188
00189 static void md5_update(void *ctx, const u8 *data, unsigned int len)
00190 {
00191 struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00192 const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
00193
00194 mctx->byte_count += len;
00195
00196 if (avail > len) {
00197 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
00198 data, len);
00199 return;
00200 }
00201
00202 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
00203 data, avail);
00204
00205 md5_transform_helper(mctx);
00206 data += avail;
00207 len -= avail;
00208
00209 while (len >= sizeof(mctx->block)) {
00210 memcpy(mctx->block, data, sizeof(mctx->block));
00211 md5_transform_helper(mctx);
00212 data += sizeof(mctx->block);
00213 len -= sizeof(mctx->block);
00214 }
00215
00216 memcpy(mctx->block, data, len);
00217 }
00218
00219 static void md5_final(void *ctx, u8 *out)
00220 {
00221 struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00222 const unsigned int offset = mctx->byte_count & 0x3f;
00223 char *p = (char *)mctx->block + offset;
00224 int padding = 56 - (offset + 1);
00225
00226 *p++ = 0x80;
00227 if (padding < 0) {
00228 memset(p, 0x00, padding + sizeof (u64));
00229 md5_transform_helper(mctx);
00230 p = (char *)mctx->block;
00231 padding = 56;
00232 }
00233
00234 memset(p, 0, padding);
00235 mctx->block[14] = mctx->byte_count << 3;
00236 mctx->block[15] = mctx->byte_count >> 29;
00237 le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
00238 sizeof(u64)) / sizeof(u32));
00239 md5_transform(mctx->hash, mctx->block);
00240 cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(u32));
00241 memcpy(out, mctx->hash, sizeof(mctx->hash));
00242 memset(mctx, 0, sizeof(*mctx));
00243 }
00244
00245
00246
00247
00248
00249 #include <vector>
00250 using namespace std;
00251 #include <algorithm>
00252
/*
 * Prints the command-line help to stderr and terminates the process with
 * exit status 1.  s is the program name shown in the usage line.
 */
void usage(char *s) {
    fprintf(stderr, "Usage: %s [-h] infilename\n", s);
    fputs("Computes a checksum from the FIMI output file infilename\n", stderr);
    fputs("-h prints this help text\n", stderr);
    exit(1);
}
00259
/* NOTE(review): not referenced by any code visible in this file; appears to be unused -- confirm before removing. */
00260 char *infilename;
00261
00262 void parseparams(int argc, char *argv[]) {
00263 char ch;
00264 while ((ch=getopt(argc, argv, "h"))>=0) {
00265 switch(ch) {
00266 case 'h':
00267 usage(argv[0]);
00268 break;
00269 case '?':
00270 fprintf(stderr,"unknown option %c\n",optopt);
00271 usage(argv[0]);
00272 break;
00273 default:
00274 fprintf(stderr,"unknown option %c\n",ch);
00275 usage(argv[0]);
00276 break;
00277 }
00278 }
00279 if (optind>=argc) {
00280 usage(argv[0]);
00281 }
00282 }
00283
00284
/*
 * Reads the run of decimal digits at cursor into value and leaves cursor on
 * the first non-digit.  An empty run yields 0.  Returns false, with cursor
 * on the offending digit and value untouched, when the number does not fit
 * in an int.
 */
static bool parse_fimi_number(char *&cursor, int &value) {
    /* Largest value an int can hold, derived without needing <limits.h>. */
    const unsigned int int_max = ~0u >> 1;
    unsigned int acc = 0;

    while ((*cursor >= '0') && (*cursor <= '9')) {
        const unsigned int digit = (unsigned int)(*cursor - '0');
        /* Equivalent to acc * 10 + digit > int_max, but cannot overflow. */
        if (acc > (int_max - digit) / 10) {
            return false;
        }
        acc = acc * 10 + digit;
        ++cursor;
    }
    value = (int)acc;
    return true;
}

/*
 * Parses one line of FIMI output: zero or more space-separated non-negative
 * item numbers followed by the support count in parentheses, e.g.
 * "3 1 2 (42)\n".
 *
 * supp    - receives the support count; left unchanged on failure.
 * itemset - cleared, then filled with the items in the order they appear.
 * linebuf - NUL-terminated line; it is only read.
 *
 * Returns true on success.  On a format error, or on a number too large for
 * an int, a message is written to stderr and false is returned.  The line may
 * end either with a newline or directly with the terminating NUL, so a final
 * line without a trailing newline is accepted.
 */
bool parse_fimi_line(int & supp, std::vector<int> &itemset, char *linebuf) {
    char *bptr = linebuf;

    itemset.clear();
    while ((*bptr) && (*bptr != '(') && (*bptr != '\n')) {
        if (!((*bptr >= '0') && (*bptr <= '9'))) {
            fprintf(stderr,"Input file format error: Expecting number, got %c Erroneous line:\n%s",*bptr,linebuf);
            return false;
        }
        int item = 0;
        if (!parse_fimi_number(bptr, item)) {
            fprintf(stderr,"Input file format error: Number out of range. Erroneous line:\n%s",linebuf);
            return false;
        }
        itemset.push_back(item);
        while (*bptr == ' ') {
            bptr++;
        }
    }
    if (*bptr != '(') {
        fprintf(stderr,"Input file format error. Expecting (, got %c. Erroneous line:\n%s",*bptr,linebuf);
        return false;
    }
    bptr++;

    int support = 0;
    if (!parse_fimi_number(bptr, support)) {
        fprintf(stderr,"Input file format error: Number out of range. Erroneous line:\n%s",linebuf);
        return false;
    }
    if (*bptr != ')') {
        fprintf(stderr,"Input file format error. Expecting ), got %c. Erroneous line:\n%s",*bptr,linebuf);
        return false;
    }
    bptr++;
    if ((*bptr != '\n') && (*bptr != '\0')) {
        fprintf(stderr,"Input file format error. Expecting newline, got %c. Erroneous line:\n%s",*bptr,linebuf);
        return false;
    }
    supp = support;
    return true;
}
00328
00329 void printsum(uint32_t* sum) {
00330 static char* hexdigits="0123456789abcdef";
00331 for(size_t i=0;i<MD5_HASH_WORDS;++i) {
00332 for (int q=7;q>=0;--q) {
00333 int c=sum[i];
00334 c>>=(q*4);
00335 c&=15;
00336 putchar(hexdigits[c]);
00337 }
00338 }
00339 }
00340
00341 bool checksumfile(char *name) {
00342 char buffer[128*1024];
00343 FILE *fin = fopen(name,"r");
00344 if (!fin) {
00345 perror("open");
00346 return false;
00347 }
00348 setbuffer(fin,buffer,sizeof(buffer));
00349 struct md5_ctx ctx;
00350 uint32_t finalsum1[MD5_HASH_WORDS];
00351 uint32_t finalsum2[MD5_HASH_WORDS];
00352 bzero(finalsum1,MD5_HASH_WORDS*sizeof(int));
00353 bzero(finalsum2,MD5_HASH_WORDS*sizeof(int));
00354 char linebuf[10000];
00355 vector<int> itemset;
00356 int supp;
00357 int lines=0;
00358 while(!feof(fin)) {
00359 uint32_t currsum[MD5_HASH_WORDS];
00360 if (!fgets(linebuf,sizeof(linebuf),fin)) {
00361 break;
00362 }
00363 parse_fimi_line(supp,itemset,linebuf);
00364 md5_init(&ctx);
00365 md5_update(&ctx,(u8*)&supp,sizeof(supp));
00366 sort(itemset.begin(),itemset.end());
00367 md5_update(&ctx,(u8*)&(itemset[0]),itemset.size()*sizeof(int));
00368 md5_final(&ctx,(u8*)currsum);
00369 for(size_t i=0;i<MD5_HASH_WORDS;++i) {
00370 finalsum1[i]^=currsum[i];
00371 finalsum2[i]+=currsum[i];
00372 }
00373 lines++;
00374 }
00375 for (int i=0;i<MD5_HASH_WORDS;++i) {
00376 finalsum1[i]^=finalsum2[i];
00377 }
00378 printsum(finalsum1);
00379 printf(" (%d lines) %s\n",lines,name);
00380 return true;
00381 }
00382
00383
00384 int main(int argc, char *argv[]) {
00385 parseparams(argc,argv);
00386 while(optind<argc){
00387 if (!checksumfile(argv[optind++])) {
00388 exit(1);
00389 }
00390 }
00391 return 0;
00392 }