Browse code

cache final fixes

aCaB authored on 2010/01/15 11:00:15
Showing 3 changed files
... ...
@@ -27,22 +27,33 @@
27 27
 #include <pthread.h>
28 28
 #include <assert.h>
29 29
 
30
-#include <sys/types.h>
31
-#include <sys/stat.h>
32
-#include <fcntl.h>
33
-#include <sys/mman.h>
34
-
35 30
 #include "md5.h"
36 31
 #include "mpool.h"
37 32
 #include "clamav.h"
38 33
 #include "cache.h"
39 34
 #include "fmap.h"
40 35
 
36
+
37
+/* The number of root trees and the chooser function 
38
+   Each tree is protected by a mutex against concurrent access */
39
+/* #define TREES 1 */
40
+/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
41
+#define TREES 256
42
+static inline unsigned int getkey(uint8_t *hash) { return *hash; }
43
+/* #define TREES 4096 */
44
+/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
45
+/* #define TREES 65536 */
46
+/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
47
+
48
+/* The number of nodes in each tree */
41 49
 #define NODES 256
42 50
 
43
-/*#define USE_LRUHASHCACHE*/
51
+
52
+/* The replacement policy algorithm to use */
53
+/* #define USE_LRUHASHCACHE */
44 54
 #define USE_SPLAY
45 55
 
56
+/* LRUHASHCACHE --------------------------------------------------------------------- */
46 57
 #ifdef USE_LRUHASHCACHE
47 58
 struct cache_key {
48 59
     int64_t digest[2];
... ...
@@ -241,8 +252,10 @@ static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) {
241 241
 
242 242
 #endif /* USE_LRUHASHCACHE */
243 243
 
244
+/* SPLAY --------------------------------------------------------------------- */
244 245
 #ifdef USE_SPLAY
245
-struct node {
246
+
247
+struct node { /* a node */
246 248
     int64_t digest[2];
247 249
     struct node *left;
248 250
     struct node *right;
... ...
@@ -252,13 +265,14 @@ struct node {
252 252
     uint32_t size;
253 253
 };
254 254
 
255
-struct cache_set {
255
+struct cache_set { /* a tree */
256 256
     struct node *data;
257 257
     struct node *root;
258 258
     struct node *first;
259 259
     struct node *last;
260 260
 };
261 261
 
262
+/* Allocates all the nodes and sets up the replacement chain */
262 263
 static int cacheset_init(struct cache_set *cs, mpool_t *mempool) {
263 264
     unsigned int i;
264 265
     cs->data = mpool_calloc(mempool, NODES,  sizeof(*cs->data));
... ...
@@ -278,52 +292,53 @@ static int cacheset_init(struct cache_set *cs, mpool_t *mempool) {
278 278
     return 0;
279 279
 }
280 280
 
281
+/* Frees all the nodes */
281 282
 static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) {
282 283
     mpool_free(mempool, cs->data);
283 284
     cs->data = NULL;
284 285
 }
285 286
 
286
-static inline int cmp(int64_t *a, int64_t *b) {
287
+/* The left/right chooser for the splay tree */
288
+static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb) {
287 289
     if(a[1] < b[1]) return -1;
288 290
     if(a[1] > b[1]) return 1;
289
-    if(a[0] == b[0]) return 0;
290 291
     if(a[0] < b[0]) return -1;
291
-    return 1;
292
+    if(a[0] > b[0]) return 1;
293
+    if(sa < sb) return -1;
294
+    if(sa > sb) return 1;
295
+    return 0;
292 296
 }
293 297
 
294
-/* static inline int cmp(int64_t *a, int64_t *b) { */
295
-/*     return memcmp(a, b, 16); */
296
-/* } */
297
-
298 298
 
299
-/*#define PRINT_TREE*/
299
+/* #define PRINT_TREE */
300 300
 #ifdef PRINT_TREE
301 301
 #define ptree printf
302 302
 #else
303
-#define ptree (void)
303
+#define ptree(...)
304 304
 #endif
305 305
 
306
-/*#define CHECK_TREE*/
306
+/* Debug function to print the tree and check its consistency */
307
+/* #define CHECK_TREE */
307 308
 #ifdef CHECK_TREE
308 309
 static int printtree(struct cache_set *cs, struct node *n, int d) {
309 310
     int i;
310 311
     int ab = 0;
311 312
     if (n == NULL) return 0;
312
-    if(n == cs->root) ptree("--------------------------\n");
313
+    if(n == cs->root) { ptree("--------------------------\n"); }
313 314
     ab |= printtree(cs, n->right, d+1);
314 315
     if(n->right) {
315
-	if(cmp(n->digest, n->right->digest) >= 0) {
316
+	if(cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) {
316 317
 	    for (i=0; i<d; i++) ptree("        ");
317
-	    ptree("^^^^ %lld >= %lld - %lld\n", n->digest[1], n->right->digest[1], cmp(n->digest, n->right->digest));
318
+	    ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]);
318 319
 	    ab = 1;
319 320
 	}
320 321
     }
321 322
     for (i=0; i<d; i++) ptree("        ");
322 323
     ptree("%08x(%02u)\n", n->digest[1]>>48, n - cs->data);
323 324
     if(n->left) {
324
-	if(cmp(n->digest, n->left->digest) <= 0) {
325
+	if(cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) {
325 326
 	    for (i=0; i<d; i++) ptree("        ");
326
-	    ptree("vvvv %lld <= %lld - %lld\n", n->digest[1], n->left->digest[1], cmp(n->digest, n->left->digest));
327
+	    ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]);
327 328
 	    ab = 1;
328 329
 	}
329 330
     }
... ...
@@ -350,7 +365,8 @@ static int printtree(struct cache_set *cs, struct node *n, int d) {
350 350
 #define printtree(a,b,c) (0)
351 351
 #endif
352 352
 
353
-static int splay(int64_t *md5, struct cache_set *cs) {
353
+/* Looks up a node and splays it up to the root of the tree */
354
+static int splay(int64_t *md5, size_t len, struct cache_set *cs) {
354 355
     struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
355 356
     int comp, found = 0;
356 357
 
... ...
@@ -358,10 +374,10 @@ static int splay(int64_t *md5, struct cache_set *cs) {
358 358
 	return 0;
359 359
 
360 360
     while(1) {
361
-	comp = cmp(md5, root->digest);
361
+	comp = cmp(md5, len, root->digest, root->size);
362 362
 	if(comp < 0) {
363 363
 	    if(!root->left) break;
364
-	    if(cmp(md5, root->left->digest) < 0) {
364
+	    if(cmp(md5, len, root->left->digest, root->left->size) < 0) {
365 365
 		temp = root->left;
366 366
                 root->left = temp->right;
367 367
 		if(temp->right) temp->right->up = root;
... ...
@@ -376,7 +392,7 @@ static int splay(int64_t *md5, struct cache_set *cs) {
376 376
             root = root->left;
377 377
 	} else if(comp > 0) {
378 378
 	    if(!root->right) break;
379
-	    if(cmp(md5, root->right->digest) > 0) {
379
+	    if(cmp(md5, len, root->right->digest, root->right->size) > 0) {
380 380
 		temp = root->right;
381 381
                 root->right = temp->left;
382 382
 		if(temp->left) temp->left->up = root;
... ...
@@ -408,11 +424,13 @@ static int splay(int64_t *md5, struct cache_set *cs) {
408 408
     return found;
409 409
 }
410 410
 
411
+
412
+/* Looks up a hash in the tree and maintains the replacement chain */
411 413
 static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size) {
412 414
     int64_t hash[2];
413 415
 
414 416
     memcpy(hash, md5, 16);
415
-    if(splay(hash, cs)) {
417
+    if(splay(hash, size, cs)) {
416 418
 	struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
417 419
 #ifdef PRINT_CHAINS
418 420
 	printf("promoting %02d\n", p - cs->data);
... ...
@@ -479,12 +497,15 @@ static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size
479 479
     return 0;
480 480
 }
481 481
 
482
+/* If the hash is present nothing happens.
483
+   Otherwise a new node is created for the hash, picking one from the beginning of the chain.
484
+   Used nodes are moved to the end of the chain */
482 485
 static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) {
483 486
     struct node *newnode;
484 487
     int64_t hash[2];
485 488
 
486 489
     memcpy(hash, md5, 16);
487
-    if(splay(hash, cs))
490
+    if(splay(hash, size, cs))
488 491
 	return; /* Already there */
489 492
 
490 493
     ptree("1:\n");
... ...
@@ -496,7 +517,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
496 496
     /*#define TAKE_FIRST*/
497 497
 #ifdef TAKE_FIRST
498 498
     if((newnode->left || newnode->right || newnode->up)) {
499
-	if(!splay(newnode->digest, cs)) {
499
+	if(!splay(newnode->digest, newnode->size, cs)) {
500 500
 	    cli_errmsg("WTF\n");
501 501
 	    abort();
502 502
 	}
... ...
@@ -509,7 +530,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
509 509
 	} else {
510 510
 	    cs->root = newnode->left;
511 511
 	    newnode->left->up = NULL;
512
-	    if(splay(newnode->digest, cs)) {
512
+	    if(splay(newnode->digest, newnode->size, cs)) {
513 513
 		cli_errmsg("WTF #2\n");
514 514
 		abort();
515 515
 	    }
... ...
@@ -520,7 +541,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
520 520
 	newnode->up = NULL;
521 521
 	newnode->right = NULL;
522 522
 	newnode->left = NULL;
523
-	if(splay(hash, cs)) {
523
+	if(splay(hash, size, cs)) {
524 524
 	    cli_errmsg("WTF #3\n");
525 525
 	    abort();
526 526
 	}
... ...
@@ -570,7 +591,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
570 570
 	newnode->left = NULL;
571 571
 	newnode->right = NULL;
572 572
     } else {
573
-	if(cmp(hash, cs->root->digest) < 0) {
573
+	if(cmp(hash, size, cs->root->digest, cs->root->size) < 0) {
574 574
 	    newnode->left = cs->root->left;
575 575
 	    newnode->right = cs->root;
576 576
 	    cs->root->left = NULL;
... ...
@@ -585,6 +606,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
585 585
     newnode->digest[0] = hash[0];
586 586
     newnode->digest[1] = hash[1];
587 587
     newnode->up = NULL;
588
+    newnode->size = size;
588 589
     cs->root = newnode;
589 590
 
590 591
     ptree("3: %lld\n", hash[1]);
... ...
@@ -594,24 +616,15 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
594 594
 }
595 595
 #endif /* USE_SPLAY */
596 596
 
597
-/* #define TREES 1 */
598
-/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
599
-
600
-#define TREES 256
601
-static inline unsigned int getkey(uint8_t *hash) { return *hash; }
602
-
603
-/* #define TREES 4096 */
604
-/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
605 597
 
606
-/* #define TREES 65536 */
607
-/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
598
+/* COMMON STUFF --------------------------------------------------------------------- */
608 599
 
609 600
 struct CACHE {
610 601
     struct cache_set cacheset;
611 602
     pthread_mutex_t mutex;
612 603
 };
613 604
 
614
-
605
+/* Allocates the trees for the engine cache */
615 606
 int cli_cache_init(struct cl_engine *engine) {
616 607
     static struct CACHE *cache;
617 608
     unsigned int i, j;
... ...
@@ -645,6 +658,7 @@ int cli_cache_init(struct cl_engine *engine) {
645 645
     return 0;
646 646
 }
647 647
 
648
+/* Frees the engine cache */
648 649
 void cli_cache_destroy(struct cl_engine *engine) {
649 650
     static struct CACHE *cache;
650 651
     unsigned int i;
... ...
@@ -659,7 +673,8 @@ void cli_cache_destroy(struct cl_engine *engine) {
659 659
     mpool_free(engine->mempool, cache);
660 660
 }
661 661
 
662
-static int cache_lookup_hash(unsigned char *md5, struct CACHE *cache) {
662
+/* Looks up a hash in the proper tree */
663
+static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache) {
663 664
     unsigned int key = getkey(md5);
664 665
     int ret = CL_VIRUS;
665 666
     struct CACHE *c;
... ...
@@ -670,13 +685,14 @@ static int cache_lookup_hash(unsigned char *md5, struct CACHE *cache) {
670 670
 	return ret;
671 671
     }
672 672
 
673
-    ret = (cacheset_lookup(&c->cacheset, md5, 1024)) ? CL_CLEAN : CL_VIRUS;
674
-    if(ret == CL_CLEAN) cli_warnmsg("cached\n");
673
+    ret = (cacheset_lookup(&c->cacheset, md5, len)) ? CL_CLEAN : CL_VIRUS;
675 674
     pthread_mutex_unlock(&c->mutex);
675
+    /* if(ret == CL_CLEAN) cli_warnmsg("cached\n"); */
676 676
     return ret;
677 677
 }
678 678
 
679
-void cache_add(unsigned char *md5, cli_ctx *ctx) {
679
+/* Adds a hash to the cache */
680
+void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx) {
680 681
     unsigned int key = getkey(md5);
681 682
     struct CACHE *c;
682 683
 
... ...
@@ -690,10 +706,10 @@ void cache_add(unsigned char *md5, cli_ctx *ctx) {
690 690
     }
691 691
 
692 692
 #ifdef USE_LRUHASHCACHE
693
-    cacheset_add(&c->cacheset, md5, 1024, ctx->engine->mempool);
693
+    cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool);
694 694
 #else
695 695
 #ifdef USE_SPLAY
696
-    cacheset_add(&c->cacheset, md5, 1024);
696
+    cacheset_add(&c->cacheset, md5, size);
697 697
 #else
698 698
 #error #define USE_SPLAY or USE_LRUHASHCACHE
699 699
 #endif
... ...
@@ -703,6 +719,8 @@ void cache_add(unsigned char *md5, cli_ctx *ctx) {
703 703
     return;
704 704
 }
705 705
 
706
+/* Hashes a file into the provided buffer and looks it up in the cache.
707
+   Returns CL_CLEAN if the hash is cached, CL_VIRUS if not (FIXME: or an error) */
706 708
 int cache_check(unsigned char *hash, cli_ctx *ctx) {
707 709
     fmap_t *map = *ctx->fmap;
708 710
     size_t todo = map->len, at = 0;
... ...
@@ -722,5 +740,5 @@ int cache_check(unsigned char *hash, cli_ctx *ctx) {
722 722
 	cli_md5_update(&md5, buf, readme);
723 723
     }
724 724
     cli_md5_final(hash, &md5);
725
-    return cache_lookup_hash(hash, ctx->engine->cache);
725
+    return cache_lookup_hash(hash, map->len, ctx->engine->cache);
726 726
 }
... ...
@@ -24,7 +24,7 @@
24 24
 #include "clamav.h"
25 25
 #include "others.h"
26 26
 
27
-void cache_add(unsigned char *md5, cli_ctx *ctx);
27
+void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx);
28 28
 int cache_check(unsigned char *hash, cli_ctx *ctx);
29 29
 int cli_cache_init(struct cl_engine *engine);
30 30
 void cli_cache_destroy(struct cl_engine *engine);
... ...
@@ -1803,7 +1803,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1803 1803
 	struct stat sb;
1804 1804
 	uint8_t typercg = 1;
1805 1805
 	cli_file_t current_container_type = ctx->container_type; /* TODO: container tracking code TBD - bb#1293 */
1806
-	size_t current_container_size = ctx->container_size;
1806
+	size_t current_container_size = ctx->container_size, hashed_size;
1807 1807
 	unsigned char hash[16];
1808 1808
 
1809 1809
     if(ctx->engine->maxreclevel && ctx->recursion > ctx->engine->maxreclevel) {
... ...
@@ -1846,7 +1846,8 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1846 1846
 	ctx->fmap--;
1847 1847
 	return CL_CLEAN;
1848 1848
     }
1849
-    
1849
+    hashed_size = (*ctx->fmap)->len;
1850
+
1850 1851
     if(!ctx->options || (ctx->recursion == ctx->engine->maxreclevel)) { /* raw mode (stdin, etc.) or last level of recursion */
1851 1852
 	if(ctx->recursion == ctx->engine->maxreclevel)
1852 1853
 	    cli_dbgmsg("cli_magic_scandesc: Hit recursion limit, only scanning raw file\n");
... ...
@@ -1856,7 +1857,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1856 1856
 	if((ret = cli_fmap_scandesc(ctx, 0, 0, NULL, AC_SCAN_VIR, hash)) == CL_VIRUS)
1857 1857
 	    cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
1858 1858
 	else
1859
-	    cache_add(hash, ctx);
1859
+	    cache_add(hash, hashed_size, ctx);
1860 1860
 
1861 1861
 	funmap(*ctx->fmap);
1862 1862
 	ctx->fmap--; 
... ...
@@ -2183,7 +2184,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
2183 2183
 	case CL_EMAXFILES:
2184 2184
 	    cli_dbgmsg("Descriptor[%d]: %s\n", desc, cl_strerror(ret));
2185 2185
 	case CL_CLEAN:
2186
-	    cache_add(hash, ctx);
2186
+	    cache_add(hash, hashed_size, ctx);
2187 2187
 	    return CL_CLEAN;
2188 2188
 	default:
2189 2189
 	    return ret;