... | ... |
@@ -27,22 +27,33 @@ |
27 | 27 |
#include <pthread.h> |
28 | 28 |
#include <assert.h> |
29 | 29 |
|
30 |
-#include <sys/types.h> |
|
31 |
-#include <sys/stat.h> |
|
32 |
-#include <fcntl.h> |
|
33 |
-#include <sys/mman.h> |
|
34 |
- |
|
35 | 30 |
#include "md5.h" |
36 | 31 |
#include "mpool.h" |
37 | 32 |
#include "clamav.h" |
38 | 33 |
#include "cache.h" |
39 | 34 |
#include "fmap.h" |
40 | 35 |
|
36 |
+ |
|
37 |
+/* The number of root trees and the chooser function |
|
38 |
+ Each tree is protected by a mutex against concurrent access */ |
|
39 |
+/* #define TREES 1 */ |
|
40 |
+/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */ |
|
41 |
+#define TREES 256 |
|
42 |
+static inline unsigned int getkey(uint8_t *hash) { return *hash; } |
|
43 |
+/* #define TREES 4096 */ |
|
44 |
+/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */ |
|
45 |
+/* #define TREES 65536 */ |
|
46 |
+/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */ |
|
47 |
+ |
|
48 |
+/* The number of nodes in each tree */ |
|
41 | 49 |
#define NODES 256 |
42 | 50 |
|
43 |
-/*#define USE_LRUHASHCACHE*/ |
|
51 |
+ |
|
52 |
+/* The replacement policy algorithm to use */ |
|
53 |
+/* #define USE_LRUHASHCACHE */ |
|
44 | 54 |
#define USE_SPLAY |
45 | 55 |
|
56 |
+/* LRUHASHCACHE --------------------------------------------------------------------- */ |
|
46 | 57 |
#ifdef USE_LRUHASHCACHE |
47 | 58 |
struct cache_key { |
48 | 59 |
int64_t digest[2]; |
... | ... |
@@ -241,8 +252,10 @@ static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) { |
241 | 241 |
|
242 | 242 |
#endif /* USE_LRUHASHCACHE */ |
243 | 243 |
|
244 |
+/* SPLAY --------------------------------------------------------------------- */ |
|
244 | 245 |
#ifdef USE_SPLAY |
245 |
-struct node { |
|
246 |
+ |
|
247 |
+struct node { /* a node */ |
|
246 | 248 |
int64_t digest[2]; |
247 | 249 |
struct node *left; |
248 | 250 |
struct node *right; |
... | ... |
@@ -252,13 +265,14 @@ struct node { |
252 | 252 |
uint32_t size; |
253 | 253 |
}; |
254 | 254 |
|
255 |
-struct cache_set { |
|
255 |
+struct cache_set { /* a tree */ |
|
256 | 256 |
struct node *data; |
257 | 257 |
struct node *root; |
258 | 258 |
struct node *first; |
259 | 259 |
struct node *last; |
260 | 260 |
}; |
261 | 261 |
|
262 |
+/* Allocates all the nodes and sets up the replacement chain */ |
|
262 | 263 |
static int cacheset_init(struct cache_set *cs, mpool_t *mempool) { |
263 | 264 |
unsigned int i; |
264 | 265 |
cs->data = mpool_calloc(mempool, NODES, sizeof(*cs->data)); |
... | ... |
@@ -278,52 +292,53 @@ static int cacheset_init(struct cache_set *cs, mpool_t *mempool) { |
278 | 278 |
return 0; |
279 | 279 |
} |
280 | 280 |
|
281 |
+/* Frees all the nodes */ |
|
281 | 282 |
static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool) { |
282 | 283 |
mpool_free(mempool, cs->data); |
283 | 284 |
cs->data = NULL; |
284 | 285 |
} |
285 | 286 |
|
286 |
-static inline int cmp(int64_t *a, int64_t *b) { |
|
287 |
+/* The left/right chooser for the splay tree */ |
|
288 |
+static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb) { |
|
287 | 289 |
if(a[1] < b[1]) return -1; |
288 | 290 |
if(a[1] > b[1]) return 1; |
289 |
- if(a[0] == b[0]) return 0; |
|
290 | 291 |
if(a[0] < b[0]) return -1; |
291 |
- return 1; |
|
292 |
+ if(a[0] > b[0]) return 1; |
|
293 |
+ if(sa < sb) return -1; |
|
294 |
+ if(sa > sb) return 1; |
|
295 |
+ return 0; |
|
292 | 296 |
} |
293 | 297 |
|
294 |
-/* static inline int cmp(int64_t *a, int64_t *b) { */ |
|
295 |
-/* return memcmp(a, b, 16); */ |
|
296 |
-/* } */ |
|
297 |
- |
|
298 | 298 |
|
299 |
-/*#define PRINT_TREE*/ |
|
299 |
+/* #define PRINT_TREE */ |
|
300 | 300 |
#ifdef PRINT_TREE |
301 | 301 |
#define ptree printf |
302 | 302 |
#else |
303 |
-#define ptree (void) |
|
303 |
+#define ptree(...) |
|
304 | 304 |
#endif |
305 | 305 |
|
306 |
-/*#define CHECK_TREE*/ |
|
306 |
+/* Debug function to print the tree and check its consistency */ |
|
307 |
+/* #define CHECK_TREE */ |
|
307 | 308 |
#ifdef CHECK_TREE |
308 | 309 |
static int printtree(struct cache_set *cs, struct node *n, int d) { |
309 | 310 |
int i; |
310 | 311 |
int ab = 0; |
311 | 312 |
if (n == NULL) return 0; |
312 |
- if(n == cs->root) ptree("--------------------------\n"); |
|
313 |
+ if(n == cs->root) { ptree("--------------------------\n"); } |
|
313 | 314 |
ab |= printtree(cs, n->right, d+1); |
314 | 315 |
if(n->right) { |
315 |
- if(cmp(n->digest, n->right->digest) >= 0) { |
|
316 |
+ if(cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) { |
|
316 | 317 |
for (i=0; i<d; i++) ptree(" "); |
317 |
- ptree("^^^^ %lld >= %lld - %lld\n", n->digest[1], n->right->digest[1], cmp(n->digest, n->right->digest)); |
|
318 |
+ ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]); |
|
318 | 319 |
ab = 1; |
319 | 320 |
} |
320 | 321 |
} |
321 | 322 |
for (i=0; i<d; i++) ptree(" "); |
322 | 323 |
ptree("%08x(%02u)\n", n->digest[1]>>48, n - cs->data); |
323 | 324 |
if(n->left) { |
324 |
- if(cmp(n->digest, n->left->digest) <= 0) { |
|
325 |
+ if(cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) { |
|
325 | 326 |
for (i=0; i<d; i++) ptree(" "); |
326 |
- ptree("vvvv %lld <= %lld - %lld\n", n->digest[1], n->left->digest[1], cmp(n->digest, n->left->digest)); |
|
327 |
+ ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]); |
|
327 | 328 |
ab = 1; |
328 | 329 |
} |
329 | 330 |
} |
... | ... |
@@ -350,7 +365,8 @@ static int printtree(struct cache_set *cs, struct node *n, int d) { |
350 | 350 |
#define printtree(a,b,c) (0) |
351 | 351 |
#endif |
352 | 352 |
|
353 |
-static int splay(int64_t *md5, struct cache_set *cs) { |
|
353 |
+/* Looks up a node and splays it up to the root of the tree */ |
|
354 |
+static int splay(int64_t *md5, size_t len, struct cache_set *cs) { |
|
354 | 355 |
struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0}, *right = &next, *left = &next, *temp, *root = cs->root; |
355 | 356 |
int comp, found = 0; |
356 | 357 |
|
... | ... |
@@ -358,10 +374,10 @@ static int splay(int64_t *md5, struct cache_set *cs) { |
358 | 358 |
return 0; |
359 | 359 |
|
360 | 360 |
while(1) { |
361 |
- comp = cmp(md5, root->digest); |
|
361 |
+ comp = cmp(md5, len, root->digest, root->size); |
|
362 | 362 |
if(comp < 0) { |
363 | 363 |
if(!root->left) break; |
364 |
- if(cmp(md5, root->left->digest) < 0) { |
|
364 |
+ if(cmp(md5, len, root->left->digest, root->left->size) < 0) { |
|
365 | 365 |
temp = root->left; |
366 | 366 |
root->left = temp->right; |
367 | 367 |
if(temp->right) temp->right->up = root; |
... | ... |
@@ -376,7 +392,7 @@ static int splay(int64_t *md5, struct cache_set *cs) { |
376 | 376 |
root = root->left; |
377 | 377 |
} else if(comp > 0) { |
378 | 378 |
if(!root->right) break; |
379 |
- if(cmp(md5, root->right->digest) > 0) { |
|
379 |
+ if(cmp(md5, len, root->right->digest, root->right->size) > 0) { |
|
380 | 380 |
temp = root->right; |
381 | 381 |
root->right = temp->left; |
382 | 382 |
if(temp->left) temp->left->up = root; |
... | ... |
@@ -408,11 +424,13 @@ static int splay(int64_t *md5, struct cache_set *cs) { |
408 | 408 |
return found; |
409 | 409 |
} |
410 | 410 |
|
411 |
+ |
|
412 |
+/* Looks up a hash in the tree and maintains the replacement chain */ |
|
411 | 413 |
static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size) { |
412 | 414 |
int64_t hash[2]; |
413 | 415 |
|
414 | 416 |
memcpy(hash, md5, 16); |
415 |
- if(splay(hash, cs)) { |
|
417 |
+ if(splay(hash, size, cs)) { |
|
416 | 418 |
struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next; |
417 | 419 |
#ifdef PRINT_CHAINS |
418 | 420 |
printf("promoting %02d\n", p - cs->data); |
... | ... |
@@ -479,12 +497,15 @@ static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size |
479 | 479 |
return 0; |
480 | 480 |
} |
481 | 481 |
|
482 |
+/* If the hash is present nothing happens. |
|
483 |
+ Otherwise a new node is created for the hash, picking one from the beginning of the chain. |
|
484 |
+ Used nodes are moved to the end of the chain */ |
|
482 | 485 |
static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) { |
483 | 486 |
struct node *newnode; |
484 | 487 |
int64_t hash[2]; |
485 | 488 |
|
486 | 489 |
memcpy(hash, md5, 16); |
487 |
- if(splay(hash, cs)) |
|
490 |
+ if(splay(hash, size, cs)) |
|
488 | 491 |
return; /* Already there */ |
489 | 492 |
|
490 | 493 |
ptree("1:\n"); |
... | ... |
@@ -496,7 +517,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
496 | 496 |
/*#define TAKE_FIRST*/ |
497 | 497 |
#ifdef TAKE_FIRST |
498 | 498 |
if((newnode->left || newnode->right || newnode->up)) { |
499 |
- if(!splay(newnode->digest, cs)) { |
|
499 |
+ if(!splay(newnode->digest, newnode->size, cs)) { |
|
500 | 500 |
cli_errmsg("WTF\n"); |
501 | 501 |
abort(); |
502 | 502 |
} |
... | ... |
@@ -509,7 +530,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
509 | 509 |
} else { |
510 | 510 |
cs->root = newnode->left; |
511 | 511 |
newnode->left->up = NULL; |
512 |
- if(splay(newnode->digest, cs)) { |
|
512 |
+ if(splay(newnode->digest, newnode->size, cs)) { |
|
513 | 513 |
cli_errmsg("WTF #2\n"); |
514 | 514 |
abort(); |
515 | 515 |
} |
... | ... |
@@ -520,7 +541,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
520 | 520 |
newnode->up = NULL; |
521 | 521 |
newnode->right = NULL; |
522 | 522 |
newnode->left = NULL; |
523 |
- if(splay(hash, cs)) { |
|
523 |
+ if(splay(hash, size, cs)) { |
|
524 | 524 |
cli_errmsg("WTF #3\n"); |
525 | 525 |
abort(); |
526 | 526 |
} |
... | ... |
@@ -570,7 +591,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
570 | 570 |
newnode->left = NULL; |
571 | 571 |
newnode->right = NULL; |
572 | 572 |
} else { |
573 |
- if(cmp(hash, cs->root->digest) < 0) { |
|
573 |
+ if(cmp(hash, size, cs->root->digest, cs->root->size) < 0) { |
|
574 | 574 |
newnode->left = cs->root->left; |
575 | 575 |
newnode->right = cs->root; |
576 | 576 |
cs->root->left = NULL; |
... | ... |
@@ -585,6 +606,7 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
585 | 585 |
newnode->digest[0] = hash[0]; |
586 | 586 |
newnode->digest[1] = hash[1]; |
587 | 587 |
newnode->up = NULL; |
588 |
+ newnode->size = size; |
|
588 | 589 |
cs->root = newnode; |
589 | 590 |
|
590 | 591 |
ptree("3: %lld\n", hash[1]); |
... | ... |
@@ -594,24 +616,15 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) |
594 | 594 |
} |
595 | 595 |
#endif /* USE_SPLAY */ |
596 | 596 |
|
597 |
-/* #define TREES 1 */ |
|
598 |
-/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */ |
|
599 |
- |
|
600 |
-#define TREES 256 |
|
601 |
-static inline unsigned int getkey(uint8_t *hash) { return *hash; } |
|
602 |
- |
|
603 |
-/* #define TREES 4096 */ |
|
604 |
-/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */ |
|
605 | 597 |
|
606 |
-/* #define TREES 65536 */ |
|
607 |
-/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */ |
|
598 |
+/* COMMON STUFF --------------------------------------------------------------------- */ |
|
608 | 599 |
|
609 | 600 |
struct CACHE { |
610 | 601 |
struct cache_set cacheset; |
611 | 602 |
pthread_mutex_t mutex; |
612 | 603 |
}; |
613 | 604 |
|
614 |
- |
|
605 |
+/* Allocates the trees for the engine cache */ |
|
615 | 606 |
int cli_cache_init(struct cl_engine *engine) { |
616 | 607 |
static struct CACHE *cache; |
617 | 608 |
unsigned int i, j; |
... | ... |
@@ -645,6 +658,7 @@ int cli_cache_init(struct cl_engine *engine) { |
645 | 645 |
return 0; |
646 | 646 |
} |
647 | 647 |
|
648 |
+/* Frees the engine cache */ |
|
648 | 649 |
void cli_cache_destroy(struct cl_engine *engine) { |
649 | 650 |
static struct CACHE *cache; |
650 | 651 |
unsigned int i; |
... | ... |
@@ -659,7 +673,8 @@ void cli_cache_destroy(struct cl_engine *engine) { |
659 | 659 |
mpool_free(engine->mempool, cache); |
660 | 660 |
} |
661 | 661 |
|
662 |
-static int cache_lookup_hash(unsigned char *md5, struct CACHE *cache) { |
|
662 |
+/* Looks up a hash in the proper tree */ |
|
663 |
+static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache) { |
|
663 | 664 |
unsigned int key = getkey(md5); |
664 | 665 |
int ret = CL_VIRUS; |
665 | 666 |
struct CACHE *c; |
... | ... |
@@ -670,13 +685,14 @@ static int cache_lookup_hash(unsigned char *md5, struct CACHE *cache) { |
670 | 670 |
return ret; |
671 | 671 |
} |
672 | 672 |
|
673 |
- ret = (cacheset_lookup(&c->cacheset, md5, 1024)) ? CL_CLEAN : CL_VIRUS; |
|
674 |
- if(ret == CL_CLEAN) cli_warnmsg("cached\n"); |
|
673 |
+ ret = (cacheset_lookup(&c->cacheset, md5, len)) ? CL_CLEAN : CL_VIRUS; |
|
675 | 674 |
pthread_mutex_unlock(&c->mutex); |
675 |
+ /* if(ret == CL_CLEAN) cli_warnmsg("cached\n"); */ |
|
676 | 676 |
return ret; |
677 | 677 |
} |
678 | 678 |
|
679 |
-void cache_add(unsigned char *md5, cli_ctx *ctx) { |
|
679 |
+/* Adds a hash to the cache */ |
|
680 |
+void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx) { |
|
680 | 681 |
unsigned int key = getkey(md5); |
681 | 682 |
struct CACHE *c; |
682 | 683 |
|
... | ... |
@@ -690,10 +706,10 @@ void cache_add(unsigned char *md5, cli_ctx *ctx) { |
690 | 690 |
} |
691 | 691 |
|
692 | 692 |
#ifdef USE_LRUHASHCACHE |
693 |
- cacheset_add(&c->cacheset, md5, 1024, ctx->engine->mempool); |
|
693 |
+ cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool); |
|
694 | 694 |
#else |
695 | 695 |
#ifdef USE_SPLAY |
696 |
- cacheset_add(&c->cacheset, md5, 1024); |
|
696 |
+ cacheset_add(&c->cacheset, md5, size); |
|
697 | 697 |
#else |
698 | 698 |
#error #define USE_SPLAY or USE_LRUHASHCACHE |
699 | 699 |
#endif |
... | ... |
@@ -703,6 +719,8 @@ void cache_add(unsigned char *md5, cli_ctx *ctx) { |
703 | 703 |
return; |
704 | 704 |
} |
705 | 705 |
|
706 |
+/* Hashes a file onto the provided buffer and looks it up in the cache. |
|
707 |
+ Returns CL_CLEAN if found in the cache, CL_VIRUS if not (FIXME: or an error) */ |
|
706 | 708 |
int cache_check(unsigned char *hash, cli_ctx *ctx) { |
707 | 709 |
fmap_t *map = *ctx->fmap; |
708 | 710 |
size_t todo = map->len, at = 0; |
... | ... |
@@ -722,5 +740,5 @@ int cache_check(unsigned char *hash, cli_ctx *ctx) { |
722 | 722 |
cli_md5_update(&md5, buf, readme); |
723 | 723 |
} |
724 | 724 |
cli_md5_final(hash, &md5); |
725 |
- return cache_lookup_hash(hash, ctx->engine->cache); |
|
725 |
+ return cache_lookup_hash(hash, map->len, ctx->engine->cache); |
|
726 | 726 |
} |
... | ... |
@@ -24,7 +24,7 @@ |
24 | 24 |
#include "clamav.h" |
25 | 25 |
#include "others.h" |
26 | 26 |
|
27 |
-void cache_add(unsigned char *md5, cli_ctx *ctx); |
|
27 |
+void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx); |
|
28 | 28 |
int cache_check(unsigned char *hash, cli_ctx *ctx); |
29 | 29 |
int cli_cache_init(struct cl_engine *engine); |
30 | 30 |
void cli_cache_destroy(struct cl_engine *engine); |
... | ... |
@@ -1803,7 +1803,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
1803 | 1803 |
struct stat sb; |
1804 | 1804 |
uint8_t typercg = 1; |
1805 | 1805 |
cli_file_t current_container_type = ctx->container_type; /* TODO: container tracking code TBD - bb#1293 */ |
1806 |
- size_t current_container_size = ctx->container_size; |
|
1806 |
+ size_t current_container_size = ctx->container_size, hashed_size; |
|
1807 | 1807 |
unsigned char hash[16]; |
1808 | 1808 |
|
1809 | 1809 |
if(ctx->engine->maxreclevel && ctx->recursion > ctx->engine->maxreclevel) { |
... | ... |
@@ -1846,7 +1846,8 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
1846 | 1846 |
ctx->fmap--; |
1847 | 1847 |
return CL_CLEAN; |
1848 | 1848 |
} |
1849 |
- |
|
1849 |
+ hashed_size = (*ctx->fmap)->len; |
|
1850 |
+ |
|
1850 | 1851 |
if(!ctx->options || (ctx->recursion == ctx->engine->maxreclevel)) { /* raw mode (stdin, etc.) or last level of recursion */ |
1851 | 1852 |
if(ctx->recursion == ctx->engine->maxreclevel) |
1852 | 1853 |
cli_dbgmsg("cli_magic_scandesc: Hit recursion limit, only scanning raw file\n"); |
... | ... |
@@ -1856,7 +1857,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
1856 | 1856 |
if((ret = cli_fmap_scandesc(ctx, 0, 0, NULL, AC_SCAN_VIR, hash)) == CL_VIRUS) |
1857 | 1857 |
cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc); |
1858 | 1858 |
else |
1859 |
- cache_add(hash, ctx); |
|
1859 |
+ cache_add(hash, hashed_size, ctx); |
|
1860 | 1860 |
|
1861 | 1861 |
funmap(*ctx->fmap); |
1862 | 1862 |
ctx->fmap--; |
... | ... |
@@ -2183,7 +2184,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
2183 | 2183 |
case CL_EMAXFILES: |
2184 | 2184 |
cli_dbgmsg("Descriptor[%d]: %s\n", desc, cl_strerror(ret)); |
2185 | 2185 |
case CL_CLEAN: |
2186 |
- cache_add(hash, ctx); |
|
2186 |
+ cache_add(hash, hashed_size, ctx); |
|
2187 | 2187 |
return CL_CLEAN; |
2188 | 2188 |
default: |
2189 | 2189 |
return ret; |