git-svn: trunk@3040
Tomasz Kojm authored on 2007/04/30 23:12:38... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Mon Apr 30 15:24:28 CEST 2007 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: use BM matcher to handle .mdb sigs |
|
4 |
+ |
|
1 | 5 |
Sat Apr 28 22:26:00 EEST 2007 (edwin) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/regex_list.c: update code to use new AC matcher |
... | ... |
@@ -109,7 +109,7 @@ extern "C" |
109 | 109 |
|
110 | 110 |
/* internal structures */ |
111 | 111 |
struct cli_md5_node { |
112 |
- char *virname, *viralias; |
|
112 |
+ char *virname; |
|
113 | 113 |
unsigned char *md5; |
114 | 114 |
unsigned int size; |
115 | 115 |
unsigned short fp; |
... | ... |
@@ -135,8 +135,8 @@ struct cl_engine { |
135 | 135 |
/* MD5 */ |
136 | 136 |
struct cli_md5_node **md5_hlist; |
137 | 137 |
|
138 |
- /* MD5 list for PE sections */ |
|
139 |
- struct cli_md5_node *md5_sect; |
|
138 |
+ /* B-M matcher for MD5 sigs for PE sections */ |
|
139 |
+ void *md5_sect; |
|
140 | 140 |
|
141 | 141 |
/* Zip metadata */ |
142 | 142 |
struct cli_meta_node *zip_mlist; |
... | ... |
@@ -51,6 +51,8 @@ |
51 | 51 |
#include "md5.h" |
52 | 52 |
#include "mew.h" |
53 | 53 |
#include "upack.h" |
54 |
+#include "matcher.h" |
|
55 |
+#include "matcher-bm.h" |
|
54 | 56 |
|
55 | 57 |
#ifndef O_BINARY |
56 | 58 |
#define O_BINARY 0 |
... | ... |
@@ -243,7 +245,6 @@ int cli_scanpe(int desc, cli_ctx *ctx) |
243 | 243 |
struct pe_image_optional_hdr32 opt32; |
244 | 244 |
} pe_opt; |
245 | 245 |
struct pe_image_section_hdr *section_hdr; |
246 |
- struct cli_md5_node *md5_sect; |
|
247 | 246 |
struct stat sb; |
248 | 247 |
char sname[9], buff[4096], *tempfile; |
249 | 248 |
unsigned char *ubuff; |
... | ... |
@@ -254,8 +255,9 @@ int cli_scanpe(int desc, cli_ctx *ctx) |
254 | 254 |
char *src = NULL, *dest = NULL; |
255 | 255 |
int ndesc, ret = CL_CLEAN, upack = 0, native=0; |
256 | 256 |
size_t fsize; |
257 |
- uint32_t valign, falign, hdr_size; |
|
257 |
+ uint32_t valign, falign, hdr_size, j; |
|
258 | 258 |
struct cli_exe_section *exe_sections; |
259 |
+ struct cli_matcher *md5_sect; |
|
259 | 260 |
|
260 | 261 |
|
261 | 262 |
if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) { |
... | ... |
@@ -710,29 +712,27 @@ int cli_scanpe(int desc, cli_ctx *ctx) |
710 | 710 |
} |
711 | 711 |
return CL_CLEAN; /* no ninjas to see here! move along! */ |
712 | 712 |
} |
713 |
- |
|
714 |
- /* check MD5 section sigs */ |
|
715 |
- if(DCONF & PE_CONF_MD5SECT) |
|
716 |
- md5_sect = ctx->engine->md5_sect; |
|
717 |
- else |
|
718 |
- md5_sect = NULL; |
|
719 | 713 |
|
720 |
- while(md5_sect && md5_sect->size < exe_sections[i].rsz) |
|
721 |
- md5_sect = md5_sect->next; |
|
714 |
+ /* check MD5 section sigs */ |
|
715 |
+ md5_sect = ctx->engine->md5_sect; |
|
716 |
+ if((DCONF & PE_CONF_MD5SECT) && md5_sect) { |
|
717 |
+ found = 0; |
|
718 |
+ for(j = 0; j < md5_sect->soff_len && md5_sect->soff[j] <= exe_sections[i].rsz; j++) { |
|
719 |
+ if(md5_sect->soff[j] == exe_sections[i].rsz) { |
|
720 |
+ found = 1; |
|
721 |
+ break; |
|
722 |
+ } |
|
723 |
+ } |
|
722 | 724 |
|
723 |
- if(md5_sect && md5_sect->size == exe_sections[i].rsz) { |
|
724 |
- if(!cli_md5sect(desc, exe_sections[i].raw, exe_sections[i].rsz, md5_dig)) { |
|
725 |
- cli_errmsg("PE: Can't calculate MD5 for section %d\n", i); |
|
726 |
- } else { |
|
727 |
- while(md5_sect && md5_sect->size == exe_sections[i].rsz) { |
|
728 |
- if(!memcmp(md5_dig, md5_sect->md5, 16)) { |
|
729 |
- if(ctx->virname) |
|
730 |
- *ctx->virname = md5_sect->virname; |
|
725 |
+ if(found) { |
|
726 |
+ if(!cli_md5sect(desc, exe_sections[i].raw, exe_sections[i].rsz, md5_dig)) { |
|
727 |
+ cli_errmsg("PE: Can't calculate MD5 for section %u\n", i); |
|
728 |
+ } else { |
|
729 |
+ if(cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_sect, 0, 0, -1) == CL_VIRUS) { |
|
731 | 730 |
free(section_hdr); |
732 | 731 |
free(exe_sections); |
733 | 732 |
return CL_VIRUS; |
734 | 733 |
} |
735 |
- md5_sect = md5_sect->next; |
|
736 | 734 |
} |
737 | 735 |
} |
738 | 736 |
} |
... | ... |
@@ -329,26 +329,26 @@ static int cli_initroots(struct cl_engine *engine, unsigned int options) |
329 | 329 |
cli_dbgmsg("Initializing engine->root[%d]\n", i); |
330 | 330 |
root = engine->root[i] = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher)); |
331 | 331 |
if(!root) { |
332 |
- cli_errmsg("Can't initialise AC pattern matcher\n"); |
|
332 |
+ cli_errmsg("cli_initroots: Can't allocate memory for cli_matcher\n"); |
|
333 | 333 |
return CL_EMEM; |
334 | 334 |
} |
335 | 335 |
|
336 | 336 |
if(options & CL_DB_ACONLY) { |
337 |
- cli_dbgmsg("Only using AC pattern matcher.\n"); |
|
337 |
+ cli_dbgmsg("cli_initroots: Only using AC pattern matcher.\n"); |
|
338 | 338 |
root->ac_only = 1; |
339 | 339 |
} |
340 | 340 |
|
341 | 341 |
cli_dbgmsg("Initialising AC pattern matcher of root[%d]\n", i); |
342 | 342 |
if((ret = cli_ac_init(root, AC_DEFAULT_MIN_DEPTH, AC_DEFAULT_MAX_DEPTH))) { |
343 | 343 |
/* no need to free previously allocated memory here */ |
344 |
- cli_errmsg("Can't initialise AC pattern matcher\n"); |
|
344 |
+ cli_errmsg("cli_initroots: Can't initialise AC pattern matcher\n"); |
|
345 | 345 |
return ret; |
346 | 346 |
} |
347 | 347 |
|
348 | 348 |
if(!root->ac_only) { |
349 |
- cli_dbgmsg("Initializing BM tables of root[%d]\n", i); |
|
349 |
+ cli_dbgmsg("cli_initroots: Initializing BM tables of root[%d]\n", i); |
|
350 | 350 |
if((ret = cli_bm_init(root))) { |
351 |
- cli_errmsg("Can't initialise BM pattern matcher\n"); |
|
351 |
+ cli_errmsg("cli_initroots: Can't initialise BM pattern matcher\n"); |
|
352 | 352 |
return ret; |
353 | 353 |
} |
354 | 354 |
} |
... | ... |
@@ -600,12 +600,20 @@ static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
600 | 600 |
return CL_SUCCESS; |
601 | 601 |
} |
602 | 602 |
|
603 |
/*
 * qsort() comparator for two uint32_t values, ascending order.
 *
 * a, b: pointers to the uint32_t elements being compared.
 * Returns a negative value if *a < *b, zero if they are equal and a
 * positive value if *a > *b.
 *
 * The values are compared explicitly instead of being subtracted: the
 * difference of two uint32_t values does not fit in an int, so for operands
 * more than INT_MAX apart the converted result would carry the wrong sign
 * and the sorted array would end up out of order.
 */
static int scomp(const void *a, const void *b)
{
	const uint32_t x = *(const uint32_t *)a;
	const uint32_t y = *(const uint32_t *)b;

	return (x > y) - (x < y);
}
|
607 |
+ |
|
603 | 608 |
static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short mode, unsigned int options) |
604 | 609 |
{ |
605 | 610 |
char buffer[FILEBUFF], *pt; |
606 |
- int line = 0, ret = 0; |
|
607 |
- unsigned int md5f = 0, sizef = 1; |
|
608 |
- struct cli_md5_node *new, *mpt, *last; |
|
611 |
+ int ret = CL_SUCCESS; |
|
612 |
+ uint8_t md5f = 0, sizef = 1, found; |
|
613 |
+ uint32_t line = 0, i; |
|
614 |
+ struct cli_md5_node *new; |
|
615 |
+ struct cli_bm_patt *bm_new; |
|
616 |
+ struct cli_matcher *md5_sect = NULL; |
|
609 | 617 |
|
610 | 618 |
|
611 | 619 |
if((ret = cli_initengine(engine, options))) { |
... | ... |
@@ -638,7 +646,7 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
638 | 638 |
} |
639 | 639 |
|
640 | 640 |
if(!(new->md5 = (unsigned char *) cli_hex2str(pt))) { |
641 |
- cli_errmsg("Malformed MD5 string at line %d\n", line); |
|
641 |
+ cli_errmsg("cli_loadhdb: Malformed MD5 string at line %u\n", line); |
|
642 | 642 |
free(pt); |
643 | 643 |
free(new); |
644 | 644 |
ret = CL_EMALFDB; |
... | ... |
@@ -662,32 +670,81 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
662 | 662 |
break; |
663 | 663 |
} |
664 | 664 |
|
665 |
- new->viralias = cli_strtok(buffer, 3, ":"); /* aliases are optional */ |
|
666 |
- |
|
667 | 665 |
if(mode == 2) { /* section MD5 */ |
668 | 666 |
if(!(*engine)->md5_sect) { |
669 |
- (*engine)->md5_sect = new; |
|
670 |
- } else { |
|
671 |
- if(new->size <= (*engine)->md5_sect->size) { |
|
672 |
- new->next = (*engine)->md5_sect; |
|
673 |
- (*engine)->md5_sect = new; |
|
674 |
- } else { |
|
675 |
- mpt = (*engine)->md5_sect; |
|
676 |
- while(mpt) { |
|
677 |
- last = mpt; |
|
678 |
- if(!mpt->next || new->size <= mpt->next->size) |
|
679 |
- break; |
|
680 |
- mpt = mpt->next; |
|
681 |
- } |
|
682 |
- new->next = last->next; |
|
683 |
- last->next = new; |
|
667 |
+ (*engine)->md5_sect = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1); |
|
668 |
+ if(!(*engine)->md5_sect) { |
|
669 |
+ free(new->virname); |
|
670 |
+ free(new->md5); |
|
671 |
+ free(new); |
|
672 |
+ ret = CL_EMEM; |
|
673 |
+ break; |
|
684 | 674 |
} |
675 |
+ if((ret = cli_bm_init((*engine)->md5_sect))) { |
|
676 |
+ cli_errmsg("cli_loadhdb: Can't initialise BM pattern matcher\n"); |
|
677 |
+ free(new->virname); |
|
678 |
+ free(new->md5); |
|
679 |
+ free(new); |
|
680 |
+ break; |
|
681 |
+ } |
|
682 |
+ } |
|
683 |
+ md5_sect = (*engine)->md5_sect; |
|
684 |
+ |
|
685 |
+ bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt)); |
|
686 |
+ if(!bm_new) { |
|
687 |
+ cli_errmsg("cli_loadhdb: Can't allocate memory for bm_new\n"); |
|
688 |
+ free(new->virname); |
|
689 |
+ free(new->md5); |
|
690 |
+ free(new); |
|
691 |
+ ret = CL_EMEM; |
|
692 |
+ break; |
|
685 | 693 |
} |
694 |
+ |
|
695 |
+ bm_new->pattern = new->md5; |
|
696 |
+ bm_new->length = 16; |
|
697 |
+ bm_new->virname = new->virname; |
|
698 |
+ |
|
699 |
+ found = 0; |
|
700 |
+ for(i = 0; i < md5_sect->soff_len; i++) { |
|
701 |
+ if(md5_sect->soff[i] == new->size) { |
|
702 |
+ found = 1; |
|
703 |
+ break; |
|
704 |
+ } |
|
705 |
+ } |
|
706 |
+ |
|
707 |
+ if(!found) { |
|
708 |
+ md5_sect->soff_len++; |
|
709 |
+ md5_sect->soff = (uint32_t *) cli_realloc(md5_sect->soff, md5_sect->soff_len * sizeof(uint32_t)); |
|
710 |
+ if(!md5_sect->soff) { |
|
711 |
+ cli_errmsg("cli_loadhdb: Can't realloc md5_sect->soff\n"); |
|
712 |
+ free(bm_new->pattern); |
|
713 |
+ free(bm_new->virname); |
|
714 |
+ free(bm_new); |
|
715 |
+ free(new); |
|
716 |
+ ret = CL_EMEM; |
|
717 |
+ break; |
|
718 |
+ } |
|
719 |
+ md5_sect->soff[md5_sect->soff_len - 1] = new->size; |
|
720 |
+ } |
|
721 |
+ |
|
722 |
+ free(new); |
|
723 |
+ |
|
724 |
+ if((ret = cli_bm_addpatt(md5_sect, bm_new))) { |
|
725 |
+ cli_errmsg("cli_loadhdb: Error adding BM pattern\n"); |
|
726 |
+ free(bm_new->pattern); |
|
727 |
+ free(bm_new->virname); |
|
728 |
+ free(bm_new); |
|
729 |
+ break; |
|
730 |
+ } |
|
731 |
+ |
|
686 | 732 |
} else { |
687 | 733 |
if(!(*engine)->md5_hlist) { |
688 |
- cli_dbgmsg("Initializing md5 list structure\n"); |
|
734 |
+ cli_dbgmsg("cli_loadhdb: Initializing MD5 list structure\n"); |
|
689 | 735 |
(*engine)->md5_hlist = (struct cli_md5_node **) cli_calloc(256, sizeof(struct cli_md5_node *)); |
690 | 736 |
if(!(*engine)->md5_hlist) { |
737 |
+ free(new->virname); |
|
738 |
+ free(new->md5); |
|
739 |
+ free(new); |
|
691 | 740 |
ret = CL_EMEM; |
692 | 741 |
break; |
693 | 742 |
} |
... | ... |
@@ -699,13 +756,13 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
699 | 699 |
} |
700 | 700 |
|
701 | 701 |
if(!line) { |
702 |
- cli_errmsg("Empty database file\n"); |
|
702 |
+ cli_errmsg("cli_loadhdb: Empty database file\n"); |
|
703 | 703 |
cl_free(*engine); |
704 | 704 |
return CL_EMALFDB; |
705 | 705 |
} |
706 | 706 |
|
707 | 707 |
if(ret) { |
708 |
- cli_errmsg("Problem parsing database at line %d\n", line); |
|
708 |
+ cli_errmsg("cli_loadhdb: Problem parsing database at line %u\n", line); |
|
709 | 709 |
cl_free(*engine); |
710 | 710 |
return ret; |
711 | 711 |
} |
... | ... |
@@ -713,6 +770,9 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
713 | 713 |
if(signo) |
714 | 714 |
*signo += line; |
715 | 715 |
|
716 |
+ if(md5_sect) |
|
717 |
+ qsort(md5_sect->soff, md5_sect->soff_len, sizeof(uint32_t), scomp); |
|
718 |
+ |
|
716 | 719 |
return CL_SUCCESS; |
717 | 720 |
} |
718 | 721 |
|
... | ... |
@@ -1450,7 +1510,6 @@ void cl_free(struct cl_engine *engine) |
1450 | 1450 |
free(root); |
1451 | 1451 |
} |
1452 | 1452 |
} |
1453 |
- |
|
1454 | 1453 |
free(engine->root); |
1455 | 1454 |
} |
1456 | 1455 |
|
... | ... |
@@ -1462,23 +1521,16 @@ void cl_free(struct cl_engine *engine) |
1462 | 1462 |
md5pt = md5pt->next; |
1463 | 1463 |
free(md5h->md5); |
1464 | 1464 |
free(md5h->virname); |
1465 |
- if(md5h->viralias) |
|
1466 |
- free(md5h->viralias); |
|
1467 | 1465 |
free(md5h); |
1468 | 1466 |
} |
1469 | 1467 |
} |
1470 | 1468 |
free(engine->md5_hlist); |
1471 | 1469 |
} |
1472 | 1470 |
|
1473 |
- md5pt = engine->md5_sect; |
|
1474 |
- while(md5pt) { |
|
1475 |
- md5h = md5pt; |
|
1476 |
- md5pt = md5pt->next; |
|
1477 |
- free(md5h->md5); |
|
1478 |
- free(md5h->virname); |
|
1479 |
- if(md5h->viralias) |
|
1480 |
- free(md5h->viralias); |
|
1481 |
- free(md5h); |
|
1471 |
+ if((root = engine->md5_sect)) { |
|
1472 |
+ cli_bm_free(root); |
|
1473 |
+ free(root->soff); |
|
1474 |
+ free(root); |
|
1482 | 1475 |
} |
1483 | 1476 |
|
1484 | 1477 |
metapt = engine->zip_mlist; |