Browse code

use BM matcher to handle .mdb sigs

git-svn: trunk@3040

Tomasz Kojm authored on 2007/04/30 23:12:38
Showing 6 changed files
... ...
@@ -1,3 +1,7 @@
1
+Mon Apr 30 15:24:28 CEST 2007 (tk)
2
+----------------------------------
3
+  * libclamav: use BM matcher to handle .mdb sigs
4
+
1 5
 Sat Apr 28 22:26:00 EEST 2007 (edwin)
2 6
 ----------------------------------
3 7
   * libclamav/regex_list.c: update code to use new AC matcher
... ...
@@ -109,7 +109,7 @@ extern "C"
109 109
 
110 110
 /* internal structures */
111 111
 struct cli_md5_node {
112
-    char *virname, *viralias;
112
+    char *virname;
113 113
     unsigned char *md5;
114 114
     unsigned int size;
115 115
     unsigned short fp;
... ...
@@ -135,8 +135,8 @@ struct cl_engine {
135 135
     /* MD5 */
136 136
     struct cli_md5_node **md5_hlist;
137 137
 
138
-    /* MD5 list for PE sections */
139
-    struct cli_md5_node *md5_sect;
138
+    /* B-M matcher for MD5 sigs for PE sections */
139
+    void *md5_sect;
140 140
 
141 141
     /* Zip metadata */
142 142
     struct cli_meta_node *zip_mlist;
... ...
@@ -27,7 +27,6 @@ struct cli_bm_patt {
27 27
     unsigned char *pattern;
28 28
     uint32_t length;
29 29
     char *virname, *offset;
30
-    const char *viralias;
31 30
     uint8_t target;
32 31
     struct cli_bm_patt *next;
33 32
 };
... ...
@@ -44,6 +44,7 @@ struct cli_matcher {
44 44
     /* Extended Boyer-Moore */
45 45
     int32_t *bm_shift;
46 46
     struct cli_bm_patt **bm_suffix;
47
+    uint32_t *soff, soff_len; /* for PE section sigs */
47 48
 
48 49
     /* Extended Aho-Corasick */
49 50
     uint8_t ac_mindepth, ac_maxdepth;
... ...
@@ -51,6 +51,8 @@
51 51
 #include "md5.h"
52 52
 #include "mew.h"
53 53
 #include "upack.h"
54
+#include "matcher.h"
55
+#include "matcher-bm.h"
54 56
 
55 57
 #ifndef	O_BINARY
56 58
 #define	O_BINARY	0
... ...
@@ -243,7 +245,6 @@ int cli_scanpe(int desc, cli_ctx *ctx)
243 243
 	    struct pe_image_optional_hdr32 opt32;
244 244
 	} pe_opt;
245 245
 	struct pe_image_section_hdr *section_hdr;
246
-	struct cli_md5_node *md5_sect;
247 246
 	struct stat sb;
248 247
 	char sname[9], buff[4096], *tempfile;
249 248
 	unsigned char *ubuff;
... ...
@@ -254,8 +255,9 @@ int cli_scanpe(int desc, cli_ctx *ctx)
254 254
 	char *src = NULL, *dest = NULL;
255 255
 	int ndesc, ret = CL_CLEAN, upack = 0, native=0;
256 256
 	size_t fsize;
257
-	uint32_t valign, falign, hdr_size;
257
+	uint32_t valign, falign, hdr_size, j;
258 258
 	struct cli_exe_section *exe_sections;
259
+	struct cli_matcher *md5_sect;
259 260
 
260 261
 
261 262
     if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) {
... ...
@@ -710,29 +712,27 @@ int cli_scanpe(int desc, cli_ctx *ctx)
710 710
 		}
711 711
 		return CL_CLEAN; /* no ninjas to see here! move along! */
712 712
 	    }
713
-	    
714
-	    /* check MD5 section sigs */
715
-	    if(DCONF & PE_CONF_MD5SECT)
716
-		md5_sect = ctx->engine->md5_sect;
717
-	    else
718
-		md5_sect = NULL;
719 713
 
720
-	    while(md5_sect && md5_sect->size < exe_sections[i].rsz)
721
-	        md5_sect = md5_sect->next;
714
+	    /* check MD5 section sigs */
715
+	    md5_sect = ctx->engine->md5_sect;
716
+	    if((DCONF & PE_CONF_MD5SECT) && md5_sect) {
717
+		found = 0;
718
+		for(j = 0; j < md5_sect->soff_len && md5_sect->soff[j] <= exe_sections[i].rsz; j++) {
719
+		    if(md5_sect->soff[j] == exe_sections[i].rsz) {
720
+			found = 1;
721
+			break;
722
+		    }
723
+		}
722 724
 
723
-	    if(md5_sect && md5_sect->size == exe_sections[i].rsz) {
724
-	        if(!cli_md5sect(desc, exe_sections[i].raw, exe_sections[i].rsz, md5_dig)) {
725
-		    cli_errmsg("PE: Can't calculate MD5 for section %d\n", i);
726
-		} else {
727
-		    while(md5_sect && md5_sect->size == exe_sections[i].rsz) {
728
-		        if(!memcmp(md5_dig, md5_sect->md5, 16)) {
729
-			    if(ctx->virname)
730
-			        *ctx->virname = md5_sect->virname;
725
+		if(found) {
726
+		    if(!cli_md5sect(desc, exe_sections[i].raw, exe_sections[i].rsz, md5_dig)) {
727
+			cli_errmsg("PE: Can't calculate MD5 for section %u\n", i);
728
+		    } else {
729
+			if(cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_sect, 0, 0, -1) == CL_VIRUS) {
731 730
 			    free(section_hdr);
732 731
 			    free(exe_sections);
733 732
 			    return CL_VIRUS;
734 733
 			}
735
-			md5_sect = md5_sect->next;
736 734
 		    }
737 735
 		}
738 736
 	    }
... ...
@@ -329,26 +329,26 @@ static int cli_initroots(struct cl_engine *engine, unsigned int options)
329 329
 	    cli_dbgmsg("Initializing engine->root[%d]\n", i);
330 330
 	    root = engine->root[i] = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
331 331
 	    if(!root) {
332
-		cli_errmsg("Can't initialise AC pattern matcher\n");
332
+		cli_errmsg("cli_initroots: Can't allocate memory for cli_matcher\n");
333 333
 		return CL_EMEM;
334 334
 	    }
335 335
 
336 336
 	    if(options & CL_DB_ACONLY) {
337
-		cli_dbgmsg("Only using AC pattern matcher.\n");
337
+		cli_dbgmsg("cli_initroots: Only using AC pattern matcher.\n");
338 338
 		root->ac_only = 1;
339 339
 	    }
340 340
 
341 341
 	    cli_dbgmsg("Initialising AC pattern matcher of root[%d]\n", i);
342 342
 	    if((ret = cli_ac_init(root, AC_DEFAULT_MIN_DEPTH, AC_DEFAULT_MAX_DEPTH))) {
343 343
 		/* no need to free previously allocated memory here */
344
-		cli_errmsg("Can't initialise AC pattern matcher\n");
344
+		cli_errmsg("cli_initroots: Can't initialise AC pattern matcher\n");
345 345
 		return ret;
346 346
 	    }
347 347
 
348 348
 	    if(!root->ac_only) {
349
-		cli_dbgmsg("Initializing BM tables of root[%d]\n", i);
349
+		cli_dbgmsg("cli_initroots: Initializing BM tables of root[%d]\n", i);
350 350
 		if((ret = cli_bm_init(root))) {
351
-		    cli_errmsg("Can't initialise BM pattern matcher\n");
351
+		    cli_errmsg("cli_initroots: Can't initialise BM pattern matcher\n");
352 352
 		    return ret;
353 353
 		}
354 354
 	    }
... ...
@@ -600,12 +600,20 @@ static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
600 600
     return CL_SUCCESS;
601 601
 }
602 602
 
603
/*
 * qsort() comparator for arrays of uint32_t values, ascending order.
 *
 * a, b: pointers to the two uint32_t elements being compared.
 * Returns a negative value if *a < *b, zero if they are equal and a
 * positive value if *a > *b.
 *
 * The result is derived from explicit comparisons instead of a subtraction:
 * the difference of two uint32_t values does not fit in an int, so
 * "*a - *b" yields the wrong sign whenever the operands are more than
 * INT_MAX apart, which would leave the sorted array mis-ordered.
 */
static int scomp(const void *a, const void *b)
{
	const uint32_t lhs = *(const uint32_t *) a;
	const uint32_t rhs = *(const uint32_t *) b;

    return (lhs > rhs) - (lhs < rhs);
}
607
+
603 608
 static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short mode, unsigned int options)
604 609
 {
605 610
 	char buffer[FILEBUFF], *pt;
606
-	int line = 0, ret = 0;
607
-	unsigned int md5f = 0, sizef = 1;
608
-	struct cli_md5_node *new, *mpt, *last;
611
+	int ret = CL_SUCCESS;
612
+	uint8_t md5f = 0, sizef = 1, found;
613
+	uint32_t line = 0, i;
614
+	struct cli_md5_node *new;
615
+	struct cli_bm_patt *bm_new;
616
+	struct cli_matcher *md5_sect = NULL;
609 617
 
610 618
 
611 619
     if((ret = cli_initengine(engine, options))) {
... ...
@@ -638,7 +646,7 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
638 638
 	}
639 639
 
640 640
 	if(!(new->md5 = (unsigned char *) cli_hex2str(pt))) {
641
-	    cli_errmsg("Malformed MD5 string at line %d\n", line);
641
+	    cli_errmsg("cli_loadhdb: Malformed MD5 string at line %u\n", line);
642 642
 	    free(pt);
643 643
 	    free(new);
644 644
 	    ret = CL_EMALFDB;
... ...
@@ -662,32 +670,81 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
662 662
 	    break;
663 663
 	}
664 664
 
665
-	new->viralias = cli_strtok(buffer, 3, ":"); /* aliases are optional */
666
-
667 665
 	if(mode == 2) { /* section MD5 */
668 666
 	    if(!(*engine)->md5_sect) {
669
-		(*engine)->md5_sect = new;
670
-	    } else {
671
-		if(new->size <= (*engine)->md5_sect->size) {
672
-		    new->next = (*engine)->md5_sect;
673
-		    (*engine)->md5_sect = new;
674
-		} else {
675
-		    mpt = (*engine)->md5_sect;
676
-		    while(mpt) {
677
-			last = mpt;
678
-			if(!mpt->next || new->size <= mpt->next->size)
679
-			    break;
680
-			mpt = mpt->next;
681
-		    }
682
-		    new->next = last->next;
683
-		    last->next = new;
667
+		(*engine)->md5_sect = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
668
+		if(!(*engine)->md5_sect) {
669
+		    free(new->virname);
670
+		    free(new->md5);
671
+		    free(new);
672
+		    ret = CL_EMEM;
673
+		    break;
684 674
 		}
675
+		if((ret = cli_bm_init((*engine)->md5_sect))) {
676
+		    cli_errmsg("cli_loadhdb: Can't initialise BM pattern matcher\n");
677
+		    free(new->virname);
678
+		    free(new->md5);
679
+		    free(new);
680
+		    break;
681
+		}
682
+	    }
683
+	    md5_sect = (*engine)->md5_sect;
684
+
685
+	    bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
686
+	    if(!bm_new) {
687
+		cli_errmsg("cli_loadhdb: Can't allocate memory for bm_new\n");
688
+		free(new->virname);
689
+		free(new->md5);
690
+		free(new);
691
+		ret = CL_EMEM;
692
+		break;
685 693
 	    }
694
+
695
+	    bm_new->pattern = new->md5;
696
+	    bm_new->length = 16;
697
+	    bm_new->virname = new->virname;
698
+
699
+	    found = 0;
700
+	    for(i = 0; i < md5_sect->soff_len; i++) {
701
+		if(md5_sect->soff[i] == new->size) {
702
+		    found = 1;
703
+		    break;
704
+		}
705
+	    }
706
+
707
+	    if(!found) {
708
+		md5_sect->soff_len++;
709
+		md5_sect->soff = (uint32_t *) cli_realloc(md5_sect->soff, md5_sect->soff_len * sizeof(uint32_t));
710
+		if(!md5_sect->soff) {
711
+		    cli_errmsg("cli_loadhdb: Can't realloc md5_sect->soff\n");
712
+		    free(bm_new->pattern);
713
+		    free(bm_new->virname);
714
+		    free(bm_new);
715
+		    free(new);
716
+		    ret = CL_EMEM;
717
+		    break;
718
+		}
719
+		md5_sect->soff[md5_sect->soff_len - 1] = new->size;
720
+	    }
721
+
722
+	    free(new);
723
+
724
+	    if((ret = cli_bm_addpatt(md5_sect, bm_new))) {
725
+		cli_errmsg("cli_loadhdb: Error adding BM pattern\n");
726
+		free(bm_new->pattern);
727
+		free(bm_new->virname);
728
+		free(bm_new);
729
+		break;
730
+	    }
731
+
686 732
 	} else {
687 733
 	    if(!(*engine)->md5_hlist) {
688
-		cli_dbgmsg("Initializing md5 list structure\n");
734
+		cli_dbgmsg("cli_loadhdb: Initializing MD5 list structure\n");
689 735
 		(*engine)->md5_hlist = (struct cli_md5_node **) cli_calloc(256, sizeof(struct cli_md5_node *));
690 736
 		if(!(*engine)->md5_hlist) {
737
+		    free(new->virname);
738
+		    free(new->md5);
739
+		    free(new);
691 740
 		    ret = CL_EMEM;
692 741
 		    break;
693 742
 		}
... ...
@@ -699,13 +756,13 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
699 699
     }
700 700
 
701 701
     if(!line) {
702
-	cli_errmsg("Empty database file\n");
702
+	cli_errmsg("cli_loadhdb: Empty database file\n");
703 703
 	cl_free(*engine);
704 704
 	return CL_EMALFDB;
705 705
     }
706 706
 
707 707
     if(ret) {
708
-	cli_errmsg("Problem parsing database at line %d\n", line);
708
+	cli_errmsg("cli_loadhdb: Problem parsing database at line %u\n", line);
709 709
 	cl_free(*engine);
710 710
 	return ret;
711 711
     }
... ...
@@ -713,6 +770,9 @@ static int cli_loadhdb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
713 713
     if(signo)
714 714
 	*signo += line;
715 715
 
716
+    if(md5_sect)
717
+	qsort(md5_sect->soff, md5_sect->soff_len, sizeof(uint32_t), scomp);
718
+
716 719
     return CL_SUCCESS;
717 720
 }
718 721
 
... ...
@@ -1450,7 +1510,6 @@ void cl_free(struct cl_engine *engine)
1450 1450
 		free(root);
1451 1451
 	    }
1452 1452
 	}
1453
-
1454 1453
 	free(engine->root);
1455 1454
     }
1456 1455
 
... ...
@@ -1462,23 +1521,16 @@ void cl_free(struct cl_engine *engine)
1462 1462
 		md5pt = md5pt->next;
1463 1463
 		free(md5h->md5);
1464 1464
 		free(md5h->virname);
1465
-		if(md5h->viralias)
1466
-		    free(md5h->viralias);
1467 1465
 		free(md5h);
1468 1466
 	    }
1469 1467
 	}
1470 1468
 	free(engine->md5_hlist);
1471 1469
     }
1472 1470
 
1473
-    md5pt = engine->md5_sect;
1474
-    while(md5pt) {
1475
-	md5h = md5pt;
1476
-	md5pt = md5pt->next;
1477
-	free(md5h->md5);
1478
-	free(md5h->virname);
1479
-	if(md5h->viralias)
1480
-	    free(md5h->viralias);
1481
-	free(md5h);
1471
+    if((root = engine->md5_sect)) {
1472
+	cli_bm_free(root);
1473
+	free(root->soff);
1474
+	free(root);
1482 1475
     }
1483 1476
 
1484 1477
     metapt = engine->zip_mlist;