Browse code

Use the Boyer-Moore (B-M) matcher to handle .hdb and .fp databases, plus other minor changes

git-svn: trunk@3425

Tomasz Kojm authored on 2007/12/16 05:34:31
Showing 8 changed files
... ...
@@ -1,3 +1,9 @@
1
+Sat Dec 15 20:50:02 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: - use B-M to handle .hdb and .fp databases
4
+	       - whitelisting now works for MD5 sigs
5
+	       - other minor cleanups
6
+
1 7
 Sat Dec 15 15:22:54 EET 2007 (edwin)
2 8
 -----------------------------------
3 9
   * libclamav/phishcheck.c: fix leaks introduced by r3417.
... ...
@@ -110,11 +110,14 @@ struct cl_engine {
110 110
     /* Roots table */
111 111
     void **root;
112 112
 
113
-    /* MD5 */
114
-    void **md5_hlist;
113
+    /* B-M matcher for standard MD5 sigs */
114
+    void *md5_hdb;
115 115
 
116 116
     /* B-M matcher for MD5 sigs for PE sections */
117
-    void *md5_sect;
117
+    void *md5_mdb;
118
+
119
+    /* B-M matcher for whitelist db */
120
+    void *md5_fp;
118 121
 
119 122
     /* Zip metadata */
120 123
     void *zip_mlist;
... ...
@@ -93,24 +93,6 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, const char **virn
93 93
     return ret;
94 94
 }
95 95
 
96
-struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine)
97
-{
98
-	struct cli_md5_node *pt;
99
-
100
-
101
-    if(!(pt = engine->md5_hlist[md5[0] & 0xff]))
102
-	return NULL;
103
-
104
-    while(pt) {
105
-	if(!memcmp(pt->md5, md5, 16))
106
-	    return pt;
107
-
108
-	pt = pt->next;
109
-    }
110
-
111
-    return NULL;
112
-}
113
-
114 96
 off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret, unsigned int *maxshift)
115 97
 {
116 98
 	int (*einfo)(int, struct cli_exe_info *) = NULL;
... ...
@@ -214,32 +196,21 @@ off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_f
214 214
 
215 215
 static int cli_checkfp(int fd, const struct cl_engine *engine)
216 216
 {
217
-	struct cli_md5_node *md5_node;
218 217
 	unsigned char *digest;
218
+	const char *virname;
219 219
 
220 220
 
221
-    if(engine->md5_hlist) {
222
-
221
+    if(engine->md5_fp) {
223 222
 	if(!(digest = cli_md5digest(fd))) {
224 223
 	    cli_errmsg("cli_checkfp(): Can't generate MD5 checksum\n");
225 224
 	    return 0;
226 225
 	}
227 226
 
228
-	if((md5_node = cli_vermd5(digest, engine)) && md5_node->fp) {
229
-		struct stat sb;
230
-
231
-	    if(fstat(fd, &sb))
232
-		return CL_EIO;
233
-
234
-	    if((unsigned int) sb.st_size != md5_node->size) {
235
-		cli_warnmsg("Detected false positive MD5 match. Please report.\n");
236
-	    } else {
237
-		cli_dbgmsg("Eliminated false positive match (fp sig: %s)\n", md5_node->virname);
238
-		free(digest);
239
-		return 1;
240
-	    }
227
+	if(cli_bm_scanbuff(digest, 16, &virname, engine->md5_fp, 0, 0, -1) == CL_VIRUS) {
228
+	    cli_dbgmsg("Eliminated false positive match (fp sig: %s)\n", virname);
229
+	    free(digest);
230
+	    return 1;
241 231
 	}
242
-
243 232
 	free(digest);
244 233
     }
245 234
 
... ...
@@ -283,7 +254,6 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
283 283
 	struct cli_ac_data gdata, tdata;
284 284
 	cli_md5_ctx md5ctx;
285 285
 	unsigned char digest[16];
286
-	struct cli_md5_node *md5_node;
287 286
 	struct cli_matcher *groot = NULL, *troot = NULL;
288 287
 
289 288
 
... ...
@@ -331,7 +301,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
331 331
 	    return ret;
332 332
     }
333 333
 
334
-    if(!ftonly && ctx->engine->md5_hlist)
334
+    if(!ftonly && ctx->engine->md5_hdb)
335 335
 	cli_md5_init(&md5ctx);
336 336
 
337 337
     buff = buffer;
... ...
@@ -388,7 +358,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
388 388
 		    type = ret;
389 389
 	    }
390 390
 
391
-	    if(ctx->engine->md5_hlist)
391
+	    if(ctx->engine->md5_hdb)
392 392
 		cli_md5_update(&md5ctx, buff + shift, bytes);
393 393
 	}
394 394
 
... ...
@@ -415,24 +385,10 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
415 415
     if(troot)
416 416
 	cli_ac_freedata(&tdata);
417 417
 
418
-    if(!ftonly && ctx->engine->md5_hlist) {
418
+    if(!ftonly && ctx->engine->md5_hdb) {
419 419
 	cli_md5_final(digest, &md5ctx);
420
-
421
-	if((md5_node = cli_vermd5(digest, ctx->engine)) && !md5_node->fp) {
422
-		struct stat sb;
423
-
424
-	    if(fstat(desc, &sb))
425
-		return CL_EIO;
426
-
427
-	    if((unsigned int) sb.st_size != md5_node->size) {
428
-		cli_warnmsg("Detected false positive MD5 match. Please report.\n");
429
-	    } else {
430
-		if(ctx->virname)
431
-		    *ctx->virname = md5_node->virname;
432
-
433
-		return CL_VIRUS;
434
-	    }
435
-	}
420
+	if(cli_bm_scanbuff(digest, 16, ctx->virname, ctx->engine->md5_hdb, 0, 0, -1) == CL_VIRUS && (cli_bm_scanbuff(digest, 16, NULL, ctx->engine->md5_fp, 0, 0, -1) != CL_VIRUS))
421
+	    return CL_VIRUS;
436 422
     }
437 423
 
438 424
     return otfrec ? type : CL_CLEAN;
... ...
@@ -54,14 +54,6 @@ struct cli_matcher {
54 54
     uint32_t ac_partsigs, ac_nodes, ac_patterns;
55 55
 };
56 56
 
57
-struct cli_md5_node {
58
-    char *virname;
59
-    unsigned char *md5;
60
-    unsigned int size;
61
-    unsigned short fp;
62
-    struct cli_md5_node *next;
63
-};
64
-
65 57
 struct cli_meta_node {
66 58
     int csize, size, method;
67 59
     unsigned int crc32, fileno, encrypted, maxdepth;
... ...
@@ -83,8 +75,6 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
83 83
 
84 84
 int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
85 85
 
86
-struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine);
87
-
88 86
 off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret, unsigned int *maxshift);
89 87
 
90 88
 #endif
... ...
@@ -797,13 +797,13 @@ int cli_scanpe(int desc, cli_ctx *ctx)
797 797
 	    if(SCAN_ALGO && (DCONF & PE_CONF_POLIPOS) && !*sname && exe_sections[i].vsz > 40000 && exe_sections[i].vsz < 70000 && exe_sections[i].chr == 0xe0000060) polipos = i;
798 798
 
799 799
 	    /* check MD5 section sigs */
800
-	    md5_sect = ctx->engine->md5_sect;
800
+	    md5_sect = ctx->engine->md5_mdb;
801 801
 	    if((DCONF & PE_CONF_MD5SECT) && md5_sect) {
802 802
 		found = 0;
803 803
 		for(j = 0; j < md5_sect->soff_len && md5_sect->soff[j] <= exe_sections[i].rsz; j++) {
804 804
 		    if(md5_sect->soff[j] == exe_sections[i].rsz) {
805 805
 			unsigned char md5_dig[16];
806
-			if(cli_md5sect(desc, &exe_sections[i], md5_dig) && cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_sect, 0, 0, -1) == CL_VIRUS) {
806
+			if(cli_md5sect(desc, &exe_sections[i], md5_dig) && cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_mdb, 0, 0, -1) == CL_VIRUS) {
807 807
 				free(section_hdr);
808 808
 				free(exe_sections);
809 809
 				return CL_VIRUS;
... ...
@@ -696,15 +696,48 @@ static int scomp(const void *a, const void *b)
696 696
 #define MD5_HDB	    0
697 697
 #define MD5_MDB	    1
698 698
 #define MD5_FP	    2
699
-static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo, uint8_t mode, unsigned int options)
699
+
700
+static int cli_md5db_init(struct cl_engine **engine, unsigned int mode)
701
+{
702
+	struct cli_matcher *bm = NULL;
703
+	int ret;
704
+
705
+
706
+    if(mode == MD5_HDB) {
707
+	bm = (*engine)->md5_hdb = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
708
+    } else if(mode == MD5_MDB) {
709
+	bm = (*engine)->md5_mdb = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
710
+    } else {
711
+	bm = (*engine)->md5_fp = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
712
+    }
713
+
714
+    if(!bm)
715
+	return CL_EMEM;
716
+
717
+    if((ret = cli_bm_init(bm))) {
718
+	cli_errmsg("cli_md5db_init: Failed to initialize B-M\n");
719
+	return ret;
720
+    }
721
+
722
+    return CL_SUCCESS;
723
+}
724
+
725
+#define MD5_DB			    \
726
+    if(mode == MD5_HDB)		    \
727
+	db = (*engine)->md5_hdb;    \
728
+    else if(mode == MD5_MDB)	    \
729
+	db = (*engine)->md5_mdb;    \
730
+    else			    \
731
+	db = (*engine)->md5_fp;
732
+
733
+static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned int mode, unsigned int options)
700 734
 {
701 735
 	char buffer[FILEBUFF], *pt;
702 736
 	int ret = CL_SUCCESS;
703
-	uint8_t size_field = 1, md5_field = 0, found;
704
-	uint32_t line = 0, i;
705
-	struct cli_md5_node *new;
706
-	struct cli_bm_patt *bm_new;
707
-	struct cli_matcher *md5_sect = NULL;
737
+	unsigned int size_field = 1, md5_field = 0, found, line = 0, i;
738
+	uint32_t size;
739
+	struct cli_bm_patt *new;
740
+	struct cli_matcher *db = NULL;
708 741
 
709 742
 
710 743
     if((ret = cli_initengine(engine, options))) {
... ...
@@ -721,22 +754,19 @@ static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo,
721 721
 	line++;
722 722
 	cli_chomp(buffer);
723 723
 
724
-	new = (struct cli_md5_node *) cli_calloc(1, sizeof(struct cli_md5_node));
724
+	new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
725 725
 	if(!new) {
726 726
 	    ret = CL_EMEM;
727 727
 	    break;
728 728
 	}
729 729
 
730
-	if(mode == MD5_FP) /* fp */
731
-	    new->fp = 1;
732
-
733 730
 	if(!(pt = cli_strtok(buffer, md5_field, ":"))) {
734 731
 	    free(new);
735 732
 	    ret = CL_EMALFDB;
736 733
 	    break;
737 734
 	}
738 735
 
739
-	if(!(new->md5 = (unsigned char *) cli_hex2str(pt))) {
736
+	if(strlen(pt) != 32 || !(new->pattern = (unsigned char *) cli_hex2str(pt))) {
740 737
 	    cli_errmsg("cli_loadmd5: Malformed MD5 string at line %u\n", line);
741 738
 	    free(pt);
742 739
 	    free(new);
... ...
@@ -744,105 +774,60 @@ static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo,
744 744
 	    break;
745 745
 	}
746 746
 	free(pt);
747
+	new->length = 16;
747 748
 
748 749
 	if(!(pt = cli_strtok(buffer, size_field, ":"))) {
749
-	    free(new->md5);
750
+	    free(new->pattern);
750 751
 	    free(new);
751 752
 	    ret = CL_EMALFDB;
752 753
 	    break;
753 754
 	}
754
-	new->size = atoi(pt);
755
+	size = atoi(pt);
755 756
 	free(pt);
756 757
 
757 758
 	if(!(new->virname = cli_strtok(buffer, 2, ":"))) {
758
-	    free(new->md5);
759
+	    free(new->pattern);
759 760
 	    free(new);
760 761
 	    ret = CL_EMALFDB;
761 762
 	    break;
762 763
 	}
763 764
 
764
-	if(mode == MD5_MDB) { /* section MD5 */
765
-	    if(!(*engine)->md5_sect) {
766
-		(*engine)->md5_sect = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
767
-		if(!(*engine)->md5_sect) {
768
-		    free(new->virname);
769
-		    free(new->md5);
770
-		    free(new);
771
-		    ret = CL_EMEM;
772
-		    break;
773
-		}
774
-		if((ret = cli_bm_init((*engine)->md5_sect))) {
775
-		    cli_errmsg("cli_loadmd5: Can't initialise BM pattern matcher\n");
776
-		    free(new->virname);
777
-		    free(new->md5);
778
-		    free(new);
779
-		    break;
780
-		}
781
-	    }
782
-	    md5_sect = (*engine)->md5_sect;
783
-
784
-	    bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
785
-	    if(!bm_new) {
786
-		cli_errmsg("cli_loadmd5: Can't allocate memory for bm_new\n");
787
-		free(new->virname);
788
-		free(new->md5);
789
-		free(new);
790
-		ret = CL_EMEM;
791
-		break;
792
-	    }
765
+	MD5_DB;
766
+	if(!db && (ret = cli_md5db_init(engine, mode))) {
767
+	    free(new->pattern);
768
+	    free(new->virname);
769
+	    free(new);
770
+	    break;
771
+	} else {
772
+	    MD5_DB;
773
+	}
793 774
 
794
-	    bm_new->pattern = new->md5;
795
-	    bm_new->length = 16;
796
-	    bm_new->virname = new->virname;
775
+	if((ret = cli_bm_addpatt(db, new))) {
776
+	    cli_errmsg("cli_loadmd5: Error adding BM pattern\n");
777
+	    free(new->pattern);
778
+	    free(new->virname);
779
+	    free(new);
780
+	    break;
781
+	}
797 782
 
783
+	if(mode == MD5_MDB) { /* section MD5 */
798 784
 	    found = 0;
799
-	    for(i = 0; i < md5_sect->soff_len; i++) {
800
-		if(md5_sect->soff[i] == new->size) {
785
+	    for(i = 0; i < db->soff_len; i++) {
786
+		if(db->soff[i] == size) {
801 787
 		    found = 1;
802 788
 		    break;
803 789
 		}
804 790
 	    }
805
-
806 791
 	    if(!found) {
807
-		md5_sect->soff_len++;
808
-		md5_sect->soff = (uint32_t *) cli_realloc2(md5_sect->soff, md5_sect->soff_len * sizeof(uint32_t));
809
-		if(!md5_sect->soff) {
810
-		    cli_errmsg("cli_loadmd5: Can't realloc md5_sect->soff\n");
811
-		    free(bm_new->pattern);
812
-		    free(bm_new->virname);
813
-		    free(bm_new);
814
-		    free(new);
815
-		    ret = CL_EMEM;
816
-		    break;
817
-		}
818
-		md5_sect->soff[md5_sect->soff_len - 1] = new->size;
819
-	    }
820
-
821
-	    free(new);
822
-
823
-	    if((ret = cli_bm_addpatt(md5_sect, bm_new))) {
824
-		cli_errmsg("cli_loadmd5: Error adding BM pattern\n");
825
-		free(bm_new->pattern);
826
-		free(bm_new->virname);
827
-		free(bm_new);
828
-		break;
829
-	    }
830
-
831
-	} else {
832
-	    if(!(*engine)->md5_hlist) {
833
-		cli_dbgmsg("cli_loadmd5: Initializing MD5 list structure\n");
834
-		(*engine)->md5_hlist = cli_calloc(256, sizeof(struct cli_md5_node *));
835
-		if(!(*engine)->md5_hlist) {
836
-		    free(new->virname);
837
-		    free(new->md5);
838
-		    free(new);
792
+		db->soff_len++;
793
+		db->soff = (uint32_t *) cli_realloc2(db->soff, db->soff_len * sizeof(uint32_t));
794
+		if(!db->soff) {
795
+		    cli_errmsg("cli_loadmd5: Can't realloc db->soff\n");
839 796
 		    ret = CL_EMEM;
840 797
 		    break;
841 798
 		}
799
+		db->soff[db->soff_len - 1] = size;
842 800
 	    }
843
-
844
-	    new->next = (*engine)->md5_hlist[new->md5[0] & 0xff];
845
-	    (*engine)->md5_hlist[new->md5[0] & 0xff] = new;
846 801
 	}
847 802
     }
848 803
 
... ...
@@ -861,8 +846,8 @@ static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo,
861 861
     if(signo)
862 862
 	*signo += line;
863 863
 
864
-    if(md5_sect)
865
-	qsort(md5_sect->soff, md5_sect->soff_len, sizeof(uint32_t), scomp);
864
+    if(db && mode == MD5_MDB)
865
+	qsort(db->soff, db->soff_len, sizeof(uint32_t), scomp);
866 866
 
867 867
     return CL_SUCCESS;
868 868
 }
... ...
@@ -1523,7 +1508,6 @@ int cl_statfree(struct cl_stat *dbstat)
1523 1523
 void cl_free(struct cl_engine *engine)
1524 1524
 {
1525 1525
 	int i;
1526
-	struct cli_md5_node *md5pt, *md5h;
1527 1526
 	struct cli_meta_node *metapt, *metah;
1528 1527
 	struct cli_matcher *root;
1529 1528
 
... ...
@@ -1561,26 +1545,22 @@ void cl_free(struct cl_engine *engine)
1561 1561
 	free(engine->root);
1562 1562
     }
1563 1563
 
1564
-    if(engine->md5_hlist) {
1565
-	for(i = 0; i < 256; i++) {
1566
-	    md5pt = engine->md5_hlist[i];
1567
-	    while(md5pt) {
1568
-		md5h = md5pt;
1569
-		md5pt = md5pt->next;
1570
-		free(md5h->md5);
1571
-		free(md5h->virname);
1572
-		free(md5h);
1573
-	    }
1574
-	}
1575
-	free(engine->md5_hlist);
1564
+    if((root = engine->md5_hdb)) {
1565
+	cli_bm_free(root);
1566
+	free(root);
1576 1567
     }
1577 1568
 
1578
-    if((root = engine->md5_sect)) {
1569
+    if((root = engine->md5_mdb)) {
1579 1570
 	cli_bm_free(root);
1580 1571
 	free(root->soff);
1581 1572
 	free(root);
1582 1573
     }
1583 1574
 
1575
+    if((root = engine->md5_fp)) {
1576
+	cli_bm_free(root);
1577
+	free(root);
1578
+    }
1579
+
1584 1580
     metapt = engine->zip_mlist;
1585 1581
     while(metapt) {
1586 1582
 	metah = metapt;
... ...
@@ -174,14 +174,6 @@ static int cli_unrar_scanmetadata(int desc, unrar_metadata_t *metadata, cli_ctx
174 174
 	}
175 175
     }
176 176
 
177
-/*
178
-    TROG - TODO: multi-volume files
179
-    if((rarlist->item.Flags & 0x03) != 0) {
180
-	cli_dbgmsg("RAR: Skipping %s (split)\n", rarlist->item.Name);
181
-	rarlist = rarlist->next;
182
-	continue;
183
-    }
184
-*/
185 177
     return ret;
186 178
 }
187 179
 
... ...
@@ -231,6 +223,10 @@ static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
231 231
 
232 232
     cli_dbgmsg("in scanrar()\n");
233 233
 
234
+    if(sfx_offset)
235
+	if(lseek(desc, sfx_offset, SEEK_SET) == -1)
236
+	    return CL_EIO;
237
+
234 238
     /* generate the temporary directory */
235 239
     dir = cli_gentemp(NULL);
236 240
     if(mkdir(dir, 0700)) {
... ...
@@ -239,9 +235,6 @@ static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
239 239
 	return CL_ETMPDIR;
240 240
     }
241 241
 
242
-    if(sfx_offset)
243
-	lseek(desc, sfx_offset, SEEK_SET);
244
-
245 242
     if((ret = unrar_open(desc, dir, &rar_state)) != UNRAR_OK) {
246 243
 	if(!cli_leavetemps_flag)
247 244
 	    cli_rmdirs(dir);
... ...
@@ -893,10 +886,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
893 893
 		}
894 894
 		free(fullname);
895 895
 		cli_dbgmsg("VBADir: Decompress WM project '%s' macro:%d key:%d length:%d\n", vba_project->name[i], i, vba_project->key[i], vba_project->length[i]);
896
-		if(vba_project->length[i])
897
-		    data = (unsigned char *) wm_decrypt_macro(fd, vba_project->offset[i], vba_project->length[i], vba_project->key[i]);
898
-		else
899
-		    data = NULL;
896
+		data = (unsigned char *) wm_decrypt_macro(fd, vba_project->offset[i], vba_project->length[i], vba_project->key[i]);
900 897
 		close(fd);
901 898
 		
902 899
 		if(!data) {
... ...
@@ -1199,6 +1199,9 @@ wm_decrypt_macro(int fd, off_t offset, uint32_t len, unsigned char key)
1199 1199
 {
1200 1200
 	unsigned char *buff;
1201 1201
 
1202
+	if(!len)
1203
+		return NULL;
1204
+
1202 1205
 	buff = (unsigned char *)cli_malloc(len);
1203 1206
 	if(buff == NULL)
1204 1207
 		return NULL;