Browse code

improve scanning of files whose types are detected on-the-fly

git-svn: trunk@2601

Tomasz Kojm authored on 2007/01/09 05:05:07
Showing 8 changed files
... ...
@@ -1,3 +1,8 @@
1
+Mon Jan  8 20:43:14 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: improve scanning of files whose types are detected on-the-fly
4
+	       (closes bb#33)
5
+
1 6
 Sun Jan  7 21:31:06 GMT 2007 (njh)
2 7
 ----------------------------------
3 8
   * libclamav:		Use HAVE_STDBOOL_H
... ...
@@ -404,11 +404,11 @@ void cli_ac_freedata(struct cli_ac_data *data)
404 404
     }
405 405
 }
406 406
 
407
-int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset)
407
+int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
408 408
 {
409 409
 	struct cli_ac_node *current;
410 410
 	struct cli_ac_patt *pt;
411
-	int type = CL_CLEAN, t, j;
411
+	int type = CL_CLEAN, j;
412 412
         unsigned int i, position, curroff;
413 413
 	uint8_t offnum, found;
414 414
 	struct cli_matched_type *tnode;
... ...
@@ -438,12 +438,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char
438 438
 		    curroff = offset + position - pt->prefix_length;
439 439
 
440 440
 		    if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
441
-			if(ftype == CL_TYPE_UNKNOWN_TEXT)
442
-			    t = type;
443
-			else
444
-			    t = ftype;
445
-
446
-			if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, curroff, &info, fd, pt->virname)) {
441
+			if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, curroff, &info, fd, pt->virname)) {
447 442
 			    pt = pt->next;
448 443
 			    continue;
449 444
 			}
... ...
@@ -40,7 +40,7 @@ struct cli_ac_data {
40 40
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
41 41
 int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int histlen);
42 42
 void cli_ac_freedata(struct cli_ac_data *data);
43
-int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset);
43
+int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset);
44 44
 int cli_ac_buildtrie(struct cli_matcher *root);
45 45
 void cli_ac_free(struct cli_matcher *root);
46 46
 void cli_ac_setdepth(unsigned int depth);
... ...
@@ -23,6 +23,7 @@
23 23
 #include "cltypes.h"
24 24
 #include "matcher.h"
25 25
 #include "matcher-bm.h"
26
+#include "filetypes.h"
26 27
 
27 28
 /* TODO: Check prefix regularity and automatically transfer some signatures
28 29
  *	 to AC
... ...
@@ -128,7 +129,7 @@ void cli_bm_free(struct cli_matcher *root)
128 128
     }
129 129
 }
130 130
 
131
-int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd)
131
+int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, cli_file_t ftype, int fd)
132 132
 {
133 133
 	unsigned int i, j, shift, off, found = 0;
134 134
 	int idxtest;
... ...
@@ -23,10 +23,11 @@
23 23
 #include "clamav.h"
24 24
 #include "matcher.h"
25 25
 #include "matcher-bm.h"
26
+#include "filetypes.h"
26 27
 
27 28
 int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern);
28 29
 int cli_bm_init(struct cli_matcher *root);
29
-int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd);
30
+int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, cli_file_t ftype, int fd);
30 31
 void cli_bm_free(struct cli_matcher *root);
31 32
 
32 33
 #endif
... ...
@@ -46,11 +46,11 @@
46 46
 #include "matcher-ncore.h"
47 47
 #endif
48 48
 
49
-static unsigned int targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
49
+static cli_file_t targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
50 50
 
51 51
 extern short cli_debug_flag;
52 52
 
53
-int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype)
53
+int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, cli_file_t ftype)
54 54
 {
55 55
 	int ret = CL_CLEAN, i;
56 56
 	struct cli_ac_data mdata;
... ...
@@ -121,7 +121,7 @@ struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine
121 121
     return NULL;
122 122
 }
123 123
 
124
-off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, unsigned short ftype, int *ret)
124
+off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret)
125 125
 {
126 126
 	int (*einfo)(int, struct cli_exe_info *) = NULL;
127 127
 	unsigned int n;
... ...
@@ -252,7 +252,7 @@ static int cli_checkfp(int fd, const struct cl_engine *engine)
252 252
     return 0;
253 253
 }
254 254
 
255
-int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
255
+int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
256 256
 {
257 257
 	off_t offset;
258 258
 	int ret;
... ...
@@ -275,7 +275,7 @@ int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, str
275 275
     return 1;
276 276
 }
277 277
 
278
-int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset)
278
+int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, cli_file_t ftype, unsigned short ftonly, struct cli_matched_type **ftoffset)
279 279
 {
280 280
  	unsigned char *buffer, *buff, *endbl, *upt;
281 281
 	int ret = CL_CLEAN, type = CL_CLEAN, i, bytes;
... ...
@@ -285,7 +285,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
285 285
 	MD5_CTX md5ctx;
286 286
 	unsigned char digest[16];
287 287
 	struct cli_md5_node *md5_node;
288
-	struct cli_matcher *groot, *troot = NULL;
288
+	struct cli_matcher *groot = NULL, *troot = NULL;
289 289
 
290 290
 
291 291
     if(!ctx->engine) {
... ...
@@ -303,7 +303,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
303 303
     }
304 304
 #endif
305 305
 
306
-    groot = ctx->engine->root[0]; /* generic signatures */
306
+    if(!ftonly)
307
+	groot = ctx->engine->root[0]; /* generic signatures */
307 308
 
308 309
     if(ftype) {
309 310
 	for(i = 1; i < CL_TARGET_TABLE_SIZE; i++) {
... ...
@@ -314,10 +315,17 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
314 314
 	}
315 315
     }
316 316
 
317
-    if(troot)
318
-	maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
319
-    else
320
-	maxpatlen = groot->maxpatlen;
317
+    if(ftonly) {
318
+	if(!troot)
319
+	    return CL_CLEAN;
320
+
321
+	maxpatlen = troot->maxpatlen;
322
+    } else {
323
+	if(troot)
324
+	    maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
325
+	else
326
+	    maxpatlen = groot->maxpatlen;
327
+    }
321 328
 
322 329
     /* prepare the buffer */
323 330
     buffersize = maxpatlen + SCANBUFF;
... ...
@@ -326,7 +334,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
326 326
 	return CL_EMEM;
327 327
     }
328 328
 
329
-    if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
329
+    if(!ftonly && (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
330 330
 	return ret;
331 331
 
332 332
     if(troot) {
... ...
@@ -334,10 +342,9 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
334 334
 	    return ret;
335 335
     }
336 336
 
337
-    if(ctx->engine->md5_hlist)
337
+    if(!ftonly && ctx->engine->md5_hlist)
338 338
 	MD5_Init(&md5ctx);
339 339
 
340
-
341 340
     buff = buffer;
342 341
     buff += maxpatlen; /* pointer to read data block */
343 342
     endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block
... ...
@@ -360,7 +367,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
360 360
 
361 361
 	    if(ret == CL_VIRUS) {
362 362
 		free(buffer);
363
-		cli_ac_freedata(&gdata);
363
+		if(!ftonly)
364
+		    cli_ac_freedata(&gdata);
364 365
 		cli_ac_freedata(&tdata);
365 366
 
366 367
 		lseek(desc, 0, SEEK_SET);
... ...
@@ -371,27 +379,29 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
371 371
 	    }
372 372
 	}
373 373
 
374
-	if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
375
-	    ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
374
+	if(!ftonly) {
375
+	    if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
376
+		ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
376 377
 
377
-	if(ret == CL_VIRUS) {
378
-	    free(buffer);
379
-	    cli_ac_freedata(&gdata);
380
-	    if(troot)
381
-		cli_ac_freedata(&tdata);
382
-	    lseek(desc, 0, SEEK_SET);
383
-	    if(cli_checkfp(desc, ctx->engine))
384
-		return CL_CLEAN;
385
-	    else
386
-		return CL_VIRUS;
378
+	    if(ret == CL_VIRUS) {
379
+		free(buffer);
380
+		cli_ac_freedata(&gdata);
381
+		if(troot)
382
+		    cli_ac_freedata(&tdata);
383
+		lseek(desc, 0, SEEK_SET);
384
+		if(cli_checkfp(desc, ctx->engine))
385
+		    return CL_CLEAN;
386
+		else
387
+		    return CL_VIRUS;
387 388
 
388
-	} else if(otfrec && ret >= CL_TYPENO) {
389
-	    if(ret > type)
390
-		type = ret;
391
-	}
389
+	    } else if(otfrec && ret >= CL_TYPENO) {
390
+		if(ret > type)
391
+		    type = ret;
392
+	    }
392 393
 
393
-	if(ctx->engine->md5_hlist)
394
-	    MD5_Update(&md5ctx, buff + shift, bytes);
394
+	    if(ctx->engine->md5_hlist)
395
+		MD5_Update(&md5ctx, buff + shift, bytes);
396
+	}
395 397
 
396 398
 	if(bytes + shift == SCANBUFF) {
397 399
 	    memmove(buffer, endbl, maxpatlen);
... ...
@@ -411,11 +421,12 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
411 411
     }
412 412
 
413 413
     free(buffer);
414
-    cli_ac_freedata(&gdata);
414
+    if(!ftonly)
415
+	cli_ac_freedata(&gdata);
415 416
     if(troot)
416 417
 	cli_ac_freedata(&tdata);
417 418
 
418
-    if(ctx->engine->md5_hlist) {
419
+    if(!ftonly && ctx->engine->md5_hlist) {
419 420
 	MD5_Final(digest, &md5ctx);
420 421
 
421 422
 	if((md5_node = cli_vermd5(digest, ctx->engine)) && !md5_node->fp) {
... ...
@@ -37,14 +37,14 @@ struct cli_target_info {
37 37
     int8_t status; /* 0 == not initialised, 1 == initialised OK, -1 == error */
38 38
 };
39 39
 
40
-int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset);
40
+int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, cli_file_t ftype, unsigned short ftonly, struct cli_matched_type **ftoffset);
41 41
 
42
-int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype);
42
+int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, cli_file_t ftype);
43 43
 
44
-int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
44
+int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
45 45
 
46 46
 struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine);
47 47
 
48
-off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, unsigned short ftype, int *ret);
48
+off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret);
49 49
 
50 50
 #endif
... ...
@@ -196,7 +196,7 @@ static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
196 196
 	if(DETECT_ENCRYPTED && metadata->encrypted) {
197 197
 	    cli_dbgmsg("RAR: Encrypted files found in archive.\n");
198 198
 	    lseek(desc, 0, SEEK_SET);
199
-	    ret = cli_scandesc(desc, ctx, 0, 0, NULL);
199
+	    ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL);
200 200
 	    if(ret < 0) {
201 201
 		break;
202 202
 	    } else if(ret != CL_VIRUS) {
... ...
@@ -400,7 +400,7 @@ static int cli_scanzip(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
400 400
 	if(DETECT_ENCRYPTED && encrypted) {
401 401
 	    cli_dbgmsg("Zip: Encrypted files found in archive.\n");
402 402
 	    lseek(desc, 0, SEEK_SET);
403
-	    ret = cli_scandesc(desc, ctx, 0, 0, NULL);
403
+	    ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL);
404 404
 	    if(ret < 0) {
405 405
 		break;
406 406
 	    } else if(ret != CL_VIRUS) {
... ...
@@ -1026,7 +1026,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
1026 1026
     if (fd >= 0) {
1027 1027
     	ofd = cli_decode_ole_object(fd, dirname);
1028 1028
 	if (ofd >= 0) {
1029
-		ret = cli_scandesc(ofd, ctx, 0, 0, NULL);
1029
+		ret = cli_scandesc(ofd, ctx, 0, 0, 0, NULL);
1030 1030
 		close(ofd);
1031 1031
 	}
1032 1032
 	close(fd);
... ...
@@ -1092,7 +1092,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
1092 1092
     snprintf(fullname, 1024, "%s/comment.html", tempname);
1093 1093
     fd = open(fullname, O_RDONLY|O_BINARY);
1094 1094
     if (fd >= 0) {
1095
-        ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
1095
+        ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
1096 1096
 	close(fd);
1097 1097
     }
1098 1098
 
... ...
@@ -1107,7 +1107,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
1107 1107
 	snprintf(fullname, 1024, "%s/nocomment.html", tempname);
1108 1108
 	fd = open(fullname, O_RDONLY|O_BINARY);
1109 1109
 	if (fd >= 0) {
1110
-	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
1110
+	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
1111 1111
 	    close(fd);
1112 1112
 	}
1113 1113
     }
... ...
@@ -1123,7 +1123,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
1123 1123
 	snprintf(fullname, 1024, "%s/script.html", tempname);
1124 1124
 	fd = open(fullname, O_RDONLY|O_BINARY);
1125 1125
 	if (fd >= 0) {
1126
-	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
1126
+	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
1127 1127
 	    close(fd);
1128 1128
 	}
1129 1129
     }
... ...
@@ -1597,7 +1597,7 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
1597 1597
 	return CL_EIO;
1598 1598
     }
1599 1599
 
1600
-    if((ret = cli_scandesc(desc, ctx, ftrec, type, &ftoffset)) == CL_VIRUS) {
1600
+    if((ret = cli_scandesc(desc, ctx, ftrec, type, 0, &ftoffset)) == CL_VIRUS) {
1601 1601
 	cli_dbgmsg("%s found in descriptor %d.\n", *ctx->virname, desc);
1602 1602
 	return CL_VIRUS;
1603 1603
 
... ...
@@ -1607,6 +1607,11 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
1607 1607
     } else if(ret >= CL_TYPENO) {
1608 1608
 	lseek(desc, 0, SEEK_SET);
1609 1609
 
1610
+	if((nret = cli_scandesc(desc, ctx, 0, ret, 1, NULL)) == CL_VIRUS) {
1611
+	    cli_dbgmsg("%s found in descriptor %d when scanning file type %u\n", *ctx->virname, desc, ret);
1612
+	    return CL_VIRUS;
1613
+	}
1614
+
1610 1615
 	ret == CL_TYPE_MAIL ? ctx->mrec++ : ctx->arec++;
1611 1616
 	switch(ret) {
1612 1617
 	    case CL_TYPE_HTML:
... ...
@@ -1692,7 +1697,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1692 1692
 
1693 1693
     if(!ctx->options) { /* raw mode (stdin, etc.) */
1694 1694
 	cli_dbgmsg("Raw mode: No support for special files\n");
1695
-	if((ret = cli_scandesc(desc, ctx, 0, 0, NULL)) == CL_VIRUS)
1695
+	if((ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL)) == CL_VIRUS)
1696 1696
 	    cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
1697 1697
 	return ret;
1698 1698
     }