Browse code

optimise scanning of SFX archives

git-svn: trunk@1871

Tomasz Kojm authored on 2006/03/25 09:10:15
Showing 4 changed files
... ...
@@ -1,3 +1,7 @@
1
+Sat Mar 25 01:05:50 CET 2006 (tk)
2
+---------------------------------
3
+  * libclamav: optimise scanning of SFX archives
4
+
1 5
 Wed Mar 22 19:04:15 CET 2006 (tk)
2 6
 ---------------------------------
3 7
   * libclamav/htmlnorm.c: fix typo spotted by Gianluigi Tiesi
... ...
@@ -20,8 +20,11 @@
20 20
 #ifndef __FILETYPES_H
21 21
 #define __FILETYPES_H
22 22
 
23
+#include <sys/types.h>
24
+
23 25
 #define MAGIC_BUFFER_SIZE 50
24 26
 #define CL_TYPENO 500
27
+#define SFX_MAX_TESTS 10
25 28
 
26 29
 typedef enum {
27 30
     CL_TYPE_UNKNOWN_TEXT = CL_TYPENO,
... ...
@@ -60,7 +63,8 @@ typedef enum {
60 60
 
61 61
 struct cli_matched_type {
62 62
     cli_file_t type;
63
-    size_t offset;
63
+    off_t offset;
64
+    unsigned short cnt;
64 65
     struct cli_matched_type *next;
65 66
 };
66 67
 
... ...
@@ -318,17 +318,25 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
318 318
 				    if(pt->type) {
319 319
 					if(otfrec) {
320 320
 					    if(pt->type > type || pt->type >= CL_TYPE_SFX) {
321
-						cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
321
+						cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position);
322 322
 						type = pt->type;
323
-						if(ftoffset && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
323
+						if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
324 324
 						    if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
325 325
 							cli_errmsg("Can't alloc memory for new type node\n");
326 326
 							return CL_EMEM;
327 327
 						    }
328
+
328 329
 						    tnode->type = type;
329 330
 						    tnode->offset = offset + position;
331
+
332
+						    if(*ftoffset)
333
+							tnode->cnt = (*ftoffset)->cnt + 1;
334
+						    else
335
+							tnode->cnt = 1;
336
+
330 337
 						    tnode->next = *ftoffset;
331 338
 						    *ftoffset = tnode;
339
+
332 340
 						}
333 341
 					    }
334 342
 					}
... ...
@@ -346,16 +354,21 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
346 346
 			if(pt->type) {
347 347
 			    if(otfrec) {
348 348
 				if(pt->type > type || pt->type >= CL_TYPE_SFX) {
349
-				    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
350
-
349
+				    cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position);
351 350
 				    type = pt->type;
352
-				    if(ftoffset && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
351
+				    if(ftoffset && (!*ftoffset ||(*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
353 352
 					if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
354 353
 					    cli_errmsg("Can't alloc memory for new type node\n");
355 354
 					    return CL_EMEM;
356 355
 					}
357 356
 					tnode->type = type;
358 357
 					tnode->offset = offset + position;
358
+
359
+					if(*ftoffset)
360
+					    tnode->cnt = (*ftoffset)->cnt + 1;
361
+					else
362
+					    tnode->cnt = 1;
363
+
359 364
 					tnode->next = *ftoffset;
360 365
 					*ftoffset = tnode;
361 366
 				    }
... ...
@@ -107,7 +107,7 @@ static void cli_unlock_mutex(void *mtx)
107 107
 #endif
108 108
 */
109 109
 
110
-static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset)
110
+static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_check)
111 111
 {
112 112
 	int fd, ret = CL_CLEAN;
113 113
 	unsigned int files = 0;
... ...
@@ -126,7 +126,9 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset)
126 126
 	return CL_ETMPDIR;
127 127
     }
128 128
 
129
-    lseek(desc, offset, SEEK_SET);
129
+    if(sfx_offset)
130
+	lseek(desc, sfx_offset, SEEK_SET);
131
+
130 132
     metadata = metadata_tmp = cli_unrar(desc, dir, ctx->limits);
131 133
 
132 134
     if(cli_scandir(dir, ctx) == CL_VIRUS) {
... ...
@@ -135,6 +137,13 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset)
135 135
 
136 136
 	files++;
137 137
 
138
+	if(files == 1 && sfx_check) {
139
+	    if(*sfx_check == metadata->crc)
140
+		break;
141
+	    else
142
+		*sfx_check = metadata->crc;
143
+	}
144
+
138 145
 	cli_dbgmsg("RAR: %s, crc32: 0x%x, encrypted: %d, compressed: %u, normal: %u, method: %d, ratio: %d (max: %d)\n",
139 146
 		metadata->filename, metadata->crc, metadata->encrypted, metadata->pack_size,
140 147
 		metadata->unpack_size, metadata->method,
... ...
@@ -254,7 +263,7 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset)
254 254
 }
255 255
 
256 256
 #ifdef HAVE_ZLIB_H
257
-static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset)
257
+static int cli_scanzip(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_check)
258 258
 {
259 259
 	ZZIP_DIR *zdir;
260 260
 	ZZIP_DIRENT zdirent;
... ...
@@ -272,8 +281,8 @@ static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset)
272 272
 
273 273
     cli_dbgmsg("in scanzip()\n");
274 274
 
275
-    if(offset)
276
-	lseek(desc, offset, SEEK_SET);
275
+    if(sfx_offset)
276
+	lseek(desc, sfx_offset, SEEK_SET);
277 277
 
278 278
     if((zdir = zzip_dir_fdopen(dup(desc), &err)) == NULL) {
279 279
 	cli_dbgmsg("Zip: zzip_dir_fdopen() return code: %d\n", err);
... ...
@@ -292,6 +301,13 @@ static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset)
292 292
     while(zzip_dir_read(zdir, &zdirent)) {
293 293
 	files++;
294 294
 
295
+	if(files == 1 && sfx_check) {
296
+	    if(*sfx_check == zdirent.d_crc32)
297
+		break;
298
+	    else
299
+		*sfx_check = zdirent.d_crc32;
300
+	}
301
+
295 302
 	if(!zdirent.d_name || !strlen(zdirent.d_name)) { /* Mimail fix */
296 303
 	    cli_dbgmsg("Zip: strlen(zdirent.d_name) == %d\n", strlen(zdirent.d_name));
297 304
 	    *ctx->virname = "Suspect.Zip";
... ...
@@ -1492,6 +1508,7 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
1492 1492
 	int ret = CL_CLEAN;
1493 1493
 	unsigned short ftrec;
1494 1494
 	struct cli_matched_type *ftoffset = NULL, *fpt;
1495
+	uint32_t lastzip, lastrar;
1495 1496
 
1496 1497
 
1497 1498
     switch(type) {
... ...
@@ -1536,15 +1553,16 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
1536 1536
 	    case CL_TYPE_ZIPSFX:
1537 1537
 		if(type == CL_TYPE_MSEXE) {
1538 1538
 		    if(SCAN_ARCHIVE) {
1539
+			lastzip = lastrar = 0xdeadbeef;
1539 1540
 			fpt = ftoffset;
1540 1541
 			while(fpt) {
1541 1542
 			    if(fpt->type == CL_TYPE_RARSFX) {
1542 1543
 				cli_dbgmsg("RAR-SFX signature found at %d\n", fpt->offset);
1543
-				if((ret = cli_scanrar(desc, ctx, fpt->offset) == CL_VIRUS))
1544
+				if((ret = cli_scanrar(desc, ctx, fpt->offset, &lastrar) == CL_VIRUS))
1544 1545
 				    break;
1545 1546
 			    } else if(fpt->type == CL_TYPE_ZIPSFX) {
1546 1547
 				cli_dbgmsg("ZIP-SFX signature found at %d\n", fpt->offset);
1547
-				if((ret = cli_scanzip(desc, ctx, fpt->offset) == CL_VIRUS))
1548
+				if((ret = cli_scanzip(desc, ctx, fpt->offset, &lastzip) == CL_VIRUS))
1548 1549
 				    break;
1549 1550
 			    }
1550 1551
 			    fpt = fpt->next;
... ...
@@ -1633,12 +1651,12 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1633 1633
     switch(type) {
1634 1634
 	case CL_TYPE_RAR:
1635 1635
 	    if(SCAN_ARCHIVE)
1636
-		ret = cli_scanrar(desc, ctx, 0);
1636
+		ret = cli_scanrar(desc, ctx, 0, NULL);
1637 1637
 	    break;
1638 1638
 
1639 1639
 	case CL_TYPE_ZIP:
1640 1640
 	    if(SCAN_ARCHIVE)
1641
-		ret = cli_scanzip(desc, ctx, 0);
1641
+		ret = cli_scanzip(desc, ctx, 0, NULL);
1642 1642
 	    break;
1643 1643
 
1644 1644
 	case CL_TYPE_GZ: