Browse code

improve file type recognizer and add CL_TYPE_RARSFX

git-svn: trunk@1634

Tomasz Kojm authored on 2005/06/24 22:54:41
Showing 8 changed files
... ...
@@ -1,3 +1,7 @@
1
+Fri Jun 24 15:48:26 CEST 2005 (tk)
2
+----------------------------------
3
+  * libclamav: improve file type recognizer and add CL_TYPE_RARSFX
4
+
1 5
 Thu Jun 23 22:52:43 CEST 2005 (tk)
2 6
 ----------------------------------
3 7
   * libclamav/mspack/qtmd.c: fix possible crash
... ...
@@ -169,6 +169,8 @@ static const struct cli_smagic_s cli_smagic[] = {
169 169
     {"3c696672616d65", "HTML data", CL_TYPE_HTML},      /* <iframe */
170 170
     {"3c494652414d45", "HTML data", CL_TYPE_HTML},      /* <IFRAME */
171 171
 
172
+    {"526172211a0700", "RAR-SFX", CL_TYPE_RARSFX},
173
+
172 174
     {NULL,  NULL,   CL_TYPE_UNKNOWN_DATA}
173 175
 };
174 176
 
... ...
@@ -46,6 +46,7 @@ typedef enum {
46 46
     CL_TYPE_PDF,
47 47
 
48 48
     /* bigger numbers have higher priority (in o-t-f detection) */
49
+    CL_TYPE_RARSFX,
49 50
     CL_TYPE_HTML, /* on the fly */
50 51
     CL_TYPE_MAIL  /* magic + on the fly */
51 52
 
... ...
@@ -263,7 +263,7 @@ inline static int cli_findpos(const char *buffer, int offset, int length, const
263 263
     return 1;
264 264
 }
265 265
 
266
-int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd)
266
+int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, unsigned long int *ftoffset)
267 267
 {
268 268
 	struct cli_ac_node *current;
269 269
 	struct cli_ac_patt *pt;
... ...
@@ -322,6 +322,8 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
322 322
 					    if(pt->type > type) {
323 323
 						cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
324 324
 						type = pt->type;
325
+						if(ftoffset)
326
+						    *ftoffset = offset + position;
325 327
 					    }
326 328
 					}
327 329
 				    } else {
... ...
@@ -341,6 +343,8 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
341 341
 				    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
342 342
 
343 343
 				    type = pt->type;
344
+				    if(ftoffset)
345
+					*ftoffset = offset + position;
344 346
 				}
345 347
 			    }
346 348
 			} else {
... ...
@@ -23,7 +23,7 @@
23 23
 #include "matcher.h"
24 24
 
25 25
 int cli_ac_addpatt(struct cl_node *root, struct cli_ac_patt *pattern);
26
-int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd);
26
+int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, unsigned long int *ftoffset);
27 27
 int cli_ac_buildtrie(struct cl_node *root);
28 28
 void cli_ac_free(struct cl_node *root);
29 29
 
... ...
@@ -66,7 +66,7 @@ int cli_scanbuff(const char *buffer, unsigned int length, const char **virname,
66 66
     }
67 67
 
68 68
     if((ret = cli_bm_scanbuff(buffer, length, virname, root, 0, ftype, -1)) != CL_VIRUS)
69
-	ret = cli_ac_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff, ftype, -1);
69
+	ret = cli_ac_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff, ftype, -1, NULL);
70 70
 
71 71
     free(partcnt);
72 72
     free(partoff);
... ...
@@ -237,7 +237,7 @@ int cli_validatesig(unsigned short target, unsigned short ftype, const char *off
237 237
     return 1;
238 238
 }
239 239
 
240
-int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype)
240
+int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype, unsigned long int *ftoffset)
241 241
 {
242 242
  	char *buffer, *buff, *endbl, *pt;
243 243
 	int bytes, buffsize, length, ret, *partcnt, type = CL_CLEAN;
... ...
@@ -293,7 +293,7 @@ int cli_scandesc(int desc, const char **virname, long int *scanned, const struct
293 293
 	    length -= SCANBUFF - bytes;
294 294
 
295 295
 	if(cli_bm_scanbuff(pt, length, virname, root, offset, ftype, desc) == CL_VIRUS ||
296
-	   (ret = cli_ac_scanbuff(pt, length, virname, root, partcnt, otfrec, offset, partoff, ftype, desc)) == CL_VIRUS) {
296
+	   (ret = cli_ac_scanbuff(pt, length, virname, root, partcnt, otfrec, offset, partoff, ftype, desc, ftoffset)) == CL_VIRUS) {
297 297
 	    free(buffer);
298 298
 	    free(partcnt);
299 299
 	    free(partoff);
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
3 3
  *
4 4
  *  This program is free software; you can redistribute it and/or modify
5 5
  *  it under the terms of the GNU General Public License as published by
... ...
@@ -21,7 +21,7 @@
21 21
 
22 22
 #include "clamav.h"
23 23
 
24
-int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype);
24
+int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype, unsigned long int *ftoffset);
25 25
 
26 26
 int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, unsigned short ftype);
27 27
 
... ...
@@ -189,7 +189,7 @@ static int cli_scanrar(int desc, const char **virname, long int *scanned, const
189 189
 	if(DETECT_ENCRYPTED && metadata->encrypted) {
190 190
 	    cli_dbgmsg("RAR: Encrypted files found in archive.\n");
191 191
 	    lseek(desc, 0, SEEK_SET);
192
-	    ret = cli_scandesc(desc, virname, scanned, root, 0, 0);
192
+	    ret = cli_scandesc(desc, virname, scanned, root, 0, 0, NULL);
193 193
 	    if(ret < 0) {
194 194
 		break;
195 195
 	    } else if(ret != CL_VIRUS) {
... ...
@@ -388,7 +388,7 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
388 388
 	if(DETECT_ENCRYPTED && encrypted) {
389 389
 	    cli_dbgmsg("Zip: Encrypted files found in archive.\n");
390 390
 	    lseek(desc, 0, SEEK_SET);
391
-	    ret = cli_scandesc(desc, virname, scanned, root, 0, 0);
391
+	    ret = cli_scandesc(desc, virname, scanned, root, 0, 0, NULL);
392 392
 	    if(ret < 0) {
393 393
 		break;
394 394
 	    } else if(ret != CL_VIRUS) {
... ...
@@ -1011,7 +1011,7 @@ static int cli_vba_scandir(const char *dirname, const char **virname, long int *
1011 1011
     if (fd >= 0) {
1012 1012
     	ofd = cli_decode_ole_object(fd, dirname);
1013 1013
 	if (ofd >= 0) {
1014
-		ret = cli_scandesc(ofd, virname, scanned, root, 0, 0);
1014
+		ret = cli_scandesc(ofd, virname, scanned, root, 0, 0, NULL);
1015 1015
 		close(ofd);
1016 1016
 	}
1017 1017
 	close(fd);
... ...
@@ -1077,7 +1077,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
1077 1077
     snprintf(fullname, 1024, "%s/comment.html", tempname);
1078 1078
     fd = open(fullname, O_RDONLY);
1079 1079
     if (fd >= 0) {
1080
-        ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
1080
+        ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML, NULL);
1081 1081
 	close(fd);
1082 1082
     }
1083 1083
 
... ...
@@ -1092,7 +1092,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
1092 1092
 	snprintf(fullname, 1024, "%s/nocomment.html", tempname);
1093 1093
 	fd = open(fullname, O_RDONLY);
1094 1094
 	if (fd >= 0) {
1095
-	    ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
1095
+	    ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML, NULL);
1096 1096
 	    close(fd);
1097 1097
 	}
1098 1098
     }
... ...
@@ -1108,7 +1108,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
1108 1108
 	snprintf(fullname, 1024, "%s/script.html", tempname);
1109 1109
 	fd = open(fullname, O_RDONLY);
1110 1110
 	if (fd >= 0) {
1111
-	    ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
1111
+	    ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML, NULL);
1112 1112
 	    close(fd);
1113 1113
 	}
1114 1114
     }
... ...
@@ -1406,7 +1406,7 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
1406 1406
 
1407 1407
     if(!options) { /* raw mode (stdin, etc.) */
1408 1408
 	cli_dbgmsg("Raw mode: No support for special files\n");
1409
-	if((ret = cli_scandesc(desc, virname, scanned, root, 0, 0) == CL_VIRUS))
1409
+	if((ret = cli_scandesc(desc, virname, scanned, root, 0, 0, NULL) == CL_VIRUS))
1410 1410
 	    cli_dbgmsg("%s found in descriptor %d\n", *virname, desc);
1411 1411
 	return ret;
1412 1412
     }
... ...
@@ -1538,13 +1538,22 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
1538 1538
     type == CL_TYPE_MAIL ? mrec-- : arec--;
1539 1539
 
1540 1540
     if(type != CL_TYPE_DATA && ret != CL_VIRUS) { /* scan the raw file */
1541
-	    int typerec;
1541
+	    int ftrec;
1542
+	    unsigned long int ftoffset;
1543
+
1544
+	switch(type) {
1545
+	    case CL_TYPE_UNKNOWN_TEXT:
1546
+	    case CL_TYPE_MSEXE:
1547
+		ftrec = 1;
1548
+		break;
1549
+	    default:
1550
+		ftrec = 0;
1551
+	}
1542 1552
 
1543
-	type == CL_TYPE_UNKNOWN_TEXT ? (typerec = 1) : (typerec = 0);
1544 1553
 	if(lseek(desc, 0, SEEK_SET) < 0)
1545 1554
 	    cli_errmsg("lseek() failed, trying to continue anyway...\n");
1546 1555
 
1547
-	if((nret = cli_scandesc(desc, virname, scanned, root, typerec, type)) == CL_VIRUS) {
1556
+	if((nret = cli_scandesc(desc, virname, scanned, root, ftrec, type, &ftoffset)) == CL_VIRUS) {
1548 1557
 	    cli_dbgmsg("%s found in descriptor %d.\n", *virname, desc);
1549 1558
 	    return CL_VIRUS;
1550 1559
 
... ...
@@ -1567,6 +1576,11 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
1567 1567
 			if(cli_scanmail(desc, virname, scanned, root, limits, options, arec, mrec) == CL_VIRUS)
1568 1568
 			    return CL_VIRUS;
1569 1569
 		    break;
1570
+
1571
+		case CL_TYPE_RARSFX:
1572
+		    if(SCAN_ARCHIVE)
1573
+			cli_dbgmsg("RAR-SFX found at %d\n", ftoffset);
1574
+		    break;
1570 1575
 	    }
1571 1576
 	    nret == CL_TYPE_MAIL ? mrec-- : arec--;
1572 1577
 	}