Browse code

filetype detection improvements

git-svn: trunk@3662

Tomasz Kojm authored on 2008/02/21 07:04:48
Showing 15 changed files
... ...
@@ -1,3 +1,17 @@
1
+Wed Feb 20 22:03:07 CET 2008 (tk)
2
+---------------------------------
3
+  * libclamav: filetype detection improvements:
4
+	- allow manual selection of matching method for each filetype signature
5
+	- A-C filetype magic sigs can be limited to specific file formats
6
+	- allow ndb-like offsets inside A-C filetype sigs
7
+	- filetype sigs can be limited to specific f-levels
8
+	- optimize filetype sigs handling inside cli_ac_scanbuff()
9
+	- MAGIC_BUFFER_SIZE increased to 1024 bytes
10
+	- A-C filetype sigs for CL_TYPE_IGNORED are guaranteed to work and take
11
+	  precedence within MAGIC_BUFFER_SIZE file space (shouldn't be used
12
+	  outside it)
13
+	- rename daily.ft to daily.ftm (to avoid problems with older snapshots)
14
+
1 15
 Wed Feb 20 16:49:13 EET 2008 (edwin)
2 16
 ------------------------------------
3 17
   * libclamav/htmlnorm.c: generate only nocomment.html (always contains script too) and notags.html (bb #851)
... ...
@@ -713,7 +713,7 @@ fileblobScan(const fileblob *fb)
713 713
 		ftype = cli_filetype2(fd, fb->ctx->engine);
714 714
 		if(ftype >= CL_TYPE_TEXT_ASCII && ftype <= CL_TYPE_TEXT_UTF16BE) {
715 715
 			lseek(fd, 0, SEEK_SET);
716
-			rc = cli_scandesc(fd, fb->ctx, 0, CL_TYPE_MAIL, 0, NULL);
716
+			rc = cli_scandesc(fd, fb->ctx, CL_TYPE_MAIL, 0, NULL, AC_SCAN_VIR);
717 717
 		}
718 718
 	}
719 719
 
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007 Sourcefire, Inc.
2
+ *  Copyright (C) 2007 - 2008 Sourcefire, Inc.
3 3
  *  Author: Tomasz Kojm <tkojm@clamav.net>
4 4
  *
5 5
  *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
... ...
@@ -53,6 +53,7 @@ static const struct ftmap_s {
53 53
     { "CL_TYPE_TEXT_UTF16BE",	CL_TYPE_TEXT_UTF16BE	},
54 54
     { "CL_TYPE_BINARY_DATA",	CL_TYPE_BINARY_DATA	},
55 55
     { "CL_TYPE_IGNORED",	CL_TYPE_IGNORED		},
56
+    { "CL_TYPE_ANY",		0			}, /* for ft-sigs */
56 57
     { "CL_TYPE_MSEXE",		CL_TYPE_MSEXE		},
57 58
     { "CL_TYPE_ELF",		CL_TYPE_ELF		},
58 59
     { "CL_TYPE_POSIX_TAR",	CL_TYPE_POSIX_TAR	},
... ...
@@ -166,7 +167,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
166 166
 	if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
167 167
 	    return ret;
168 168
 
169
-	sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
169
+	sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 0, ret, desc, NULL, AC_SCAN_FT);
170 170
 
171 171
 	cli_ac_freedata(&mdata);
172 172
 
... ...
@@ -178,7 +179,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
178 178
 
179 179
 	    decoded = (unsigned char *) cli_utf16toascii((char *) smallbuff, bread);
180 180
 	    if(decoded) {
181
-		sret = cli_ac_scanbuff(decoded, strlen((char *) decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
181
+		sret = cli_ac_scanbuff(decoded, strlen((char *) decoded), NULL, engine->root[0], &mdata, 0, CL_TYPE_TEXT_ASCII, desc, NULL, AC_SCAN_FT);
182 182
 		free(decoded);
183 183
 		if(sret == CL_TYPE_HTML)
184 184
 		    ret = CL_TYPE_HTML_UTF16;
... ...
@@ -212,7 +213,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
212 212
 					    return ret;
213 213
 
214 214
 				    if(out_area.length > 0) {
215
-					    sret = cli_ac_scanbuff(decodedbuff, out_area.length, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
215
+					    sret = cli_ac_scanbuff(decodedbuff, out_area.length, NULL, engine->root[0], &mdata, 0, 0, desc, NULL, AC_SCAN_FT); /* FIXME: can we use CL_TYPE_TEXT_ASCII instead of 0? */
216 216
 					    if(sret == CL_TYPE_HTML) {
217 217
 						    cli_dbgmsg("cli_filetype2: detected HTML signature in Unicode file\n");
218 218
 						    /* htmlnorm is able to handle any unicode now, since it skips null chars */
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007 Sourcefire, Inc.
2
+ *  Copyright (C) 2007 - 2008 Sourcefire, Inc.
3 3
  *  Author: Tomasz Kojm <tkojm@clamav.net>
4 4
  *
5 5
  *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
... ...
@@ -28,7 +28,7 @@
28 28
 #include "clamav.h"
29 29
 #include "cltypes.h"
30 30
 
31
-#define MAGIC_BUFFER_SIZE 512
31
+#define MAGIC_BUFFER_SIZE 1024
32 32
 #define CL_TYPENO 500
33 33
 #define MAX_EMBEDDED_OBJ 10
34 34
 
... ...
@@ -39,7 +39,6 @@ typedef enum {
39 39
     CL_TYPE_TEXT_UTF16BE,
40 40
     CL_TYPE_BINARY_DATA,
41 41
     /* Please do not add any new types above this line */
42
-    CL_TYPE_IGNORED,
43 42
     CL_TYPE_ERROR,
44 43
     CL_TYPE_MSEXE,
45 44
     CL_TYPE_ELF,
... ...
@@ -76,7 +75,8 @@ typedef enum {
76 76
     CL_TYPE_CABSFX,
77 77
     CL_TYPE_ARJSFX,
78 78
     CL_TYPE_NULSFT, /* on the fly */
79
-    CL_TYPE_AUTOIT
79
+    CL_TYPE_AUTOIT,
80
+    CL_TYPE_IGNORED /* please don't add anything below */
80 81
 } cli_file_t;
81 82
 
82 83
 struct cli_ftype {
... ...
@@ -1,6 +1,6 @@
1 1
 /*
2
- *  Static filetype data for use when daily.ft is not available.
3
- *  Copyright (C) 2007 Sourcefire, Inc.
2
+ *  Static filetype data for use when daily.ftm is not available.
3
+ *  Copyright (C) 2007 - 2008 Sourcefire, Inc.
4 4
  *  Author: Tomasz Kojm <tkojm@clamav.net>
5 5
  *
6 6
  *  This program is free software; you can redistribute it and/or modify
... ...
@@ -23,7 +23,7 @@
23 23
 
24 24
 /* Generated with the following perl script:
25 25
 #!/usr/bin/perl
26
-open(FT, "daily.ft") or die "Can't open daily.ft";
26
+open(FT, "daily.ftm") or die "Can't open daily.ftm";
27 27
 print "static const char *ftypes_int[] = {\n";
28 28
 while($line = <FT>) {
29 29
     chomp($line);
... ...
@@ -33,104 +33,104 @@ print "  NULL\n};\n"
33 33
 */
34 34
 
35 35
 static const char *ftypes_int[] = {
36
-  "0:4d5a:MS-EXE/DLL:CL_TYPE_MSEXE",
37
-  "0:7f454c46:ELF:CL_TYPE_ELF",
38
-  "0:52617221:RAR:CL_TYPE_RAR",
39
-  "0:504b0304:ZIP:CL_TYPE_ZIP",
40
-  "0:504b3030504b0304:ZIP:CL_TYPE_ZIP",
41
-  "0:1f8b:GZip:CL_TYPE_GZ",
42
-  "0:425a68:BZip:CL_TYPE_BZ",
43
-  "0:60ea:ARJ:CL_TYPE_ARJ",
44
-  "0:535a4444:compress.exe'd:CL_TYPE_MSSZDD",
45
-  "0:4d534346:MS CAB:CL_TYPE_MSCAB",
46
-  "0:49545346:MS CHM:CL_TYPE_MSCHM",
47
-  "8:19040010:SIS:CL_TYPE_SIS",
48
-  "0:23407e5e:SCRENC:CL_TYPE_SCRENC",
49
-  "0:28546869732066696c65206d75737420626520636f6e76657274656420776974682042696e48657820342e3029:BinHex:CL_TYPE_BINHEX",
50
-  "0:46726f6d20:MBox:CL_TYPE_MAIL",
51
-  "0:52656365697665643a20:Raw mail:CL_TYPE_MAIL",
52
-  "0:52657475726e2d506174683a20:Maildir:CL_TYPE_MAIL",
53
-  "0:52657475726e2d706174683a20:Maildir:CL_TYPE_MAIL",
54
-  "0:44656c6976657265642d546f3a20:Mail:CL_TYPE_MAIL",
55
-  "0:582d5549444c3a20:Mail:CL_TYPE_MAIL",
56
-  "0:582d4170706172656e746c792d546f3a20:Mail:CL_TYPE_MAIL",
57
-  "0:582d456e76656c6f70652d46726f6d3a20:Mail:CL_TYPE_MAIL",
58
-  "0:582d4f726967696e616c2d546f3a20:Mail:CL_TYPE_MAIL",
59
-  "0:582d53796d616e7465632d:Symantec:CL_TYPE_MAIL",
60
-  "0:582d455653:EVS mail:CL_TYPE_MAIL",
61
-  "0:582d5265616c2d546f3a20:Mail:CL_TYPE_MAIL",
62
-  "0:582d53696576653a20:Mail:CL_TYPE_MAIL",
63
-  "0:3e46726f6d20:Mail:CL_TYPE_MAIL",
64
-  "0:446174653a20:Mail:CL_TYPE_MAIL",
65
-  "0:4d6573736167652d49643a20:Mail:CL_TYPE_MAIL",
66
-  "0:4d6573736167652d49443a20:Mail:CL_TYPE_MAIL",
67
-  "0:456e76656c6f70652d746f3a20:Mail:CL_TYPE_MAIL",
68
-  "0:44656c69766572792d646174653a20:Mail:CL_TYPE_MAIL",
69
-  "0:546f3a20:Mail:CL_TYPE_MAIL",
70
-  "0:5375626a6563743a20:Mail:CL_TYPE_MAIL",
71
-  "0:466f723a20:Eserv mail:CL_TYPE_MAIL",
72
-  "0:46726f6d3a20:Exim mail:CL_TYPE_MAIL",
73
-  "0:763a0d0a52656365697665643a20:VPOP3 Mail (DOS):CL_TYPE_MAIL",
74
-  "0:763a0a52656365697665643a20:VPOP3 Mail (UNIX):CL_TYPE_MAIL",
75
-  "0:48692e20546869732069732074686520716d61696c2d73656e64:Qmail bounce:CL_TYPE_MAIL",
76
-  "0:789f3e22:TNEF:CL_TYPE_TNEF",
77
-  "0:626567696e20:UUencoded:CL_TYPE_UUENCODED",
78
-  "0:474946:GIF:CL_TYPE_GRAPHICS",
79
-  "0:424d:BMP:CL_TYPE_GRAPHICS",
80
-  "0:ffd8ff:JPEG:CL_TYPE_GRAPHICS",
81
-  "6:4a464946:JPEG:CL_TYPE_GRAPHICS",
82
-  "6:45786966:JPEG:CL_TYPE_GRAPHICS",
83
-  "0:89504e47:PNG:CL_TYPE_GRAPHICS",
84
-  "0:52494646:RIFF:CL_TYPE_RIFF",
85
-  "0:52494658:RIFX:CL_TYPE_RIFF",
86
-  "0:d0cf11e0a1b11ae1:OLE2 container:CL_TYPE_MSOLE2",
87
-  "0:255044462d:PDF document:CL_TYPE_PDF",
88
-  "0:b6b9acaefeffffff:CryptFF:CL_TYPE_CRYPTFF",
89
-  "0:7b5c727466:RTF:CL_TYPE_RTF",
90
-  "0:000001b3:MPEG video stream:CL_TYPE_IGNORED",
91
-  "0:000001ba:MPEG sys stream:CL_TYPE_IGNORED",
92
-  "0:4f676753:Ogg Stream:CL_TYPE_IGNORED",
93
-  "0:494433:MP3:CL_TYPE_IGNORED",
94
-  "0:fffb90:MP3:CL_TYPE_IGNORED",
95
-  "0:252150532d41646f62652d:PostScript:CL_TYPE_IGNORED",
96
-  "0:3026b2758e66cf:WMA/WMV/ASF:CL_TYPE_IGNORED",
97
-  "0:2e524d46:Real Media File:CL_TYPE_IGNORED",
98
-  "*:0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
99
-  "*:0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
100
-  "*:0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20:Mail file:CL_TYPE_MAIL",
101
-  "*:4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
102
-  "*:3c4120*(68|48)(72|52)6566:HTML data:CL_TYPE_HTML",
103
-  "*:3c6120*(68|48)(72|52)6566:HTML data:CL_TYPE_HTML",
104
-  "*:3c6120*(68|48)(72|52)4546:HTML data:CL_TYPE_HTML",
105
-  "*:3c4120*(68|48)(72|52)4546:HTML data:CL_TYPE_HTML",
106
-  "*:3c68746d6c3e:HTML data:CL_TYPE_HTML",
107
-  "*:3c48544d4c3e:HTML data:CL_TYPE_HTML",
108
-  "*:3c48746d6c3e:HTML data:CL_TYPE_HTML",
109
-  "*:3c686561643e:HTML data:CL_TYPE_HTML",
110
-  "*:3c484541443e:HTML data:CL_TYPE_HTML",
111
-  "*:3c486561643e:HTML data:CL_TYPE_HTML",
112
-  "*:3c696d67:HTML data:CL_TYPE_HTML",
113
-  "*:3c494d47:HTML data:CL_TYPE_HTML",
114
-  "*:3c496d67:HTML data:CL_TYPE_HTML",
115
-  "*:3c736372697074:HTML data:CL_TYPE_HTML",
116
-  "*:3c536372697074:HTML data:CL_TYPE_HTML",
117
-  "*:3c534352495054:HTML data:CL_TYPE_HTML",
118
-  "*:3c6f626a656374:HTML data:CL_TYPE_HTML",
119
-  "*:3c4f626a656374:HTML data:CL_TYPE_HTML",
120
-  "*:3c4f424a454354:HTML data:CL_TYPE_HTML",
121
-  "*:3c696672616d65:HTML data:CL_TYPE_HTML",
122
-  "*:3c494652414d45:HTML data:CL_TYPE_HTML",
123
-  "*:3c7461626c65:HTML data:CL_TYPE_HTML",
124
-  "*:3c5441424c45:HTML data:CL_TYPE_HTML",
125
-  "*:526172211a0700:RAR-SFX:CL_TYPE_RARSFX",
126
-  "*:504b0304:ZIP-SFX:CL_TYPE_ZIPSFX",
127
-  "*:4d534346:CAB-SFX:CL_TYPE_CABSFX",
128
-  "*:60ea{7}0002:ARJ-SFX:CL_TYPE_ARJSFX",
129
-  "*:60ea{7}0102:ARJ-SFX:CL_TYPE_ARJSFX",
130
-  "*:60ea{7}0202:ARJ-SFX:CL_TYPE_ARJSFX",
131
-  "*:efbeadde4e756c6c736f6674496e7374:NSIS:CL_TYPE_NULSFT",
132
-  "*:a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36):AUTOIT:CL_TYPE_AUTOIT",
133
-  "*:4d5a{60-300}50450000:PE:CL_TYPE_MSEXE",
36
+  "0:0:4d5a:MS-EXE/DLL:CL_TYPE_ANY:CL_TYPE_MSEXE",
37
+  "0:0:7f454c46:ELF:CL_TYPE_ANY:CL_TYPE_ELF",
38
+  "0:0:52617221:RAR:CL_TYPE_ANY:CL_TYPE_RAR",
39
+  "0:0:504b0304:ZIP:CL_TYPE_ANY:CL_TYPE_ZIP",
40
+  "0:0:504b3030504b0304:ZIP:CL_TYPE_ANY:CL_TYPE_ZIP",
41
+  "0:0:1f8b:GZip:CL_TYPE_ANY:CL_TYPE_GZ",
42
+  "0:0:425a68:BZip:CL_TYPE_ANY:CL_TYPE_BZ",
43
+  "0:0:60ea:ARJ:CL_TYPE_ANY:CL_TYPE_ARJ",
44
+  "0:0:535a4444:compress.exe'd:CL_TYPE_ANY:CL_TYPE_MSSZDD",
45
+  "0:0:4d534346:MS CAB:CL_TYPE_ANY:CL_TYPE_MSCAB",
46
+  "0:0:49545346:MS CHM:CL_TYPE_ANY:CL_TYPE_MSCHM",
47
+  "0:8:19040010:SIS:CL_TYPE_ANY:CL_TYPE_SIS",
48
+  "0:0:23407e5e:SCRENC:CL_TYPE_ANY:CL_TYPE_SCRENC",
49
+  "0:0:28546869732066696c65206d75737420626520636f6e76657274656420776974682042696e48657820342e3029:BinHex:CL_TYPE_ANY:CL_TYPE_BINHEX",
50
+  "0:0:46726f6d20:MBox:CL_TYPE_ANY:CL_TYPE_MAIL",
51
+  "0:0:52656365697665643a20:Raw mail:CL_TYPE_ANY:CL_TYPE_MAIL",
52
+  "0:0:52657475726e2d506174683a20:Maildir:CL_TYPE_ANY:CL_TYPE_MAIL",
53
+  "0:0:52657475726e2d706174683a20:Maildir:CL_TYPE_ANY:CL_TYPE_MAIL",
54
+  "0:0:44656c6976657265642d546f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
55
+  "0:0:582d5549444c3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
56
+  "0:0:582d4170706172656e746c792d546f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
57
+  "0:0:582d456e76656c6f70652d46726f6d3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
58
+  "0:0:582d4f726967696e616c2d546f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
59
+  "0:0:582d53796d616e7465632d:Symantec:CL_TYPE_ANY:CL_TYPE_MAIL",
60
+  "0:0:582d455653:EVS mail:CL_TYPE_ANY:CL_TYPE_MAIL",
61
+  "0:0:582d5265616c2d546f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
62
+  "0:0:582d53696576653a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
63
+  "0:0:3e46726f6d20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
64
+  "0:0:446174653a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
65
+  "0:0:4d6573736167652d49643a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
66
+  "0:0:4d6573736167652d49443a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
67
+  "0:0:456e76656c6f70652d746f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
68
+  "0:0:44656c69766572792d646174653a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
69
+  "0:0:546f3a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
70
+  "0:0:5375626a6563743a20:Mail:CL_TYPE_ANY:CL_TYPE_MAIL",
71
+  "0:0:466f723a20:Eserv mail:CL_TYPE_ANY:CL_TYPE_MAIL",
72
+  "0:0:46726f6d3a20:Exim mail:CL_TYPE_ANY:CL_TYPE_MAIL",
73
+  "0:0:763a0d0a52656365697665643a20:VPOP3 Mail (DOS):CL_TYPE_ANY:CL_TYPE_MAIL",
74
+  "0:0:763a0a52656365697665643a20:VPOP3 Mail (UNIX):CL_TYPE_ANY:CL_TYPE_MAIL",
75
+  "0:0:48692e20546869732069732074686520716d61696c2d73656e64:Qmail bounce:CL_TYPE_ANY:CL_TYPE_MAIL",
76
+  "0:0:789f3e22:TNEF:CL_TYPE_ANY:CL_TYPE_TNEF",
77
+  "0:0:626567696e20:UUencoded:CL_TYPE_ANY:CL_TYPE_UUENCODED",
78
+  "0:0:474946:GIF:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
79
+  "0:0:424d:BMP:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
80
+  "0:0:ffd8ff:JPEG:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
81
+  "0:6:4a464946:JPEG:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
82
+  "0:6:45786966:JPEG:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
83
+  "0:0:89504e47:PNG:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
84
+  "0:0:52494646:RIFF:CL_TYPE_ANY:CL_TYPE_RIFF",
85
+  "0:0:52494658:RIFX:CL_TYPE_ANY:CL_TYPE_RIFF",
86
+  "0:0:d0cf11e0a1b11ae1:OLE2 container:CL_TYPE_ANY:CL_TYPE_MSOLE2",
87
+  "0:0:255044462d:PDF document:CL_TYPE_ANY:CL_TYPE_PDF",
88
+  "0:0:b6b9acaefeffffff:CryptFF:CL_TYPE_ANY:CL_TYPE_CRYPTFF",
89
+  "0:0:7b5c727466:RTF:CL_TYPE_ANY:CL_TYPE_RTF",
90
+  "0:0:000001b3:MPEG video stream:CL_TYPE_ANY:CL_TYPE_IGNORED",
91
+  "0:0:000001ba:MPEG sys stream:CL_TYPE_ANY:CL_TYPE_IGNORED",
92
+  "0:0:4f676753:Ogg Stream:CL_TYPE_ANY:CL_TYPE_IGNORED",
93
+  "0:0:494433:MP3:CL_TYPE_ANY:CL_TYPE_IGNORED",
94
+  "0:0:fffb90:MP3:CL_TYPE_ANY:CL_TYPE_IGNORED",
95
+  "0:0:252150532d41646f62652d:PostScript:CL_TYPE_ANY:CL_TYPE_IGNORED",
96
+  "0:0:3026b2758e66cf:WMA/WMV/ASF:CL_TYPE_ANY:CL_TYPE_IGNORED",
97
+  "0:0:2e524d46:Real Media File:CL_TYPE_ANY:CL_TYPE_IGNORED",
98
+  "1:*:0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_ANY:CL_TYPE_MAIL",
99
+  "1:*:0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_ANY:CL_TYPE_MAIL",
100
+  "1:*:0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20:Mail file:CL_TYPE_ANY:CL_TYPE_MAIL",
101
+  "1:*:4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_ANY:CL_TYPE_MAIL",
102
+  "1:*:3c4120*(68|48)(72|52)6566:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
103
+  "1:*:3c6120*(68|48)(72|52)6566:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
104
+  "1:*:3c6120*(68|48)(72|52)4546:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
105
+  "1:*:3c4120*(68|48)(72|52)4546:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
106
+  "1:*:3c68746d6c3e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
107
+  "1:*:3c48544d4c3e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
108
+  "1:*:3c48746d6c3e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
109
+  "1:*:3c686561643e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
110
+  "1:*:3c484541443e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
111
+  "1:*:3c486561643e:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
112
+  "1:*:3c696d67:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
113
+  "1:*:3c494d47:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
114
+  "1:*:3c496d67:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
115
+  "1:*:3c736372697074:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
116
+  "1:*:3c536372697074:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
117
+  "1:*:3c534352495054:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
118
+  "1:*:3c6f626a656374:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
119
+  "1:*:3c4f626a656374:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
120
+  "1:*:3c4f424a454354:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
121
+  "1:*:3c696672616d65:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
122
+  "1:*:3c494652414d45:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
123
+  "1:*:3c7461626c65:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
124
+  "1:*:3c5441424c45:HTML data:CL_TYPE_ANY:CL_TYPE_HTML",
125
+  "1:*:526172211a0700:RAR-SFX:CL_TYPE_ANY:CL_TYPE_RARSFX",
126
+  "1:*:504b0304:ZIP-SFX:CL_TYPE_ANY:CL_TYPE_ZIPSFX",
127
+  "1:*:4d534346:CAB-SFX:CL_TYPE_ANY:CL_TYPE_CABSFX",
128
+  "1:*:60ea{7}0002:ARJ-SFX:CL_TYPE_ANY:CL_TYPE_ARJSFX",
129
+  "1:*:60ea{7}0102:ARJ-SFX:CL_TYPE_ANY:CL_TYPE_ARJSFX",
130
+  "1:*:60ea{7}0202:ARJ-SFX:CL_TYPE_ANY:CL_TYPE_ARJSFX",
131
+  "1:*:efbeadde4e756c6c736f6674496e7374:NSIS:CL_TYPE_ANY:CL_TYPE_NULSFT",
132
+  "1:*:a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36):AUTOIT:CL_TYPE_ANY:CL_TYPE_AUTOIT",
133
+  "1:*:4d5a{60-300}50450000:PE:CL_TYPE_ANY:CL_TYPE_MSEXE",
134 134
   NULL
135 135
 };
136 136
 
... ...
@@ -568,7 +568,7 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of
568 568
     return CL_SUCCESS;
569 569
 }
570 570
 
571
-int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
571
+int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset, unsigned int mode)
572 572
 {
573 573
 	struct cli_ac_node *current;
574 574
 	struct cli_ac_patt *patt, *pt;
... ...
@@ -605,6 +605,12 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
605 605
 		if(ac_findmatch(buffer, bp, length, patt, &matchend)) {
606 606
 		    pt = patt;
607 607
 		    while(pt) {
608
+
609
+			if((pt->type && !(mode & AC_SCAN_FT)) || (!pt->type && !(mode & AC_SCAN_VIR))) {
610
+			    pt = pt->next_same;
611
+			    continue;
612
+			}
613
+
608 614
 			realoff = offset + bp - pt->prefix_length;
609 615
 
610 616
 			if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
... ...
@@ -670,25 +676,31 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
670 670
 				    offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
671 671
 			    } else if(found && pt->partno == pt->parts) {
672 672
 				if(pt->type) {
673
-				    if(otfrec) {
674
-					if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
675
-					    cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
676
-					    type = pt->type;
677
-					    if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
678
-						/* FIXME: we don't know which offset of the first part is the correct one */
679
-						for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
680
-						    if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
681
-							if(info.exeinfo.section)
682
-							    free(info.exeinfo.section);
683
-							return CL_EMEM;
684
-						    }
673
+
674
+				    if(pt->type == CL_TYPE_IGNORED && (!pt->rtype || ftype == pt->rtype)) {
675
+					if(info.exeinfo.section)
676
+					    free(info.exeinfo.section);
677
+
678
+					return CL_TYPE_IGNORED;
679
+				    }
680
+
681
+				    if((pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) && (!pt->rtype || ftype == pt->rtype)) {
682
+					cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
683
+					type = pt->type;
684
+					if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
685
+					    /* FIXME: we don't know which offset of the first part is the correct one */
686
+					    for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
687
+						if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
688
+						    if(info.exeinfo.section)
689
+							free(info.exeinfo.section);
690
+						    return CL_EMEM;
685 691
 						}
686 692
 					    }
687
-
688
-					    memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
689
-					    for(j = 0; j < pt->parts; j++)
690
-						offmatrix[j][0] = 0;
691 693
 					}
694
+
695
+					memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
696
+					for(j = 0; j < pt->parts; j++)
697
+					    offmatrix[j][0] = 0;
692 698
 				    }
693 699
 
694 700
 				} else { /* !pt->type */
... ...
@@ -704,17 +716,21 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
704 704
 
705 705
 			} else { /* old type signature */
706 706
 			    if(pt->type) {
707
-				if(otfrec) {
708
-				    if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
709
-					cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
710
-					type = pt->type;
711
-					if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
707
+				if(pt->type == CL_TYPE_IGNORED && (!pt->rtype || ftype == pt->rtype)) {
708
+				    if(info.exeinfo.section)
709
+					free(info.exeinfo.section);
712 710
 
713
-					    if(ac_addtype(ftoffset, type, realoff)) {
714
-						if(info.exeinfo.section)
715
-						    free(info.exeinfo.section);
716
-						return CL_EMEM;
717
-					    }
711
+				    return CL_TYPE_IGNORED;
712
+				}
713
+				if((pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) && (!pt->rtype || ftype == pt->rtype)) {
714
+				    cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
715
+				    type = pt->type;
716
+				    if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
717
+
718
+					if(ac_addtype(ftoffset, type, realoff)) {
719
+					    if(info.exeinfo.section)
720
+						free(info.exeinfo.section);
721
+					    return CL_EMEM;
718 722
 					}
719 723
 				    }
720 724
 				}
... ...
@@ -738,11 +754,11 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
738 738
     if(info.exeinfo.section)
739 739
 	free(info.exeinfo.section);
740 740
 
741
-    return otfrec ? type : CL_CLEAN;
741
+    return (mode & AC_SCAN_FT) ? type : CL_CLEAN;
742 742
 }
743 743
 
744 744
 /* FIXME: clean up the code */
745
-int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target)
745
+int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target)
746 746
 {
747 747
 	struct cli_ac_patt *new;
748 748
 	char *pt, *pt2, *hex = NULL, *hexcpy = NULL;
... ...
@@ -758,6 +774,7 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
758 758
     if((new = (struct cli_ac_patt *) cli_calloc(1, sizeof(struct cli_ac_patt))) == NULL)
759 759
 	return CL_EMEM;
760 760
 
761
+    new->rtype = rtype;
761 762
     new->type = type;
762 763
     new->sigid = sigid;
763 764
     new->parts = parts;
... ...
@@ -33,6 +33,9 @@
33 33
 #define AC_CH_MAXDIST 32
34 34
 extern uint8_t cli_ac_mindepth, cli_ac_maxdepth;
35 35
 
36
+#define AC_SCAN_VIR 1
37
+#define AC_SCAN_FT  2
38
+
36 39
 struct cli_ac_data {
37 40
     int32_t ***offmatrix;
38 41
     uint32_t partsigs;
... ...
@@ -58,7 +61,7 @@ struct cli_ac_patt {
58 58
     struct cli_ac_patt *next, *next_same;
59 59
     uint8_t depth;
60 60
     uint8_t target;
61
-    uint16_t type;
61
+    uint16_t rtype, type;
62 62
 };
63 63
 
64 64
 struct cli_ac_node {
... ...
@@ -72,11 +75,11 @@ struct cli_ac_node {
72 72
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
73 73
 int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint8_t tracklen);
74 74
 void cli_ac_freedata(struct cli_ac_data *data);
75
-int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset);
75
+int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset, unsigned int mode);
76 76
 int cli_ac_buildtrie(struct cli_matcher *root);
77 77
 int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth);
78 78
 void cli_ac_free(struct cli_matcher *root);
79
-int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target);
79
+int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target);
80 80
 void cli_ac_setdepth(uint8_t mindepth, uint8_t maxdepth);
81 81
 
82 82
 #endif
... ...
@@ -77,7 +77,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, cli_ctx *ctx, cli
77 77
 	    return ret;
78 78
 
79 79
 	if(troot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, troot, 0, ftype, -1)) != CL_VIRUS)
80
-	    ret = cli_ac_scanbuff(buffer, length, virname, troot, &mdata, 0, 0, ftype, -1, NULL);
80
+	    ret = cli_ac_scanbuff(buffer, length, virname, troot, &mdata, 0, ftype, -1, NULL, AC_SCAN_VIR);
81 81
 
82 82
 	cli_ac_freedata(&mdata);
83 83
 
... ...
@@ -89,7 +89,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, cli_ctx *ctx, cli
89 89
 	return ret;
90 90
 
91 91
     if(groot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, groot, 0, ftype, -1)) != CL_VIRUS)
92
-	ret = cli_ac_scanbuff(buffer, length, virname, groot, &mdata, 0, 0, ftype, -1, NULL);
92
+	ret = cli_ac_scanbuff(buffer, length, virname, groot, &mdata, 0, ftype, -1, NULL, AC_SCAN_VIR);
93 93
 
94 94
     cli_ac_freedata(&mdata);
95 95
 
... ...
@@ -249,7 +249,7 @@ int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct
249 249
     return 1;
250 250
 }
251 251
 
252
-int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset)
252
+int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode)
253 253
 {
254 254
  	unsigned char *buffer, *buff, *endbl, *upt;
255 255
 	int ret = CL_CLEAN, type = CL_CLEAN, i, bytes;
... ...
@@ -325,7 +325,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
325 325
 
326 326
 	if(troot) {
327 327
 	    if(troot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS)
328
-		ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, otfrec, offset, ftype, desc, ftoffset);
328
+		ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, offset, ftype, desc, ftoffset, acmode);
329 329
 
330 330
 	    if(ret == CL_VIRUS) {
331 331
 		free(buffer);
... ...
@@ -343,7 +343,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
343 343
 
344 344
 	if(!ftonly) {
345 345
 	    if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
346
-		ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
346
+		ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, offset, ftype, desc, ftoffset, acmode);
347 347
 
348 348
 	    if(ret == CL_VIRUS) {
349 349
 		free(buffer);
... ...
@@ -356,7 +356,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
356 356
 		else
357 357
 		    return CL_VIRUS;
358 358
 
359
-	    } else if(otfrec && ret >= CL_TYPENO) {
359
+	    } else if((acmode & AC_SCAN_FT) && ret >= CL_TYPENO) {
360 360
 		if(ret > type)
361 361
 		    type = ret;
362 362
 	    }
... ...
@@ -394,5 +394,5 @@ int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8
394 394
 	    return CL_VIRUS;
395 395
     }
396 396
 
397
-    return otfrec ? type : CL_CLEAN;
397
+    return (acmode & AC_SCAN_FT) ? type : CL_CLEAN;
398 398
 }
... ...
@@ -94,7 +94,7 @@ struct cli_target_info {
94 94
 
95 95
 int cli_scanbuff(const unsigned char *buffer, uint32_t length, cli_ctx *ctx, cli_file_t ftype);
96 96
 
97
-int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset);
97
+int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode);
98 98
 
99 99
 int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
100 100
 
... ...
@@ -537,7 +537,7 @@ int cli_scannulsft(int desc, cli_ctx *ctx, off_t offset) {
537 537
 	  cli_dbgmsg("NSIS: Successully extracted file #%u\n", nsist.fno);
538 538
 	  lseek(nsist.ofd, 0, SEEK_SET);
539 539
 	  if(nsist.fno == 1)
540
-	    ret=cli_scandesc(nsist.ofd, ctx, 0, 0, 0, NULL);
540
+	    ret=cli_scandesc(nsist.ofd, ctx, 0, 0, NULL, AC_SCAN_VIR);
541 541
 	  else
542 542
 	    ret=cli_magic_scandesc(nsist.ofd, ctx);
543 543
 	  close(nsist.ofd);
... ...
@@ -93,7 +93,7 @@ struct cli_ignored {
93 93
 int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo);
94 94
 int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo);
95 95
 
96
-int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target)
96
+int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target)
97 97
 {
98 98
 	struct cli_bm_patt *bm_new;
99 99
 	char *pt, *hexcpy, *start, *n;
... ...
@@ -136,7 +136,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
136 136
 		*pt++ = 0;
137 137
 	    }
138 138
 
139
-	    if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist, offset, target))) {
139
+	    if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, rtype, type, mindist, maxdist, offset, target))) {
140 140
 		cli_errmsg("cli_parse_add(): Problem adding signature (1).\n");
141 141
 		error = 1;
142 142
 		break;
... ...
@@ -216,7 +216,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
216 216
 		return CL_EMALFDB;
217 217
 	    }
218 218
 
219
-	    if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0, offset, target))) {
219
+	    if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, rtype, type, 0, 0, offset, target))) {
220 220
 		cli_errmsg("cli_parse_add(): Problem adding signature (2).\n");
221 221
 		free(pt);
222 222
 		return ret;
... ...
@@ -226,7 +226,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
226 226
 	}
227 227
 
228 228
     } else if(root->ac_only || strpbrk(hexsig, "?(") || type) {
229
-	if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0, offset, target))) {
229
+	if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, rtype, type, 0, 0, offset, target))) {
230 230
 	    cli_errmsg("cli_parse_add(): Problem adding signature (3).\n");
231 231
 	    return ret;
232 232
 	}
... ...
@@ -449,7 +449,7 @@ static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
449 449
 
450 450
 	if(*pt == '=') continue;
451 451
 
452
-	if((ret = cli_parse_add(root, start, pt, 0, NULL, 0))) {
452
+	if((ret = cli_parse_add(root, start, pt, 0, 0, NULL, 0))) {
453 453
 	    ret = CL_EMALFDB;
454 454
 	    break;
455 455
 	}
... ...
@@ -628,7 +628,7 @@ static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
628 628
 	    break;
629 629
 	}
630 630
 
631
-	if((ret = cli_parse_add(root, virname, sig, 0, offset, target))) {
631
+	if((ret = cli_parse_add(root, virname, sig, 0, 0, offset, target))) {
632 632
 	    ret = CL_EMALFDB;
633 633
 	    break;
634 634
 	}
... ...
@@ -658,14 +658,14 @@ static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
658 658
     return CL_SUCCESS;
659 659
 }
660 660
 
661
-#define FT_TOKENS 4
662
-static int cli_loadft(FILE *fs, struct cl_engine **engine, unsigned int options, unsigned int internal, gzFile *gzs, unsigned int gzrsize)
661
+#define FTM_TOKENS 8	
662
+static int cli_loadftm(FILE *fs, struct cl_engine **engine, unsigned int options, unsigned int internal, gzFile *gzs, unsigned int gzrsize)
663 663
 {
664
-	const char *tokens[FT_TOKENS];
664
+	const char *tokens[FTM_TOKENS], *pt;
665 665
 	char buffer[FILEBUFF];
666
-	unsigned int line = 0;
666
+	unsigned int line = 0, sigs = 0;
667 667
 	struct cli_ftype *new;
668
-	cli_file_t type;
668
+	cli_file_t rtype, type;
669 669
 	int ret;
670 670
 
671 671
 
... ...
@@ -690,40 +690,60 @@ static int cli_loadft(FILE *fs, struct cl_engine **engine, unsigned int options,
690 690
 	    cli_chomp(buffer);
691 691
 	}
692 692
 	line++;
693
-	cli_strtokenize(buffer, ':', FT_TOKENS, tokens);
693
+	cli_strtokenize(buffer, ':', FTM_TOKENS, tokens);
694 694
 
695
-	if(!tokens[0] || !tokens[1] || !tokens[2] || !tokens[3]) {
695
+	if(!tokens[0] || !tokens[1] || !tokens[2] || !tokens[3] || !tokens[4] || !tokens[5]) {
696 696
 	    ret = CL_EMALFDB;
697 697
 	    break;
698 698
 	}
699 699
 
700
-	type = cli_ftcode(tokens[3]);
701
-	if(type == CL_TYPE_ERROR) {
700
+	if((pt = tokens[6])) { /* min version */
701
+	    if(!cli_isnumber(pt)) {
702
+		ret = CL_EMALFDB;
703
+		break;
704
+	    }
705
+	    if((unsigned int) atoi(pt) > cl_retflevel()) {
706
+		cli_dbgmsg("cli_loadftm: File type signature for %s not loaded (required f-level: %u)\n", tokens[3], atoi(pt));
707
+		continue;
708
+	    }
709
+	    if((pt = tokens[7])) { /* max version */
710
+		if(!cli_isnumber(pt)) {
711
+		    ret = CL_EMALFDB;
712
+		    break;
713
+		}
714
+		if((unsigned int) atoi(pt) < cl_retflevel())
715
+		    continue;
716
+	    }
717
+	}
718
+
719
+	rtype = cli_ftcode(tokens[4]);
720
+	type = cli_ftcode(tokens[5]);
721
+	if(rtype == CL_TYPE_ERROR || type == CL_TYPE_ERROR) {
702 722
 	    ret = CL_EMALFDB;
703 723
 	    break;
704 724
 	}
705 725
 
706
-	if(*tokens[0] == '*') {
707
-	    if((ret = cli_parse_add((*engine)->root[0], tokens[2], tokens[1], type, NULL, 0)))
726
+	if(atoi(tokens[0]) == 1) { /* A-C */
727
+	    if((ret = cli_parse_add((*engine)->root[0], tokens[3], tokens[2], rtype, type, strcmp(tokens[1], "*") ? tokens[1] : NULL, 0)))
708 728
 		break;
709 729
 
710
-	} else {
730
+	} else if(atoi(tokens[0]) == 0) { /* memcmp() */
711 731
 	    new = (struct cli_ftype *) cli_malloc(sizeof(struct cli_ftype));
712 732
 	    if(!new) {
713 733
 		ret = CL_EMEM;
714 734
 		break;
715 735
 	    }
716 736
 	    new->type = type;
717
-	    new->offset = atoi(tokens[0]);
718
-	    new->magic = (unsigned char *) cli_hex2str(tokens[1]);
737
+	    new->offset = atoi(tokens[1]);
738
+	    new->magic = (unsigned char *) cli_hex2str(tokens[2]);
719 739
 	    if(!new->magic) {
720
-		cli_errmsg("cli_loadft: Can't decode the hex string\n");
740
+		cli_errmsg("cli_loadftm: Can't decode the hex string\n");
721 741
 		ret = CL_EMALFDB;
722 742
 		free(new);
723 743
 		break;
724 744
 	    }
725
-	    new->length = strlen(tokens[1]) / 2;
726
-	    new->tname = cli_strdup(tokens[2]);
745
+	    new->length = strlen(tokens[2]) / 2;
746
+	    new->tname = cli_strdup(tokens[3]);
727 747
 	    if(!new->tname) {
728 748
 		free(new->magic);
729 749
 		free(new);
... ...
@@ -732,22 +752,27 @@ static int cli_loadft(FILE *fs, struct cl_engine **engine, unsigned int options,
732 732
 	    }
733 733
 	    new->next = (*engine)->ftypes;
734 734
 	    (*engine)->ftypes = new;
735
+
736
+	} else {
737
+	    cli_dbgmsg("cli_loadftm: Unsupported mode %u\n", atoi(tokens[0]));
738
+	    continue;
735 739
 	}
740
+	sigs++;
736 741
     }
737 742
 
738
-    if(!line) {
739
-	cli_errmsg("Empty %s filetype database\n", internal ? "built-in" : ".ft");
743
+    if(ret) {
744
+	cli_errmsg("Problem parsing %s filetype database at line %u\n", internal ? "built-in" : "external", line);
740 745
 	cl_free(*engine);
741
-	return CL_EMALFDB;
746
+	return ret;
742 747
     }
743 748
 
744
-    if(ret) {
745
-	cli_errmsg("Problem parsing %s filetype database at line %u\n", internal ? "built-in" : ".ft", line);
749
+    if(!sigs) {
750
+	cli_errmsg("Empty %s filetype database\n", internal ? "built-in" : "external");
746 751
 	cl_free(*engine);
747
-	return ret;
752
+	return CL_EMALFDB;
748 753
     }
749 754
 
750
-    cli_dbgmsg("Loaded %u filetype definitions\n", line);
755
+    cli_dbgmsg("Loaded %u filetype definitions\n", sigs);
751 756
     return CL_SUCCESS;
752 757
 }
753 758
 
... ...
@@ -1277,8 +1302,8 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1277 1277
 	    ret = cli_loadpdb(fs, engine, options, gzs, gzrsize);
1278 1278
 	} else
1279 1279
 	    skipped = 1;
1280
-    } else if(cli_strbcasestr(dbname, ".ft")) {
1281
-	ret = cli_loadft(fs, engine, options, 0, gzs, gzrsize);
1280
+    } else if(cli_strbcasestr(dbname, ".ftm")) {
1281
+	ret = cli_loadftm(fs, engine, options, 0, gzs, gzrsize);
1282 1282
 
1283 1283
     } else if(cli_strbcasestr(dbname, ".ign")) {
1284 1284
 	ret = cli_loadign(fs, engine, options, gzs, gzrsize);
... ...
@@ -1757,7 +1782,7 @@ int cl_build(struct cl_engine *engine)
1757 1757
 	return CL_ENULLARG;
1758 1758
 
1759 1759
     if(!engine->ftypes)
1760
-	if((ret = cli_loadft(NULL, &engine, 0, 1, NULL, 0)))
1760
+	if((ret = cli_loadftm(NULL, &engine, 0, 1, NULL, 0)))
1761 1761
 	    return ret;
1762 1762
 
1763 1763
     for(i = 0; i < CLI_MTARGETS; i++) {
... ...
@@ -24,6 +24,7 @@
24 24
 #include "clamav.h"
25 25
 #include "matcher.h"
26 26
 #include "str.h"
27
+#include "cltypes.h"
27 28
 
28 29
 #define CLI_DBEXT(ext)				\
29 30
     (						\
... ...
@@ -42,14 +43,14 @@
42 42
 	cli_strbcasestr(ext, ".rmd")   ||	\
43 43
 	cli_strbcasestr(ext, ".pdb")   ||	\
44 44
 	cli_strbcasestr(ext, ".wdb")   ||	\
45
-	cli_strbcasestr(ext, ".ft")    ||	\
45
+	cli_strbcasestr(ext, ".ftm")   ||	\
46 46
 	cli_strbcasestr(ext, ".ign")   ||	\
47 47
 	cli_strbcasestr(ext, ".cvd")   ||	\
48 48
 	cli_strbcasestr(ext, ".cld")		\
49 49
     )
50 50
 
51 51
 
52
-int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target);
52
+int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target);
53 53
 
54 54
 int cli_initengine(struct cl_engine **engine, unsigned int options);
55 55
 
... ...
@@ -287,7 +287,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
287 287
 
288 288
 			for(i = 0; i < matcher->root_hosts_cnt; i++) {
289 289
 				/* doesn't need to match terminating \0*/
290
-				rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,0,-1,NULL);
290
+				rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,-1,NULL,AC_SCAN_VIR);
291 291
 				cli_ac_freedata(&mdata);
292 292
 				if(rc) {
293 293
 					char c;
... ...
@@ -443,6 +443,7 @@ static int add_regex_list_element(struct cli_matcher* root,const char* pattern,c
443 443
 
444 444
        len = strlen(pattern);
445 445
        /* need not to match \0 too */
446
+       new->rtype = 0;
446 447
        new->type = 0;
447 448
        new->sigid = 0;
448 449
        new->parts = 0;
... ...
@@ -165,7 +165,7 @@ static int cli_scandir(const char *dirname, cli_ctx *ctx, cli_file_t container)
165 165
 				    ftype = cli_filetype2(fd, ctx->engine);
166 166
 				    if(ftype >= CL_TYPE_TEXT_ASCII && ftype <= CL_TYPE_TEXT_UTF16BE) {
167 167
 					lseek(fd, 0, SEEK_SET);
168
-					ret = cli_scandesc(fd, ctx, 0, CL_TYPE_MAIL, 0, NULL);
168
+					ret = cli_scandesc(fd, ctx, CL_TYPE_MAIL, 0, NULL, AC_SCAN_VIR);
169 169
 				    }
170 170
 				    close(fd);
171 171
 				    if(ret == CL_VIRUS) {
... ...
@@ -250,7 +250,7 @@ static int cli_unrar_scanmetadata(int desc, unrar_metadata_t *metadata, cli_ctx
250 250
     if(DETECT_ENCRYPTED && metadata->encrypted) {
251 251
 	cli_dbgmsg("RAR: Encrypted files found in archive.\n");
252 252
 	lseek(desc, 0, SEEK_SET);
253
-	ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL);
253
+	ret = cli_scandesc(desc, ctx, 0, 0, NULL, AC_SCAN_VIR);
254 254
 	if(ret != CL_VIRUS) {
255 255
 	    *ctx->virname = "Encrypted.RAR";
256 256
 	    return CL_VIRUS;
... ...
@@ -829,7 +829,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
829 829
     if (fd >= 0) {
830 830
     	ofd = cli_decode_ole_object(fd, dirname);
831 831
 	if (ofd >= 0) {
832
-		ret = cli_scandesc(ofd, ctx, 0, 0, 0, NULL);
832
+		ret = cli_scandesc(ofd, ctx, 0, 0, NULL, AC_SCAN_VIR);
833 833
 		close(ofd);
834 834
 	}
835 835
 	close(fd);
... ...
@@ -913,7 +913,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
913 913
     snprintf(fullname, 1024, "%s/nocomment.html", tempname);
914 914
     fd = open(fullname, O_RDONLY|O_BINARY);
915 915
     if (fd >= 0) {
916
-	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
916
+	    ret = cli_scandesc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR);
917 917
 	    close(fd);
918 918
     }
919 919
 
... ...
@@ -923,7 +923,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
923 923
 	    snprintf(fullname, 1024, "%s/notags.html", tempname);
924 924
 	    fd = open(fullname, O_RDONLY|O_BINARY);
925 925
 	    if(fd >= 0) {
926
-		    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
926
+		    ret = cli_scandesc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR);
927 927
 		    close(fd);
928 928
 	    }
929 929
     }
... ...
@@ -1519,20 +1519,19 @@ static int cli_scanembpe(int desc, cli_ctx *ctx)
1519 1519
 static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type, uint8_t typercg)
1520 1520
 {
1521 1521
 	int ret = CL_CLEAN, nret = CL_CLEAN;
1522
-	uint8_t ftrec = 0, break_loop = 0;
1523 1522
 	struct cli_matched_type *ftoffset = NULL, *fpt;
1524 1523
 	uint32_t lastzip, lastrar;
1525 1524
 	struct cli_exe_info peinfo;
1525
+	unsigned int acmode = AC_SCAN_VIR, break_loop = 0;
1526 1526
 
1527 1527
 
1528 1528
     if(typercg) switch(type) {
1529 1529
 	case CL_TYPE_TEXT_ASCII:
1530 1530
 	case CL_TYPE_MSEXE:
1531 1531
 	case CL_TYPE_ZIP:
1532
-	    ftrec = 1;
1533
-	    break;
1532
+	    acmode |= AC_SCAN_FT;
1534 1533
 	default:
1535
-	    ftrec = 0;
1534
+	    break;
1536 1535
     }
1537 1536
 
1538 1537
     if(lseek(desc, 0, SEEK_SET) < 0) {
... ...
@@ -1540,7 +1539,7 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type, uint8_t typercg)
1540 1540
 	return CL_EIO;
1541 1541
     }
1542 1542
 
1543
-    ret = cli_scandesc(desc, ctx, ftrec, type == CL_TYPE_TEXT_ASCII ? 0 : type, 0, &ftoffset);
1543
+    ret = cli_scandesc(desc, ctx, type == CL_TYPE_TEXT_ASCII ? 0 : type, 0, &ftoffset, acmode);
1544 1544
 
1545 1545
     if(ret >= CL_TYPENO) {
1546 1546
 
... ...
@@ -1692,7 +1691,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1692 1692
 
1693 1693
     if(!ctx->options) { /* raw mode (stdin, etc.) */
1694 1694
 	cli_dbgmsg("Raw mode: No support for special files\n");
1695
-	if((ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL)) == CL_VIRUS)
1695
+	if((ret = cli_scandesc(desc, ctx, 0, 0, NULL, AC_SCAN_VIR)) == CL_VIRUS)
1696 1696
 	    cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
1697 1697
 	return ret;
1698 1698
     }
... ...
@@ -73,7 +73,7 @@ static const struct dblist_s {
73 73
     { "COPYING",    0 },
74 74
     { "daily.cfg",  0 },
75 75
     { "daily.ign",  0 },
76
-    { "daily.ft",   0 },
76
+    { "daily.ftm",  0 },
77 77
     { "main.info",  0 },    { "daily.info", 0 },
78 78
 
79 79
     /* databases */