Browse code

improved filetype detection code

git-svn: trunk@3421

Tomasz Kojm authored on 2007/12/15 07:39:37
Showing 12 changed files
... ...
@@ -1,3 +1,9 @@
1
+Fri Dec 14 22:55:32 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: improved filetype detection code; filetype definitions can now
4
+	       be distributed inside daily.cvd
5
+  * sigtool/sigtool.c: handle daily.ft
6
+
1 7
 Fri Dec 14 13:02:38 EET 2007 (edwin)
2 8
 ------------------------------------
3 9
   * libclamav/phishcheck.[ch]:
... ...
@@ -54,6 +54,7 @@ libclamav_la_SOURCES = \
54 54
 	scanners.h \
55 55
 	filetypes.c \
56 56
 	filetypes.h \
57
+	filetypes_int.h \
57 58
 	rtf.c \
58 59
 	rtf.h \
59 60
 	blob.c \
... ...
@@ -275,6 +275,7 @@ libclamav_la_SOURCES = \
275 275
 	scanners.h \
276 276
 	filetypes.c \
277 277
 	filetypes.h \
278
+	filetypes_int.h \
278 279
 	rtf.c \
279 280
 	rtf.h \
280 281
 	blob.c \
... ...
@@ -129,6 +129,9 @@ struct cl_engine {
129 129
 
130 130
     /* Dynamic configuration */
131 131
     void *dconf;
132
+
133
+    /* Filetype definitions */
134
+    void *ftypes;
132 135
 };
133 136
 
134 137
 struct cl_limits {
... ...
@@ -1,4 +1,7 @@
1 1
 /*
2
+ *  Copyright (C) 2007 Sourcefire, Inc.
3
+ *  Author: Tomasz Kojm <tkojm@clamav.net>
4
+ *
2 5
  *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
3 6
  *  With enhancements from Thomas Lamy <Thomas.Lamy@in-online.net>
4 7
  *
... ...
@@ -24,7 +27,6 @@
24 24
 #include <stdio.h>
25 25
 #include <string.h>
26 26
 #include <stdlib.h>
27
-#include <ctype.h>
28 27
 #include <sys/types.h>
29 28
 #ifdef	HAVE_UNISTD_H
30 29
 #include <unistd.h>
... ...
@@ -40,214 +42,102 @@
40 40
 #include "htmlnorm.h"
41 41
 #include "entconv.h"
42 42
 
43
-struct cli_magic_s {
44
-    size_t offset;
45
-    const char *magic;
46
-    size_t length;
47
-    const char *descr;
48
-    cli_file_t type;
43
+static const struct ftmap_s {
44
+    const char *name;
45
+    cli_file_t code;
46
+} ftmap[] = {
47
+    { "CL_TYPE_UNKNOWN_TEXT",	CL_TYPE_UNKNOWN_TEXT	},
48
+    { "CL_TYPE_UNKNOWN_DATA",	CL_TYPE_UNKNOWN_DATA	},
49
+    { "CL_TYPE_IGNORED",	CL_TYPE_IGNORED		},
50
+    { "CL_TYPE_MSEXE",		CL_TYPE_MSEXE		},
51
+    { "CL_TYPE_ELF",		CL_TYPE_ELF		},
52
+    { "CL_TYPE_POSIX_TAR",	CL_TYPE_POSIX_TAR	},
53
+    { "CL_TYPE_OLD_TAR",	CL_TYPE_OLD_TAR		},
54
+    { "CL_TYPE_GZ",		CL_TYPE_GZ		},
55
+    { "CL_TYPE_ZIP",		CL_TYPE_ZIP		},
56
+    { "CL_TYPE_BZ",		CL_TYPE_BZ		},
57
+    { "CL_TYPE_RAR",		CL_TYPE_RAR		},
58
+    { "CL_TYPE_ARJ",		CL_TYPE_ARJ		},
59
+    { "CL_TYPE_MSSZDD",		CL_TYPE_MSSZDD		},
60
+    { "CL_TYPE_MSOLE2",		CL_TYPE_MSOLE2		},
61
+    { "CL_TYPE_MSCAB",		CL_TYPE_MSCAB		},
62
+    { "CL_TYPE_MSCHM",		CL_TYPE_MSCHM		},
63
+    { "CL_TYPE_SIS",		CL_TYPE_SIS		},
64
+    { "CL_TYPE_SCRENC",		CL_TYPE_SCRENC		},
65
+    { "CL_TYPE_GRAPHICS",	CL_TYPE_GRAPHICS	},
66
+    { "CL_TYPE_RIFF",		CL_TYPE_RIFF		},
67
+    { "CL_TYPE_BINHEX",		CL_TYPE_BINHEX		},
68
+    { "CL_TYPE_TNEF",		CL_TYPE_TNEF		},
69
+    { "CL_TYPE_CRYPTFF",	CL_TYPE_CRYPTFF		},
70
+    { "CL_TYPE_PDF",		CL_TYPE_PDF		},
71
+    { "CL_TYPE_UUENCODED",	CL_TYPE_UUENCODED	},
72
+    { "CL_TYPE_PST",		CL_TYPE_PST		},
73
+    { "CL_TYPE_HTML_UTF16",	CL_TYPE_HTML_UTF16	},
74
+    { "CL_TYPE_RTF",		CL_TYPE_RTF		},
75
+    { "CL_TYPE_HTML",		CL_TYPE_HTML		},
76
+    { "CL_TYPE_MAIL",		CL_TYPE_MAIL		},
77
+    { "CL_TYPE_SFX",		CL_TYPE_SFX		},
78
+    { "CL_TYPE_ZIPSFX",		CL_TYPE_ZIPSFX		},
79
+    { "CL_TYPE_RARSFX",		CL_TYPE_RARSFX		},
80
+    { "CL_TYPE_CABSFX",		CL_TYPE_CABSFX		},
81
+    { "CL_TYPE_ARJSFX",		CL_TYPE_ARJSFX		},
82
+    { "CL_TYPE_NULSFT",		CL_TYPE_NULSFT		},
83
+    { "CL_TYPE_AUTOIT",		CL_TYPE_AUTOIT		},
84
+    { NULL,			CL_TYPE_UNKNOWN_DATA	}
49 85
 };
50 86
 
51
-struct cli_smagic_s {
52
-    const char *sig;
53
-    const char *descr;
54
-    cli_file_t type;
55
-};
87
+cli_file_t cli_ftcode(const char *name)
88
+{
89
+	unsigned int i;
56 90
 
57
-static const struct cli_magic_s cli_magic[] = {
58
-
59
-    /* Executables */
60
-
61
-    {0,  "MZ",				2,  "DOS/W32 executable/library/driver", CL_TYPE_MSEXE},
62
-    {0,	 "\177ELF",			4,  "ELF",		CL_TYPE_ELF},
63
-
64
-    /* Archives */
65
-
66
-    {0,	    "Rar!",			4,  "RAR",		CL_TYPE_RAR},
67
-    {0,	    "PK\003\004",		4,  "ZIP",		CL_TYPE_ZIP},
68
-    {0,	    "PK00PK\003\004",		8,  "ZIP",		CL_TYPE_ZIP},
69
-    {0,	    "\037\213",			2,  "GZip",		CL_TYPE_GZ},
70
-    {0,	    "BZh",			3,  "BZip",		CL_TYPE_BZ},
71
-    {0,	    "\x60\xea",			2,  "ARJ",		CL_TYPE_ARJ},
72
-    {0,	    "SZDD",			4,  "compress.exe'd",	CL_TYPE_MSSZDD},
73
-    {0,	    "MSCF",			4,  "MS CAB",		CL_TYPE_MSCAB},
74
-    {0,	    "ITSF",			4,  "MS CHM",           CL_TYPE_MSCHM},
75
-    {8,	    "\x19\x04\x00\x10",		4,  "SIS",		CL_TYPE_SIS},
76
-    {0,	    "\x7a\x1a\x20\x10",		4,  "SIS",		CL_TYPE_SIS},
77
-    {0,     "#@~^",			4,  "SCRENC",		CL_TYPE_SCRENC},
78
-    {0,     "(This file must be converted with BinHex 4.0)",
79
-				       45, "BinHex",		CL_TYPE_BINHEX},
80
-
81
-    /* Mail */
82
-
83
-    {0,  "From ",			 5, "MBox",		  CL_TYPE_MAIL},
84
-    {0,  "Received: ",			10, "Raw mail",		  CL_TYPE_MAIL},
85
-    {0,  "Return-Path: ",		13, "Maildir",		  CL_TYPE_MAIL},
86
-    {0,  "Return-path: ",		13, "Maildir",		  CL_TYPE_MAIL},
87
-    {0,  "Delivered-To: ",		14, "Mail",		  CL_TYPE_MAIL},
88
-    {0,  "X-UIDL: ",			 8, "Mail",		  CL_TYPE_MAIL},
89
-    {0,  "X-Apparently-To: ",		17, "Mail",		  CL_TYPE_MAIL},
90
-    {0,  "X-Envelope-From: ",		17, "Mail",		  CL_TYPE_MAIL},
91
-    {0,  "X-Original-To: ",		15, "Mail",		  CL_TYPE_MAIL},
92
-    {0,  "X-Symantec-",			11, "Symantec",		  CL_TYPE_MAIL},
93
-    {0,  "X-EVS",			 5, "EVS mail",		  CL_TYPE_MAIL},
94
-    {0,  "X-Real-To: ",                 11, "Mail",               CL_TYPE_MAIL},
95
-    {0,  "X-Sieve: ",			 9, "Mail",		  CL_TYPE_MAIL},
96
-    {0,  ">From ",			 6, "Mail",		  CL_TYPE_MAIL},
97
-    {0,  "Date: ",			 6, "Mail",		  CL_TYPE_MAIL},
98
-    {0,  "Message-Id: ",		12, "Mail",		  CL_TYPE_MAIL},
99
-    {0,  "Message-ID: ",		12, "Mail",		  CL_TYPE_MAIL},
100
-    {0,  "Envelope-to: ",		13, "Mail",		  CL_TYPE_MAIL},
101
-    {0,  "Delivery-date: ",		15, "Mail",		  CL_TYPE_MAIL},
102
-    {0,  "To: ",			 4, "Mail",		  CL_TYPE_MAIL},
103
-    {0,  "Subject: ",			 9, "Mail",		  CL_TYPE_MAIL},
104
-    {0,  "For: ",			 5, "Eserv mail",	  CL_TYPE_MAIL},
105
-    {0,  "From: ",			 6, "Exim mail",	  CL_TYPE_MAIL},
106
-    {0,  "v:\015\012Received: ",	14, "VPOP3 Mail (DOS)",	  CL_TYPE_MAIL},
107
-    {0,  "v:\012Received: ",		13, "VPOP3 Mail (UNIX)",  CL_TYPE_MAIL},
108
-    {0,  "Hi. This is the qmail-send",  26, "Qmail bounce",	  CL_TYPE_MAIL},
109
-    {0,  "\170\237\076\042",		 4, "TNEF",               CL_TYPE_TNEF},
110
-
111
-    {0,  "begin ",			6,  "UUencoded",	  CL_TYPE_UUENCODED},
112
-    {0, "\041\102\104\116",		4, "PST",		  CL_TYPE_PST},
113
-
114
-    /* Graphics (may contain exploits against MS systems) */
115
-
116
-    {0,  "GIF",				 3, "GIF",	    CL_TYPE_GRAPHICS},
117
-    {0,  "BM",				 2, "BMP",          CL_TYPE_GRAPHICS},
118
-    {0,  "\377\330\377",		 3, "JPEG",         CL_TYPE_GRAPHICS},
119
-    {6,  "JFIF",			 4, "JPEG",         CL_TYPE_GRAPHICS},
120
-    {6,  "Exif",			 4, "JPEG",         CL_TYPE_GRAPHICS},
121
-    {0,  "\x89PNG",			 4, "PNG",          CL_TYPE_GRAPHICS},
122
-    {0,  "RIFF",                         4, "RIFF",         CL_TYPE_RIFF},
123
-    {0,  "RIFX",                         4, "RIFX",         CL_TYPE_RIFF},
124
-
125
-    /* Others */
126
-
127
-    {0,  "\320\317\021\340\241\261\032\341", 8, "OLE2 container", CL_TYPE_MSOLE2},
128
-    {0,  "%PDF-",			 5, "PDF document", CL_TYPE_PDF},
129
-    {0,  "\266\271\254\256\376\377\377\377", 8, "CryptFF", CL_TYPE_CRYPTFF},
130
-    {0,  "{\\rtf",                           5, "RTF", CL_TYPE_RTF}, 
131
-
132
-    /* Ignored types */
133
-
134
-    {0,  "\000\000\001\263",             4, "MPEG video stream",  CL_TYPE_DATA},
135
-    {0,  "\000\000\001\272",             4, "MPEG sys stream",    CL_TYPE_DATA},
136
-    {0,  "OggS",                         4, "Ogg Stream",         CL_TYPE_DATA},
137
-    {0,  "ID3",				 3, "MP3",		  CL_TYPE_DATA},
138
-    {0,  "\377\373\220",		 3, "MP3",		  CL_TYPE_DATA},
139
-    {0,  "%!PS-Adobe-",			11, "PostScript",	  CL_TYPE_DATA},
140
-    {0,  "\060\046\262\165\216\146\317", 7, "WMA/WMV/ASF",	  CL_TYPE_DATA},
141
-    {0,  ".RMF" ,			 4, "Real Media File",	  CL_TYPE_DATA},
142
-
143
-    {0, NULL,				 0, NULL,		  CL_TYPE_UNKNOWN_DATA}
144
-};
91
+    for(i = 0; ftmap[i].name; i++)
92
+	if(!strcmp(ftmap[i].name, name))
93
+	    return ftmap[i].code;
145 94
 
146
-static const struct cli_smagic_s cli_smagic[] = {
147
-
148
-    /* "\nFrom: " * "\nContent-Type: " */
149
-    {"0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
150
-
151
-    /* "\nReceived: " * "\nContent-Type: " */
152
-    {"0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
153
-
154
-    /* "\nReceived: " * "\nContent-type: " */
155
-    {"0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20", "Mail file", CL_TYPE_MAIL},
156
-
157
-    /* "MIME-Version: " * "\nContent-Type: " */
158
-    {"4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
159
-
160
-    /* remember the matcher is case sensitive */
161
-    {"3c62723e",       "HTML data", CL_TYPE_HTML},	/* <br> */
162
-    {"3c42723e",       "HTML data", CL_TYPE_HTML},	/* <Br> */
163
-    {"3c42523e",       "HTML data", CL_TYPE_HTML},	/* <BR> */
164
-    {"3c703e",	       "HTML data", CL_TYPE_HTML},	/* <p> */
165
-    {"3c503e",	       "HTML data", CL_TYPE_HTML},	/* <P> */
166
-    {"68726566",       "HTML data", CL_TYPE_HTML},	/* href */
167
-    {"48726566",       "HTML data", CL_TYPE_HTML},	/* Href */
168
-    {"48524546",       "HTML data", CL_TYPE_HTML},	/* HREF */
169
-    {"3c68746d6c3e",   "HTML data", CL_TYPE_HTML},      /* <html> */
170
-    {"3c48544d4c3e",   "HTML data", CL_TYPE_HTML},      /* <HTML> */
171
-    {"3c48746d6c3e",   "HTML data", CL_TYPE_HTML},      /* <Html> */
172
-    {"3c686561643e",   "HTML data", CL_TYPE_HTML},      /* <head> */
173
-    {"3c484541443e",   "HTML data", CL_TYPE_HTML},      /* <HEAD> */
174
-    {"3c486561643e",   "HTML data", CL_TYPE_HTML},      /* <Head> */
175
-    {"3c666f6e74",     "HTML data", CL_TYPE_HTML},	/* <font */
176
-    {"3c466f6e74",     "HTML data", CL_TYPE_HTML},	/* <Font */
177
-    {"3c464f4e54",     "HTML data", CL_TYPE_HTML},	/* <FONT */
178
-    {"3c696d67",       "HTML data", CL_TYPE_HTML},      /* <img */
179
-    {"3c494d47",       "HTML data", CL_TYPE_HTML},      /* <IMG */
180
-    {"3c496d67",       "HTML data", CL_TYPE_HTML},      /* <Img */
181
-    {"3c736372697074", "HTML data", CL_TYPE_HTML},	/* <script */
182
-    {"3c536372697074", "HTML data", CL_TYPE_HTML},	/* <Script */
183
-    {"3c534352495054", "HTML data", CL_TYPE_HTML},	/* <SCRIPT */
184
-    {"3c6f626a656374", "HTML data", CL_TYPE_HTML},      /* <object */
185
-    {"3c4f626a656374", "HTML data", CL_TYPE_HTML},      /* <Object */
186
-    {"3c4f424a454354", "HTML data", CL_TYPE_HTML},      /* <OBJECT */
187
-    {"3c696672616d65", "HTML data", CL_TYPE_HTML},      /* <iframe */
188
-    {"3c494652414d45", "HTML data", CL_TYPE_HTML},      /* <IFRAME */
189
-    {"3c7461626c65",   "HTML data", CL_TYPE_HTML},	/* <table */
190
-    {"3c5441424c45",   "HTML data", CL_TYPE_HTML},	/* <TABLE */
191
-
192
-    {"526172211a0700", "RAR-SFX", CL_TYPE_RARSFX},
193
-    {"504b0304", "ZIP-SFX", CL_TYPE_ZIPSFX},
194
-    {"4d534346", "CAB-SFX", CL_TYPE_CABSFX},
195
-    {"60ea{7}0002", "ARJ-SFX", CL_TYPE_ARJSFX},
196
-    {"60ea{7}0102", "ARJ-SFX", CL_TYPE_ARJSFX},
197
-    {"60ea{7}0202", "ARJ-SFX", CL_TYPE_ARJSFX},
198
-    {"efbeadde4e756c6c736f6674496e7374", "NSIS", CL_TYPE_NULSFT},
199
-    {"a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36)", "AUTOIT", CL_TYPE_AUTOIT},
200
-
201
-    {"4d5a{60-300}50450000", "PE", CL_TYPE_MSEXE},
202
-
203
-    {NULL,  NULL,   CL_TYPE_UNKNOWN_DATA}
204
-};
95
+    return CL_TYPE_ERROR;
96
+}
205 97
 
206
-static char internat[256] = {
207
-    /* TODO: Remember to buy a beer to Joerg Wunsch <joerg@FreeBSD.ORG> */
208
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,  /* 0x0X */
209
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,  /* 0x1X */
210
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x2X */
211
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x3X */
212
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x4X */
213
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x5X */
214
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x6X */
215
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  /* 0x7X */
216
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x8X */
217
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x9X */
218
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xaX */
219
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xbX */
220
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xcX */
221
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xdX */
222
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xeX */
223
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xfX */
224
-};
98
+void cli_ftfree(struct cli_ftype *ftypes)
99
+{
100
+	struct cli_ftype *pt;
101
+
102
+    while(ftypes) {
103
+	pt = ftypes;
104
+	ftypes = ftypes->next;
105
+	free(pt->magic);
106
+	free(pt->tname);
107
+	free(pt);
108
+    }
109
+}
225 110
 
226
-cli_file_t cli_filetype(const unsigned char *buf, size_t buflen)
111
+cli_file_t cli_filetype(const unsigned char *buf, size_t buflen, const struct cl_engine *engine)
227 112
 {
228
-	int i, text = 1, len;
113
+	struct cli_ftype *ftype = engine ? engine->ftypes : NULL; /* cli_filetype2 forwards its engine before NULL-checking it */
229 114
 
230 115
 
231
-    for(i = 0; cli_magic[i].magic; i++) {
232
-	if(buflen >= cli_magic[i].offset+cli_magic[i].length) {
233
-	    if(memcmp(buf+cli_magic[i].offset, cli_magic[i].magic, cli_magic[i].length) == 0) {
234
-		cli_dbgmsg("Recognized %s file\n", cli_magic[i].descr);
235
-		return cli_magic[i].type;
116
+    while(ftype) {
117
+	if(ftype->length <= buflen && ftype->offset <= buflen - ftype->length) { /* avoids wrap-around of offset + length */
118
+	    if(!memcmp(buf + ftype->offset, ftype->magic, ftype->length)) {
119
+		cli_dbgmsg("Recognized %s file\n", ftype->tname);
120
+		return ftype->type;
236 121
 	    }
237 122
 	}
123
+	ftype = ftype->next;
238 124
     }
239 125
 
240
-/* improve or drop this code
126
+/* FIXME: improve or drop this code
241 127
  * https://wwws.clamav.net/bugzilla/show_bug.cgi?id=373
242 128
  *
129
+	int i, text = 1, len;
243 130
     buflen < 25 ? (len = buflen) : (len = 25);
244 131
     for(i = 0; i < len; i++)
245 132
 	if(!iscntrl(buf[i]) && !isprint(buf[i]) && !internat[buf[i] & 0xff]) {
246 133
 	    text = 0;
247 134
 	    break;
248 135
 	}
249
-*/
250 136
     return text ? CL_TYPE_UNKNOWN_TEXT : CL_TYPE_UNKNOWN_DATA;
137
+*/
138
+    return CL_TYPE_UNKNOWN_TEXT;
251 139
 }
252 140
 
253 141
 int is_tar(unsigned char *buf, unsigned int nbytes);
... ...
@@ -263,7 +153,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
263 263
 
264 264
     memset(smallbuff, 0, sizeof(smallbuff));
265 265
     if((bread = read(desc, smallbuff, MAGIC_BUFFER_SIZE)) > 0)
266
-	ret = cli_filetype(smallbuff, bread);
266
+	ret = cli_filetype(smallbuff, bread, engine);
267 267
 
268 268
     if(engine && ret == CL_TYPE_UNKNOWN_TEXT) {
269 269
 	root = engine->root[0];
... ...
@@ -357,10 +247,10 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
357 357
 
358 358
 	    if(!memcmp(bigbuff + 32769, "CD001" , 5) || !memcmp(bigbuff + 37633, "CD001" , 5)) {
359 359
 		cli_dbgmsg("Recognized ISO 9660 CD-ROM data\n");
360
-		ret = CL_TYPE_DATA;
360
+		ret = CL_TYPE_IGNORED;
361 361
 	    } else if(!memcmp(bigbuff + 32776, "CDROM" , 5)) {
362 362
 		cli_dbgmsg("Recognized High Sierra CD-ROM data\n");
363
-		ret = CL_TYPE_DATA;
363
+		ret = CL_TYPE_IGNORED;
364 364
 	    }
365 365
 	}
366 366
 
... ...
@@ -369,38 +259,3 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
369 369
 
370 370
     return ret;
371 371
 }
372
-
373
-int cli_addtypesigs(struct cl_engine *engine)
374
-{
375
-	int i, ret;
376
-	struct cli_matcher *root;
377
-
378
-
379
-    if(!engine->root[0]) {
380
-	cli_dbgmsg("cli_addtypesigs: Need to allocate AC trie in engine->root[0]\n");
381
-	root = engine->root[0] = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
382
-	if(!root) {
383
-	    cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
384
-	    return CL_EMEM;
385
-	}
386
-
387
-	if((ret = cli_ac_init(root, cli_ac_mindepth, cli_ac_maxdepth))) {
388
-	    /* No need to free previously allocated memory here - all engine
389
-	     * elements will be properly freed by cl_free()
390
-	     */
391
-	    cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
392
-	    return ret;
393
-	}
394
-    } else {
395
-	root = engine->root[0];
396
-    }
397
-
398
-    for(i = 0; cli_smagic[i].sig; i++) {
399
-	if((ret = cli_parse_add(root, cli_smagic[i].descr, cli_smagic[i].sig, cli_smagic[i].type, NULL, 0))) {
400
-	    cli_errmsg("cli_addtypesigs: Problem adding signature for %s\n", cli_smagic[i].descr);
401
-	    return ret;
402
-	}
403
-    }
404
-
405
-    return 0;
406
-}
... ...
@@ -1,4 +1,7 @@
1 1
 /*
2
+ *  Copyright (C) 2007 Sourcefire, Inc.
3
+ *  Author: Tomasz Kojm <tkojm@clamav.net>
4
+ *
2 5
  *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
3 6
  *  With enhancements from Thomas Lamy <Thomas.Lamy@in-online.net>
4 7
  *
... ...
@@ -22,6 +25,9 @@
22 22
 
23 23
 #include <sys/types.h>
24 24
 
25
+#include "clamav.h"
26
+#include "cltypes.h"
27
+
25 28
 #define MAGIC_BUFFER_SIZE 256
26 29
 #define CL_TYPENO 500
27 30
 #define MAX_EMBEDDED_OBJ 10
... ...
@@ -29,9 +35,10 @@
29 29
 typedef enum {
30 30
     CL_TYPE_UNKNOWN_TEXT = CL_TYPENO,
31 31
     CL_TYPE_UNKNOWN_DATA,
32
+    CL_TYPE_IGNORED,
33
+    CL_TYPE_ERROR,
32 34
     CL_TYPE_MSEXE,
33 35
     CL_TYPE_ELF,
34
-    CL_TYPE_DATA,
35 36
     CL_TYPE_POSIX_TAR,
36 37
     CL_TYPE_OLD_TAR,
37 38
     CL_TYPE_GZ,
... ...
@@ -68,6 +75,15 @@ typedef enum {
68 68
     CL_TYPE_AUTOIT
69 69
 } cli_file_t;
70 70
 
71
+struct cli_ftype {
72
+    cli_file_t type;
73
+    uint32_t offset;
74
+    unsigned char *magic;
75
+    uint16_t length;
76
+    char *tname;
77
+    struct cli_ftype *next;
78
+};
79
+
71 80
 struct cli_matched_type {
72 81
     cli_file_t type;
73 82
     off_t offset;
... ...
@@ -75,7 +91,9 @@ struct cli_matched_type {
75 75
     struct cli_matched_type *next;
76 76
 };
77 77
 
78
-cli_file_t cli_filetype(const unsigned char *buf, size_t buflen);
78
+cli_file_t cli_ftcode(const char *name);
79
+void cli_ftfree(struct cli_ftype *ftypes);
80
+cli_file_t cli_filetype(const unsigned char *buf, size_t buflen, const struct cl_engine *engine);
79 81
 cli_file_t cli_filetype2(int desc, const struct cl_engine *engine);
80 82
 int cli_addtypesigs(struct cl_engine *engine);
81 83
 
82 84
new file mode 100644
... ...
@@ -0,0 +1,145 @@
0
+/*
1
+ *  Static filetype data for use when daily.ft is not available.
2
+ *  Copyright (C) 2007 Sourcefire, Inc.
3
+ *  Author: Tomasz Kojm <tkojm@clamav.net>
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ */
19
+
20
+#ifndef __FILETYPES_INT_H
21
+#define __FILETYPES_INT_H
22
+
23
+/* Generated with the following perl script:
24
+#!/usr/bin/perl
25
+open(FT, "daily.ft") or die "Can't open daily.ft";
26
+print "static const char *ftypes_int[] = {\n";
27
+while($line = <FT>) {
28
+    chomp($line);
29
+    print "  \"$line\",\n";
30
+}
31
+print "  NULL\n};\n"
32
+*/
33
+
34
+static const char *ftypes_int[] = {
35
+  "0:4d5a:MS-EXE/DLL:CL_TYPE_MSEXE",
36
+  "0:7f454c46:ELF:CL_TYPE_ELF",
37
+  "0:52617221:RAR:CL_TYPE_RAR",
38
+  "0:504b0304:ZIP:CL_TYPE_ZIP",
39
+  "0:504b3030504b0304:ZIP:CL_TYPE_ZIP",
40
+  "0:1f8b:GZip:CL_TYPE_GZ",
41
+  "0:425a68:BZip:CL_TYPE_BZ",
42
+  "0:60ea:ARJ:CL_TYPE_ARJ",
43
+  "0:535a4444:compress.exe'd:CL_TYPE_MSSZDD",
44
+  "0:4d534346:MS CAB:CL_TYPE_MSCAB",
45
+  "0:49545346:MS CHM:CL_TYPE_MSCHM",
46
+  "8:19040010:SIS:CL_TYPE_SIS",
47
+  "0:23407e5e:SCRENC:CL_TYPE_SCRENC",
48
+  "0:28546869732066696c65206d75737420626520636f6e76657274656420776974682042696e48657820342e3029:BinHex:CL_TYPE_BINHEX",
49
+  "0:46726f6d20:MBox:CL_TYPE_MAIL",
50
+  "0:52656365697665643a20:Raw mail:CL_TYPE_MAIL",
51
+  "0:52657475726e2d506174683a20:Maildir:CL_TYPE_MAIL",
52
+  "0:52657475726e2d706174683a20:Maildir:CL_TYPE_MAIL",
53
+  "0:44656c6976657265642d546f3a20:Mail:CL_TYPE_MAIL",
54
+  "0:582d5549444c3a20:Mail:CL_TYPE_MAIL",
55
+  "0:582d4170706172656e746c792d546f3a20:Mail:CL_TYPE_MAIL",
56
+  "0:582d456e76656c6f70652d46726f6d3a20:Mail:CL_TYPE_MAIL",
57
+  "0:582d4f726967696e616c2d546f3a20:Mail:CL_TYPE_MAIL",
58
+  "0:582d53796d616e7465632d:Symantec:CL_TYPE_MAIL",
59
+  "0:582d455653:EVS mail:CL_TYPE_MAIL",
60
+  "0:582d5265616c2d546f3a20:Mail:CL_TYPE_MAIL",
61
+  "0:582d53696576653a20:Mail:CL_TYPE_MAIL",
62
+  "0:3e46726f6d20:Mail:CL_TYPE_MAIL",
63
+  "0:446174653a20:Mail:CL_TYPE_MAIL",
64
+  "0:4d6573736167652d49643a20:Mail:CL_TYPE_MAIL",
65
+  "0:4d6573736167652d49443a20:Mail:CL_TYPE_MAIL",
66
+  "0:456e76656c6f70652d746f3a20:Mail:CL_TYPE_MAIL",
67
+  "0:44656c69766572792d646174653a20:Mail:CL_TYPE_MAIL",
68
+  "0:546f3a20:Mail:CL_TYPE_MAIL",
69
+  "0:5375626a6563743a20:Mail:CL_TYPE_MAIL",
70
+  "0:466f723a20:Eserv mail:CL_TYPE_MAIL",
71
+  "0:46726f6d3a20:Exim mail:CL_TYPE_MAIL",
72
+  "0:763a0d0a52656365697665643a20:VPOP3 Mail (DOS):CL_TYPE_MAIL",
73
+  "0:763a0a52656365697665643a20:VPOP3 Mail (UNIX):CL_TYPE_MAIL",
74
+  "0:48692e20546869732069732074686520716d61696c2d73656e64:Qmail bounce:CL_TYPE_MAIL",
75
+  "0:789f3e22:TNEF:CL_TYPE_TNEF",
76
+  "0:626567696e20:UUencoded:CL_TYPE_UUENCODED",
77
+  "0:2142444e:PST:CL_TYPE_PST",
78
+  "0:474946:GIF:CL_TYPE_GRAPHICS",
79
+  "0:424d:BMP:CL_TYPE_GRAPHICS",
80
+  "0:ffd8ff:JPEG:CL_TYPE_GRAPHICS",
81
+  "6:4a464946:JPEG:CL_TYPE_GRAPHICS",
82
+  "6:45786966:JPEG:CL_TYPE_GRAPHICS",
83
+  "0:89504e47:PNG:CL_TYPE_GRAPHICS",
84
+  "0:52494646:RIFF:CL_TYPE_RIFF",
85
+  "0:52494658:RIFX:CL_TYPE_RIFF",
86
+  "0:d0cf11e0a1b11ae1:OLE2 container:CL_TYPE_MSOLE2",
87
+  "0:255044462d:PDF document:CL_TYPE_PDF",
88
+  "0:b6b9acaefeffffff:CryptFF:CL_TYPE_CRYPTFF",
89
+  "0:7b5c727466:RTF:CL_TYPE_RTF",
90
+  "0:000001b3:MPEG video stream:CL_TYPE_IGNORED",
91
+  "0:000001ba:MPEG sys stream:CL_TYPE_IGNORED",
92
+  "0:4f676753:Ogg Stream:CL_TYPE_IGNORED",
93
+  "0:494433:MP3:CL_TYPE_IGNORED",
94
+  "0:fffb90:MP3:CL_TYPE_IGNORED",
95
+  "0:252150532d41646f62652d:PostScript:CL_TYPE_IGNORED",
96
+  "0:3026b2758e66cf:WMA/WMV/ASF:CL_TYPE_IGNORED",
97
+  "0:2e524d46:Real Media File:CL_TYPE_IGNORED",
98
+  "*:0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
99
+  "*:0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
100
+  "*:0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20:Mail file:CL_TYPE_MAIL",
101
+  "*:4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
102
+  "*:3c62723e:HTML data:CL_TYPE_HTML",
103
+  "*:3c42723e:HTML data:CL_TYPE_HTML",
104
+  "*:3c42523e:HTML data:CL_TYPE_HTML",
105
+  "*:3c703e:HTML data:CL_TYPE_HTML",
106
+  "*:3c503e:HTML data:CL_TYPE_HTML",
107
+  "*:68726566:HTML data:CL_TYPE_HTML",
108
+  "*:48726566:HTML data:CL_TYPE_HTML",
109
+  "*:48524546:HTML data:CL_TYPE_HTML",
110
+  "*:3c68746d6c3e:HTML data:CL_TYPE_HTML",
111
+  "*:3c48544d4c3e:HTML data:CL_TYPE_HTML",
112
+  "*:3c48746d6c3e:HTML data:CL_TYPE_HTML",
113
+  "*:3c686561643e:HTML data:CL_TYPE_HTML",
114
+  "*:3c484541443e:HTML data:CL_TYPE_HTML",
115
+  "*:3c486561643e:HTML data:CL_TYPE_HTML",
116
+  "*:3c666f6e74:HTML data:CL_TYPE_HTML",
117
+  "*:3c466f6e74:HTML data:CL_TYPE_HTML",
118
+  "*:3c464f4e54:HTML data:CL_TYPE_HTML",
119
+  "*:3c696d67:HTML data:CL_TYPE_HTML",
120
+  "*:3c494d47:HTML data:CL_TYPE_HTML",
121
+  "*:3c496d67:HTML data:CL_TYPE_HTML",
122
+  "*:3c736372697074:HTML data:CL_TYPE_HTML",
123
+  "*:3c536372697074:HTML data:CL_TYPE_HTML",
124
+  "*:3c534352495054:HTML data:CL_TYPE_HTML",
125
+  "*:3c6f626a656374:HTML data:CL_TYPE_HTML",
126
+  "*:3c4f626a656374:HTML data:CL_TYPE_HTML",
127
+  "*:3c4f424a454354:HTML data:CL_TYPE_HTML",
128
+  "*:3c696672616d65:HTML data:CL_TYPE_HTML",
129
+  "*:3c494652414d45:HTML data:CL_TYPE_HTML",
130
+  "*:3c7461626c65:HTML data:CL_TYPE_HTML",
131
+  "*:3c5441424c45:HTML data:CL_TYPE_HTML",
132
+  "*:526172211a0700:RAR-SFX:CL_TYPE_RARSFX",
133
+  "*:504b0304:ZIP-SFX:CL_TYPE_ZIPSFX",
134
+  "*:4d534346:CAB-SFX:CL_TYPE_CABSFX",
135
+  "*:60ea{7}0002:ARJ-SFX:CL_TYPE_ARJSFX",
136
+  "*:60ea{7}0102:ARJ-SFX:CL_TYPE_ARJSFX",
137
+  "*:60ea{7}0202:ARJ-SFX:CL_TYPE_ARJSFX",
138
+  "*:efbeadde4e756c6c736f6674496e7374:NSIS:CL_TYPE_NULSFT",
139
+  "*:a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36):AUTOIT:CL_TYPE_AUTOIT",
140
+  "*:4d5a{60-300}50450000:PE:CL_TYPE_MSEXE",
141
+  NULL
142
+};
143
+
144
+#endif
... ...
@@ -239,7 +239,7 @@ static	int	rfc1341(message *m, const char *dir);
239 239
 #endif
240 240
 static	bool	usefulHeader(int commandNumber, const char *cmd);
241 241
 static	char	*getline_from_mbox(char *buffer, size_t len, FILE *fin);
242
-static	bool	isBounceStart(const char *line);
242
+static	bool	isBounceStart(mbox_ctx *mctx, const char *line);
243 243
 static	bool	exportBinhexMessage(mbox_ctx *mctx, message *m);
244 244
 static	int	exportBounceMessage(mbox_ctx *ctx, text *start);
245 245
 static	message	*do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
... ...
@@ -608,7 +608,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
608 608
 
609 609
 		free_map();
610 610
 
611
-		type = cli_filetype(start, size);
611
+		type = cli_filetype(start, size, ctx->engine);
612 612
 
613 613
 		if((type == CL_TYPE_UNKNOWN_TEXT) &&
614 614
 		   (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
... ...
@@ -2848,12 +2848,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2848 2848
 
2849 2849
 			if(lookahead_definately_is_bounce)
2850 2850
 				lookahead_definately_is_bounce = FALSE;
2851
-			else if(!isBounceStart(lineGetData(l)))
2851
+			else if(!isBounceStart(mctx, lineGetData(l)))
2852 2852
 				continue;
2853 2853
 
2854 2854
 			lookahead = t->t_next;
2855 2855
 			if(lookahead) {
2856
-				if(isBounceStart(lineGetData(lookahead->t_line))) {
2856
+				if(isBounceStart(mctx, lineGetData(lookahead->t_line))) {
2857 2857
 					lookahead_definately_is_bounce = TRUE;
2858 2858
 					/* don't save worthless header lines */
2859 2859
 					continue;
... ...
@@ -2957,7 +2957,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2957 2957
 				l = t->t_line;
2958 2958
 				if((!inheader) && l) {
2959 2959
 					s = lineGetData(l);
2960
-					if(isBounceStart(s)) {
2960
+					if(isBounceStart(mctx, s)) {
2961 2961
 						cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
2962 2962
 						lookahead_definately_is_bounce = TRUE;
2963 2963
 						break;
... ...
@@ -4796,7 +4796,7 @@ getline_from_mbox(char *buffer, size_t len, FILE *fin)
4796 4796
  * Is this line a candidate for the start of a bounce message?
4797 4797
  */
4798 4798
 static bool
4799
-isBounceStart(const char *line)
4799
+isBounceStart(mbox_ctx *mctx, const char *line)
4800 4800
 {
4801 4801
 	size_t len;
4802 4802
 
... ...
@@ -4832,7 +4832,7 @@ isBounceStart(const char *line)
4832 4832
 			return FALSE;
4833 4833
 		return TRUE;
4834 4834
 	}
4835
-	return cli_filetype((const unsigned char *)line, len) == CL_TYPE_MAIL;
4835
+	return cli_filetype((const unsigned char *)line, len, mctx->ctx->engine) == CL_TYPE_MAIL;
4836 4836
 }
4837 4837
 
4838 4838
 /*
... ...
@@ -1102,7 +1102,7 @@ messageIsEncoding(message *m)
1102 1102
 		m->encoding = m->body_last;
1103 1103
 	else if((m->bounce == NULL) &&
1104 1104
 		(strncasecmp(line, "Received: ", 10) == 0) &&
1105
-		(cli_filetype((const unsigned char *)line, strlen(line)) == CL_TYPE_MAIL))
1105
+		(cli_filetype((const unsigned char *)line, strlen(line), m->ctx->engine) == CL_TYPE_MAIL))
1106 1106
 			m->bounce = m->body_last;
1107 1107
 		/* Not needed with fast track visa technology */
1108 1108
 	/*else if((m->uuencode == NULL) && isuuencodebegin(line))
... ...
@@ -53,6 +53,8 @@
53 53
 #include "str.h"
54 54
 #include "dconf.h"
55 55
 #include "lockdb.h"
56
+#include "filetypes.h"
57
+#include "filetypes_int.h"
56 58
 #include "readdb.h"
57 59
 
58 60
 #include "phishcheck.h"
... ...
@@ -76,7 +78,6 @@ static pthread_mutex_t cli_ref_mutex = PTHREAD_MUTEX_INITIALIZER;
76 76
 int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo);
77 77
 int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo);
78 78
 
79
-
80 79
 int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target)
81 80
 {
82 81
 	struct cli_bm_patt *bm_new;
... ...
@@ -594,6 +595,99 @@ static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
594 594
     return CL_SUCCESS;
595 595
 }
596 596
 
597
+#define FT_TOKENS 4 /* fields per line: offset-or-'*' : hex magic : type name : CL_TYPE_xxx */
598
+static int cli_loadft(FILE *fd, struct cl_engine **engine, unsigned int options, unsigned int internal)
599
+{
600
+	const char *tokens[FT_TOKENS];
601
+	char buffer[FILEBUFF];
602
+	unsigned int line = 0;
603
+	struct cli_ftype *new;
604
+	cli_file_t type;
605
+	int ret;
606
+
607
+
608
+    if((ret = cli_initengine(engine, options))) {
609
+	cl_free(*engine);
610
+	return ret;
611
+    }
612
+
613
+    if((ret = cli_initroots(*engine, options))) {
614
+	cl_free(*engine);
615
+	return ret;
616
+    }
617
+
618
+    while(1) {
619
+	if(internal) {
620
+	    if(!ftypes_int[line])
621
+		break;
622
+	    strncpy(buffer, ftypes_int[line], sizeof(buffer) - 1); buffer[sizeof(buffer) - 1] = '\0'; /* strncpy does not terminate on truncation */
623
+	} else {
624
+	    if(!fgets(buffer, FILEBUFF, fd))
625
+		break;
626
+	    cli_chomp(buffer);
627
+	}
628
+	line++;
629
+	cli_strtokenize(buffer, ':', FT_TOKENS, tokens);
630
+
631
+	if(!tokens[0] || !tokens[1] || !tokens[2] || !tokens[3]) {
632
+	    ret = CL_EMALFDB;
633
+	    break;
634
+	}
635
+
636
+	type = cli_ftcode(tokens[3]);
637
+	if(type == CL_TYPE_ERROR) {
638
+	    ret = CL_EMALFDB;
639
+	    break;
640
+	}
641
+
642
+	if(*tokens[0] == '*') {
643
+	    if((ret = cli_parse_add((*engine)->root[0], tokens[2], tokens[1], type, NULL, 0)))
644
+		break;
645
+
646
+	} else {
647
+	    new = (struct cli_ftype *) cli_malloc(sizeof(struct cli_ftype));
648
+	    if(!new) {
649
+		ret = CL_EMEM;
650
+		break;
651
+	    }
652
+	    new->type = type;
653
+	    new->offset = atoi(tokens[0]);
654
+	    new->magic = (unsigned char *) cli_hex2str(tokens[1]);
655
+	    if(!new->magic) {
656
+		cli_errmsg("cli_loadft: Can't decode the hex string\n");
657
+		ret = CL_EMALFDB;
658
+		free(new);
659
+		break;
660
+	    }
661
+	    new->length = strlen(tokens[1]) / 2;
662
+	    new->tname = cli_strdup(tokens[2]);
663
+	    if(!new->tname) {
664
+		free(new->magic);
665
+		free(new);
666
+		ret = CL_EMEM;
667
+		break;
668
+	    }
669
+	    new->next = (*engine)->ftypes;
670
+	    (*engine)->ftypes = new;
671
+	}
672
+    }
673
+
674
+    if(!line) {
675
+	cli_errmsg("Empty %s filetype database\n", internal ? "built-in" : ".ft");
676
+	cl_free(*engine);
677
+	return CL_EMALFDB;
678
+    }
679
+
680
+    if(ret) {
681
+	cli_errmsg("Problem parsing %s filetype database at line %u\n", internal ? "built-in" : ".ft", line);
682
+	cl_free(*engine);
683
+	return ret;
684
+    }
685
+
686
+    cli_dbgmsg("Loaded %u filetype definitions\n", line);
687
+    return CL_SUCCESS;
688
+}
689
+
597 690
 static int scomp(const void *a, const void *b)
598 691
 {
599 692
     return *(const uint32_t *)a - *(const uint32_t *)b;
... ...
@@ -1022,6 +1116,9 @@ static int cli_load(const char *filename, struct cl_engine **engine, unsigned in
1022 1022
 	    ret = cli_loadpdb(fd, engine, options);
1023 1023
 	else
1024 1024
 	    skipped = 1;
1025
+    } else if(cli_strbcasestr(filename, ".ft")) {
1026
+	ret = cli_loadft(fd, engine, options, 0);
1027
+
1025 1028
     } else {
1026 1029
 	cli_dbgmsg("cli_load: unknown extension - assuming old database format\n");
1027 1030
 	ret = cli_loaddb(fd, engine, signo, options);
... ...
@@ -1061,6 +1158,7 @@ int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *sig
1061 1061
 	cli_strbcasestr(ext, ".rmd")   ||	\
1062 1062
 	cli_strbcasestr(ext, ".pdb")   ||	\
1063 1063
 	cli_strbcasestr(ext, ".wdb")   ||	\
1064
+	cli_strbcasestr(ext, ".ft")    ||	\
1064 1065
 	cli_strbcasestr(ext, ".inc")   ||	\
1065 1066
 	cli_strbcasestr(ext, ".cvd")		\
1066 1067
     )
... ...
@@ -1509,18 +1607,24 @@ void cl_free(struct cl_engine *engine)
1509 1509
     if(engine->dconf)
1510 1510
 	free(engine->dconf);
1511 1511
 
1512
+    cli_ftfree(engine->ftypes);
1512 1513
     cli_freelocks();
1513 1514
     free(engine);
1514 1515
 }
1515 1516
 
1516 1517
 int cl_build(struct cl_engine *engine)
1517 1518
 {
1518
-	int i, ret;
1519
+	unsigned int i;
1520
+	int ret;
1519 1521
 	struct cli_matcher *root;
1520 1522
 
1521 1523
 
1522
-    if((ret = cli_addtypesigs(engine)))
1523
-	return ret;
1524
+    if(!engine)
1525
+	return CL_ENULLARG;
1526
+
1527
+    if(!engine->ftypes)
1528
+	if((ret = cli_loadft(NULL, &engine, 0, 1)))
1529
+	    return ret;
1524 1530
 
1525 1531
     for(i = 0; i < CL_TARGET_TABLE_SIZE; i++)
1526 1532
 	if((root = engine->root[i]))
... ...
@@ -1768,7 +1768,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1768 1768
     type = cli_filetype2(desc, ctx->engine);
1769 1769
     lseek(desc, 0, SEEK_SET);
1770 1770
 
1771
-    if(type != CL_TYPE_DATA && ctx->engine->sdb) {
1771
+    if(type != CL_TYPE_IGNORED && ctx->engine->sdb) {
1772 1772
 	if((ret = cli_scanraw(desc, ctx, type, 0)) == CL_VIRUS)
1773 1773
 	    return CL_VIRUS;
1774 1774
 	lseek(desc, 0, SEEK_SET);
... ...
@@ -1777,6 +1777,9 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1777 1777
     type == CL_TYPE_MAIL ? ctx->mrec++ : ctx->arec++;
1778 1778
 
1779 1779
     switch(type) {
1780
+	case CL_TYPE_IGNORED:
1781
+	    break;
1782
+
1780 1783
 	case CL_TYPE_RAR:
1781 1784
 #ifdef ENABLE_UNRAR
1782 1785
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR))
... ...
@@ -1922,14 +1925,6 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1922 1922
 		ret = cli_scansis(desc, ctx);
1923 1923
 	    break;
1924 1924
 
1925
-	case CL_TYPE_DATA:
1926
-	    /* it could be a false positive and a standard DOS .COM file */
1927
-	    {
1928
-		struct stat s;
1929
-		if(fstat(desc, &s) == 0 && S_ISREG(s.st_mode) && s.st_size < 65536)
1930
-		type = CL_TYPE_UNKNOWN_DATA;
1931
-	    }
1932
-
1933 1925
 	case CL_TYPE_UNKNOWN_DATA:
1934 1926
 	    ret = cli_check_mydoom_log(desc, ctx->virname);
1935 1927
 	    break;
... ...
@@ -1947,7 +1942,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1947 1947
 	}
1948 1948
     }
1949 1949
 
1950
-    if(type != CL_TYPE_DATA && ret != CL_VIRUS && !ctx->engine->sdb) {
1950
+    if(type != CL_TYPE_IGNORED && ret != CL_VIRUS && !ctx->engine->sdb) {
1951 1951
 	if(cli_scanraw(desc, ctx, type, typercg) == CL_VIRUS)
1952 1952
 	    return CL_VIRUS;
1953 1953
     }
... ...
@@ -701,7 +701,7 @@ static int build(struct optstruct *opt)
701 701
 				 "daily.ndb", "main.ndu", "daily.ndu",
702 702
 				 "main.sdb", "daily.sdb", "main.zmd",
703 703
 				 "daily.zmd", "main.rmd", "daily.rmd",
704
-				 "main.fp", "daily.fp", "main.mdb",
704
+				 "main.fp", "daily.fp", "daily.ft", "main.mdb",
705 705
 				 "daily.mdb", "main.mdu", "daily.mdu",
706 706
 				 "daily.info", "main.info", "main.wdb",
707 707
 				 "daily.wdb", "main.pdb", "daily.pdb",