Browse code

Use new pattern matching algorithm. Cleanup.

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@674 77e5149b-7576-45b1-b177-96237e5ba77b

Tomasz Kojm authored on 2004/07/20 02:54:40
Showing 34 changed files
... ...
@@ -1,3 +1,12 @@
1
+Mon Jul 19 19:46:03 CEST 2004 (tk)
2
+----------------------------------
3
+  * libclamav: use new, faster and memory efficient algorithm (multipattern
4
+	       variant of Boyer-Moore) for static signature matching (not
5
+	       yet fully optimised)
6
+  * libclamav: API: cl_build, cl_free succeed cl_buildtrie, cl_freetrie
7
+	       (old functions still supported)
8
+  * all: minor cleanup; fix compilation warnings
9
+
1 10
 Fri Jul 16 17:32:40 CEST 2004 (tk)
2 11
 ----------------------------------
3 12
   * libclamav: scanners: fix memory leak in new code (thanks to Trog)
... ...
@@ -46,7 +46,7 @@ while test $# -gt 0; do
46 46
 	;;
47 47
 
48 48
     --version)
49
-	echo devel-20040706
49
+	echo devel-20040718
50 50
 	exit 0
51 51
 	;;
52 52
 
... ...
@@ -68,7 +68,6 @@ void clamd(struct optstruct *opt)
68 68
 	struct cl_node *root = NULL;
69 69
 	const char *dbdir, *cfgfile;
70 70
 	int ret, virnum = 0, tcpsock;
71
-	char *var;
72 71
 #ifdef C_LINUX
73 72
 	struct stat sb;
74 73
 #endif
... ...
@@ -269,7 +268,7 @@ void clamd(struct optstruct *opt)
269 269
     }
270 270
 
271 271
     logg("Protecting against %d viruses.\n", virnum);
272
-    if((ret = cl_buildtrie(root)) != 0) {
272
+    if((ret = cl_build(root)) != 0) {
273 273
 	fprintf(stderr, "ERROR: Database initialization error: %s\n", cl_strerror(ret));;
274 274
 	logg("!Database initialization error: %s\n", cl_strerror(ret));;
275 275
 	exit(1);
... ...
@@ -43,6 +43,8 @@
43 43
 #include "shared.h"
44 44
 #include "output.h"
45 45
 
46
+#include "../libclamav/others.h"
47
+
46 48
 #ifdef C_LINUX
47 49
 dev_t procdev; /* /proc device */
48 50
 #endif
... ...
@@ -229,7 +231,7 @@ int scanstream(int odesc, unsigned long int *scanned, const struct cl_node *root
229 229
 
230 230
 
231 231
     while(!bound && portscan--) {
232
-	if((port = cl_rndnum(60000)) < 1024)
232
+	if((port = cli_rndnum(60000)) < 1024)
233 233
 	    port += 2139;
234 234
 
235 235
 	memset((char *) &server, 0, sizeof(server));
... ...
@@ -144,7 +144,7 @@ static struct cl_node *reload_db(struct cl_node *root, const struct cfgstruct *c
144 144
 
145 145
     /* release old structure */
146 146
     if(root) {
147
-	cl_freetrie(root);
147
+	cl_free(root);
148 148
 	root = NULL;
149 149
     }
150 150
 
... ...
@@ -173,8 +173,8 @@ static struct cl_node *reload_db(struct cl_node *root, const struct cfgstruct *c
173 173
 	exit(-1);
174 174
     }
175 175
 
176
-    if((retval = cl_buildtrie(root)) != 0) {
177
-	logg("!Database initialization error: can't build the trie: %s\n",
176
+    if((retval = cl_build(root)) != 0) {
177
+	logg("!Database initialization error: can't build engine: %s\n",
178 178
 	cl_strerror(retval));
179 179
 	exit(-1);
180 180
     }
... ...
@@ -54,11 +54,14 @@
54 54
 #include "memory.h"
55 55
 #include "output.h"
56 56
 #include "cfgparser.h"
57
+#include "../libclamav/others.h"
57 58
 
58 59
 #ifdef C_LINUX
59 60
 dev_t procdev;
60 61
 #endif
61 62
 
63
+extern int cli_mbox(const char *dir, int desc); /* FIXME */
64
+
62 65
 int scanmanager(const struct optstruct *opt)
63 66
 {
64 67
 	mode_t fmode;
... ...
@@ -154,8 +157,7 @@ int scanmanager(const struct optstruct *opt)
154 154
 	return 50;
155 155
     }
156 156
 
157
-    /* build the proper trie */
158
-    if((ret=cl_buildtrie(trie)) != 0) {
157
+    if((ret = cl_build(trie)) != 0) {
159 158
 	mprintf("@Database initialization error: %s\n", cl_strerror(ret));;
160 159
 	return 50;
161 160
     }
... ...
@@ -243,7 +245,7 @@ int scanmanager(const struct optstruct *opt)
243 243
 		}
244 244
 		/* generate the temporary directory */
245 245
 
246
-		dir = cl_gentemp(tmpdir);
246
+		dir = cli_gentemp(tmpdir);
247 247
 		if(mkdir(dir, 0700)) {
248 248
 			mprintf("@Can't create the temporary directory %s\n", dir);
249 249
 			exit(63); /* critical */
... ...
@@ -255,7 +257,7 @@ int scanmanager(const struct optstruct *opt)
255 255
 		/*
256 256
 		 * Extract the attachments into the temporary directory
257 257
 		 */
258
-		ret = cl_mbox(dir, 0);
258
+		ret = cli_mbox(dir, 0);
259 259
 
260 260
 		if(ret == 0) {
261 261
 			/* fix permissions of extracted files */
... ...
@@ -325,7 +327,7 @@ int scanmanager(const struct optstruct *opt)
325 325
     }
326 326
 
327 327
     /* free the trie */
328
-    cl_freetrie(trie);
328
+    cl_free(trie);
329 329
 
330 330
     free(limits);
331 331
 
... ...
@@ -566,7 +568,7 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass
566 566
 
567 567
     /* generate the temporary directory */
568 568
 
569
-    gendir = cl_gentemp(tmpdir);
569
+    gendir = cli_gentemp(tmpdir);
570 570
     if(mkdir(gendir, 0700)) {
571 571
 	mprintf("@Can't create the temporary directory %s\n", gendir);
572 572
 	exit(63); /* critical */
... ...
@@ -759,7 +761,7 @@ int scandenied(const char *filename, struct cl_node *root, const struct passwd *
759 759
     }
760 760
 
761 761
     /* generate the temporary directory */
762
-    gendir = cl_gentemp(tmpdir);
762
+    gendir = cli_gentemp(tmpdir);
763 763
     if(mkdir(gendir, 0700)) {
764 764
 	mprintf("@Can't create the temporary directory %s\n", gendir);
765 765
 	exit(63); /* critical */
... ...
@@ -57,10 +57,10 @@ int main(int argc, char **argv)
57 57
 
58 58
     printf("Loaded %d signatures.\n", no);
59 59
 
60
-    /* build the final trie */
61
-    if((ret = cl_buildtrie(root))) {
60
+    /* build engine */
61
+    if((ret = cl_build(root))) {
62 62
 	printf("Database initialization error: %s\n", cl_strerror(ret));;
63
-	cl_freetrie(root); /* free the partial trie */
63
+	cl_free(root);
64 64
 	close(fd);
65 65
 	exit(2);
66 66
     }
... ...
@@ -85,7 +85,7 @@ int main(int argc, char **argv)
85 85
     mb = size * (CL_COUNT_PRECISION / 1024) / 1024.0;
86 86
     printf("Data scanned: %2.2Lf Mb\n", mb);
87 87
 
88
-    cl_freetrie(root);
88
+    cl_free(root);
89 89
 
90 90
     close(fd);
91 91
     exit(ret == CL_VIRUS ? 1 : 0);
... ...
@@ -28,6 +28,7 @@
28 28
 #include <string.h>
29 29
 #include <errno.h>
30 30
 #include <signal.h>
31
+#include <time.h>
31 32
 #include <sys/types.h>
32 33
 #include <sys/stat.h>
33 34
 #include <fcntl.h>
... ...
@@ -44,6 +44,8 @@
44 44
 #include "manager.h"
45 45
 #include "notify.h"
46 46
 #include "memory.h"
47
+#include "output.h"
48
+#include "../libclamav/others.h"
47 49
 
48 50
 int downloadmanager(const struct cfgstruct *copt, const struct optstruct *opt, const char *hostname)
49 51
 {
... ...
@@ -213,7 +215,7 @@ int downloaddb(const char *localname, const char *remotename, const char *hostna
213 213
     /* temporary file is created in clamav's directory thus we don't need
214 214
      * to create it immediately because race condition is not possible here
215 215
      */
216
-    tempname = cl_gentemp(".");
216
+    tempname = cli_gentemp(".");
217 217
 
218 218
     if((ret = get_database(remotename, hostfd, tempname, hostname, proxy, user, pass))) {
219 219
         mprintf("@Can't download %s from %s\n", remotename, ipaddr);
... ...
@@ -34,6 +34,7 @@
34 34
 
35 35
 #include "others.h"
36 36
 #include "cfgparser.h"
37
+#include "output.h"
37 38
 
38 39
 int notify(const char *cfgfile)
39 40
 {
... ...
@@ -26,6 +26,10 @@ include_HEADERS = clamav.h
26 26
 
27 27
 libclamav_la_SOURCES = \
28 28
 	clamav.h \
29
+        matcher-ac.c \
30
+        matcher-ac.h \
31
+        matcher-bm.c \
32
+        matcher-bm.h \
29 33
         matcher.c \
30 34
         matcher.h \
31 35
         md5.c \
... ...
@@ -72,13 +72,13 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"
72 72
 libLTLIBRARIES_INSTALL = $(INSTALL)
73 73
 LTLIBRARIES = $(lib_LTLIBRARIES)
74 74
 libclamav_la_DEPENDENCIES =
75
-am_libclamav_la_OBJECTS = matcher.lo md5.lo others.lo readdb.lo cvd.lo \
76
-	dsig.lo str.lo scanners.lo filetypes.lo unrarlib.lo \
77
-	zzip-dir.lo zzip-err.lo zzip-file.lo zzip-info.lo zzip-io.lo \
78
-	zzip-stat.lo zzip-zip.lo strc.lo blob.lo mbox.lo message.lo \
79
-	snprintf.lo strrcpy.lo table.lo text.lo ole2_extract.lo \
80
-	vba_extract.lo msexpand.lo pe.lo cabd.lo lzxd.lo mszipd.lo \
81
-	qtmd.lo system.lo upx.lo htmlnorm.lo
75
+am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
76
+	md5.lo others.lo readdb.lo cvd.lo dsig.lo str.lo scanners.lo \
77
+	filetypes.lo unrarlib.lo zzip-dir.lo zzip-err.lo zzip-file.lo \
78
+	zzip-info.lo zzip-io.lo zzip-stat.lo zzip-zip.lo strc.lo \
79
+	blob.lo mbox.lo message.lo snprintf.lo strrcpy.lo table.lo \
80
+	text.lo ole2_extract.lo vba_extract.lo msexpand.lo pe.lo \
81
+	cabd.lo lzxd.lo mszipd.lo qtmd.lo system.lo upx.lo htmlnorm.lo
82 82
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
83 83
 DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
84 84
 depcomp = $(SHELL) $(top_srcdir)/depcomp
... ...
@@ -86,7 +86,8 @@ am__depfiles_maybe = depfiles
86 86
 @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/blob.Plo ./$(DEPDIR)/cabd.Plo \
87 87
 @AMDEP_TRUE@	./$(DEPDIR)/cvd.Plo ./$(DEPDIR)/dsig.Plo \
88 88
 @AMDEP_TRUE@	./$(DEPDIR)/filetypes.Plo ./$(DEPDIR)/htmlnorm.Plo \
89
-@AMDEP_TRUE@	./$(DEPDIR)/lzxd.Plo ./$(DEPDIR)/matcher.Plo \
89
+@AMDEP_TRUE@	./$(DEPDIR)/lzxd.Plo ./$(DEPDIR)/matcher-ac.Plo \
90
+@AMDEP_TRUE@	./$(DEPDIR)/matcher-bm.Plo ./$(DEPDIR)/matcher.Plo \
90 91
 @AMDEP_TRUE@	./$(DEPDIR)/mbox.Plo ./$(DEPDIR)/md5.Plo \
91 92
 @AMDEP_TRUE@	./$(DEPDIR)/message.Plo ./$(DEPDIR)/msexpand.Plo \
92 93
 @AMDEP_TRUE@	./$(DEPDIR)/mszipd.Plo \
... ...
@@ -231,6 +232,10 @@ libclamav_la_LDFLAGS = @TH_SAFE@ -version-info @LIBCLAMAV_VERSION@
231 231
 include_HEADERS = clamav.h
232 232
 libclamav_la_SOURCES = \
233 233
 	clamav.h \
234
+        matcher-ac.c \
235
+        matcher-ac.h \
236
+        matcher-bm.c \
237
+        matcher-bm.h \
234 238
         matcher.c \
235 239
         matcher.h \
236 240
         md5.c \
... ...
@@ -382,6 +387,8 @@ distclean-compile:
382 382
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filetypes.Plo@am__quote@
383 383
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htmlnorm.Plo@am__quote@
384 384
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lzxd.Plo@am__quote@
385
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matcher-ac.Plo@am__quote@
386
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matcher-bm.Plo@am__quote@
385 387
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matcher.Plo@am__quote@
386 388
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbox.Plo@am__quote@
387 389
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@
... ...
@@ -29,8 +29,6 @@ extern "C"
29 29
 #endif
30 30
  
31 31
 
32
-#define CL_NUM_CHILDS 256
33
-#define CL_MIN_LENGTH 2
34 32
 
35 33
 #define CL_COUNT_PRECISION 4096
36 34
 
... ...
@@ -76,19 +74,27 @@ extern "C"
76 76
 #define CL_HTML		32
77 77
 #define CL_PE		64
78 78
 
79
-struct cli_patt {
79
+
80
+struct cli_bm_patt {
81
+    const char *pattern;
82
+    char *virname;
83
+    int length; 
84
+    struct cli_bm_patt *next;
85
+};
86
+
87
+struct cli_ac_patt {
80 88
     short int *pattern;
81 89
     unsigned int length, mindist, maxdist;
82 90
     char *virname;
83 91
     unsigned short int sigid, parts, partno, type, alt, *altn;
84 92
     char **altc;
85
-    struct cli_patt *next;
93
+    struct cli_ac_patt *next;
86 94
 };
87 95
 
88 96
 struct cli_ac_node {
89 97
     char islast;
90
-    struct cli_patt *list;
91
-    struct cli_ac_node *trans[CL_NUM_CHILDS], *fail;
98
+    struct cli_ac_patt *list;
99
+    struct cli_ac_node *trans[256], *fail;
92 100
 };
93 101
 
94 102
 struct cli_md5_node {
... ...
@@ -97,13 +103,18 @@ struct cli_md5_node {
97 97
 };
98 98
 
99 99
 struct cl_node {
100
-    /* Aho-Corasick */
101
-    struct cli_ac_node *ac_root, **nodetable;
102
-    unsigned int maxpatlen, partsigs;
103
-    unsigned int nodes;
100
+    unsigned int maxpatlen; /* maximal length of pattern in db */
101
+
102
+    /* Extended Boyer-Moore */
103
+    int *bm_shift;
104
+    struct cli_bm_patt **bm_suffix;
105
+
106
+    /* Extended Aho-Corasick */
107
+    struct cli_ac_node *ac_root, **ac_nodetable;
108
+    unsigned int ac_partsigs, ac_nodes;
104 109
 
105 110
     /* MD5 */
106
-    struct cli_md5_node *hlist[256];
111
+    struct cli_md5_node **md5_hlist;
107 112
 };
108 113
 
109 114
 struct cl_limits {
... ...
@@ -141,12 +152,13 @@ extern int cl_scandesc(int desc, const char **virname, unsigned long int *scanne
141 141
 
142 142
 extern int cl_scanfile(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options);
143 143
 
144
-/* database loading */
144
+/* database */
145 145
 extern int cl_loaddb(const char *filename, struct cl_node **root, int *virnum);
146 146
 extern int cl_loaddbdir(const char *dirname, struct cl_node **root, int *virnum);
147 147
 extern const char *cl_retdbdir(void);
148 148
 extern int cl_retflevel(void);
149 149
 
150
+/* CVD */
150 151
 extern struct cl_cvd *cl_cvdhead(const char *file);
151 152
 extern struct cl_cvd *cl_cvdparse(const char *head);
152 153
 extern int cl_cvdverify(const char *file);
... ...
@@ -162,24 +174,12 @@ extern void cl_debug(void);
162 162
 
163 163
 extern void cl_settempdir(const char *dir, short leavetemps);
164 164
 
165
-/* build a trie */
166
-extern int cl_buildtrie(struct cl_node *root);
167
-
168
-extern void cl_freetrie(struct cl_node *root);
165
+extern int cl_build(struct cl_node *root);
166
+extern void cl_free(struct cl_node *root);
169 167
 
170 168
 extern const char *cl_strerror(int clerror);
171 169
 extern const char *cl_perror(int clerror); /* deprecated */
172 170
 
173
-extern char *cl_md5buff(const char *buffer, unsigned int length);
174
-
175
-extern int cl_mbox(const char *dir, int desc);
176
-
177
-/* compute MD5 message digest from file (compatible with md5sum(1)) */
178
-extern char *cl_md5file(const char *filename);
179
-
180
-/* generate unique file name in temporary directory */
181
-char *cl_gentemp(const char *dir);
182
-
183 171
 #ifdef __cplusplus
184 172
 };
185 173
 #endif
... ...
@@ -342,7 +342,7 @@ int cli_cvdload(FILE *fd, struct cl_node **root, int *virnum)
342 342
 	tmpdir = "/tmp";
343 343
 #endif
344 344
 
345
-    dir = cl_gentemp(tmpdir);
345
+    dir = cli_gentemp(tmpdir);
346 346
     if(mkdir(dir, 0700)) {
347 347
 	cli_errmsg("cli_cvdload():  Can't create temporary directory %s\n", dir);
348 348
 	return CL_ETMPDIR;
... ...
@@ -361,7 +361,7 @@ int cli_cvdload(FILE *fd, struct cl_node **root, int *virnum)
361 361
 
362 362
 	    /* start */
363 363
 
364
-	    tmp = cl_gentemp(tmpdir);
364
+	    tmp = cli_gentemp(tmpdir);
365 365
 	    if((tmpd = fopen(tmp, "wb+")) == NULL) {
366 366
 		cli_errmsg("Can't create temporary file %s\n", tmp);
367 367
 		free(dir);
... ...
@@ -33,6 +33,7 @@
33 33
 #include "clamav.h"
34 34
 #include "others.h"
35 35
 #include "dsig.h"
36
+#include "str.h"
36 37
 
37 38
 static const char *cli_nstr = "118640995551645342603070001658453189751527774412027743746599405743243142607464144767361060640655844749760788890022283424922762488917565551002467771109669598189410434699034532232228621591089508178591428456220796841621637175567590476666928698770143328137383952820383197532047771780196576957695822641224262693037"; /* 1024 bits */
38 39
 
... ...
@@ -118,7 +119,7 @@ int cli_versig(const char *md5, const char *dsig)
118 118
 	return CL_EDSIG;
119 119
     }
120 120
 
121
-    pt2 = cl_str2hex(pt, 16);
121
+    pt2 = cli_str2hex(pt, 16);
122 122
     free(pt);
123 123
 
124 124
     cli_dbgmsg("Decoded signature: %s\n", pt2);
... ...
@@ -27,6 +27,8 @@
27 27
 
28 28
 #include "clamav.h"
29 29
 #include "filetypes.h"
30
+#include "others.h"
31
+#include "readdb.h"
30 32
 
31 33
 struct cli_magic_s {
32 34
     int offset;
... ...
@@ -21,6 +21,8 @@
21 21
 #endif
22 22
 
23 23
 #include <stdio.h>
24
+#include <string.h>
25
+#include <ctype.h>
24 26
 #include <unistd.h>
25 27
 #include <sys/types.h>
26 28
 #include <sys/stat.h>
27 29
new file mode 100644
... ...
@@ -0,0 +1,352 @@
0
+/*
1
+ *  C implementation of the Aho-Corasick pattern matching algorithm. It's based
2
+ *  on ScannerDaemon's Java version by Kurt Huwig and
3
+ *  http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
4
+ *  Thanks to Kurt Huwig for pointing me to this page.
5
+ *
6
+ *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
7
+ *
8
+ *  This program is free software; you can redistribute it and/or modify
9
+ *  it under the terms of the GNU General Public License as published by
10
+ *  the Free Software Foundation; either version 2 of the License, or
11
+ *  (at your option) any later version.
12
+ *
13
+ *  This program is distributed in the hope that it will be useful,
14
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ *  GNU General Public License for more details.
17
+ *
18
+ *  You should have received a copy of the GNU General Public License
19
+ *  along with this program; if not, write to the Free Software
20
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21
+ */
22
+
23
+#if HAVE_CONFIG_H
24
+#include "clamav-config.h"
25
+#endif
26
+
27
+#include <stdio.h>
28
+#include <string.h>
29
+#include <stdlib.h>
30
+#include <unistd.h>
31
+
32
+#include "clamav.h"
33
+#include "others.h"
34
+#include "matcher-ac.h"
35
+#include "unrarlib.h"
36
+#include "defaults.h"
37
+#include "filetypes.h"
38
+
39
+#define AC_MIN_LENGTH 2
40
+
41
/* Singly linked list cell; the trie builder uses a chain of these as its
 * work queue (see cli_enqueue/cli_dequeue). */
struct nodelist {
    struct cli_ac_node *node;   /* trie node carried by this cell */
    struct nodelist *next;      /* following cell, NULL on the last one */
};
45
+
46
+int cli_ac_addpatt(struct cl_node *root, struct cli_ac_patt *pattern)
47
+{
48
+	struct cli_ac_node *pos, *next;
49
+	int i;
50
+
51
+    if(pattern->length < AC_MIN_LENGTH)
52
+	return CL_EPATSHORT;
53
+
54
+    pos = root->ac_root;
55
+
56
+    for(i = 0; i < AC_MIN_LENGTH; i++) {
57
+	next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff]; 
58
+
59
+	if(!next) {
60
+	    next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
61
+	    if(!next) {
62
+		cli_dbgmsg("Unable to allocate pattern node (%d)\n", sizeof(struct cl_node));
63
+		return CL_EMEM;
64
+	    }
65
+
66
+	    root->ac_nodes++;
67
+	    root->ac_nodetable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, (root->ac_nodes) * sizeof(struct cli_ac_node *));
68
+	    if(root->ac_nodetable == NULL) {
69
+		cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->ac_nodes) * sizeof(struct cl_node *));
70
+		return CL_EMEM;
71
+	    }
72
+	    root->ac_nodetable[root->ac_nodes - 1] = next;
73
+
74
+	    pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
75
+	}
76
+
77
+	pos = next;
78
+    }
79
+
80
+    pos->islast = 1;
81
+
82
+    pattern->next = pos->list;
83
+    pos->list = pattern;
84
+
85
+    return 0;
86
+}
87
+
88
+static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
89
+{
90
+	struct nodelist *new;
91
+
92
+    new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
93
+    if (new == NULL) {
94
+	cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist));
95
+	return CL_EMEM;
96
+    }
97
+
98
+    new->next = *bfs;
99
+    new->node = n;
100
+    *bfs = new;
101
+    return 0;
102
+}
103
+
104
+static struct cli_ac_node *cli_dequeue(struct nodelist **bfs)
105
+{
106
+	struct nodelist *handler, *prev = NULL;
107
+	struct cli_ac_node *pt;
108
+
109
+    handler = *bfs;
110
+
111
+    while(handler && handler->next) {
112
+	prev = handler;
113
+	handler = handler->next;
114
+    }
115
+
116
+    if(!handler) {
117
+	return NULL;
118
+    } else {
119
+	pt = handler->node;
120
+	free(handler);
121
+	if(prev)
122
+	    prev->next = NULL;
123
+	else
124
+	    *bfs = NULL;
125
+
126
+	return pt;
127
+    }
128
+}
129
+
130
+static int cli_maketrans(struct cl_node *root)
131
+{
132
+	struct nodelist *bfs = NULL;
133
+	struct cli_ac_node *ac_root = root->ac_root, *child, *node;
134
+	int i, ret;
135
+
136
+
137
+    ac_root->fail = NULL;
138
+    if((ret = cli_enqueue(&bfs, ac_root)) != 0) {
139
+	return ret;
140
+    }
141
+
142
+    while((node = cli_dequeue(&bfs))) {
143
+	if(node->islast)
144
+	    continue;
145
+
146
+	for(i = 0; i < 256; i++) {
147
+	    child = node->trans[i];
148
+	    if(!child) {
149
+		if(node->fail)
150
+		    node->trans[i] = (node->fail)->trans[i];
151
+		else
152
+		    node->trans[i] = ac_root;
153
+	    } else {
154
+		if(node->fail)
155
+		    child->fail = (node->fail)->trans[i];
156
+		else
157
+		    child->fail = ac_root;
158
+
159
+		if((ret = cli_enqueue(&bfs, child)) != 0) {
160
+		    return ret;
161
+		}
162
+	    }
163
+	}
164
+    }
165
+    return 0;
166
+}
167
+
168
+int cli_ac_buildtrie(struct cl_node *root)
169
+{
170
+	int ret;
171
+
172
+    if(!root)
173
+	return CL_EMALFDB;
174
+
175
+    if(!root->ac_root) {
176
+	cli_dbgmsg("Pattern matcher not initialised\n");
177
+	return 0;
178
+    }
179
+
180
+    if((ret = cli_addtypesigs(root)))
181
+	return ret;
182
+
183
+    return cli_maketrans(root);
184
+}
185
+
186
+static void cli_freepatt(struct cli_ac_patt *list)
187
+{
188
+	struct cli_ac_patt *handler, *prev;
189
+	int i;
190
+
191
+
192
+    handler = list;
193
+
194
+    while(handler) {
195
+	free(handler->pattern);
196
+	free(handler->virname);
197
+	if(handler->alt) {
198
+	    free(handler->altn);
199
+	    for(i = 0; i < handler->alt; i++)
200
+		free(handler->altc[i]);
201
+	    free(handler->altc);
202
+	}
203
+	prev = handler;
204
+	handler = handler->next;
205
+	free(prev);
206
+    }
207
+}
208
+
209
+void cli_ac_free(struct cl_node *root)
210
+{
211
+	unsigned int i;
212
+
213
+
214
+    for(i = 0; i < root->ac_nodes; i++) {
215
+	cli_freepatt(root->ac_nodetable[i]->list);
216
+	free(root->ac_nodetable[i]);
217
+    }
218
+
219
+    if(root->ac_nodetable)
220
+	free(root->ac_nodetable);
221
+
222
+    if(root->ac_root)
223
+	free(root->ac_root);
224
+}
225
+
226
+static int inline cli_findpos(const char *buffer, int offset, int length, const struct cli_ac_patt *pattern)
227
+{
228
+	int bufferpos = offset + AC_MIN_LENGTH;
229
+	int postfixend = offset + length;
230
+	unsigned int i, j, alt = 0, found = 0;
231
+
232
+
233
+    if(bufferpos >= length)
234
+	bufferpos %= length;
235
+
236
+    for(i = AC_MIN_LENGTH; i < pattern->length; i++) {
237
+
238
+	if(bufferpos == postfixend)
239
+	    return 0;
240
+
241
+	if(pattern->pattern[i] == CLI_ALT) {
242
+	    for(j = 0; j < pattern->altn[alt]; j++) {
243
+		if(pattern->altc[alt][j] == buffer[bufferpos])
244
+		    found = 1;
245
+	    }
246
+
247
+	    if(!found)
248
+		return 0;
249
+	    alt++;
250
+
251
+	} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
252
+	    return 0;
253
+
254
+	bufferpos++;
255
+
256
+	if(bufferpos == length)
257
+	    bufferpos = 0;
258
+    }
259
+
260
+    return 1;
261
+}
262
+
263
+int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff)
264
+{
265
+	struct cli_ac_node *current;
266
+	struct cli_ac_patt *pt;
267
+	int position, type = CL_CLEAN, dist;
268
+        unsigned int i;
269
+
270
+
271
+    if(!root->ac_root) {
272
+	cli_dbgmsg("cli_ac_scanbuff: Pattern matcher not initialised\n");
273
+	return CL_CLEAN;
274
+    }
275
+
276
+    if(!partcnt || !partoff) {
277
+	cli_dbgmsg("cli_ac_scanbuff(): partcnt == NULL || partoff == NULL\n");
278
+	return CL_ENULLARG;
279
+    }
280
+
281
+    current = root->ac_root;
282
+
283
+    for(i = 0; i < length; i++)  {
284
+	current = current->trans[(unsigned char) buffer[i] & 0xff];
285
+
286
+	if(current->islast) {
287
+	    position = i - AC_MIN_LENGTH + 1;
288
+
289
+	    pt = current->list;
290
+	    while(pt) {
291
+		if(cli_findpos(buffer, position, length, pt)) {
292
+		    if(pt->sigid) { /* it's a partial signature */
293
+			if(partcnt[pt->sigid] + 1 == pt->partno) {
294
+
295
+			    dist = 1;
296
+			    if(pt->maxdist)
297
+				if(offset + i - partoff[pt->sigid] > pt->maxdist)
298
+				    dist = 0;
299
+
300
+			    if(dist && pt->mindist)
301
+				if(offset + i - partoff[pt->sigid] < pt->mindist)
302
+				    dist = 0;
303
+
304
+			    if(dist) {
305
+				partoff[pt->sigid] = offset + i + pt->length;
306
+
307
+				if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
308
+				    if(pt->type) {
309
+					if(typerec) {
310
+					    if(pt->type > type) {
311
+						cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
312
+						type = pt->type;
313
+					    }
314
+					}
315
+				    } else {
316
+					if(virname)
317
+					    *virname = pt->virname;
318
+
319
+					return CL_VIRUS;
320
+				    }
321
+				}
322
+			    }
323
+			}
324
+
325
+		    } else { /* old type signature */
326
+			if(pt->type) {
327
+			    if(typerec) {
328
+				if(pt->type > type) {
329
+				    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
330
+
331
+				    type = pt->type;
332
+				}
333
+			    }
334
+			} else {
335
+			    if(virname)
336
+				*virname = pt->virname;
337
+
338
+			    return CL_VIRUS;
339
+			}
340
+		    }
341
+		}
342
+
343
+		pt = pt->next;
344
+	    }
345
+
346
+	    current = current->fail;
347
+	}
348
+    }
349
+
350
+    return typerec ? type : CL_CLEAN;
351
+}
0 352
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+/*
1
+ *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *
3
+ *  This program is free software; you can redistribute it and/or modify
4
+ *  it under the terms of the GNU General Public License as published by
5
+ *  the Free Software Foundation; either version 2 of the License, or
6
+ *  (at your option) any later version.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
+ */
17
+
18
+#ifndef __MATCHER_AC_H
19
+#define __MATCHER_AC_H
20
+
21
+#include "clamav.h"
22
+
23
+int cli_ac_addpatt(struct cl_node *root, struct cli_ac_patt *pattern);
24
+int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff);
25
+int cli_ac_buildtrie(struct cl_node *root);
26
+void cli_ac_free(struct cl_node *root);
27
+
28
+#endif
0 29
new file mode 100644
... ...
@@ -0,0 +1,149 @@
0
+/*
1
+ *  Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *
3
+ *  This program is free software; you can redistribute it and/or modify
4
+ *  it under the terms of the GNU General Public License as published by
5
+ *  the Free Software Foundation; either version 2 of the License, or
6
+ *  (at your option) any later version.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
+ */
17
+
18
+#include "clamav.h"
19
+#include "memory.h"
20
+#include "others.h"
21
+#include "cltypes.h"
22
+
23
+#define BM_MIN_LENGTH 10
24
+#define BM_BLOCK_SIZE 3
25
+
26
/* Smaller of two values.  Arguments and the whole expansion are
 * parenthesized so the macro is safe inside larger expressions; each
 * argument may still be evaluated twice, so avoid side effects. */
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
27
+
28
+
29
+int cli_bm_addpatt(struct cl_node *root, struct cli_bm_patt *pattern)
30
+{
31
+	int i;
32
+	uint16_t idx;
33
+	const char *pt = pattern->pattern;
34
+	struct cli_bm_patt *prev, *next = NULL;
35
+
36
+
37
+    if(pattern->length < BM_MIN_LENGTH) {
38
+	cli_dbgmsg("Ignoring signature for %s (too short)\n", pattern->virname);
39
+	/* return CL_EPATSHORT; */
40
+    }
41
+
42
+    for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) {
43
+	idx = 211 * ((unsigned char) pt[i]) + 37 * ((unsigned char) pt[i + 1]) + (unsigned char) pt[i + 2];
44
+	root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
45
+    }
46
+
47
+    i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
48
+    idx = 211 * ((unsigned char) pt[i]) + 37 * ((unsigned char) pt[i + 1]) + (unsigned char) pt[i + 2];
49
+
50
+    prev = next = root->bm_suffix[idx];
51
+
52
+    while(next) {
53
+	if(next->pattern[0] >= pt[0])
54
+	    break;
55
+	prev = next;
56
+	next = next->next;
57
+    }
58
+
59
+    if(prev == root->bm_suffix[idx]) {
60
+	pattern->next = root->bm_suffix[idx];
61
+	root->bm_suffix[idx] = pattern;
62
+    } else {
63
+	pattern->next = prev->next;
64
+	prev->next = pattern;
65
+    }
66
+
67
+    return 0;
68
+}
69
+
70
+int cli_bm_init(struct cl_node *root)
71
+{
72
+	int i;
73
+
74
+
75
+    cli_dbgmsg("in cli_bm_init()\n");
76
+
77
+    if(!(root->bm_shift = (int *) cli_malloc(65536 * sizeof(int))))
78
+	return CL_EMEM;
79
+
80
+    if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(65536, sizeof(struct cli_bm_patt *)))) {
81
+	free(root->bm_shift);
82
+	return CL_EMEM;
83
+    }
84
+
85
+    for(i = 0; i < 65536; i++)
86
+	root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
87
+
88
+    return 0;
89
+}
90
+
91
+void cli_bm_free(struct cl_node *root)
92
+{
93
+    if(root->bm_shift)
94
+	free(root->bm_shift);
95
+
96
+    if(root->bm_suffix)
97
+	free(root->bm_suffix);
98
+}
99
+
100
+int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
101
+{
102
+	int i, j, shift, off, found;
103
+	uint16_t idx;
104
+	struct cli_bm_patt *p;
105
+	const char *bp;
106
+	char prefix;
107
+
108
+
109
+    for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length; ) {
110
+	idx = 211 * ((unsigned char) buffer[i]) + 37 * ((unsigned char) buffer[i + 1]) + (unsigned char) buffer[i + 2];
111
+
112
+	shift = root->bm_shift[idx];
113
+
114
+	if(shift == 0) {
115
+
116
+	    prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
117
+	    p = root->bm_suffix[idx];
118
+
119
+	    while(p && p->pattern[0] != prefix)
120
+		p = p->next;
121
+
122
+	    while(p && p->pattern[0] == prefix) {
123
+		off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
124
+		bp = buffer + off;
125
+		found = 1;
126
+		for(j = 0; j < p->length && off < length; j++, off++) {
127
+		    if(bp[j] != p->pattern[j]) {
128
+			found = 0;
129
+			break;
130
+		    }
131
+		}
132
+		if(found && p->length == j) {
133
+		    if(virname)
134
+			*virname = p->virname;
135
+
136
+		    return CL_VIRUS;
137
+		}
138
+		p = p->next;
139
+	    }
140
+
141
+	    shift = 1;
142
+	}
143
+
144
+	i += shift;
145
+    }
146
+
147
+    return 0;
148
+}
0 149
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+/*
1
+ *  Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *
3
+ *  This program is free software; you can redistribute it and/or modify
4
+ *  it under the terms of the GNU General Public License as published by
5
+ *  the Free Software Foundation; either version 2 of the License, or
6
+ *  (at your option) any later version.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
+ */
17
+
18
+#ifndef __MATCHER_BM_H
19
+#define __MATCHER_BM_H
20
+
21
+#include "clamav.h"
22
+
23
+int cli_bm_addpatt(struct cl_node *root, struct cli_bm_patt *pattern);
24
+int cli_bm_init(struct cl_node *root);
25
+int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root);
26
+void cli_bm_free(struct cl_node *root);
27
+
28
+#endif
... ...
@@ -1,9 +1,4 @@
1 1
 /*
2
- *  C implementation of the Aho-Corasick pattern matching algorithm. It's based
3
- *  on ScannerDaemon's Java version by Kurt Huwig and
4
- *  http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
5
- *  Thanks to Kurt Huwig for pointing me to this page.
6
- *
7 2
  *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
8 3
  *
9 4
  *  This program is free software; you can redistribute it and/or modify
... ...
@@ -25,345 +20,212 @@
25 25
 #include "clamav-config.h"
26 26
 #endif
27 27
 
28
-#include <stdio.h>
29 28
 #include <string.h>
30
-#include <stdlib.h>
31
-#include <unistd.h>
32 29
 
33 30
 #include "clamav.h"
34 31
 #include "others.h"
35
-#include "matcher.h"
36
-#include "unrarlib.h"
37
-#include "defaults.h"
32
+#include "matcher-ac.h"
33
+#include "matcher-bm.h"
34
+#include "md5.h"
38 35
 #include "filetypes.h"
39 36
 
40
-int cli_addpatt(struct cl_node *root, struct cli_patt *pattern)
41
-{
42
-	struct cli_ac_node *pos, *next;
43
-	int i;
44
-
45
-    if(pattern->length < CL_MIN_LENGTH) {
46
-	return CL_EPATSHORT;
47
-    }
48
-
49
-    pos = root->ac_root;
37
+#define MD5_BLOCKSIZE 4096
50 38
 
51
-    for(i = 0; i < CL_MIN_LENGTH; i++) {
52
-	next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff]; 
53 39
 
54
-	if(!next) {
55
-	    next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
56
-	    if(!next) {
57
-		cli_dbgmsg("Unable to allocate pattern node (%d)\n", sizeof(struct cl_node));
58
-		return CL_EMEM;
59
-	    }
60
-
61
-	    root->nodes++;
62
-	    root->nodetable = (struct cli_ac_node **) realloc(root->nodetable, (root->nodes) * sizeof(struct cli_ac_node *));
63
-	    if (root->nodetable == NULL) {
64
-		cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->nodes) * sizeof(struct cl_node *));
65
-		return CL_EMEM;
66
-	    }
67
-	    root->nodetable[root->nodes - 1] = next;
40
+int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
41
+{
42
+	int ret, *partcnt;
43
+	unsigned long int *partoff;
68 44
 
69
-	    pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
70
-	}
71 45
 
72
-	pos = next;
46
+    if((partcnt = (int *) cli_calloc(root->ac_partsigs + 1, sizeof(int))) == NULL) {
47
+	cli_dbgmsg("cl_scanbuff(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(int));
48
+	return CL_EMEM;
73 49
     }
74 50
 
75
-    pos->islast = 1;
76
-
77
-    pattern->next = pos->list;
78
-    pos->list = pattern;
79
-
80
-    return 0;
81
-}
82
-
83
-static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
84
-{
85
-	struct nodelist *new;
86
-
87
-    new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
88
-    if (new == NULL) {
89
-	cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist));
51
+    if((partoff = (unsigned long int *) cli_calloc(root->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
52
+	cli_dbgmsg("cl_scanbuff(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(unsigned long int));
53
+	free(partcnt);
90 54
 	return CL_EMEM;
91 55
     }
92 56
 
93
-    new->next = *bfs;
94
-    new->node = n;
95
-    *bfs = new;
96
-    return 0;
57
+    if((ret = cli_bm_scanbuff(buffer, length, virname, root)) != CL_VIRUS)
58
+	ret = cli_ac_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff);
59
+
60
+    free(partcnt);
61
+    free(partoff);
62
+    return ret;
97 63
 }
98 64
 
99
-static struct cli_ac_node *cli_dequeue(struct nodelist **bfs)
65
+static struct cli_md5_node *cli_vermd5(const char *md5, const struct cl_node *root)
100 66
 {
101
-	struct nodelist *handler, *prev = NULL;
102
-	struct cli_ac_node *pt;
103
-
104
-    handler = *bfs;
67
+	struct cli_md5_node *pt;
105 68
 
106
-    while(handler && handler->next) {
107
-	prev = handler;
108
-	handler = handler->next;
109
-    }
110 69
 
111
-    if(!handler) {
70
+    if(!(pt = root->md5_hlist[md5[0] & 0xff]))
112 71
 	return NULL;
113
-    } else {
114
-	pt = handler->node;
115
-	free(handler);
116
-	if(prev)
117
-	    prev->next = NULL;
118
-	else
119
-	    *bfs = NULL;
120
-
121
-	return pt;
122
-    }
123
-}
124
-
125
-static int cli_maketrans(struct cl_node *root)
126
-{
127
-	struct nodelist *bfs = NULL;
128
-	struct cli_ac_node *ac_root = root->ac_root, *child, *node;
129
-	int i, ret;
130 72
 
73
+    while(pt) {
74
+	if(!memcmp(pt->md5, md5, 16))
75
+	    return pt;
131 76
 
132
-    ac_root->fail = NULL;
133
-    if((ret = cli_enqueue(&bfs, ac_root)) != 0) {
134
-	return ret;
77
+	pt = pt->next;
135 78
     }
136 79
 
137
-    while((node = cli_dequeue(&bfs))) {
138
-	if(node->islast)
139
-	    continue;
140
-
141
-	for(i = 0; i < CL_NUM_CHILDS; i++) {
142
-	    child = node->trans[i];
143
-	    if(!child) {
144
-		if(node->fail)
145
-		    node->trans[i] = (node->fail)->trans[i];
146
-		else
147
-		    node->trans[i] = ac_root;
148
-	    } else {
149
-		if(node->fail)
150
-		    child->fail = (node->fail)->trans[i];
151
-		else
152
-		    child->fail = ac_root;
153
-
154
-		if((ret = cli_enqueue(&bfs, child)) != 0) {
155
-		    return ret;
156
-		}
157
-	    }
158
-	}
159
-    }
160
-    return 0;
80
+    return NULL;
161 81
 }
162 82
 
163
-int cl_buildtrie(struct cl_node *root)
83
+int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, int typerec)
164 84
 {
165
-	int ret;
85
+ 	char *buffer, *buff, *endbl, *pt;
86
+	int bytes, buffsize, length, ret, *partcnt, type = CL_CLEAN;
87
+	unsigned long int *partoff, offset = 0;
88
+	struct md5_ctx ctx;
89
+        unsigned char md5buff[16];
90
+	struct cli_md5_node *md5_node;
166 91
 
167
-    if(!root)
168
-	return CL_EMALFDB;
169 92
 
170
-    if(!root->ac_root) {
171
-	cli_dbgmsg("Pattern matcher not initialised\n");
172
-	return 0;
93
+    if(!root) {
94
+	cli_errmsg("cli_scandesc: root == NULL\n");
95
+	return CL_ENULLARG;
173 96
     }
174 97
 
175
-    if((ret = cli_addtypesigs(root)))
176
-	return ret;
98
+    /* prepare the buffer */
99
+    buffsize = root->maxpatlen + SCANBUFF;
100
+    if(!(buffer = (char *) cli_calloc(buffsize, sizeof(char)))) {
101
+	cli_dbgmsg("cli_scandesc(): unable to cli_malloc(%d)\n", buffsize);
102
+	return CL_EMEM;
103
+    }
177 104
 
178
-    return cli_maketrans(root);
179
-}
105
+    if((partcnt = (int *) cli_calloc(root->ac_partsigs + 1, sizeof(int))) == NULL) {
106
+	cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(int));
107
+	free(buffer);
108
+	return CL_EMEM;
109
+    }
180 110
 
181
-static void cli_freepatt(struct cli_patt *list)
182
-{
183
-	struct cli_patt *handler, *prev;
184
-	int i;
111
+    if((partoff = (unsigned long int *) cli_calloc(root->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
112
+	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(unsigned long int));
113
+	free(buffer);
114
+	free(partcnt);
115
+	return CL_EMEM;
116
+    }
185 117
 
118
+    if(root->md5_hlist)
119
+	md5_init_ctx (&ctx);
186 120
 
187
-    handler = list;
121
+    buff = buffer;
122
+    buff += root->maxpatlen; /* pointer to read data block */
123
+    endbl = buff + SCANBUFF - root->maxpatlen; /* pointer to the last block
124
+						* length of root->maxpatlen
125
+						*/
188 126
 
189
-    while(handler) {
190
-	free(handler->pattern);
191
-	free(handler->virname);
192
-	if(handler->alt) {
193
-	    free(handler->altn);
194
-	    for(i = 0; i < handler->alt; i++)
195
-		free(handler->altc[i]);
196
-	    free(handler->altc);
197
-	}
198
-	prev = handler;
199
-	handler = handler->next;
200
-	free(prev);
201
-    }
202
-}
127
+    pt= buff;
128
+    length = SCANBUFF;
129
+    while((bytes = read(desc, buff, SCANBUFF)) > 0) {
203 130
 
204
-void cl_freetrie(struct cl_node *root)
205
-{
206
-	unsigned int i;
131
+	if(scanned)
132
+	    *scanned += bytes / CL_COUNT_PRECISION;
207 133
 
134
+	if(bytes < SCANBUFF)
135
+	    length -= SCANBUFF - bytes;
208 136
 
209
-    for(i = 0; i < root->nodes; i++) {
210
-	cli_freepatt(root->nodetable[i]->list);
211
-	free(root->nodetable[i]);
212
-    }
137
+	if(cli_bm_scanbuff(pt, length, virname, root) == CL_VIRUS ||
138
+	   (ret = cli_ac_scanbuff(pt, length, virname, root, partcnt, typerec, offset, partoff)) == CL_VIRUS) {
139
+	    free(buffer);
140
+	    free(partcnt);
141
+	    free(partoff);
142
+	    return CL_VIRUS;
213 143
 
214
-    free(root->nodetable);
215
-    free(root->ac_root);
216
-    free(root);
217
-}
144
+	} else if(typerec && ret >= CL_TYPENO) {
145
+	    if(ret >= type)
146
+		type = ret;
147
+	}
218 148
 
219
-int inline cli_findpos(const char *buffer, int offset, int length, const struct cli_patt *pattern)
220
-{
221
-	int bufferpos = offset + CL_MIN_LENGTH;
222
-	int postfixend = offset + length;
223
-	unsigned int i, j, alt = 0, found = 0;
149
+	if(bytes == SCANBUFF) {
150
+	    memmove(buffer, endbl, root->maxpatlen);
151
+	    offset += bytes - root->maxpatlen;
152
+	}
224 153
 
154
+        pt = buffer;
155
+        length = buffsize;
225 156
 
226
-    if(bufferpos >= length)
227
-	bufferpos %= length;
157
+	/* compute MD5 */
228 158
 
229
-    for(i = CL_MIN_LENGTH; i < pattern->length; i++) {
159
+	if(root->md5_hlist) {
160
+	    if(bytes % 64 == 0) {
161
+		md5_process_block(buff, bytes, &ctx);
162
+	    } else {
163
+		    int block = bytes;
164
+		    char *mpt = buff;
230 165
 
231
-	if(bufferpos == postfixend)
232
-	    return 0;
166
+		while(block >= MD5_BLOCKSIZE) {
167
+		    md5_process_block(mpt, MD5_BLOCKSIZE, &ctx);
168
+		    mpt += MD5_BLOCKSIZE;
169
+		    block -= MD5_BLOCKSIZE;
170
+		}
233 171
 
234
-	if(pattern->pattern[i] == CLI_ALT) {
235
-	    for(j = 0; j < pattern->altn[alt]; j++) {
236
-		if(pattern->altc[alt][j] == buffer[bufferpos])
237
-		    found = 1;
172
+		if(block)
173
+		    md5_process_bytes(mpt, block, &ctx);
238 174
 	    }
175
+	}
176
+    }
239 177
 
240
-	    if(!found)
241
-		return 0;
242
-	    alt++;
243
-
244
-	} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
245
-	    return 0;
178
+    free(buffer);
179
+    free(partcnt);
180
+    free(partoff);
246 181
 
247
-	bufferpos++;
182
+    if(root->md5_hlist) {
183
+	md5_finish_ctx(&ctx, &md5buff);
248 184
 
249
-	if(bufferpos == length)
250
-	    bufferpos = 0;
185
+	if((md5_node = cli_vermd5(md5buff, root))) {
186
+	    if(virname)
187
+		*virname = md5_node->virname;
188
+	    return CL_VIRUS;
189
+	}
251 190
     }
252 191
 
253
-    return 1;
192
+    return typerec ? type : CL_CLEAN;
254 193
 }
255 194
 
256
-int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff)
195
+int cl_build(struct cl_node *root)
257 196
 {
258
-	struct cli_ac_node *current;
259
-	struct cli_patt *pt;
260
-	int position, type = CL_CLEAN, dist;
261
-        unsigned int i;
197
+    return cli_ac_buildtrie(root);
198
+}
262 199
 
200
+void cl_free(struct cl_node *root)
201
+{
202
+	int i;
203
+	struct cli_md5_node *pt, *h;
263 204
 
264
-    if(!root->ac_root) {
265
-	cli_dbgmsg("cli_scanbuff: Pattern matcher not initialised\n");
266
-	return CL_CLEAN;
267
-    }
268 205
 
269
-    if(!partcnt || !partoff) {
270
-	cli_dbgmsg("cli_scanbuff(): partcnt == NULL || partoff == NULL\n");
271
-	return CL_EMEM;
206
+    if(!root) {
207
+	cli_errmsg("cl_free: root == NULL\n");
208
+	return;
272 209
     }
273 210
 
274
-    current = root->ac_root;
275
-
276
-    for(i = 0; i < length; i++)  {
277
-	current = current->trans[(unsigned char) buffer[i] & 0xff];
278
-
279
-	if(current->islast) {
280
-	    position = i - CL_MIN_LENGTH + 1;
281
-
282
-	    pt = current->list;
283
-	    while(pt) {
284
-		if(cli_findpos(buffer, position, length, pt)) {
285
-		    if(pt->sigid) { /* it's a partial signature */
286
-			if(partcnt[pt->sigid] + 1 == pt->partno) {
287
-
288
-			    dist = 1;
289
-			    if(pt->maxdist)
290
-				if(offset + i - partoff[pt->sigid] > pt->maxdist)
291
-				    dist = 0;
292
-
293
-			    if(dist && pt->mindist)
294
-				if(offset + i - partoff[pt->sigid] < pt->mindist)
295
-				    dist = 0;
296
-
297
-			    if(dist) {
298
-				partoff[pt->sigid] = offset + i + pt->length;
299
-
300
-				if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
301
-				    if(pt->type) {
302
-					if(typerec) {
303
-					    if(pt->type > type) {
304
-						cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
305
-						type = pt->type;
306
-					    }
307
-					}
308
-				    } else {
309
-					if(virname)
310
-					    *virname = pt->virname;
311
-
312
-					return CL_VIRUS;
313
-				    }
314
-				}
315
-			    }
316
-			}
317
-
318
-		    } else { /* old type signature */
319
-			if(pt->type) {
320
-			    if(typerec) {
321
-				if(pt->type > type) {
322
-				    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
323
-
324
-				    type = pt->type;
325
-				}
326
-			    }
327
-			} else {
328
-			    if(virname)
329
-				*virname = pt->virname;
330
-
331
-			    return CL_VIRUS;
332
-			}
333
-		    }
334
-		}
211
+    cli_ac_free(root);
212
+    cli_bm_free(root);
335 213
 
214
+    if(root->md5_hlist) {
215
+	for(i = 0; i < 256; i++) {
216
+	    while((pt = root->md5_hlist[i])) {
217
+		h = pt;
336 218
 		pt = pt->next;
219
+		free(h);
337 220
 	    }
338
-
339
-	    current = current->fail;
340 221
 	}
222
+	free(root->md5_hlist);
341 223
     }
342 224
 
343
-    return typerec ? type : CL_CLEAN;
225
+    free(root);
344 226
 }
345 227
 
346
-int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
347
-
228
+int cl_buildtrie(struct cl_node *root) /* for backward compatibility */
348 229
 {
349
-	int ret, *partcnt;
350
-	unsigned long int *partoff;
351
-
352
-
353
-    if((partcnt = (int *) cli_calloc(root->partsigs + 1, sizeof(int))) == NULL) {
354
-	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(int));
355
-	return CL_EMEM;
356
-    }
357
-
358
-    if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
359
-	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
360
-	free(partcnt);
361
-	return CL_EMEM;
362
-    }
363
-
364
-    ret = cli_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff);
230
+    return cl_build(root);
231
+}
365 232
 
366
-    free(partcnt);
367
-    free(partoff);
368
-    return ret;
233
+void cl_freetrie(struct cl_node *root) /* for backward compatibility */
234
+{
235
+    return cl_free(root);
369 236
 }
... ...
@@ -21,16 +21,6 @@
21 21
 
22 22
 #include "clamav.h"
23 23
 
24
-struct nodelist {
25
-    struct cli_ac_node *node;
26
-    struct nodelist *next;
27
-};
28
-
29
-int cli_addpatt(struct cl_node *root, struct cli_patt *pattern);
30
-struct nodelist *cli_bfsadd(struct nodelist *bfs, struct cl_node *n);
31
-void cli_failtrans(struct cl_node *root);
32
-void cli_fasttrie(struct cl_node *n, struct cl_node *root);
33
-int cli_findpos(const char *buffer, int offset, int length, const struct cli_patt *pattern);
34
-int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff);
24
+int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, int typerec);
35 25
 
36 26
 #endif
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.87  2004/07/19 17:54:40  kojm
21
+ * Use new patter matching algorithm. Cleanup.
22
+ *
20 23
  * Revision 1.86  2004/07/06 09:32:45  nigelhorne
21 24
  * Better handling of Gibe.3 boundary exploit
22 25
  *
... ...
@@ -246,7 +249,7 @@
246 246
  * Compilable under SCO; removed duplicate code with message.c
247 247
  *
248 248
  */
249
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.86 2004/07/06 09:32:45 nigelhorne Exp $";
249
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.87 2004/07/19 17:54:40 kojm Exp $";
250 250
 
251 251
 #if HAVE_CONFIG_H
252 252
 #include "clamav-config.h"
... ...
@@ -428,7 +431,7 @@ static	table_t	*rfc821Table, *subtypeTable;
428 428
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
429 429
  */
430 430
 int
431
-cl_mbox(const char *dir, int desc)
431
+cli_mbox(const char *dir, int desc)
432 432
 {
433 433
 	int retcode, i;
434 434
 	message *m, *body;
... ...
@@ -41,7 +41,7 @@
41 41
 
42 42
 #ifdef CL_THREAD_SAFE
43 43
 #  include <pthread.h>
44
-pthread_mutex_t cl_gentemp_mutex = PTHREAD_MUTEX_INITIALIZER;
44
+pthread_mutex_t cli_gentemp_mutex = PTHREAD_MUTEX_INITIALIZER;
45 45
 #endif
46 46
 
47 47
 #include "clamav.h"
... ...
@@ -163,20 +163,13 @@ const char *cl_perror(int clerror)
163 163
     return cl_strerror(clerror);
164 164
 }
165 165
 
166
-char *cl_md5file(const char *filename)
166
+char *cli_md5stream(FILE *fd)
167 167
 {
168
-	FILE *fd;
169 168
 	unsigned char buffer[16];
170 169
 	char *md5str;
171 170
 	int i, cnt=0;
172 171
 
173
-    if((fd = fopen(filename, "rb")) == NULL) {
174
-	cli_errmsg("md5_file(): Can't read file %s\n", filename);
175
-	return NULL;
176
-    }
177
-
178 172
     md5_stream(fd, &buffer);
179
-    fclose(fd);
180 173
 
181 174
     md5str = (char*) calloc(32 + 1, sizeof(char));
182 175
 
... ...
@@ -186,13 +179,21 @@ char *cl_md5file(const char *filename)
186 186
     return(md5str);
187 187
 }
188 188
 
189
-char *cli_md5stream(FILE *fd)
189
+char *cli_md5file(const char *filename)
190 190
 {
191
+	FILE *fd;
191 192
 	unsigned char buffer[16];
192 193
 	char *md5str;
193 194
 	int i, cnt=0;
194 195
 
196
+
197
+    if((fd = fopen(filename, "rb")) == NULL) {
198
+	cli_errmsg("cli_md5file(): Can't read file %s\n", filename);
199
+	return NULL;
200
+    }
201
+
195 202
     md5_stream(fd, &buffer);
203
+    fclose(fd);
196 204
 
197 205
     md5str = (char*) calloc(32 + 1, sizeof(char));
198 206
 
... ...
@@ -202,7 +203,7 @@ char *cli_md5stream(FILE *fd)
202 202
     return(md5str);
203 203
 }
204 204
 
205
-char *cl_md5buff(const char *buffer, unsigned int len)
205
+static char *cli_md5buff(const char *buffer, unsigned int len)
206 206
 {
207 207
 	unsigned char md5buff[16];
208 208
 	char *md5str;
... ...
@@ -264,7 +265,7 @@ void *cli_realloc(void *ptr, size_t size)
264 264
     } else return alloc;
265 265
 }
266 266
 
267
-unsigned int cl_rndnum(unsigned int max)
267
+unsigned int cli_rndnum(unsigned int max)
268 268
 {
269 269
     struct timeval tv;
270 270
 
... ...
@@ -292,7 +293,7 @@ void cl_settempdir(const char *dir, short leavetemps)
292 292
     cli_leavetemps_flag = leavetemps;
293 293
 }
294 294
 
295
-char *cl_gentemp(const char *dir)
295
+char *cli_gentemp(const char *dir)
296 296
 {
297 297
 	char *name, *tmp;
298 298
         const char *mdir;
... ...
@@ -308,28 +309,28 @@ char *cl_gentemp(const char *dir)
308 308
 
309 309
     name = (char*) cli_calloc(strlen(mdir) + 1 + 16 + 1 + 7, sizeof(char));
310 310
     if(name == NULL) {
311
-	cli_dbgmsg("cl_gentemp('%s'): out of memory\n", dir);
311
+	cli_dbgmsg("cli_gentemp('%s'): out of memory\n", dir);
312 312
 	return NULL;
313 313
     }
314 314
 
315 315
 #ifdef CL_THREAD_SAFE
316
-    pthread_mutex_lock(&cl_gentemp_mutex);
316
+    pthread_mutex_lock(&cli_gentemp_mutex);
317 317
 #endif
318 318
 
319 319
     memcpy(salt, oldmd5buff, 16);
320 320
 
321 321
     do {
322 322
 	for(i = 16; i < 48; i++)
323
-	    salt[i] = cl_rndnum(255);
323
+	    salt[i] = cli_rndnum(255);
324 324
 
325
-	tmp = cl_md5buff(( char* ) salt, 48);
325
+	tmp = cli_md5buff(( char* ) salt, 48);
326 326
 	sprintf(name, "%s/clamav-", mdir);
327 327
 	strncat(name, tmp, 16);
328 328
 	free(tmp);
329 329
     } while(stat(name, &foo) != -1);
330 330
 
331 331
 #ifdef CL_THREAD_SAFE
332
-    pthread_mutex_unlock(&cl_gentemp_mutex);
332
+    pthread_mutex_unlock(&cli_gentemp_mutex);
333 333
 #endif
334 334
 
335 335
     return(name);
... ...
@@ -456,11 +457,13 @@ int cli_writen(int fd, void *buff, unsigned int count)
456 456
 
457 457
 int32_t cli_readint32(const char *buff)
458 458
 {
459
-	int32_t ret, shift, i = 0;
459
+	int32_t ret;
460 460
 
461 461
 #if WORDS_BIGENDIAN == 0
462 462
     ret = *(int32_t *) buff;
463 463
 #else
464
+	int32_t shift, i = 0;
465
+
464 466
     ret = 0;
465 467
     for(shift = 0; shift < 32; shift += 8) {
466 468
       ret |= (buff[i] & 0xff ) << shift;
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 1999-2002 Tomasz Kojm <zolw@konarski.edu.pl>
2
+ *  Copyright (C) 1999 - 2004 Tomasz Kojm <tk@clamav.net>
3 3
  *
4 4
  *  This program is free software; you can redistribute it and/or modify
5 5
  *  it under the terms of the GNU General Public License as published by
... ...
@@ -16,8 +16,8 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  */
18 18
 
19
-#ifndef __OTHERS_H
20
-#define __OTHERS_H
19
+#ifndef __OTHERS_H_LC
20
+#define __OTHERS_H_LC
21 21
 
22 22
 #include <stdio.h>
23 23
 #include <stdlib.h>
... ...
@@ -34,5 +34,8 @@ char *cli_md5stream(FILE *fd);
34 34
 int cli_readn(int fd, void *buff, unsigned int count);
35 35
 int cli_writen(int fd, void *buff, unsigned int count);
36 36
 int32_t cli_readint32(const char *buff);
37
+char *cli_gentemp(const char *dir);
38
+unsigned int cli_rndnum(unsigned int max);
39
+char *cli_md5file(const char *filename);
37 40
 
38 41
 #endif
... ...
@@ -229,7 +229,7 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
229 229
 	uint16_t nsections;
230 230
 	uint32_t e_lfanew; /* address of new exe header */
231 231
 	uint32_t ep; /* entry point (raw) */
232
-	uint32_t timestamp;
232
+	time_t timestamp;
233 233
 	struct pe_image_file_hdr file_hdr;
234 234
 	struct pe_image_optional_hdr optional_hdr;
235 235
 	struct pe_image_section_hdr *section_hdr;
... ...
@@ -313,8 +313,8 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
313 313
     nsections = EC16(file_hdr.NumberOfSections);
314 314
     cli_dbgmsg("NumberOfSections: %d\n", nsections);
315 315
 
316
-    timestamp = EC32(file_hdr.TimeDateStamp);
317
-    cli_dbgmsg("TimeDateStamp: %s", ctime((time_t *) &timestamp));
316
+    timestamp = (time_t) EC32(file_hdr.TimeDateStamp);
317
+    cli_dbgmsg("TimeDateStamp: %s", ctime(&timestamp));
318 318
 
319 319
     cli_dbgmsg("SizeOfOptionalHeader: %d\n", EC16(file_hdr.SizeOfOptionalHeader));
320 320
 
... ...
@@ -396,7 +396,7 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
396 396
 		cli_dbgmsg("Section contains free space\n");
397 397
 		/*
398 398
 		cli_dbgmsg("Dumping %d bytes\n", section_hdr.SizeOfRawData - section_hdr.VirtualSize);
399
-		ddump(desc, section_hdr.PointerToRawData + section_hdr.VirtualSize, section_hdr.SizeOfRawData - section_hdr.VirtualSize, cl_gentemp(NULL));
399
+		ddump(desc, section_hdr.PointerToRawData + section_hdr.VirtualSize, section_hdr.SizeOfRawData - section_hdr.VirtualSize, cli_gentemp(NULL));
400 400
 		*/
401 401
 
402 402
 	    }
... ...
@@ -410,7 +410,7 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
410 410
 	    int ptrd = section_hdr.PointerToRawData & ~(optional_hdr.FileAlignment - 1);
411 411
 
412 412
 	    cli_dbgmsg("WinZip section\n");
413
-	    ddump(desc, ptrd, section_hdr.SizeOfRawData, cl_gentemp(NULL));
413
+	    ddump(desc, ptrd, section_hdr.SizeOfRawData, cli_gentemp(NULL));
414 414
 	}
415 415
 */
416 416
 
... ...
@@ -528,7 +528,7 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
528 528
 	}
529 529
 
530 530
 	if(upxfn) {
531
-		int ret, skew = cli_readint32(buff + 2) - EC32(optional_hdr.ImageBase) - EC32(section_hdr[i+1].VirtualAddress);
531
+		int skew = cli_readint32(buff + 2) - EC32(optional_hdr.ImageBase) - EC32(section_hdr[i+1].VirtualAddress);
532 532
 
533 533
 	    if(buff[1] != '\xbe' || skew <= 0 || skew > 0x2e ) { /* FIXME: legit skews?? */
534 534
 		skew = 0; 
... ...
@@ -580,7 +580,7 @@ int cli_scanpe(int desc, const char **virname, long int *scanned, const struct c
580 580
 	    int ndesc;
581 581
 
582 582
 	    if(cli_leavetemps_flag) {
583
-		tempfile = cl_gentemp(NULL);
583
+		tempfile = cli_gentemp(NULL);
584 584
 		if((ndesc = open(tempfile, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU)) < 0) {
585 585
 		    cli_dbgmsg("UPX: Can't create file %s\n", tempfile);
586 586
 		    free(section_hdr);
... ...
@@ -33,20 +33,22 @@
33 33
 #include "clamav.h"
34 34
 #include "cvd.h"
35 35
 #include "strings.h"
36
-#include "matcher.h"
36
+#include "matcher-ac.h"
37
+#include "matcher-bm.h"
37 38
 #include "others.h"
38 39
 #include "str.h"
39 40
 #include "defaults.h"
40 41
 
42
+/* TODO: clean up the code */
41 43
 
42 44
 static int cli_addsig(struct cl_node *root, const char *virname, const char *hexsig, int sigid, int parts, int partno, int type, unsigned int mindist, unsigned int maxdist)
43 45
 {
44
-	struct cli_patt *new;
46
+	struct cli_ac_patt *new;
45 47
 	char *pt, *hex;
46 48
 	int virlen, ret, i, error = 0;
47 49
 
48 50
 
49
-    if((new = (struct cli_patt *) cli_calloc(1, sizeof(struct cli_patt))) == NULL)
51
+    if((new = (struct cli_ac_patt *) cli_calloc(1, sizeof(struct cli_ac_patt))) == NULL)
50 52
 	return CL_EMEM;
51 53
 
52 54
     new->type = type;
... ...
@@ -200,7 +202,7 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
200 200
 
201 201
     strncpy(new->virname, virname, virlen);
202 202
 
203
-    if((ret = cli_addpatt(root, new))) {
203
+    if((ret = cli_ac_addpatt(root, new))) {
204 204
 	free(new->virname);
205 205
 	if(new->alt) {
206 206
 	    free(new->altn);
... ...
@@ -219,9 +221,9 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
219 219
     return 0;
220 220
 }
221 221
 
222
-int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int type)
222
+int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, int type)
223 223
 {
224
-	struct cli_patt *new;
224
+	struct cli_bm_patt *bm_new;
225 225
 	char *pt, *hexcpy, *start, *n;
226 226
 	int ret, virlen, parts = 0, i, len;
227 227
 	int mindist = 0, maxdist = 0, error = 0;
... ...
@@ -229,12 +231,11 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
229 229
 
230 230
     if(strchr(hexsig, '{')) {
231 231
 
232
-	root->partsigs++;
232
+	root->ac_partsigs++;
233 233
 
234 234
 	if(!(hexcpy = strdup(hexsig)))
235 235
 	    return CL_EMEM;
236 236
 
237
-
238 237
 	len = strlen(hexsig);
239 238
 	for(i = 0; i < len; i++)
240 239
 	    if(hexsig[i] == '{')
... ...
@@ -251,7 +252,7 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
251 251
 		*pt++ = 0;
252 252
 	    }
253 253
 
254
-	    if((ret = cli_addsig(root, virname, start, root->partsigs, parts, i, type, mindist, maxdist))) {
254
+	    if((ret = cli_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist))) {
255 255
 		cli_errmsg("cli_parse_add(): Problem adding signature.\n");
256 256
 		error = 1;
257 257
 		break;
... ...
@@ -303,14 +304,14 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
303 303
 	    return CL_EMALFDB;
304 304
 
305 305
     } else if(strchr(hexsig, '*')) {
306
-	root->partsigs++;
306
+	root->ac_partsigs++;
307 307
 
308 308
 	len = strlen(hexsig);
309 309
 	for(i = 0; i < len; i++)
310 310
 	    if(hexsig[i] == '*')
311 311
 		parts++;
312 312
 
313
-	if(parts) /* there's always one part more */
313
+	if(parts)
314 314
 	    parts++;
315 315
 
316 316
 	for(i = 1; i <= parts; i++) {
... ...
@@ -319,7 +320,7 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
319 319
 		return CL_EMALFDB;
320 320
 	    }
321 321
 
322
-	    if((ret = cli_addsig(root, virname, pt, root->partsigs, parts, i, type, 0, 0))) {
322
+	    if((ret = cli_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0))) {
323 323
 		cli_errmsg("cli_parse_add(): Problem adding signature.\n");
324 324
 		free(pt);
325 325
 		return ret;
... ...
@@ -328,9 +329,44 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
328 328
 	    free(pt);
329 329
 	}
330 330
 
331
-    } else { /* static */
331
+    } else if(strpbrk(hexsig, "?(") || type) {
332 332
 	if((ret = cli_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0))) {
333
-	    cli_errmsg("cli_parse_add(): Problem adding signature.\n");
333
+	    cli_errmsg("cli_parse_add(): Problem adding signature\n");
334
+	    return ret;
335
+	}
336
+
337
+    } else {
338
+	bm_new = (struct cli_bm_patt *) calloc(1, sizeof(struct cli_bm_patt));
339
+	if(!bm_new)
340
+	    return CL_EMEM;
341
+
342
+	if(!(bm_new->pattern = cli_hex2str(hexsig)))
343
+	    return CL_EMALFDB;
344
+
345
+	bm_new->length = strlen(hexsig) / 2;
346
+
347
+	if((pt = strstr(virname, "(Clam)")))
348
+	    virlen = strlen(virname) - strlen(pt) - 1;
349
+	else
350
+	    virlen = strlen(virname);
351
+
352
+	if(virlen <= 0) {
353
+	    free(bm_new);
354
+	    return CL_EMALFDB;
355
+	}
356
+
357
+	if((bm_new->virname = cli_calloc(virlen + 1, sizeof(char))) == NULL) {
358
+	    free(bm_new);
359
+	    return CL_EMEM;
360
+	}
361
+
362
+	strncpy(bm_new->virname, virname, virlen);
363
+
364
+	if(bm_new->length > root->maxpatlen)
365
+	    root->maxpatlen = bm_new->length;
366
+
367
+	if((ret = cli_bm_addpatt(root, bm_new))) {
368
+	    cli_errmsg("cli_parse_add(): Problem adding signature\n");
334 369
 	    return ret;
335 370
 	}
336 371
     }
... ...
@@ -356,10 +392,19 @@ static int cli_loaddb(FILE *fd, struct cl_node **root, int *virnum)
356 356
 	(*root)->ac_root =  (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
357 357
 	if(!(*root)->ac_root) {
358 358
 	    free(*root);
359
+	    cli_errmsg("Can't initialise AC pattern matcher\n");
359 360
 	    return CL_EMEM;
360 361
 	}
361 362
     }
362 363
 
364
+    if(!(*root)->bm_shift) {
365
+	cli_dbgmsg("Initializing BM tables\n");
366
+	if((ret = cli_bm_init(*root))) {
367
+	    cli_errmsg("Can't initialise BM pattern matcher\n");
368
+	    return ret;
369
+	}
370
+    }
371
+
363 372
     while(fgets(buffer, FILEBUFF, fd)) {
364 373
 	line++;
365 374
 	cli_chomp(buffer);
... ...
@@ -385,12 +430,12 @@ static int cli_loaddb(FILE *fd, struct cl_node **root, int *virnum)
385 385
 
386 386
     if(!line) {
387 387
 	cli_errmsg("Empty database file\n");
388
-	/* FIXME: release memory */
388
+	cl_free(*root);
389 389
 	return CL_EMALFDB;
390 390
     }
391 391
 
392 392
     if(ret) {
393
-	/* FIXME: release memory */
393
+	cl_free(*root);
394 394
 	return ret;
395 395
     }
396 396
 
... ...
@@ -402,7 +447,7 @@ static int cli_loaddb(FILE *fd, struct cl_node **root, int *virnum)
402 402
 
403 403
 static int cli_loadhdb(FILE *fd, struct cl_node **root, int *virnum)
404 404
 {
405
-	char buffer[FILEBUFF], *pt, *start;
405
+	char buffer[FILEBUFF], *pt;
406 406
 	int line = 0, ret = 0;
407 407
 	struct cli_md5_node *new;
408 408
 
... ...
@@ -448,18 +493,27 @@ static int cli_loadhdb(FILE *fd, struct cl_node **root, int *virnum)
448 448
 
449 449
 	new->viralias = cli_strtok(buffer, 1, ":"); /* aliases are optional */
450 450
 
451
-	new->next = (*root)->hlist[new->md5[0] & 0xff];
452
-	(*root)->hlist[new->md5[0] & 0xff] = new;
451
+	if(!(*root)->md5_hlist) {
452
+	    cli_dbgmsg("Initializing md5 list structure\n");
453
+	    (*root)->md5_hlist = (struct cli_md5_node **) cli_calloc(256, sizeof(struct cli_md5_node *));
454
+	    if(!(*root)->md5_hlist) {
455
+		ret = CL_EMEM;
456
+		break;
457
+	    }
458
+	}
459
+
460
+	new->next = (*root)->md5_hlist[new->md5[0] & 0xff];
461
+	(*root)->md5_hlist[new->md5[0] & 0xff] = new;
453 462
     }
454 463
 
455 464
     if(!line) {
456 465
 	cli_errmsg("Empty database file\n");
457
-	/* FIXME: release memory */
466
+	cl_free(*root);
458 467
 	return CL_EMALFDB;
459 468
     }
460 469
 
461 470
     if(ret) {
462
-	/* FIXME: release memory */
471
+	cl_free(*root);
463 472
 	return ret;
464 473
     }
465 474
 
... ...
@@ -583,7 +637,12 @@ int cl_statinidir(const char *dirname, struct cl_stat *dbstat)
583 583
 	if(dent->d_ino)
584 584
 #endif
585 585
 	{
586
-	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && (cli_strbcasestr(dent->d_name, ".db") || cli_strbcasestr(dent->d_name, ".db2") || cli_strbcasestr(dent->d_name, ".db3") || cli_strbcasestr(dent->d_name, ".hdb") || cli_strbcasestr(dent->d_name, ".cvd"))) {
586
+	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") &&
587
+	    (cli_strbcasestr(dent->d_name, ".db")  ||
588
+	    cli_strbcasestr(dent->d_name, ".db2")  || 
589
+	    cli_strbcasestr(dent->d_name, ".db3")  || 
590
+	    cli_strbcasestr(dent->d_name, ".hdb")  || 
591
+	    cli_strbcasestr(dent->d_name, ".cvd"))) {
587 592
 
588 593
 		dbstat->no++;
589 594
 		dbstat->stattab = (struct stat *) realloc(dbstat->stattab, dbstat->no * sizeof(struct stat));
... ...
@@ -625,7 +684,12 @@ int cl_statchkdir(const struct cl_stat *dbstat)
625 625
 	if(dent->d_ino)
626 626
 #endif
627 627
 	{
628
-	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && (cli_strbcasestr(dent->d_name, ".db") || cli_strbcasestr(dent->d_name, ".db2") || cli_strbcasestr(dent->d_name, ".db3") || cli_strbcasestr(dent->d_name, ".hdb") || cli_strbcasestr(dent->d_name, ".cvd"))) {
628
+	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") &&
629
+	    (cli_strbcasestr(dent->d_name, ".db")  ||
630
+	    cli_strbcasestr(dent->d_name, ".db2")  || 
631
+	    cli_strbcasestr(dent->d_name, ".db3")  || 
632
+	    cli_strbcasestr(dent->d_name, ".hdb")  || 
633
+	    cli_strbcasestr(dent->d_name, ".cvd"))) {
629 634
 
630 635
                 fname = cli_calloc(strlen(dbstat->dir) + strlen(dent->d_name) + 2, sizeof(char));
631 636
 		sprintf(fname, "%s/%s", dbstat->dir, dent->d_name);
... ...
@@ -666,7 +730,7 @@ int cl_statfree(struct cl_stat *dbstat)
666 666
 	    dbstat->dir = NULL;
667 667
 	}
668 668
     } else {
669
-        cli_errmsg("cl_statfree(): Null argument passed.\n");
669
+        cli_errmsg("cl_statfree(): Null argument passed\n");
670 670
 	return CL_ENULLARG;
671 671
     }
672 672
 
... ...
@@ -19,6 +19,6 @@
19 19
 #ifndef __READDB_H
20 20
 #define __READDB_H
21 21
 
22
-int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int type);
22
+int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, int type);
23 23
 
24 24
 #endif
... ...
@@ -47,8 +47,12 @@ int cli_scanrar_inuse = 0;
47 47
 
48 48
 extern short cli_leavetemps_flag;
49 49
 
50
+extern int cli_mbox(const char *dir, int desc); /* FIXME */
51
+
50 52
 #include "clamav.h"
51 53
 #include "others.h"
54
+#include "matcher-ac.h"
55
+#include "matcher-bm.h"
52 56
 #include "matcher.h"
53 57
 #include "unrarlib.h"
54 58
 #include "ole2_extract.h"
... ...
@@ -57,7 +61,6 @@ extern short cli_leavetemps_flag;
57 57
 #include "pe.h"
58 58
 #include "filetypes.h"
59 59
 #include "htmlnorm.h"
60
-#include "md5.h"
61 60
 
62 61
 #ifdef HAVE_ZLIB_H
63 62
 #include <zlib.h>
... ...
@@ -78,131 +81,11 @@ extern short cli_leavetemps_flag;
78 78
 
79 79
 #define MAX_MAIL_RECURSION  15
80 80
 
81
-#define MD5_BLOCKSIZE 4096
82 81
 
83 82
 static int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec);
84 83
 static int cli_scanfile(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec);
85 84
 
86 85
 
87
-struct cli_md5_node *cli_vermd5(const char *md5, const struct cl_node *root)
88
-{
89
-	struct cli_md5_node *pt;
90
-
91
-
92
-    if(!(pt = root->hlist[md5[0] & 0xff]))
93
-	return NULL;
94
-
95
-    while(pt) {
96
-	if(!memcmp(pt->md5, md5, 16))
97
-	    return pt;
98
-
99
-	pt = pt->next;
100
-    }
101
-
102
-    return NULL;
103
-}
104
-
105
-static int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, int typerec)
106
-{
107
- 	char *buffer, *buff, *endbl, *pt;
108
-	int bytes, buffsize, length, ret, *partcnt, type = CL_CLEAN;
109
-	unsigned long int *partoff, offset = 0;
110
-	struct md5_ctx ctx;
111
-        unsigned char md5buff[16];
112
-	struct cli_md5_node *md5_node;
113
-
114
-
115
-    /* prepare the buffer */
116
-    buffsize = root->maxpatlen + SCANBUFF;
117
-    if(!(buffer = (char *) cli_calloc(buffsize, sizeof(char)))) {
118
-	cli_dbgmsg("cli_scandesc(): unable to cli_malloc(%d)\n", buffsize);
119
-	return CL_EMEM;
120
-    }
121
-
122
-    if((partcnt = (int *) cli_calloc(root->partsigs + 1, sizeof(int))) == NULL) {
123
-	cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(int));
124
-	free(buffer);
125
-	return CL_EMEM;
126
-    }
127
-
128
-    if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
129
-	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
130
-	free(buffer);
131
-	free(partcnt);
132
-	return CL_EMEM;
133
-    }
134
-
135
-    md5_init_ctx (&ctx);
136
-
137
-    buff = buffer;
138
-    buff += root->maxpatlen; /* pointer to read data block */
139
-    endbl = buff + SCANBUFF - root->maxpatlen; /* pointer to the last block
140
-						* length of root->maxpatlen
141
-						*/
142
-
143
-    pt= buff;
144
-    length = SCANBUFF;
145
-    while((bytes = read(desc, buff, SCANBUFF)) > 0) {
146
-
147
-	if(scanned != NULL)
148
-	    *scanned += bytes / CL_COUNT_PRECISION;
149
-
150
-	if(bytes < SCANBUFF)
151
-	    length -= SCANBUFF - bytes;
152
-
153
-	if((ret = cli_scanbuff(pt, length, virname, root, partcnt, typerec, offset, partoff)) == CL_VIRUS) {
154
-	    free(buffer);
155
-	    free(partcnt);
156
-	    free(partoff);
157
-	    return ret;
158
-
159
-	} else if(typerec && ret >= CL_TYPENO) {
160
-	    if(ret >= type)
161
-		type = ret;
162
-	}
163
-
164
-	if(bytes == SCANBUFF) {
165
-	    memmove(buffer, endbl, root->maxpatlen);
166
-	    offset += bytes - root->maxpatlen;
167
-	}
168
-
169
-        pt = buffer;
170
-        length = buffsize;
171
-
172
-	/* compute MD5 */
173
-
174
-	if(bytes % 64 == 0) {
175
-	    md5_process_block(buff, bytes, &ctx);
176
-	} else {
177
-		int block = bytes;
178
-		char *mpt = buff;
179
-
180
-	    while(block >= MD5_BLOCKSIZE) {
181
-		md5_process_block(mpt, MD5_BLOCKSIZE, &ctx);
182
-		mpt += MD5_BLOCKSIZE;
183
-		block -= MD5_BLOCKSIZE;
184
-	    }
185
-
186
-	    if(block)
187
-		md5_process_bytes(mpt, block, &ctx);
188
-	}
189
-    }
190
-
191
-    free(buffer);
192
-    free(partcnt);
193
-    free(partoff);
194
-
195
-    md5_finish_ctx(&ctx, &md5buff);
196
-
197
-    if((md5_node = cli_vermd5(md5buff, root))) {
198
-	if(virname)
199
-	    *virname = md5_node->virname;
200
-	return CL_VIRUS;
201
-    }
202
-
203
-    return typerec ? type : CL_CLEAN;
204
-}
205
-
206 86
 #ifdef CL_THREAD_SAFE
207 87
 static void cli_unlock_mutex(void *mtx)
208 88
 {
... ...
@@ -738,7 +621,7 @@ static int cli_scanmscab(int desc, const char **virname, long int *scanned, cons
738 738
 
739 739
     for(cab = base; cab; cab = cab->next) {
740 740
 	for(file = cab->files; file; file = file->next) {
741
-	    tempname = cl_gentemp(tmpdir);
741
+	    tempname = cli_gentemp(tmpdir);
742 742
 	    cli_dbgmsg("Extracting data to %s\n", tempname);
743 743
 	    if(cabd->extract(cabd, file, tempname)) {
744 744
 		cli_dbgmsg("libmscab error code: %d\n", cabd->last_error(cabd));
... ...
@@ -781,7 +664,6 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
781 781
     return CL_CLEAN;
782 782
 #endif
783 783
 
784
-#ifdef HAVE_MMAP
785 784
     /* TODO: do file operations if mmap fails */
786 785
     if(membuff == MAP_FAILED) {
787 786
 	cli_dbgmsg("mmap failed\n");
... ...
@@ -807,7 +689,6 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
807 807
 
808 808
     free(newbuff);
809 809
     return ret;
810
-#endif
811 810
 }
812 811
 
813 812
 static int cli_scandir(const char *dirname, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec)
... ...
@@ -1001,7 +882,7 @@ static int cli_scanole2(int desc, const char **virname, long int *scanned, const
1001 1001
 #endif
1002 1002
 
1003 1003
     /* generate the temporary directory */
1004
-    dir = cl_gentemp(tmpdir);
1004
+    dir = cli_gentemp(tmpdir);
1005 1005
     if(mkdir(dir, 0700)) {
1006 1006
 	cli_dbgmsg("ScanOLE2 -> Can't create temporary directory %s\n", dir);
1007 1007
 	return CL_ETMPDIR;
... ...
@@ -1042,7 +923,7 @@ static int cli_scanmail(int desc, const char **virname, long int *scanned, const
1042 1042
 #endif
1043 1043
 
1044 1044
 	/* generate the temporary directory */
1045
-	dir = cl_gentemp(tmpdir);
1045
+	dir = cli_gentemp(tmpdir);
1046 1046
 	if(mkdir(dir, 0700)) {
1047 1047
 	    cli_dbgmsg("ScanMail -> Can't create temporary directory %s\n", dir);
1048 1048
 	    return CL_ETMPDIR;
... ...
@@ -1051,7 +932,7 @@ static int cli_scanmail(int desc, const char **virname, long int *scanned, const
1051 1051
 	/*
1052 1052
 	 * Extract the attachments into the temporary directory
1053 1053
 	 */
1054
-	ret = cl_mbox(dir, desc);
1054
+	ret = cli_mbox(dir, desc);
1055 1055
 	/* FIXME: check mbox return code */
1056 1056
 
1057 1057
 	ret = cli_scandir(dir, virname, scanned, root, limits, options, arec, mrec);
... ...
@@ -1164,7 +1045,7 @@ static int cli_magic_scandesc(int desc, const char **virname, long int *scanned,
1164 1164
 		type = CL_UNKNOWN_TYPE;
1165 1165
 	    }
1166 1166
 
1167
-        case CL_UNKNOWN_TYPE:
1167
+	default:
1168 1168
 	    break;
1169 1169
     }
1170 1170
 
... ...
@@ -1209,6 +1090,9 @@ static int cli_magic_scandesc(int desc, const char **virname, long int *scanned,
1209 1209
 	    if(SCAN_PE)
1210 1210
 		ret = cli_scanpe(desc, virname, scanned, root, limits, options, arec, mrec);
1211 1211
 	    break;
1212
+
1213
+	default:
1214
+	    break;
1212 1215
     }
1213 1216
     (*arec)--;
1214 1217
 
... ...
@@ -55,7 +55,7 @@ short int *cli_hex2si(const char *hex)
55 55
     len = strlen(hex);
56 56
 
57 57
     if(len % 2 != 0) {
58
-	cli_errmsg("cl_hex2si(): Malformed hexstring: %s (length: %d)\n", hex, len);
58
+	cli_errmsg("cli_hex2si(): Malformed hexstring: %s (length: %d)\n", hex, len);
59 59
 	return NULL;
60 60
     }
61 61
 
... ...
@@ -99,7 +99,7 @@ char *cli_hex2str(const char *hex)
99 99
     len = strlen(hex);
100 100
 
101 101
     if(len % 2 != 0) {
102
-	cli_errmsg("cl_hex2str(): Malformed hexstring: %s (length: %d)\n", hex, len);
102
+	cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %d)\n", hex, len);
103 103
 	return NULL;
104 104
     }
105 105
 
... ...
@@ -129,7 +129,7 @@ char *cli_hex2str(const char *hex)
129 129
     return str;
130 130
 }
131 131
 
132
-char *cl_str2hex(const char *string, unsigned int len)
132
+char *cli_str2hex(const char *string, unsigned int len)
133 133
 {
134 134
 	char *hexstr;
135 135
 	char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
... ...
@@ -24,5 +24,6 @@ int cli_chomp(char *string);
24 24
 char *cli_strtok(const char *line, int field, const char *delim);
25 25
 short int *cli_hex2si(const char *hex);
26 26
 char *cli_hex2str(const char *hex);
27
+char *cli_str2hex(const char *string, unsigned int len);
27 28
 
28 29
 #endif
... ...
@@ -813,7 +813,7 @@ static char *ppt_stream_iter(int fd)
813 813
 #endif
814 814
 
815 815
 	/* generate the temporary directory */
816
-	out_dir = cl_gentemp(tmpdir);
816
+	out_dir = cli_gentemp(tmpdir);
817 817
 	if(mkdir(out_dir, 0700)) {
818 818
 	    printf("ScanOLE2 -> Can't create temporary directory %s\n", out_dir);
819 819
 	    close(fd);
... ...
@@ -48,6 +48,9 @@
48 48
 #include "memory.h"
49 49
 #include "output.h"
50 50
 #include "cfgparser.h"
51
+#include "../clamscan/others.h"
52
+#include "../libclamav/others.h"
53
+#include "../libclamav/str.h"
51 54
 
52 55
 #define LINE 1024
53 56
 
... ...
@@ -104,7 +107,7 @@ char *cut(const char *file, long int start, long int end)
104 104
 	exit(13);
105 105
     }
106 106
 
107
-    if((fname = cl_gentemp(".")) == NULL) {
107
+    if((fname = cli_gentemp(".")) == NULL) {
108 108
 	mprintf("!Can't generate temporary file name.\n");
109 109
 	exit(1);
110 110
     }
... ...
@@ -147,7 +150,7 @@ char *change(const char *file, long int x)
147 147
 	exit(13);
148 148
     }
149 149
 
150
-    if((fname = cl_gentemp(".")) == NULL) {
150
+    if((fname = cli_gentemp(".")) == NULL) {
151 151
 	mprintf("!Can't generate temporary file name.\n");
152 152
 	exit(1);
153 153
     }
... ...
@@ -203,7 +206,7 @@ void sigtool(struct optstruct *opt)
203 203
     if(optl(opt, "hex-dump")) {
204 204
 
205 205
 	while((bytes = read(0, buffer, FILEBUFF)) > 0) {
206
-	    pt = cl_str2hex(buffer, bytes);
206
+	    pt = cli_str2hex(buffer, bytes);
207 207
 	    write(1, pt, 2 * bytes);
208 208
 	    free(pt);
209 209
 	}
... ...
@@ -423,7 +426,7 @@ void sigtool(struct optstruct *opt)
423 423
 	if(fileinfo(signame, 1) != -1) {
424 424
 	    mprintf("File %s exists.\n", signame);
425 425
 	    free(signame);
426
-	    signame = cl_gentemp(".");
426
+	    signame = cli_gentemp(".");
427 427
 	}
428 428
 
429 429
 	bsigname = (char *) mcalloc(strlen(f) + 10, sizeof(char));
... ...
@@ -431,7 +434,7 @@ void sigtool(struct optstruct *opt)
431 431
 	if(fileinfo(bsigname, 1) != -1) {
432 432
 	    mprintf("File %s exists.\n", bsigname);
433 433
 	    free(bsigname);
434
-	    bsigname = cl_gentemp(".");
434
+	    bsigname = cli_gentemp(".");
435 435
 	}
436 436
 
437 437
 	if((wd = fopen(signame, "wb")) == NULL) {
... ...
@@ -444,7 +447,7 @@ void sigtool(struct optstruct *opt)
444 444
 	mprintf("Saving signature in %s file.\n", signame);
445 445
 
446 446
 	while((bytes = fread(buffer, 1, FILEBUFF, fd)) > 0) {
447
-	    pt = cl_str2hex(buffer, bytes);
447
+	    pt = cli_str2hex(buffer, bytes);
448 448
 	    fwrite(pt, 1, 2 * bytes, wd);
449 449
 	    free(pt);
450 450
 	}
... ...
@@ -551,7 +554,7 @@ int build(struct optstruct *opt)
551 551
 	exit(1);
552 552
     }
553 553
 
554
-    cl_freetrie(root);
554
+    cl_free(root);
555 555
 
556 556
     mprintf("Database properly parsed.\n");
557 557
 
... ...
@@ -569,7 +572,7 @@ int build(struct optstruct *opt)
569 569
 	}
570 570
     }
571 571
 
572
-    tarfile = cl_gentemp(".");
572
+    tarfile = cli_gentemp(".");
573 573
 
574 574
     switch(fork()) {
575 575
 	case -1:
... ...
@@ -597,7 +600,7 @@ int build(struct optstruct *opt)
597 597
 	exit(1);
598 598
     }
599 599
 
600
-    gzfile = cl_gentemp(".");
600
+    gzfile = cli_gentemp(".");
601 601
     if((gz = gzopen(gzfile, "wb")) == NULL) {
602 602
 	mprintf("!Can't open file %s to write.\n", gzfile);
603 603
 	exit(1);
... ...
@@ -655,7 +658,7 @@ int build(struct optstruct *opt)
655 655
     strcat(header, smbuff);
656 656
 
657 657
     /* MD5 */
658
-    pt = cl_md5file(gzfile);
658
+    pt = cli_md5file(gzfile);
659 659
     strcat(header, pt);
660 660
     free(pt);
661 661
     strcat(header, ":");
... ...
@@ -880,7 +883,7 @@ int listdb(const char *filename)
880 880
 	    tmpdir = "/tmp";
881 881
 #endif
882 882
 
883
-	dir = cl_gentemp(tmpdir);
883
+	dir = cli_gentemp(tmpdir);
884 884
 	if(mkdir(dir, 0700)) {
885 885
 	    mprintf("!listdb(): Can't create temporary directory %s\n", dir);
886 886
 	    free(buffer);
... ...
@@ -894,7 +897,7 @@ int listdb(const char *filename)
894 894
 
895 895
 	/* start */
896 896
 
897
-	tmp = cl_gentemp(tmpdir);
897
+	tmp = cli_gentemp(tmpdir);
898 898
 	if((tmpd = fopen(tmp, "wb+")) == NULL) {
899 899
 	    mprintf("!listdb(): Can't create temporary file %s\n", tmp);
900 900
 	    free(dir);