git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@767 77e5149b-7576-45b1-b177-96237e5ba77b
Tomasz Kojm authored on 2004/08/19 04:06:42... | ... |
@@ -65,12 +65,13 @@ libclamav/unrarlib.c: Christian Scheurer and Johannes Winkelmann, see |
65 | 65 |
www.unrarlib.org |
66 | 66 |
libclamav/mspack: Stuart Caie |
67 | 67 |
|
68 |
-Code patches submitted by (in alphabetical order): |
|
68 |
+Patch submitters (in alphabetical order): |
|
69 | 69 |
|
70 | 70 |
Kamil Andrusz <wizz*mniam.net> |
71 | 71 |
Patrick Bihan-Faou <patrick*mindstep.com> |
72 | 72 |
Martin Blapp <mb*imp.ch> |
73 | 73 |
Igor Brezac <igor*ipass.net> |
74 |
+Mike Brudenell <pmb1*york.ac.uk> |
|
74 | 75 |
Len Budney <lbudney*pobox.com> |
75 | 76 |
David Champion <dgc*uchicago.edu> |
76 | 77 |
Andrey Cherezov <andrey*cherezov.koenig.su> |
... | ... |
@@ -1,3 +1,14 @@ |
1 |
+Wed Aug 18 20:37:42 CEST 2004 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/contrib: Include database optimisation tool (optimize/optimize.c). |
|
4 |
+ It's ClamAV specific and attempts to normalise signature |
|
5 |
+ prefixes so there are more signatures using the same |
|
6 |
+ prefix (and saving nodes in the Aho-Corasick pattern |
|
7 |
+ matcher (but slowing it down)). Included for educational |
|
8 |
+ purposes. |
|
9 |
+ * clamscan/sigtool: fix compilation problems with Sun's SUNWspro C (patch |
|
10 |
+ by Mike Brudenell <pmb1*york.ac.uk>) |
|
11 |
+ |
|
1 | 12 |
Wed Aug 18 16:54:01 BST 2004 (njh) |
2 | 13 |
---------------------------------- |
3 | 14 |
* libclamav/mbox.c: Only followURL if CL_MAILURL is set. |
... | ... |
@@ -526,7 +526,11 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass |
526 | 526 |
|
527 | 527 |
/* unpack file - as unprivileged user */ |
528 | 528 |
if(cli_strbcasestr(filename, ".zip")) { |
529 |
- char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL }; |
|
529 |
+ char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL }; |
|
530 |
+ /* Sun's SUNWspro C compiler doesn't allow direct initialisation |
|
531 |
+ * with a variable |
|
532 |
+ */ |
|
533 |
+ args[4] = (char *) filename; |
|
530 | 534 |
|
531 | 535 |
if((userprg = getargl(opt, "unzip"))) |
532 | 536 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
... | ... |
@@ -534,56 +538,64 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass |
534 | 534 |
ret = clamav_unpack("unzip", args, gendir, user, opt); |
535 | 535 |
|
536 | 536 |
} else if(cli_strbcasestr(filename, ".rar")) { |
537 |
- char *args[] = { "unrar", "x", "-p-", "-y", (char *) filename, NULL }; |
|
537 |
+ char *args[] = { "unrar", "x", "-p-", "-y", NULL, NULL }; |
|
538 |
+ args[4] = (char *) filename; |
|
538 | 539 |
if((userprg = getargl(opt, "unrar"))) |
539 | 540 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
540 | 541 |
else |
541 | 542 |
ret = clamav_unpack("unrar", args, gendir, user, opt); |
542 | 543 |
|
543 | 544 |
} else if(cli_strbcasestr(filename, ".arj")) { |
544 |
- char *args[] = { "arj", "x","-y", (char *) filename, NULL }; |
|
545 |
+ char *args[] = { "arj", "x","-y", NULL, NULL }; |
|
546 |
+ args[3] = (char *) filename; |
|
545 | 547 |
if((userprg = getargl(opt, "arj"))) |
546 | 548 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
547 | 549 |
else |
548 | 550 |
ret = clamav_unpack("arj", args, gendir, user, opt); |
549 | 551 |
|
550 | 552 |
} else if(cli_strbcasestr(filename, ".zoo")) { |
551 |
- char *args[] = { "unzoo", "-x","-j","./", (char *) filename, NULL }; |
|
553 |
+ char *args[] = { "unzoo", "-x","-j","./", NULL, NULL }; |
|
554 |
+ args[4] = (char *) filename; |
|
552 | 555 |
if((userprg = getargl(opt, "unzoo"))) |
553 | 556 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
554 | 557 |
else |
555 | 558 |
ret = clamav_unpack("unzoo", args, gendir, user, opt); |
556 | 559 |
|
557 | 560 |
} else if(cli_strbcasestr(filename, ".jar")) { |
558 |
- char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL }; |
|
561 |
+ char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL }; |
|
562 |
+ args[4] = (char *) filename; |
|
559 | 563 |
if((userprg = getargl(opt, "jar"))) |
560 | 564 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
561 | 565 |
else |
562 | 566 |
ret = clamav_unpack("unzip", args, gendir, user, opt); |
563 | 567 |
|
564 | 568 |
} else if(cli_strbcasestr(filename, ".lzh")) { |
565 |
- char *args[] = { "lha", "xf", (char *) filename, NULL }; |
|
569 |
+ char *args[] = { "lha", "xf", NULL, NULL }; |
|
570 |
+ args[2] = (char *) filename; |
|
566 | 571 |
if((userprg = getargl(opt, "lha"))) |
567 | 572 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
568 | 573 |
else |
569 | 574 |
ret = clamav_unpack("lha", args, gendir, user, opt); |
570 | 575 |
|
571 | 576 |
} else if(cli_strbcasestr(filename, ".tar")) { |
572 |
- char *args[] = { "tar", "-xpvf", (char *) filename, NULL }; |
|
577 |
+ char *args[] = { "tar", "-xpvf", NULL, NULL }; |
|
578 |
+ args[2] = (char *) filename; |
|
573 | 579 |
if((userprg = getargl(opt, "tar"))) |
574 | 580 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
575 | 581 |
else |
576 | 582 |
ret = clamav_unpack("tar", args, gendir, user, opt); |
577 | 583 |
|
578 | 584 |
} else if(cli_strbcasestr(filename, ".deb")) { |
579 |
- char *args[] = { "ar", "x", (char *) filename, NULL }; |
|
585 |
+ char *args[] = { "ar", "x", NULL, NULL }; |
|
586 |
+ args[2] = (char *) filename; |
|
580 | 587 |
if((userprg = getargl(opt, "deb"))) |
581 | 588 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
582 | 589 |
else |
583 | 590 |
ret = clamav_unpack("ar", args, gendir, user, opt); |
584 | 591 |
|
585 | 592 |
} else if((cli_strbcasestr(filename, ".tar.gz") || cli_strbcasestr(filename, ".tgz"))) { |
586 |
- char *args[] = { "tar", "-zxpvf", (char *) filename, NULL }; |
|
593 |
+ char *args[] = { "tar", "-zxpvf", NULL, NULL }; |
|
594 |
+ args[2] = (char *) filename; |
|
587 | 595 |
if((userprg = getargl(opt, "tgz"))) |
588 | 596 |
ret = clamav_unpack(userprg, args, gendir, user, opt); |
589 | 597 |
else |
590 | 598 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,210 @@ |
0 |
+/* |
|
1 |
+ * Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net> |
|
2 |
+ * |
|
3 |
+ * This program is free software; you can redistribute it and/or modify |
|
4 |
+ * it under the terms of the GNU General Public License as published by |
|
5 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
6 |
+ * (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * This program is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+ * GNU General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU General Public License |
|
14 |
+ * along with this program; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <stdio.h> |
|
19 |
+#include <string.h> |
|
20 |
+#include <ctype.h> |
|
21 |
+#include <stdlib.h> |
|
22 |
+ |
|
23 |
+#define MINLENGTH 10 /* only optimize signatures at least MINLENGTH hex characters long */ |
|
24 |
+#define FILEBUFF 16384 |
|
25 |
+#define ANALYZE 6 /* only analyze the first ANALYZE decoded bytes (2 * ANALYZE hex characters) */ |
|
26 |
+ |
|
27 |
/*
 * Convert one hexadecimal digit to its numeric value.
 *
 * c: character code of the digit; '0'-'9', 'a'-'f' and 'A'-'F' are accepted.
 *
 * Returns the digit's value (0..15), or -1 if c is not a hexadecimal digit.
 *
 * The digit ranges are tested directly instead of going through <ctype.h>:
 * callers hand in plain char values, which may be negative, and
 * tolower()/isdigit() are undefined for such arguments.  It also avoids
 * isascii(), which is not part of ISO C.
 */
int hex2int(int c)
{
    if(c >= '0' && c <= '9')
        return c - '0';

    if(c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    if(c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}
|
40 |
+ |
|
41 |
/*
 * Decode the leading part of a hexadecimal string into raw bytes.
 *
 * hex:     NUL-terminated string of hex digits; its length must be even.
 * howmany: maximum number of hex characters to decode (two per output
 *          byte).  Values larger than the string are clamped to its length;
 *          negative values are treated as 0.
 *
 * Returns a calloc()ed buffer holding the decoded bytes followed by at least
 * one zero byte, or NULL when the string has odd length, contains a '?'
 * wildcard or a non-hex character in the decoded range, or memory cannot be
 * allocated.  The caller owns the buffer and must free() it.
 */
char *hex2str(const char *hex, int howmany)
{
    int hi, lo;
    int i, len;
    char *str, *ptr;

    len = (int) strlen(hex);

    /* additional check - hex strings must have an even length here */
    if(len % 2 != 0) {
        printf("hex2str(): Malformed hexstring: %s (length: %d)\n", hex, len);
        return NULL;
    }

    if(howmany < 0)
        howmany = 0;

    /* Size the buffer from the requested count, rounded up to whole bytes,
     * so an odd howmany still leaves room for the terminating zero byte.
     */
    str = calloc((size_t) (howmany / 2 + howmany % 2) + 1, sizeof(char));
    if(!str)
        return NULL;

    if(howmany > len)
        howmany = len;

    ptr = str;

    for(i = 0; i < howmany; i += 2) {
        if(hex[i] == '?') {
            printf("Can't optimize polymorphic signature.\n");
            free(str);
            return NULL;
        }

        /* Go through unsigned char so that bytes >= 0x80 never reach
         * hex2int() as negative values.
         */
        hi = hex2int((unsigned char) hex[i]);
        lo = (hi >= 0) ? hex2int((unsigned char) hex[i + 1]) : -1;
        if(lo < 0) {
            free(str);
            return NULL;
        }

        *ptr++ = (char) ((hi << 4) + lo);
    }

    return str;
}
|
88 |
+ |
|
89 |
/*
 * Strip a trailing line terminator from string, in place.
 *
 * If the last character is '\n' or '\r' it is removed; if the character
 * before it is '\r', that one is removed as well.  Any other string,
 * including the empty string, is left untouched.
 */
void chomp(char *string)
{
    size_t length = strlen(string);
    char *last;

    if(length == 0)
        return;

    last = string + length - 1;
    if(*last != '\n' && *last != '\r')
        return;

    *last = '\0';

    /* a second, preceding character is only dropped when it is '\r' */
    if(length >= 2 && last[-1] == '\r')
        last[-1] = '\0';
}
|
107 |
+ |
|
108 |
+int main(int argc, char **argv) |
|
109 |
+{ |
|
110 |
+ int line = 0, found, i, nodes = 0, optimized = 0, optimal = 0; |
|
111 |
+ unsigned char c1, c2; |
|
112 |
+ char *buffer, *start, *pt, **prefix, *sig; |
|
113 |
+ FILE *in, *out; |
|
114 |
+ |
|
115 |
+ |
|
116 |
+ if(argc != 3) { |
|
117 |
+ printf("%s input_db output_db\n", argv[0]); |
|
118 |
+ exit(1); |
|
119 |
+ } |
|
120 |
+ |
|
121 |
+ if((in = fopen(argv[1], "rb")) == NULL) { |
|
122 |
+ printf("Can't open input database %s\n", argv[1]); |
|
123 |
+ exit(1); |
|
124 |
+ } |
|
125 |
+ |
|
126 |
+ if((out = fopen(argv[2], "wb")) == NULL) { |
|
127 |
+ printf("Can't open output database %s\n", argv[1]); |
|
128 |
+ exit(1); |
|
129 |
+ } |
|
130 |
+ |
|
131 |
+ prefix = (char **) calloc(256, sizeof(char *)); |
|
132 |
+ for(i = 0; i < 256; i++) |
|
133 |
+ prefix[i] = (char *) calloc(256, sizeof(char)); |
|
134 |
+ |
|
135 |
+ if(!(buffer = (char *) malloc(FILEBUFF))) { |
|
136 |
+ exit(1); |
|
137 |
+ } |
|
138 |
+ |
|
139 |
+ memset(buffer, 0, FILEBUFF); |
|
140 |
+ |
|
141 |
+ while(fgets(buffer, FILEBUFF, in)) { |
|
142 |
+ |
|
143 |
+ line++; |
|
144 |
+ chomp(buffer); |
|
145 |
+ |
|
146 |
+ pt = strchr(buffer, '='); |
|
147 |
+ if(!pt) { |
|
148 |
+ printf("Malformed pattern line %d.\n", line); |
|
149 |
+ free(buffer); |
|
150 |
+ exit(1); |
|
151 |
+ } |
|
152 |
+ |
|
153 |
+ start = buffer; |
|
154 |
+ *pt++ = 0; |
|
155 |
+ |
|
156 |
+ if(*pt == '=') |
|
157 |
+ continue; |
|
158 |
+ |
|
159 |
+ if(strlen(pt) < MINLENGTH) { |
|
160 |
+ fprintf(out, "%s=%s\n", start, pt); |
|
161 |
+ continue; |
|
162 |
+ } |
|
163 |
+ |
|
164 |
+ sig = hex2str(pt, 2 * ANALYZE); |
|
165 |
+ |
|
166 |
+ if(!sig) { |
|
167 |
+ printf("Can't decode signature %d\n", line); |
|
168 |
+ exit(1); |
|
169 |
+ } |
|
170 |
+ |
|
171 |
+ found = -1; |
|
172 |
+ |
|
173 |
+ for(i = 0; i < ANALYZE - 1; i++) { |
|
174 |
+ c1 = ((unsigned char) sig[i]) & 0xff; |
|
175 |
+ c2 = ((unsigned char) sig[i + 1]) & 0xff; |
|
176 |
+ |
|
177 |
+ if(prefix[c1][c2]) { |
|
178 |
+ found = i; |
|
179 |
+ break; |
|
180 |
+ } |
|
181 |
+ } |
|
182 |
+ |
|
183 |
+ if(found < 0) { |
|
184 |
+ printf("Can't optimize signature %d\n", line); |
|
185 |
+ prefix[c1][c2] = 1; |
|
186 |
+ nodes++; |
|
187 |
+ } else if(found == 0) { |
|
188 |
+ printf("Signature %d is already optimal.\n", line); |
|
189 |
+ optimal++; |
|
190 |
+ } else { |
|
191 |
+ pt = pt + 2 * found; |
|
192 |
+ printf("Signature %d optimized (new start at %d byte)\n", line, found); |
|
193 |
+ optimized++; |
|
194 |
+ } |
|
195 |
+ |
|
196 |
+ fprintf(out, "%s=%s\n", start, pt); |
|
197 |
+ } |
|
198 |
+ |
|
199 |
+ fclose(in); |
|
200 |
+ fclose(out); |
|
201 |
+ |
|
202 |
+ free(buffer); |
|
203 |
+ for(i = 0; i < 256; i++) |
|
204 |
+ free(prefix[i]); |
|
205 |
+ free(prefix); |
|
206 |
+ |
|
207 |
+ printf("Nodes: %d, Optimal: %d, Signatures optimized: %d\n", nodes, optimal, optimized); |
|
208 |
+ exit(0); |
|
209 |
+} |
... | ... |
@@ -254,7 +254,8 @@ int build(struct optstruct *opt) |
254 | 254 |
exit(1); |
255 | 255 |
case 0: |
256 | 256 |
{ |
257 |
- char *args[] = { "tar", "-cvf", tarfile, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL }; |
|
257 |
+ char *args[] = { "tar", "-cvf", NULL, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL }; |
|
258 |
+ args[2] = tarfile; |
|
258 | 259 |
execv("/bin/tar", args); |
259 | 260 |
mprintf("!Can't execute tar\n"); |
260 | 261 |
perror("tar"); |