Browse code

fix compilation problems with Sun's SUNWspro C; include optimize.c

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@767 77e5149b-7576-45b1-b177-96237e5ba77b

Tomasz Kojm authored on 2004/08/19 04:06:42
Showing 5 changed files
... ...
@@ -65,12 +65,13 @@ libclamav/unrarlib.c: Christian Scheurer and Johannes Winkelmann, see
65 65
 							    www.unrarlib.org
66 66
 libclamav/mspack: Stuart Caie
67 67
 
68
-Code patches submitted by (in alphabetical order):
68
+Patch submitters (in alphabetical order):
69 69
 
70 70
 Kamil Andrusz <wizz*mniam.net>
71 71
 Patrick Bihan-Faou <patrick*mindstep.com>
72 72
 Martin Blapp <mb*imp.ch>
73 73
 Igor Brezac <igor*ipass.net>
74
+Mike Brudenell <pmb1*york.ac.uk>
74 75
 Len Budney <lbudney*pobox.com>
75 76
 David Champion <dgc*uchicago.edu>
76 77
 Andrey Cherezov <andrey*cherezov.koenig.su>
... ...
@@ -1,3 +1,14 @@
1
+Wed Aug 18 20:37:42 CEST 2004 (tk)
2
+----------------------------------
3
+  * libclamav/contrib: Include database optimisation tool (optimize/optimize.c).
4
+		       It's ClamAV specific and attempts to normalise signature
5
+		       prefixes so there are more signatures using the same
6
+		       prefix (and saving nodes in the Aho-Corasick pattern
7
+		       matcher (but slowing it down)). Included for educational
8
+		       purposes.
9
+  * clamscan/sigtool: fix compilation problems with Sun's SUNWspro C (patch
10
+		      by Mike Brudenell <pmb1*york.ac.uk>)
11
+
1 12
 Wed Aug 18 16:54:01 BST 2004 (njh)
2 13
 ----------------------------------
3 14
   *	libclamav/mbox.c:	Only followURL if CL_MAILURL is set.
... ...
@@ -526,7 +526,11 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass
526 526
 
527 527
     /* unpack file  - as unprivileged user */
528 528
     if(cli_strbcasestr(filename, ".zip")) {
529
-	char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL };
529
+	char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL };
530
+	/* Sun's SUNWspro C compiler doesn't allow direct initialisation
531
+	 * with a variable
532
+	 */
533
+	args[4] = (char *) filename;
530 534
 
531 535
 	if((userprg = getargl(opt, "unzip")))
532 536
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
... ...
@@ -534,56 +538,64 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass
534 534
 	    ret = clamav_unpack("unzip", args, gendir, user, opt);
535 535
 
536 536
     } else if(cli_strbcasestr(filename, ".rar")) { 
537
-	char *args[] = { "unrar", "x", "-p-", "-y", (char *) filename, NULL };
537
+	char *args[] = { "unrar", "x", "-p-", "-y", NULL, NULL };
538
+	args[4] = (char *) filename;
538 539
 	if((userprg = getargl(opt, "unrar")))
539 540
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
540 541
 	else
541 542
 	    ret = clamav_unpack("unrar", args, gendir, user, opt);
542 543
 
543 544
     } else if(cli_strbcasestr(filename, ".arj")) { 
544
-        char *args[] = { "arj", "x","-y", (char *) filename, NULL };
545
+        char *args[] = { "arj", "x","-y", NULL, NULL };
546
+	args[3] = (char *) filename;
545 547
         if((userprg = getargl(opt, "arj")))
546 548
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
547 549
 	else
548 550
 	    ret = clamav_unpack("arj", args, gendir, user, opt);
549 551
 
550 552
     } else if(cli_strbcasestr(filename, ".zoo")) { 
551
-	char *args[] = { "unzoo", "-x","-j","./", (char *) filename, NULL };
553
+	char *args[] = { "unzoo", "-x","-j","./", NULL, NULL };
554
+	args[4] = (char *) filename;
552 555
 	if((userprg = getargl(opt, "unzoo")))
553 556
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
554 557
 	else
555 558
 	    ret = clamav_unpack("unzoo", args, gendir, user, opt);
556 559
 
557 560
     } else if(cli_strbcasestr(filename, ".jar")) { 
558
-	char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL };
561
+	char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL };
562
+	args[4] = (char *) filename;
559 563
 	if((userprg = getargl(opt, "jar")))
560 564
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
561 565
 	else
562 566
 	    ret = clamav_unpack("unzip", args, gendir, user, opt);
563 567
 
564 568
     } else if(cli_strbcasestr(filename, ".lzh")) { 
565
-	char *args[] = { "lha", "xf", (char *) filename, NULL };
569
+	char *args[] = { "lha", "xf", NULL, NULL };
570
+	args[2] = (char *) filename;
566 571
 	if((userprg = getargl(opt, "lha")))
567 572
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
568 573
 	else
569 574
 	    ret = clamav_unpack("lha", args, gendir, user, opt);
570 575
 
571 576
     } else if(cli_strbcasestr(filename, ".tar")) { 
572
-	char *args[] = { "tar", "-xpvf", (char *) filename, NULL };
577
+	char *args[] = { "tar", "-xpvf", NULL, NULL };
578
+	args[2] = (char *) filename;
573 579
 	if((userprg = getargl(opt, "tar")))
574 580
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
575 581
 	else
576 582
 	    ret = clamav_unpack("tar", args, gendir, user, opt);
577 583
 
578 584
     } else if(cli_strbcasestr(filename, ".deb")) { 
579
-	char *args[] = { "ar", "x", (char *) filename, NULL };
585
+	char *args[] = { "ar", "x", NULL, NULL };
586
+	args[2] = (char *) filename;
580 587
 	if((userprg = getargl(opt, "deb")))
581 588
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
582 589
 	else
583 590
 	    ret = clamav_unpack("ar", args, gendir, user, opt);
584 591
 
585 592
     } else if((cli_strbcasestr(filename, ".tar.gz") || cli_strbcasestr(filename, ".tgz"))) {
586
-	char *args[] = { "tar", "-zxpvf", (char *) filename, NULL };
593
+	char *args[] = { "tar", "-zxpvf", NULL, NULL };
594
+	args[2] = (char *) filename;
587 595
 	if((userprg = getargl(opt, "tgz")))
588 596
 	    ret = clamav_unpack(userprg, args, gendir, user, opt);
589 597
 	else
590 598
new file mode 100644
... ...
@@ -0,0 +1,210 @@
0
+/*
1
+ *  Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *
3
+ *  This program is free software; you can redistribute it and/or modify
4
+ *  it under the terms of the GNU General Public License as published by
5
+ *  the Free Software Foundation; either version 2 of the License, or
6
+ *  (at your option) any later version.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
+ */
17
+
18
+#include <stdio.h>
19
+#include <string.h>
20
+#include <ctype.h>
21
+#include <stdlib.h>
22
+
23
+#define MINLENGTH 10 /* signatures shorter than MINLENGTH hex characters are copied unchanged */
24
+#define FILEBUFF 16384 /* size in bytes of the line buffer used to read the input database */
25
+#define ANALYZE 6 /* only the first ANALYZE decoded bytes of a signature are analyzed */
26
+
27
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 *
 * Returns the value 0..15, or -1 when c is not a hexadecimal digit.
 *
 * Only plain range comparisons are used, so any int is a safe argument:
 * negative values (e.g. a sign-extended char >= 0x80) and values above
 * 127 simply yield -1 instead of being handed to the <ctype.h> functions,
 * whose behaviour is undefined outside the unsigned char range / EOF.
 */
int hex2int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}
40
+
41
/*
 * Decode the leading part of a hexadecimal string into raw bytes.
 *
 * hex:     NUL-terminated string of hexadecimal digits; its length must be
 *          even.
 * howmany: maximum number of hex characters to decode.  Values larger than
 *          the string length are clamped to it, negative values are treated
 *          as 0, and an odd value decodes the whole pair it ends in.
 *
 * Returns a newly allocated buffer holding the decoded bytes followed by a
 * terminating NUL byte; the caller must free() it.  Returns NULL when hex is
 * NULL, has an odd length, contains a '?' wildcard or a non-hex character in
 * the decoded range, or when the allocation fails.  Diagnostics for the
 * malformed-length and wildcard cases are printed to stdout.
 */
char *hex2str(const char *hex, int howmany)
{
    size_t len, digits, nbytes, i;
    char *str;

    if (hex == NULL)
        return NULL;

    len = strlen(hex);

    /* additional check - hex strings must have an even length here */
    if (len % 2 != 0) {
        printf("hex2str(): Malformed hexstring: %s (length: %d)\n", hex, (int) len);
        return NULL;
    }

    /* Clamp the request BEFORE sizing the buffer so that the allocation
     * always matches the number of bytes actually written plus the NUL. */
    digits = (howmany > 0) ? (size_t) howmany : 0;
    if (digits > len)
        digits = len;
    nbytes = (digits + 1) / 2;	/* len is even, so 2 * nbytes <= len */

    str = calloc(nbytes + 1, sizeof(char));
    if (!str)
        return NULL;

    for (i = 0; i < nbytes; i++) {
        const char *pair = hex + 2 * i;
        int hi, lo;

        if (pair[0] == '?') {
            printf("Can't optimize polymorphic signature.\n");
            free(str);
            return NULL;
        }

        /* Go through unsigned char so bytes >= 0x80 are never passed on
         * as negative values. */
        hi = hex2int((unsigned char) pair[0]);
        lo = (hi >= 0) ? hex2int((unsigned char) pair[1]) : -1;
        if (hi < 0 || lo < 0) {
            free(str);
            return NULL;
        }

        str[i] = (char) ((hi << 4) + lo);
    }

    return str;
}
88
+
89
/*
 * Remove a trailing line terminator from a NUL-terminated string, in place.
 *
 * If the last character is '\n' or '\r' it is removed; if the character
 * that then becomes last is '\r' it is removed as well.  So "\n", "\r",
 * "\r\n" and "\r\r" endings are stripped completely, while only the final
 * character of "\n\n" or "\n\r" is removed.
 *
 * string: buffer to modify; a NULL pointer or an empty string is left
 *         untouched.
 */
void chomp(char *string)
{
    size_t last;

    if (string == NULL || *string == '\0')
        return;

    last = strlen(string) - 1;
    if (string[last] != '\n' && string[last] != '\r')
        return;

    string[last] = '\0';

    /* A preceding '\r' belongs to the same terminator (e.g. "\r\n"). */
    if (last > 0 && string[last - 1] == '\r')
        string[last - 1] = '\0';
}
107
+
108
+int main(int argc, char **argv)
109
+{
110
+	int line = 0, found, i, nodes = 0, optimized = 0, optimal = 0;
111
+	unsigned char c1, c2;
112
+	char *buffer, *start, *pt, **prefix, *sig;
113
+	FILE *in, *out;
114
+
115
+
116
+    if(argc != 3) {
117
+	printf("%s input_db output_db\n", argv[0]);
118
+	exit(1);
119
+    }
120
+
121
+    if((in = fopen(argv[1], "rb")) == NULL) {
122
+	printf("Can't open input database %s\n", argv[1]);
123
+	exit(1);
124
+    }
125
+
126
+    if((out = fopen(argv[2], "wb")) == NULL) {
127
+	printf("Can't open output database %s\n", argv[1]);
128
+	exit(1);
129
+    }
130
+
131
+    prefix = (char **) calloc(256, sizeof(char *));
132
+    for(i = 0; i < 256; i++)
133
+	prefix[i] = (char *) calloc(256, sizeof(char));
134
+
135
+    if(!(buffer = (char *) malloc(FILEBUFF))) {
136
+	exit(1);
137
+    }
138
+
139
+    memset(buffer, 0, FILEBUFF);
140
+
141
+    while(fgets(buffer, FILEBUFF, in)) {
142
+
143
+	line++;
144
+	chomp(buffer);
145
+
146
+	pt = strchr(buffer, '=');
147
+	if(!pt) {
148
+	    printf("Malformed pattern line %d.\n", line);
149
+	    free(buffer);
150
+	    exit(1);
151
+	}
152
+
153
+	start = buffer;
154
+	*pt++ = 0;
155
+
156
+	if(*pt == '=')
157
+	    continue;
158
+
159
+	if(strlen(pt) < MINLENGTH) {
160
+	    fprintf(out, "%s=%s\n", start, pt);
161
+	    continue;
162
+	}
163
+
164
+	sig = hex2str(pt, 2 * ANALYZE);
165
+
166
+	if(!sig) {
167
+	    printf("Can't decode signature %d\n", line);
168
+	    exit(1);
169
+	}
170
+
171
+	found = -1;
172
+
173
+	for(i = 0; i < ANALYZE - 1; i++) {
174
+	    c1 = ((unsigned char) sig[i]) & 0xff;
175
+	    c2 = ((unsigned char) sig[i + 1]) & 0xff;
176
+
177
+	    if(prefix[c1][c2]) {
178
+		found = i;
179
+		break;
180
+	    }
181
+	}
182
+
183
+	if(found < 0) {
184
+	    printf("Can't optimize signature %d\n", line);
185
+	    prefix[c1][c2] = 1;
186
+	    nodes++;
187
+	} else if(found == 0) {
188
+	    printf("Signature %d is already optimal.\n", line);
189
+	    optimal++;
190
+	} else {
191
+	    pt = pt + 2 * found;
192
+	    printf("Signature %d optimized (new start at %d byte)\n", line, found);
193
+	    optimized++;
194
+	}
195
+
196
+	fprintf(out, "%s=%s\n", start, pt);
197
+    }
198
+
199
+    fclose(in);
200
+    fclose(out);
201
+
202
+    free(buffer);
203
+    for(i = 0; i < 256; i++)
204
+	free(prefix[i]);
205
+    free(prefix);
206
+
207
+    printf("Nodes: %d, Optimal: %d, Signatures optimized: %d\n", nodes, optimal, optimized);
208
+    exit(0);
209
+}
... ...
@@ -254,7 +254,8 @@ int build(struct optstruct *opt)
254 254
 	    exit(1);
255 255
 	case 0:
256 256
 	    {
257
-		char *args[] = { "tar", "-cvf", tarfile, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL };
257
+		char *args[] = { "tar", "-cvf", NULL, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL };
258
+		args[2] = tarfile;
258 259
 		execv("/bin/tar", args);
259 260
 		mprintf("!Can't execute tar\n");
260 261
 		perror("tar");