Browse code

extend engine to support character alternatives and distance limits in multipattern signatures

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@661 77e5149b-7576-45b1-b177-96237e5ba77b

Tomasz Kojm authored on 2004/07/08 22:48:58
Showing 8 changed files
... ...
@@ -1,3 +1,10 @@
1
+Thu Jul  8 15:39:29 CEST 2004 (tk)
2
+----------------------------------
3
+  * libclamav: extend engine to support character alternatives (a|b|..|z)
4
+	       and distance limits in multipattern signatures (exact: {n}
5
+	       and variable: {n-} (n or more), {-n} (n or less), {n1-n2}
6
+	       (n1, n2 or between them))
7
+
1 8
 Wed Jul  7 02:17:00 CEST 2004 (tk)
2 9
 ----------------------------------
3 10
   * libclamav: pe: Improve UPX detection (thanks to aCaB). Ignore old "ZM"
... ...
@@ -78,9 +78,10 @@ extern "C"
78 78
 
79 79
 struct cli_patt {
80 80
     short int *pattern;
81
-    unsigned int length;
81
+    unsigned int length, mindist, maxdist;
82 82
     char *virname;
83
-    unsigned short int sigid, parts, partno, type;
83
+    unsigned short int sigid, parts, partno, type, alt, *altn;
84
+    char **altc;
84 85
     struct cli_patt *next;
85 86
 };
86 87
 
... ...
@@ -21,4 +21,6 @@
21 21
 #endif
22 22
 
23 23
 #define VIRUSDB DATADIR "/viruses.db"
24
+
24 25
 #define CLI_IGN -200
26
+#define CLI_ALT -201
... ...
@@ -176,6 +176,7 @@ int cl_buildtrie(struct cl_node *root)
176 176
 static void cli_freepatt(struct cli_patt *list)
177 177
 {
178 178
 	struct cli_patt *handler, *prev;
179
+	int i;
179 180
 
180 181
 
181 182
     handler = list;
... ...
@@ -183,6 +184,12 @@ static void cli_freepatt(struct cli_patt *list)
183 183
     while(handler) {
184 184
 	free(handler->pattern);
185 185
 	free(handler->virname);
186
+	if(handler->alt) {
187
+	    free(handler->altn);
188
+	    for(i = 0; i < handler->alt; i++)
189
+		free(handler->altc[i]);
190
+	    free(handler->altc);
191
+	}
186 192
 	prev = handler;
187 193
 	handler = handler->next;
188 194
 	free(prev);
... ...
@@ -206,9 +213,10 @@ int inline cli_findpos(const char *buffer, int offset, int length, const struct
206 206
 {
207 207
 	int bufferpos = offset + CL_MIN_LENGTH;
208 208
 	int postfixend = offset + length;
209
-	unsigned int i;
209
+	unsigned int i, j, alt = 0, found = 0;
210 210
 
211
-    if (bufferpos >= length)
211
+
212
+    if(bufferpos >= length)
212 213
 	bufferpos %= length;
213 214
 
214 215
     for(i = CL_MIN_LENGTH; i < pattern->length; i++) {
... ...
@@ -216,30 +224,41 @@ int inline cli_findpos(const char *buffer, int offset, int length, const struct
216 216
 	if(bufferpos == postfixend)
217 217
 	    return 0;
218 218
 
219
-	if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
219
+	if(pattern->pattern[i] == CLI_ALT) {
220
+	    for(j = 0; j < pattern->altn[alt]; j++) {
221
+		if(pattern->altc[alt][j] == buffer[bufferpos])
222
+		    found = 1;
223
+	    }
224
+
225
+	    if(!found)
226
+		return 0;
227
+	    alt++;
228
+
229
+	} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
220 230
 	    return 0;
221 231
 
222 232
 	bufferpos++;
223 233
 
224
-	if (bufferpos == length)
234
+	if(bufferpos == length)
225 235
 	    bufferpos = 0;
226 236
     }
227 237
 
228 238
     return 1;
229 239
 }
230 240
 
231
-int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec)
241
+int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff)
232 242
 {
233 243
 	struct cl_node *current;
234 244
 	struct cli_patt *pt;
235
-	int position, type = CL_CLEAN;
245
+	int position, type = CL_CLEAN, dist;
236 246
         unsigned int i;
237 247
 
248
+int j;
238 249
 
239 250
     current = (struct cl_node *) root;
240 251
 
241
-    if(!partcnt) {
242
-	cli_dbgmsg("cli_scanbuff(): partcnt == NULL\n");
252
+    if(!partcnt || !partoff) {
253
+	cli_dbgmsg("cli_scanbuff(): partcnt == NULL || partoff == NULL\n");
243 254
 	return CL_EMEM;
244 255
     }
245 256
 
... ...
@@ -254,21 +273,37 @@ int cli_scanbuff(const char *buffer, unsigned int length, const char **virname,
254 254
 		if(cli_findpos(buffer, position, length, pt)) {
255 255
 		    if(pt->sigid) { /* it's a partial signature */
256 256
 			if(partcnt[pt->sigid] + 1 == pt->partno) {
257
-			    if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
258
-				if(pt->type) {
259
-				    if(typerec) {
260
-					cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
261
-					if(pt->type > type)
262
-					    type = pt->type;
263
-				    }
264
-				} else {
265
-				    if(virname)
266
-					*virname = pt->virname;
267 257
 
268
-				    return CL_VIRUS;
258
+			    dist = 1;
259
+			    if(pt->maxdist)
260
+				if(offset + i - partoff[pt->sigid] > pt->maxdist)
261
+				    dist = 0;
262
+
263
+			    if(dist && pt->mindist)
264
+				if(offset + i - partoff[pt->sigid] < pt->mindist)
265
+				    dist = 0;
266
+
267
+
268
+			    if(dist) {
269
+				partoff[pt->sigid] = offset + i + pt->length;
270
+
271
+				if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
272
+				    if(pt->type) {
273
+					if(typerec) {
274
+					    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
275
+					    if(pt->type > type)
276
+						type = pt->type;
277
+					}
278
+				    } else {
279
+					if(virname)
280
+					    *virname = pt->virname;
281
+
282
+					return CL_VIRUS;
283
+				    }
269 284
 				}
270 285
 			    }
271 286
 			}
287
+
272 288
 		    } else { /* old type signature */
273 289
 			if(pt->type) {
274 290
 			    if(typerec) {
... ...
@@ -298,6 +333,7 @@ int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, c
298 298
 
299 299
 {
300 300
 	int ret, *partcnt;
301
+	unsigned long int *partoff;
301 302
 
302 303
 
303 304
     if((partcnt = (int *) cli_calloc(root->partsigs + 1, sizeof(int))) == NULL) {
... ...
@@ -305,7 +341,12 @@ int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, c
305 305
 	return CL_EMEM;
306 306
     }
307 307
 
308
-    ret = cli_scanbuff(buffer, length, virname, root, partcnt, 0);
308
+    if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
309
+	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
310
+	return CL_EMEM;
311
+    }
312
+
313
+    ret = cli_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff);
309 314
 
310 315
     free(partcnt);
311 316
     return ret;
... ...
@@ -31,6 +31,6 @@ struct nodelist *cli_bfsadd(struct nodelist *bfs, struct cl_node *n);
31 31
 void cli_failtrans(struct cl_node *root);
32 32
 void cli_fasttrie(struct cl_node *n, struct cl_node *root);
33 33
 int cli_findpos(const char *buffer, int offset, int length, const struct cli_patt *pattern);
34
-int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec);
34
+int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff);
35 35
 
36 36
 #endif
... ...
@@ -39,11 +39,11 @@
39 39
 #include "defaults.h"
40 40
 
41 41
 
42
-static int cli_addsig(struct cl_node *root, const char *virname, const char *hexsig, int sigid, int parts, int partno, int type)
42
+static int cli_addsig(struct cl_node *root, const char *virname, const char *hexsig, int sigid, int parts, int partno, int type, unsigned int mindist, unsigned int maxdist)
43 43
 {
44 44
 	struct cli_patt *new;
45
-	char *pt;
46
-	int virlen, ret;
45
+	char *pt, *hex;
46
+	int virlen, ret, i, error = 0;
47 47
 
48 48
 
49 49
     if((new = (struct cli_patt *) cli_calloc(1, sizeof(struct cli_patt))) == NULL)
... ...
@@ -53,13 +53,119 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
53 53
     new->sigid = sigid;
54 54
     new->parts = parts;
55 55
     new->partno = partno;
56
+    new->mindist = mindist;
57
+    new->maxdist = maxdist;
56 58
 
57
-    new->length = strlen(hexsig) / 2;
59
+    if(strchr(hexsig, '(')) {
60
+	    char *hexcpy, *hexnew, *start, *h;
61
+	    short int *c;
62
+
63
+	if(!(hexcpy = strdup(hexsig))) {
64
+	    free(new);
65
+	    return CL_EMEM;
66
+	}
67
+
68
+	if(!(hexnew = (char *) cli_calloc(strlen(hexsig) + 1, 1))) {
69
+	    free(hexcpy);
70
+	    free(new);
71
+	    return CL_EMEM;
72
+	}
73
+
74
+	start = pt = hexcpy;
75
+	while((pt = strchr(start, '('))) {
76
+	    *pt++ = 0;
77
+
78
+	    if(!start) {
79
+		error = 1;
80
+		break;
81
+	    }
82
+
83
+	    strcat(hexnew, start);
84
+	    strcat(hexnew, "@@");
85
+
86
+	    if(!(start = strchr(pt, ')'))) {
87
+		error = 1;
88
+		break;
89
+	    }
90
+	    *start++ = 0;
91
+
92
+	    new->alt++;
93
+	    new->altn = (unsigned short int *) realloc(new->altn, new->alt);
94
+	    new->altn[new->alt - 1] = 0;
95
+	    new->altc = (char **) realloc(new->altc, new->alt);
96
+
97
+	    for(i = 0; i < strlen(pt); i++)
98
+		if(pt[i] == '|')
99
+		    new->altn[new->alt - 1]++;
100
+
101
+	    if(!new->altn[new->alt - 1]) {
102
+		error = 1;
103
+		break;
104
+	    } else
105
+		new->altn[new->alt - 1]++;
106
+
107
+	    if(!(new->altc[new->alt - 1] = (char *) cli_calloc(new->altn[new->alt - 1], 1))) {
108
+		error = 1;
109
+		break;
110
+	    }
111
+
112
+	    for(i = 0; i < new->altn[new->alt - 1]; i++) {
113
+		if((h = cli_strtok(pt, i, "|")) == NULL) {
114
+		    error = 1;
115
+		    break;
116
+		}
117
+
118
+		if((c = cl_hex2str(h)) == NULL) {
119
+		    free(h);
120
+		    error = 1;
121
+		    break;
122
+		}
123
+
124
+		new->altc[new->alt - 1][i] = (char) *c;
125
+		free(c);
126
+		free(h);
127
+	    }
128
+
129
+	    if(error)
130
+		break;
131
+	}
132
+
133
+	if(start)
134
+	    strcat(hexnew, start);
135
+
136
+	hex = hexnew;
137
+	free(hexcpy);
138
+
139
+	if(error) {
140
+	    free(hexcpy);
141
+	    free(hexnew);
142
+	    if(new->alt) {
143
+		free(new->altn);
144
+		for(i = 0; i < new->alt; i++)
145
+		    free(new->altc[i]);
146
+		free(new->altc);
147
+	    }
148
+	    free(new);
149
+	    return CL_EMALFDB;
150
+	}
151
+
152
+    } else
153
+	hex = (char *) hexsig;
154
+
155
+
156
+    new->length = strlen(hex) / 2;
58 157
 
59 158
     if(new->length > root->maxpatlen)
60 159
 	root->maxpatlen = new->length;
61 160
 
62
-    if((new->pattern = cl_hex2str(hexsig)) == NULL) {
161
+    if((new->pattern = cl_hex2str(hex)) == NULL) {
162
+	if(new->alt) {
163
+	    free(new->altn);
164
+	    for(i = 0; i < new->alt; i++)
165
+		free(new->altc[i]);
166
+	    free(new->altc);
167
+	    free(hex);
168
+	}
63 169
 	free(new);
64 170
 	return CL_EMALFDB;
65 171
     }
... ...
@@ -70,11 +176,25 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
70 70
 	virlen = strlen(virname);
71 71
 
72 72
     if(virlen <= 0) {
73
+	if(new->alt) {
74
+	    free(new->altn);
75
+	    for(i = 0; i < new->alt; i++)
76
+		free(new->altc[i]);
77
+	    free(new->altc);
78
+	    free(hex);
79
+	}
73 80
 	free(new);
74 81
 	return CL_EMALFDB;
75 82
     }
76 83
 
77 84
     if((new->virname = cli_calloc(virlen + 1, sizeof(char))) == NULL) {
85
+	if(new->alt) {
86
+	    free(new->altn);
87
+	    for(i = 0; i < new->alt; i++)
88
+		free(new->altc[i]);
89
+	    free(new->altc);
90
+	    free(hex);
91
+	}
78 92
 	free(new);
79 93
 	return CL_EMEM;
80 94
     }
... ...
@@ -83,21 +203,107 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
83 83
 
84 84
     if((ret = cli_addpatt(root, new))) {
85 85
 	free(new->virname);
86
+	if(new->alt) {
87
+	    free(new->altn);
88
+	    for(i = 0; i < new->alt; i++)
89
+		free(new->altc[i]);
90
+	    free(new->altc);
91
+	    free(hex);
92
+	}
86 93
 	free(new);
87 94
 	return ret;
88 95
     }
89 96
 
97
+    if(new->alt)
98
+	free(hex);
99
+
90 100
     return 0;
91 101
 }
92 102
 
93 103
 int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int type)
94 104
 {
95 105
 	struct cli_patt *new;
96
-	char *pt;
106
+	char *pt, *hexcpy, *start, *n;
97 107
 	int ret, virlen, parts = 0, i, len;
108
+	int mindist = 0, maxdist = 0, error = 0;
98 109
 
99 110
 
100
-    if(strchr(hexsig, '*')) {
111
+    if(strchr(hexsig, '{')) {
112
+
113
+	root->partsigs++;
114
+
115
+	if(!(hexcpy = strdup(hexsig)))
116
+	    return CL_EMEM;
117
+
118
+
119
+	len = strlen(hexsig);
120
+	for(i = 0; i < len; i++)
121
+	    if(hexsig[i] == '{')
122
+		parts++;
123
+
124
+	if(parts)
125
+	    parts++;
126
+
127
+	start = pt = hexcpy;
128
+	for(i = 1; i <= parts; i++) {
129
+
130
+	    if(i != parts) {
131
+		pt = strchr(start, '{');
132
+		*pt++ = 0;
133
+	    }
134
+
135
+	    if((ret = cli_addsig(root, virname, start, root->partsigs, parts, i, type, mindist, maxdist))) {
136
+		cli_errmsg("cli_parse_add(): Problem adding signature.\n");
137
+		error = 1;
138
+		break;
139
+	    }
140
+
141
+	    if(i == parts)
142
+		break;
143
+
144
+	    if(!(start = strchr(pt, '}'))) {
145
+		error = 1;
146
+		break;
147
+	    }
148
+	    *start++ = 0;
149
+
150
+	    if(!pt) {
151
+		error = 1;
152
+		break;
153
+	    }
154
+
155
+	    mindist = maxdist = 0;
156
+	    if(!strchr(pt, '-')) {
157
+		if((mindist = maxdist = atoi(pt)) < 0) {
158
+		    error = 1;
159
+		    break;
160
+		}
161
+	    } else {
162
+		if((n = cli_strtok(pt, 0, "-")) != NULL) {
163
+		    if((mindist = atoi(n)) < 0) {
164
+			error = 1;
165
+			free(n);
166
+			break;
167
+		    }
168
+		    free(n);
169
+		}
170
+
171
+		if((n = cli_strtok(pt, 1, "-")) != NULL) {
172
+		    if((maxdist = atoi(n)) < 0) {
173
+			error = 1;
174
+			free(n);
175
+			break;
176
+		    }
177
+		    free(n);
178
+		}
179
+	    }
180
+	}
181
+
182
+	free(hexcpy);
183
+	if(error)
184
+	    return CL_EMALFDB;
185
+
186
+    } else if(strchr(hexsig, '*')) {
101 187
 	root->partsigs++;
102 188
 
103 189
 	len = strlen(hexsig);
... ...
@@ -114,7 +320,7 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
114 114
 		return CL_EMALFDB;
115 115
 	    }
116 116
 
117
-	    if((ret = cli_addsig(root, virname, pt, root->partsigs, parts, i, type))) {
117
+	    if((ret = cli_addsig(root, virname, pt, root->partsigs, parts, i, type, 0, 0))) {
118 118
 		cli_errmsg("cli_parse_add(): Problem adding signature.\n");
119 119
 		free(pt);
120 120
 		return ret;
... ...
@@ -124,7 +330,7 @@ int cli_parse_add(struct cl_node *root, char *virname, const char *hexsig, int t
124 124
 	}
125 125
 
126 126
     } else { /* static */
127
-	if((ret = cli_addsig(root, virname, hexsig, 0, 0, 0, type))) {
127
+	if((ret = cli_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0))) {
128 128
 	    cli_errmsg("cli_parse_add(): Problem adding signature.\n");
129 129
 	    return ret;
130 130
 	}
... ...
@@ -175,12 +381,6 @@ int cl_loaddb(const char *filename, struct cl_node **root, int *virnum)
175 175
 
176 176
     while(fgets(buffer, FILEBUFF, fd)) {
177 177
 
178
-	/* for forward compatibility */
179
-	if(strchr(buffer, '{') || strchr(buffer, '}')) {
180
-	    cli_dbgmsg("Not suported signature type detected at line %d. Skipping.\n", line);
181
-	    continue;
182
-	}
183
-
184 178
 	line++;
185 179
 	cli_chomp(buffer);
186 180
 
... ...
@@ -85,6 +85,7 @@ static int cli_scandesc(int desc, const char **virname, long int *scanned, const
85 85
 {
86 86
  	char *buffer, *buff, *endbl, *pt;
87 87
 	int bytes, buffsize, length, ret, *partcnt, type = CL_CLEAN;
88
+	unsigned long int *partoff, offset = 0;
88 89
 
89 90
 
90 91
     /* prepare the buffer */
... ...
@@ -100,6 +101,11 @@ static int cli_scandesc(int desc, const char **virname, long int *scanned, const
100 100
 	return CL_EMEM;
101 101
     }
102 102
 
103
+    if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
104
+	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
105
+	return CL_EMEM;
106
+    }
107
+
103 108
     buff = buffer;
104 109
     buff += root->maxpatlen; /* pointer to read data block */
105 110
     endbl = buff + SCANBUFF - root->maxpatlen; /* pointer to the last block
... ...
@@ -116,7 +122,7 @@ static int cli_scandesc(int desc, const char **virname, long int *scanned, const
116 116
 	if(bytes < SCANBUFF)
117 117
 	    length -= SCANBUFF - bytes;
118 118
 
119
-	if((ret = cli_scanbuff(pt, length, virname, root, partcnt, typerec)) == CL_VIRUS) {
119
+	if((ret = cli_scanbuff(pt, length, virname, root, partcnt, typerec, offset, partoff)) == CL_VIRUS) {
120 120
 	    free(buffer);
121 121
 	    free(partcnt);
122 122
 	    return ret;
... ...
@@ -126,12 +132,13 @@ static int cli_scandesc(int desc, const char **virname, long int *scanned, const
126 126
 		type = ret;
127 127
 	}
128 128
 
129
-	if(bytes == SCANBUFF)
129
+	if(bytes == SCANBUFF) {
130 130
 	    memmove(buffer, endbl, root->maxpatlen);
131
+	    offset += bytes - root->maxpatlen;
132
+	}
131 133
 
132 134
         pt = buffer;
133
-        length=buffsize;
134
-
135
+        length = buffsize;
135 136
     }
136 137
 
137 138
     free(buffer);
... ...
@@ -68,6 +68,8 @@ short int *cl_hex2str(const char *hex)
68 68
     for(i = 0; i < len; i += 2) {
69 69
 	if(hex[i] == '?') {
70 70
 	    val = CLI_IGN;
71
+	} else if(hex[i] == '@') {
72
+	    val = CLI_ALT;
71 73
 	} else {
72 74
 	    if((c = cli_hex2int(hex[i])) >= 0) {
73 75
 		val = c;