Browse code

libclamav: handle relative offsets with cli_ac_data; fix offset logic

Tomasz Kojm authored on 2009/08/21 22:55:10
Showing 11 changed files
... ...
@@ -1,3 +1,7 @@
1
+Fri Aug 21 15:53:35 CEST 2009 (tk)
2
+----------------------------------
3
+ * libclamav: handle relative offsets with cli_ac_data; fix offset logic
4
+
1 5
 Fri Aug 21 02:17:11 CEST 2009 (acab)
2 6
 ------------------------------------
3 7
  * libclamav/ishield.c: properly free() header
... ...
@@ -173,7 +173,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
173 173
 	if(!root)
174 174
 	    return ret;
175 175
 
176
-	if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
176
+	if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
177 177
 	    return ret;
178 178
 
179 179
 	sret = cli_ac_scanbuff(buff, bread, NULL, NULL, NULL, engine->root[0], &mdata, 0, ret, NULL, AC_SCAN_FT, NULL);
... ...
@@ -183,7 +183,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
183 183
 	if(sret >= CL_TYPENO) {
184 184
 	    ret = sret;
185 185
 	} else {
186
-	    if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
186
+	    if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
187 187
 		return ret;
188 188
 
189 189
 	    decoded = (unsigned char *) cli_utf16toascii((char *) buff, bread);
... ...
@@ -217,7 +217,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
217 217
 			     * However when detecting whether a file is HTML or not, we need exact conversion.
218 218
 			     * (just eliminating zeros and matching would introduce false positives */
219 219
 			    if(encoding_normalize_toascii(&in_area, encoding, &out_area) >= 0 && out_area.length > 0) {
220
-				    if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))
220
+				    if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
221 221
 					    return ret;
222 222
 
223 223
 				    if(out_area.length > 0) {
... ...
@@ -769,7 +769,7 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin
769 769
     return 1;
770 770
 }
771 771
 
772
-int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint8_t tracklen)
772
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen)
773 773
 {
774 774
 	unsigned int i;
775 775
 
... ...
@@ -779,12 +779,24 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs,
779 779
 	return CL_ENULLARG;
780 780
     }
781 781
 
782
-    data->partsigs = partsigs;
782
+    data->reloffsigs = reloffsigs;
783
+    if(reloffsigs) {
784
+	data->offset = (uint32_t *) cli_malloc(reloffsigs * 2 * sizeof(uint32_t));
785
+	if(!data->offset) {
786
+	    cli_errmsg("cli_ac_init: Can't allocate memory for data->offset\n");
787
+	    return CL_EMEM;
788
+	}
789
+	for(i = 0; i < reloffsigs * 2; i += 2)
790
+	    data->offset[i] = CLI_OFF_NONE;
791
+    }
783 792
 
793
+    data->partsigs = partsigs;
784 794
     if(partsigs) {
785 795
 	data->offmatrix = (int32_t ***) cli_calloc(partsigs, sizeof(int32_t **));
786 796
 	if(!data->offmatrix) {
787 797
 	    cli_errmsg("cli_ac_init: Can't allocate memory for data->offmatrix\n");
798
+	    if(reloffsigs)
799
+		free(data->offset);
788 800
 	    return CL_EMEM;
789 801
 	}
790 802
     }
... ...
@@ -795,6 +807,8 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs,
795 795
 	if(!data->lsigcnt) {
796 796
 	    if(partsigs)
797 797
 		free(data->offmatrix);
798
+	    if(reloffsigs)
799
+		free(data->offset);
798 800
 	    cli_errmsg("cli_ac_init: Can't allocate memory for data->lsigcnt\n");
799 801
 	    return CL_EMEM;
800 802
 	}
... ...
@@ -803,17 +817,19 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs,
803 803
 	    free(data->lsigcnt);
804 804
 	    if(partsigs)
805 805
 		free(data->offmatrix);
806
+	    if(reloffsigs)
807
+		free(data->offset);
806 808
 	    cli_errmsg("cli_ac_init: Can't allocate memory for data->lsigcnt[0]\n");
807 809
 	    return CL_EMEM;
808 810
 	}
809 811
 	for(i = 1; i < lsigs; i++)
810 812
 	    data->lsigcnt[i] = data->lsigcnt[0] + 64 * i;
811
-     }
812
- 
813
+    }
814
+
813 815
     return CL_SUCCESS;
814 816
 }
815 817
 
816
-int cli_ac_caloff(struct cli_matcher *root, int fd)
818
+int cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *data, int fd)
817 819
 {
818 820
 	int ret;
819 821
 	unsigned int i;
... ...
@@ -824,8 +840,8 @@ int cli_ac_caloff(struct cli_matcher *root, int fd)
824 824
     for(i = 0; i < root->ac_reloff_num; i++) {
825 825
 	patt = root->ac_reloff[i];
826 826
 	if(fd == -1) {
827
-	    patt->offset_min = CLI_OFF_NONE;
828
-	} else if((ret = cli_caloff(NULL, &info, fd, root->type, patt->offdata, &patt->offset_min, &patt->offset_max))) {
827
+	    data->offset[patt->offset_min] = CLI_OFF_NONE;
828
+	} else if((ret = cli_caloff(NULL, &info, fd, root->type, patt->offdata, &data->offset[patt->offset_min], &data->offset[patt->offset_max]))) {
829 829
 	    cli_errmsg("cli_ac_caloff: Can't calculate relative offset in signature for %s\n", patt->virname);
830 830
 	    if(info.exeinfo.section)
831 831
 		free(info.exeinfo.section);
... ...
@@ -859,6 +875,11 @@ void cli_ac_freedata(struct cli_ac_data *data)
859 859
 	free(data->lsigcnt);
860 860
 	data->lsigs = 0;
861 861
     }
862
+
863
+    if(data && data->reloffsigs) {
864
+	free(data->offset);
865
+	data->reloffsigs = 0;
866
+    }
862 867
 }
863 868
 
864 869
 inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
... ...
@@ -926,7 +947,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
926 926
 	    while(patt) {
927 927
 		bp = i + 1 - patt->depth;
928 928
 		pt = patt;
929
-		/*
929
+	 	/*
930 930
 		while(pt) {
931 931
 		    if((pt->type && !(mode & AC_SCAN_FT)) || (!pt->type && !(mode & AC_SCAN_VIR))) {
932 932
 			pt = pt->next_same;
... ...
@@ -958,9 +979,16 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
958 958
 			}
959 959
 			realoff = offset + bp - pt->prefix_length;
960 960
 			if(pt->offset_min != CLI_OFF_ANY && (!pt->sigid || pt->partno == 1)) {
961
-			    if(pt->offset_max > realoff || pt->offset_min < realoff) {
962
-				pt = pt->next_same;
963
-				continue;
961
+			    if(pt->offdata[0] == CLI_OFF_ABSOLUTE) {
962
+				if(pt->offset_max < realoff || pt->offset_min > realoff) {
963
+				    pt = pt->next_same;
964
+				    continue;
965
+				}
966
+			    } else {
967
+				if(mdata->offset[pt->offset_min] == CLI_OFF_NONE || mdata->offset[pt->offset_max] < realoff || mdata->offset[pt->offset_min] > realoff) {
968
+				    pt = pt->next_same;
969
+				    continue;
970
+				}
964 971
 			    }
965 972
 			}
966 973
 			if(pt->sigid) { /* it's a partial signature */
... ...
@@ -1477,6 +1505,8 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
1477 1477
 	    return CL_EMEM;
1478 1478
 	}
1479 1479
 	root->ac_reloff[root->ac_reloff_num] = new;
1480
+	new->offset_min = root->ac_reloff_num * 2;
1481
+	new->offset_max = new->offset_min + 1;
1480 1482
 	root->ac_reloff_num++;
1481 1483
     }
1482 1484
 
... ...
@@ -33,8 +33,9 @@
33 33
 
34 34
 struct cli_ac_data {
35 35
     int32_t ***offmatrix;
36
-    uint32_t partsigs, lsigs;
36
+    uint32_t partsigs, lsigs, reloffsigs;
37 37
     uint32_t **lsigcnt;
38
+    uint32_t *offset;
38 39
 };
39 40
 
40 41
 struct cli_ac_alt {
... ...
@@ -79,13 +80,13 @@ struct cli_ac_result {
79 79
 #include "matcher.h"
80 80
 
81 81
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
82
-int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint8_t tracklen);
82
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen);
83 83
 int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigned int *cnt, uint64_t *ids, unsigned int parse_only);
84 84
 void cli_ac_freedata(struct cli_ac_data *data);
85 85
 int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, void **customdata, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t offset, cli_file_t ftype, struct cli_matched_type **ftoffset, unsigned int mode, const cli_ctx *ctx);
86 86
 int cli_ac_buildtrie(struct cli_matcher *root);
87 87
 int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth);
88
-int cli_ac_caloff(struct cli_matcher *root, int fd);
88
+int cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *data, int fd);
89 89
 void cli_ac_free(struct cli_matcher *root);
90 90
 int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options);
91 91
 
... ...
@@ -55,8 +55,12 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const
55 55
 	cli_errmsg("cli_bm_addpatt: Can't calculate offset for signature %s\n", pattern->virname);
56 56
 	return ret;
57 57
     }
58
-    if(pattern->offdata[0] != CLI_OFF_ANY && pattern->offdata[0] != CLI_OFF_ABSOLUTE)
59
-	root->bm_reloff_num++;
58
+    if(pattern->offdata[0] != CLI_OFF_ANY) {
59
+	if(pattern->offdata[0] == CLI_OFF_ABSOLUTE)
60
+	    root->bm_absoff_num++;
61
+	else
62
+	    root->bm_reloff_num++;
63
+    }
60 64
 
61 65
 #if BM_MIN_LENGTH == BM_BLOCK_SIZE
62 66
     /* try to load balance bm_suffix (at the cost of bm_shift) */
... ...
@@ -154,7 +158,7 @@ void cli_bm_free(struct cli_matcher *root)
154 154
 
155 155
 int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, int fd)
156 156
 {
157
-	uint32_t i, j, off;
157
+	uint32_t i, j, off, off_min, off_max;
158 158
 	uint8_t found, pchain, shift;
159 159
 	uint16_t idx, idxchk;
160 160
 	struct cli_bm_patt *p;
... ...
@@ -163,7 +167,6 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
163 163
         struct cli_target_info info;
164 164
         int ret;
165 165
 
166
-
167 166
     if(!root || !root->bm_shift)
168 167
 	return CL_CLEAN;
169 168
 
... ...
@@ -226,16 +229,19 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
226 226
 		if(found && p->length + p->prefix_length == j) {
227 227
 		    if(p->offset_min != CLI_OFF_ANY) {
228 228
 			if(p->offdata[0] != CLI_OFF_ABSOLUTE) {
229
-			    ret = cli_caloff(NULL, &info, fd, root->type, p->offdata, &p->offset_min, &p->offset_max);
229
+			    ret = cli_caloff(NULL, &info, fd, root->type, p->offdata, &off_min, &off_max);
230 230
 			    if(ret != CL_SUCCESS) {
231 231
 				cli_errmsg("cli_bm_scanbuff: Can't calculate relative offset in signature for %s\n", p->virname);
232 232
 				if(info.exeinfo.section)
233 233
 				    free(info.exeinfo.section);
234 234
 				return ret;
235 235
 			    }
236
+			} else {
237
+			    off_min = p->offset_min;
238
+			    off_max = p->offset_max;
236 239
 			}
237 240
 			off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
238
-			if(p->offset_max > off || p->offset_min < off) {
241
+			if(off_max < off || off_min > off) {
239 242
 			    p = p->next;
240 243
 			    continue;
241 244
 			}
... ...
@@ -73,7 +73,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
73 73
 
74 74
     if(troot) {
75 75
 
76
-	if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)))
76
+	if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
77 77
 	    return ret;
78 78
 
79 79
 	if(troot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, troot, offset, -1)) != CL_VIRUS)
... ...
@@ -86,7 +86,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
86 86
 	    return ret;
87 87
     }
88 88
 
89
-    if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)))
89
+    if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
90 90
 	return ret;
91 91
 
92 92
     if(groot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, groot, offset, -1)) != CL_VIRUS)
... ...
@@ -366,11 +366,11 @@ int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struc
366 366
     }
367 367
 
368 368
     if(!ftonly)
369
-	if((ret = cli_ac_caloff(groot, desc)) || (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)))
369
+	if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) || (ret = cli_ac_caloff(groot, &gdata, desc)))
370 370
 	    return ret;
371 371
 
372 372
     if(troot) {
373
-	if((ret = cli_ac_caloff(troot, desc)) || (ret = cli_ac_initdata(&tdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) {
373
+	if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) || (ret = cli_ac_caloff(troot, &tdata, desc))) {
374 374
 	    if(!ftonly)
375 375
 		cli_ac_freedata(&gdata);
376 376
 	    return ret;
... ...
@@ -76,7 +76,7 @@ struct cli_matcher {
76 76
     struct cli_bm_patt **bm_suffix;
77 77
     struct cli_hashset md5_sizes_hs;
78 78
     uint32_t *soff, soff_len; /* for PE section sigs */
79
-    uint32_t bm_patterns, bm_reloff_num;
79
+    uint32_t bm_patterns, bm_reloff_num, bm_absoff_num;
80 80
 
81 81
     /* Extended Aho-Corasick */
82 82
     uint32_t ac_partsigs, ac_nodes, ac_patterns, ac_lsigs;
... ...
@@ -84,7 +84,7 @@ struct cli_matcher {
84 84
     struct cli_ac_node *ac_root, **ac_nodetable;
85 85
     struct cli_ac_patt **ac_pattable;
86 86
     struct cli_ac_patt **ac_reloff;
87
-    uint32_t ac_reloff_num;
87
+    uint32_t ac_reloff_num, ac_absoff_num;
88 88
     uint8_t ac_mindepth, ac_maxdepth;
89 89
 
90 90
     uint16_t maxpatlen;
... ...
@@ -2152,7 +2152,7 @@ int cl_engine_compile(struct cl_engine *engine)
2152 2152
 	if((root = engine->root[i])) {
2153 2153
 	    if((ret = cli_ac_buildtrie(root)))
2154 2154
 		return ret;
2155
-	    cli_dbgmsg("matcher[%u]: %s: AC sigs: %u (reloff: %u) BM sigs: %u (reloff: %u) %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->bm_patterns, root->bm_reloff_num, root->ac_only ? "(ac_only mode)" : "");
2155
+	    cli_dbgmsg("matcher[%u]: %s: AC sigs: %u (reloff: %u, absoff: %u) BM sigs: %u (reloff: %u, absoff: %u) %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->ac_absoff_num, root->bm_patterns, root->bm_reloff_num, root->bm_absoff_num, root->ac_only ? "(ac_only mode)" : "");
2156 2156
 	}
2157 2157
     }
2158 2158
 
... ...
@@ -289,7 +289,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
289 289
 		buffer[buffer_len]=0;
290 290
 		cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
291 291
 
292
-		if((rc = cli_ac_initdata(&mdata, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
292
+		if((rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
293 293
 			return rc;
294 294
 
295 295
 		bufrev = cli_strdup(buffer);
... ...
@@ -1062,10 +1062,10 @@ static int cli_scanscript(int desc, cli_ctx *ctx)
1062 1062
 	text_normalize_init(&state, normalized, SCANBUFF + maxpatlen);
1063 1063
 	ret = CL_CLEAN;
1064 1064
 
1065
-	if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)))
1065
+	if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
1066 1066
 	    return ret;
1067 1067
 
1068
-	if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) {
1068
+	if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
1069 1069
 	    cli_ac_freedata(&tmdata);
1070 1070
 	    return ret;
1071 1071
 	}
... ...
@@ -76,7 +76,7 @@ START_TEST (test_ac_scanbuff) {
76 76
     ret = cli_ac_buildtrie(root);
77 77
     fail_unless(ret == CL_SUCCESS, "cli_ac_buildtrie() failed");
78 78
 
79
-    ret = cli_ac_initdata(&mdata, root->ac_partsigs, 0, CLI_DEFAULT_AC_TRACKLEN);
79
+    ret = cli_ac_initdata(&mdata, root->ac_partsigs, 0, 0, CLI_DEFAULT_AC_TRACKLEN);
80 80
     fail_unless(ret == CL_SUCCESS, "cli_ac_initdata() failed");
81 81
 
82 82
     for(i = 0; ac_testdata[i].data; i++) {