Browse code

handle multiple matches (bb #1110); update tests for regex, re-enable test

git-svn: trunk@4018

Török Edvin authored on 2008/07/29 19:36:26
Showing 7 changed files
... ...
@@ -1,3 +1,8 @@
1
+Tue Jul 29 13:18:24 EEST 2008 (edwin)
2
+------------------------------------
3
+  * libclamav/regex_*.[ch]: handle multiple matches (bb #1110)
4
+  * unit_tests: update tests for regex, reenable test 
5
+
1 6
 Tue Jul 29 10:47:23 CEST 2008 (tk)
2 7
 ----------------------------------
3 8
   * libclamav/matcher-ac.c: add support for returning multiple matches in
... ...
@@ -58,6 +58,7 @@
58 58
 #include "jsparse/textbuf.h"
59 59
 #include "regex_suffix.h"
60 60
 /* Prototypes */
61
+static regex_t *new_preg(struct regex_matcher *matcher);
61 62
 static size_t reverse_string(char *pattern);
62 63
 static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *regex);
63 64
 static int add_static_pattern(struct regex_matcher *matcher, char* pattern);
... ...
@@ -240,6 +241,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
240 240
 {
241 241
 	char* orig_real_url = real_url;
242 242
 	struct regex_list *regex;
243
+	struct regex_list *last_match;
243 244
 
244 245
 	assert(matcher);
245 246
 	assert(real_url);
... ...
@@ -259,6 +261,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
259 259
 		char *bufrev;
260 260
 		int rc = 0;
261 261
 		struct cli_ac_data mdata;
262
+		struct cli_ac_result *res = NULL;
262 263
 
263 264
 		if(!buffer)
264 265
 			return CL_EMEM;
... ...
@@ -281,30 +284,37 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
281 281
 		reverse_string(bufrev);
282 282
 		rc = SO_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
283 283
 		if(!rc) {
284
+			free(buffer);
285
+			free(bufrev);
284 286
 			/* filter says this suffix doesn't match.
285 287
 			 * The filter has false positives, but no false
286 288
 			 * negatives */
287 289
 			return 0;
288 290
 		}
289 291
 
290
-		rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, &regex, NULL, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
292
+		rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)&regex, &res, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
291 293
 		free(bufrev);
292 294
 		cli_ac_freedata(&mdata);
293 295
 
294
-		if(rc) {
295
-			/* TODO loop over multiple virusnames here */
296
-			do {
296
+		rc = 0;
297
+		while(res) {
298
+			struct cli_ac_result *q;
299
+			regex = res->customdata;
300
+			while(!rc && regex) {
297 301
 				/* loop over multiple regexes corresponding to
298 302
 				 * this suffix */
299
-				if (!regex->preg.re_magic) {
303
+				if (!regex->preg) {
300 304
 					/* we matched a static pattern */
301 305
 					rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
302 306
 				} else {
303
-					rc = !cli_regexec(&regex->preg, buffer, 0, NULL, 0);
307
+					rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
304 308
 				}
305 309
 				if(rc) *info = regex->pattern;
306 310
 				regex = regex->nxt;
307
-			 } while(!rc && regex);
311
+			}
312
+			q = res;
313
+			res = res->next;
314
+			free(q);
308 315
 		}
309 316
 		free(buffer);
310 317
 		if(!rc)
... ...
@@ -510,20 +520,22 @@ void regex_list_done(struct regex_matcher* matcher)
510 510
 			for(i=0;i<matcher->suffix_cnt;i++) {
511 511
 				struct regex_list *r = matcher->suffix_regexes[i];
512 512
 				while(r) {
513
-					cli_regfree(&r->preg);
513
+					struct regex_list *q = r;
514 514
 					r = r->nxt;
515
+					free(q->pattern);
516
+					free(q);
515 517
 				}
516 518
 			}
517 519
 			free(matcher->suffix_regexes);
518 520
 			matcher->suffix_regexes = NULL;
519 521
 		}
520
-		if(matcher->all_regexes) {
522
+		if(matcher->all_pregs) {
521 523
 			for(i=0;i<matcher->regex_cnt;i++) {
522
-				struct regex_list *r = matcher->all_regexes[i];
523
-				free(r->pattern);
524
+				regex_t *r = matcher->all_pregs[i];
525
+				cli_regfree(r);
524 526
 				free(r);
525 527
 			}
526
-			free(matcher->all_regexes);
528
+			free(matcher->all_pregs);
527 529
 		}
528 530
 		hashtab_free(&matcher->suffix_hash);
529 531
 		matcher->list_built=0;
... ...
@@ -589,12 +601,18 @@ static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info,
589 589
 /* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
590 590
 
591 591
 /* returns 0 on success, clamav error code otherwise */
592
-static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *regex)
592
+static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *iregex)
593 593
 {
594 594
 	struct regex_matcher *matcher = cbdata;
595
+	struct regex_list *regex = cli_malloc(sizeof(*regex));
595 596
 	const struct element *el;
596 597
 
597 598
 	assert(matcher);
599
+	if(!regex)
600
+		return CL_EMEM;
601
+	regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
602
+	regex->preg = iregex->preg;
603
+	regex->nxt = NULL;
598 604
 	el = hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
599 605
 	/* TODO: what if suffixes are prefixes of each other and only one will
600 606
 	 * match? */
... ...
@@ -630,45 +648,43 @@ static size_t reverse_string(char *pattern)
630 630
 	return len;
631 631
 }
632 632
 
633
-static struct regex_list *new_regex(struct regex_matcher *matcher)
633
+static regex_t *new_preg(struct regex_matcher *matcher)
634 634
 {
635
-	struct regex_list *r;
636
-	matcher->all_regexes = cli_realloc(matcher->all_regexes, ++matcher->regex_cnt * sizeof(*matcher->all_regexes));
637
-	if(!matcher->all_regexes)
635
+	regex_t *r;
636
+	matcher->all_pregs = cli_realloc(matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
637
+	if(!matcher->all_pregs)
638 638
 		return NULL;
639 639
 	r = cli_malloc(sizeof(*r));
640 640
 	if(!r)
641 641
 		return NULL;
642
-	matcher->all_regexes[matcher->regex_cnt-1] = r;
642
+	matcher->all_pregs[matcher->regex_cnt-1] = r;
643 643
 	return r;
644 644
 }
645 645
 
646 646
 static int add_static_pattern(struct regex_matcher *matcher, char* pattern)
647 647
 {
648 648
 	size_t len;
649
-	struct regex_list *regex = new_regex(matcher);
650
-	if(!regex)
651
-		return CL_EMEM;
649
+	struct regex_list regex;
650
+	int rc;
651
+
652 652
 	len = reverse_string(pattern);
653
-	regex->nxt = NULL;
654
-	regex->pattern = cli_strdup(pattern);
655
-	regex->preg.re_magic = 0;
656
-	return add_pattern_suffix(matcher, pattern, len, regex);
653
+	regex.nxt = NULL;
654
+	regex.pattern = cli_strdup(pattern);
655
+	regex.preg = NULL;
656
+	rc = add_pattern_suffix(matcher, pattern, len, &regex);
657
+	free(regex.pattern);
658
+	return rc;
657 659
 }
658 660
 
659 661
 int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
660 662
 {
661 663
 	int rc;
662
-	struct regex_list *regex = new_regex(matcher);
664
+	struct regex_list regex;
663 665
 	size_t len;
664 666
 	/* we only match the host, so remove useless stuff */
665 667
 	const char remove_end[] = "([/?].*)?/";
666 668
 	const char remove_end2[] = "([/?].*)/";
667 669
 
668
-
669
-	if(!regex)
670
-		return CL_EMEM;
671
-
672 670
 	len = strlen(pattern);
673 671
 	if(len > sizeof(remove_end)) {
674 672
 		if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) {
... ...
@@ -681,12 +697,15 @@ int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
681 681
 		}
682 682
 	}
683 683
 	pattern[len] = '\0';
684
-	regex->pattern = NULL;
684
+	regex.pattern = NULL;
685
+
686
+	regex.preg = new_preg(matcher);
687
+	if(!regex.preg)
688
+		return CL_EMEM;
685 689
 
686
-	rc = cli_regex2suffix(pattern, regex, add_pattern_suffix, matcher);
690
+	rc = cli_regex2suffix(pattern, &regex, add_pattern_suffix, matcher);
687 691
 	if(rc) {
688
-		cli_regfree(&regex->preg);
689
-		free(regex);
692
+		cli_regfree(regex.preg);
690 693
 	}
691 694
 
692 695
 	return rc;
... ...
@@ -41,7 +41,7 @@ struct regex_matcher {
41 41
 	size_t suffix_cnt;
42 42
 	struct regex_list **suffix_regexes;
43 43
 	size_t regex_cnt;
44
-	struct regex_list **all_regexes;
44
+	regex_t **all_pregs;
45 45
 	struct cli_matcher suffixes;
46 46
 	struct filter filter;
47 47
 	int list_inited:2;
... ...
@@ -415,12 +415,12 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb
415 415
 
416 416
 	assert(regex && pattern);
417 417
 
418
-	rc = cli_regcomp(&regex->preg, pattern, REG_EXTENDED);
418
+	rc = cli_regcomp(regex->preg, pattern, REG_EXTENDED);
419 419
 	if(rc) {
420
-		size_t buflen = cli_regerror(rc, &regex->preg, NULL, 0);
420
+		size_t buflen = cli_regerror(rc, regex->preg, NULL, 0);
421 421
 		char *errbuf = cli_malloc(buflen);
422 422
 		if(errbuf) {
423
-			cli_regerror(rc, &regex->preg, errbuf, buflen);
423
+			cli_regerror(rc, regex->preg, errbuf, buflen);
424 424
 			cli_errmsg(MODULE "Error compiling regular expression %s: %s\n", pattern, errbuf);
425 425
 			free(errbuf);
426 426
 		} else {
... ...
@@ -428,10 +428,8 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb
428 428
 		}
429 429
 		return rc;
430 430
 	}
431
-#ifdef CL_DEBUG
432
-	regex->pattern = cli_strdup(pattern);
433
-#endif
434 431
 	regex->nxt = NULL;
432
+	regex->pattern = cli_strdup(pattern);
435 433
 
436 434
 	n = parse_regex(pattern, &last);
437 435
 	if(!n)
... ...
@@ -441,6 +439,7 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb
441 441
 	n->parent = &root_node;
442 442
 
443 443
 	rc = build_suffixtree_descend(n, &buf, cb, cbdata, regex);
444
+	free(regex->pattern);
444 445
 	free(buf.data);
445 446
 	destroy_tree(n);
446 447
 	return rc;
... ...
@@ -25,7 +25,7 @@
25 25
 
26 26
 struct regex_list {
27 27
 	char *pattern;
28
-	regex_t preg;
28
+	regex_t *preg;
29 29
 	struct regex_list *nxt;
30 30
 };
31 31
 typedef int (*suffix_callback)(void *cbdata, const char *suffix, size_t len, struct regex_list *regex);
... ...
@@ -333,7 +333,7 @@ int main(int argc, char **argv)
333 333
     srunner_add_suite(sr, test_cli_suite());
334 334
     srunner_add_suite(sr, test_jsnorm_suite());
335 335
     srunner_add_suite(sr, test_str_suite());
336
-    /*    srunner_add_suite(sr, test_regex_suite()); */
336
+    srunner_add_suite(sr, test_regex_suite());
337 337
     srunner_add_suite(sr, test_disasm_suite());
338 338
 
339 339
     srunner_set_log(sr, "test.log");
... ...
@@ -61,7 +61,10 @@ START_TEST (empty)
61 61
 	const char pattern[] = "";
62 62
 	int rc;
63 63
 	errmsg_expected();
64
+	regex.preg = malloc(sizeof(*regex.preg));
65
+	fail_unless(!!regex.preg, "malloc");
64 66
 	rc = cli_regex2suffix(pattern, &regex, cb_fail, NULL);
67
+	free(regex.preg);
65 68
 	fail_unless(rc == REG_EMPTY, "empty pattern");
66 69
 	fail_unless(cb_called == 0, "callback shouldn't be called");
67 70
 }
... ...
@@ -71,9 +74,12 @@ START_TEST (one)
71 71
 {
72 72
 	const char pattern[] = "a";
73 73
 	int rc;
74
+	regex.preg = malloc(sizeof(*regex.preg));
75
+	fail_unless(!!regex.preg, "malloc");
74 76
 	rc = cli_regex2suffix(pattern, &regex, cb_expect_single, "a");
75 77
 	fail_unless(rc == 0, "single character pattern");
76
-	cli_regfree(&regex.preg);
78
+	cli_regfree(regex.preg);
79
+	free(regex.preg);
77 80
 	fail_unless(cb_called == 1, "callback should be called once");
78 81
 }
79 82
 END_TEST
... ...
@@ -111,9 +117,12 @@ START_TEST (test_suffix)
111 111
 	const char **p=tests[_i];
112 112
 
113 113
 	fail_unless(!!pattern, "test pattern");
114
+	regex.preg = malloc(sizeof(*regex.preg));
115
+	fail_unless(!!regex.preg, "malloc");
114 116
 	rc = cli_regex2suffix(pattern, &regex, cb_expect_multi, tests[_i]);
115 117
 	fail_unless(rc == 0, "single character pattern");
116
-	cli_regfree(&regex.preg);
118
+	cli_regfree(regex.preg);
119
+	free(regex.preg);
117 120
 	p++;
118 121
 	while(*p++) n++;
119 122
 	fail_unless(cb_called == n,
... ...
@@ -128,7 +137,6 @@ static void setup(void)
128 128
 
129 129
 static void teardown(void)
130 130
 {
131
-	free(regex.pattern);
132 131
 }
133 132
 
134 133
 static struct regex_matcher matcher;
... ...
@@ -154,7 +162,11 @@ static const struct rtest {
154 154
 	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
155 155
 		"http://srx.main.ebayrtm.com",
156 156
 		"pages.ebay.de",
157
-		1 /* should be whitelisted */}
157
+		1 /* should be whitelisted */},
158
+	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
159
+		"http://srx.main.ebayrtm.com.evil.example.com",
160
+		"pages.ebay.de",
161
+		0}
158 162
 };
159 163
 
160 164
 START_TEST (regex_list_match_test)