git-svn: trunk@4018
Török Edvin authored on 2008/07/29 19:36:26... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Tue Jul 29 13:18:24 EEST 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/regex_*.[ch]: handle multiple matches (bb #1110) |
|
4 |
+ * unit_tests: update tests for regex, reenable test |
|
5 |
+ |
|
1 | 6 |
Tue Jul 29 10:47:23 CEST 2008 (tk) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/matcher-ac.c: add support for returning multiple matches in |
... | ... |
@@ -58,6 +58,7 @@ |
58 | 58 |
#include "jsparse/textbuf.h" |
59 | 59 |
#include "regex_suffix.h" |
60 | 60 |
/* Prototypes */ |
61 |
+static regex_t *new_preg(struct regex_matcher *matcher); |
|
61 | 62 |
static size_t reverse_string(char *pattern); |
62 | 63 |
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *regex); |
63 | 64 |
static int add_static_pattern(struct regex_matcher *matcher, char* pattern); |
... | ... |
@@ -240,6 +241,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
240 | 240 |
{ |
241 | 241 |
char* orig_real_url = real_url; |
242 | 242 |
struct regex_list *regex; |
243 |
+ struct regex_list *last_match; |
|
243 | 244 |
|
244 | 245 |
assert(matcher); |
245 | 246 |
assert(real_url); |
... | ... |
@@ -259,6 +261,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
259 | 259 |
char *bufrev; |
260 | 260 |
int rc = 0; |
261 | 261 |
struct cli_ac_data mdata; |
262 |
+ struct cli_ac_result *res = NULL; |
|
262 | 263 |
|
263 | 264 |
if(!buffer) |
264 | 265 |
return CL_EMEM; |
... | ... |
@@ -281,30 +284,37 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
281 | 281 |
reverse_string(bufrev); |
282 | 282 |
rc = SO_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1; |
283 | 283 |
if(!rc) { |
284 |
+ free(buffer); |
|
285 |
+ free(bufrev); |
|
284 | 286 |
/* filter says this suffix doesn't match. |
285 | 287 |
* The filter has false positives, but no false |
286 | 288 |
* negatives */ |
287 | 289 |
return 0; |
288 | 290 |
} |
289 | 291 |
|
290 |
- rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, &regex, NULL, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL); |
|
292 |
+ rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)&regex, &res, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL); |
|
291 | 293 |
free(bufrev); |
292 | 294 |
cli_ac_freedata(&mdata); |
293 | 295 |
|
294 |
- if(rc) { |
|
295 |
- /* TODO loop over multiple virusnames here */ |
|
296 |
- do { |
|
296 |
+ rc = 0; |
|
297 |
+ while(res) { |
|
298 |
+ struct cli_ac_result *q; |
|
299 |
+ regex = res->customdata; |
|
300 |
+ while(!rc && regex) { |
|
297 | 301 |
/* loop over multiple regexes corresponding to |
298 | 302 |
* this suffix */ |
299 |
- if (!regex->preg.re_magic) { |
|
303 |
+ if (!regex->preg) { |
|
300 | 304 |
/* we matched a static pattern */ |
301 | 305 |
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url); |
302 | 306 |
} else { |
303 |
- rc = !cli_regexec(&regex->preg, buffer, 0, NULL, 0); |
|
307 |
+ rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0); |
|
304 | 308 |
} |
305 | 309 |
if(rc) *info = regex->pattern; |
306 | 310 |
regex = regex->nxt; |
307 |
- } while(!rc && regex); |
|
311 |
+ } |
|
312 |
+ q = res; |
|
313 |
+ res = res->next; |
|
314 |
+ free(q); |
|
308 | 315 |
} |
309 | 316 |
free(buffer); |
310 | 317 |
if(!rc) |
... | ... |
@@ -510,20 +520,22 @@ void regex_list_done(struct regex_matcher* matcher) |
510 | 510 |
for(i=0;i<matcher->suffix_cnt;i++) { |
511 | 511 |
struct regex_list *r = matcher->suffix_regexes[i]; |
512 | 512 |
while(r) { |
513 |
- cli_regfree(&r->preg); |
|
513 |
+ struct regex_list *q = r; |
|
514 | 514 |
r = r->nxt; |
515 |
+ free(q->pattern); |
|
516 |
+ free(q); |
|
515 | 517 |
} |
516 | 518 |
} |
517 | 519 |
free(matcher->suffix_regexes); |
518 | 520 |
matcher->suffix_regexes = NULL; |
519 | 521 |
} |
520 |
- if(matcher->all_regexes) { |
|
522 |
+ if(matcher->all_pregs) { |
|
521 | 523 |
for(i=0;i<matcher->regex_cnt;i++) { |
522 |
- struct regex_list *r = matcher->all_regexes[i]; |
|
523 |
- free(r->pattern); |
|
524 |
+ regex_t *r = matcher->all_pregs[i]; |
|
525 |
+ cli_regfree(r); |
|
524 | 526 |
free(r); |
525 | 527 |
} |
526 |
- free(matcher->all_regexes); |
|
528 |
+ free(matcher->all_pregs); |
|
527 | 529 |
} |
528 | 530 |
hashtab_free(&matcher->suffix_hash); |
529 | 531 |
matcher->list_built=0; |
... | ... |
@@ -589,12 +601,18 @@ static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, |
589 | 589 |
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */ |
590 | 590 |
|
591 | 591 |
/* returns 0 on success, clamav error code otherwise */ |
592 |
-static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *regex) |
|
592 |
+static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, struct regex_list *iregex) |
|
593 | 593 |
{ |
594 | 594 |
struct regex_matcher *matcher = cbdata; |
595 |
+ struct regex_list *regex = cli_malloc(sizeof(*regex)); |
|
595 | 596 |
const struct element *el; |
596 | 597 |
|
597 | 598 |
assert(matcher); |
599 |
+ if(!regex) |
|
600 |
+ return CL_EMEM; |
|
601 |
+ regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL; |
|
602 |
+ regex->preg = iregex->preg; |
|
603 |
+ regex->nxt = NULL; |
|
598 | 604 |
el = hashtab_find(&matcher->suffix_hash, suffix, suffix_len); |
599 | 605 |
/* TODO: what if suffixes are prefixes of eachother and only one will |
600 | 606 |
* match? */ |
... | ... |
@@ -630,45 +648,43 @@ static size_t reverse_string(char *pattern) |
630 | 630 |
return len; |
631 | 631 |
} |
632 | 632 |
|
633 |
-static struct regex_list *new_regex(struct regex_matcher *matcher) |
|
633 |
+static regex_t *new_preg(struct regex_matcher *matcher) |
|
634 | 634 |
{ |
635 |
- struct regex_list *r; |
|
636 |
- matcher->all_regexes = cli_realloc(matcher->all_regexes, ++matcher->regex_cnt * sizeof(*matcher->all_regexes)); |
|
637 |
- if(!matcher->all_regexes) |
|
635 |
+ regex_t *r; |
|
636 |
+ matcher->all_pregs = cli_realloc(matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs)); |
|
637 |
+ if(!matcher->all_pregs) |
|
638 | 638 |
return NULL; |
639 | 639 |
r = cli_malloc(sizeof(*r)); |
640 | 640 |
if(!r) |
641 | 641 |
return NULL; |
642 |
- matcher->all_regexes[matcher->regex_cnt-1] = r; |
|
642 |
+ matcher->all_pregs[matcher->regex_cnt-1] = r; |
|
643 | 643 |
return r; |
644 | 644 |
} |
645 | 645 |
|
646 | 646 |
static int add_static_pattern(struct regex_matcher *matcher, char* pattern) |
647 | 647 |
{ |
648 | 648 |
size_t len; |
649 |
- struct regex_list *regex = new_regex(matcher); |
|
650 |
- if(!regex) |
|
651 |
- return CL_EMEM; |
|
649 |
+ struct regex_list regex; |
|
650 |
+ int rc; |
|
651 |
+ |
|
652 | 652 |
len = reverse_string(pattern); |
653 |
- regex->nxt = NULL; |
|
654 |
- regex->pattern = cli_strdup(pattern); |
|
655 |
- regex->preg.re_magic = 0; |
|
656 |
- return add_pattern_suffix(matcher, pattern, len, regex); |
|
653 |
+ regex.nxt = NULL; |
|
654 |
+ regex.pattern = cli_strdup(pattern); |
|
655 |
+ regex.preg = NULL; |
|
656 |
+ rc = add_pattern_suffix(matcher, pattern, len, &regex); |
|
657 |
+ free(regex.pattern); |
|
658 |
+ return rc; |
|
657 | 659 |
} |
658 | 660 |
|
659 | 661 |
int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern) |
660 | 662 |
{ |
661 | 663 |
int rc; |
662 |
- struct regex_list *regex = new_regex(matcher); |
|
664 |
+ struct regex_list regex; |
|
663 | 665 |
size_t len; |
664 | 666 |
/* we only match the host, so remove useless stuff */ |
665 | 667 |
const char remove_end[] = "([/?].*)?/"; |
666 | 668 |
const char remove_end2[] = "([/?].*)/"; |
667 | 669 |
|
668 |
- |
|
669 |
- if(!regex) |
|
670 |
- return CL_EMEM; |
|
671 |
- |
|
672 | 670 |
len = strlen(pattern); |
673 | 671 |
if(len > sizeof(remove_end)) { |
674 | 672 |
if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) { |
... | ... |
@@ -681,12 +697,15 @@ int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern) |
681 | 681 |
} |
682 | 682 |
} |
683 | 683 |
pattern[len] = '\0'; |
684 |
- regex->pattern = NULL; |
|
684 |
+ regex.pattern = NULL; |
|
685 |
+ |
|
686 |
+ regex.preg = new_preg(matcher); |
|
687 |
+ if(!regex.preg) |
|
688 |
+ return CL_EMEM; |
|
685 | 689 |
|
686 |
- rc = cli_regex2suffix(pattern, regex, add_pattern_suffix, matcher); |
|
690 |
+ rc = cli_regex2suffix(pattern, &regex, add_pattern_suffix, matcher); |
|
687 | 691 |
if(rc) { |
688 |
- cli_regfree(&regex->preg); |
|
689 |
- free(regex); |
|
692 |
+ cli_regfree(regex.preg); |
|
690 | 693 |
} |
691 | 694 |
|
692 | 695 |
return rc; |
... | ... |
@@ -415,12 +415,12 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb |
415 | 415 |
|
416 | 416 |
assert(regex && pattern); |
417 | 417 |
|
418 |
- rc = cli_regcomp(&regex->preg, pattern, REG_EXTENDED); |
|
418 |
+ rc = cli_regcomp(regex->preg, pattern, REG_EXTENDED); |
|
419 | 419 |
if(rc) { |
420 |
- size_t buflen = cli_regerror(rc, &regex->preg, NULL, 0); |
|
420 |
+ size_t buflen = cli_regerror(rc, regex->preg, NULL, 0); |
|
421 | 421 |
char *errbuf = cli_malloc(buflen); |
422 | 422 |
if(errbuf) { |
423 |
- cli_regerror(rc, &regex->preg, errbuf, buflen); |
|
423 |
+ cli_regerror(rc, regex->preg, errbuf, buflen); |
|
424 | 424 |
cli_errmsg(MODULE "Error compiling regular expression %s: %s\n", pattern, errbuf); |
425 | 425 |
free(errbuf); |
426 | 426 |
} else { |
... | ... |
@@ -428,10 +428,8 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb |
428 | 428 |
} |
429 | 429 |
return rc; |
430 | 430 |
} |
431 |
-#ifdef CL_DEBUG |
|
432 |
- regex->pattern = cli_strdup(pattern); |
|
433 |
-#endif |
|
434 | 431 |
regex->nxt = NULL; |
432 |
+ regex->pattern = cli_strdup(pattern); |
|
435 | 433 |
|
436 | 434 |
n = parse_regex(pattern, &last); |
437 | 435 |
if(!n) |
... | ... |
@@ -441,6 +439,7 @@ int cli_regex2suffix(const char *pattern, struct regex_list *regex, suffix_callb |
441 | 441 |
n->parent = &root_node; |
442 | 442 |
|
443 | 443 |
rc = build_suffixtree_descend(n, &buf, cb, cbdata, regex); |
444 |
+ free(regex->pattern); |
|
444 | 445 |
free(buf.data); |
445 | 446 |
destroy_tree(n); |
446 | 447 |
return rc; |
... | ... |
@@ -333,7 +333,7 @@ int main(int argc, char **argv) |
333 | 333 |
srunner_add_suite(sr, test_cli_suite()); |
334 | 334 |
srunner_add_suite(sr, test_jsnorm_suite()); |
335 | 335 |
srunner_add_suite(sr, test_str_suite()); |
336 |
- /* srunner_add_suite(sr, test_regex_suite()); */ |
|
336 |
+ srunner_add_suite(sr, test_regex_suite()); |
|
337 | 337 |
srunner_add_suite(sr, test_disasm_suite()); |
338 | 338 |
|
339 | 339 |
srunner_set_log(sr, "test.log"); |
... | ... |
@@ -61,7 +61,10 @@ START_TEST (empty) |
61 | 61 |
const char pattern[] = ""; |
62 | 62 |
int rc; |
63 | 63 |
errmsg_expected(); |
64 |
+ regex.preg = malloc(sizeof(*regex.preg)); |
|
65 |
+ fail_unless(!!regex.preg, "malloc"); |
|
64 | 66 |
rc = cli_regex2suffix(pattern, &regex, cb_fail, NULL); |
67 |
+ free(regex.preg); |
|
65 | 68 |
fail_unless(rc == REG_EMPTY, "empty pattern"); |
66 | 69 |
fail_unless(cb_called == 0, "callback shouldn't be called"); |
67 | 70 |
} |
... | ... |
@@ -71,9 +74,12 @@ START_TEST (one) |
71 | 71 |
{ |
72 | 72 |
const char pattern[] = "a"; |
73 | 73 |
int rc; |
74 |
+ regex.preg = malloc(sizeof(*regex.preg)); |
|
75 |
+ fail_unless(!!regex.preg, "malloc"); |
|
74 | 76 |
rc = cli_regex2suffix(pattern, &regex, cb_expect_single, "a"); |
75 | 77 |
fail_unless(rc == 0, "single character pattern"); |
76 |
- cli_regfree(&regex.preg); |
|
78 |
+ cli_regfree(regex.preg); |
|
79 |
+ free(regex.preg); |
|
77 | 80 |
fail_unless(cb_called == 1, "callback should be called once"); |
78 | 81 |
} |
79 | 82 |
END_TEST |
... | ... |
@@ -111,9 +117,12 @@ START_TEST (test_suffix) |
111 | 111 |
const char **p=tests[_i]; |
112 | 112 |
|
113 | 113 |
fail_unless(!!pattern, "test pattern"); |
114 |
+ regex.preg = malloc(sizeof(*regex.preg)); |
|
115 |
+ fail_unless(!!regex.preg, "malloc"); |
|
114 | 116 |
rc = cli_regex2suffix(pattern, &regex, cb_expect_multi, tests[_i]); |
115 | 117 |
fail_unless(rc == 0, "single character pattern"); |
116 |
- cli_regfree(&regex.preg); |
|
118 |
+ cli_regfree(regex.preg); |
|
119 |
+ free(regex.preg); |
|
117 | 120 |
p++; |
118 | 121 |
while(*p++) n++; |
119 | 122 |
fail_unless(cb_called == n, |
... | ... |
@@ -128,7 +137,6 @@ static void setup(void) |
128 | 128 |
|
129 | 129 |
static void teardown(void) |
130 | 130 |
{ |
131 |
- free(regex.pattern); |
|
132 | 131 |
} |
133 | 132 |
|
134 | 133 |
static struct regex_matcher matcher; |
... | ... |
@@ -154,7 +162,11 @@ static const struct rtest { |
154 | 154 |
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/", |
155 | 155 |
"http://srx.main.ebayrtm.com", |
156 | 156 |
"pages.ebay.de", |
157 |
- 1 /* should be whitelisted */} |
|
157 |
+ 1 /* should be whitelisted */}, |
|
158 |
+ {".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/", |
|
159 |
+ "http://srx.main.ebayrtm.com.evil.example.com", |
|
160 |
+ "pages.ebay.de", |
|
161 |
+ 0} |
|
158 | 162 |
}; |
159 | 163 |
|
160 | 164 |
START_TEST (regex_list_match_test) |