git-svn: trunk@3039
Török Edvin authored on 2007/04/29 05:15:22... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Sat Apr 28 22:26:00 EEST 2007 (edwin) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/regex_list.c: update code to use new AC matcher |
|
4 |
+ * libclamav/htmlnorm.c: fix URL truncation |
|
5 |
+ |
|
1 | 6 |
Sat Apr 28 19:51:22 CEST 2007 (tk) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav: new implementation of the Aho-Corasick pattern matcher: |
... | ... |
@@ -648,7 +648,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
648 | 648 |
case HTML_NORM: |
649 | 649 |
if (*ptr == '<') { |
650 | 650 |
#ifdef CL_EXPERIMENTAL |
651 |
- ptrend=ptr-1; /* for use by scanContents */ |
|
651 |
+ ptrend=ptr; /* for use by scanContents */ |
|
652 | 652 |
#endif |
653 | 653 |
html_output_c(file_buff_o1, file_buff_o2, '<'); |
654 | 654 |
if (in_script) { |
... | ... |
@@ -1513,7 +1513,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1513 | 1513 |
#ifdef CL_EXPERIMENTAL |
1514 | 1514 |
if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) |
1515 | 1515 |
/* end of line, append contents now, resume on next line */ |
1516 |
- html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr-1); |
|
1516 |
+ html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr); |
|
1517 | 1517 |
ptrend = NULL; |
1518 | 1518 |
#endif |
1519 | 1519 |
free(line); |
... | ... |
@@ -275,7 +275,9 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch |
275 | 275 |
rc = match_node(hostOnly ? matcher->root_regex_hostonly : matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS; |
276 | 276 |
free(buffer); |
277 | 277 |
if(!rc) |
278 |
- cli_dbgmsg("not in regex list\n"); |
|
278 |
+ cli_dbgmsg("Lookup result: not in regex list\n"); |
|
279 |
+ else |
|
280 |
+ cli_dbgmsg("Lookup result: in regex list\n"); |
|
279 | 281 |
return rc; |
280 | 282 |
} |
281 | 283 |
} |
... | ... |
@@ -551,6 +553,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
551 | 551 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
552 | 552 |
} |
553 | 553 |
else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {/*matches displayed host*/ |
554 |
+ struct cli_matcher* root; |
|
554 | 555 |
if(matcher->list_built) { |
555 | 556 |
struct cli_matcher* old_hosts = matcher->root_hosts; |
556 | 557 |
matcher->root_hosts_cnt++; |
... | ... |
@@ -560,16 +563,22 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
560 | 560 |
matcher->root_hosts = old_hosts;/* according to manpage this must still be valid*/ |
561 | 561 |
return CL_EMEM; |
562 | 562 |
} |
563 |
- memset(&matcher->root_hosts[matcher->root_hosts_cnt-1], 0, sizeof(struct cli_matcher)); |
|
564 |
- matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root = cli_calloc(1, sizeof(struct cli_ac_node)); |
|
565 |
- if(!matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root) { |
|
566 |
- matcher->root_hosts_cnt--; |
|
567 |
- return CL_EMEM; |
|
568 |
- } |
|
569 |
- cli_dbgmsg("Increased number of root_hosts in regex_list.c\n"); |
|
563 |
+ |
|
564 |
+ root = &matcher->root_hosts[matcher->root_hosts_cnt-1]; |
|
565 |
+ memset(root, 0, sizeof(struct cli_matcher)); |
|
566 |
+ |
|
567 |
+ cli_dbgmsg("regex_list: Initialising AC pattern matcher\n"); |
|
568 |
+ if((rc = cli_ac_init(root, AC_DEFAULT_MIN_DEPTH, AC_DEFAULT_MAX_DEPTH))) { |
|
569 |
+ /* no need to free previously allocated memory here */ |
|
570 |
+ cli_errmsg("regex_list: Can't initialise AC pattern matcher\n"); |
|
571 |
+ return rc; |
|
572 |
+ } |
|
570 | 573 |
matcher->list_built = 0; |
571 | 574 |
} |
572 |
- if(( rc = add_regex_list_element(&matcher->root_hosts[matcher->root_hosts_cnt-1],pattern,flags) )) |
|
575 |
+ else { |
|
576 |
+ root = &matcher->root_hosts[matcher->root_hosts_cnt-1]; |
|
577 |
+ } |
|
578 |
+ if(( rc = add_regex_list_element(root,pattern,flags) )) |
|
573 | 579 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
574 | 580 |
} |
575 | 581 |
else { |
... | ... |
@@ -627,7 +636,7 @@ void regex_list_done(struct regex_matcher* matcher) |
627 | 627 |
if(matcher->list_loaded) { |
628 | 628 |
if(matcher->root_hosts) { |
629 | 629 |
size_t i; |
630 |
- for(i=0;i<matcher->root_hosts_cnt;i++) |
|
630 |
+ for(i=0;i<matcher->root_hosts_cnt;i++) |
|
631 | 631 |
cli_ac_free(&matcher->root_hosts[i]); |
632 | 632 |
free(matcher->root_hosts); |
633 | 633 |
matcher->root_hosts=NULL; |