Browse code

Update code to use new AC matcher. Fix URL truncation.

git-svn: trunk@3039

Török Edvin authored on 2007/04/29 05:15:22
Showing 3 changed files
... ...
@@ -1,3 +1,8 @@
1
+Sat Apr 28 22:26:00 EEST 2007 (edwin)
2
+----------------------------------
3
+  * libclamav/regex_list.c: update code to use new AC matcher
4
+  * libclamav/htmlnorm.c: fix URL truncation
5
+
1 6
 Sat Apr 28 19:51:22 CEST 2007 (tk)
2 7
 ----------------------------------
3 8
   * libclamav: new implementation of the Aho-Corasick pattern matcher:
... ...
@@ -648,7 +648,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
648 648
 			case HTML_NORM:
649 649
 				if (*ptr == '<') {
650 650
 #ifdef CL_EXPERIMENTAL
651
-					ptrend=ptr-1; /* for use by scanContents */
651
+					ptrend=ptr; /* for use by scanContents */
652 652
 #endif
653 653
 					html_output_c(file_buff_o1, file_buff_o2, '<');
654 654
 					if (in_script) {
... ...
@@ -1513,7 +1513,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1513 1513
 #ifdef CL_EXPERIMENTAL
1514 1514
 		if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
1515 1515
 			/* end of line, append contents now, resume on next line */
1516
-			html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr-1);
1516
+			html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
1517 1517
 		ptrend = NULL;
1518 1518
 #endif
1519 1519
 		free(line);
... ...
@@ -275,7 +275,9 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch
275 275
 			rc = match_node(hostOnly ? matcher->root_regex_hostonly : matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS;
276 276
 		free(buffer);
277 277
 		if(!rc)
278
-			cli_dbgmsg("not in regex list\n");
278
+			cli_dbgmsg("Lookup result: not in regex list\n");
279
+		else
280
+			cli_dbgmsg("Lookup result: in regex list\n");
279 281
 		return rc;
280 282
 	}
281 283
 }
... ...
@@ -551,6 +553,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
551 551
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
552 552
 		}
553 553
 		else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {/*matches displayed host*/
554
+			struct cli_matcher* root;
554 555
  			if(matcher->list_built) {
555 556
  				struct cli_matcher* old_hosts = matcher->root_hosts;
556 557
  				matcher->root_hosts_cnt++;
... ...
@@ -560,16 +563,22 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
560 560
  					matcher->root_hosts = old_hosts;/* according to manpage this must still be valid*/
561 561
  					return CL_EMEM;
562 562
 				} 
563
- 				memset(&matcher->root_hosts[matcher->root_hosts_cnt-1], 0, sizeof(struct cli_matcher));
564
- 				matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root = cli_calloc(1, sizeof(struct cli_ac_node));
565
- 				if(!matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root) {
566
- 					matcher->root_hosts_cnt--;
567
- 					return CL_EMEM;
568
- 				}
569
- 				cli_dbgmsg("Increased number of root_hosts in regex_list.c\n");
563
+
564
+				root = &matcher->root_hosts[matcher->root_hosts_cnt-1];
565
+ 				memset(root, 0, sizeof(struct cli_matcher));
566
+
567
+				cli_dbgmsg("regex_list: Initialising AC pattern matcher\n");
568
+				if((rc = cli_ac_init(root, AC_DEFAULT_MIN_DEPTH, AC_DEFAULT_MAX_DEPTH))) {
569
+					/* no need to free previously allocated memory here */
570
+					cli_errmsg("regex_list: Can't initialise AC pattern matcher\n");
571
+					return rc;
572
+				}
570 573
  				matcher->list_built = 0;
571 574
  			}
572
- 			if(( rc = add_regex_list_element(&matcher->root_hosts[matcher->root_hosts_cnt-1],pattern,flags) ))
575
+			else {
576
+				root = &matcher->root_hosts[matcher->root_hosts_cnt-1];
577
+			}
578
+ 			if(( rc = add_regex_list_element(root,pattern,flags) ))
573 579
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
574 580
 		}
575 581
 		else {
... ...
@@ -627,7 +636,7 @@ void regex_list_done(struct regex_matcher* matcher)
627 627
 	if(matcher->list_loaded) {
628 628
 		if(matcher->root_hosts) {
629 629
 			size_t i;
630
-			for(i=0;i<matcher->root_hosts_cnt;i++)
630
+			for(i=0;i<matcher->root_hosts_cnt;i++) 
631 631
 				cli_ac_free(&matcher->root_hosts[i]);
632 632
 			free(matcher->root_hosts);
633 633
 			matcher->root_hosts=NULL;