Browse code

phishing: fixed bugs and updated docs

git-svn: trunk@2282

aCaB authored on 2006/09/17 00:49:27
Showing 5 changed files
... ...
@@ -1,3 +1,8 @@
1
+Sat Sep 16 17:46:49 CEST 2006 (acab)
2
+------------------------------------
3
+  * phishing: fixed string truncation, crashes and updated relevant
4
+              documentation (patch from Edvin)
5
+
1 6
 Sat Sep 16 14:30:29 CEST 2006 (acab)
2 7
 ------------------------------------
3 8
   * libclamav/petite.h: fixed inconsistent function declaration.
... ...
@@ -63,6 +63,16 @@ RealURL\InsetSpace ~
63 63
 DisplayedURL
64 64
 \end_layout
65 65
 
66
+\begin_layout Standard
67
+or:
68
+\end_layout
69
+
70
+\begin_layout Standard
71
+
72
+\series bold
73
+H RealURL
74
+\end_layout
75
+
66 76
 \begin_layout Itemize
67 77
 Where 
68 78
 \noun on
... ...
@@ -82,7 +92,11 @@ R regex, has to match entire url, see section
82 82
 \end_layout
83 83
 
84 84
 \begin_layout Description
85
-H has to match the host part of url only (a simple pattern, i.e.
85
+H has to match the host part of 
86
+\noun on
87
+realURL 
88
+\noun default
89
+only (a simple pattern, i.e.
86 90
  it is matched literally)
87 91
 \end_layout
88 92
 
... ...
@@ -138,7 +152,8 @@ realURL
138 138
 \noun default
139 139
 , and its content is the 
140 140
 \noun on
141
-displayedURL
141
+displayedURL.
142
+ 
142 143
 \end_layout
143 144
 
144 145
 \begin_layout Itemize
... ...
@@ -242,7 +257,23 @@ H
242 242
 \noun default
243 243
  
244 244
 \series default
245
-flag, then the 2nd href will match too.
245
+flag, then a line like:
246
+\end_layout
247
+
248
+\begin_layout Quote
249
+H paypal.com
250
+\end_layout
251
+
252
+\begin_layout Standard
253
+will match <a href=
254
+\begin_inset Quotes erd
255
+\end_inset
256
+
257
+http://owned.com
258
+\begin_inset Quotes erd
259
+\end_inset
260
+
261
+>paypal.com</a>.
246 262
 \end_layout
247 263
 
248 264
 \begin_layout Subsubsection
249 265
Binary files a/clamav-devel/docs/phishsigs_howto.pdf and b/clamav-devel/docs/phishsigs_howto.pdf differ
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phishcheck.c,v $
22
+ *  Revision 1.6  2006/09/16 15:49:27  acab
23
+ *  phishing: fixed bugs and updated docs
24
+ *
22 25
  *  Revision 1.5  2006/09/16 05:59:14  njh
23 26
  *  Fixed compiler warning
24 27
  *
... ...
@@ -639,7 +642,7 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len)
639 639
 
640 640
 	/* strip trailing @what */
641 641
 	if(what_len <= (size_t)(str_end - sbegin)) {
642
-		str_end_what = str_end - what_len;
642
+		str_end_what = str_end - what_len + 1;
643 643
 		while((str_end_what > sbegin) &&
644 644
 		      (strncmp(str_end_what, what, what_len) == 0)) {
645 645
 			str_end -= what_len;
... ...
@@ -648,8 +651,8 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len)
648 648
 	}
649 649
 
650 650
 	*begin = sbegin++;
651
-	while(sbegin+what_len < str_end) {
652
-		while(sbegin+what_len<str_end && !strncmp(sbegin,what,what_len)) {
651
+	while(sbegin+what_len <= str_end) {
652
+		while(sbegin+what_len<=str_end && !strncmp(sbegin,what,what_len)) {
653 653
 			const char* src = sbegin+what_len;
654 654
 			/* move string */
655 655
 			memmove(sbegin,src,str_end-src+1);
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: regex_list.c,v $
22
+ *  Revision 1.2  2006/09/16 15:49:27  acab
23
+ *  phishing: fixed bugs and updated docs
24
+ *
22 25
  *  Revision 1.1  2006/09/12 19:38:39  acab
23 26
  *  Phishing module merge - libclamav
24 27
  *
... ...
@@ -232,9 +235,11 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch
232 232
 			return CL_EMEM;
233 233
 
234 234
 		strncpy(buffer,real_url,real_len);
235
-		buffer[real_len]=' ';
235
+		buffer[real_len]=hostOnly ? '\0' : ' ';
236
+		if(!hostOnly) {
236 237
 		strncpy(buffer+real_len+1,display_url,display_len);
237 238
 		buffer[buffer_len]=0;
239
+		}
238 240
 		cli_dbgmsg("Looking up in regex_list: %s\n");
239 241
 
240 242
 		rc = cli_ac_scanbuff(buffer,buffer_len,info,hostOnly ? matcher->root_hosts : matcher->root_urls,&partcnt,0,0,&partoff,0,-1,NULL);
... ...
@@ -359,7 +364,7 @@ int init_regex_list(struct regex_matcher* matcher)
359 359
  * although the name might be confusing, @pattern is not a regex!*/
360 360
 static int add_regex_list_element(struct cli_matcher* root,const char* pattern,char* info)
361 361
 {
362
-       int ret;
362
+       int ret,i;
363 363
        struct cli_ac_patt *new = cli_calloc(1,sizeof(*new));
364 364
        size_t len;
365 365
 
... ...
@@ -386,7 +391,8 @@ static int add_regex_list_element(struct cli_matcher* root,const char* pattern,c
386 386
 	       free(new);
387 387
 	       return CL_EMEM;
388 388
        }
389
-       strncpy((char*)new->pattern,(const char*)pattern,len);
389
+       for(i=0;i<len;i++)
390
+	       new->pattern[i]=pattern[i];/*new->pattern is short int* */
390 391
 
391 392
        new->virname = info;
392 393
        if((ret = cli_ac_addpatt(root,new))) {
... ...
@@ -462,7 +468,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
462 462
 			return CL_EMALFDB;
463 463
 		}
464 464
 		pattern[0]='\0';
465
-		flags=buffer+1;
465
+		flags=strdup(buffer+1);
466 466
 		pattern++;
467 467
 		if(buffer[0] == 'R') {
468 468
 			if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags) ))
... ...
@@ -724,7 +730,7 @@ struct token_t
724 724
 	size_t len;
725 725
 	char   type;
726 726
 	union {
727
-		const unsigned char* start;
727
+		unsigned char* start;
728 728
 		char_bitmap_p        bitmap;
729 729
 	} u;
730 730
 };
... ...
@@ -740,7 +746,7 @@ static const unsigned char* getNextToken(const unsigned char* pat,struct token_t
740 740
 		case '\\':
741 741
 			token->type=TOKEN_CHAR;
742 742
 			token->u.start = ++pat;
743
-			if(islower(token->u.start)) {
743
+			if(islower(*token->u.start)) {
744 744
 				/* handle \n, \t, etc. */
745 745
 				char c;
746 746
 				if(snprintf(&c,1,"\%c",token->u.start)!=1)
... ...
@@ -1239,6 +1245,8 @@ static int match_node(struct tree_node* node,const unsigned char* c,size_t len,c
1239 1239
 	assert(c);
1240 1240
 	assert(info);
1241 1241
 
1242
+	if(!node->u.children)
1243
+		return MATCH_FAILED;/* tree empty */
1242 1244
 	*info = NULL;
1243 1245
 	len++;
1244 1246
 	c--;