Browse code

Fixed some NULL pointers

git-svn: trunk@2259

Nigel Horne authored on 2006/09/14 18:02:25
Showing 3 changed files
... ...
@@ -1,3 +1,7 @@
1
+Thu Sep 14 10:01:21 BST 2006 (njh)
2
+----------------------------------
3
+  * libclamav:	Phixed some buffer underruns and NULL pointers
4
+
1 5
 Thu Sep 14 09:12:03 BST 2006 (trog)
2 6
 -----------------------------------
3 7
   * libclamav/unrar/unrar.c, unrar20.c, unrarppm.c: improve handling of
... ...
@@ -15,7 +19,7 @@ Thu Sep 14 00:35:56 CEST 2006 (acab)
15 15
 Wed Sep 13 22:38:22 BST 2006 (njh)
16 16
 ----------------------------------
17 17
   * libclamav/mbox.c:	Committed ACAB's merge of Edvin's Phish code,
18
-  			configure --enable-experimental to use it.
18
+			configure --enable-experimental to use it.
19 19
 
20 20
 Wed Sep 13 19:41:20 CEST 2006 (acab)
21 21
 ------------------------------------
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phishcheck.c,v $
22
+ *  Revision 1.2  2006/09/14 08:59:37  njh
23
+ *  Fixed some NULL pointers
24
+ *
22 25
  *  Revision 1.1  2006/09/12 19:38:39  acab
23 26
  *  Phishing module merge - libclamav
24 27
  *
... ...
@@ -68,7 +71,7 @@
68 68
  *  Regex bracket handling update.
69 69
  *  Better regex paranthesized & alternate expression handling.
70 70
  *
71
- 
71
+
72 72
 case CL_PHISH_HOST_NOT_LISTED:
73 73
  return "Host not listed in .pdb -> not checked";*  Revision 1.19  2006/07/31 20:12:30  edwin
74 74
  *  Preliminary support for domain databases (domains to check by phishmodule)
... ...
@@ -128,16 +131,16 @@ case CL_PHISH_HOST_NOT_LISTED:
128 128
 
129 129
 #define PHISHY_USERNAME_IN_URL 1
130 130
 #define PHISHY_NUMERIC_IP      2
131
-#define REAL_IS_MAILTO         4
131
+#define REAL_IS_MAILTO		 4
132 132
 /* this is just a flag, so that the displayed url will be parsed as mailto too, for example
133 133
  * <a href='mailto:somebody@yahoo.com'>to:somebody@yahoo.com</a>*/
134
-#define DOMAIN_LISTED          8
135
-#define PHISHY_CLOAKED_NULL    16
136
-#define PHISHY_HEX_URL         32
134
+#define DOMAIN_LISTED		 8
135
+#define PHISHY_CLOAKED_NULL	16
136
+#define PHISHY_HEX_URL		32
137 137
 
138 138
 
139 139
 /*
140
-* Phishing design documentation, 
140
+* Phishing design documentation,
141 141
 (initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB)
142 142
 
143 143
 *Warning*: if flag *--phish-scan-alldomains* (or equivalent clamd/clamav-milter config option) isn't given, then phishing scanning is done only for domains listed in daily.pdb.
... ...
@@ -212,6 +215,7 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla
212 212
  *
213 213
  */
214 214
 static char empty_string[]="";
215
+static	char	*rfind(const char *start, char c, size_t len);
215 216
 
216 217
 void url_check_init(struct url_check* urls)
217 218
 {
... ...
@@ -229,7 +233,7 @@ void url_check_init(struct url_check* urls)
229 229
 
230 230
 inline void string_free(struct string* str)
231 231
 {
232
-	for(;;){ 
232
+	for(;;){
233 233
 		str->refcount--;
234 234
 		if(!str->refcount) {
235 235
 			if(str->ref)/* don't free, this is a portion of another string */
... ...
@@ -240,7 +244,7 @@ inline void string_free(struct string* str)
240 240
 			}
241 241
 		}
242 242
 		else break;
243
-	} 
243
+	}
244 244
 }
245 245
 
246 246
 /* always use the string_assign when assigning to a string, this makes sure the old one's refcount is decremented*/
... ...
@@ -355,7 +359,7 @@ void get_host(struct string* dest,const char* URL,int isReal,int* phishy)
355 355
 			/* it is not required to use mailto: in the displayed url, they might use to:, or whatever */
356 356
 			end = URL+strlen(URL)+1;
357 357
 			start = URL + strcspn(URL,": ")+1;
358
-			if (start==end) 
358
+			if (start==end)
359 359
 				start = URL;
360 360
 			ismailto = 1;
361 361
 		}
... ...
@@ -371,14 +375,23 @@ void get_host(struct string* dest,const char* URL,int isReal,int* phishy)
371 371
 			else ismailto=2;/*no-protocol, might be mailto, @ is no problem*/
372 372
 		}
373 373
 	}
374
-	else start += 3;/* :// */
375
-	
376
-	if(!ismailto || !isReal) { 
377
-		const char* realhost;
374
+	else
375
+		start += 3;	/* :// */
376
+
377
+	if(!ismailto || !isReal) {
378
+		const char *realhost;
379
+
378 380
 		do {
379
-			end	 = start+strcspn(start,":/?");
381
+			end  = start + strcspn(start,":/?");
380 382
 			realhost = strchr(start,'@');
381
-			if(start!=end && realhost>end) realhost = NULL;/*don't check beyond end of hostname*/
383
+
384
+			if(realhost == NULL)
385
+				break;
386
+
387
+			if(start!=end && realhost>end)
388
+				/*don't check beyond end of hostname*/
389
+				realhost = NULL;
390
+
382 391
 			if(realhost) {
383 392
 				const char* tld = strrchr(realhost,'.');
384 393
 				if(tld && isTLD(tld,tld-realhost-1))
... ...
@@ -388,13 +401,13 @@ void get_host(struct string* dest,const char* URL,int isReal,int* phishy)
388 388
 			}
389 389
 		} while(realhost);/*skip over multiple @ characters, text following last @ character is the real host*/
390 390
 	}
391
-	else 
391
+	else
392 392
 	if (ismailto && isReal)
393 393
 		*phishy |= REAL_IS_MAILTO;
394 394
 
395 395
 	if(!end) {
396 396
 		end  = start+strcspn(start,":/?");/*especially important for mailto:somebody@yahoo.com?subject=...*/
397
-		if(!end) 
397
+		if(!end)
398 398
 			end  = start + strlen(start);
399 399
 	}
400 400
 
... ...
@@ -440,11 +453,17 @@ int isTLD(const char* str,int len)
440 440
 /*
441 441
  * memrchr isn't standard, so I use this
442 442
  */
443
-char* rfind(char* start,char c,size_t len)
443
+static char *
444
+rfind(const char *start, char c, size_t len)
444 445
 {
445
-	char* p;
446
-	for(p=start+len;p>=start && *p!=c;p--);
447
-	return p<start ? NULL : p;
446
+	const char *p;
447
+
448
+	if(start == NULL)
449
+		return NULL;
450
+
451
+	for(p = start + len; (p >= start) && (*p != c); p--)
452
+		;
453
+	return (p < start) ? NULL : p;
448 454
 }
449 455
 
450 456
 void get_domain(struct string* dest,struct string* host)
... ...
@@ -522,7 +541,7 @@ int isNumeric(const char* host)
522 522
 	/* 1.2.3.4 -> 7*/
523 523
 	/* 127.127.127.127 -> 15*/
524 524
 	if(len<7 || len>15)
525
-		return 0;	
525
+		return 0;
526 526
 	sscanf(host,"%d.%d.%d.%d%n",&a,&b,&c,&d,&n);
527 527
 	if(n==len)
528 528
 		if(a>=0 && a<=256 && b>=0 && b<=256 && c>=0 && c<=256 && d>=0 && d<=256)
... ...
@@ -545,13 +564,19 @@ static inline char hex2int(const unsigned char* src)
545 545
 }
546 546
 
547 547
 
548
-/* deletes @what from the string @begin. 
548
+/* deletes @what from the string @begin.
549 549
  * @what_len: length of @what, excluding the terminating \0 */
550
-static void str_hex_to_char(char** begin,const char** end)
550
+static void
551
+str_hex_to_char(char **begin, const char **end)
551 552
 {
552
-	char* sbegin = *begin;
553
-	const char* str_end = *end;
553
+	char *sbegin = *begin;
554
+	const char *str_end = *end;
555
+
554 556
 	assert(str_end>sbegin);
557
+
558
+	if(strlen(sbegin) <= 2)
559
+		return;
560
+
555 561
 	/* convert leading %xx*/
556 562
 	if (sbegin[0] == '%') {
557 563
 		sbegin[2] = hex2int((unsigned char*)sbegin+1);
... ...
@@ -570,30 +595,49 @@ static void str_hex_to_char(char** begin,const char** end)
570 570
 	}
571 571
 	*end = str_end;
572 572
 }
573
-/* deletes @what from the string @begin. 
574
- * @what_len: length of @what, excluding the terminating \0 */
575
-static void str_strip(char** begin,const char** end,const char* what,size_t what_len)
573
+
574
+/*
575
+ * deletes @what from the string @begin.
576
+ * @what_len: length of @what, excluding the terminating \0
577
+ */
578
+static void
579
+str_strip(char **begin, const char **end, const char *what, size_t what_len)
576 580
 {
577
-	char* sbegin = *begin;
578
-	const char* str_end = *end;
579
-	const char* str_end_what;
581
+	char *sbegin = *begin;
582
+	const char *str_end = *end;
583
+	const char *str_end_what;
580 584
 	size_t cmp_len = what_len;
581
-	assert(str_end>sbegin);
582
-	if(str_end < sbegin + what_len)
585
+
586
+	if(begin == NULL)
587
+		return;
588
+
589
+	assert(str_end > sbegin);
590
+
591
+	/*if(str_end < (sbegin + what_len))
592
+		return;*/
593
+	if(strlen(sbegin) < what_len)
583 594
 		return;
595
+
584 596
 	/* strip leading @what */
585 597
 	while(cmp_len && !strncmp(sbegin,what,cmp_len)) {
586 598
 		sbegin += what_len;
599
+
587 600
 		if(cmp_len > what_len)
588 601
 			cmp_len -= what_len;
589
-		else cmp_len = 0;
602
+		else
603
+			cmp_len = 0;
590 604
 	}
605
+
591 606
 	/* strip trailing @what */
592
-	str_end_what = str_end - what_len;
593
-	while(str_end_what>sbegin && !strncmp(str_end_what,what,what_len)) {
594
-		str_end -= what_len;
595
-		str_end_what -= what_len;
607
+	if(what_len <= strlen(str_end)) {
608
+		str_end_what = str_end - what_len;
609
+		while((str_end_what > sbegin) &&
610
+		      (strncmp(str_end_what, what, what_len) == 0)) {
611
+			str_end -= what_len;
612
+			str_end_what -= what_len;
613
+		}
596 614
 	}
615
+
597 616
 	*begin = sbegin++;
598 617
 	while(sbegin+what_len < str_end) {
599 618
 		while(sbegin+what_len<str_end && !strncmp(sbegin,what,what_len)) {
... ...
@@ -637,7 +681,7 @@ static inline void str_make_lowercase(char* str,size_t len)
637 637
 static inline void clear_msb(char* begin)
638 638
 {
639 639
 	for(;*begin;begin++)
640
-		*begin = fix32((*begin)&0x7f);	
640
+		*begin = fix32((*begin)&0x7f);
641 641
 }
642 642
 
643 643
 /*
... ...
@@ -653,48 +697,64 @@ static inline void clear_msb(char* begin)
653 653
  * <a href="www.yahoo.com">Check out yahoo.com</a>
654 654
  * Here we add a ., so we get: check.out.yahoo.com (it won't trigger)
655 655
  *
656
- * Rule for adding .: if substring from right contains dot, then add dot, otherwise strip space
656
+ * Rule for adding .: if substring from right contains dot, then add dot,
657
+ *	otherwise strip space
657 658
  *
658 659
  */
659
-static inline void str_fixup_spaces(char **begin,const char** end)
660
+static inline void
661
+str_fixup_spaces(char **begin, const char **end)
660 662
 {
661
-	char* space = strchr(*begin,' ');
663
+	char *space = strchr(*begin, ' ');
664
+
665
+	if(space == NULL)
666
+		return;
667
+
662 668
 	/* strip any number of spaces after / */
663
-	while(space>*begin && space[-1]=='/' && space[0]==' ' && space<*end) {
664
-		memmove(space,space+1,*end-space+1);
669
+	while((space > *begin) && (space[-1] == '/') && (space[0] == ' ') && (space < *end)) {
670
+		memmove(space, space+1, *end-space+1);
665 671
 		(*end)--;
666 672
 	}
667 673
 
668
-	for(space = rfind(*begin,' ',*end-*begin);space && space[0]!='.' && space<*end;space++) {}
674
+	for(space = rfind(*begin,' ',*end-*begin);space && space[0]!='.' && space<*end;space++)
675
+		;
669 676
 	if(space && space[0]=='.')
670 677
 		str_replace(*begin,*end,' ','.');
671
-	else 
678
+	else
672 679
 		str_strip(begin,end," ",1);
673 680
 }
674 681
 
675 682
 /* allocates memory */
676
-void cleanupURL(struct string* URL,int isReal)
683
+void
684
+cleanupURL(struct string *URL, int isReal)
677 685
 {
678
-	char* begin = URL->data;
679
-	const char* end;
686
+	char *begin = URL->data;
687
+	const char *end;
680 688
 	size_t len;
689
+
681 690
 	clear_msb(begin);
682
-/*	if(!URL->data)
691
+	/*if(begin == NULL)
683 692
 		return;*/
684 693
 	/*TODO: handle hex-encoded IPs*/
685
-	while(isspace(*begin)) begin++;
686
-	len=strlen(begin);
687
-	end = begin+len-1;
688
-	/*cli_dbgmsg("%d\n",end-begin);*/
689
-	if(begin>=end) {
694
+	while(isspace(*begin))
695
+		begin++;
696
+
697
+	len = strlen(begin);
698
+	if(len == 0) {
699
+		string_assign_null(URL);
700
+		return;
701
+	}
702
+
703
+	end = begin + len - 1;
704
+	/*cli_dbgmsg("%d %d\n", end-begin, len);*/
705
+	if(begin >= end) {
690 706
 		string_assign_null(URL);
691 707
 		return;
692 708
 	}
693
-	while(isspace(*end)) 
709
+	while(isspace(*end))
694 710
 		end--;
695 711
 	/*TODO: convert \ to /, and stuff like that*/
696 712
 	/* From mailscanner, my comments enclosed in {} */
697
-        if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len))
713
+	if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len))
698 714
 		string_assign_null(URL);
699 715
 	else {
700 716
 		size_t host_len;
... ...
@@ -758,7 +818,7 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
758 758
 	if(is_phish_disabled())
759 759
 		return 0;
760 760
 	if(!hexinited) {
761
-		init_hextable(); 
761
+		init_hextable();
762 762
 		atexit(phishing_done);/*TODO: replace this with a proper phishing_done call from manager.c*/
763 763
 	}
764 764
 
... ...
@@ -857,25 +917,25 @@ static char* str_compose(const char* a,const char* b,const char* c)
857 857
 
858 858
 /*static const char* url_regex="^ *([[:alnum:]%_-]+:(//)?)?([[:alnum:]%_-]@)*[[:alnum:]%_-]+\\.([[:alnum:]%_-]+\\.)*[[:alnum:]_%-]+(/[[:alnum:];:@$=?&/.,%_-]+) *$";*/
859 859
 /* for urls, including mailto: urls, and (broken) http:www... style urls*/
860
-/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html 
860
+/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
861 861
  * Modifications: don't allow empty domains/subdomains, such as www..com <- that is no url
862 862
  * So the 'safe' char class has been split up
863 863
  * */
864 864
 /* character classes */
865
-#define URI_alpha       "a-zA-Z"
866
-#define URI_digit       "0-9"
865
+#define URI_alpha	"a-zA-Z"
866
+#define URI_digit	"0-9"
867 867
 #define URI_safe_nodot  "-$_@&"
868
-#define URI_safe        "-$_@.&"
869
-#define URI_extra       "!*\"'(),"
868
+#define URI_safe	"-$_@.&"
869
+#define URI_extra	"!*\"'(),"
870 870
 #define URI_reserved    "=;/#?: "
871 871
 #define URI_national    "{}|[]\\^~"
872 872
 #define URI_punctuation "<>"
873 873
 
874
-#define URI_hex         "[0-9a-fA-f]"
874
+#define URI_hex		 "[0-9a-fA-f]"
875 875
 #define URI_escape      "%"URI_hex"{2}"
876 876
 #define URI_xalpha "([" URI_safe URI_alpha URI_digit  URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
877 877
 #define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
878
- 
878
+
879 879
 #define URI_xalphas URI_xalpha"+"
880 880
 #define URI_xalphas_nodot URI_xalpha_nodot"*"
881 881
 
... ...
@@ -924,7 +984,7 @@ int isURL(const char* URL)
924 924
 		if(build_regex(&preg,url_regex,1))
925 925
 			return -1;
926 926
 	}
927
-	return URL ? !regexec(preg,URL,0,NULL,0) : 0; 
927
+	return URL ? !regexec(preg,URL,0,NULL,0) : 0;
928 928
 }
929 929
 
930 930
 int isNumericURL(const char* URL)
... ...
@@ -978,10 +1038,10 @@ enum phish_status url_get_host(struct url_check* url,struct url_check* host_url,
978 978
 	}
979 979
 	return CL_PHISH_NODECISION;
980 980
 }
981
-	
981
+
982 982
 
983 983
 void url_get_domain(struct url_check* url,struct url_check* domains)
984
-{		
984
+{
985 985
 	get_domain(&domains->realLink, &url->realLink);
986 986
 	get_domain(&domains->displayLink, &url->displayLink);
987 987
 	domains->flags	     = url->flags;
... ...
@@ -1095,7 +1155,7 @@ enum phish_status phishingCheck(struct url_check* urls)
1095 1095
 
1096 1096
 	if(urls->flags&CHECK_CLOAKING) {
1097 1097
 		/*Checks if URL is cloaked.
1098
-		Should we check if it containts another http://, https://? 
1098
+		Should we check if it containts another http://, https://?
1099 1099
 		No because we might get false positives from redirect services.*/
1100 1100
 		if(strstr(urls->realLink.data,"%00")) {
1101 1101
 			free_if_needed(&host_url);
... ...
@@ -1111,14 +1171,14 @@ enum phish_status phishingCheck(struct url_check* urls)
1111 1111
 		free_if_needed(&host_url);
1112 1112
 		return CL_PHISH_CLEAN;
1113 1113
 	}
1114
-	
1114
+
1115 1115
 	if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) {
1116 1116
 		free_if_needed(&host_url);
1117 1117
 		return CL_PHISH_SSL_SPOOF;
1118 1118
 	}
1119 1119
 
1120
-	if((rc = url_get_host(urls,&host_url,DOMAIN_REAL,&phishy))) 
1121
-	{	
1120
+	if((rc = url_get_host(urls,&host_url,DOMAIN_REAL,&phishy)))
1121
+	{
1122 1122
 		free_if_needed(&host_url);
1123 1123
 		return rc;
1124 1124
 	}
... ...
@@ -1128,7 +1188,7 @@ enum phish_status phishingCheck(struct url_check* urls)
1128 1128
 		return CL_PHISH_CLEAN_CID;
1129 1129
 	}
1130 1130
 
1131
-	if(!isURL(urls->displayLink.data) && 
1131
+	if(!isURL(urls->displayLink.data) &&
1132 1132
 			( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(urls->displayLink.data)) ||
1133 1133
 			  !(phishy&PHISHY_NUMERIC_IP))) {
1134 1134
 		free_if_needed(&host_url);
... ...
@@ -1154,7 +1214,7 @@ enum phish_status phishingCheck(struct url_check* urls)
1154 1154
 			free_if_needed(&domain_url);
1155 1155
 		}
1156 1156
 
1157
-		/*if(urls->flags&CHECK_REDIR) { 
1157
+		/*if(urls->flags&CHECK_REDIR) {
1158 1158
 			//see where the realLink redirects, and compare that with the displayed Link
1159 1159
 			const uchar* redirectedURL  = getRedirectedURL(urls->realLink);
1160 1160
 			if(urls->needsfree)
... ...
@@ -77,7 +77,6 @@ void free_if_needed(struct url_check* url);
77 77
 void get_host(struct string* dest,const char* URL,int isReal,int* phishy);
78 78
 int isCountryCode(const char* str);
79 79
 int isTLD(const char* str,int len);
80
-char* rfind(char* start,char c,size_t len);
81 80
 void get_domain(struct string* dest,struct string* host);
82 81
 int ip_reverse(struct url_check* urls,int isReal);
83 82
 void reverse_lookup(struct url_check* url,int isReal);