Browse code

code cleanup

git-svn: trunk@2379

Tomasz Kojm authored on 2006/10/15 08:52:02
Showing 5 changed files
... ...
@@ -1,10 +1,14 @@
1
+Sun Oct 15 01:49:55 CEST 2006 (tk)
2
+----------------------------------
3
+  * libclamav: anti-phish code cleanup (Edwin)
4
+
1 5
 Sat Oct 14 23:09:12 CEST 2006 (tk)
2 6
 ----------------------------------
3 7
   * libclamav/dsig.c: new function cli_versigpss(): digital signature
4 8
 		      verification based on RSASSA-PSS with 2048 bit RSA
5 9
 		      key and SHA256 hash function
6 10
   * libclamav/sha256.[ch]: new files (SHA256 implementation from mhash)
7
-  * sigtool/sigtool.c: genetate compressed and signed .cdiff files
11
+  * sigtool/sigtool.c: generate compressed and signed .cdiff files
8 12
   * shared/cdiff.c: handle new .cdiff files
9 13
 
10 14
 Fri Oct 13 15:42:43 BST 2006 (njh)
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phishcheck.c,v $
22
+ *  Revision 1.14  2006/10/14 23:52:01  tkojm
23
+ *  code cleanup
24
+ *
22 25
  *  Revision 1.13  2006/10/10 23:51:49  tkojm
23 26
  *  apply patches for the anti-phish code from Edwin
24 27
  *
... ...
@@ -378,7 +381,8 @@ void string_free(struct string* str)
378 378
 			if(str->ref)/* don't free, this is a portion of another string */
379 379
 				str=str->ref;/* try to free that one*/
380 380
 			else {
381
-				free(str->data);
381
+				if(str->data)
382
+					free(str->data);
382 383
 				break;
383 384
 			}
384 385
 		}
... ...
@@ -416,9 +420,11 @@ static inline void string_init_c(struct string* dest,char* data)
416 416
 }
417 417
 
418 418
 /* make a copy of the string between start -> end*/
419
-void string_assign_dup(struct string* dest,const char* start,const char* end)
419
+int string_assign_dup(struct string* dest,const char* start,const char* end)
420 420
 {
421 421
 	char*	    ret  = cli_malloc(end-start+1);
422
+	if(!ret)
423
+		return CL_EMEM;
422 424
 	strncpy(ret,start,end-start);
423 425
 	ret[end-start]='\0';
424 426
 
... ...
@@ -426,6 +432,7 @@ void string_assign_dup(struct string* dest,const char* start,const char* end)
426 426
 	dest->data=ret;
427 427
 	dest->refcount=1;
428 428
 	dest->ref=NULL;
429
+	return CL_SUCCESS;
429 430
 }
430 431
 
431 432
 static inline void string_assign_null(struct string* dest)
... ...
@@ -470,21 +477,21 @@ static int build_regex(regex_t* preg,const char* regex,int nosub)
470 470
 			cli_errmsg("Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
471 471
 			free(errbuf);
472 472
 		} else
473
-			cli_errmsg("Error in compiling regex, disabling phishing checks\n");
473
+			cli_errmsg("Error in compiling regex, disabling phishing checks. Additionaly an Out-of-memory error was encountered while generating a detailed error message\n");
474 474
 #endif
475 475
 		return 1;
476 476
 	}
477
-	return 0;
477
+	return CL_SUCCESS;
478 478
 }
479 479
 
480 480
 /*static regex_t* host_preg = NULL;
481 481
 static const char* host_regex="cid:.+|mailto:(.+)|([[:alpha:]]+://)?(([^:/?]+@)+([^:/?]+)([:/?].+)?|([^@:/?]+)([:/?].+)?)"; <- this is slower than the function below
482 482
 */
483 483
 /* allocates memory */
484
-void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy)
484
+int get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy)
485 485
 {
486 486
 	const char mailto[] = "mailto:";
487
-	int ismailto = 0;
487
+	int rc,ismailto = 0;
488 488
 	const char* start;
489 489
 	const char* end=NULL;
490 490
 	if(!URL) {
... ...
@@ -536,7 +543,10 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int
536 536
 
537 537
 			if(realhost) {
538 538
 				const char* tld = strrchr(realhost,'.');
539
-				if(tld && isTLD(s,tld,tld-realhost-1))
539
+				rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
540
+				if(rc < 0)
541
+					return rc;
542
+				if(rc)
540 543
 					*phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people,
541 544
 					like http://www.ebay.com@somevilplace.someevildomain.com/ */
542 545
 				start=realhost+1;/*skip the username*/
... ...
@@ -553,7 +563,8 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int
553 553
 			end  = start + strlen(start);
554 554
 	}
555 555
 
556
-	string_assign_dup(dest,start,end);
556
+	if(rc = string_assign_dup(dest,start,end))
557
+		return rc;
557 558
 }
558 559
 
559 560
 int isCountryCode(const struct phishcheck* s,const char* str)
... ...
@@ -568,11 +579,14 @@ int isTLD(const struct phishcheck* pchk,const char* str,int len)
568 568
 	else {
569 569
 		char*	s  = cli_malloc(len+1);
570 570
 		int rc;
571
+
572
+		if(!s)
573
+			return CL_EMEM;
571 574
 		strncpy(s,str,len);
572 575
 		s[len]='\0';
573 576
 		rc = !regexec(&pchk->preg_tld,s,0,NULL,0);
574 577
 		free(s);
575
-		return rc;
578
+		return rc ? 1 : 0;
576 579
 	}
577 580
 }
578 581
 
... ...
@@ -689,7 +703,8 @@ str_hex_to_char(char **begin, const char **end)
689 689
 	char *sbegin = *begin;
690 690
 	const char *str_end = *end;
691 691
 
692
-	massert(str_end>sbegin);
692
+	if(str_end <= sbegin)
693
+		return;
693 694
 
694 695
 	if(strlen(sbegin) <= 2)
695 696
 		return;
... ...
@@ -725,11 +740,9 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len)
725 725
 	const char *str_end_what;
726 726
 	size_t cmp_len = what_len;
727 727
 
728
-	if(begin == NULL)
728
+	if(begin == NULL || str_end <= sbegin)
729 729
 		return;
730 730
 
731
-	massert(str_end > sbegin);
732
-
733 731
 	/*if(str_end < (sbegin + what_len))
734 732
 		return;*/
735 733
 	if(strlen(sbegin) < what_len)
... ...
@@ -831,7 +844,7 @@ str_fixup_spaces(char **begin, const char **end)
831 831
 }
832 832
 
833 833
 /* allocates memory */
834
-void
834
+int
835 835
 cleanupURL(struct string *URL, int isReal)
836 836
 {
837 837
 	char *begin = URL->data;
... ...
@@ -866,6 +879,8 @@ cleanupURL(struct string *URL, int isReal)
866 866
 	else {
867 867
 		size_t host_len;
868 868
 		char* host_begin;
869
+		int rc;
870
+
869 871
 		str_replace(begin,end,'\\','/');
870 872
 		str_strip(&begin,&end,"\"",1);
871 873
 		str_strip(&begin,&end,lt,lt_len);
... ...
@@ -880,7 +895,8 @@ cleanupURL(struct string *URL, int isReal)
880 880
 		/* convert %xx to real value */
881 881
 		str_hex_to_char(&begin,&end);
882 882
 		str_fixup_spaces(&begin,&end);
883
-		string_assign_dup(URL,begin,end+1);
883
+		if (rc = string_assign_dup(URL,begin,end+1))
884
+			return rc;
884 885
 		/*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/
885 886
 	}
886 887
 }
... ...
@@ -926,7 +942,11 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
926 926
 				blobClose(hrefs->contents[i]);
927 927
 			}*/
928 928
 			string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i]));
929
-			massert(!urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]);
929
+
930
+			if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) {
931
+				cli_warnmsg("urls.displayLink.data[...]");
932
+				return CL_CLEAN;
933
+			}
930 934
 /*			massert(strlen(urls.displayLink.data) < blobGetDataSize(hrefs->contents[i]));*/
931 935
 			urls.realLink.refcount=-1;
932 936
 			urls.displayLink.refcount=-1;/*don't free these, caller will free*/
... ...
@@ -995,7 +1015,9 @@ static char* str_compose(const char* a,const char* b,const char* c)
995 995
 	const size_t b_len = strlen(b);
996 996
 	const size_t c_len = strlen(c);
997 997
 	const size_t r_len = a_len+b_len+c_len+1;
998
-	char* concated = malloc(r_len);
998
+	char* concated = cli_malloc(r_len);
999
+	if(!concated)
1000
+		return NULL;
999 1001
 	strncpy(concated,a,a_len);
1000 1002
 	strncpy(concated+a_len,b,b_len);
1001 1003
 	strncpy(concated+a_len+b_len,c,c_len);
... ...
@@ -1026,6 +1048,8 @@ int phishing_init(struct cl_engine* engine)
1026 1026
 	}
1027 1027
 	else {
1028 1028
 		pchk = engine->phishcheck;
1029
+		if(!pchk)
1030
+			return CL_ENULLARG;
1029 1031
 		if(!pchk->is_disabled) {
1030 1032
 			/* already initialized */
1031 1033
 			return CL_SUCCESS;
... ...
@@ -1123,7 +1147,7 @@ enum phish_status cleanupURLs(struct url_check* urls)
1123 1123
 	return CL_PHISH_NODECISION;
1124 1124
 }
1125 1125
 
1126
-enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
1126
+int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
1127 1127
 {
1128 1128
 	struct string* host = isReal ? &host_url->realLink : &host_url->displayLink;
1129 1129
 	get_host(pchk, host, isReal ? url->realLink.data : url->displayLink.data, isReal, phishy);
... ...
@@ -1205,7 +1229,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
1205 1205
 		return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
1206 1206
 
1207 1207
 	if((rc = cleanupURLs(urls))) {
1208
-		massert(!isPhishing(rc));/* not allowed to decide this is phishing */
1208
+		if(isPhishing(rc))/* not allowed to decide this is phishing */
1209
+			return CL_PHISH_CLEAN;
1209 1210
 		return rc;/* URLs identical after cleanup */
1210 1211
 	}
1211 1212
 
... ...
@@ -1223,7 +1248,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
1223 1223
 
1224 1224
 	if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) {
1225 1225
 		free_if_needed(&host_url);
1226
-		massert(!isPhishing(rc));
1226
+		if(isPhishing(rc))
1227
+			return CL_PHISH_CLEAN;
1227 1228
 		return rc;
1228 1229
 	}
1229 1230
 
... ...
@@ -80,10 +80,10 @@ void url_check_init(struct url_check* urls);
80 80
 void string_free(struct string* str);
81 81
 void string_assign(struct string* dest,struct string* src);
82 82
 void string_assign_c(struct string* dest,char* data);
83
-void string_assign_dup(struct string* dest,const char* start,const char* end);
83
+int string_assign_dup(struct string* dest,const char* start,const char* end);
84 84
 void string_assign_ref(struct string* dest,struct string* ref,char* data);
85 85
 void free_if_needed(struct url_check* url);
86
-void get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy);
86
+int get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy);
87 87
 int isCountryCode(const struct phishcheck* s,const char* str);
88 88
 int isTLD(const struct phishcheck* s,const char* str,int len);
89 89
 void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host);
... ...
@@ -91,12 +91,12 @@ int ip_reverse(struct url_check* urls,int isReal);
91 91
 void reverse_lookup(struct url_check* url,int isReal);
92 92
 int isNumeric(const char* host);
93 93
 int isSSL(const char* URL);
94
-void cleanupURL(struct string* URL,int isReal);
94
+int cleanupURL(struct string* URL,int isReal);
95 95
 void get_redirected_URL(struct string* URL);
96 96
 int isURL(const struct phishcheck* pchk,const char* URL);
97 97
 enum phish_status cleanupURLs(struct url_check* urls);
98 98
 int isNumericURL(const struct phishcheck* pchk, const char* URL);
99
-enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy);
99
+int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy);
100 100
 void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains);
101 101
 enum phish_status phishy_map(int phishy,enum phish_status fallback);
102 102
 int isEncoded(const char* url);
... ...
@@ -499,7 +499,7 @@ static int cli_initengine(struct cl_engine **engine, unsigned int options)
499 499
     }
500 500
 
501 501
 #ifdef CL_EXPERIMENTAL
502
-    if(rc = phishing_init(*engine))
502
+    if((rc = phishing_init(*engine)))
503 503
 	return rc;
504 504
 #endif
505 505
 
... ...
@@ -619,7 +619,7 @@ static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options
619 619
     }
620 620
 
621 621
     if(!(*engine)->whitelist_matcher) {
622
-	if(ret = init_whitelist(*engine)) {
622
+	if((ret = init_whitelist(*engine))) {
623 623
 	    phishing_done(*engine);
624 624
 	    cl_free(*engine);
625 625
 	    return ret;
... ...
@@ -646,7 +646,7 @@ static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options
646 646
     }
647 647
 
648 648
     if(!(*engine)->domainlist_matcher) {
649
-	if(ret = init_domainlist(*engine)) {
649
+	if((ret = init_domainlist(*engine))) {
650 650
 	    phishing_done(*engine);
651 651
 	    cl_free(*engine);
652 652
 	    return ret;
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: regex_list.c,v $
22
+ *  Revision 1.10  2006/10/14 23:52:02  tkojm
23
+ *  code cleanup
24
+ *
22 25
  *  Revision 1.9  2006/10/10 23:51:49  tkojm
23 26
  *  apply patches for the anti-phish code from Edwin
24 27
  *
... ...
@@ -570,7 +573,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
570 570
 		pattern[0]='\0';
571 571
 		flags = buffer+1;
572 572
 		pattern++;
573
-		if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && !is_whitelist)) {/*regex*/
573
+		if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && is_whitelist)) {/*regex*/
574 574
 			if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags) ))
575 575
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
576 576
 		}
... ...
@@ -740,13 +743,17 @@ void regex_list_done(struct regex_matcher* matcher)
740 740
 
741 741
 	regex_list_cleanup(matcher);
742 742
 	if(matcher->list_loaded) {
743
-		cli_ac_free(matcher->root_hosts);
744
-		free(matcher->root_hosts);
745
-		matcher->root_hosts=NULL;
743
+		if(matcher->root_hosts) {
744
+			cli_ac_free(matcher->root_hosts);
745
+			free(matcher->root_hosts);
746
+			matcher->root_hosts=NULL;
747
+		}
746 748
 
747
-		cli_ac_free(matcher->root_urls);
748
-		free(matcher->root_urls);
749
-		matcher->root_urls=NULL;
749
+		if(matcher->root_urls) {
750
+			cli_ac_free(matcher->root_urls);
751
+			free(matcher->root_urls);
752
+			matcher->root_urls=NULL;
753
+		}
750 754
 
751 755
 		matcher->list_built=0;
752 756
 		destroy_tree(matcher);
... ...
@@ -951,7 +958,7 @@ static const unsigned char* find_regex_start(const unsigned char* pat)
951 951
 		if(token.type!=TOKEN_REGEX) {
952 952
 			last = tmp;
953 953
 			lasttype = token.type;
954
-			if(token.type==TOKEN_BRACKET)
954
+			if(token.type==TOKEN_BRACKET && token.u.bitmap)
955 955
 				free(token.u.bitmap);
956 956
 			if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) {
957 957
 				/* save this position on stack, successfully parsed till here*/
... ...
@@ -1233,6 +1240,8 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
1233 1233
 					if(charclass == std_class_cnt) {/*not a std char class*/
1234 1234
 						new->op = OP_CUSTOMCLASS;
1235 1235
 						new->u.children = cli_malloc(sizeof(new->u.children[0])*2);
1236
+						if(!new->u.children)
1237
+							return CL_EMEM;
1236 1238
 						new->u.bitmap[0] = token.u.bitmap;
1237 1239
 						new->u.bitmap[1] = NULL;
1238 1240
 						tree_node_insert_nonbin(node,new);
... ...
@@ -1259,17 +1268,23 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
1259 1259
 			case TOKEN_REGEX:
1260 1260
 			case TOKEN_DONE: {
1261 1261
 						 struct leaf_info* leaf=cli_malloc(sizeof(*leaf));
1262
+						 if(!leaf)
1263
+							 return CL_EMEM;
1262 1264
 						 leaf->info=strdup(info);
1263 1265
 						 if(token.type==TOKEN_REGEX) {
1264 1266
 							 int rc;
1265 1267
 							 struct tree_node* new;
1266 1268
 							 regex_t* preg;
1267 1269
 							 preg=cli_malloc(sizeof(*preg));
1270
+							 if(!preg)
1271
+								 return CL_EMEM;
1268 1272
 							 rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
1269 1273
 							 leaf->preg=preg;
1270 1274
 							 if(rc)
1271 1275
 								 return rc;
1272 1276
 							 new=cli_malloc(sizeof(*new));
1277
+							 if(!new)
1278
+								 return CL_EMEM;
1273 1279
 							 new->op=OP_LEAF;
1274 1280
 							 new->next=node;
1275 1281
 							 new->alternatives=0;
... ...
@@ -1511,7 +1526,8 @@ static void destroy_tree(struct regex_matcher* matcher)
1511 1511
 	destroy_tree_internal(matcher,matcher->root_regex);
1512 1512
 	while (matcher->node_stack.cnt) {
1513 1513
 		struct tree_node* node = stack_pop(&matcher->node_stack);
1514
-		free(node);
1514
+		if(node)
1515
+			free(node);
1515 1516
 	}
1516 1517
 }
1517 1518
 #ifndef NDEBUG