git-svn: trunk@2379
Tomasz Kojm authored on 2006/10/15 08:52:02... | ... |
@@ -1,10 +1,14 @@ |
1 |
+Sun Oct 15 01:49:55 CEST 2006 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: anti-phish code cleanup (Edwin) |
|
4 |
+ |
|
1 | 5 |
Sat Oct 14 23:09:12 CEST 2006 (tk) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/dsig.c: new function cli_versigpss(): digital signature |
4 | 8 |
verification based on RSASSA-PSS with 2048 bit RSA |
5 | 9 |
key and SHA256 hash function |
6 | 10 |
* libclamav/sha256.[ch]: new files (SHA256 implementation from mhash) |
7 |
- * sigtool/sigtool.c: genetate compressed and signed .cdiff files |
|
11 |
+ * sigtool/sigtool.c: generate compressed and signed .cdiff files |
|
8 | 12 |
* shared/cdiff.c: handle new .cdiff files |
9 | 13 |
|
10 | 14 |
Fri Oct 13 15:42:43 BST 2006 (njh) |
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: phishcheck.c,v $ |
22 |
+ * Revision 1.14 2006/10/14 23:52:01 tkojm |
|
23 |
+ * code cleanup |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.13 2006/10/10 23:51:49 tkojm |
23 | 26 |
* apply patches for the anti-phish code from Edwin |
24 | 27 |
* |
... | ... |
@@ -378,7 +381,8 @@ void string_free(struct string* str) |
378 | 378 |
if(str->ref)/* don't free, this is a portion of another string */ |
379 | 379 |
str=str->ref;/* try to free that one*/ |
380 | 380 |
else { |
381 |
- free(str->data); |
|
381 |
+ if(str->data) |
|
382 |
+ free(str->data); |
|
382 | 383 |
break; |
383 | 384 |
} |
384 | 385 |
} |
... | ... |
@@ -416,9 +420,11 @@ static inline void string_init_c(struct string* dest,char* data) |
416 | 416 |
} |
417 | 417 |
|
418 | 418 |
/* make a copy of the string between start -> end*/ |
419 |
-void string_assign_dup(struct string* dest,const char* start,const char* end) |
|
419 |
+int string_assign_dup(struct string* dest,const char* start,const char* end) |
|
420 | 420 |
{ |
421 | 421 |
char* ret = cli_malloc(end-start+1); |
422 |
+ if(!ret) |
|
423 |
+ return CL_EMEM; |
|
422 | 424 |
strncpy(ret,start,end-start); |
423 | 425 |
ret[end-start]='\0'; |
424 | 426 |
|
... | ... |
@@ -426,6 +432,7 @@ void string_assign_dup(struct string* dest,const char* start,const char* end) |
426 | 426 |
dest->data=ret; |
427 | 427 |
dest->refcount=1; |
428 | 428 |
dest->ref=NULL; |
429 |
+ return CL_SUCCESS; |
|
429 | 430 |
} |
430 | 431 |
|
431 | 432 |
static inline void string_assign_null(struct string* dest) |
... | ... |
@@ -470,21 +477,21 @@ static int build_regex(regex_t* preg,const char* regex,int nosub) |
470 | 470 |
cli_errmsg("Error in compiling regex:%s\nDisabling phishing checks\n",errbuf); |
471 | 471 |
free(errbuf); |
472 | 472 |
} else |
473 |
- cli_errmsg("Error in compiling regex, disabling phishing checks\n"); |
|
473 |
+ cli_errmsg("Error in compiling regex, disabling phishing checks. Additionaly an Out-of-memory error was encountered while generating a detailed error message\n"); |
|
474 | 474 |
#endif |
475 | 475 |
return 1; |
476 | 476 |
} |
477 |
- return 0; |
|
477 |
+ return CL_SUCCESS; |
|
478 | 478 |
} |
479 | 479 |
|
480 | 480 |
/*static regex_t* host_preg = NULL; |
481 | 481 |
static const char* host_regex="cid:.+|mailto:(.+)|([[:alpha:]]+://)?(([^:/?]+@)+([^:/?]+)([:/?].+)?|([^@:/?]+)([:/?].+)?)"; <- this is slower than the function below |
482 | 482 |
*/ |
483 | 483 |
/* allocates memory */ |
484 |
-void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy) |
|
484 |
+int get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy) |
|
485 | 485 |
{ |
486 | 486 |
const char mailto[] = "mailto:"; |
487 |
- int ismailto = 0; |
|
487 |
+ int rc,ismailto = 0; |
|
488 | 488 |
const char* start; |
489 | 489 |
const char* end=NULL; |
490 | 490 |
if(!URL) { |
... | ... |
@@ -536,7 +543,10 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int |
536 | 536 |
|
537 | 537 |
if(realhost) { |
538 | 538 |
const char* tld = strrchr(realhost,'.'); |
539 |
- if(tld && isTLD(s,tld,tld-realhost-1)) |
|
539 |
+ rc = tld ? isTLD(s,tld,tld-realhost-1) : 0; |
|
540 |
+ if(rc < 0) |
|
541 |
+ return rc; |
|
542 |
+ if(rc) |
|
540 | 543 |
*phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people, |
541 | 544 |
like http://www.ebay.com@somevilplace.someevildomain.com/ */ |
542 | 545 |
start=realhost+1;/*skip the username*/ |
... | ... |
@@ -553,7 +563,8 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int |
553 | 553 |
end = start + strlen(start); |
554 | 554 |
} |
555 | 555 |
|
556 |
- string_assign_dup(dest,start,end); |
|
556 |
+ if(rc = string_assign_dup(dest,start,end)) |
|
557 |
+ return rc; |
|
557 | 558 |
} |
558 | 559 |
|
559 | 560 |
int isCountryCode(const struct phishcheck* s,const char* str) |
... | ... |
@@ -568,11 +579,14 @@ int isTLD(const struct phishcheck* pchk,const char* str,int len) |
568 | 568 |
else { |
569 | 569 |
char* s = cli_malloc(len+1); |
570 | 570 |
int rc; |
571 |
+ |
|
572 |
+ if(!s) |
|
573 |
+ return CL_EMEM; |
|
571 | 574 |
strncpy(s,str,len); |
572 | 575 |
s[len]='\0'; |
573 | 576 |
rc = !regexec(&pchk->preg_tld,s,0,NULL,0); |
574 | 577 |
free(s); |
575 |
- return rc; |
|
578 |
+ return rc ? 1 : 0; |
|
576 | 579 |
} |
577 | 580 |
} |
578 | 581 |
|
... | ... |
@@ -689,7 +703,8 @@ str_hex_to_char(char **begin, const char **end) |
689 | 689 |
char *sbegin = *begin; |
690 | 690 |
const char *str_end = *end; |
691 | 691 |
|
692 |
- massert(str_end>sbegin); |
|
692 |
+ if(str_end <= sbegin) |
|
693 |
+ return; |
|
693 | 694 |
|
694 | 695 |
if(strlen(sbegin) <= 2) |
695 | 696 |
return; |
... | ... |
@@ -725,11 +740,9 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len) |
725 | 725 |
const char *str_end_what; |
726 | 726 |
size_t cmp_len = what_len; |
727 | 727 |
|
728 |
- if(begin == NULL) |
|
728 |
+ if(begin == NULL || str_end <= sbegin) |
|
729 | 729 |
return; |
730 | 730 |
|
731 |
- massert(str_end > sbegin); |
|
732 |
- |
|
733 | 731 |
/*if(str_end < (sbegin + what_len)) |
734 | 732 |
return;*/ |
735 | 733 |
if(strlen(sbegin) < what_len) |
... | ... |
@@ -831,7 +844,7 @@ str_fixup_spaces(char **begin, const char **end) |
831 | 831 |
} |
832 | 832 |
|
833 | 833 |
/* allocates memory */ |
834 |
-void |
|
834 |
+int |
|
835 | 835 |
cleanupURL(struct string *URL, int isReal) |
836 | 836 |
{ |
837 | 837 |
char *begin = URL->data; |
... | ... |
@@ -866,6 +879,8 @@ cleanupURL(struct string *URL, int isReal) |
866 | 866 |
else { |
867 | 867 |
size_t host_len; |
868 | 868 |
char* host_begin; |
869 |
+ int rc; |
|
870 |
+ |
|
869 | 871 |
str_replace(begin,end,'\\','/'); |
870 | 872 |
str_strip(&begin,&end,"\"",1); |
871 | 873 |
str_strip(&begin,&end,lt,lt_len); |
... | ... |
@@ -880,7 +895,8 @@ cleanupURL(struct string *URL, int isReal) |
880 | 880 |
/* convert %xx to real value */ |
881 | 881 |
str_hex_to_char(&begin,&end); |
882 | 882 |
str_fixup_spaces(&begin,&end); |
883 |
- string_assign_dup(URL,begin,end+1); |
|
883 |
+ if (rc = string_assign_dup(URL,begin,end+1)) |
|
884 |
+ return rc; |
|
884 | 885 |
/*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/ |
885 | 886 |
} |
886 | 887 |
} |
... | ... |
@@ -926,7 +942,11 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
926 | 926 |
blobClose(hrefs->contents[i]); |
927 | 927 |
}*/ |
928 | 928 |
string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i])); |
929 |
- massert(!urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]); |
|
929 |
+ |
|
930 |
+ if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) { |
|
931 |
+ cli_warnmsg("urls.displayLink.data[...]"); |
|
932 |
+ return CL_CLEAN; |
|
933 |
+ } |
|
930 | 934 |
/* massert(strlen(urls.displayLink.data) < blobGetDataSize(hrefs->contents[i]));*/ |
931 | 935 |
urls.realLink.refcount=-1; |
932 | 936 |
urls.displayLink.refcount=-1;/*don't free these, caller will free*/ |
... | ... |
@@ -995,7 +1015,9 @@ static char* str_compose(const char* a,const char* b,const char* c) |
995 | 995 |
const size_t b_len = strlen(b); |
996 | 996 |
const size_t c_len = strlen(c); |
997 | 997 |
const size_t r_len = a_len+b_len+c_len+1; |
998 |
- char* concated = malloc(r_len); |
|
998 |
+ char* concated = cli_malloc(r_len); |
|
999 |
+ if(!concated) |
|
1000 |
+ return NULL; |
|
999 | 1001 |
strncpy(concated,a,a_len); |
1000 | 1002 |
strncpy(concated+a_len,b,b_len); |
1001 | 1003 |
strncpy(concated+a_len+b_len,c,c_len); |
... | ... |
@@ -1026,6 +1048,8 @@ int phishing_init(struct cl_engine* engine) |
1026 | 1026 |
} |
1027 | 1027 |
else { |
1028 | 1028 |
pchk = engine->phishcheck; |
1029 |
+ if(!pchk) |
|
1030 |
+ return CL_ENULLARG; |
|
1029 | 1031 |
if(!pchk->is_disabled) { |
1030 | 1032 |
/* already initialized */ |
1031 | 1033 |
return CL_SUCCESS; |
... | ... |
@@ -1123,7 +1147,7 @@ enum phish_status cleanupURLs(struct url_check* urls) |
1123 | 1123 |
return CL_PHISH_NODECISION; |
1124 | 1124 |
} |
1125 | 1125 |
|
1126 |
-enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy) |
|
1126 |
+int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy) |
|
1127 | 1127 |
{ |
1128 | 1128 |
struct string* host = isReal ? &host_url->realLink : &host_url->displayLink; |
1129 | 1129 |
get_host(pchk, host, isReal ? url->realLink.data : url->displayLink.data, isReal, phishy); |
... | ... |
@@ -1205,7 +1229,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* |
1205 | 1205 |
return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */ |
1206 | 1206 |
|
1207 | 1207 |
if((rc = cleanupURLs(urls))) { |
1208 |
- massert(!isPhishing(rc));/* not allowed to decide this is phishing */ |
|
1208 |
+ if(isPhishing(rc))/* not allowed to decide this is phishing */ |
|
1209 |
+ return CL_PHISH_CLEAN; |
|
1209 | 1210 |
return rc;/* URLs identical after cleanup */ |
1210 | 1211 |
} |
1211 | 1212 |
|
... | ... |
@@ -1223,7 +1248,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* |
1223 | 1223 |
|
1224 | 1224 |
if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) { |
1225 | 1225 |
free_if_needed(&host_url); |
1226 |
- massert(!isPhishing(rc)); |
|
1226 |
+ if(isPhishing(rc)) |
|
1227 |
+ return CL_PHISH_CLEAN; |
|
1227 | 1228 |
return rc; |
1228 | 1229 |
} |
1229 | 1230 |
|
... | ... |
@@ -80,10 +80,10 @@ void url_check_init(struct url_check* urls); |
80 | 80 |
void string_free(struct string* str); |
81 | 81 |
void string_assign(struct string* dest,struct string* src); |
82 | 82 |
void string_assign_c(struct string* dest,char* data); |
83 |
-void string_assign_dup(struct string* dest,const char* start,const char* end); |
|
83 |
+int string_assign_dup(struct string* dest,const char* start,const char* end); |
|
84 | 84 |
void string_assign_ref(struct string* dest,struct string* ref,char* data); |
85 | 85 |
void free_if_needed(struct url_check* url); |
86 |
-void get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy); |
|
86 |
+int get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy); |
|
87 | 87 |
int isCountryCode(const struct phishcheck* s,const char* str); |
88 | 88 |
int isTLD(const struct phishcheck* s,const char* str,int len); |
89 | 89 |
void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host); |
... | ... |
@@ -91,12 +91,12 @@ int ip_reverse(struct url_check* urls,int isReal); |
91 | 91 |
void reverse_lookup(struct url_check* url,int isReal); |
92 | 92 |
int isNumeric(const char* host); |
93 | 93 |
int isSSL(const char* URL); |
94 |
-void cleanupURL(struct string* URL,int isReal); |
|
94 |
+int cleanupURL(struct string* URL,int isReal); |
|
95 | 95 |
void get_redirected_URL(struct string* URL); |
96 | 96 |
int isURL(const struct phishcheck* pchk,const char* URL); |
97 | 97 |
enum phish_status cleanupURLs(struct url_check* urls); |
98 | 98 |
int isNumericURL(const struct phishcheck* pchk, const char* URL); |
99 |
-enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy); |
|
99 |
+int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy); |
|
100 | 100 |
void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains); |
101 | 101 |
enum phish_status phishy_map(int phishy,enum phish_status fallback); |
102 | 102 |
int isEncoded(const char* url); |
... | ... |
@@ -499,7 +499,7 @@ static int cli_initengine(struct cl_engine **engine, unsigned int options) |
499 | 499 |
} |
500 | 500 |
|
501 | 501 |
#ifdef CL_EXPERIMENTAL |
502 |
- if(rc = phishing_init(*engine)) |
|
502 |
+ if((rc = phishing_init(*engine))) |
|
503 | 503 |
return rc; |
504 | 504 |
#endif |
505 | 505 |
|
... | ... |
@@ -619,7 +619,7 @@ static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options |
619 | 619 |
} |
620 | 620 |
|
621 | 621 |
if(!(*engine)->whitelist_matcher) { |
622 |
- if(ret = init_whitelist(*engine)) { |
|
622 |
+ if((ret = init_whitelist(*engine))) { |
|
623 | 623 |
phishing_done(*engine); |
624 | 624 |
cl_free(*engine); |
625 | 625 |
return ret; |
... | ... |
@@ -646,7 +646,7 @@ static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options |
646 | 646 |
} |
647 | 647 |
|
648 | 648 |
if(!(*engine)->domainlist_matcher) { |
649 |
- if(ret = init_domainlist(*engine)) { |
|
649 |
+ if((ret = init_domainlist(*engine))) { |
|
650 | 650 |
phishing_done(*engine); |
651 | 651 |
cl_free(*engine); |
652 | 652 |
return ret; |
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: regex_list.c,v $ |
22 |
+ * Revision 1.10 2006/10/14 23:52:02 tkojm |
|
23 |
+ * code cleanup |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.9 2006/10/10 23:51:49 tkojm |
23 | 26 |
* apply patches for the anti-phish code from Edwin |
24 | 27 |
* |
... | ... |
@@ -570,7 +573,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
570 | 570 |
pattern[0]='\0'; |
571 | 571 |
flags = buffer+1; |
572 | 572 |
pattern++; |
573 |
- if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && !is_whitelist)) {/*regex*/ |
|
573 |
+ if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && is_whitelist)) {/*regex*/ |
|
574 | 574 |
if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags) )) |
575 | 575 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
576 | 576 |
} |
... | ... |
@@ -740,13 +743,17 @@ void regex_list_done(struct regex_matcher* matcher) |
740 | 740 |
|
741 | 741 |
regex_list_cleanup(matcher); |
742 | 742 |
if(matcher->list_loaded) { |
743 |
- cli_ac_free(matcher->root_hosts); |
|
744 |
- free(matcher->root_hosts); |
|
745 |
- matcher->root_hosts=NULL; |
|
743 |
+ if(matcher->root_hosts) { |
|
744 |
+ cli_ac_free(matcher->root_hosts); |
|
745 |
+ free(matcher->root_hosts); |
|
746 |
+ matcher->root_hosts=NULL; |
|
747 |
+ } |
|
746 | 748 |
|
747 |
- cli_ac_free(matcher->root_urls); |
|
748 |
- free(matcher->root_urls); |
|
749 |
- matcher->root_urls=NULL; |
|
749 |
+ if(matcher->root_urls) { |
|
750 |
+ cli_ac_free(matcher->root_urls); |
|
751 |
+ free(matcher->root_urls); |
|
752 |
+ matcher->root_urls=NULL; |
|
753 |
+ } |
|
750 | 754 |
|
751 | 755 |
matcher->list_built=0; |
752 | 756 |
destroy_tree(matcher); |
... | ... |
@@ -951,7 +958,7 @@ static const unsigned char* find_regex_start(const unsigned char* pat) |
951 | 951 |
if(token.type!=TOKEN_REGEX) { |
952 | 952 |
last = tmp; |
953 | 953 |
lasttype = token.type; |
954 |
- if(token.type==TOKEN_BRACKET) |
|
954 |
+ if(token.type==TOKEN_BRACKET && token.u.bitmap) |
|
955 | 955 |
free(token.u.bitmap); |
956 | 956 |
if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) { |
957 | 957 |
/* save this position on stack, succesfully parsed till here*/ |
... | ... |
@@ -1233,6 +1240,8 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co |
1233 | 1233 |
if(charclass == std_class_cnt) {/*not a std char class*/ |
1234 | 1234 |
new->op = OP_CUSTOMCLASS; |
1235 | 1235 |
new->u.children = cli_malloc(sizeof(new->u.children[0])*2); |
1236 |
+ if(!new->u.children) |
|
1237 |
+ return CL_EMEM; |
|
1236 | 1238 |
new->u.bitmap[0] = token.u.bitmap; |
1237 | 1239 |
new->u.bitmap[1] = NULL; |
1238 | 1240 |
tree_node_insert_nonbin(node,new); |
... | ... |
@@ -1259,17 +1268,23 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co |
1259 | 1259 |
case TOKEN_REGEX: |
1260 | 1260 |
case TOKEN_DONE: { |
1261 | 1261 |
struct leaf_info* leaf=cli_malloc(sizeof(*leaf)); |
1262 |
+ if(!leaf) |
|
1263 |
+ return CL_EMEM; |
|
1262 | 1264 |
leaf->info=strdup(info); |
1263 | 1265 |
if(token.type==TOKEN_REGEX) { |
1264 | 1266 |
int rc; |
1265 | 1267 |
struct tree_node* new; |
1266 | 1268 |
regex_t* preg; |
1267 | 1269 |
preg=cli_malloc(sizeof(*preg)); |
1270 |
+ if(!preg) |
|
1271 |
+ return CL_EMEM; |
|
1268 | 1272 |
rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL)); |
1269 | 1273 |
leaf->preg=preg; |
1270 | 1274 |
if(rc) |
1271 | 1275 |
return rc; |
1272 | 1276 |
new=cli_malloc(sizeof(*new)); |
1277 |
+ if(!new) |
|
1278 |
+ return CL_EMEM; |
|
1273 | 1279 |
new->op=OP_LEAF; |
1274 | 1280 |
new->next=node; |
1275 | 1281 |
new->alternatives=0; |
... | ... |
@@ -1511,7 +1526,8 @@ static void destroy_tree(struct regex_matcher* matcher) |
1511 | 1511 |
destroy_tree_internal(matcher,matcher->root_regex); |
1512 | 1512 |
while (matcher->node_stack.cnt) { |
1513 | 1513 |
struct tree_node* node = stack_pop(&matcher->node_stack); |
1514 |
- free(node); |
|
1514 |
+ if(node) |
|
1515 |
+ free(node); |
|
1515 | 1516 |
} |
1516 | 1517 |
} |
1517 | 1518 |
#ifndef NDEBUG |