Browse code

initial support for matching URLs against MD5 hashes

git-svn: trunk@4049

Török Edvin authored on 2008/08/01 23:49:55
Showing 4 changed files
... ...
@@ -47,9 +47,11 @@
47 47
 #include "phishcheck.h"
48 48
 #include "phish_domaincheck_db.h"
49 49
 #include "phish_whitelist.h"
50
+#include "regex_list.h"
50 51
 #include "iana_tld.h"
51 52
 #include "iana_cctld.h"
52 53
 #include "scanners.h"
54
+#include "md5.h"
53 55
 
54 56
 
55 57
 #define DOMAIN_REAL 1
... ...
@@ -737,7 +739,6 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
737 737
 
738 738
 	if(!ctx->found_possibly_unwanted)
739 739
 		*ctx->virname=NULL;
740
-#if 0
741 740
 	FILE *f = fopen("/home/edwin/quarantine/urls","r");
742 741
 	if(!f)
743 742
 		abort();
... ...
@@ -770,7 +771,6 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
770 770
 	}
771 771
 	fclose(f);
772 772
 	return 0;
773
-#endif
774 773
 	for(i=0;i<hrefs->count;i++)
775 774
 		if(hrefs->contents[i]) {
776 775
 			struct url_check urls;
... ...
@@ -829,6 +829,9 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
829 829
 				case CL_PHISH_CLOAKED_UIU:
830 830
 					*ctx->virname="Phishing.Heuristics.Email.Cloaked.Username";/*http://www.ebay.com@www.evil.com*/
831 831
 					break;
832
+				case CL_PHISH_HASH:
833
+					*ctx->virname="Phishing.URL.Blacklisted";
834
+					break;
832 835
 				case CL_PHISH_NOMATCH:
833 836
 				default:
834 837
 					*ctx->virname="Phishing.Heuristics.Email.SpoofedDomain";
... ...
@@ -1177,6 +1180,23 @@ static int whitelist_check(const struct cl_engine* engine,struct url_check* urls
1177 1177
 	return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
1178 1178
 }
1179 1179
 
1180
+static int hash_match(const struct regex_matcher *rlist, const char *url, size_t len)
1181
+{
1182
+	unsigned char md5_dig[16];
1183
+	cli_md5_ctx md5;
1184
+
1185
+	if(!rlist->hashes.bm_patterns)
1186
+		return CL_CLEAN;
1187
+
1188
+	cli_md5_init(&md5);
1189
+	cli_md5_update(&md5, url, len);
1190
+	cli_md5_final(md5_dig, &md5);
1191
+	if(cli_bm_scanbuff(md5_dig, 16, NULL, &rlist->hashes,0,0,-1) == CL_VIRUS) {
1192
+		return CL_VIRUS;
1193
+	}
1194
+	return CL_SUCCESS;
1195
+}
1196
+
1180 1197
 /* urls can't contain null pointer, caller must ensure this */
1181 1198
 static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
1182 1199
 {
... ...
@@ -1213,6 +1233,10 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1213 1213
 	if(whitelist_check(engine, urls, 0))
1214 1214
 		return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */
1215 1215
 
1216
+	if(hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) == CL_VIRUS) {
1217
+		cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data);
1218
+		return CL_PHISH_HASH;
1219
+	}
1216 1220
 	url_check_init(&host_url);
1217 1221
 
1218 1222
 	if((rc = url_get_host(pchk, urls, &host_url, DOMAIN_DISPLAY, &phishy))) {
... ...
@@ -26,7 +26,8 @@
26 26
 
27 27
 #define CL_PHISH_BASE 100
28 28
 enum phish_status {CL_PHISH_NODECISION=0, CL_PHISH_CLEAN=CL_PHISH_BASE,
29
-	CL_PHISH_CLOAKED_UIU, CL_PHISH_NUMERIC_IP, CL_PHISH_HEX_URL, CL_PHISH_CLOAKED_NULL, CL_PHISH_SSL_SPOOF, CL_PHISH_NOMATCH};
29
+	CL_PHISH_CLOAKED_UIU, CL_PHISH_NUMERIC_IP, CL_PHISH_HEX_URL, CL_PHISH_CLOAKED_NULL, CL_PHISH_SSL_SPOOF, CL_PHISH_NOMATCH,
30
+        CL_PHISH_HASH};
30 31
 
31 32
 #define CHECK_SSL         1
32 33
 #define CHECK_CLOAKING    2
... ...
@@ -350,6 +350,9 @@ int init_regex_list(struct regex_matcher* matcher)
350 350
 	if((rc = cli_ac_init(&matcher->suffixes, 2, 32))) {
351 351
 		return rc;
352 352
 	}
353
+	if(rc = cli_bm_init(&matcher->hashes)) {
354
+		return rc;
355
+	}
353 356
 	SO_init(&matcher->filter);
354 357
 	return CL_SUCCESS;
355 358
 }
... ...
@@ -394,7 +397,27 @@ static int functionality_level_check(char* line)
394 394
 			return CL_EMALFDB;
395 395
 		ptmin[-1]='\0';
396 396
 		return CL_SUCCESS;
397
-	}		
397
+	}
398
+}
399
+
400
+static int add_hash(struct regex_matcher *matcher, char* pattern)
401
+{
402
+	int rc;
403
+	struct cli_bm_patt *pat = cli_calloc(1, sizeof(*pat));
404
+	if(!pat)
405
+		return CL_EMEM;
406
+	pat->pattern = (unsigned char*)cli_hex2str(pattern);
407
+	if(!pat->pattern)
408
+		return CL_EMALFDB;
409
+	pat->length = 16;
410
+	pat->virname = NULL;
411
+	if(rc = cli_bm_addpatt(&matcher->hashes, pat)) {
412
+		cli_errmsg("add_hash: failed to add BM pattern\n");
413
+		free(pat->pattern);
414
+		free(pat);
415
+		return CL_EMALFDB;
416
+	}
417
+	return CL_SUCCESS;
398 418
 }
399 419
 
400 420
 
... ...
@@ -485,8 +508,13 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
485 485
 			/*matches displayed host*/
486 486
 			if (( rc = add_static_pattern(matcher, pattern) ))
487 487
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
488
-		}
489
-		else {
488
+		} else if (buffer[0] == 'U' && !is_whitelist) {
489
+			pattern[pattern_len] = '\0';
490
+			if (( rc = add_hash(matcher, pattern) )) {
491
+				cli_errmsg("Error loading at line: %d\n", line);
492
+				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
493
+			}
494
+		} else {
490 495
 			return CL_EMALFDB;
491 496
 		}
492 497
 	}
... ...
@@ -545,6 +573,7 @@ void regex_list_done(struct regex_matcher* matcher)
545 545
 			free(matcher->all_pregs);
546 546
 		}
547 547
 		hashtab_free(&matcher->suffix_hash);
548
+		cli_bm_free(&matcher->hashes);
548 549
 		matcher->list_built=0;
549 550
 		matcher->list_loaded=0;
550 551
 	}
... ...
@@ -48,6 +48,7 @@ struct regex_matcher {
48 48
 	size_t regex_cnt;
49 49
 	regex_t **all_pregs;
50 50
 	struct cli_matcher suffixes;
51
+	struct cli_matcher hashes;
51 52
 	struct filter filter;
52 53
 	int list_inited:2;
53 54
 	int list_loaded:2;