Browse code

phishing patch from Edwin (closes bb#157, #174, #222, #224)

git-svn: trunk@2607

Tomasz Kojm authored on 2007/01/13 02:29:09
Showing 12 changed files
... ...
@@ -1,3 +1,14 @@
1
+Fri Jan 12 18:18:43 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: phishing patch from Edwin (closes bb#157, #174, #222, #224)
4
+	         - add new tuning options to phishing code (--phishing-ssl,
5
+		   --phishing-cloak, PhishingAlwaysBlockSSLMismatch,
6
+		   PhishingAlwaysBlockCloak)
7
+		 - rename alldomains/stricturl option to PhishingRestrictedScan
8
+		 - update man pages for phishing options 
9
+		 - update clamd/clamscan/cfgparser for new phishing options
10
+		 - add log lines for non-default phishing options
11
+
1 12
 Tue Jan  9 21:04:03 CET 2007 (tk)
2 13
 ---------------------------------
3 14
   * libclamav: dynamic configuration support
... ...
@@ -313,6 +313,13 @@ int main(int argc, char **argv)
313 313
     else
314 314
 	logg("Not loading phishing signatures.\n");
315 315
 
316
+#ifdef CL_EXPERIMENTAL
317
+    if(cfgopt(copt,"PhishingScanURLs")->enabled)
318
+	dboptions |= CL_DB_PHISHING_URLS;
319
+    else
320
+	logg("Disabling URL based phishing detection.\n");
321
+#endif
322
+
316 323
     if(cfgopt(copt, "NodalCoreAcceleration")->enabled) {
317 324
 #ifdef HAVE_NCORE
318 325
 	dboptions |= CL_DB_NCORE;
... ...
@@ -299,12 +299,6 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in
299 299
     logg("*Listening daemon: PID: %d\n", getpid());
300 300
     max_threads = cfgopt(copt, "MaxThreads")->numarg;
301 301
 
302
-#ifdef CL_EXPERIMENTAL
303
-    if(cfgopt(copt,"PhishingScanURLs")->enabled)
304
-	dboptions |= CL_DB_PHISHING_URLS;
305
-    if(cfgopt(copt,"PhishingStrictURLCheck")->enabled)
306
-	options |= CL_SCAN_PHISHING_DOMAINLIST;
307
-#endif
308 302
 
309 303
     if(cfgopt(copt, "ScanArchive")->enabled || cfgopt(copt, "ClamukoScanArchive")->enabled) {
310 304
 
... ...
@@ -422,6 +416,34 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in
422 422
 	logg("HTML support disabled.\n");
423 423
     }
424 424
 
425
+#ifdef CL_EXPERIMENTAL
426
+    if(cfgopt(copt,"PhishingScanURLs")->enabled) {
427
+
428
+	if(cfgopt(copt,"PhishingRestrictedScan")->enabled) {
429
+	    /* we don't scan urls from all domains, just those listed in
430
+	     * .pdb file. This is the safe default
431
+	     */
432
+	    options |= CL_SCAN_PHISHING_DOMAINLIST;
433
+	} else {
434
+	    /* This is a false positive prone option, since newsletters, etc.
435
+	     * often contain links that will be classified as phishing attempts,
436
+	     * even though the site they link to isn't a phish site.
437
+	     */
438
+	    logg("Phishing: Checking all URLs, regardless of domain (FP prone).\n");
439
+	}
440
+
441
+	if(cfgopt(copt,"PhishingAlwaysBlockCloak")->enabled) {
442
+	    options |= CL_SCAN_PHISHING_BLOCKCLOAK; 
443
+	    logg("Phishing: Always checking for cloaked urls\n");
444
+	}
445
+
446
+	if(cfgopt(copt,"PhishingAlwaysBlockSSLMismatch")->enabled) {
447
+	    options |= CL_SCAN_PHISHING_BLOCKSSL;
448
+	    logg("Phishing: Always checking for ssl mismatches\n");
449
+	}
450
+    }
451
+#endif /* CL_EXPERIMENTAL */
452
+
425 453
     selfchk = cfgopt(copt, "SelfCheck")->numarg;
426 454
     if(!selfchk) {
427 455
 	logg("Self checking disabled.\n");
... ...
@@ -263,7 +263,9 @@ void help(void)
263 263
     mprintf("    --no-phishing-sigs                   Disable signature-based phishing detection\n");
264 264
 #ifdef CL_EXPERIMENTAL
265 265
     mprintf("    --no-phishing-scan-urls              Disable url-based phishing detection\n");
266
-    mprintf("    --phishing-strict-url-check          Enable phishing detection for all domains (might lead to false positives!)\n");
266
+    mprintf("    --no-phishing-restrictedscan         Enable phishing detection for all domains (might lead to false positives!)\n");
267
+    mprintf("    --phishing-ssl                       Always block SSL mismatches in URLs (phishing module)\n");
268
+    mprintf("    --phishing-cloak                     Always block cloaked URLs (phishing module)\n");
267 269
 #endif
268 270
     mprintf("    --no-algorithmic                     Disable algorithmic detection\n");
269 271
     mprintf("    --no-pe                              Disable PE analysis\n");
... ...
@@ -80,8 +80,10 @@ static struct option clamscan_longopt[] = {
80 80
     {"mail-follow-urls", 0, 0, 0},
81 81
     {"no-phishing-sigs", 0, 0, 0},
82 82
 #ifdef CL_EXPERIMENTAL
83
-    {"no-phishing-scan-urls",0,0,0},
84
-    {"phishing-strict-url-check",0,0,0},
83
+    {"no-phishing-scan-urls", 0, 0, 0},
84
+    {"no-phishing-restrictedscan", 0, 0, 0},
85
+    {"phishing-ssl", 0, 0, 0},
86
+    {"phishing-cloak", 0, 0, 0},
85 87
 #endif
86 88
     {"no-algorithmic", 0, 0, 0},
87 89
     {"unzip", 2, 0, 0},
... ...
@@ -95,8 +95,16 @@ int scanmanager(const struct optstruct *opt)
95 95
 #ifdef CL_EXPERIMENTAL
96 96
     if(!opt_check(opt,"no-phishing-scan-urls"))
97 97
 	dboptions |= CL_DB_PHISHING_URLS;
98
-    if(opt_check(opt,"phishing-strict-url-check"))
99
-	options |= CL_SCAN_PHISHING_DOMAINLIST;
98
+    if(!opt_check(opt,"no-phishing-restrictedscan")) {
99
+	/* not scanning all domains, check only URLs with domains from .pdb */
100
+	dboptions |= CL_SCAN_PHISHING_DOMAINLIST;
101
+    }
102
+    if(opt_check(opt,"phishing-ssl")) {
103
+	   dboptions |= CL_SCAN_PHISHING_BLOCKSSL;
104
+    }
105
+    if(opt_check(opt,"phishing-cloak")) {
106
+	    dboptions |= CL_SCAN_PHISHING_BLOCKCLOAK;
107
+    }
100 108
 #endif
101 109
 
102 110
     if(opt_check(opt, "dev-ac-only")) {
... ...
@@ -76,9 +76,15 @@ Disable detection of phishing messages.
76 76
 \fB\-\-no\-phishing\-scan\-urls\fR
77 77
 Disable url-based phishing detection. (Only available in experimental builds)
78 78
 .TP
79
-\fB\-\-phish\-scan\-alldomains\fR
79
+\fB\-\-no\-phishing\-restrictedscan\fR
80 80
 Enable phishing detection for all domains (might lead to false positives!). (Only available in experimental builds)
81 81
 .TP
82
+\fB\-\-phishing\-ssl\fR
83
+Always block SSL mismatches in URLs (might lead to false positives!). (Only available in experimental builds) 
84
+.TP
85
+\fB\-\-phishing\-cloak\fR
86
+Always block cloaked URLs (might lead to some false positives). (Only available in experimental builds)
87
+.TP
82 88
 \fB\-\-no\-algo\fR
83 89
 In some cases (eg. complex malware, exploits in graphic files, and others), ClamAV uses special algorithms to provide accurate detection. This option disables the algorithmic detection.
84 90
 .TP 
... ...
@@ -222,17 +222,33 @@ LocalSocket /tmp/clamd
222 222
 # Default: yes
223 223
 #PhishingSignatures yes
224 224
 
225
-# Use phishing detection for all domains (not just those listed in the .pdb database).
226
-# It is not recommended to turn this option on, it is mean for internal use.
227
-# (available in experimental builds only)
228
-# Default: no
229
-#PhishingStrictURLCheck no
230 225
 
231 226
 # Scan urls found in mails for phishing attempts.
232 227
 # (available in experimental builds only) 
233 228
 # Default: yes
234 229
 #PhishingScanURLs yes
235 230
 
231
+# Use phishing detection only for domains listed in the .pdb database. It is
232
+# not recommended to have this option turned off, because scanning of all
233
+# domains may lead to many false positives!
234
+# (available in experimental builds only)
235
+# Default: yes
236
+#PhishingRestrictedScan yes
237
+
238
+# Always block SSL mismatches in URLs, even if the URL isn't in the database.
239
+# This can lead to false positives.
240
+# (available in experimental builds only)
241
+#
242
+# Default: no
243
+#PhishingAlwaysBlockSSLMismatch no
244
+
245
+# Always block cloaked URLs, even if the URL isn't in the database.
246
+# There is a remote possibility that this leads to false positives.
247
+# (available in experimental builds only)
248
+#
249
+# Default: no
250
+#PhishingAlwaysBlockCloak no
251
+
236 252
 ##
237 253
 ## HTML
238 254
 ##
... ...
@@ -88,8 +88,10 @@ extern "C"
88 88
 #define CL_SCAN_MAILURL		    0x80
89 89
 #define CL_SCAN_BLOCKMAX	    0x100
90 90
 #define CL_SCAN_ALGO		    0x200
91
-#define CL_SCAN_PHISHING_DOMAINLIST 0x800
92
-#define CL_SCAN_ELF		    0x1000
91
+#define CL_SCAN_PHISHING_DOMAINLIST 0x400
92
+#define CL_SCAN_PHISHING_BLOCKSSL   0x800 /* ssl mismatches, not ssl by itself*/
93
+#define CL_SCAN_PHISHING_BLOCKCLOAK 0x1000
94
+#define CL_SCAN_ELF		    0x2000
93 95
 
94 96
 /* recommended scan settings */
95 97
 #define CL_SCAN_STDOPT		(CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGO | CL_SCAN_ELF) 
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phishcheck.c,v $
22
+ *  Revision 1.17  2007/01/12 17:29:09  tkojm
23
+ *  phishing patch from Edwin (closes bb#157, #174, #222, #224)
24
+ *
22 25
  *  Revision 1.16  2006/12/20 01:23:50  tkojm
23 26
  *  options cleanup
24 27
  *
... ...
@@ -268,6 +271,15 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla
268 268
 
269 269
 /* Constant strings and tables */ 
270 270
 static char empty_string[]="";
271
+
272
+#define ANY_CLOAK "(0[xX])?([a-fA-F0-9]+\\.?)+"
273
+#define CLOAK_REGEX_HEXURL "("ANY_CLOAK")?0[xX][a-fA-F0-9]+\\.?"ANY_CLOAK
274
+#define OCTAL_CLOAK "("ANY_CLOAK")?000[0-9]+\\.?"ANY_CLOAK
275
+#define DWORD_CLOAK "[0-9]{8,}"
276
+
277
+static const char cloaked_host_regex[] = "^(("CLOAK_REGEX_HEXURL")|("OCTAL_CLOAK")|("DWORD_CLOAK"))$";
278
+
279
+
271 280
 static const char tld_regex[] = "^"iana_tld"$";
272 281
 static const char cctld_regex[] = "^"iana_cctld"$";
273 282
 static const char dotnet[] = ".net";
... ...
@@ -944,6 +956,12 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
944 944
 				continue;
945 945
 			if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST)
946 946
 				urls.flags |= DOMAINLIST_REQUIRED;
947
+			if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) {
948
+				urls.always_check_flags |= CHECK_SSL;
949
+			}
950
+			if (ctx->options & CL_SCAN_PHISHING_BLOCKCLOAK) {
951
+				urls.always_check_flags |= CHECK_CLOAKING;
952
+			}
947 953
 			string_init_c(&urls.realLink,(char*)hrefs->value[i]);
948 954
 /*			if(!hrefs->contents[i]->isClosed) {
949 955
 				blobAddData(hrefs->contents[i],empty_string,1);
... ...
@@ -1035,7 +1053,10 @@ static char* str_compose(const char* a,const char* b,const char* c)
1035 1035
 
1036 1036
 static inline char hex2int(const unsigned char* src)
1037 1037
 {
1038
-	return hextable[src[0]]<<4 | hextable[src[1]];
1038
+	return (src[0] == '0' && src[1] == '0') ? 
1039
+		0x1 :/* don't convert %00 to \0, use 0x1
1040
+ 		      * this value is also used by cloak check*/
1041
+		hextable[src[0]]<<4 | hextable[src[1]];
1039 1042
 }
1040 1043
 
1041 1044
 static void free_regex(regex_t* p)
... ...
@@ -1066,6 +1087,12 @@ int phishing_init(struct cl_engine* engine)
1066 1066
 
1067 1067
 	cli_dbgmsg("Initializing phishcheck module\n");
1068 1068
 
1069
+	if(build_regex(&pchk->preg_hexurl,cloaked_host_regex,1)) {
1070
+		free(pchk);
1071
+		engine->phishcheck = NULL;
1072
+		return CL_EFORMAT;
1073
+	}
1074
+
1069 1075
 	if(build_regex(&pchk->preg_cctld,cctld_regex,1)) {
1070 1076
 		free(pchk);
1071 1077
 		engine->phishcheck = NULL;
... ...
@@ -1106,6 +1133,7 @@ void phishing_done(struct cl_engine* engine)
1106 1106
 	cli_dbgmsg("Cleaning up phishcheck\n");
1107 1107
 	if(pchk && !pchk->is_disabled) {
1108 1108
 		free_regex(&pchk->preg);
1109
+		free_regex(&pchk->preg_hexurl);
1109 1110
 		free_regex(&pchk->preg_cctld);
1110 1111
 		free_regex(&pchk->preg_tld);
1111 1112
 		free_regex(&pchk->preg_numeric);
... ...
@@ -1167,7 +1195,8 @@ int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url
1167 1167
 		string_free(host);
1168 1168
 		return CL_PHISH_TEXTURL;
1169 1169
 	}
1170
-	if(isReal && (!strncmp(host->data,"0x",2) || !strncmp(host->data,"0X",2))) {
1170
+	if(!regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
1171
+		/* use a regex here, so that we don't accidentally block 0xacab.net style hosts */
1171 1172
 		string_free(host);
1172 1173
 		return CL_PHISH_HEX_URL;
1173 1174
 	}
... ...
@@ -1217,6 +1246,7 @@ int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int ho
1217 1217
 	return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
1218 1218
 }
1219 1219
 
1220
+
1220 1221
 /* urls can't contain null pointer, caller must ensure this */
1221 1222
 enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
1222 1223
 {
... ...
@@ -1267,17 +1297,23 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
1267 1267
 			if(domainlist_match(engine,urls->displayLink.data,urls->realLink.data,1,&urls->flags))
1268 1268
 				phishy |= DOMAIN_LISTED;
1269 1269
 			else {
1270
+			}
1271
+		}
1272
+	}
1273
+
1274
+	if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) {
1275
+		urls->flags &= urls->always_check_flags;
1276
+		if(!urls->flags) {
1270 1277
 				free_if_needed(&host_url);
1271 1278
 				return CL_PHISH_HOST_NOT_LISTED;
1272 1279
 			}
1273 1280
 		}
1274
-	}
1275 1281
 
1276 1282
 	if(urls->flags&CHECK_CLOAKING) {
1277 1283
 		/*Checks if URL is cloaked.
1278 1284
 		Should we check if it containts another http://, https://?
1279 1285
 		No because we might get false positives from redirect services.*/
1280
-		if(strstr(urls->realLink.data,"%00")) {
1286
+		if(strchr(urls->realLink.data,'\0x1')) {
1281 1287
 			free_if_needed(&host_url);
1282 1288
 			return CL_PHISH_CLOAKED_NULL;
1283 1289
 		}
... ...
@@ -1287,6 +1323,7 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
1287 1287
 		}
1288 1288
 	}
1289 1289
 
1290
+
1290 1291
 	if(urls->displayLink.data[0]=='\0') {
1291 1292
 		free_if_needed(&host_url);
1292 1293
 		return CL_PHISH_CLEAN;
... ...
@@ -1297,12 +1334,22 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
1297 1297
 		return CL_PHISH_SSL_SPOOF;
1298 1298
 	}
1299 1299
 
1300
+	if(!urls->flags&CHECK_CLOAKING && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) {
1301
+		free_if_needed(&host_url);
1302
+		return CL_PHISH_HOST_NOT_LISTED;
1303
+	}
1304
+
1300 1305
 	if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy)))
1301 1306
 	{
1302 1307
 		free_if_needed(&host_url);
1303 1308
 		return rc;
1304 1309
 	}
1305 1310
 
1311
+	if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
1312
+		free_if_needed(&host_url);
1313
+		return CL_PHISH_HOST_NOT_LISTED;
1314
+	}
1315
+
1306 1316
 	if(!strncmp(urls->displayLink.data,cid,cid_len))/* cid: image */{
1307 1317
 		free_if_needed(&host_url);
1308 1318
 		return CL_PHISH_CLEAN_CID;
... ...
@@ -62,6 +62,7 @@ struct phishcheck {
62 62
 	regex_t preg_tld;
63 63
 	regex_t preg_cctld;
64 64
 	regex_t preg_numeric;
65
+	regex_t preg_hexurl;
65 66
 	char*    url_regex;
66 67
 	int      is_disabled;
67 68
 };
... ...
@@ -70,6 +71,7 @@ struct url_check {
70 70
 	struct string realLink;
71 71
 	struct string displayLink;
72 72
 	unsigned short       flags;
73
+	unsigned short always_check_flags;
73 74
 };
74 75
 
75 76
 int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);
... ...
@@ -52,7 +52,11 @@ struct cfgoption cfg_options[] = {
52 52
     {"PhishingSignatures", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
53 53
 #ifdef CL_EXPERIMENTAL
54 54
     {"PhishingScanURLs",OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
55
-    {"PhishingStrictURLCheck", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
55
+    /* these are FP prone options, if default isn't used */
56
+    {"PhishingAlwaysBlockCloak", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
57
+    {"PhishingAlwaysBlockSSLMismatch", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
58
+    {"PhishingRestrictedScan", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
59
+    /* end of FP prone options */
56 60
 #endif
57 61
     {"AlgorithmicDetection", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
58 62
     {"ScanHTML", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},