git-svn: trunk@2607
Tomasz Kojm authored on 2007/01/13 02:29:09... | ... |
@@ -1,3 +1,14 @@ |
1 |
+Fri Jan 12 18:18:43 CET 2007 (tk) |
|
2 |
+--------------------------------- |
|
3 |
+ * libclamav: phishing patch from Edwin (closes bb#157, #174, #222, #224) |
|
4 |
+ - add new tuning options to phishing code (--phishing-ssl, |
|
5 |
+ --phishing-cloak, PhishAlwaysBlockSSLMismatch, |
|
6 |
+ PhishAlwaysBlockCloak) |
|
7 |
+ - rename alldomains/stricturl option to PhishingRestrictedScan |
|
8 |
+ - update man pages for phishing options |
|
9 |
+ - update clamd/clamscan/cfgparser for new phishing options |
|
10 |
+ - add log lines for non-default phishing options |
|
11 |
+ |
|
1 | 12 |
Tue Jan 9 21:04:03 CET 2007 (tk) |
2 | 13 |
--------------------------------- |
3 | 14 |
* libclamav: dynamic configuration support |
... | ... |
@@ -313,6 +313,13 @@ int main(int argc, char **argv) |
313 | 313 |
else |
314 | 314 |
logg("Not loading phishing signatures.\n"); |
315 | 315 |
|
316 |
+#ifdef CL_EXPERIMENTAL |
|
317 |
+ if(cfgopt(copt,"PhishingScanURLs")->enabled) |
|
318 |
+ dboptions |= CL_DB_PHISHING_URLS; |
|
319 |
+ else |
|
320 |
+ logg("Disabling URL based phishing detection.\n"); |
|
321 |
+#endif |
|
322 |
+ |
|
316 | 323 |
if(cfgopt(copt, "NodalCoreAcceleration")->enabled) { |
317 | 324 |
#ifdef HAVE_NCORE |
318 | 325 |
dboptions |= CL_DB_NCORE; |
... | ... |
@@ -299,12 +299,6 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in |
299 | 299 |
logg("*Listening daemon: PID: %d\n", getpid()); |
300 | 300 |
max_threads = cfgopt(copt, "MaxThreads")->numarg; |
301 | 301 |
|
302 |
-#ifdef CL_EXPERIMENTAL |
|
303 |
- if(cfgopt(copt,"PhishingScanURLs")->enabled) |
|
304 |
- dboptions |= CL_DB_PHISHING_URLS; |
|
305 |
- if(cfgopt(copt,"PhishingStrictURLCheck")->enabled) |
|
306 |
- options |= CL_SCAN_PHISHING_DOMAINLIST; |
|
307 |
-#endif |
|
308 | 302 |
|
309 | 303 |
if(cfgopt(copt, "ScanArchive")->enabled || cfgopt(copt, "ClamukoScanArchive")->enabled) { |
310 | 304 |
|
... | ... |
@@ -422,6 +416,34 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in |
422 | 422 |
logg("HTML support disabled.\n"); |
423 | 423 |
} |
424 | 424 |
|
425 |
+#ifdef CL_EXPERIMENTAL |
|
426 |
+ if(cfgopt(copt,"PhishingScanURLs")->enabled) { |
|
427 |
+ |
|
428 |
+ if(cfgopt(copt,"PhishingRestrictedScan")->enabled) { |
|
429 |
+ /* we don't scan urls from all domains, just those listed in |
|
430 |
+ * .pdb file. This is the safe default |
|
431 |
+ */ |
|
432 |
+ options |= CL_SCAN_PHISHING_DOMAINLIST; |
|
433 |
+ } else { |
|
434 |
+ /* This is a false positive prone option, since newsletters, etc. |
|
435 |
+ * often contain links that will be classified as phishing attempts, |
|
436 |
+ * even though the site they link to isn't a phish site. |
|
437 |
+ */ |
|
438 |
+ logg("Phishing: Checking all URLs, regardless of domain (FP prone).\n"); |
|
439 |
+ } |
|
440 |
+ |
|
441 |
+ if(cfgopt(copt,"PhishingAlwaysBlockCloak")->enabled) { |
|
442 |
+ options |= CL_SCAN_PHISHING_BLOCKCLOAK; |
|
443 |
+ logg("Phishing: Always checking for cloaked urls\n"); |
|
444 |
+ } |
|
445 |
+ |
|
446 |
+ if(cfgopt(copt,"PhishingAlwaysBlockSSLMismatch")->enabled) { |
|
447 |
+ options |= CL_SCAN_PHISHING_BLOCKSSL; |
|
448 |
+ logg("Phishing: Always checking for ssl mismatches\n"); |
|
449 |
+ } |
|
450 |
+ } |
|
451 |
+#endif /* CL_EXPERIMENTAL */ |
|
452 |
+ |
|
425 | 453 |
selfchk = cfgopt(copt, "SelfCheck")->numarg; |
426 | 454 |
if(!selfchk) { |
427 | 455 |
logg("Self checking disabled.\n"); |
... | ... |
@@ -263,7 +263,9 @@ void help(void) |
263 | 263 |
mprintf(" --no-phishing-sigs Disable signature-based phishing detection\n"); |
264 | 264 |
#ifdef CL_EXPERIMENTAL |
265 | 265 |
mprintf(" --no-phishing-scan-urls Disable url-based phishing detection\n"); |
266 |
- mprintf(" --phishing-strict-url-check Enable phishing detection for all domains (might lead to false positives!)\n"); |
|
266 |
+ mprintf(" --no-phishing-restrictedscan Enable phishing detection for all domains (might lead to false positives!)\n"); |
|
267 |
+ mprintf(" --phishing-ssl Always block SSL mismatches in URLs (phishing module)\n"); |
|
268 |
+ mprintf(" --phishing-cloak Always block cloaked URLs (phishing module)\n"); |
|
267 | 269 |
#endif |
268 | 270 |
mprintf(" --no-algorithmic Disable algorithmic detection\n"); |
269 | 271 |
mprintf(" --no-pe Disable PE analysis\n"); |
... | ... |
@@ -80,8 +80,10 @@ static struct option clamscan_longopt[] = { |
80 | 80 |
{"mail-follow-urls", 0, 0, 0}, |
81 | 81 |
{"no-phishing-sigs", 0, 0, 0}, |
82 | 82 |
#ifdef CL_EXPERIMENTAL |
83 |
- {"no-phishing-scan-urls",0,0,0}, |
|
84 |
- {"phishing-strict-url-check",0,0,0}, |
|
83 |
+ {"no-phishing-scan-urls", 0, 0, 0}, |
|
84 |
+ {"no-phishing-restrictedscan", 0, 0, 0}, |
|
85 |
+ {"phishing-ssl", 0, 0, 0}, |
|
86 |
+ {"phishing-cloak", 0, 0, 0}, |
|
85 | 87 |
#endif |
86 | 88 |
{"no-algorithmic", 0, 0, 0}, |
87 | 89 |
{"unzip", 2, 0, 0}, |
... | ... |
@@ -95,8 +95,16 @@ int scanmanager(const struct optstruct *opt) |
95 | 95 |
#ifdef CL_EXPERIMENTAL |
96 | 96 |
if(!opt_check(opt,"no-phishing-scan-urls")) |
97 | 97 |
dboptions |= CL_DB_PHISHING_URLS; |
98 |
- if(opt_check(opt,"phishing-strict-url-check")) |
|
99 |
- options |= CL_SCAN_PHISHING_DOMAINLIST; |
|
98 |
+ if(!opt_check(opt,"no-phishing-restrictedscan")) { |
|
99 |
+ /* not scanning all domains, check only URLs with domains from .pdb */ |
|
100 |
+ dboptions |= CL_SCAN_PHISHING_DOMAINLIST; |
|
101 |
+ } |
|
102 |
+ if(opt_check(opt,"phishing-ssl")) { |
|
103 |
+ dboptions |= CL_SCAN_PHISHING_BLOCKSSL; |
|
104 |
+ } |
|
105 |
+ if(opt_check(opt,"phishing-cloak")) { |
|
106 |
+ dboptions |= CL_SCAN_PHISHING_BLOCKCLOAK; |
|
107 |
+ } |
|
100 | 108 |
#endif |
101 | 109 |
|
102 | 110 |
if(opt_check(opt, "dev-ac-only")) { |
... | ... |
@@ -76,9 +76,15 @@ Disable detection of phishing messages. |
76 | 76 |
\fB\-\-no\-phishing\-scan\-urls\fR |
77 | 77 |
Disable url-based phishing detection. (Only available in experimental builds) |
78 | 78 |
.TP |
79 |
-\fB\-\-phish\-scan\-alldomains\fR |
|
79 |
+\fB\-\-no\-phishing\-restrictedscan\fR |
|
80 | 80 |
Enable phishing detection for all domains (might lead to false positives!). (Only available in experimental builds) |
81 | 81 |
.TP |
82 |
+\fB\-\-phishing\-ssl\fR |
|
83 |
+Always block SSL mismatches in URLs (might lead to false positives!). (Only available in experimental builds) |
|
84 |
+.TP |
|
85 |
+\fB\-\-phishing\-cloak\fR |
|
86 |
+Always block cloaked URLs (might lead to some false positives). (Only available in experimental builds) |
|
87 |
+.TP |
|
82 | 88 |
\fB\-\-no\-algo\fR |
83 | 89 |
In some cases (eg. complex malware, exploits in graphic files, and others), ClamAV uses special algorithms to provide accurate detection. This option disables the algorithmic detection. |
84 | 90 |
.TP |
... | ... |
@@ -222,17 +222,33 @@ LocalSocket /tmp/clamd |
222 | 222 |
# Default: yes |
223 | 223 |
#PhishingSignatures yes |
224 | 224 |
|
225 |
-# Use phishing detection for all domains (not just those listed in the .pdb database). |
|
226 |
-# It is not recommended to turn this option on, it is mean for internal use. |
|
227 |
-# (available in experimental builds only) |
|
228 |
-# Default: no |
|
229 |
-#PhishingStrictURLCheck no |
|
230 | 225 |
|
231 | 226 |
# Scan urls found in mails for phishing attempts. |
232 | 227 |
# (available in experimental builds only) |
233 | 228 |
# Default: yes |
234 | 229 |
#PhishingScanURLs yes |
235 | 230 |
|
231 |
+# Use phishing detection only for domains listed in the .pdb database. It is |
|
232 |
+# not recommended to have this option turned off, because scanning of all |
|
233 |
+# domains may lead to many false positives! |
|
234 |
+# (available in experimental builds only) |
|
235 |
+# Default: yes |
|
236 |
+#PhishingRestrictedScan yes |
|
237 |
+ |
|
238 |
+# Always block SSL mismatches in URLs, even if the URL isn't in the database. |
|
239 |
+# This can lead to false positives. |
|
240 |
+# (available in experimental builds only) |
|
241 |
+# |
|
242 |
+# Default: no |
|
243 |
+#PhishingAlwaysBlockSSLMismatch no |
|
244 |
+ |
|
245 |
+# Always block cloaked URLs, even if the URL isn't in the database. |
|
246 |
+# There is a remote possibility that this leads to false positives. |
|
247 |
+# (available in experimental builds only) |
|
248 |
+# |
|
249 |
+# Default: no |
|
250 |
+#PhishingAlwaysBlockCloak no |
|
251 |
+ |
|
236 | 252 |
## |
237 | 253 |
## HTML |
238 | 254 |
## |
... | ... |
@@ -88,8 +88,10 @@ extern "C" |
88 | 88 |
#define CL_SCAN_MAILURL 0x80 |
89 | 89 |
#define CL_SCAN_BLOCKMAX 0x100 |
90 | 90 |
#define CL_SCAN_ALGO 0x200 |
91 |
-#define CL_SCAN_PHISHING_DOMAINLIST 0x800 |
|
92 |
-#define CL_SCAN_ELF 0x1000 |
|
91 |
+#define CL_SCAN_PHISHING_DOMAINLIST 0x400 |
|
92 |
+#define CL_SCAN_PHISHING_BLOCKSSL 0x800 /* ssl mismatches, not ssl by itself*/ |
|
93 |
+#define CL_SCAN_PHISHING_BLOCKCLOAK 0x1000 |
|
94 |
+#define CL_SCAN_ELF 0x2000 |
|
93 | 95 |
|
94 | 96 |
/* recommended scan settings */ |
95 | 97 |
#define CL_SCAN_STDOPT (CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGO | CL_SCAN_ELF) |
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: phishcheck.c,v $ |
22 |
+ * Revision 1.17 2007/01/12 17:29:09 tkojm |
|
23 |
+ * phishing patch from Edwin (closes bb#157, #174, #222, #224) |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.16 2006/12/20 01:23:50 tkojm |
23 | 26 |
* options cleanup |
24 | 27 |
* |
... | ... |
@@ -268,6 +271,15 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla |
268 | 268 |
|
269 | 269 |
/* Constant strings and tables */ |
270 | 270 |
static char empty_string[]=""; |
271 |
+ |
|
272 |
+#define ANY_CLOAK "(0[xX])?([a-fA-F0-9]+\\.?)+" |
|
273 |
+#define CLOAK_REGEX_HEXURL "("ANY_CLOAK")?0[xX][a-fA-F0-9]+\\.?"ANY_CLOAK |
|
274 |
+#define OCTAL_CLOAK "("ANY_CLOAK")?000[0-9]+\\.?"ANY_CLOAK |
|
275 |
+#define DWORD_CLOAK "[0-9]{8,}" |
|
276 |
+ |
|
277 |
+static const char cloaked_host_regex[] = "^(("CLOAK_REGEX_HEXURL")|("OCTAL_CLOAK")|("DWORD_CLOAK"))$"; |
|
278 |
+ |
|
279 |
+ |
|
271 | 280 |
static const char tld_regex[] = "^"iana_tld"$"; |
272 | 281 |
static const char cctld_regex[] = "^"iana_cctld"$"; |
273 | 282 |
static const char dotnet[] = ".net"; |
... | ... |
@@ -944,6 +956,12 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
944 | 944 |
continue; |
945 | 945 |
if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST) |
946 | 946 |
urls.flags |= DOMAINLIST_REQUIRED; |
947 |
+ if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) { |
|
948 |
+ urls.always_check_flags |= CHECK_SSL; |
|
949 |
+ } |
|
950 |
+ if (ctx->options & CL_SCAN_PHISHING_BLOCKCLOAK) { |
|
951 |
+ urls.always_check_flags |= CHECK_CLOAKING; |
|
952 |
+ } |
|
947 | 953 |
string_init_c(&urls.realLink,(char*)hrefs->value[i]); |
948 | 954 |
/* if(!hrefs->contents[i]->isClosed) { |
949 | 955 |
blobAddData(hrefs->contents[i],empty_string,1); |
... | ... |
@@ -1035,7 +1053,10 @@ static char* str_compose(const char* a,const char* b,const char* c) |
1035 | 1035 |
|
1036 | 1036 |
static inline char hex2int(const unsigned char* src) |
1037 | 1037 |
{ |
1038 |
- return hextable[src[0]]<<4 | hextable[src[1]]; |
|
1038 |
+ return (src[0] == '0' && src[1] == '0') ? |
|
1039 |
+ 0x1 :/* don't convert %00 to \0, use 0x1 |
|
1040 |
+ * this value is also used by cloak check*/ |
|
1041 |
+ hextable[src[0]]<<4 | hextable[src[1]]; |
|
1039 | 1042 |
} |
1040 | 1043 |
|
1041 | 1044 |
static void free_regex(regex_t* p) |
... | ... |
@@ -1066,6 +1087,12 @@ int phishing_init(struct cl_engine* engine) |
1066 | 1066 |
|
1067 | 1067 |
cli_dbgmsg("Initializing phishcheck module\n"); |
1068 | 1068 |
|
1069 |
+ if(build_regex(&pchk->preg_hexurl,cloaked_host_regex,1)) { |
|
1070 |
+ free(pchk); |
|
1071 |
+ engine->phishcheck = NULL; |
|
1072 |
+ return CL_EFORMAT; |
|
1073 |
+ } |
|
1074 |
+ |
|
1069 | 1075 |
if(build_regex(&pchk->preg_cctld,cctld_regex,1)) { |
1070 | 1076 |
free(pchk); |
1071 | 1077 |
engine->phishcheck = NULL; |
... | ... |
@@ -1106,6 +1133,7 @@ void phishing_done(struct cl_engine* engine) |
1106 | 1106 |
cli_dbgmsg("Cleaning up phishcheck\n"); |
1107 | 1107 |
if(pchk && !pchk->is_disabled) { |
1108 | 1108 |
free_regex(&pchk->preg); |
1109 |
+ free_regex(&pchk->preg_hexurl); |
|
1109 | 1110 |
free_regex(&pchk->preg_cctld); |
1110 | 1111 |
free_regex(&pchk->preg_tld); |
1111 | 1112 |
free_regex(&pchk->preg_numeric); |
... | ... |
@@ -1167,7 +1195,8 @@ int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url |
1167 | 1167 |
string_free(host); |
1168 | 1168 |
return CL_PHISH_TEXTURL; |
1169 | 1169 |
} |
1170 |
- if(isReal && (!strncmp(host->data,"0x",2) || !strncmp(host->data,"0X",2))) { |
|
1170 |
+ if(!regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) { |
|
1171 |
+ /* use a regex here, so that we don't accidentally block 0xacab.net style hosts */ |
|
1171 | 1172 |
string_free(host); |
1172 | 1173 |
return CL_PHISH_HEX_URL; |
1173 | 1174 |
} |
... | ... |
@@ -1217,6 +1246,7 @@ int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int ho |
1217 | 1217 |
return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly); |
1218 | 1218 |
} |
1219 | 1219 |
|
1220 |
+ |
|
1220 | 1221 |
/* urls can't contain null pointer, caller must ensure this */ |
1221 | 1222 |
enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls) |
1222 | 1223 |
{ |
... | ... |
@@ -1267,17 +1297,23 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* |
1267 | 1267 |
if(domainlist_match(engine,urls->displayLink.data,urls->realLink.data,1,&urls->flags)) |
1268 | 1268 |
phishy |= DOMAIN_LISTED; |
1269 | 1269 |
else { |
1270 |
+ } |
|
1271 |
+ } |
|
1272 |
+ } |
|
1273 |
+ |
|
1274 |
+ if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) { |
|
1275 |
+ urls->flags &= urls->always_check_flags; |
|
1276 |
+ if(!urls->flags) { |
|
1270 | 1277 |
free_if_needed(&host_url); |
1271 | 1278 |
return CL_PHISH_HOST_NOT_LISTED; |
1272 | 1279 |
} |
1273 | 1280 |
} |
1274 |
- } |
|
1275 | 1281 |
|
1276 | 1282 |
if(urls->flags&CHECK_CLOAKING) { |
1277 | 1283 |
/*Checks if URL is cloaked. |
1278 | 1284 |
Should we check if it contains another http://, https://? |
1279 | 1285 |
No because we might get false positives from redirect services.*/ |
1280 |
- if(strstr(urls->realLink.data,"%00")) { |
|
1286 |
+ if(strchr(urls->realLink.data,'\0x1')) { |
|
1281 | 1287 |
free_if_needed(&host_url); |
1282 | 1288 |
return CL_PHISH_CLOAKED_NULL; |
1283 | 1289 |
} |
... | ... |
@@ -1287,6 +1323,7 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* |
1287 | 1287 |
} |
1288 | 1288 |
} |
1289 | 1289 |
|
1290 |
+ |
|
1290 | 1291 |
if(urls->displayLink.data[0]=='\0') { |
1291 | 1292 |
free_if_needed(&host_url); |
1292 | 1293 |
return CL_PHISH_CLEAN; |
... | ... |
@@ -1297,12 +1334,22 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* |
1297 | 1297 |
return CL_PHISH_SSL_SPOOF; |
1298 | 1298 |
} |
1299 | 1299 |
|
1300 |
+ if(!urls->flags&CHECK_CLOAKING && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) { |
|
1301 |
+ free_if_needed(&host_url); |
|
1302 |
+ return CL_PHISH_HOST_NOT_LISTED; |
|
1303 |
+ } |
|
1304 |
+ |
|
1300 | 1305 |
if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy))) |
1301 | 1306 |
{ |
1302 | 1307 |
free_if_needed(&host_url); |
1303 | 1308 |
return rc; |
1304 | 1309 |
} |
1305 | 1310 |
|
1311 |
+ if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) { |
|
1312 |
+ free_if_needed(&host_url); |
|
1313 |
+ return CL_PHISH_HOST_NOT_LISTED; |
|
1314 |
+ } |
|
1315 |
+ |
|
1306 | 1316 |
if(!strncmp(urls->displayLink.data,cid,cid_len))/* cid: image */{ |
1307 | 1317 |
free_if_needed(&host_url); |
1308 | 1318 |
return CL_PHISH_CLEAN_CID; |
... | ... |
@@ -62,6 +62,7 @@ struct phishcheck { |
62 | 62 |
regex_t preg_tld; |
63 | 63 |
regex_t preg_cctld; |
64 | 64 |
regex_t preg_numeric; |
65 |
+ regex_t preg_hexurl; |
|
65 | 66 |
char* url_regex; |
66 | 67 |
int is_disabled; |
67 | 68 |
}; |
... | ... |
@@ -70,6 +71,7 @@ struct url_check { |
70 | 70 |
struct string realLink; |
71 | 71 |
struct string displayLink; |
72 | 72 |
unsigned short flags; |
73 |
+ unsigned short always_check_flags; |
|
73 | 74 |
}; |
74 | 75 |
|
75 | 76 |
int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs); |
... | ... |
@@ -52,7 +52,11 @@ struct cfgoption cfg_options[] = { |
52 | 52 |
{"PhishingSignatures", OPT_BOOL, 1, NULL, 0, OPT_CLAMD}, |
53 | 53 |
#ifdef CL_EXPERIMENTAL |
54 | 54 |
{"PhishingScanURLs",OPT_BOOL, 1, NULL, 0, OPT_CLAMD}, |
55 |
- {"PhishingStrictURLCheck", OPT_BOOL, 0, NULL, 0, OPT_CLAMD}, |
|
55 |
+ /* these are FP prone options, if default isn't used */ |
|
56 |
+ {"PhishingAlwaysBlockCloak", OPT_BOOL, 0, NULL, 0, OPT_CLAMD}, |
|
57 |
+ {"PhishingAlwaysBlockSSLMismatch", OPT_BOOL, 0, NULL, 0, OPT_CLAMD}, |
|
58 |
+ {"PhishingRestrictedScan", OPT_BOOL, 1, NULL, 0, OPT_CLAMD}, |
|
59 |
+ /* end of FP prone options */ |
|
56 | 60 |
#endif |
57 | 61 |
{"AlgorithmicDetection", OPT_BOOL, 1, NULL, 0, OPT_CLAMD}, |
58 | 62 |
{"ScanHTML", OPT_BOOL, 1, NULL, 0, OPT_CLAMD}, |