git-svn: trunk@5015
Török Edvin authored on 2009/04/03 05:36:27... | ... |
@@ -1,3 +1,9 @@ |
1 |
+Thu Apr 2 23:35:36 EEST 2009 (edwin) |
|
2 |
+------------------------------------- |
|
3 |
+ * docs/phishsigs_howto.tex, libclamav/phishcheck.c, |
|
4 |
+ libclamav/readdb.c, libclamav/regex_list.c: local.gdb whitelisting |
|
5 |
+ of safebrowsing entries (bb #1482). |
|
6 |
+ |
|
1 | 7 |
Thu Apr 2 22:59:30 EEST 2009 (edwin) |
2 | 8 |
------------------------------------- |
3 | 9 |
* libclamav/htmlnorm.c, libclamav/htmlnorm.h, libclamav/mbox.c, |
... | ... |
@@ -57,6 +57,7 @@ S1:P:HostPrefix[:FuncLevelSpec] |
57 | 57 |
S1:F:Sha256hash[:FuncLevelSpec] |
58 | 58 |
S2:P:HostPrefix[:FuncLevelSpec] |
59 | 59 |
S2:F:Sha256hash[:FuncLevelSpec] |
60 |
+S:W:Sha256hash[:FuncLevelSpec] |
|
60 | 61 |
\end{verbatim} |
61 | 62 |
|
62 | 63 |
\begin{description} |
... | ... |
@@ -67,6 +68,8 @@ S2:F:Sha256hash[:FuncLevelSpec] |
67 | 67 |
\item [{S1:}] |
68 | 68 |
Hashes for blacklisting phishing sites. |
69 | 69 |
Virus name: Phishing.URL.Blacklisted |
70 |
+ \item [{S:W}] |
|
71 |
+ Locally whitelisted hashes. S1:F/S2:F entries with the same full hash are skipped. |
|
70 | 72 |
\item [{HostPrefix}] |
71 | 73 |
4-byte prefix of the sha256 hash of the last 2 or 3 components of the hostname. |
72 | 74 |
If prefix doesn't match, no further lookups are performed. |
... | ... |
@@ -76,7 +79,8 @@ If prefix doesn't match, no further lookups are performed. |
76 | 76 |
|
77 | 77 |
To see which hash/URL matched, look at the \verb+clamscan --debug+ output, and look for the following strings: |
78 | 78 |
\verb+Looking up hash+, \verb+prefix matched+, and \verb+Hash matched+. |
79 |
-Local whitelisting of .gdb entries can be done by creating .wdb entries. |
|
79 |
+Local whitelisting of .gdb entries can be done by creating a local.gdb file, and |
|
80 |
+adding a line \verb+S:W:<HASH>+, where \verb+<HASH>+ is the SHA256 hash to whitelist. |
|
80 | 81 |
|
81 | 82 |
\subsection{WDB format} |
82 | 83 |
This file contains whitelisted url pairs |
... | ... |
@@ -1206,6 +1206,9 @@ static int hash_match(const struct regex_matcher *rlist, const char *host, size_ |
1206 | 1206 |
} |
1207 | 1207 |
if (cli_bm_scanbuff(sha256_dig, 32, &virname, &rlist->sha256_hashes,0,0,-1) == CL_VIRUS) { |
1208 | 1208 |
switch(*virname) { |
1209 |
+ case 'W': |
|
1210 |
+ cli_dbgmsg("Hash is whitelisted, skipping\n"); |
|
1211 |
+ break; |
|
1209 | 1212 |
case '1': |
1210 | 1213 |
return CL_PHISH_HASH1; |
1211 | 1214 |
case '2': |
... | ... |
@@ -1413,7 +1416,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1413 | 1413 |
{ |
1414 | 1414 |
struct url_check host_url; |
1415 | 1415 |
int rc = CL_PHISH_NODECISION; |
1416 |
- int phishy=0, blacklisted=0; |
|
1416 |
+ int phishy=0; |
|
1417 | 1417 |
const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck; |
1418 | 1418 |
|
1419 | 1419 |
if(!urls->realLink.data) |
... | ... |
@@ -1436,10 +1439,14 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1436 | 1436 |
return CL_PHISH_CLEAN; |
1437 | 1437 |
} else { |
1438 | 1438 |
cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); |
1439 |
- blacklisted = rc; |
|
1439 |
+ return rc; |
|
1440 | 1440 |
} |
1441 | 1441 |
} |
1442 | 1442 |
|
1443 |
+ if (urls->displayLink.data[0] == '\0') { |
|
1444 |
+ return CL_PHISH_CLEAN; |
|
1445 |
+ } |
|
1446 |
+ |
|
1443 | 1447 |
if((rc = cleanupURLs(urls))) { |
1444 | 1448 |
/* it can only return an error, or say it's clean; |
1445 | 1449 |
* it is not allowed to decide it is phishing */ |
... | ... |
@@ -1453,20 +1460,12 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1453 | 1453 |
( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) || |
1454 | 1454 |
!(phishy&PHISHY_NUMERIC_IP))) { |
1455 | 1455 |
cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data); |
1456 |
- if (!blacklisted) |
|
1457 |
- return CL_PHISH_CLEAN; |
|
1456 |
+ return CL_PHISH_CLEAN; |
|
1458 | 1457 |
} |
1459 | 1458 |
|
1460 | 1459 |
if(whitelist_check(engine, urls, 0)) |
1461 | 1460 |
return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */ |
1462 | 1461 |
|
1463 |
- if (blacklisted) |
|
1464 |
- return blacklisted; |
|
1465 |
- |
|
1466 |
- if (urls->displayLink.data[0] == '\0') { |
|
1467 |
- return CL_PHISH_CLEAN; |
|
1468 |
- } |
|
1469 |
- |
|
1470 | 1462 |
url_check_init(&host_url); |
1471 | 1463 |
|
1472 | 1464 |
if((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) { |
... | ... |
@@ -1585,6 +1585,13 @@ static int cli_loaddbdir(const char *dirname, struct cl_engine *engine, unsigned |
1585 | 1585 |
return ret; |
1586 | 1586 |
} |
1587 | 1587 |
|
1588 |
+ /* try to load local.gdb next */ |
|
1589 |
+ sprintf(dbfile, "%s/local.gdb", dirname); |
|
1590 |
+ if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) { |
|
1591 |
+ free(dbfile); |
|
1592 |
+ return ret; |
|
1593 |
+ } |
|
1594 |
+ |
|
1588 | 1595 |
/* check for and load daily.cfg */ |
1589 | 1596 |
sprintf(dbfile, "%s/daily.cfg", dirname); |
1590 | 1597 |
if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) { |
... | ... |
@@ -432,18 +432,13 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl, |
432 | 432 |
int rc; |
433 | 433 |
struct cli_bm_patt *pat = mpool_calloc(matcher->mempool, 1, sizeof(*pat)); |
434 | 434 |
struct cli_matcher *bm; |
435 |
+ const char *vname = NULL; |
|
435 | 436 |
if(!pat) |
436 | 437 |
return CL_EMEM; |
437 | 438 |
pat->pattern = (unsigned char*)cli_mpool_hex2str(matcher->mempool, pattern); |
438 | 439 |
if(!pat->pattern) |
439 | 440 |
return CL_EMALFDB; |
440 | 441 |
pat->length = 32; |
441 |
- pat->virname = mpool_malloc(matcher->mempool, 1); |
|
442 |
- if(!pat->virname) { |
|
443 |
- free(pat); |
|
444 |
- return CL_EMEM; |
|
445 |
- } |
|
446 |
- *pat->virname = fl; |
|
447 | 442 |
if (is_prefix) { |
448 | 443 |
pat->length=4; |
449 | 444 |
bm = &matcher->hostkey_prefix; |
... | ... |
@@ -451,6 +446,23 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl, |
451 | 451 |
bm = &matcher->sha256_hashes; |
452 | 452 |
} |
453 | 453 |
|
454 |
+ if (fl != 'W' && pat->length == 32 && |
|
455 |
+ cli_bm_scanbuff(pat->pattern, 32, &vname, &matcher->sha256_hashes,0,0,-1) == CL_VIRUS) { |
|
456 |
+ if (*vname == 'W') { |
|
457 |
+ /* hash is whitelisted in local.gdb */ |
|
458 |
+ cli_dbgmsg("Skipping hash %s\n", pattern); |
|
459 |
+ mpool_free(matcher->mempool, pat->pattern); |
|
460 |
+ mpool_free(matcher->mempool, pat); |
|
461 |
+ return CL_SUCCESS; |
|
462 |
+ } |
|
463 |
+ } |
|
464 |
+ pat->virname = mpool_malloc(matcher->mempool, 1); |
|
465 |
+ if(!pat->virname) { |
|
466 |
+ free(pat); |
|
467 |
+ return CL_EMEM; |
|
468 |
+ } |
|
469 |
+ *pat->virname = fl; |
|
470 |
+ |
|
454 | 471 |
if((rc = cli_bm_addpatt(bm, pat))) { |
455 | 472 |
cli_errmsg("add_hash: failed to add BM pattern\n"); |
456 | 473 |
free(pat->pattern); |
... | ... |
@@ -550,9 +562,11 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int *sign |
550 | 550 |
/*matches displayed host*/ |
551 | 551 |
if (( rc = add_static_pattern(matcher, pattern) )) |
552 | 552 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
553 |
- } else if (buffer[0] == 'S' && !is_whitelist) { |
|
553 |
+ } else if (buffer[0] == 'S' && (!is_whitelist || pattern[0]=='W')) { |
|
554 | 554 |
pattern[pattern_len] = '\0'; |
555 |
- if((pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') { |
|
555 |
+ if (pattern[0]=='W') |
|
556 |
+ flags[0]='W'; |
|
557 |
+ if((pattern[0]=='W' || pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') { |
|
556 | 558 |
pattern += 2; |
557 | 559 |
if (( rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P') )) { |
558 | 560 |
cli_errmsg("Error loading at line: %d\n", line); |