git-svn: trunk@4953
Török Edvin authored on 2009/03/16 22:41:23... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Mon Mar 16 15:41:17 EET 2009 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * docs/phishsigs_howto.tex, libclamav/phishcheck.c: document URL |
|
4 |
+ blacklisting, and whitelisting (bb #1458). |
|
5 |
+ |
|
1 | 6 |
Mon Mar 16 14:44:25 EET 2009 (edwin) |
2 | 7 |
------------------------------------ |
3 | 8 |
* clamdtop/clamdtop.c: fix warning |
... | ... |
@@ -37,12 +37,7 @@ H[Filter]:DisplayedHostname[:FuncLevelSpec] |
37 | 37 |
\item or a subdomain of the specified hostname |
38 | 38 |
\item to avoid false matches in case of subdomain matches, the engine checks that there is a dot (\verb+.+) or a space (\verb+ +) before the matched portion |
39 | 39 |
\end{itemize} |
40 |
- \item [{Filter}] an (optional) 3-digit hexadecimal number representing flags that should be filtered. |
|
41 |
- \begin{itemize} |
|
42 |
- \item flag filtering only makes sense in .pdb files. (however clamav won't complain if you put flags in .wdb files, it will just skip them) |
|
43 |
- \item for details on how to construct a flag number see section \prettyref{sec:Flags} |
|
44 |
- \end{itemize} |
|
45 |
- |
|
40 |
+ \item [{Filter}] is ignored for R and H for compatibility reasons |
|
46 | 41 |
\item [{\textsc{RealURL}}] is the URL the user is sent to, example: \emph{href} attribute of an html anchor (\emph{<a> tag}) |
47 | 42 |
\item [{\textsc{DisplayedURL}}] is the URL description displayed to the user, where it's \emph{claimed} they are sent, example: contents of an html anchor (\emph{<a> tag}) |
48 | 43 |
\item [{DisplayedHostname}] is the hostname portion of the \textsc{DisplayedURL} |
... | ... |
@@ -53,6 +48,36 @@ H[Filter]:DisplayedHostname[:FuncLevelSpec] |
53 | 53 |
\end{itemize} |
54 | 54 |
\end{description} |
55 | 55 |
|
56 |
+\subsection{GDB format} |
|
57 |
+This file contains URL hashes in the following format: |
|
58 |
+\begin{verbatim} |
|
59 |
+S:P:HostPrefix[:FuncLevelSpec] |
|
60 |
+S:F:Sha256hash[:FuncLevelSpec] |
|
61 |
+S1:P:HostPrefix[:FuncLevelSpec] |
|
62 |
+S1:F:Sha256hash[:FuncLevelSpec] |
|
63 |
+S2:P:HostPrefix[:FuncLevelSpec] |
|
64 |
+S2:F:Sha256hash[:FuncLevelSpec] |
|
65 |
+\end{verbatim} |
|
66 |
+ |
|
67 |
+\begin{description} |
|
68 |
+ \item [{S:}] |
|
69 |
+ These are hashes for Google Safe Browsing - malware sites, and should not be used for other purposes. |
|
70 |
+ \item [{S2:}] |
|
71 |
+ These are hashes for Google Safe Browsing - phishing sites, and should not be used for other purposes. |
|
72 |
+ \item [{S1:}] |
|
73 |
+ Hashes for blacklisting phishing sites. |
|
74 |
+ Virus name: Phishing.URL.Blacklisted |
|
75 |
+ \item [{HostPrefix}] |
|
76 |
+ 4-byte prefix of the sha256 hash of the last 2 or 3 components of the hostname. |
|
77 |
+If the prefix doesn't match, no further lookups are performed. |
|
78 |
+ \item [{Sha256hash}] |
|
79 |
+ sha256 hash of the canonicalized URL, or a sha256 hash of its prefix/suffix according to the Google Safe Browsing ``Performing Lookups'' rules. There should be a corresponding \verb+:P:HostPrefix+ entry for the hash to be taken into consideration. |
|
80 |
+\end{description} |
|
81 |
+ |
|
82 |
+To see which hash/URL matched, look at the \verb+clamscan --debug+ output, and look for the following strings: |
|
83 |
+\verb+Looking up hash+, \verb+prefix matched+, and \verb+Hash matched+. |
|
84 |
+Local whitelisting of .gdb entries can be done by creating .wdb entries. |
|
85 |
+ |
|
56 | 86 |
\subsection{WDB format} |
57 | 87 |
This file contains whitelisted URL pairs. |
58 | 88 |
It contains lines in the following format: |
... | ... |
@@ -61,6 +61,7 @@ |
61 | 61 |
#define DOMAIN_LISTED 8 |
62 | 62 |
#define PHISHY_CLOAKED_NULL 16 |
63 | 63 |
|
64 |
+ |
|
64 | 65 |
/* |
65 | 66 |
* Phishing design documentation, |
66 | 67 |
(initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB) |
... | ... |
@@ -1395,7 +1396,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1395 | 1395 |
{ |
1396 | 1396 |
struct url_check host_url; |
1397 | 1397 |
int rc = CL_PHISH_NODECISION; |
1398 |
- int phishy=0; |
|
1398 |
+ int phishy=0, blacklisted=0; |
|
1399 | 1399 |
const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck; |
1400 | 1400 |
|
1401 | 1401 |
if(!urls->realLink.data || urls->displayLink.data[0]=='\0') |
... | ... |
@@ -1413,11 +1414,13 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1413 | 1413 |
} |
1414 | 1414 |
|
1415 | 1415 |
if(( rc = url_hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) )) { |
1416 |
- if (rc == CL_PHISH_CLEAN) |
|
1416 |
+ if (rc == CL_PHISH_CLEAN) { |
|
1417 | 1417 |
cli_dbgmsg("not analyzing, not a real url: %s\n", urls->realLink.data); |
1418 |
- else |
|
1418 |
+ return CL_PHISH_CLEAN; |
|
1419 |
+ } else { |
|
1419 | 1420 |
cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); |
1420 |
- return rc; |
|
1421 |
+ blacklisted = rc; |
|
1422 |
+ } |
|
1421 | 1423 |
} |
1422 | 1424 |
|
1423 | 1425 |
if((rc = cleanupURLs(urls))) { |
... | ... |
@@ -1433,12 +1436,16 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1433 | 1433 |
( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) || |
1434 | 1434 |
!(phishy&PHISHY_NUMERIC_IP))) { |
1435 | 1435 |
cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data); |
1436 |
- return CL_PHISH_CLEAN; |
|
1436 |
+ if (!blacklisted) |
|
1437 |
+ return CL_PHISH_CLEAN; |
|
1437 | 1438 |
} |
1438 | 1439 |
|
1439 | 1440 |
if(whitelist_check(engine, urls, 0)) |
1440 | 1441 |
return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */ |
1441 | 1442 |
|
1443 |
+ if (blacklisted) |
|
1444 |
+ return blacklisted; |
|
1445 |
+ |
|
1442 | 1446 |
url_check_init(&host_url); |
1443 | 1447 |
|
1444 | 1448 |
if((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) { |