git-svn: trunk@3044
Nigel Horne authored on 2007/05/02 03:00:28... | ... |
@@ -1,6 +1,10 @@ |
1 |
+Tue May 1 18:13:09 BST 2007 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: more phish enabling code |
|
4 |
+ |
|
1 | 5 |
Tue May 1 19:00:00 EEST 2007 (edwin) |
2 | 6 |
---------------------------------- |
3 |
- * apply next set of patches for enabling phishing code |
|
7 |
+ * apply next set of patches for enabling phishing code |
|
4 | 8 |
|
5 | 9 |
Tue May 1 17:20:53 CEST 2007 (tk) |
6 | 10 |
---------------------------------- |
... | ... |
@@ -73,6 +73,9 @@ static char const rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $"; |
73 | 73 |
#include "str.h" |
74 | 74 |
#include "filetypes.h" |
75 | 75 |
#include "mbox.h" |
76 |
+#include "dconf.h" |
|
77 |
+ |
|
78 |
+#define DCONF_PHISHING mctx->ctx->dconf->phishing |
|
76 | 79 |
|
77 | 80 |
#ifdef CL_DEBUG |
78 | 81 |
|
... | ... |
@@ -141,13 +144,9 @@ typedef enum { |
141 | 141 |
* 301/302 redirects we wish to follow |
142 | 142 |
*/ |
143 | 143 |
|
144 |
-#ifdef FOLLOWURLS |
|
145 | 144 |
#include "htmlnorm.h" |
146 |
-#endif |
|
147 | 145 |
|
148 |
-#ifdef CL_EXPERIMENTAL |
|
149 | 146 |
#include "phishcheck.h" |
150 |
-#endif |
|
151 | 147 |
|
152 | 148 |
#ifndef C_WINDOWS |
153 | 149 |
#include <netdb.h> |
... | ... |
@@ -1954,9 +1953,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
1954 | 1954 |
message *mainMessage = messageIn; |
1955 | 1955 |
fileblob *fb; |
1956 | 1956 |
bool infected = FALSE; |
1957 |
-#ifdef CL_EXPERIMENTAL |
|
1958 |
- const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS; /* || (mctx->ctx->options&CL_SCAN_PHISHING_GA_TRAIN) || (mctx->ctx->options&CL_SCAN_PHISHING_GA); kept here for the GA MERGE */ |
|
1959 |
-#endif |
|
1957 |
+ const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE); |
|
1960 | 1958 |
|
1961 | 1959 |
cli_dbgmsg("in parseEmailBody\n"); |
1962 | 1960 |
|
... | ... |
@@ -2025,30 +2022,21 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2025 | 2025 |
case NOMIME: |
2026 | 2026 |
cli_dbgmsg("Not a mime encoded message\n"); |
2027 | 2027 |
aText = textAddMessage(aText, mainMessage); |
2028 |
-#ifdef CL_EXPERIMENTAL |
|
2029 | 2028 |
if(!doPhishingScan) |
2030 | 2029 |
break; |
2031 | 2030 |
/* |
2032 | 2031 |
* Fall through: some phishing mails claim they are |
2033 | 2032 |
* text/plain, when they are in fact html |
2034 | 2033 |
*/ |
2035 |
-#else |
|
2036 |
- break; |
|
2037 |
-#endif |
|
2038 | 2034 |
case TEXT: |
2039 | 2035 |
/* text/plain has been preprocessed as no encoding */ |
2040 |
-#ifdef CL_EXPERIMENTAL |
|
2041 |
- if((subtype == HTML) || doPhishingScan) { |
|
2042 |
-#else |
|
2043 |
- if((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) |
|
2044 |
-#endif |
|
2036 |
+ if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) || doPhishingScan) { |
|
2045 | 2037 |
/* |
2046 | 2038 |
* It would be better to save and scan the |
2047 | 2039 |
* file and only checkURLs if it's found to be |
2048 | 2040 |
* clean |
2049 | 2041 |
*/ |
2050 | 2042 |
checkURLs(mainMessage, mctx, &rc, (subtype == HTML)); |
2051 |
-#ifdef CL_EXPERIMENTAL |
|
2052 | 2043 |
/* |
2053 | 2044 |
* There might be html sent without subtype |
2054 | 2045 |
* html too, so scan them for phishing |
... | ... |
@@ -2056,7 +2044,6 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2056 | 2056 |
if(rc == VIRUS) |
2057 | 2057 |
infected = TRUE; |
2058 | 2058 |
} |
2059 |
-#endif |
|
2060 | 2059 |
break; |
2061 | 2060 |
case MULTIPART: |
2062 | 2061 |
cli_dbgmsg("Content-type 'multipart' handler\n"); |
... | ... |
@@ -3877,7 +3864,7 @@ getHrefs(message *m, tag_arguments_t *hrefs) |
3877 | 3877 |
hrefs->contents = NULL; |
3878 | 3878 |
|
3879 | 3879 |
cli_dbgmsg("getHrefs: calling html_normalise_mem\n"); |
3880 |
- if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs)) { |
|
3880 |
+ if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs,m->ctx->dconf)) { |
|
3881 | 3881 |
blobDestroy(b); |
3882 | 3882 |
return NULL; |
3883 | 3883 |
} |
... | ... |
@@ -3887,7 +3874,6 @@ getHrefs(message *m, tag_arguments_t *hrefs) |
3887 | 3887 |
return b; |
3888 | 3888 |
} |
3889 | 3889 |
|
3890 |
-#ifdef CL_EXPERIMENTAL |
|
3891 | 3890 |
/* |
3892 | 3891 |
* Experimental: validate URLs for phishes |
3893 | 3892 |
* followurls: see if URLs point to malware |
... | ... |
@@ -3898,8 +3884,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
3898 | 3898 |
blob *b; |
3899 | 3899 |
tag_arguments_t hrefs; |
3900 | 3900 |
|
3901 |
- /* aCaB: stripped GA related stuff */ |
|
3902 |
- hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS; |
|
3901 |
+ hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE); |
|
3903 | 3902 |
|
3904 | 3903 |
#if (!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0) |
3905 | 3904 |
if(!hrefs.scanContents) |
... | ... |
@@ -3916,7 +3901,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
3916 | 3916 |
|
3917 | 3917 |
b = getHrefs(mainMessage, &hrefs); |
3918 | 3918 |
if(b) { |
3919 |
- if(hrefs.scanContents /*mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS*/) { |
|
3919 |
+ if(hrefs.scanContents) { |
|
3920 | 3920 |
if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) { |
3921 | 3921 |
mainMessage->isInfected = TRUE; |
3922 | 3922 |
*rc = VIRUS; |
... | ... |
@@ -3929,27 +3914,6 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
3929 | 3929 |
hrefs_done(b,&hrefs); |
3930 | 3930 |
} |
3931 | 3931 |
|
3932 |
-#else /*!CL_EXPERIMENTAL*/ |
|
3933 |
- |
|
3934 |
-static void |
|
3935 |
-checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
|
3936 |
-{ |
|
3937 |
- blob *b; |
|
3938 |
- tag_arguments_t hrefs; |
|
3939 |
- |
|
3940 |
- if(!is_html || (!(mctx->ctx->options&CL_SCAN_MAILURL)) || (*rc == VIRUS)) |
|
3941 |
- return; |
|
3942 |
- |
|
3943 |
- hrefs.count = 0; |
|
3944 |
- hrefs.tag = hrefs.value = NULL; |
|
3945 |
- hrefs.contents = NULL; |
|
3946 |
- |
|
3947 |
- b = getHrefs(mainMessage, &hrefs); |
|
3948 |
- if(b) |
|
3949 |
- do_checkURLs(mctx->dir, &hrefs); |
|
3950 |
- hrefs_done(b, &hrefs); |
|
3951 |
-} |
|
3952 |
-#endif /*CL_EXPERIMENTAL*/ |
|
3953 | 3932 |
|
3954 | 3933 |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
3955 | 3934 |
static void |
... | ... |
@@ -4870,6 +4834,8 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m |
4870 | 4870 |
#endif |
4871 | 4871 |
message *aMessage = messages[i]; |
4872 | 4872 |
|
4873 |
+ const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE); |
|
4874 |
+ |
|
4873 | 4875 |
if(aMessage == NULL) |
4874 | 4876 |
return mainMessage; |
4875 | 4877 |
|
... | ... |
@@ -4952,12 +4918,10 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m |
4952 | 4952 |
} |
4953 | 4953 |
} else { |
4954 | 4954 |
const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML); |
4955 |
- if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html) |
|
4955 |
+ if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html) |
|
4956 | 4956 |
checkURLs(aMessage, mctx, rc, 1); |
4957 |
-#ifdef CL_EXPERIMENTAL |
|
4958 |
- else if(mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS) |
|
4957 |
+ else if(doPhishingScan) |
|
4959 | 4958 |
checkURLs(aMessage, mctx, rc, is_html); |
4960 |
-#endif |
|
4961 | 4959 |
messageAddArgument(aMessage, |
4962 | 4960 |
"filename=mixedtextportion"); |
4963 | 4961 |
} |