Browse code

more phish enabling code

git-svn: trunk@3044

Nigel Horne authored on 2007/05/02 03:00:28
Showing 2 changed files
... ...
@@ -1,6 +1,10 @@
1
+Tue May  1 18:13:09 BST 2007 (njh)
2
+----------------------------------
3
+  *	libclamav/mbox.c:	more phish enabling code
4
+
1 5
 Tue May  1 19:00:00 EEST 2007 (edwin)
2 6
 ----------------------------------
3
-  * apply next set of patches for enabling phishing code    
7
+  * apply next set of patches for enabling phishing code
4 8
 
5 9
 Tue May  1 17:20:53 CEST 2007 (tk)
6 10
 ----------------------------------
... ...
@@ -73,6 +73,9 @@ static	char	const	rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $";
73 73
 #include "str.h"
74 74
 #include "filetypes.h"
75 75
 #include "mbox.h"
76
+#include "dconf.h"
77
+
78
+#define DCONF_PHISHING mctx->ctx->dconf->phishing
76 79
 
77 80
 #ifdef	CL_DEBUG
78 81
 
... ...
@@ -141,13 +144,9 @@ typedef	enum {
141 141
 				 * 301/302 redirects we wish to follow
142 142
 				 */
143 143
 
144
-#ifdef	FOLLOWURLS
145 144
 #include "htmlnorm.h"
146
-#endif
147 145
 
148
-#ifdef CL_EXPERIMENTAL
149 146
 #include "phishcheck.h"
150
-#endif
151 147
 
152 148
 #ifndef	C_WINDOWS
153 149
 #include <netdb.h>
... ...
@@ -1954,9 +1953,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1954 1954
 	message *mainMessage = messageIn;
1955 1955
 	fileblob *fb;
1956 1956
 	bool infected = FALSE;
1957
-#ifdef CL_EXPERIMENTAL
1958
-	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS; /* || (mctx->ctx->options&CL_SCAN_PHISHING_GA_TRAIN) || (mctx->ctx->options&CL_SCAN_PHISHING_GA);  kept here for the GA MERGE */
1959
-#endif
1957
+	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE); 
1960 1958
 
1961 1959
 	cli_dbgmsg("in parseEmailBody\n");
1962 1960
 
... ...
@@ -2025,30 +2022,21 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2025 2025
 		case NOMIME:
2026 2026
 			cli_dbgmsg("Not a mime encoded message\n");
2027 2027
 			aText = textAddMessage(aText, mainMessage);
2028
-#ifdef CL_EXPERIMENTAL
2029 2028
 			if(!doPhishingScan)
2030 2029
 				break;
2031 2030
 			/*
2032 2031
 			 * Fall through: some phishing mails claim they are
2033 2032
 			 * text/plain, when they are in fact html
2034 2033
 			 */
2035
-#else
2036
-			break;
2037
-#endif
2038 2034
 		case TEXT:
2039 2035
 			/* text/plain has been preprocessed as no encoding */
2040
-#ifdef CL_EXPERIMENTAL
2041
-			if((subtype == HTML) || doPhishingScan) {
2042
-#else
2043
-			if((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML))
2044
-#endif
2036
+			if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) || doPhishingScan) {
2045 2037
 				/*
2046 2038
 				 * It would be better to save and scan the
2047 2039
 				 * file and only checkURLs if it's found to be
2048 2040
 				 * clean
2049 2041
 				 */
2050 2042
 				checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
2051
-#ifdef CL_EXPERIMENTAL
2052 2043
 				/*
2053 2044
 				 * There might be html sent without subtype
2054 2045
 				 * html too, so scan them for phishing
... ...
@@ -2056,7 +2044,6 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2056 2056
 				if(rc == VIRUS)
2057 2057
 					infected = TRUE;
2058 2058
 			}
2059
-#endif
2060 2059
 			break;
2061 2060
 		case MULTIPART:
2062 2061
 			cli_dbgmsg("Content-type 'multipart' handler\n");
... ...
@@ -3877,7 +3864,7 @@ getHrefs(message *m, tag_arguments_t *hrefs)
3877 3877
 	hrefs->contents = NULL;
3878 3878
 
3879 3879
 	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
3880
-	if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs)) {
3880
+	if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs,m->ctx->dconf)) {
3881 3881
 		blobDestroy(b);
3882 3882
 		return NULL;
3883 3883
 	}
... ...
@@ -3887,7 +3874,6 @@ getHrefs(message *m, tag_arguments_t *hrefs)
3887 3887
 	return b;
3888 3888
 }
3889 3889
 
3890
-#ifdef CL_EXPERIMENTAL
3891 3890
 /*
3892 3891
  * Experimental: validate URLs for phishes
3893 3892
  * followurls: see if URLs point to malware
... ...
@@ -3898,8 +3884,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
3898 3898
 	blob *b;
3899 3899
 	tag_arguments_t hrefs;
3900 3900
 
3901
-	/* aCaB: stripped GA related stuff */
3902
-	hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS;
3901
+	hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
3903 3902
 
3904 3903
 #if    (!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0)
3905 3904
 	if(!hrefs.scanContents)
... ...
@@ -3916,7 +3901,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
3916 3916
 
3917 3917
 	b = getHrefs(mainMessage, &hrefs);
3918 3918
 	if(b) {
3919
-		if(hrefs.scanContents /*mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS*/) {
3919
+		if(hrefs.scanContents) {
3920 3920
 			if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
3921 3921
 				mainMessage->isInfected = TRUE;
3922 3922
 				*rc = VIRUS;
... ...
@@ -3929,27 +3914,6 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
3929 3929
 	hrefs_done(b,&hrefs);
3930 3930
 }
3931 3931
 
3932
-#else	/*!CL_EXPERIMENTAL*/
3933
-
3934
-static void
3935
-checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
3936
-{
3937
-	blob *b;
3938
-	tag_arguments_t hrefs;
3939
-
3940
-	if(!is_html || (!(mctx->ctx->options&CL_SCAN_MAILURL)) || (*rc == VIRUS))
3941
-		return;
3942
-
3943
-	hrefs.count = 0;
3944
-	hrefs.tag = hrefs.value = NULL;
3945
-	hrefs.contents = NULL;
3946
-
3947
-	b = getHrefs(mainMessage, &hrefs);
3948
-	if(b)
3949
-		do_checkURLs(mctx->dir, &hrefs);
3950
-	hrefs_done(b, &hrefs);
3951
-}
3952
-#endif	/*CL_EXPERIMENTAL*/
3953 3932
 
3954 3933
 #if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
3955 3934
 static void
... ...
@@ -4870,6 +4834,8 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
4870 4870
 #endif
4871 4871
 	message *aMessage = messages[i];
4872 4872
 
4873
+	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
4874
+
4873 4875
 	if(aMessage == NULL)
4874 4876
 		return mainMessage;
4875 4877
 
... ...
@@ -4952,12 +4918,10 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
4952 4952
 					}
4953 4953
 				} else {
4954 4954
 					const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
4955
-					if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
4955
+					if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)						
4956 4956
 						checkURLs(aMessage, mctx, rc, 1);
4957
-#ifdef	CL_EXPERIMENTAL
4958
-					else if(mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS)
4957
+					else if(doPhishingScan)
4959 4958
 						checkURLs(aMessage, mctx, rc, is_html);
4960
-#endif
4961 4959
 					messageAddArgument(aMessage,
4962 4960
 						"filename=mixedtextportion");
4963 4961
 				}