Browse code

Better handling of bounce messages

git-svn: trunk@2062

Nigel Horne authored on 2006/07/04 17:40:46
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Tue Jul  4 09:39:41 BST 2006 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Better handling of bounce messages
4
+
1 5
 Mon Jul  3 13:09:27 BST 2006 (njh)
2 6
 ----------------------------------
3 7
   * libclamav/mbox.c:	Multiparts now only create an array when needed
... ...
@@ -13,7 +17,7 @@ Sat Jul  1 17:18:17 BST 2006 (njh)
13 13
 Sat Jul  1 04:49:32 BST 2006 (njh)
14 14
 ----------------------------------
15 15
   * libclamav:	Large binhex files were not being handled gracefully. Tidied
16
-  			the handling code. Note that large binhex are not
16
+			the handling code. Note that large binhex are not
17 17
 			currently decoded.
18 18
 			Bug reported by Luca
19 19
 
... ...
@@ -16,7 +16,7 @@
16 16
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 17
  *  MA 02110-1301, USA.
18 18
  */
19
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.317 2006/07/03 12:08:55 njh Exp $";
19
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.318 2006/07/04 08:38:53 njh Exp $";
20 20
 
21 21
 #if HAVE_CONFIG_H
22 22
 #include "clamav-config.h"
... ...
@@ -215,7 +215,8 @@ static	int	rfc1341(message *m, const char *dir);
215 215
 static	bool	usefulHeader(int commandNumber, const char *cmd);
216 216
 static	char	*getline_from_mbox(char *buffer, size_t len, FILE *fin);
217 217
 static	bool	isBounceStart(const char *line);
218
-static	bool	binhexMessage(const char *dir, message *m);
218
+static	bool	exportBinhexMessage(const char *dir, message *m);
219
+static	int	exportBounceMessage(text *start, const mbox_ctx *ctx);
219 220
 static	message	*do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx *mctx, message *messageIn, text **tptr);
220 221
 
221 222
 static	void	checkURLs(message *m, const char *dir);
... ...
@@ -1994,6 +1995,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
1994 1994
 		case TEXT:
1995 1995
 			/* text/plain has been preprocessed as no encoding */
1996 1996
 			if((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML))
1997
+				/*
1998
+				 * It would be better to save and scan the
1999
+				 * file and only checkURLs if it's found to be
2000
+				 * clean
2001
+				 */
1997 2002
 				checkURLs(mainMessage, mctx->dir);
1998 2003
 			break;
1999 2004
 		case MULTIPART:
... ...
@@ -2001,7 +2007,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2001 2001
 			boundary = messageFindArgument(mainMessage, "boundary");
2002 2002
 
2003 2003
 			if(boundary == NULL) {
2004
-				cli_warnmsg("Multipart MIME message contains no boundaries\n");
2004
+				cli_warnmsg("Multipart MIME message contains no boundary header\n");
2005 2005
 				/* Broken e-mail message */
2006 2006
 				mimeType = NOMIME;
2007 2007
 				/*
... ...
@@ -2040,9 +2046,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2040 2040
 					 * TODO: check yEnc
2041 2041
 					 */
2042 2042
 					if(binhexBegin(mainMessage) == t_line) {
2043
-						if(binhexMessage(mctx->dir, mainMessage)) {
2043
+						if(exportBinhexMessage(mctx->dir, mainMessage)) {
2044 2044
 							/* virus found */
2045 2045
 							rc = 3;
2046
+							infected = TRUE;
2046 2047
 							break;
2047 2048
 						}
2048 2049
 					} else if(encodingLine(mainMessage) == t_line->t_next) {
... ...
@@ -2063,7 +2070,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2063 2063
 			while((t_line = t_line->t_next) != NULL);
2064 2064
 
2065 2065
 			if(t_line == NULL) {
2066
-				cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
2066
+				cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
2067
+					boundary);
2067 2068
 				/*
2068 2069
 				 * Free added by Thomas Lamy
2069 2070
 				 * <Thomas.Lamy@in-online.net>
... ...
@@ -2842,75 +2850,15 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2842 2842
 	 * are hidden in HTML code
2843 2843
 	 */
2844 2844
 	if(mainMessage && (rc != 3)) {
2845
+		text *t_line;
2846
+
2845 2847
 		/*
2846 2848
 		 * Look for uu-encoded main file
2847 2849
 		 */
2848
-		text *t_line;
2849
-
2850 2850
 		if((encodingLine(mainMessage) != NULL) &&
2851
-			  ((t_line = bounceBegin(mainMessage)) != NULL)) {
2852
-			text *t, *start;
2853
-
2854
-			/*
2855
-			 * Attempt to save the original (unbounced)
2856
-			 * message - clamscan will find that in the
2857
-			 * directory and call us again (with any luck)
2858
-			 * having found an e-mail message to handle.
2859
-			 *
2860
-			 * This finds a lot of false positives, the
2861
-			 * search that a content type is in the
2862
-			 * bounce (i.e. it's after the bounce header)
2863
-			 * helps a bit.
2864
-			 *
2865
-			 * messageAddLine
2866
-			 * optimisation could help here, but needs
2867
-			 * careful thought, do it with line numbers
2868
-			 * would be best, since the current method in
2869
-			 * messageAddLine of checking encoding first
2870
-			 * must remain otherwise non bounce messages
2871
-			 * won't be scanned
2872
-			 */
2873
-			for(t = start = t_line; t; t = t->t_next) {
2874
-				char cmd[RFC2821LENGTH + 1];
2875
-				const char *txt = lineGetData(t->t_line);
2876
-
2877
-				if(txt == NULL)
2878
-					continue;
2879
-				if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
2880
-					continue;
2881
-
2882
-				switch(tableFind(mctx->rfc821Table, cmd)) {
2883
-					case CONTENT_TRANSFER_ENCODING:
2884
-						if((strstr(txt, "7bit") == NULL) &&
2885
-						   (strstr(txt, "8bit") == NULL))
2886
-							break;
2887
-						continue;
2888
-					case CONTENT_DISPOSITION:
2889
-						break;
2890
-					case CONTENT_TYPE:
2891
-						if(strstr(txt, "text/plain") != NULL)
2892
-							t = NULL;
2893
-						break;
2894
-					default:
2895
-						if(strcasecmp(cmd, "From") == 0)
2896
-							start = t_line;
2897
-						else if(strcasecmp(cmd, "Received") == 0)
2898
-							start = t_line;
2899
-						continue;
2900
-				}
2901
-				break;
2902
-			}
2903
-			if(t && ((fb = fileblobCreate()) != NULL)) {
2904
-				cli_dbgmsg("Found a bounce message\n");
2905
-				fileblobSetFilename(fb, mctx->dir, "bounce");
2906
-				/*fileblobSetCTX(fb, mctx->ctx);*/
2907
-				if(textToFileblob(start, fb, 1) == NULL)
2908
-					cli_dbgmsg("Nothing new to save in the bounce message\n");
2909
-				else
2910
-					rc = 1;
2911
-				fileblobDestroy(fb);
2912
-			} else
2913
-				cli_dbgmsg("Not found a bounce message\n");
2851
+		   ((t_line = bounceBegin(mainMessage)) != NULL)) {
2852
+			if(exportBounceMessage(t_line, mctx))
2853
+				rc = 1;
2914 2854
 		} else {
2915 2855
 			bool saveIt;
2916 2856
 
... ...
@@ -2921,7 +2869,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2921 2921
 				 * content encoding statement don't
2922 2922
 				 * bother saving to scan, it's safe
2923 2923
 				 */
2924
-				saveIt = (encodingLine(mainMessage) != NULL);
2924
+				saveIt = (bool)(encodingLine(mainMessage) != NULL);
2925 2925
 			else if((t_line = encodingLine(mainMessage)) != NULL) {
2926 2926
 				/*
2927 2927
 				 * Some bounces include the message
... ...
@@ -4309,7 +4257,7 @@ isBounceStart(const char *line)
4309 4309
  *	extract it
4310 4310
  */
4311 4311
 static bool
4312
-binhexMessage(const char *dir, message *m)
4312
+exportBinhexMessage(const char *dir, message *m)
4313 4313
 {
4314 4314
 	bool infected = FALSE;
4315 4315
 	fileblob *fb;
... ...
@@ -4333,6 +4281,80 @@ binhexMessage(const char *dir, message *m)
4333 4333
 }
4334 4334
 
4335 4335
 /*
4336
+ * Locate any bounce message and extract it. Returns 1 if a bounce message was saved, 0 otherwise
4337
+ */
4338
+static int
4339
+exportBounceMessage(text *start, const mbox_ctx *mctx)
4340
+{
4341
+	int rc = 0;
4342
+	text *t;
4343
+	fileblob *fb;
4344
+
4345
+	/*
4346
+	 * Attempt to save the original (unbounced)
4347
+	 * message - clamscan will find that in the
4348
+	 * directory and call us again (with any luck)
4349
+	 * having found an e-mail message to handle.
4350
+	 *
4351
+	 * This finds a lot of false positives; checking
4352
+	 * that a content type appears in the
4353
+	 * bounce (i.e. after the bounce header)
4354
+	 * helps a bit.
4355
+	 *
4356
+	 * messageAddLine
4357
+	 * optimisation could help here, but needs
4358
+	 * careful thought, do it with line numbers
4359
+	 * would be best, since the current method in
4360
+	 * messageAddLine of checking encoding first
4361
+	 * must remain otherwise non bounce messages
4362
+	 * won't be scanned
4363
+	 */
4364
+	for(t = start; t; t = t->t_next) {
4365
+		char cmd[RFC2821LENGTH + 1];
4366
+		const char *txt = lineGetData(t->t_line);
4367
+
4368
+		if(txt == NULL)
4369
+			continue;
4370
+		if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
4371
+			continue;
4372
+
4373
+		switch(tableFind(mctx->rfc821Table, cmd)) {
4374
+			case CONTENT_TRANSFER_ENCODING:
4375
+				if((strstr(txt, "7bit") == NULL) &&
4376
+				   (strstr(txt, "8bit") == NULL))
4377
+					break;
4378
+				continue;
4379
+			case CONTENT_DISPOSITION:
4380
+				break;
4381
+			case CONTENT_TYPE:
4382
+				if(strstr(txt, "text/plain") != NULL)
4383
+					t = NULL;
4384
+				break;
4385
+			default:
4386
+				if(strcasecmp(cmd, "From") == 0)
4387
+					start = t;
4388
+				else if(strcasecmp(cmd, "Received") == 0)
4389
+					start = t;
4390
+				continue;
4391
+		}
4392
+		break;
4393
+	}
4394
+	if(t && ((fb = fileblobCreate()) != NULL)) {
4395
+		cli_dbgmsg("Found a bounce message\n");
4396
+		fileblobSetFilename(fb, mctx->dir, "bounce");
4397
+		/*fileblobSetCTX(fb, mctx->ctx);*/
4398
+		if(textToFileblob(start, fb, 1) == NULL)
4399
+			cli_dbgmsg("Nothing new to save in the bounce message\n");
4400
+		else
4401
+			rc = 1;
4402
+		fileblobDestroy(fb);
4403
+	} else
4404
+		cli_dbgmsg("Not found a bounce message\n");
4405
+
4406
+	return rc;
4407
+}
4408
+
4409
+/*
4336 4410
  * Handle the ith element of a number of multiparts, e.g. multipart/alternative
4337 4411
  */
4338 4412
 static message *
... ...
@@ -4363,7 +4385,7 @@ do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx
4363 4363
 				if(binhexBegin(aMessage)) {
4364 4364
 					cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
4365 4365
 
4366
-					if(binhexMessage(mctx->dir, mainMessage))
4366
+					if(exportBinhexMessage(mctx->dir, mainMessage))
4367 4367
 						*rc = 3;
4368 4368
 				}
4369 4369
 				if(mainMessage != messageIn)
... ...
@@ -4372,7 +4394,7 @@ do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx
4372 4372
 			} else if(aMessage) {
4373 4373
 				if(binhexBegin(aMessage)) {
4374 4374
 					cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
4375
-					if(binhexMessage(mctx->dir, aMessage))
4375
+					if(exportBinhexMessage(mctx->dir, aMessage))
4376 4376
 						*rc = 3;
4377 4377
 					assert(aMessage == messages[i]);
4378 4378
 					messageReset(messages[i]);