Browse code

Better handling of encapsulated messages

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@149 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2003/12/11 23:37:28
Showing 2 changed files
... ...
@@ -1,3 +1,8 @@
1
+Thu Dec 11 14:36:32 GMT 2003 (njh)
2
+----------------------------------
3
+  * libclamav: better handling of encapsulated messages, i.e. emails
4
+  	within other emails such as forwarded messages
5
+
1 6
 Wed Dec 10 12:01:27 GMT 2003 (njh)
2 7
 ----------------------------------
3 8
   * clamav-milter: Timeout on waiting for data from clamd, by honouring
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.21  2003/12/11 14:35:48  nigelhorne
21
+ * Better handling of encapsulated messages
22
+ *
20 23
  * Revision 1.20  2003/12/06 04:03:26  nigelhorne
21 24
  * Handle hand crafted emails that incorrectly set multipart headers
22 25
  *
... ...
@@ -51,7 +54,7 @@
51 51
  * Compilable under SCO; removed duplicate code with message.c
52 52
  *
53 53
  */
54
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.20 2003/12/06 04:03:26 nigelhorne Exp $";
54
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.21 2003/12/11 14:35:48 nigelhorne Exp $";
55 55
 
56 56
 #ifndef	CL_DEBUG
57 57
 /*#define	NDEBUG	/* map CLAMAV debug onto standard */
... ...
@@ -85,6 +88,7 @@ static	char	const	rcsid[] = "$Id: mbox.c,v 1.20 2003/12/06 04:03:26 nigelhorne E
85 85
 #include "message.h"
86 86
 #include "others.h"
87 87
 #include "defaults.h"
88
+#include "str.h"
88 89
 
89 90
 #if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
90 91
 #undef strtok_r
... ...
@@ -102,7 +106,8 @@ static	char	const	rcsid[] = "$Id: mbox.c,v 1.20 2003/12/06 04:03:26 nigelhorne E
102 102
 
103 103
 typedef enum    { FALSE = 0, TRUE = 1 } bool;
104 104
 
105
-static	int	insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable);
105
+static	void	parseEmailHeaders(message *m, table_t *rfc821Table);
106
+static	int	parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable);
106 107
 static	int	boundaryStart(const char *line, const char *boundary);
107 108
 static	int	endOfMessage(const char *line, const char *boundary);
108 109
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
... ...
@@ -275,7 +280,7 @@ cl_mbox(const char *dir, int desc)
275 275
 					 */
276 276
 					messageClean(m);
277 277
 					if(messageGetBody(m))
278
-						if(!insert(m,  NULL, 0, NULL, dir, rfc821Table, subtypeTable))
278
+						if(!parseEmailBody(m,  NULL, 0, NULL, dir, rfc821Table, subtypeTable))
279 279
 							break;
280 280
 					/*
281 281
 					 * Starting a new message, throw away all the
... ...
@@ -331,66 +336,26 @@ cl_mbox(const char *dir, int desc)
331 331
 			}
332 332
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
333 333
 	} else {
334
-		/* !isMbox => single mail message */
335
-		bool inHeader = TRUE;
336
-		bool inMimeHeader = FALSE;
337
-
334
+		/*
335
+		 * It's a single message, parse the headers then the body
336
+		 */
338 337
 		do {
339 338
 			/*
340
-			 * State machine:
341
-			 *	inMimeHeader	= handling mime commands over
342
-			 *				more than one line
343
-			 *	inHeader	= handling e-mail header
344
-			 *	otherwise	= handling e-mail body
345
-			 */
346
-			/*
347
-			 * Section B.2 of RFC822 says TAB or SPACE means
348
-			 * a continuation of the previous entry
339
+			 * cli_chomp coredumps for cli_chomp("")
340
+			 * or cli_chomp("\r");
349 341
 			 */
350
-			if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' ')))
351
-				inMimeHeader = TRUE;
352
-			if(inMimeHeader) {
353
-				const char *ptr;
354
-
355
-				assert(inHeader);
356
-
357
-				if(!continuationMarker(buffer))
358
-					inMimeHeader = FALSE;	 /* no more args */
359
-
360
-				/*
361
-				 * Add all the arguments on the line
362
-				 */
363
-				for(ptr = strtok_r(buffer, ";\r\n", &strptr); ptr; ptr = strtok_r(NULL, ":\r\n", &strptr))
364
-					messageAddArgument(m, ptr);
365
-			} else if(inHeader) {
366
-
367
-				cli_dbgmsg("Deal with header %s", buffer);
368
-
369
-				/*
370
-				 * A blank line signifies the end of the header and
371
-				 * the start of the text
372
-				 */
373
-				if((strstrip(buffer) == 0) || (buffer[0] == '\n') || (buffer[0] == '\r')) {
374
-					cli_dbgmsg("End of header information\n");
375
-					inHeader = FALSE;
376
-				} else {
377
-					const bool isLastLine = !continuationMarker(buffer);
378
-					const char *cmd = strtok_r(buffer, " \t", &strptr);
379
-
380
-					if (cmd && *cmd) {
381
-						const char *arg = strtok_r(NULL, "\r\n", &strptr);
382
-
383
-						if(arg)
384
-							if(parseMimeHeader(m, cmd, rfc821Table, arg) == CONTENT_TYPE)
385
-								inMimeHeader = !isLastLine;
386
-					}
387
-				}
388
-			} else {
389
-				/*cli_dbgmsg("adding line %s", buffer);*/
390
-
391
-				messageAddLine(m, strtok_r(buffer, "\r\n", &strptr));
392
-			}
342
+			/*cli_chomp(buffer);*/
343
+			char *ptr = strrchr(buffer, '\n');
344
+			if(ptr)
345
+				*ptr = '\0';
346
+			ptr = strrchr(buffer, '\r');
347
+			if(ptr)
348
+				*ptr = '\0';
349
+
350
+			messageAddLine(m, buffer);
393 351
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
352
+
353
+		parseEmailHeaders(m, rfc821Table);
394 354
 	}
395 355
 
396 356
 	fclose(fd);
... ...
@@ -402,7 +367,7 @@ cl_mbox(const char *dir, int desc)
402 402
 	 */
403 403
 	messageClean(m);
404 404
 	if(messageGetBody(m))
405
-		if(!insert(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
405
+		if(!parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
406 406
 			retcode = -1;
407 407
 
408 408
 	/*
... ...
@@ -419,8 +384,95 @@ cl_mbox(const char *dir, int desc)
419 419
 }
420 420
 
421 421
 /*
422
+ * The given message contains a raw e-mail.
423
+ *
424
+ * This function parses the headers of m and sets the message's arguments
425
+ */
426
+static void
427
+parseEmailHeaders(message *m, table_t *rfc821Table)
428
+{
429
+	/* Parsing state: start in the e-mail header, not in a multi-line MIME header */
430
+	bool inHeader = TRUE;
431
+	bool inMimeHeader = FALSE;
432
+	text *t, *msgText = messageToText(m);
433
+
434
+	t = msgText;
435
+	assert(t != NULL);
436
+
437
+	do {
438
+		char *buffer = strdup(t->t_text);
439
+#ifdef CL_THREAD_SAFE
440
+		char *strptr;
441
+#endif
442
+
443
+		/*cli_chomp(buffer);*/
444
+		char *ptr = strrchr(buffer, '\n');
445
+		if(ptr)
446
+			*ptr = '\0';
447
+		ptr = strrchr(buffer, '\r');
448
+		if(ptr)
449
+			*ptr = '\0';
450
+		/*
451
+		 * State machine:
452
+		 *	inMimeHeader	= handling mime commands over
453
+		 *				more than one line
454
+		 *	inHeader	= handling e-mail header
455
+		 *	otherwise	= handling e-mail body
456
+		 */
457
+
458
+		/*
459
+		 * Section B.2 of RFC822 says TAB or SPACE means
460
+		 * a continuation of the previous entry
461
+		 */
462
+		if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' ')))
463
+			inMimeHeader = TRUE;
464
+		if(inMimeHeader) {
465
+			const char *ptr;
466
+
467
+			assert(inHeader);
468
+
469
+			if(!continuationMarker(buffer))
470
+				inMimeHeader = FALSE;	 /* no more args */
471
+
472
+			/*
473
+			 * Add all the arguments on the line
474
+			 */
475
+			for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
476
+				messageAddArgument(m, ptr);
477
+		} else if(inHeader) {
478
+
479
+			cli_dbgmsg("Deal with header %s\n", buffer);
480
+
481
+			/*
482
+			 * A blank line signifies the end of the header and
483
+			 * the start of the text
484
+			 */
485
+			if(strstrip(buffer) == 0) {
486
+				cli_dbgmsg("End of header information\n");
487
+				break;
488
+			} else {
489
+				const bool isLastLine = !continuationMarker(buffer);
490
+				const char *cmd = strtok_r(buffer, " \t", &strptr);
491
+
492
+				if (cmd && *cmd) {
493
+					const char *arg = strtok_r(NULL, "", &strptr);
494
+
495
+					if(arg)
496
+						if(parseMimeHeader(m, cmd, rfc821Table, arg) == CONTENT_TYPE)
497
+							inMimeHeader = !isLastLine;
498
+				}
499
+			}
500
+		}
501
+	} while((t = t->t_next) != NULL);
502
+
503
+	textDestroy(msgText);
504
+}
505
+
506
+/*
422 507
  * This is a recursive routine.
423 508
  *
509
+ * This function parses the body of mainMessage and saves its attachments in dir
510
+ *
424 511
  * mainMessage is the buffer to be parsed. First time of calling it'll be
425 512
  *	the whole message. Later it'll be parts of a multipart message
426 513
  * textIn is the plain text message being built up so far
... ...
@@ -432,7 +484,7 @@ cl_mbox(const char *dir, int desc)
432 432
 *	2 for success, attachments not saved
433 433
  */
434 434
 static int	/* success or fail */
435
-insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable)
435
+parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable)
436 436
 {
437 437
 	char *ptr;
438 438
 	message *messages[MAXALTERNATIVE];
... ...
@@ -441,7 +493,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
441 441
 	blob *blobList[MAX_ATTACHMENTS], **blobs;
442 442
 	const char *cptr;
443 443
 
444
-	cli_dbgmsg("in insert(nBlobs = %d)\n", nBlobs);
444
+	cli_dbgmsg("in parseEmailBody(nBlobs = %d)\n", nBlobs);
445 445
 
446 446
 	/* Pre-assertions */
447 447
 	if(nBlobs >= MAX_ATTACHMENTS) {
... ...
@@ -703,7 +755,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
703 703
 					cli_dbgmsg("No HTML code found to be scanned");
704 704
 					rc = 0;
705 705
 				} else
706
-					rc = insert(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
706
+					rc = parseEmailBody(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
707 707
 				blobArrayDestroy(blobs, nBlobs);
708 708
 				blobs = NULL;
709 709
 				nBlobs = 0;
... ...
@@ -743,7 +795,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
743 743
 				aMessage = messages[htmltextPart];
744 744
 				aText = textAddMessage(aText, aMessage);
745 745
 
746
-				rc = insert(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
746
+				rc = parseEmailBody(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
747 747
 				if(rc == 1) {
748 748
 					/*
749 749
 					 * Alternative message has saved its
... ...
@@ -855,7 +907,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
855 855
 						break;
856 856
 					case MESSAGE:
857 857
 						cli_dbgmsg("Found message inside multipart\n");
858
-						rc = insert(aMessage, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
858
+						rc = parseEmailBody(aMessage, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
859 859
 						continue;
860 860
 					case MULTIPART:
861 861
 						/*
... ...
@@ -866,7 +918,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
866 866
 						 */
867 867
 						cli_dbgmsg("Found multipart inside multipart\n");
868 868
 						t = messageToText(aMessage);
869
-						rc = insert(aMessage, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);
869
+						rc = parseEmailBody(aMessage, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);
870 870
 						textDestroy(t);
871 871
 
872 872
 						mainMessage = aMessage;
... ...
@@ -909,7 +961,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
909 909
 
910 910
 				if(numberOfAttachments == 0) {
911 911
 					/* No usable attachment was found */
912
-					rc = insert(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
912
+					rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
913 913
 					break;
914 914
 				}
915 915
 				/*
... ...
@@ -923,7 +975,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
923 923
 					blobList[numberOfAttachments++] = blobs[i];
924 924
 				}
925 925
 
926
-				rc = insert(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
926
+				rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
927 927
 				break;
928 928
 			case DIGEST:
929 929
 			case SIGNED:
... ...
@@ -940,7 +992,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
940 940
 				if(htmltextPart == -1)
941 941
 					htmltextPart = 0;
942 942
 
943
-				rc = insert(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
943
+				rc = parseEmailBody(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
944 944
 				blobArrayDestroy(blobs, nBlobs);
945 945
 				blobs = NULL;
946 946
 				nBlobs = 0;
... ...
@@ -983,15 +1035,13 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
983 983
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
984 984
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
985 985
 				/*
986
-				 * TODO: Tidy this up, it's just a duplicate
987
-				 * of the cl_mbox code....
986
+				 * Found a message encapsulated within
987
+				 * another message
988 988
 				 *
989 989
 				 * Thomas Lamy <Thomas.Lamy@in-online.net>:
990 990
 				 * ensure t is correctly freed
991 991
 				 */
992 992
 				text *t, *msgText = messageToText(mainMessage);
993
-				bool inHeader = TRUE;
994
-				bool inMimeHeader = FALSE;
995 993
 				message *m;
996 994
 
997 995
 				t = msgText;
... ...
@@ -1002,66 +1052,28 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
1002 1002
 
1003 1003
 				cli_dbgmsg("Decode rfc822");
1004 1004
 
1005
-				/*
1006
-				 * Since by using the forward facility, the program is spawned each
1007
-				 * time a mail comes in this loop, which looks at each new item in
1008
-				 * the mailbox, probably isn't needed. It's in here incase the forward
1009
-				 * mechanism is dropped, and it doesn't really add too much
1010
-				 */
1011 1005
 				do {
1012 1006
 					char *buffer = strdup(t->t_text);
1013 1007
 
1014
-					/*
1015
-					 * Handle this where we're mid point through this stuff
1016
-					 *	Content-Type: multipart/alternative;
1017
-					 *		boundary="----foo"
1018
-					 */
1019
-					if(inMimeHeader) {
1020
-						const char *ptr;
1008
+					/*cli_chomp(buffer);*/
1009
+					char *ptr = strrchr(buffer, '\n');
1010
+					if(ptr)
1011
+						*ptr = '\0';
1012
+					ptr = strrchr(buffer, '\r');
1013
+					if(ptr)
1014
+						*ptr = '\0';
1021 1015
 
1022
-						/*
1023
-						 * Some clients are broken and put white space after
1024
-						 * the ;
1025
-						 */
1026
-						if(!continuationMarker(buffer))
1027
-							inMimeHeader = FALSE;	 /* no more args */
1028
-
1029
-						/*
1030
-						 * Add all the arguments on the line
1031
-						 */
1032
-						for(ptr = strtok_r(buffer, ";\r\n", &strptr); ptr; ptr = strtok_r(NULL, ":\r\n", &strptr))
1033
-							messageAddArgument(m, ptr);
1034
-
1035
-					} else if(inHeader) {
1036
-						/*
1037
-						 * A blank line signifies the end of the header and
1038
-						 * the start of the text
1039
-						 */
1040
-						strstrip(buffer);
1041
-
1042
-						if((buffer[0] == '\n') || (buffer[0] == '\r'))
1043
-							inHeader = 0;
1044
-						else {
1045
-							const bool isLastLine = !continuationMarker(buffer);
1046
-							const char *cmd = strtok_r(buffer, " \t", &strptr);
1047
-
1048
-							if (cmd && *cmd) {
1049
-								const char *arg = strtok_r(NULL, "\r\n", &strptr);
1050
-
1051
-								if(arg)
1052
-									if(parseMimeHeader(m, cmd, rfc821Table, arg) == CONTENT_TYPE)
1053
-										inMimeHeader = !isLastLine;
1054
-							}
1055
-						}
1056
-					} else
1057
-						messageAddLine(m, strtok_r(buffer, "\r\n", &strptr));
1016
+					messageAddLine(m, buffer);
1058 1017
 					free(buffer);
1059 1018
 				} while((t = t->t_next) != NULL);
1060 1019
 
1061 1020
 				textDestroy(msgText);
1021
+
1022
+				parseEmailHeaders(m, rfc821Table);
1023
+
1062 1024
 				messageClean(m);
1063 1025
 				if(messageGetBody(m))
1064
-					rc = insert(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable);
1026
+					rc = parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable);
1065 1027
 
1066 1028
 				messageDestroy(m);
1067 1029
 
... ...
@@ -1211,7 +1223,7 @@ insert(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const cha
1211 1211
 	if(blobs && (blobsIn == NULL))
1212 1212
 		blobArrayDestroy(blobs, nBlobs);
1213 1213
 
1214
-	cli_dbgmsg("insert() returning 1\n");
1214
+	cli_dbgmsg("parseEmailBody() returning 1\n");
1215 1215
 
1216 1216
 	return 1;
1217 1217
 }