Browse code

Handle boundary= "foo"

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@821 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/09/04 01:02:25
Showing 3 changed files
... ...
@@ -1,3 +1,9 @@
1
+Fri Sep  3 17:00:28 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	Handle spaces in headers such as 'boundary= "foo"'. I believe
4
+			that the space on the RHS of the = is not RFC1521 compliant,
5
+			but Outlook Express generates them
6
+
1 7
 Wed Sep  1 16:11:40 CEST 2004 (tk)
2 8
 ----------------------------------
3 9
   * libclamav: replace current MD5 implementation with another one
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.114  2004/09/03 15:59:00  nigelhorne
21
+ * Handle boundary= "foo"
22
+ *
20 23
  * Revision 1.113  2004/08/26 09:33:20  nigelhorne
21 24
  * Scan Communigate Pro files
22 25
  *
... ...
@@ -327,7 +330,7 @@
327 327
  * Compilable under SCO; removed duplicate code with message.c
328 328
  *
329 329
  */
330
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.113 2004/08/26 09:33:20 nigelhorne Exp $";
330
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.114 2004/09/03 15:59:00 nigelhorne Exp $";
331 331
 
332 332
 #if HAVE_CONFIG_H
333 333
 #include "clamav-config.h"
... ...
@@ -439,9 +442,6 @@ static	size_t	strip(char *buf, int len);
439 439
 static	bool	continuationMarker(const char *line);
440 440
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
441 441
 static	void	saveTextPart(message *m, const char *dir);
442
-#if	0
443
-static	bool	saveFile(const blob *b, const char *dir);
444
-#endif
445 442
 
446 443
 static	void	checkURLs(message *m, const char *dir);
447 444
 #ifdef	WITH_CURL
... ...
@@ -746,6 +746,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
746 746
 	const text *t;
747 747
 	message *ret;
748 748
 	bool anyHeadersFound = FALSE;
749
+	bool Xheader = FALSE;
749 750
 
750 751
 	cli_dbgmsg("parseEmailHeaders\n");
751 752
 
... ...
@@ -765,30 +766,31 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
765 765
 		else
766 766
 			buffer = NULL;
767 767
 
768
-		/*
769
-		 * Section B.2 of RFC822 says TAB or SPACE means
770
-		 * a continuation of the previous entry.
771
-		 */
772
-		if(inHeader && buffer &&
773
-		  ((buffer[0] == '\t') || (buffer[0] == ' '))) {
774
-			/*
775
-			 * Add all the arguments on the line
776
-			 */
777
-			const char *ptr;
778
-			char *copy = strdup(buffer);
779
-
780
-			for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
781
-				messageAddArgument(ret, ptr);
782
-			free(copy);
783
-		} else if(inHeader) {
784
-			/*
785
-			 * A blank line signifies the end of the header and
786
-			 * the start of the text
787
-			 */
768
+		if(inHeader) {
788 769
 			if(buffer == NULL) {
770
+				/*
771
+				 * A blank line signifies the end of the header
772
+				 * and the start of the text
773
+				 */
789 774
 				cli_dbgmsg("End of header information\n");
790 775
 				inHeader = FALSE;
776
+			} else if(((buffer[0] == '\t') || (buffer[0] == ' ')) &&
777
+				  (!Xheader)) {
778
+				/*
779
+				 * Section B.2 of RFC822 says TAB or SPACE means
780
+				 * a continuation of the previous entry.
781
+				 *
782
+				 * Add all the arguments on the line
783
+				 */
784
+				const char *ptr;
785
+				char *copy = strdup(buffer);
786
+
787
+				for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
788
+					if(strchr(ptr, '='))
789
+						messageAddArguments(ret, ptr);
790
+				free(copy);
791 791
 			} else {
792
+				Xheader = (bool)(buffer[0] == 'X');
792 793
 				if((parseEmailHeader(ret, buffer, rfc821) >= 0) ||
793 794
 				   (strncasecmp(buffer, "From ", 5) == 0))
794 795
 					anyHeadersFound = TRUE;
... ...
@@ -973,7 +975,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
973 973
 			while((t_line = t_line->t_next) != NULL);
974 974
 
975 975
 			if(t_line == NULL) {
976
-				cli_warnmsg("Multipart MIME message contains no boundary lines\n");
976
+				cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
977 977
 				/*
978 978
 				 * Free added by Thomas Lamy
979 979
 				 * <Thomas.Lamy@in-online.net>
... ...
@@ -1389,7 +1391,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1389 1389
 								addAttachment = TRUE;
1390 1390
 							}
1391 1391
 						} else {
1392
-							cli_warnmsg("Text type %s is not supported\n", dtype);
1392
+							cli_dbgmsg("Text type %s is not supported\n", dtype);
1393 1393
 							continue;
1394 1394
 						}
1395 1395
 						break;
... ...
@@ -1870,18 +1872,17 @@ static int
1870 1870
 getTextPart(message *const messages[], size_t size)
1871 1871
 {
1872 1872
 	size_t i;
1873
+	int textpart = -1;
1873 1874
 
1874 1875
 	for(i = 0; i < size; i++) {
1875 1876
 		assert(messages[i] != NULL);
1876
-		if((messageGetMimeType(messages[i]) == TEXT) &&
1877
-		   (strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0))
1878
-			return (int)i;
1877
+		if(messageGetMimeType(messages[i]) == TEXT) {
1878
+			if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
1879
+				return (int)i;
1880
+			textpart = (int)i;
1881
+		}
1879 1882
 	}
1880
-	for(i = 0; i < size; i++)
1881
-		if(messageGetMimeType(messages[i]) == TEXT)
1882
-			return (int)i;
1883
-
1884
-	return -1;
1883
+	return textpart;
1885 1884
 }
1886 1885
 
1887 1886
 /*
... ...
@@ -1981,11 +1982,10 @@ continuationMarker(const char *line)
1981 1981
 static int
1982 1982
 parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
1983 1983
 {
1984
-	const int type = tableFind(rfc821Table, cmd);
1985 1984
 #ifdef CL_THREAD_SAFE
1986 1985
 	char *strptr;
1987 1986
 #endif
1988
-	char *copy = strdup(arg);
1987
+	char *copy = strdup(arg ? arg : "");
1989 1988
 	char *ptr = copy;
1990 1989
 
1991 1990
 	if(copy == NULL)
... ...
@@ -1994,7 +1994,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
1994 1994
 	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
1995 1995
 	strstrip(copy);
1996 1996
 
1997
-	switch(type) {
1997
+	switch(tableFind(rfc821Table, cmd)) {
1998 1998
 		case CONTENT_TYPE:
1999 1999
 			/*
2000 2000
 			 * Fix for non RFC1521 compliant mailers
... ...
@@ -2020,7 +2020,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
2020 2020
 				 * which I believe is illegal according to
2021 2021
 				 * RFC1521
2022 2022
 				 */
2023
-				cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
2023
+				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
2024 2024
 			else {
2025 2025
 				/*
2026 2026
 				 * Some clients are broken and
... ...
@@ -2093,108 +2093,6 @@ saveTextPart(message *m, const char *dir)
2093 2093
 	}
2094 2094
 }
2095 2095
 
2096
-#if	0
2097
-/*
2098
- * Save some data as a unique file in the given directory.
2099
- *
2100
- * TODO: don't save archive files if archive scanning is disabled, or
2101
- *	OLE2 files if that is disabled or pattern match --exclude, but
2102
- *	we need access to the command line options/clamav.conf here to
2103
- *	be able to do that
2104
- *
2105
- * FIXME: duplicated code with fileblobSetFilename()
2106
- */
2107
-static bool
2108
-saveFile(const blob *b, const char *dir)
2109
-{
2110
-	const unsigned long nbytes = blobGetDataSize(b);
2111
-	size_t suffixLen = 0;
2112
-	int fd;
2113
-	const char *cptr, *suffix;
2114
-	char filename[NAME_MAX + 1];
2115
-
2116
-	assert(dir != NULL);
2117
-
2118
-	if(nbytes == 0)
2119
-		return TRUE;
2120
-
2121
-	cptr = blobGetFilename(b);
2122
-
2123
-	if(cptr == NULL) {
2124
-		cptr = "unknown";
2125
-		suffix = "";
2126
-	} else {
2127
-		/*
2128
-		 * Some programs are broken and use an idea of a ".suffix"
2129
-		 * to determine the file type rather than looking up the
2130
-		 * magic number. CPM has a lot to answer for...
2131
-		 * FIXME: the suffix now appears twice in the filename...
2132
-		 */
2133
-		suffix = strrchr(cptr, '.');
2134
-		if(suffix == NULL)
2135
-			suffix = "";
2136
-		else {
2137
-			suffixLen = strlen(suffix);
2138
-			if(suffixLen > 4) {
2139
-				/* Found a full stop which isn't a suffix */
2140
-				suffix = "";
2141
-				suffixLen = 0;
2142
-			}
2143
-		}
2144
-	}
2145
-	cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);
2146
-
2147
-	/*
2148
-	 * Allow for very long filenames. We have to truncate them to fit
2149
-	 */
2150
-	snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir,
2151
-		(int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr);
2152
-
2153
-	/*
2154
-	 * TODO: add a HAVE_MKSTEMP property
2155
-	 */
2156
-#if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
2157
-	fd = mkstemp(filename);
2158
-#else
2159
-	(void)mktemp(filename);
2160
-	fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
2161
-#endif
2162
-
2163
-	if(fd < 0) {
2164
-		cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno));
2165
-		cli_dbgmsg("%lu %d %d\n", suffixLen, sizeof(filename), strlen(filename));
2166
-		return FALSE;
2167
-	}
2168
-
2169
-	/*
2170
-	 * Add the suffix back to the end of the filename. Tut-tut, filenames
2171
-	 * should be independent of their usage on UNIX type systems.
2172
-	 */
2173
-	if(suffixLen > 1) {
2174
-		char stub[NAME_MAX + 1];
2175
-
2176
-		snprintf(stub, sizeof(stub), "%s%s", filename, suffix);
2177
-#ifdef	C_LINUX
2178
-		rename(stub, filename);
2179
-#else
2180
-		link(stub, filename);
2181
-		unlink(stub);
2182
-#endif
2183
-	}
2184
-
2185
-	cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n",
2186
-		filename, nbytes);
2187
-
2188
-	if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) {
2189
-		perror(filename);
2190
-		close(fd);
2191
-		return FALSE;
2192
-	}
2193
-
2194
-	return (close(fd) >= 0);
2195
-}
2196
-#endif
2197
-
2198 2096
 #ifdef	FOLLOWURLS
2199 2097
 static void
2200 2098
 checkURLs(message *m, const char *dir)
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.76  2004/09/03 15:59:00  nigelhorne
21
+ * Handle boundary= "foo"
22
+ *
20 23
  * Revision 1.75  2004/08/23 13:15:16  nigelhorne
21 24
  * messageClearMarkers
22 25
  *
... ...
@@ -222,7 +225,7 @@
222 222
  * uuencodebegin() no longer static
223 223
  *
224 224
  */
225
-static	char	const	rcsid[] = "$Id: message.c,v 1.75 2004/08/23 13:15:16 nigelhorne Exp $";
225
+static	char	const	rcsid[] = "$Id: message.c,v 1.76 2004/09/03 15:59:00 nigelhorne Exp $";
226 226
 
227 227
 #if HAVE_CONFIG_H
228 228
 #include "clamav-config.h"
... ...
@@ -282,6 +285,7 @@ static	unsigned	char	base64(char c);
282 282
 static	unsigned	char	uudecode(char c);
283 283
 static	const	char	*messageGetArgument(const message *m, int arg);
284 284
 static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(const text *, void *));
285
+static	int	usefulArg(const char *arg);
285 286
 
286 287
 /*
287 288
  * These maps are ordered in decreasing likelihood of their appearance
... ...
@@ -523,20 +527,8 @@ messageAddArgument(message *m, const char *arg)
523 523
 		/* Empty argument? Probably a broken mail client... */
524 524
 		return;
525 525
 
526
-	/*
527
-	 * These are the only arguments we're interested in.
528
-	 * Do 'fgrep messageFindArgument *.c' if you don't believe me!
529
-	 * It's probably not good doing this since each time a new
530
-	 * messageFindArgument is added I need to remember to look here,
531
-	 * but it can save a lot of memory...
532
-	 */
533
-	if((strncasecmp(arg, "name", 4) != 0) &&
534
-	   (strncasecmp(arg, "filename", 8) != 0) &&
535
-	   (strncasecmp(arg, "boundary", 8) != 0) &&
536
-	   (strncasecmp(arg, "type", 4) != 0)) {
537
-		cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
526
+	if(!usefulArg(arg))
538 527
 		return;
539
-	}
540 528
 
541 529
 	cli_dbgmsg("Add argument '%s'\n", arg);
542 530
 
... ...
@@ -598,6 +590,7 @@ messageAddArguments(message *m, const char *s)
598 598
 		}
599 599
 
600 600
 		key = string;
601
+
601 602
 		data = strchr(string, '=');
602 603
 
603 604
 		/*
... ...
@@ -618,8 +611,7 @@ messageAddArguments(message *m, const char *s)
618 618
 			/*
619 619
 			 * Completely broken, give up
620 620
 			 */
621
-			cli_warnmsg("Can't parse non RFC1521 header \"%s\"\n",
622
-				s);
621
+			cli_dbgmsg("Can't parse header \"%s\"\n", s);
623 622
 			return;
624 623
 		}
625 624
 
... ...
@@ -629,6 +621,12 @@ messageAddArguments(message *m, const char *s)
629 629
 
630 630
 		/*
631 631
 		 * Handle white space to the right of the equals sign
632
+		 * This breaks RFC1521 which has:
633
+		 *	parameter := attribute "=" value
634
+		 *	attribute := token   ; case-insensitive
635
+		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
636
+		 *		or tspecials>
637
+		 * But too many MUAs ignore this
632 638
 		 */
633 639
 		while(isspace(*string) && (*string != '\0'))
634 640
 			string++;
... ...
@@ -651,14 +649,21 @@ messageAddArguments(message *m, const char *s)
651 651
 			cptr++;
652 652
 
653 653
 			string = strchr(cptr, '"');
654
+
654 655
 			if((string == NULL) || (strlen(key) == 0)) {
655
-				cli_warnmsg("Can't parse header \"%s\"\n", s);
656
+				if(usefulArg(key))
657
+					cli_warnmsg("Can't parse header (1) \"%s\"\n", s);
656 658
 				free((char *)key);
657 659
 				return;
658 660
 			}
659 661
 
660 662
 			string++;
661 663
 
664
+			if(!usefulArg(key)) {
665
+				free((char *)key);
666
+				continue;
667
+			}
668
+
662 669
 			data = strdup(cptr);
663 670
 
664 671
 			ptr = (data) ? strchr(data, '"') : NULL;
... ...
@@ -674,7 +679,7 @@ messageAddArguments(message *m, const char *s)
674 674
 				 * TODO: the file should still be saved and
675 675
 				 * virus checked
676 676
 				 */
677
-				cli_warnmsg("Can't parse header \"%s\"\n", s);
677
+				cli_warnmsg("Can't parse header (2) \"%s\"\n", s);
678 678
 				if(data)
679 679
 					free(data);
680 680
 				free((char *)key);
... ...
@@ -683,14 +688,6 @@ messageAddArguments(message *m, const char *s)
683 683
 
684 684
 			*ptr = '\0';
685 685
 
686
-#if	0
687
-			field = cli_malloc(strlen(key) + strlen(data) + 2);
688
-			if(field)
689
-				sprintf(field, "%s=%s", key, data);
690
-
691
-			free((char *)key);
692
-			free(data);
693
-#else
694 686
 			field = cli_realloc((char *)key, strlen(key) + strlen(data) + 2);
695 687
 			if(field) {
696 688
 				strcat(field, "=");
... ...
@@ -698,7 +695,6 @@ messageAddArguments(message *m, const char *s)
698 698
 			} else
699 699
 				free((char *)key);
700 700
 			free(data);
701
-#endif
702 701
 		} else {
703 702
 			size_t len;
704 703
 
... ...
@@ -1392,7 +1388,6 @@ messageToFileblob(message *m, const char *dir)
1392 1392
 /*
1393 1393
  * Decode and transfer the contents of the message into a blob
1394 1394
  * The caller must free the returned blob
1395
- * TODO: a lot of code here is duplicated with messageToFileblob
1396 1395
  */
1397 1396
 blob *
1398 1397
 messageToBlob(message *m)
... ...
@@ -1876,7 +1871,6 @@ decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(
1876 1876
 			}
1877 1877
 
1878 1878
 		} else while(*in) {
1879
-		/* Slower decoding for last line */
1880 1879
 			int nbytes;
1881 1880
 
1882 1881
 			if(m->base64chars) {
... ...
@@ -1978,3 +1972,23 @@ uudecode(char c)
1978 1978
 {
1979 1979
 	return(c - ' ');
1980 1980
 }
1981
+
1982
+/*
1983
+ * These are the only arguments we're interested in.
1984
+ * Do 'fgrep messageFindArgument *.c' if you don't believe me!
1985
+ * It's probably not good doing this since each time a new
1986
+ * messageFindArgument is added I need to remember to look here,
1987
+ * but it can save a lot of memory...
1988
+ */
1989
+static int
1990
+usefulArg(const char *arg)
1991
+{
1992
+	if((strncasecmp(arg, "name", 4) != 0) &&
1993
+	   (strncasecmp(arg, "filename", 8) != 0) &&
1994
+	   (strncasecmp(arg, "boundary", 8) != 0) &&
1995
+	   (strncasecmp(arg, "type", 4) != 0)) {
1996
+		cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
1997
+		return 0;
1998
+	}
1999
+	return 1;
2000
+}