Browse code

Rewrite handling of folded headers

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1084 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/11/12 07:18:10
Showing 3 changed files
... ...
@@ -1,3 +1,7 @@
1
+Thu Nov 11 22:17:31 GMT 2004 (njh)
2
+----------------------------------
3
+  * libclamav:		Rewrote the parsing of headers to improve handling of folded lines
4
+
1 5
 Wed Nov 10 10:12:18 GMT 2004 (njh)
2 6
 ----------------------------------
3 7
   * libclamav/mbox.c:	Fix escaped parenthesis in rfc822 comments
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.175  2004/11/11 22:15:46  nigelhorne
21
+ * Rewrite handling of folded headers
22
+ *
20 23
  * Revision 1.174  2004/11/10 10:08:45  nigelhorne
21 24
  * Fix escaped parenthesis in rfc822 comments
22 25
  *
... ...
@@ -510,7 +513,7 @@
510 510
  * Compilable under SCO; removed duplicate code with message.c
511 511
  *
512 512
  */
513
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.174 2004/11/10 10:08:45 nigelhorne Exp $";
513
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.175 2004/11/11 22:15:46 nigelhorne Exp $";
514 514
 
515 515
 #if HAVE_CONFIG_H
516 516
 #include "clamav-config.h"
... ...
@@ -997,9 +1000,9 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
997 997
 	const text *t;
998 998
 	message *ret;
999 999
 	bool anyHeadersFound = FALSE;
1000
-	bool Xheader = FALSE;
1001 1000
 	int commandNumber = -1;
1002 1001
 	char *fullline = NULL;
1002
+	size_t fulllinelength = 0;
1003 1003
 
1004 1004
 	cli_dbgmsg("parseEmailHeaders\n");
1005 1005
 
... ...
@@ -1024,44 +1027,76 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1024 1024
 				 */
1025 1025
 				cli_dbgmsg("End of header information\n");
1026 1026
 				inHeader = FALSE;
1027
-			} else if(((buffer[0] == '\t') || (buffer[0] == ' ') || contMarker) &&
1028
-				  (!Xheader)) {
1029
-				/*
1030
-				 * Section B.2 of RFC822 says TAB or SPACE means
1031
-				 * a continuation of the previous entry.
1032
-				 *
1033
-				 * Add all the arguments on the line
1034
-				 */
1027
+			} else {
1035 1028
 				char *ptr;
1036
-				char copy[LINE_LENGTH + 1];
1037 1029
 				bool inquotes = FALSE;
1038 1030
 				bool arequotes = FALSE;
1031
+				const char *qptr;
1032
+				int quotes;
1039 1033
 #ifdef CL_THREAD_SAFE
1040 1034
 				char *strptr;
1041 1035
 #endif
1036
+				char cmd[LINE_LENGTH + 1];
1037
+
1038
+				if(fullline == NULL) {
1039
+					commandNumber = tableFind(rfc821, buffer);
1040
+					fullline = strdup("");
1041
+					fulllinelength = 1;
1042
+				}
1043
+				fulllinelength += strlen(buffer);
1044
+				fullline = cli_realloc(fullline, fulllinelength);
1045
+				strcat(fullline, buffer);
1042 1046
 
1043 1047
 				contMarker = continuationMarker(buffer);
1048
+				if(contMarker)
1049
+					continue;
1050
+
1051
+				if(t->t_next && (t->t_next->t_line != NULL)) {
1052
+					const char *next = lineGetData(t->t_next->t_line);
1053
+
1054
+					/*
1055
+					 * Section B.2 of RFC822 says TAB or SPACE means
1056
+					 * a continuation of the previous entry.
1057
+					 *
1058
+					 * Add all the arguments on the line
1059
+					 */
1060
+					if((next[0] == '\t') || (next[0] == ' '))
1061
+						continue;
1062
+				}
1063
+
1064
+				quotes = 0;
1065
+				for(qptr = buffer; *qptr; qptr++)
1066
+					if(*qptr == '\"')
1067
+						quotes++;
1068
+
1069
+				if(quotes & 1) {
1070
+					contMarker = TRUE;
1071
+					continue;
1072
+				}
1073
+
1074
+				ptr = rfc822comments(fullline);
1075
+				if(ptr) {
1076
+					free(fullline);
1077
+					fullline = ptr;
1078
+				}
1079
+				if(cli_strtokbuf(fullline, 0, ":", cmd) != NULL) {
1080
+					anyHeadersFound = TRUE;
1081
+					commandNumber = tableFind(rfc821, cmd);
1082
+				}
1083
+
1044 1084
 				switch(commandNumber) {
1045 1085
 					case CONTENT_TRANSFER_ENCODING:
1046 1086
 					case CONTENT_DISPOSITION:
1047 1087
 					case CONTENT_TYPE:
1048 1088
 						break;
1049 1089
 					default:
1090
+						free(fullline);
1091
+						fullline = NULL;
1050 1092
 						continue;
1051 1093
 				}
1052 1094
 
1053
-				if(fullline) {
1054
-					/*
1055
-					 * FIXME: Handle more than one line spanned by
1056
-					 * quote marks, and handle two very long lines
1057
-					 */
1058
-					snprintf(copy, sizeof(copy) - 1, "%s%s", fullline, buffer);
1059
-					free(fullline);
1060
-					fullline = NULL;
1061
-				} else {
1062
-					assert(strlen(buffer) < sizeof(copy));
1063
-					strcpy(copy, buffer);
1064
-				}
1095
+				if(parseEmailHeader(ret, fullline, rfc821) < 0)
1096
+					continue;
1065 1097
 
1066 1098
 				/*
1067 1099
 				 * Ensure that the colon in headers such as
... ...
@@ -1069,7 +1104,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1069 1069
 				 * separator
1070 1070
 				 *	boundary="=.J:gysAG)N(3_zv"
1071 1071
 				 */
1072
-				for(ptr = copy; *ptr; ptr++)
1072
+				for(ptr = fullline; *ptr; ptr++)
1073 1073
 					if(*ptr == '\"')
1074 1074
 						inquotes = !inquotes;
1075 1075
 					else if(inquotes) {
... ...
@@ -1078,7 +1113,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1078 1078
 					}
1079 1079
 
1080 1080
 #ifdef	CL_THREAD_SAFE
1081
-				for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
1081
+				for(ptr = strtok_r(fullline, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
1082 1082
 					if(strchr(ptr, '=')) {
1083 1083
 						if(arequotes) {
1084 1084
 							char *p2;
... ...
@@ -1088,7 +1123,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1088 1088
 						messageAddArguments(ret, ptr);
1089 1089
 					}
1090 1090
 #else
1091
-				for(ptr = strtok(copy, ";"); ptr; ptr = strtok(NULL, ":"))
1091
+				for(ptr = strtok(fullline, ";"); ptr; ptr = strtok(NULL, ":"))
1092 1092
 					if(strchr(ptr, '=')) {
1093 1093
 						if(arequotes) {
1094 1094
 							char *p2;
... ...
@@ -1098,35 +1133,8 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1098 1098
 						messageAddArguments(ret, ptr);
1099 1099
 					}
1100 1100
 #endif
1101
-			} else {
1102
-				const char *qptr;
1103
-				int quotes = 0;
1104
-				bool parsed = FALSE;
1105
-				char cmd[LINE_LENGTH + 1];
1106
-
1107
-				Xheader = (bool)(buffer[0] == 'X');
1108
-				contMarker = continuationMarker(buffer);
1109
-
1110
-				if(!Xheader)
1111
-					for(qptr = buffer; *qptr; qptr++)
1112
-						if(*qptr == '\"')
1113
-							quotes++;
1114
-
1101
+				free(fullline);
1115 1102
 				fullline = NULL;
1116
-
1117
-				if(quotes & 1) {
1118
-					contMarker = TRUE;
1119
-					fullline = strdup(buffer);
1120
-					parsed = TRUE;
1121
-				} else if((parseEmailHeader(ret, buffer, rfc821) >= 0) ||
1122
-					  (strncasecmp(buffer, "From ", 5) == 0))
1123
-					parsed = TRUE;
1124
-
1125
-				if(parsed)
1126
-					if(cli_strtokbuf(buffer, 0, ":", cmd) != NULL) {
1127
-						anyHeadersFound = TRUE;
1128
-						commandNumber = tableFind(rfc821, cmd);
1129
-					}
1130 1103
 			}
1131 1104
 		} else {
1132 1105
 			/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
... ...
@@ -1136,8 +1144,12 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1136 1136
 	}
1137 1137
 
1138 1138
 	if(fullline) {
1139
-		if(*fullline)
1140
-			cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n");
1139
+		if(*fullline) switch(commandNumber) {
1140
+			case CONTENT_TRANSFER_ENCODING:
1141
+			case CONTENT_DISPOSITION:
1142
+			case CONTENT_TYPE:
1143
+				cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n", fullline);
1144
+		}
1141 1145
 		free(fullline);
1142 1146
 	}
1143 1147
 
... ...
@@ -1466,6 +1478,8 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1466 1466
 						inMimeHead = continuationMarker(line);
1467 1467
 						messageAddArgument(aMessage, line);
1468 1468
 					} else if(inhead) {	/* handling normal headers */
1469
+						char *ptr;
1470
+
1469 1471
 						if(line == NULL) {
1470 1472
 							/* empty line */
1471 1473
 							inhead = 0;
... ...
@@ -1504,13 +1518,17 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1504 1504
 						inMimeHead = continuationMarker(line);
1505 1505
 						if(!inMimeHead) {
1506 1506
 							const text *next = t_line->t_next;
1507
-							char *fullline = strdup(line);
1507
+							char *fullline;
1508 1508
 							int quotes = 0;
1509 1509
 							const char *qptr;
1510 1510
 
1511 1511
 							assert(strlen(line) <= LINE_LENGTH);
1512 1512
 
1513
-							for(qptr = line; *qptr; qptr++)
1513
+							fullline = rfc822comments(line);
1514
+							if(fullline == NULL)
1515
+								fullline = strdup(line);
1516
+
1517
+							for(qptr = fullline; *qptr; qptr++)
1514 1518
 								if(*qptr == '\"')
1515 1519
 									quotes++;
1516 1520
 
... ...
@@ -1523,7 +1541,6 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1523 1523
 							 */
1524 1524
 							while(next && next->t_line) {
1525 1525
 								const char *data = lineGetData(next->t_line);
1526
-								char *ptr;
1527 1526
 
1528 1527
 								if((!isspace(data[0])) &&
1529 1528
 								   ((quotes & 1) == 0))
... ...
@@ -1554,7 +1571,12 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1554 1554
 							cli_dbgmsg("Multipart %d: About to parse header '%s'\n",
1555 1555
 								multiparts, line);
1556 1556
 
1557
-							parseEmailHeader(aMessage, line, rfc821Table);
1557
+							ptr = rfc822comments(line);
1558
+
1559
+							parseEmailHeader(aMessage, (ptr) ? ptr : line, rfc821Table);
1560
+
1561
+							if(ptr)
1562
+								free(ptr);
1558 1563
 						}
1559 1564
 					} else if(boundaryStart(line, boundary)) {
1560 1565
 						inhead = 1;
... ...
@@ -2255,12 +2277,22 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2255 2255
 static int
2256 2256
 boundaryStart(const char *line, const char *boundary)
2257 2257
 {
2258
+	char *ptr, *p;
2259
+
2258 2260
 	if(line == NULL)
2259 2261
 		return 0;	/* empty line */
2260 2262
 
2261 2263
 	cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);
2262
-	if(*line++ != '-')
2264
+
2265
+	p = ptr = rfc822comments(line);
2266
+	if(ptr == NULL)
2267
+		ptr = line;
2268
+
2269
+	if(*ptr++ != '-') {
2270
+		if(p)
2271
+			free(p);
2263 2272
 		return 0;
2273
+	}
2264 2274
 
2265 2275
 	/*
2266 2276
 	 * Gibe.B3 is broken, it has:
... ...
@@ -2277,12 +2309,16 @@ boundaryStart(const char *line, const char *boundary)
2277 2277
 	 * boundary="1" we want to ensure that we don't break out of every line
2278 2278
 	 * that has -1 in it instead of starting --1. This needs some more work.
2279 2279
 	 */
2280
-	if(strstr(line, boundary) != NULL) {
2280
+	if(strstr(ptr, boundary) != NULL) {
2281 2281
 		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
2282
+		if(p)
2283
+			free(p);
2282 2284
 		return 1;
2283 2285
 	}
2284
-	if(*line++ != '-')
2286
+	if(*ptr++ != '-')
2285 2287
 		return 0;
2288
+	if(p)
2289
+		free(p);
2286 2290
 	return strcasecmp(line, boundary) == 0;
2287 2291
 }
2288 2292
 
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.113  2004/11/11 22:15:46  nigelhorne
21
+ * Rewrite handling of folded headers
22
+ *
20 23
  * Revision 1.112  2004/11/09 19:40:06  nigelhorne
21 24
  * Find uuencoded files in preambles to multipart messages
22 25
  *
... ...
@@ -333,7 +336,7 @@
333 333
  * uuencodebegin() no longer static
334 334
  *
335 335
  */
336
-static	char	const	rcsid[] = "$Id: message.c,v 1.112 2004/11/09 19:40:06 nigelhorne Exp $";
336
+static	char	const	rcsid[] = "$Id: message.c,v 1.113 2004/11/11 22:15:46 nigelhorne Exp $";
337 337
 
338 338
 #if HAVE_CONFIG_H
339 339
 #include "clamav-config.h"
... ...
@@ -1572,15 +1575,17 @@ messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy
1572 1572
 		encoding_type enctype = m->encodingTypes[i];
1573 1573
 		size_t size;
1574 1574
 
1575
+		cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype);
1575 1576
 		/*
1576 1577
 		 * Find the filename to decode
1577 1578
 		 */
1578
-		if((enctype == UUENCODE) || ((i == 0) && uuencodeBegin(m))) {
1579
+		if((enctype == UUENCODE) || ((enctype == NOENCODING) && (i == 0) && uuencodeBegin(m))) {
1579 1580
 			t_line = uuencodeBegin(m);
1580 1581
 
1581 1582
 			if(t_line == NULL) {
1582 1583
 				/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/
1583 1584
 				(*destroy)(ret);
1585
+				m->base64chars = NULL;
1584 1586
 				return NULL;
1585 1587
 			}
1586 1588
 
... ...
@@ -1643,6 +1648,7 @@ messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy
1643 1643
 
1644 1644
 			t_line = messageGetBody(m);
1645 1645
 		}
1646
+
1646 1647
 		if(filename)
1647 1648
 			free((char *)filename);
1648 1649