git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1084 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/11/12 07:18:10... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Thu Nov 11 22:17:31 GMT 2004 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: Rewrote the parsing of headers to improve the handling of folded lines |
|
4 |
+ |
|
1 | 5 |
Wed Nov 10 10:12:18 GMT 2004 (njh) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/mbox.c: Fix escaped parenthesis in rfc822 comments |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.175 2004/11/11 22:15:46 nigelhorne |
|
21 |
+ * Rewrite handling of folded headers |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.174 2004/11/10 10:08:45 nigelhorne |
21 | 24 |
* Fix escaped parenthesis in rfc822 comments |
22 | 25 |
* |
... | ... |
@@ -510,7 +513,7 @@ |
510 | 510 |
* Compilable under SCO; removed duplicate code with message.c |
511 | 511 |
* |
512 | 512 |
*/ |
513 |
-static char const rcsid[] = "$Id: mbox.c,v 1.174 2004/11/10 10:08:45 nigelhorne Exp $"; |
|
513 |
+static char const rcsid[] = "$Id: mbox.c,v 1.175 2004/11/11 22:15:46 nigelhorne Exp $"; |
|
514 | 514 |
|
515 | 515 |
#if HAVE_CONFIG_H |
516 | 516 |
#include "clamav-config.h" |
... | ... |
@@ -997,9 +1000,9 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
997 | 997 |
const text *t; |
998 | 998 |
message *ret; |
999 | 999 |
bool anyHeadersFound = FALSE; |
1000 |
- bool Xheader = FALSE; |
|
1001 | 1000 |
int commandNumber = -1; |
1002 | 1001 |
char *fullline = NULL; |
1002 |
+ size_t fulllinelength = 0; |
|
1003 | 1003 |
|
1004 | 1004 |
cli_dbgmsg("parseEmailHeaders\n"); |
1005 | 1005 |
|
... | ... |
@@ -1024,44 +1027,76 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1024 | 1024 |
*/ |
1025 | 1025 |
cli_dbgmsg("End of header information\n"); |
1026 | 1026 |
inHeader = FALSE; |
1027 |
- } else if(((buffer[0] == '\t') || (buffer[0] == ' ') || contMarker) && |
|
1028 |
- (!Xheader)) { |
|
1029 |
- /* |
|
1030 |
- * Section B.2 of RFC822 says TAB or SPACE means |
|
1031 |
- * a continuation of the previous entry. |
|
1032 |
- * |
|
1033 |
- * Add all the arguments on the line |
|
1034 |
- */ |
|
1027 |
+ } else { |
|
1035 | 1028 |
char *ptr; |
1036 |
- char copy[LINE_LENGTH + 1]; |
|
1037 | 1029 |
bool inquotes = FALSE; |
1038 | 1030 |
bool arequotes = FALSE; |
1031 |
+ const char *qptr; |
|
1032 |
+ int quotes; |
|
1039 | 1033 |
#ifdef CL_THREAD_SAFE |
1040 | 1034 |
char *strptr; |
1041 | 1035 |
#endif |
1036 |
+ char cmd[LINE_LENGTH + 1]; |
|
1037 |
+ |
|
1038 |
+ if(fullline == NULL) { |
|
1039 |
+ commandNumber = tableFind(rfc821, buffer); |
|
1040 |
+ fullline = strdup(""); |
|
1041 |
+ fulllinelength = 1; |
|
1042 |
+ } |
|
1043 |
+ fulllinelength += strlen(buffer); |
|
1044 |
+ fullline = cli_realloc(fullline, fulllinelength); |
|
1045 |
+ strcat(fullline, buffer); |
|
1042 | 1046 |
|
1043 | 1047 |
contMarker = continuationMarker(buffer); |
1048 |
+ if(contMarker) |
|
1049 |
+ continue; |
|
1050 |
+ |
|
1051 |
+ if(t->t_next && (t->t_next->t_line != NULL)) { |
|
1052 |
+ const char *next = lineGetData(t->t_next->t_line); |
|
1053 |
+ |
|
1054 |
+ /* |
|
1055 |
+ * Section B.2 of RFC822 says TAB or SPACE means |
|
1056 |
+ * a continuation of the previous entry. |
|
1057 |
+ * |
|
1058 |
+ * Add all the arguments on the line |
|
1059 |
+ */ |
|
1060 |
+ if((next[0] == '\t') || (next[0] == ' ')) |
|
1061 |
+ continue; |
|
1062 |
+ } |
|
1063 |
+ |
|
1064 |
+ quotes = 0; |
|
1065 |
+ for(qptr = buffer; *qptr; qptr++) |
|
1066 |
+ if(*qptr == '\"') |
|
1067 |
+ quotes++; |
|
1068 |
+ |
|
1069 |
+ if(quotes & 1) { |
|
1070 |
+ contMarker = TRUE; |
|
1071 |
+ continue; |
|
1072 |
+ } |
|
1073 |
+ |
|
1074 |
+ ptr = rfc822comments(fullline); |
|
1075 |
+ if(ptr) { |
|
1076 |
+ free(fullline); |
|
1077 |
+ fullline = ptr; |
|
1078 |
+ } |
|
1079 |
+ if(cli_strtokbuf(fullline, 0, ":", cmd) != NULL) { |
|
1080 |
+ anyHeadersFound = TRUE; |
|
1081 |
+ commandNumber = tableFind(rfc821, cmd); |
|
1082 |
+ } |
|
1083 |
+ |
|
1044 | 1084 |
switch(commandNumber) { |
1045 | 1085 |
case CONTENT_TRANSFER_ENCODING: |
1046 | 1086 |
case CONTENT_DISPOSITION: |
1047 | 1087 |
case CONTENT_TYPE: |
1048 | 1088 |
break; |
1049 | 1089 |
default: |
1090 |
+ free(fullline); |
|
1091 |
+ fullline = NULL; |
|
1050 | 1092 |
continue; |
1051 | 1093 |
} |
1052 | 1094 |
|
1053 |
- if(fullline) { |
|
1054 |
- /* |
|
1055 |
- * FIXME: Handle more than one line spanned by |
|
1056 |
- * quote marks, and handle two very long lines |
|
1057 |
- */ |
|
1058 |
- snprintf(copy, sizeof(copy) - 1, "%s%s", fullline, buffer); |
|
1059 |
- free(fullline); |
|
1060 |
- fullline = NULL; |
|
1061 |
- } else { |
|
1062 |
- assert(strlen(buffer) < sizeof(copy)); |
|
1063 |
- strcpy(copy, buffer); |
|
1064 |
- } |
|
1095 |
+ if(parseEmailHeader(ret, fullline, rfc821) < 0) |
|
1096 |
+ continue; |
|
1065 | 1097 |
|
1066 | 1098 |
/* |
1067 | 1099 |
* Ensure that the colon in headers such as |
... | ... |
@@ -1069,7 +1104,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1069 | 1069 |
* separator |
1070 | 1070 |
* boundary="=.J:gysAG)N(3_zv" |
1071 | 1071 |
*/ |
1072 |
- for(ptr = copy; *ptr; ptr++) |
|
1072 |
+ for(ptr = fullline; *ptr; ptr++) |
|
1073 | 1073 |
if(*ptr == '\"') |
1074 | 1074 |
inquotes = !inquotes; |
1075 | 1075 |
else if(inquotes) { |
... | ... |
@@ -1078,7 +1113,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1078 | 1078 |
} |
1079 | 1079 |
|
1080 | 1080 |
#ifdef CL_THREAD_SAFE |
1081 |
- for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) |
|
1081 |
+ for(ptr = strtok_r(fullline, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) |
|
1082 | 1082 |
if(strchr(ptr, '=')) { |
1083 | 1083 |
if(arequotes) { |
1084 | 1084 |
char *p2; |
... | ... |
@@ -1088,7 +1123,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1088 | 1088 |
messageAddArguments(ret, ptr); |
1089 | 1089 |
} |
1090 | 1090 |
#else |
1091 |
- for(ptr = strtok(copy, ";"); ptr; ptr = strtok(NULL, ":")) |
|
1091 |
+ for(ptr = strtok(fullline, ";"); ptr; ptr = strtok(NULL, ":")) |
|
1092 | 1092 |
if(strchr(ptr, '=')) { |
1093 | 1093 |
if(arequotes) { |
1094 | 1094 |
char *p2; |
... | ... |
@@ -1098,35 +1133,8 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1098 | 1098 |
messageAddArguments(ret, ptr); |
1099 | 1099 |
} |
1100 | 1100 |
#endif |
1101 |
- } else { |
|
1102 |
- const char *qptr; |
|
1103 |
- int quotes = 0; |
|
1104 |
- bool parsed = FALSE; |
|
1105 |
- char cmd[LINE_LENGTH + 1]; |
|
1106 |
- |
|
1107 |
- Xheader = (bool)(buffer[0] == 'X'); |
|
1108 |
- contMarker = continuationMarker(buffer); |
|
1109 |
- |
|
1110 |
- if(!Xheader) |
|
1111 |
- for(qptr = buffer; *qptr; qptr++) |
|
1112 |
- if(*qptr == '\"') |
|
1113 |
- quotes++; |
|
1114 |
- |
|
1101 |
+ free(fullline); |
|
1115 | 1102 |
fullline = NULL; |
1116 |
- |
|
1117 |
- if(quotes & 1) { |
|
1118 |
- contMarker = TRUE; |
|
1119 |
- fullline = strdup(buffer); |
|
1120 |
- parsed = TRUE; |
|
1121 |
- } else if((parseEmailHeader(ret, buffer, rfc821) >= 0) || |
|
1122 |
- (strncasecmp(buffer, "From ", 5) == 0)) |
|
1123 |
- parsed = TRUE; |
|
1124 |
- |
|
1125 |
- if(parsed) |
|
1126 |
- if(cli_strtokbuf(buffer, 0, ":", cmd) != NULL) { |
|
1127 |
- anyHeadersFound = TRUE; |
|
1128 |
- commandNumber = tableFind(rfc821, cmd); |
|
1129 |
- } |
|
1130 | 1103 |
} |
1131 | 1104 |
} else { |
1132 | 1105 |
/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/ |
... | ... |
@@ -1136,8 +1144,12 @@ parseEmailHeaders(const message *m, const table_t *rfc821) |
1136 | 1136 |
} |
1137 | 1137 |
|
1138 | 1138 |
if(fullline) { |
1139 |
- if(*fullline) |
|
1140 |
- cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n"); |
|
1139 |
+ if(*fullline) switch(commandNumber) { |
|
1140 |
+ case CONTENT_TRANSFER_ENCODING: |
|
1141 |
+ case CONTENT_DISPOSITION: |
|
1142 |
+ case CONTENT_TYPE: |
|
1143 |
+ cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n", fullline); |
|
1144 |
+ } |
|
1141 | 1145 |
free(fullline); |
1142 | 1146 |
} |
1143 | 1147 |
|
... | ... |
@@ -1466,6 +1478,8 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
1466 | 1466 |
inMimeHead = continuationMarker(line); |
1467 | 1467 |
messageAddArgument(aMessage, line); |
1468 | 1468 |
} else if(inhead) { /* handling normal headers */ |
1469 |
+ char *ptr; |
|
1470 |
+ |
|
1469 | 1471 |
if(line == NULL) { |
1470 | 1472 |
/* empty line */ |
1471 | 1473 |
inhead = 0; |
... | ... |
@@ -1504,13 +1518,17 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
1504 | 1504 |
inMimeHead = continuationMarker(line); |
1505 | 1505 |
if(!inMimeHead) { |
1506 | 1506 |
const text *next = t_line->t_next; |
1507 |
- char *fullline = strdup(line); |
|
1507 |
+ char *fullline; |
|
1508 | 1508 |
int quotes = 0; |
1509 | 1509 |
const char *qptr; |
1510 | 1510 |
|
1511 | 1511 |
assert(strlen(line) <= LINE_LENGTH); |
1512 | 1512 |
|
1513 |
- for(qptr = line; *qptr; qptr++) |
|
1513 |
+ fullline = rfc822comments(line); |
|
1514 |
+ if(fullline == NULL) |
|
1515 |
+ fullline = strdup(line); |
|
1516 |
+ |
|
1517 |
+ for(qptr = fullline; *qptr; qptr++) |
|
1514 | 1518 |
if(*qptr == '\"') |
1515 | 1519 |
quotes++; |
1516 | 1520 |
|
... | ... |
@@ -1523,7 +1541,6 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
1523 | 1523 |
*/ |
1524 | 1524 |
while(next && next->t_line) { |
1525 | 1525 |
const char *data = lineGetData(next->t_line); |
1526 |
- char *ptr; |
|
1527 | 1526 |
|
1528 | 1527 |
if((!isspace(data[0])) && |
1529 | 1528 |
((quotes & 1) == 0)) |
... | ... |
@@ -1554,7 +1571,12 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
1554 | 1554 |
cli_dbgmsg("Multipart %d: About to parse header '%s'\n", |
1555 | 1555 |
multiparts, line); |
1556 | 1556 |
|
1557 |
- parseEmailHeader(aMessage, line, rfc821Table); |
|
1557 |
+ ptr = rfc822comments(line); |
|
1558 |
+ |
|
1559 |
+ parseEmailHeader(aMessage, (ptr) ? ptr : line, rfc821Table); |
|
1560 |
+ |
|
1561 |
+ if(ptr) |
|
1562 |
+ free(ptr); |
|
1558 | 1563 |
} |
1559 | 1564 |
} else if(boundaryStart(line, boundary)) { |
1560 | 1565 |
inhead = 1; |
... | ... |
@@ -2255,12 +2277,22 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
2255 | 2255 |
static int |
2256 | 2256 |
boundaryStart(const char *line, const char *boundary) |
2257 | 2257 |
{ |
2258 |
+ char *ptr, *p; |
|
2259 |
+ |
|
2258 | 2260 |
if(line == NULL) |
2259 | 2261 |
return 0; /* empty line */ |
2260 | 2262 |
|
2261 | 2263 |
cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary); |
2262 |
- if(*line++ != '-') |
|
2264 |
+ |
|
2265 |
+ p = ptr = rfc822comments(line); |
|
2266 |
+ if(ptr == NULL) |
|
2267 |
+ ptr = line; |
|
2268 |
+ |
|
2269 |
+ if(*ptr++ != '-') { |
|
2270 |
+ if(p) |
|
2271 |
+ free(p); |
|
2263 | 2272 |
return 0; |
2273 |
+ } |
|
2264 | 2274 |
|
2265 | 2275 |
/* |
2266 | 2276 |
* Gibe.B3 is broken, it has: |
... | ... |
@@ -2277,12 +2309,16 @@ boundaryStart(const char *line, const char *boundary) |
2277 | 2277 |
* boundary="1" we want to ensure that we don't break out of every line |
2278 | 2278 |
* that has -1 in it instead of starting --1. This needs some more work. |
2279 | 2279 |
*/ |
2280 |
- if(strstr(line, boundary) != NULL) { |
|
2280 |
+ if(strstr(ptr, boundary) != NULL) { |
|
2281 | 2281 |
cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line); |
2282 |
+ if(p) |
|
2283 |
+ free(p); |
|
2282 | 2284 |
return 1; |
2283 | 2285 |
} |
2284 |
- if(*line++ != '-') |
|
2286 |
+ if(*ptr++ != '-') |
|
2285 | 2287 |
return 0; |
2288 |
+ if(p) |
|
2289 |
+ free(p); |
|
2286 | 2290 |
return strcasecmp(line, boundary) == 0; |
2287 | 2291 |
} |
2288 | 2292 |
|
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: message.c,v $ |
20 |
+ * Revision 1.113 2004/11/11 22:15:46 nigelhorne |
|
21 |
+ * Rewrite handling of folded headers |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.112 2004/11/09 19:40:06 nigelhorne |
21 | 24 |
* Find uuencoded files in preambles to multipart messages |
22 | 25 |
* |
... | ... |
@@ -333,7 +336,7 @@ |
333 | 333 |
* uuencodebegin() no longer static |
334 | 334 |
* |
335 | 335 |
*/ |
336 |
-static char const rcsid[] = "$Id: message.c,v 1.112 2004/11/09 19:40:06 nigelhorne Exp $"; |
|
336 |
+static char const rcsid[] = "$Id: message.c,v 1.113 2004/11/11 22:15:46 nigelhorne Exp $"; |
|
337 | 337 |
|
338 | 338 |
#if HAVE_CONFIG_H |
339 | 339 |
#include "clamav-config.h" |
... | ... |
@@ -1572,15 +1575,17 @@ messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy |
1572 | 1572 |
encoding_type enctype = m->encodingTypes[i]; |
1573 | 1573 |
size_t size; |
1574 | 1574 |
|
1575 |
+ cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype); |
|
1575 | 1576 |
/* |
1576 | 1577 |
* Find the filename to decode |
1577 | 1578 |
*/ |
1578 |
- if((enctype == UUENCODE) || ((i == 0) && uuencodeBegin(m))) { |
|
1579 |
+ if((enctype == UUENCODE) || ((enctype == NOENCODING) && (i == 0) && uuencodeBegin(m))) { |
|
1579 | 1580 |
t_line = uuencodeBegin(m); |
1580 | 1581 |
|
1581 | 1582 |
if(t_line == NULL) { |
1582 | 1583 |
/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/ |
1583 | 1584 |
(*destroy)(ret); |
1585 |
+ m->base64chars = NULL; |
|
1584 | 1586 |
return NULL; |
1585 | 1587 |
} |
1586 | 1588 |
|
... | ... |
@@ -1643,6 +1648,7 @@ messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy |
1643 | 1643 |
|
1644 | 1644 |
t_line = messageGetBody(m); |
1645 | 1645 |
} |
1646 |
+ |
|
1646 | 1647 |
if(filename) |
1647 | 1648 |
free((char *)filename); |
1648 | 1649 |
|