git-svn: trunk@2751
Nigel Horne authored on 2007/02/15 21:29:51... | ... |
@@ -1,3 +1,11 @@ |
1 |
+Thu Feb 15 12:27:22 GMT 2007 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: Fixed bugs in the handling of boundary lines |
|
4 |
+ Improved handling of the warning messages associated |
|
5 |
+ with recursion limits |
|
6 |
+ Fixed handling of OK_ATTACHMENTS_NOT_SAVED in some |
|
7 |
+ larger files |
|
8 |
+ |
|
1 | 9 |
Wed Feb 14 13:15:25 CET 2007 (tk) |
2 | 10 |
--------------------------------- |
3 | 11 |
* libclamav/entconv.c: fix incorrect use of isspace() in experimental code |
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
17 | 17 |
* MA 02110-1301, USA. |
18 | 18 |
*/ |
19 |
-static char const rcsid[] = "$Id: mbox.c,v 1.380 2007/02/13 19:47:37 njh Exp $"; |
|
19 |
+static char const rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $"; |
|
20 | 20 |
|
21 | 21 |
#ifdef _MSC_VER |
22 | 22 |
#include <winsock.h> /* only needed in CL_EXPERIMENTAL */ |
... | ... |
@@ -118,7 +118,8 @@ typedef enum { |
118 | 118 |
FAIL, |
119 | 119 |
OK, |
120 | 120 |
OK_ATTACHMENTS_NOT_SAVED, |
121 |
- VIRUS |
|
121 |
+ VIRUS, |
|
122 |
+ MAXREC |
|
122 | 123 |
} mbox_status; |
123 | 124 |
|
124 | 125 |
#ifndef isblank |
... | ... |
@@ -288,7 +289,7 @@ static message *parseEmailHeaders(message *m, const table_t *rfc821Table); |
288 | 288 |
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); |
289 | 289 |
static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level); |
290 | 290 |
static int boundaryStart(const char *line, const char *boundary); |
291 |
-static int endOfMessage(const char *line, const char *boundary); |
|
291 |
+static int boundaryEnd(const char *line, const char *boundary); |
|
292 | 292 |
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable); |
293 | 293 |
static int getTextPart(message *const messages[], size_t size); |
294 | 294 |
static size_t strip(char *buf, int len); |
... | ... |
@@ -1459,6 +1460,7 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx) |
1459 | 1459 |
if(messageAddStr(m, buffer) < 0) |
1460 | 1460 |
break; |
1461 | 1461 |
} else |
1462 |
+ /* at this point, the \n has been removed */ |
|
1462 | 1463 |
if(messageAddStr(m, buffer) < 0) |
1463 | 1464 |
break; |
1464 | 1465 |
} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL); |
... | ... |
@@ -1504,8 +1506,20 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx) |
1504 | 1504 |
messageSetCTX(body, ctx); |
1505 | 1505 |
switch(parseEmailBody(body, NULL, &mctx, 0)) { |
1506 | 1506 |
case FAIL: |
1507 |
+ /* |
|
1508 |
+ * beware: cli_magic_scandesc(), |
|
1509 |
+ * changes this into CL_CLEAN, so only |
|
1510 |
+ * use it to inform the higher levels |
|
1511 |
+ * that we couldn't decode it because |
|
1512 |
+ * it isn't an mbox, not to signal |
|
1513 |
+ * decoding errors on what *is* a valid |
|
1514 |
+ * mbox |
|
1515 |
+ */ |
|
1507 | 1516 |
retcode = CL_EFORMAT; |
1508 | 1517 |
break; |
1518 |
+ case MAXREC: |
|
1519 |
+ retcode = CL_EMAXREC; |
|
1520 |
+ break; |
|
1509 | 1521 |
case VIRUS: |
1510 | 1522 |
retcode = CL_VIRUS; |
1511 | 1523 |
break; |
... | ... |
@@ -2060,7 +2074,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2060 | 2060 |
*ctx->virname = "MIME.RecursionLimit"; |
2061 | 2061 |
return VIRUS; |
2062 | 2062 |
} else |
2063 |
- return OK_ATTACHMENTS_NOT_SAVED; |
|
2063 |
+ return MAXREC; |
|
2064 | 2064 |
} |
2065 | 2065 |
} |
2066 | 2066 |
|
... | ... |
@@ -2259,6 +2273,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2259 | 2259 |
for(multiparts = 0; t_line && !infected; multiparts++) { |
2260 | 2260 |
int lines = 0; |
2261 | 2261 |
message **m; |
2262 |
+ mbox_status old_rc; |
|
2262 | 2263 |
|
2263 | 2264 |
m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *))); |
2264 | 2265 |
if(m == NULL) |
... | ... |
@@ -2474,7 +2489,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2474 | 2474 |
|
2475 | 2475 |
parseEmailHeader(aMessage, fullline, mctx->rfc821Table); |
2476 | 2476 |
free(fullline); |
2477 |
- } else if(endOfMessage(line, boundary)) { |
|
2477 |
+ } else if(boundaryEnd(line, boundary)) { |
|
2478 | 2478 |
/* |
2479 | 2479 |
* Some viruses put information |
2480 | 2480 |
* *after* the end of message, |
... | ... |
@@ -2495,8 +2510,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2495 | 2495 |
} |
2496 | 2496 |
} while((t_line = t_line->t_next) != NULL); |
2497 | 2497 |
|
2498 |
- cli_dbgmsg("Part %d has %d lines\n", |
|
2499 |
- multiparts, lines); |
|
2498 |
+ cli_dbgmsg("Part %d has %d lines, rc = %d\n", |
|
2499 |
+ multiparts, lines, rc); |
|
2500 | 2500 |
|
2501 | 2501 |
/* |
2502 | 2502 |
* Only save in the array of messages if some |
... | ... |
@@ -2512,10 +2527,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2512 | 2512 |
case APPLEDOUBLE: |
2513 | 2513 |
case KNOWBOT: |
2514 | 2514 |
case -1: |
2515 |
+ old_rc = rc; |
|
2515 | 2516 |
mainMessage = do_multipart(mainMessage, |
2516 | 2517 |
messages, multiparts, |
2517 | 2518 |
&rc, mctx, messageIn, |
2518 | 2519 |
&aText, recursion_level); |
2520 |
+ if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK)) |
|
2521 |
+ rc = OK; |
|
2519 | 2522 |
--multiparts; |
2520 | 2523 |
if(rc == VIRUS) |
2521 | 2524 |
infected = TRUE; |
... | ... |
@@ -2567,12 +2585,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2567 | 2567 |
} |
2568 | 2568 |
|
2569 | 2569 |
/* |
2570 |
- * FIXME: we could return 2 here when we have |
|
2571 |
- * saved stuff earlier |
|
2572 |
- * |
|
2573 | 2570 |
* Nothing to do |
2574 | 2571 |
*/ |
2575 |
- return (rc == VIRUS) ? VIRUS : OK_ATTACHMENTS_NOT_SAVED; |
|
2572 |
+ switch(rc) { |
|
2573 |
+ case VIRUS: return VIRUS; |
|
2574 |
+ case MAXREC: return MAXREC; |
|
2575 |
+ default: return OK_ATTACHMENTS_NOT_SAVED; |
|
2576 |
+ } |
|
2576 | 2577 |
} |
2577 | 2578 |
|
2578 | 2579 |
cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype); |
... | ... |
@@ -2616,7 +2635,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2616 | 2616 |
rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1); |
2617 | 2617 |
if(rc == OK) { |
2618 | 2618 |
assert(aMessage == messages[htmltextPart]); |
2619 |
- messageDestroy(aMessage); |
|
2619 |
+ if(aMessage) |
|
2620 |
+ messageDestroy(aMessage); |
|
2620 | 2621 |
messages[htmltextPart] = NULL; |
2621 | 2622 |
} |
2622 | 2623 |
} |
... | ... |
@@ -2699,6 +2719,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
2699 | 2699 |
infected = TRUE; |
2700 | 2700 |
break; |
2701 | 2701 |
} |
2702 |
+ if(rc == MAXREC) |
|
2703 |
+ break; |
|
2702 | 2704 |
} |
2703 | 2705 |
|
2704 | 2706 |
/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */ |
... | ... |
@@ -3047,7 +3069,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re |
3047 | 3047 |
rc = OK; |
3048 | 3048 |
} |
3049 | 3049 |
} |
3050 |
- } else |
|
3050 |
+ } /*else |
|
3051 | 3051 |
rc = OK_ATTACHMENTS_NOT_SAVED; /* nothing saved */ |
3052 | 3052 |
|
3053 | 3053 |
if(mainMessage && (mainMessage != messageIn)) |
... | ... |
@@ -3119,20 +3141,42 @@ boundaryStart(const char *line, const char *boundary) |
3119 | 3119 |
* |
3120 | 3120 |
* Look with and without RFC822 comments stripped, I've seen some |
3121 | 3121 |
* samples where () are taken as comments in boundaries and some where |
3122 |
- * they're not. Irrespective of whatever RFC2822 says we need to find |
|
3123 |
- * viruses in both types of mails |
|
3122 |
+ * they're not. Irrespective of whatever RFC2822 says, we need to find |
|
3123 |
+ * viruses in both types of mails. |
|
3124 | 3124 |
*/ |
3125 |
- if((strstr(ptr, boundary) != NULL) || (strstr(line, boundary) != NULL)) |
|
3126 |
- rc = OK; |
|
3127 |
- else if(*ptr++ != '-') |
|
3128 |
- rc = FAIL; |
|
3125 |
+ if((strstr(&ptr[1], boundary) != NULL) || (strstr(line, boundary) != NULL)) { |
|
3126 |
+ const char *k = ptr; |
|
3127 |
+ |
|
3128 |
+ /* |
|
3129 |
+ * We need to ensure that we don't match --11=-=-=11 when |
|
3130 |
+ * looking for --1=-=-=1 in well behaved headers, that's a |
|
3131 |
+ * false positive problem mentioned above |
|
3132 |
+ */ |
|
3133 |
+ rc = 0; |
|
3134 |
+ do |
|
3135 |
+ if(strcmp(++k, boundary) == 0) { |
|
3136 |
+ rc = 1; |
|
3137 |
+ break; |
|
3138 |
+ } |
|
3139 |
+ while(*k == '-'); |
|
3140 |
+ if(rc == 0) { |
|
3141 |
+ k = &line[1]; |
|
3142 |
+ do |
|
3143 |
+ if(strcmp(++k, boundary) == 0) { |
|
3144 |
+ rc = 1; |
|
3145 |
+ break; |
|
3146 |
+ } |
|
3147 |
+ while(*k == '-'); |
|
3148 |
+ } |
|
3149 |
+ } else if(*ptr++ != '-') |
|
3150 |
+ rc = 0; |
|
3129 | 3151 |
else |
3130 | 3152 |
rc = (strcasecmp(ptr, boundary) == 0); |
3131 | 3153 |
|
3132 | 3154 |
if(out) |
3133 | 3155 |
free(out); |
3134 | 3156 |
|
3135 |
- if(rc == OK) |
|
3157 |
+ if(rc == 1) |
|
3136 | 3158 |
cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line); |
3137 | 3159 |
|
3138 | 3160 |
return rc; |
... | ... |
@@ -3144,13 +3188,15 @@ boundaryStart(const char *line, const char *boundary) |
3144 | 3144 |
* The message ends with with --boundary-- |
3145 | 3145 |
*/ |
3146 | 3146 |
static int |
3147 |
-endOfMessage(const char *line, const char *boundary) |
|
3147 |
+boundaryEnd(const char *line, const char *boundary) |
|
3148 | 3148 |
{ |
3149 | 3149 |
size_t len; |
3150 | 3150 |
|
3151 | 3151 |
if(line == NULL) |
3152 | 3152 |
return 0; |
3153 |
- /*cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);*/ |
|
3153 |
+ |
|
3154 |
+ /*cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary);*/ |
|
3155 |
+ |
|
3154 | 3156 |
if(*line++ != '-') |
3155 | 3157 |
return 0; |
3156 | 3158 |
if(*line++ != '-') |
... | ... |
@@ -3167,7 +3213,11 @@ endOfMessage(const char *line, const char *boundary) |
3167 | 3167 |
line = &line[len]; |
3168 | 3168 |
if(*line++ != '-') |
3169 | 3169 |
return 0; |
3170 |
- return *line == '-'; |
|
3170 |
+ if(*line == '-') { |
|
3171 |
+ cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, line); |
|
3172 |
+ return 1; |
|
3173 |
+ } |
|
3174 |
+ return 0; |
|
3171 | 3175 |
} |
3172 | 3176 |
|
3173 | 3177 |
/* |
... | ... |
@@ -5203,6 +5253,9 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m |
5203 | 5203 |
if(aMessage == NULL) |
5204 | 5204 |
return mainMessage; |
5205 | 5205 |
|
5206 |
+ if(*rc != OK) |
|
5207 |
+ return mainMessage; |
|
5208 |
+ |
|
5206 | 5209 |
cli_dbgmsg("Mixed message part %d is of type %d\n", |
5207 | 5210 |
i, messageGetMimeType(aMessage)); |
5208 | 5211 |
|
... | ... |
@@ -5357,7 +5410,7 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m |
5357 | 5357 |
if(body) { |
5358 | 5358 |
messageSetCTX(body, mctx->ctx); |
5359 | 5359 |
*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1); |
5360 |
- if(messageContainsVirus(body)) |
|
5360 |
+ if((*rc == OK) && messageContainsVirus(body)) |
|
5361 | 5361 |
*rc = VIRUS; |
5362 | 5362 |
messageDestroy(body); |
5363 | 5363 |
} |
... | ... |
@@ -5376,7 +5429,7 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m |
5376 | 5376 |
* whole multipart section |
5377 | 5377 |
*/ |
5378 | 5378 |
*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1); |
5379 |
- cli_dbgmsg("Finished recursion\n"); |
|
5379 |
+ cli_dbgmsg("Finished recursion, rc = %d\n", *rc); |
|
5380 | 5380 |
assert(aMessage == messages[i]); |
5381 | 5381 |
messageDestroy(messages[i]); |
5382 | 5382 |
messages[i] = NULL; |