Browse code

Improve boundary handling

git-svn: trunk@2751

Nigel Horne authored on 2007/02/15 21:29:51
Showing 2 changed files
... ...
@@ -1,3 +1,11 @@
1
+Thu Feb 15 12:27:22 GMT 2007 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Fixed bugs in the handling of boundary lines
4
+  			Improved handling of the warning messages associated
5
+				with recursion limits
6
+			Fixed handling of OK_ATTACHMENTS_NOT_SAVED in some
7
+				larger files
8
+  			
1 9
 Wed Feb 14 13:15:25 CET 2007 (tk)
2 10
 ---------------------------------
3 11
   * libclamav/entconv.c: fix incorrect use of isspace() in experimental code
... ...
@@ -16,7 +16,7 @@
16 16
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 17
  *  MA 02110-1301, USA.
18 18
  */
19
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.380 2007/02/13 19:47:37 njh Exp $";
19
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $";
20 20
 
21 21
 #ifdef	_MSC_VER
22 22
 #include <winsock.h>	/* only needed in CL_EXPERIMENTAL */
... ...
@@ -118,7 +118,8 @@ typedef	enum {
118 118
 	FAIL,
119 119
 	OK,
120 120
 	OK_ATTACHMENTS_NOT_SAVED,
121
-	VIRUS
121
+	VIRUS,
122
+	MAXREC
122 123
 } mbox_status;
123 124
 
124 125
 #ifndef isblank
... ...
@@ -288,7 +289,7 @@ static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table);
288 288
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
289 289
 static	mbox_status	parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
290 290
 static	int	boundaryStart(const char *line, const char *boundary);
291
-static	int	endOfMessage(const char *line, const char *boundary);
291
+static	int	boundaryEnd(const char *line, const char *boundary);
292 292
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
293 293
 static	int	getTextPart(message *const messages[], size_t size);
294 294
 static	size_t	strip(char *buf, int len);
... ...
@@ -1459,6 +1460,7 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx)
1459 1459
 					if(messageAddStr(m, buffer) < 0)
1460 1460
 						break;
1461 1461
 			} else
1462
+				/* at this point, the \n has been removed */
1462 1463
 				if(messageAddStr(m, buffer) < 0)
1463 1464
 					break;
1464 1465
 		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
... ...
@@ -1504,8 +1506,20 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx)
1504 1504
 			messageSetCTX(body, ctx);
1505 1505
 			switch(parseEmailBody(body, NULL, &mctx, 0)) {
1506 1506
 				case FAIL:
1507
+					/*
1508
+					 * beware: cli_magic_scandesc(),
1509
+					 * changes this into CL_CLEAN, so only
1510
+					 * use it to inform the higher levels
1511
+					 * that we couldn't decode it because
1512
+					 * it isn't an mbox, not to signal
1513
+					 * decoding errors on what *is* a valid
1514
+					 * mbox
1515
+					 */
1507 1516
 					retcode = CL_EFORMAT;
1508 1517
 					break;
1518
+				case MAXREC:
1519
+					retcode = CL_EMAXREC;
1520
+					break;
1509 1521
 				case VIRUS:
1510 1522
 					retcode = CL_VIRUS;
1511 1523
 					break;
... ...
@@ -2060,7 +2074,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2060 2060
 					*ctx->virname = "MIME.RecursionLimit";
2061 2061
 				return VIRUS;
2062 2062
 			} else
2063
-				return OK_ATTACHMENTS_NOT_SAVED;
2063
+				return MAXREC;
2064 2064
 		}
2065 2065
 	}
2066 2066
 
... ...
@@ -2259,6 +2273,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2259 2259
 			for(multiparts = 0; t_line && !infected; multiparts++) {
2260 2260
 				int lines = 0;
2261 2261
 				message **m;
2262
+				mbox_status old_rc;
2262 2263
 
2263 2264
 				m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
2264 2265
 				if(m == NULL)
... ...
@@ -2474,7 +2489,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2474 2474
 
2475 2475
 						parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
2476 2476
 						free(fullline);
2477
-					} else if(endOfMessage(line, boundary)) {
2477
+					} else if(boundaryEnd(line, boundary)) {
2478 2478
 						/*
2479 2479
 						 * Some viruses put information
2480 2480
 						 * *after* the end of message,
... ...
@@ -2495,8 +2510,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2495 2495
 					}
2496 2496
 				} while((t_line = t_line->t_next) != NULL);
2497 2497
 
2498
-				cli_dbgmsg("Part %d has %d lines\n",
2499
-					multiparts, lines);
2498
+				cli_dbgmsg("Part %d has %d lines, rc = %d\n",
2499
+					multiparts, lines, rc);
2500 2500
 
2501 2501
 				/*
2502 2502
 				 * Only save in the array of messages if some
... ...
@@ -2512,10 +2527,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2512 2512
 					case APPLEDOUBLE:
2513 2513
 					case KNOWBOT:
2514 2514
 					case -1:
2515
+						old_rc = rc;
2515 2516
 						mainMessage = do_multipart(mainMessage,
2516 2517
 							messages, multiparts,
2517 2518
 							&rc, mctx, messageIn,
2518 2519
 							&aText, recursion_level);
2520
+						if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
2521
+							rc = OK;
2519 2522
 						--multiparts;
2520 2523
 						if(rc == VIRUS)
2521 2524
 							infected = TRUE;
... ...
@@ -2567,12 +2585,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2567 2567
 				}
2568 2568
 
2569 2569
 				/*
2570
-				 * FIXME: we could return 2 here when we have
2571
-				 * saved stuff earlier
2572
-				 *
2573 2570
 				 * Nothing to do
2574 2571
 				 */
2575
-				return (rc == VIRUS) ? VIRUS : OK_ATTACHMENTS_NOT_SAVED;
2572
+				switch(rc) {
2573
+					case VIRUS: return VIRUS;
2574
+					case MAXREC: return MAXREC;
2575
+					default: return OK_ATTACHMENTS_NOT_SAVED;
2576
+				}
2576 2577
 			}
2577 2578
 
2578 2579
 			cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
... ...
@@ -2616,7 +2635,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2616 2616
 					rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
2617 2617
 					if(rc == OK) {
2618 2618
 						assert(aMessage == messages[htmltextPart]);
2619
-						messageDestroy(aMessage);
2619
+						if(aMessage)
2620
+							messageDestroy(aMessage);
2620 2621
 						messages[htmltextPart] = NULL;
2621 2622
 					}
2622 2623
 				}
... ...
@@ -2699,6 +2719,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2699 2699
 						infected = TRUE;
2700 2700
 						break;
2701 2701
 					}
2702
+					if(rc == MAXREC)
2703
+						break;
2702 2704
 				}
2703 2705
 
2704 2706
 				/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
... ...
@@ -3047,7 +3069,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
3047 3047
 				rc = OK;
3048 3048
 			}
3049 3049
 		}
3050
-	} else
3050
+	} /*else
3051 3051
 		rc = OK_ATTACHMENTS_NOT_SAVED;	/* nothing saved */
3052 3052
 
3053 3053
 	if(mainMessage && (mainMessage != messageIn))
... ...
@@ -3119,20 +3141,42 @@ boundaryStart(const char *line, const char *boundary)
3119 3119
 	 *
3120 3120
 	 * Look with and without RFC822 comments stripped, I've seen some
3121 3121
 	 * samples where () are taken as comments in boundaries and some where
3122
-	 * they're not. Irrespective of whatever RFC2822 says we need to find
3123
-	 * viruses in both types of mails
3122
+	 * they're not. Irrespective of whatever RFC2822 says, we need to find
3123
+	 * viruses in both types of mails.
3124 3124
 	 */
3125
-	if((strstr(ptr, boundary) != NULL) || (strstr(line, boundary) != NULL))
3126
-		rc = OK;
3127
-	else if(*ptr++ != '-')
3128
-		rc = FAIL;
3125
+	if((strstr(&ptr[1], boundary) != NULL) || (strstr(line, boundary) != NULL)) {
3126
+		const char *k = ptr;
3127
+
3128
+		/*
3129
+		 * We need to ensure that we don't match --11=-=-=11 when
3130
+		 * looking for --1=-=-=1 in well behaved headers, that's a
3131
+		 * false positive problem mentioned above
3132
+		 */
3133
+		rc = 0;
3134
+		do
3135
+			if(strcmp(++k, boundary) == 0) {
3136
+				rc = 1;
3137
+				break;
3138
+			}
3139
+		while(*k == '-');
3140
+		if(rc == 0) {
3141
+			k = &line[1];
3142
+			do
3143
+				if(strcmp(++k, boundary) == 0) {
3144
+					rc = 1;
3145
+					break;
3146
+				}
3147
+			while(*k == '-');
3148
+		}
3149
+	} else if(*ptr++ != '-')
3150
+		rc = 0;
3129 3151
 	else
3130 3152
 		rc = (strcasecmp(ptr, boundary) == 0);
3131 3153
 
3132 3154
 	if(out)
3133 3155
 		free(out);
3134 3156
 
3135
-	if(rc == OK)
3157
+	if(rc == 1)
3136 3158
 		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
3137 3159
 
3138 3160
 	return rc;
... ...
@@ -3144,13 +3188,15 @@ boundaryStart(const char *line, const char *boundary)
3144 3144
  * The message ends with with --boundary--
3145 3145
  */
3146 3146
 static int
3147
-endOfMessage(const char *line, const char *boundary)
3147
+boundaryEnd(const char *line, const char *boundary)
3148 3148
 {
3149 3149
 	size_t len;
3150 3150
 
3151 3151
 	if(line == NULL)
3152 3152
 		return 0;
3153
-	/*cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);*/
3153
+
3154
+	/*cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary);*/
3155
+
3154 3156
 	if(*line++ != '-')
3155 3157
 		return 0;
3156 3158
 	if(*line++ != '-')
... ...
@@ -3167,7 +3213,11 @@ endOfMessage(const char *line, const char *boundary)
3167 3167
 	line = &line[len];
3168 3168
 	if(*line++ != '-')
3169 3169
 		return 0;
3170
-	return *line == '-';
3170
+	if(*line == '-') {
3171
+		cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, line);
3172
+		return 1;
3173
+	}
3174
+	return 0;
3171 3175
 }
3172 3176
 
3173 3177
 /*
... ...
@@ -5203,6 +5253,9 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
5203 5203
 	if(aMessage == NULL)
5204 5204
 		return mainMessage;
5205 5205
 
5206
+	if(*rc != OK)
5207
+		return mainMessage;
5208
+
5206 5209
 	cli_dbgmsg("Mixed message part %d is of type %d\n",
5207 5210
 		i, messageGetMimeType(aMessage));
5208 5211
 
... ...
@@ -5357,7 +5410,7 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
5357 5357
 			if(body) {
5358 5358
 				messageSetCTX(body, mctx->ctx);
5359 5359
 				*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
5360
-				if(messageContainsVirus(body))
5360
+				if((*rc == OK) && messageContainsVirus(body))
5361 5361
 					*rc = VIRUS;
5362 5362
 				messageDestroy(body);
5363 5363
 			}
... ...
@@ -5376,7 +5429,7 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
5376 5376
 				 * whole multipart section
5377 5377
 				 */
5378 5378
 				*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
5379
-				cli_dbgmsg("Finished recursion\n");
5379
+				cli_dbgmsg("Finished recursion, rc = %d\n", *rc);
5380 5380
 				assert(aMessage == messages[i]);
5381 5381
 				messageDestroy(messages[i]);
5382 5382
 				messages[i] = NULL;