Browse code

Ensure multipart just saves the bodies of attachments

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@160 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2003/12/20 22:57:26
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Sat Dec 20 13:56:27 GMT 2003 (njh)
2
+----------------------------------
3
+  * libclamav: Ensure multipart just saves the bodies of attachments
4
+
1 5
 Sat Dec 20 13:25:23 CET 2003
2 6
 ----------------------------------
3 7
   * clamdscan: fixed a segmentation fault when invoked without arguments
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.24  2003/12/20 13:55:36  nigelhorne
21
+ * Ensure multipart just saves the bodies of attachments
22
+ *
20 23
  * Revision 1.23  2003/12/14 18:07:01  nigelhorne
21 24
  * Some viruses in embedded messages were not being found
22 25
  *
... ...
@@ -60,7 +63,7 @@
60 60
  * Compilable under SCO; removed duplicate code with message.c
61 61
  *
62 62
  */
63
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.23 2003/12/14 18:07:01 nigelhorne Exp $";
63
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.24 2003/12/20 13:55:36 nigelhorne Exp $";
64 64
 
65 65
 #ifndef	CL_DEBUG
66 66
 /*#define	NDEBUG	/* map CLAMAV debug onto standard */
... ...
@@ -200,22 +203,17 @@ static	const	struct tableinit {
200 200
  * TODO: parse .msg format files
201 201
  * TODO: fully handle AppleDouble format, see
202 202
  * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
203
+ * TODO: ensure parseEmailHeaders is always called before parseEmailBody
204
+ * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
203 205
  */
204 206
 int
205 207
 cl_mbox(const char *dir, int desc)
206 208
 {
207 209
 	int retcode, i;
208
-	bool isMbox;	/*
209
-			 * is it a UNIX style mbox with more than one
210
-			 * mail message, or just a single mail message?
211
-			 */
212
-	message *m;
210
+	message *m, *body;
213 211
 	table_t	*rfc821Table, *subtypeTable;
214 212
 	FILE *fd;
215 213
 	char buffer[LINE_LENGTH];
216
-#ifdef CL_THREAD_SAFE
217
-	char *strptr;
218
-#endif
219 214
 
220 215
 	cli_dbgmsg("in mbox()\n");
221 216
 
... ...
@@ -239,140 +237,71 @@ cl_mbox(const char *dir, int desc)
239 239
 		return -1;
240 240
 	}
241 241
 
242
-	isMbox = (strncmp(buffer, "From ", 5) == 0);
243
-
244
-	if(isMbox) {
242
+	/*
243
+	 * is it a UNIX style mbox with more than one
244
+	 * mail message, or just a single mail message?
245
+	 */
246
+	if(strncmp(buffer, "From ", 5) == 0) {
245 247
 		/*
246 248
 		 * Have been asked to check a UNIX style mbox file, which
247 249
 		 * may contain more than one e-mail message to decode
248 250
 		 */
249
-		bool inHeader = FALSE;
250
-		bool inMimeHeader = FALSE;
251
-		bool lastLineWasEmpty = TRUE;
252
-		bool first = TRUE;
251
+		bool lastLineWasEmpty = FALSE;
253 252
 
254 253
 		do {
255 254
 			/*cli_dbgmsg("read: %s", buffer);*/
256 255
 
257
-			/*
258
-			 * Handle this where we're mid point through this stuff
259
-			 *	Content-Type: multipart/alternative;
260
-			 *		boundary="----foo"
261
-			 */
262
-			if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' ')))
263
-				inMimeHeader = TRUE;
264
-			if(inMimeHeader) {
265
-				const char *ptr;
266
-
267
-				assert(!first);
268
-
269
-				if(!continuationMarker(buffer))
270
-					inMimeHeader = FALSE;	 /* no more args */
271
-
256
+			cli_chomp(buffer);
257
+			if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
272 258
 				/*
273
-				 * Add all the arguments on the line
259
+				 * End of a message in the mail box
274 260
 				 */
275
-				for(ptr = strtok_r(buffer, ";\r\n", &strptr); ptr; ptr = strtok_r(NULL, ":\r\n", &strptr))
276
-					messageAddArgument(m, ptr);
277
-
278
-			} else if((!inHeader) && lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
261
+				body = parseEmailHeaders(m, rfc821Table);
262
+				messageDestroy(m);
263
+				messageClean(body);
264
+				if(messageGetBody(body))
265
+					if(!parseEmailBody(body,  NULL, 0, NULL, dir, rfc821Table, subtypeTable))
266
+						break;
279 267
 				/*
280
-				 * New message, save the previous message, if any
268
+				 * Starting a new message, throw away all the
269
+				 * information about the old one
281 270
 				 */
282
-				if(!first) {
283
-					/*
284
-					 * End of the current message, add it and look
285
-					 * for the start of the next one
286
-					 */
287
-					messageClean(m);
288
-					if(messageGetBody(m))
289
-						if(!parseEmailBody(m,  NULL, 0, NULL, dir, rfc821Table, subtypeTable))
290
-							break;
291
-					/*
292
-					 * Starting a new message, throw away all the
293
-					 * information about the old one
294
-					 */
295
-					messageReset(m);
296
-				} else
297
-					first = FALSE;
271
+				m = body;
272
+				messageReset(body);
298 273
 
299
-				lastLineWasEmpty = inHeader = TRUE;
274
+				lastLineWasEmpty = TRUE;
300 275
 				cli_dbgmsg("Finished processing message\n");
301
-			} else if(inHeader) {
302
-
303
-				cli_dbgmsg("Deal with header %s", buffer);
304
-
305
-				/*
306
-				 * A blank line signifies the end of the header and
307
-				 * the start of the text
308
-				 */
309
-				if((strstrip(buffer) == 0) || (buffer[0] == '\n') || (buffer[0] == '\r')) {
310
-					cli_dbgmsg("End of header information\n");
311
-					inHeader = FALSE;
312
-				} else {
313
-					const bool isLastLine = !continuationMarker(buffer);
314
-					const char *cmd = strtok_r(buffer, " \t", &strptr);
315
-
316
-					if (cmd && *cmd) {
317
-						const char *arg = strtok_r(NULL, "\r\n", &strptr);
318
-
319
-						if(arg)
320
-							if(parseMimeHeader(m, cmd, rfc821Table, arg) == CONTENT_TYPE)
321
-								inMimeHeader = !isLastLine;
322
-					}
323
-				}
324
-			} else {
325
-				assert(!first);
326
-
327
-				/*cli_dbgmsg("adding line %s", buffer);*/
328
-
329
-				lastLineWasEmpty = ((buffer[0] == '\n') || (buffer[0] == '\r'));
330
-				/*
331
-				 * Add this line to the end of the linked list
332
-				 * of lines. This isn't needed when using
333
-				 * .forward since the rest of the file *must*
334
-				 * be the text so a single fread() should
335
-				 * suffice. Still, it does no harm and is more
336
-				 * flexible this way
337
-				 *
338
-				 * Note that the terminating newline is not
339
-				 * added
340
-				 */
341
-				messageAddLine(m, strtok_r(buffer, "\r\n", &strptr));
342
-			}
276
+			} else
277
+				lastLineWasEmpty = (buffer[0] == '\0');
278
+			messageAddLine(m, buffer);
343 279
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
344
-	} else {
280
+	} else
345 281
 		/*
346 282
 		 * It's a single message, parse the headers then the body
347 283
 		 */
348
-		message *body;
349
-
350 284
 		do {
351 285
 			cli_chomp(buffer);
352 286
 			messageAddLine(m, buffer);
353 287
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
354 288
 
355
-		body = parseEmailHeaders(m, rfc821Table);
356
-		messageDestroy(m);
357
-		m = body;
358
-	}
359
-
360 289
 	fclose(fd);
361 290
 
362 291
 	retcode = 0;
363 292
 
293
+	body = parseEmailHeaders(m, rfc821Table);
294
+	messageDestroy(m);
364 295
 	/*
365 296
 	 * Write out the last entry in the mailbox
366 297
 	 */
367
-	messageClean(m);
368
-	if(messageGetBody(m))
369
-		if(!parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
298
+	messageClean(body);
299
+	if(messageGetBody(body))
300
+		if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
370 301
 			retcode = -1;
371 302
 
372 303
 	/*
373 304
 	 * Tidy up and quit
374 305
 	 */
375
-	messageDestroy(m);
306
+	messageDestroy(body);
376 307
 
377 308
 	tableDestroy(rfc821Table);
378 309
 	tableDestroy(subtypeTable);
... ...
@@ -394,11 +323,18 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table)
394 394
 {
395 395
 	bool inContinuationHeader = FALSE;
396 396
 	bool inHeader = TRUE;
397
-	text *t, *msgText = messageToText(m);
398
-	message *ret = messageCreate();
397
+	text *t, *msgText;
398
+	message *ret;
399
+
400
+	if(m == NULL)
401
+		return NULL;
402
+
403
+	msgText = messageToText(m);
404
+	if(msgText == NULL)
405
+		return NULL;
399 406
 
400 407
 	t = msgText;
401
-	assert(t != NULL);
408
+	ret = messageCreate();
402 409
 
403 410
 	do {
404 411
 		char *buffer = strdup(t->t_text);
... ...
@@ -836,6 +772,7 @@ parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, c
836 836
 					bool addToText = FALSE;
837 837
 					const char *dtype;
838 838
 					text *t;
839
+					message *body;
839 840
 
840 841
 					aMessage = messages[i];
841 842
 
... ...
@@ -900,7 +837,11 @@ parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, c
900 900
 						break;
901 901
 					case MESSAGE:
902 902
 						cli_dbgmsg("Found message inside multipart\n");
903
-						rc = parseEmailBody(aMessage, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
903
+						body = parseEmailHeaders(aMessage, rfc821Table);
904
+						if(body) {
905
+							rc = parseEmailBody(body, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
906
+							messageDestroy(body);
907
+						}
904 908
 						continue;
905 909
 					case MULTIPART:
906 910
 						/*
... ...
@@ -910,11 +851,19 @@ parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, c
910 910
 						 *
911 911
 						 */
912 912
 						cli_dbgmsg("Found multipart inside multipart\n");
913
-						t = messageToText(aMessage);
914
-						rc = parseEmailBody(aMessage, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);
915
-						textDestroy(t);
916
-
917
-						mainMessage = aMessage;
913
+						if(aMessage) {
914
+							body = parseEmailHeaders(aMessage, rfc821Table);
915
+							if(body) {
916
+								t = messageToText(body);
917
+								rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);
918
+								textDestroy(t);
919
+
920
+								mainMessage = body;
921
+							}
922
+						} else {
923
+							rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
924
+							mainMessage = NULL;
925
+						}
918 926
 						continue;
919 927
 					case AUDIO:
920 928
 					case IMAGE:
... ...
@@ -971,6 +920,12 @@ parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, c
971 971
 				rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
972 972
 				break;
973 973
 			case DIGEST:
974
+				/*
975
+				 * TODO:
976
+				 * According to section 5.1.5 RFC2046, the
977
+				 * default mime type of multipart/digest parts
978
+				 * is message/rfc822
979
+				 */
974 980
 			case SIGNED:
975 981
 			case PARALLEL:
976 982
 				/*