Browse code

Optimise messages without other messages encapsulated within them

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@633 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/06/25 22:58:41
Showing 2 changed files
... ...
@@ -1,7 +1,12 @@
1
+Fri Jun 25 14:57:06 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Optimise the scanning of emails that don't have
4
+	other emails within them.
5
+
1 6
 Thu Jun 24 22:38:16 BST 2004 (njh)
2 7
 ----------------------------------
3 8
   * libclamav:	Plug small memory leak when scanning emails with a large
4
-  	number of attachments
9
+	number of attachments
5 10
 		Handle uuencoded files created with buggy software
6 11
 
7 12
 Thu Jun 24 09:09:27 BST 2004 (trog)
... ...
@@ -12,7 +17,7 @@ Thu Jun 24 09:09:27 BST 2004 (trog)
12 12
 Wed Jun 23 17:23:59 BST 2004 (njh)
13 13
 ----------------------------------
14 14
   * libclamav/mbox.c:	Further optimisation in the handling of empty lines
15
-  	in emails
15
+	in emails
16 16
 
17 17
 Wed Jun 23 15:16:20 BST 2004 (trog)
18 18
 -----------------------------------
... ...
@@ -26,7 +31,7 @@ Tue Jun 22 18:47:32 CEST 2004 (tk)
26 26
 Tue Jun 22 11:58:06 BST 2004 (njh/trog)
27 27
 ---------------------------------------
28 28
   * libclamav/str.c:	Rewrote cli_chomp() as discussed in the clamav-devel
29
-  	mailing list
29
+	mailing list
30 30
 
31 31
 Tue Jun 22 05:09:54 BST 2004 (njh)
32 32
 ----------------------------------
... ...
@@ -34,8 +39,8 @@ Tue Jun 22 05:09:54 BST 2004 (njh)
34 34
 	clamfi_abort
35 35
 			Removed compilation warning in FreeBSD5.2
36 36
   * libclamav:		Call cli_chomp() twice to ensure \r is also removed in
37
-  	emails
38
-  			Optimise handling of blank lines in emails
37
+	emails
38
+			Optimise handling of blank lines in emails
39 39
 			Trialing Andrey J. Melnikoff (TEMHOTA)
40 40
 	<temnota@kmv.ru>'s patch to print stack trace on SIGSEGV. Tidied,
41 41
 	optimised and applied the patch. Comments welcome.
... ...
@@ -68,7 +73,7 @@ Mon Jun 21 11:21:48 BST 2004 (njh)
68 68
 Fri Jun 18 11:08:26 BST 2004 (njh)
69 69
 ----------------------------------
70 70
   * libclamav:			Allow any number of alternatives in
71
-  	multipart messages
71
+	multipart messages
72 72
 
73 73
 Wed Jun 16 09:09:45 BST 2004 (njh)
74 74
 ----------------------------------
... ...
@@ -79,7 +84,7 @@ Wed Jun 16 09:09:45 BST 2004 (njh)
79 79
   * docs/man/clamav-milter.8:	Added access to sendmail variables in template
80 80
 	files
81 81
   * libclamav:			Added small performance improvements
82
-  				Added thread safety measures
82
+				Added thread safety measures
83 83
 
84 84
 Tue Jun 15 22:41:03 CEST 2004 (tk)
85 85
 ----------------------------------
... ...
@@ -99,7 +104,7 @@ Mon Jun 14 15:35:04 BST 2004 (njh)
99 99
 Mon Jun 14 10:07:24 BST 2004 (njh)
100 100
 ----------------------------------
101 101
   * libclamav/mbox.c:	Some spam generates very broken headers, added fix
102
-  	to try to scan (with warnings about the assumptions made)
102
+	to try to scan (with warnings about the assumptions made)
103 103
 
104 104
 Sun Jun 13 14:26:33 CEST 2004 (tk)
105 105
 ----------------------------------
... ...
@@ -166,7 +171,7 @@ Tue Jun  8 22:46:29 BST 2004 (njh)
166 166
 Sun Jun  6 22:35:19 BST 2004 (njh)
167 167
 ----------------------------------
168 168
   * libclamav/mbox.c:	Find uuencoded viruses in multipart/mixed that have no
169
-  	start of message boundaries
169
+	start of message boundaries
170 170
 
171 171
 Sun Jun  6 03:38:08 CEST 2004 (tk)
172 172
 ----------------------------------
... ...
@@ -211,7 +216,7 @@ Wed Jun  2 02:30:34 CEST 2004 (tk)
211 211
 Tue Jun  1 10:09:02 BST 2004 (njh)
212 212
 ----------------------------------
213 213
   * libclamav/message.c:	Corrupted BinHex could still cause crash on
214
-  	some non Linux systems (thanks to Trog for spotting this one)
214
+	some non Linux systems (thanks to Trog for spotting this one)
215 215
 
216 216
 Sun May 30 03:35:38 CEST 2004 (tk)
217 217
 ----------------------------------
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.82  2004/06/25 13:56:38  nigelhorne
21
+ * Optimise messages without other messages encapsulated within them
22
+ *
20 23
  * Revision 1.81  2004/06/24 21:36:38  nigelhorne
21 24
  * Plug memory leak with large number of attachments
22 25
  *
... ...
@@ -231,7 +234,7 @@
231 231
  * Compilable under SCO; removed duplicate code with message.c
232 232
  *
233 233
  */
234
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.81 2004/06/24 21:36:38 nigelhorne Exp $";
234
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.82 2004/06/25 13:56:38 nigelhorne Exp $";
235 235
 
236 236
 #if HAVE_CONFIG_H
237 237
 #include "clamav-config.h"
... ...
@@ -306,7 +309,7 @@ static	void	print_trace(int use_syslog);
306 306
 
307 307
 typedef enum    { FALSE = 0, TRUE = 1 } bool;
308 308
 
309
-static	message	*parseEmailHeaders(const message *m, const table_t *rfc821Table);
309
+static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy);
310 310
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
311 311
 static	int	parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable);
312 312
 static	int	boundaryStart(const char *line, const char *boundary);
... ...
@@ -488,7 +491,7 @@ cl_mbox(const char *dir, int desc)
488 488
 				/*
489 489
 				 * End of a message in the mail box
490 490
 				 */
491
-				body = parseEmailHeaders(m, rfc821Table);
491
+				body = parseEmailHeaders(m, rfc821Table, TRUE);
492 492
 				messageDestroy(m);
493 493
 				if(messageGetBody(body))
494 494
 					if(!parseEmailBody(body,  NULL, 0, NULL, dir, rfc821Table, subtypeTable)) {
... ...
@@ -534,7 +537,7 @@ cl_mbox(const char *dir, int desc)
534 534
 
535 535
 	retcode = 0;
536 536
 
537
-	body = parseEmailHeaders(m, rfc821Table);
537
+	body = parseEmailHeaders(m, rfc821Table, TRUE);
538 538
 	messageDestroy(m);
539 539
 	/*
540 540
 	 * Write out the last entry in the mailbox
... ...
@@ -563,13 +566,20 @@ cl_mbox(const char *dir, int desc)
563 563
  * This function parses the headers of m and sets the message's arguments
564 564
  *
565 565
  * Returns the message's body with the correct arguments set
566
+ *
567
+ * The downside of this approach is that for a short time we have two copies
568
+ * of the message in memory, the upside is that it makes for easier parsing
569
+ * of encapsulated messages, and in the long run uses less memory in those
570
+ * scenarios
571
+ * BUT: if 'destroy' is set, the caller has given us a hint that 'm' will
572
+ * not be used again before it is destroyed, so we can trash it
566 573
  */
567 574
 static message *
568
-parseEmailHeaders(const message *m, const table_t *rfc821Table)
575
+parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy)
569 576
 {
570 577
 	bool inContinuationHeader = FALSE;	/* state machine: ugh */
571 578
 	bool inHeader = TRUE;
572
-	const text *t;
579
+	text *t;
573 580
 	message *ret;
574 581
 
575 582
 	cli_dbgmsg("parseEmailHeaders\n");
... ...
@@ -579,16 +589,21 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table)
579 579
 
580 580
 	ret = messageCreate();
581 581
 
582
-	for(t = messageGetBody(m); t; t = t->t_next) {
582
+	for(t = (text *)messageGetBody(m); t; t = t->t_next) {
583 583
 		char *buffer;
584 584
 #ifdef CL_THREAD_SAFE
585 585
 		char *strptr;
586 586
 #endif
587 587
 
588 588
 		if(t->t_text) {
589
-			buffer = strdup(t->t_text);
590
-			if(buffer == NULL)
591
-				break;
589
+			if(destroy) {
590
+				buffer = t->t_text;
591
+				t->t_text = NULL;
592
+			} else {
593
+				buffer = strdup(t->t_text);
594
+				if(buffer == NULL)
595
+					break;
596
+			}
592 597
 			if(cli_chomp(buffer) == 0) {
593 598
 				free(buffer);
594 599
 				buffer = NULL;
... ...
@@ -605,8 +620,6 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table)
605 605
 			inContinuationHeader = TRUE;
606 606
 
607 607
 		if(inContinuationHeader) {
608
-			const char *ptr;
609
-
610 608
 			if(!continuationMarker(buffer))
611 609
 				inContinuationHeader = FALSE;	 /* no more args */
612 610
 
... ...
@@ -614,6 +627,8 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table)
614 614
 			 * Add all the arguments on the line
615 615
 			 */
616 616
 			if(buffer) {
617
+				const char *ptr;
618
+
617 619
 				for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
618 620
 					messageAddArgument(ret, ptr);
619 621
 				free(buffer);
... ...
@@ -1221,7 +1236,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1221 1221
 						 * many nested levels are
1222 1222
 						 * involved.
1223 1223
 						 */
1224
-						body = parseEmailHeaders(aMessage, rfc821Table);
1224
+						body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
1225 1225
 						/*
1226 1226
 						 * We've finished with the
1227 1227
 						 * original copy of the message,
... ...
@@ -1247,7 +1262,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1247 1247
 						 */
1248 1248
 						cli_dbgmsg("Found multipart inside multipart\n");
1249 1249
 						if(aMessage) {
1250
-							body = parseEmailHeaders(aMessage, rfc821Table);
1250
+							body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
1251 1251
 							if(body) {
1252 1252
 								assert(aMessage == messages[i]);
1253 1253
 								messageDestroy(messages[i]);
... ...
@@ -1449,7 +1464,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1449 1449
 			}
1450 1450
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
1451 1451
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
1452
-				message *m = parseEmailHeaders(mainMessage, rfc821Table);
1452
+				message *m = parseEmailHeaders(mainMessage, rfc821Table, FALSE);
1453 1453
 				if(m) {
1454 1454
 					cli_dbgmsg("Decode rfc822");
1455 1455
 
... ...
@@ -2116,7 +2131,7 @@ print_trace(int use_syslog)
2116 2116
 
2117 2117
 	if(use_syslog == 0)
2118 2118
 		cli_dbgmsg("Backtrace of pid %d:\n", pid);
2119
-	else 
2119
+	else
2120 2120
 		syslog(LOG_ERR, "Backtrace of pid %d:", pid);
2121 2121
 
2122 2122
 	for(i = 0; i < size; i++)