Browse code

Use line.[ch]

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@775 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/08/21 21:01:07
Showing 8 changed files
... ...
@@ -1,3 +1,9 @@
1
+Sat Aug 21 12:59:43 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	Changed the handling of multipart messages, that is scanning
4
+  			emails with attachments. Reports on impact on memory
5
+			usage and speed welcome to clamav-devel@clamav.net
6
+
1 7
 Fri Aug 20 21:05:04 CEST 2004 (tk)
2 8
 ----------------------------------
3 9
   * libclamav/Makefile.am: add line.[ch]
... ...
@@ -16,12 +16,15 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: line.c,v $
19
+ * Revision 1.2  2004/08/21 11:57:57  nigelhorne
20
+ * Use line.[ch]
21
+ *
19 22
  * Revision 1.1  2004/08/20 11:58:20  nigelhorne
20 23
  * First draft
21 24
  *
22 25
  */
23 26
 
24
-static	char	const	rcsid[] = "$Id: line.c,v 1.1 2004/08/20 11:58:20 nigelhorne Exp $";
27
+static	char	const	rcsid[] = "$Id: line.c,v 1.2 2004/08/21 11:57:57 nigelhorne Exp $";
25 28
 
26 29
 #if HAVE_CONFIG_H
27 30
 #include "clamav-config.h"
... ...
@@ -33,16 +36,17 @@ static	char	const	rcsid[] = "$Id: line.c,v 1.1 2004/08/20 11:58:20 nigelhorne Ex
33 33
 #include "line.h"
34 34
 #include "others.h"
35 35
 
36
+#ifdef	OLD
36 37
 line_t *
37 38
 lineCreate(const char *data)
38 39
 {
39
-	line_t *ret = cli_malloc(sizeof(struct line));
40
+	line_t *ret = (line_t *)li_malloc(sizeof(struct line));
40 41
 
41 42
 	if(ret == NULL)
42 43
 		return NULL;
43 44
 
44
-	ret->l_data = strdup(data);
45
-	if(ret->l_data == NULL) {
45
+	ret->l_str = strdup(data);
46
+	if(ret->l_str == NULL) {
46 47
 		free(ret);
47 48
 		return NULL;
48 49
 	}
... ...
@@ -61,8 +65,53 @@ lineLink(line_t *line)
61 61
 line_t *
62 62
 lineUnlink(line_t *line)
63 63
 {
64
+	/*printf("%d:\n\t'%s'\n", line->l_refs, line->l_str);*/
65
+
64 66
 	if(--line->l_refs == 0) {
65
-		free(line->l_data);
67
+		free(line->l_str);
68
+		free(line);
69
+		return NULL;
70
+	}
71
+	return line;
72
+}
73
+
74
+const char *
75
+lineGetData(const line_t *line)
76
+{
77
+	return line ? line->l_str : NULL;
78
+}
79
+#else
80
+line_t *
81
+lineCreate(const char *data)
82
+{
83
+	line_t *ret = (line_t *)cli_malloc(strlen(data) + 2);
84
+
85
+	if(ret == NULL)
86
+		return NULL;
87
+
88
+	ret[0] = (char)1;
89
+	strcpy(&ret[1], data);
90
+
91
+	return ret;
92
+}
93
+
94
+line_t *
95
+lineLink(line_t *line)
96
+{
97
+	if(line[0] == 127) {
98
+		cli_warnmsg("lineLink: linkcount too large\n");
99
+		return NULL;
100
+	}
101
+	line[0]++;
102
+	return line;
103
+}
104
+
105
+line_t *
106
+lineUnlink(line_t *line)
107
+{
108
+	/*printf("%d:\n\t'%s'\n", (int)line[0], &line[1]);*/
109
+
110
+	if(--line[0] == 0) {
66 111
 		free(line);
67 112
 		return NULL;
68 113
 	}
... ...
@@ -72,5 +121,6 @@ lineUnlink(line_t *line)
72 72
 const char *
73 73
 lineGetData(const line_t *line)
74 74
 {
75
-	return line ? line->l_data : NULL;
75
+	return line ? &line[1] : NULL;
76 76
 }
77
+#endif
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: line.h,v $
19
+ * Revision 1.3  2004/08/21 11:57:57  nigelhorne
20
+ * Use line.[ch]
21
+ *
19 22
  * Revision 1.2  2004/08/20 19:06:45  kojm
20 23
  * add line.[ch]
21 24
  *
... ...
@@ -27,10 +30,16 @@
27 27
 #ifndef __LINE_H
28 28
 #define __LINE_H
29 29
 
30
+#ifdef	OLD
31
+/* easier to read, but slower */
32
+
30 33
 typedef struct line {
31
-	char	*l_data;	/* the line's contents */
34
+	char	*l_str;	/* the line's contents */
32 35
 	unsigned int	l_refs;	/* the number of references to the data */
33 36
 } line_t;
37
+#else
38
+typedef	char	line_t;	/* first byte is the ref count */
39
+#endif
34 40
 
35 41
 line_t	*lineCreate(const char *data);
36 42
 line_t	*lineLink(line_t *line);
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.108  2004/08/21 11:57:57  nigelhorne
21
+ * Use line.[ch]
22
+ *
20 23
  * Revision 1.107  2004/08/20 04:55:07  nigelhorne
21 24
  * FOLLOWURL
22 25
  *
... ...
@@ -309,7 +312,7 @@
309 309
  * Compilable under SCO; removed duplicate code with message.c
310 310
  *
311 311
  */
312
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.107 2004/08/20 04:55:07 nigelhorne Exp $";
312
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.108 2004/08/21 11:57:57 nigelhorne Exp $";
313 313
 
314 314
 #if HAVE_CONFIG_H
315 315
 #include "clamav-config.h"
... ...
@@ -347,6 +350,7 @@ static	char	const	rcsid[] = "$Id: mbox.c,v 1.107 2004/08/20 04:55:07 nigelhorne
347 347
 #include "table.h"
348 348
 #include "mbox.h"
349 349
 #include "blob.h"
350
+#include "line.h"
350 351
 #include "text.h"
351 352
 #include "message.h"
352 353
 #include "others.h"
... ...
@@ -409,9 +413,9 @@ typedef enum	{ FALSE = 0, TRUE = 1 } bool;
409 409
 #undef	WITH_CURL
410 410
 #endif
411 411
 
412
-static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy);
412
+static	message	*parseEmailHeaders(const message *m, const table_t *rfc821Table);
413 413
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
414
-static	int	parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable, unsigned int options);
414
+static	int	parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options);
415 415
 static	int	boundaryStart(const char *line, const char *boundary);
416 416
 static	int	endOfMessage(const char *line, const char *boundary);
417 417
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
... ...
@@ -615,7 +619,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
615 615
 				/*
616 616
 				 * End of a message in the mail box
617 617
 				 */
618
-				body = parseEmailHeaders(m, rfc821, TRUE);
618
+				body = parseEmailHeaders(m, rfc821);
619 619
 				if(body == NULL) {
620 620
 					messageReset(m);
621 621
 					continue;
... ...
@@ -637,7 +641,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
637 637
 				cli_dbgmsg("Finished processing message\n");
638 638
 			} else
639 639
 				lastLineWasEmpty = (bool)(buffer[0] == '\0');
640
-			if(messageAddLine(m, buffer, 1) < 0)
640
+			if(messageAddStr(m, buffer) < 0)
641 641
 				break;
642 642
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
643 643
 
... ...
@@ -655,27 +659,25 @@ cli_mbox(const char *dir, int desc, unsigned int options)
655 655
 		 * FIXME: files full of new lines and nothing else are
656 656
 		 * handled ungracefully...
657 657
 		 */
658
-		do
658
+		do {
659 659
 			/*
660
-			 * No need to preprocess such as cli_chomp() since
661
-			 * that'll be done by parseEmailHeaders()
662
-			 *
663 660
 			 * TODO: this needlessly creates a message object,
664 661
 			 * it'd be better if parseEmailHeaders could also
665 662
 			 * read in from a file. I do not want to lump the
666 663
 			 * parseEmailHeaders code here, that'd be a duplication
667 664
 			 * of code I want to avoid
668 665
 			 */
669
-			if(messageAddLine(m, buffer, 1) < 0)
666
+			(void)cli_chomp(buffer);
667
+			if(messageAddStr(m, buffer) < 0)
670 668
 				break;
671
-		while(fgets(buffer, sizeof(buffer), fd) != NULL);
669
+		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
672 670
 	}
673 671
 
674 672
 	fclose(fd);
675 673
 
676 674
 	retcode = 0;
677 675
 
678
-	body = parseEmailHeaders(m, rfc821, TRUE);
676
+	body = parseEmailHeaders(m, rfc821);
679 677
 	messageDestroy(m);
680 678
 	if(body) {
681 679
 		/*
... ...
@@ -711,14 +713,12 @@ cli_mbox(const char *dir, int desc, unsigned int options)
711 711
  * of the message in memory, the upside is that it makes for easier parsing
712 712
  * of encapsulated messages, and in the long run uses less memory in those
713 713
  * scenarios
714
- * BUT: if 'destroy' is set, the caller has given us a hint than 'm' will
715
- * not be used again before it is destroyed, so we can trash it
716 714
  */
717 715
 static message *
718
-parseEmailHeaders(message *m, const table_t *rfc821, bool destroy)
716
+parseEmailHeaders(const message *m, const table_t *rfc821)
719 717
 {
720 718
 	bool inHeader = TRUE;
721
-	text *t;
719
+	const text *t;
722 720
 	message *ret;
723 721
 	bool anyHeadersFound = FALSE;
724 722
 
... ...
@@ -729,26 +729,15 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool destroy)
729 729
 
730 730
 	ret = messageCreate();
731 731
 
732
-	for(t = (text *)messageGetBody(m); t; t = t->t_next) {
733
-		char *buffer;
732
+	for(t = messageGetBody(m); t; t = t->t_next) {
733
+		const char *buffer;
734 734
 #ifdef CL_THREAD_SAFE
735 735
 		char *strptr;
736 736
 #endif
737 737
 
738
-		if(t->t_text) {
739
-			if(destroy) {
740
-				buffer = t->t_text;
741
-				t->t_text = NULL;
742
-			} else {
743
-				buffer = strdup(t->t_text);
744
-				if(buffer == NULL)
745
-					break;
746
-			}
747
-			if(cli_chomp(buffer) == 0) {
748
-				free(buffer);
749
-				buffer = NULL;
750
-			}
751
-		} else
738
+		if(t->t_line)
739
+			buffer = lineGetData(t->t_line);
740
+		else
752 741
 			buffer = NULL;
753 742
 
754 743
 		/*
... ...
@@ -760,13 +749,12 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool destroy)
760 760
 			/*
761 761
 			 * Add all the arguments on the line
762 762
 			 */
763
-			if(buffer) {
764
-				const char *ptr;
763
+			const char *ptr;
764
+			char *copy = strdup(buffer);
765 765
 
766
-				for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
767
-					messageAddArgument(ret, ptr);
768
-				free(buffer);
769
-			}
766
+			for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
767
+				messageAddArgument(ret, ptr);
768
+			free(copy);
770 769
 		} else if(inHeader) {
771 770
 			/*
772 771
 			 * A blank line signifies the end of the header and
... ...
@@ -779,11 +767,10 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool destroy)
779 779
 				if((parseEmailHeader(ret, buffer, rfc821) >= 0) ||
780 780
 				   (strncasecmp(buffer, "From ", 5) == 0))
781 781
 					anyHeadersFound = TRUE;
782
-				free(buffer);
783 782
 			}
784 783
 		} else {
785 784
 			/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
786
-			if(messageAddLine(ret, buffer, 0) < 0)
785
+			if(messageAddLine(ret, t->t_line) < 0)
787 786
 				break;
788 787
 		}
789 788
 	}
... ...
@@ -861,7 +848,7 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
861 861
  *	2 for success, attachments not saved
862 862
  */
863 863
 static int	/* success or fail */
864
-parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable, unsigned int options)
864
+parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options)
865 865
 {
866 866
 	message **messages;	/* parts of a multipart message */
867 867
 	int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
... ...
@@ -965,7 +952,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
965 965
 			}
966 966
 
967 967
 			do
968
-				if(boundaryStart(t_line->t_text, boundary))
968
+				if(boundaryStart(lineGetData(t_line->t_line), boundary))
969 969
 					break;
970 970
 			while((t_line = t_line->t_next) != NULL);
971 971
 
... ...
@@ -1017,8 +1004,9 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1017 1017
 				 * but some viruses insert them
1018 1018
 				 */
1019 1019
 				while((t_line = t_line->t_next) != NULL)
1020
-					if(t_line->t_text &&
1021
-					   (cli_chomp(t_line->t_text) > 0))
1020
+					if(t_line->t_line &&
1021
+					   /*(cli_chomp(t_line->t_text) > 0))*/
1022
+					   (strlen(lineGetData(t_line->t_line)) > 0))
1022 1023
 						break;
1023 1024
 
1024 1025
 				if(t_line == NULL) {
... ...
@@ -1038,7 +1026,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1038 1038
 				}
1039 1039
 
1040 1040
 				do {
1041
-					const char *line = t_line->t_text;
1041
+					const char *line = lineGetData(t_line->t_line);
1042 1042
 
1043 1043
 					/*cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n",
1044 1044
 						inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");*/
... ...
@@ -1115,11 +1103,16 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1115 1115
 						 * put white space after the ;
1116 1116
 						 */
1117 1117
 						inMimeHead = continuationMarker(line);
1118
-						if(!inMimeHead)
1119
-							if(t_line->t_next &&
1120
-							   t_line->t_next->t_text &&
1121
-							  ((t_line->t_next->t_text[0] == '\t') || (t_line->t_next->t_text[0] == ' ')))
1122
-								inMimeHead = TRUE;
1118
+						if(!inMimeHead) {
1119
+							const text *next = t_line->t_next;
1120
+
1121
+							if(next && next->t_line) {
1122
+								const char *data = lineGetData(next->t_line);
1123
+
1124
+								if((data[0] == '\t') || (data[0] == ' '))
1125
+									inMimeHead = TRUE;
1126
+							}
1127
+						}
1123 1128
 
1124 1129
 						parseEmailHeader(aMessage, line, rfc821Table);
1125 1130
 					} else if(boundaryStart(line, boundary)) {
... ...
@@ -1137,7 +1130,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1137 1137
 						/* t_line = NULL;*/
1138 1138
 						break;
1139 1139
 					} else {
1140
-						if(messageAddLine(aMessage, line, 1) < 0)
1140
+						if(messageAddLine(aMessage, t_line->t_line) < 0)
1141 1141
 							break;
1142 1142
 						lines++;
1143 1143
 					}
... ...
@@ -1342,7 +1335,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1342 1342
 							/*
1343 1343
 							 * No plain text version
1344 1344
 							 */
1345
-							messageAddLine(aMessage, "No plain text alternative", 1);
1345
+							messageAddStr(aMessage, "No plain text alternative");
1346 1346
 						assert(messageGetBody(aMessage) != NULL);
1347 1347
 						break;
1348 1348
 					case TEXT:
... ...
@@ -1404,7 +1397,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1404 1404
 							messages[i] = NULL;
1405 1405
 							continue;
1406 1406
 						}
1407
-						messageAddLineAtTop(aMessage,
1407
+						messageAddStrAtTop(aMessage,
1408 1408
 							"Received: by clamd");
1409 1409
 #ifdef	SAVE_TO_DISC
1410 1410
 						/*
... ...
@@ -1447,27 +1440,6 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1447 1447
 						 * be an attachment
1448 1448
 						 */
1449 1449
 						cli_dbgmsg("Found multipart inside multipart\n");
1450
-#if	0
1451
-						if(aMessage) {
1452
-							body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
1453
-							if(body) {
1454
-								assert(aMessage == messages[i]);
1455
-								messageDestroy(messages[i]);
1456
-								messages[i] = NULL;
1457
-
1458
-								if(mainMessage && (mainMessage != messageIn))
1459
-									messageDestroy(mainMessage);
1460
-
1461
-								/*t = messageToText(body);
1462
-								rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable, options);*/
1463
-								rc = parseEmailBody(body, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable, options);
1464
-								/*textDestroy(t);*/
1465
-
1466
-								cli_dbgmsg("Finished recursion\n");
1467
-
1468
-								mainMessage = body;
1469
-							}
1470
-#else
1471 1450
 						if(aMessage) {
1472 1451
 							/*
1473 1452
 							 * The headers were parsed when reading in the
... ...
@@ -1478,7 +1450,6 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1478 1478
 							assert(aMessage == messages[i]);
1479 1479
 							messageDestroy(messages[i]);
1480 1480
 							messages[i] = NULL;
1481
-#endif
1482 1481
 						} else {
1483 1482
 							rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable, options);
1484 1483
 							if(mainMessage && (mainMessage != messageIn))
... ...
@@ -1663,7 +1634,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1663 1663
 			}
1664 1664
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
1665 1665
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
1666
-				message *m = parseEmailHeaders(mainMessage, rfc821Table, FALSE);
1666
+				message *m = parseEmailHeaders(mainMessage, rfc821Table);
1667 1667
 				if(m) {
1668 1668
 					cli_dbgmsg("Decode rfc822");
1669 1669
 
... ...
@@ -1810,12 +1781,15 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1810 1810
 				 * must remain otherwise non bounce messages
1811 1811
 				 * won't be scanned
1812 1812
 				 */
1813
-				for(t = t_line; t; t = t->t_next)
1814
-					if(t->t_text &&
1815
-					   (strncasecmp(t->t_text, encoding, sizeof(encoding) - 1) == 0) &&
1816
-					   (strstr(t->t_text, "7bit") == NULL) &&
1817
-					   (strstr(t->t_text, "8bit") == NULL))
1813
+				for(t = t_line; t; t = t->t_next) {
1814
+					const char *txt = lineGetData(t->t_line);
1815
+
1816
+					if(txt &&
1817
+					   (strncasecmp(txt, encoding, sizeof(encoding) - 1) == 0) &&
1818
+					   (strstr(txt, "7bit") == NULL) &&
1819
+					   (strstr(txt, "8bit") == NULL))
1818 1820
 						break;
1821
+				}
1819 1822
 				if(t && ((b = textToBlob(t_line, NULL)) != NULL)) {
1820 1823
 					cli_dbgmsg("Found a bounce message\n");
1821 1824
 
... ...
@@ -2399,14 +2373,14 @@ checkURLs(message *m, const char *dir)
2399 2399
 				continue;
2400 2400
 			if(*p2 == '\0')
2401 2401
 				continue;
2402
-			if(n == MAX_URLS) {
2403
-				cli_warnmsg("Not all URLs will be scanned\n");
2404
-				break;
2405
-			}
2406 2402
 			if(tableFind(t, p2) == 1) {
2407 2403
 				cli_dbgmsg("URL %s already downloaded\n", p2);
2408 2404
 				continue;
2409 2405
 			}
2406
+			if(n == MAX_URLS) {
2407
+				cli_warnmsg("Not all URLs will be scanned\n");
2408
+				break;
2409
+			}
2410 2410
 			(void)tableInsert(t, p2, 1);
2411 2411
 			cli_dbgmsg("Downloading URL %s to be scanned\n", p2);
2412 2412
 			strncpy(name, p2, sizeof(name));
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.72  2004/08/21 11:57:57  nigelhorne
21
+ * Use line.[ch]
22
+ *
20 23
  * Revision 1.71  2004/08/13 09:28:16  nigelhorne
21 24
  * Remove incorrect comment style
22 25
  *
... ...
@@ -210,7 +213,7 @@
210 210
  * uuencodebegin() no longer static
211 211
  *
212 212
  */
213
-static	char	const	rcsid[] = "$Id: message.c,v 1.71 2004/08/13 09:28:16 nigelhorne Exp $";
213
+static	char	const	rcsid[] = "$Id: message.c,v 1.72 2004/08/21 11:57:57 nigelhorne Exp $";
214 214
 
215 215
 #if HAVE_CONFIG_H
216 216
 #include "clamav-config.h"
... ...
@@ -240,6 +243,7 @@ static	char	const	rcsid[] = "$Id: message.c,v 1.71 2004/08/13 09:28:16 nigelhorn
240 240
 #include <pthread.h>
241 241
 #endif
242 242
 
243
+#include "line.h"
243 244
 #include "mbox.h"
244 245
 #include "table.h"
245 246
 #include "blob.h"
... ...
@@ -259,6 +263,7 @@ static	char	const	rcsid[] = "$Id: message.c,v 1.71 2004/08/13 09:28:16 nigelhorn
259 259
 
260 260
 typedef enum { FALSE = 0, TRUE = 1 } bool;
261 261
 
262
+static	void	messageIsEncoding(message *m);
262 263
 static	unsigned char	*decodeLine(message *m, const char *line, unsigned char *buf, size_t buflen);
263 264
 static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
264 265
 static	void	squeeze(char *s);
... ...
@@ -814,16 +819,41 @@ messageGetEncoding(const message *m)
814 814
 	return(m->encodingType);
815 815
 }
816 816
 
817
+int
818
+messageAddLine(message *m, line_t *line)
819
+{
820
+	assert(m != NULL);
821
+
822
+	if(m->body_first == NULL)
823
+		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
824
+	else {
825
+		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
826
+		m->body_last = m->body_last->t_next;
827
+	}
828
+
829
+	if(m->body_last == NULL)
830
+		return -1;
831
+
832
+	m->body_last->t_next = NULL;
833
+
834
+	if(line && lineGetData(line)) {
835
+		m->body_last->t_line = lineLink(line);
836
+
837
+		messageIsEncoding(m);
838
+	} else
839
+		m->body_last->t_line = NULL;
840
+
841
+	return 1;
842
+}
843
+
817 844
 /*
818 845
  * Add the given line to the end of the given message
819 846
  * If needed a copy of the given line is taken which the caller must free
820 847
  * Line must not be terminated by a \n
821 848
  */
822 849
 int
823
-messageAddLine(message *m, const char *line, int takeCopy)
850
+messageAddStr(message *m, const char *data)
824 851
 {
825
-	static const char encoding[] = "Content-Transfer-Encoding";
826
-	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
827 852
 	assert(m != NULL);
828 853
 
829 854
 	if(m->body_first == NULL)
... ...
@@ -838,40 +868,18 @@ messageAddLine(message *m, const char *line, int takeCopy)
838 838
 
839 839
 	m->body_last->t_next = NULL;
840 840
 
841
-	if(line && *line) {
842
-		if(takeCopy) {
843
-			m->body_last->t_text = strdup(line);
844
-			if(m->body_last->t_text == NULL) {
845
-				cli_errmsg("messageAddLine: out of memory\n");
846
-				return -1;
847
-			}
848
-			/* cli_chomp(m->body_last->t_text); */
849
-		} else
850
-			m->body_last->t_text = (char *)line;
841
+	if(data && *data) {
842
+		m->body_last->t_line = lineCreate(data);
851 843
 
852
-		/*
853
-		 * See if this line marks the start of a non MIME inclusion that
854
-		 * will need to be scanned
855
-		 */
856
-		if((m->encoding == NULL) &&
857
-		   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
858
-		   (strstr(line, "7bit") == NULL))
859
-			m->encoding = m->body_last;
860
-		else if(/*(m->bounce == NULL) &&*/
861
-			(cli_filetype(line, strlen(line)) == CL_MAILFILE))
862
-				m->bounce = m->body_last;
863
-		else if((m->binhex == NULL) &&
864
-			(strncasecmp(line, binhex, sizeof(binhex) - 1) == 0))
865
-				m->binhex = m->body_last;
866
-		else if((m->uuencode == NULL) &&
867
-			((strncasecmp(line, "begin ", 6) == 0) &&
868
-			(isdigit(line[6])) &&
869
-			(isdigit(line[7])) &&
870
-			(isdigit(line[8])) &&
871
-			(line[9] == ' ')))
872
-				m->uuencode = m->body_last;
844
+		if(m->body_last->t_line == NULL) {
845
+			cli_errmsg("messageAddStr: out of memory\n");
846
+			return -1;
847
+		}
848
+		/* cli_chomp(m->body_last->t_text); */
849
+
850
+		messageIsEncoding(m);
873 851
 	} else
874
-		m->body_last->t_text = NULL;
852
+		m->body_last->t_line = NULL;
875 853
 
876 854
 	return 1;
877 855
 }
... ...
@@ -882,14 +890,14 @@ messageAddLine(message *m, const char *line, int takeCopy)
882 882
  * Line must not be terminated by a \n
883 883
  */
884 884
 int
885
-messageAddLineAtTop(message *m, const char *line)
885
+messageAddStrAtTop(message *m, const char *data)
886 886
 {
887 887
 	text *oldfirst;
888 888
 
889 889
 	assert(m != NULL);
890 890
 
891 891
 	if(m->body_first == NULL)
892
-		return messageAddLine(m, line, 1);
892
+		return messageAddLine(m, lineCreate(data));
893 893
 
894 894
 	oldfirst = m->body_first;
895 895
 	m->body_first = (text *)cli_malloc(sizeof(text));
... ...
@@ -899,16 +907,46 @@ messageAddLineAtTop(message *m, const char *line)
899 899
 	}
900 900
 
901 901
 	m->body_first->t_next = oldfirst;
902
-	m->body_first->t_text = strdup((line) ? line : "");
902
+	m->body_first->t_line = lineCreate((data) ? data : "");
903 903
 
904
-	if(m->body_first->t_text == NULL) {
905
-		cli_errmsg("messageAddLineAtTop: out of memory\n");
904
+	if(m->body_first->t_line == NULL) {
905
+		cli_errmsg("messageAddStrAtTop: out of memory\n");
906 906
 		return -1;
907 907
 	}
908 908
 	return 1;
909 909
 }
910 910
 
911 911
 /*
912
+ * See if the last line marks the start of a non MIME inclusion that
913
+ * will need to be scanned
914
+ */
915
+static void
916
+messageIsEncoding(message *m)
917
+{
918
+	static const char encoding[] = "Content-Transfer-Encoding";
919
+	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
920
+	const char *line = lineGetData(m->body_last->t_line);
921
+
922
+	if((m->encoding == NULL) &&
923
+	   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
924
+	   (strstr(line, "7bit") == NULL))
925
+		m->encoding = m->body_last;
926
+	else if(/*(m->bounce == NULL) &&*/
927
+		(cli_filetype(line, strlen(line)) == CL_MAILFILE))
928
+			m->bounce = m->body_last;
929
+	else if((m->binhex == NULL) &&
930
+		(strncasecmp(line, binhex, sizeof(binhex) - 1) == 0))
931
+			m->binhex = m->body_last;
932
+	else if((m->uuencode == NULL) &&
933
+		((strncasecmp(line, "begin ", 6) == 0) &&
934
+		(isdigit(line[6])) &&
935
+		(isdigit(line[7])) &&
936
+		(isdigit(line[8])) &&
937
+		(line[9] == ' ')))
938
+			m->uuencode = m->body_last;
939
+}
940
+
941
+/*
912 942
  * Returns a pointer to the body of the message. Note that it does NOT return
913 943
  * a copy of the data
914 944
  */
... ...
@@ -961,7 +999,7 @@ messageToBlob(message *m)
961 961
 			return NULL;
962 962
 		}
963 963
 
964
-		filename = cli_strtok(t_line->t_text, 2, " ");
964
+		filename = cli_strtok(lineGetData(t_line->t_line), 2, " ");
965 965
 
966 966
 		if(filename == NULL) {
967 967
 			cli_dbgmsg("UUencoded attachment sent with no filename\n");
... ...
@@ -1013,8 +1051,10 @@ messageToBlob(message *m)
1013 1013
 		 * See RFC1741
1014 1014
 		 */
1015 1015
 		while((t_line = t_line->t_next) != NULL)
1016
-			if(t_line->t_text)
1017
-				blobAddData(tmp, (unsigned char *)t_line->t_text, strlen(t_line->t_text));
1016
+			if(t_line->t_line) {
1017
+				const char *d = lineGetData(t_line->t_line);
1018
+				blobAddData(tmp, (unsigned char *)d, strlen(d));
1019
+			}
1018 1020
 
1019 1021
 		data = blobGetData(tmp);
1020 1022
 
... ...
@@ -1239,9 +1279,6 @@ messageToBlob(message *m)
1239 1239
 
1240 1240
 		return b;
1241 1241
 	} else {
1242
-		/*
1243
-		 * Discard attachments with no filename
1244
-		 */
1245 1242
 		filename = (char *)messageFindArgument(m, "filename");
1246 1243
 		if(filename == NULL) {
1247 1244
 			filename = (char *)messageFindArgument(m, "name");
... ...
@@ -1282,7 +1319,7 @@ messageToBlob(message *m)
1282 1282
 	do {
1283 1283
 		unsigned char data[1024];
1284 1284
 		unsigned char *uptr;
1285
-		const char *line = t_line->t_text;
1285
+		const char *line = lineGetData(t_line->t_line);
1286 1286
 
1287 1287
 		if(messageGetEncoding(m) == UUENCODE) {
1288 1288
 			/*
... ...
@@ -1353,14 +1390,12 @@ messageToText(message *m)
1353 1353
 				last = last->t_next;
1354 1354
 			}
1355 1355
 
1356
-			if((last == NULL) ||
1357
-			   ((last->t_text = strdup(t_line->t_text)) == NULL)) {
1358
-				if(last)
1359
-					free(last);
1356
+			if(last == NULL) {
1360 1357
 				if(first)
1361 1358
 					textDestroy(first);
1362 1359
 				return NULL;
1363 1360
 			}
1361
+			last->t_line = lineLink(t_line->t_line);
1364 1362
 		}
1365 1363
 	else {
1366 1364
 		if(messageGetEncoding(m) == UUENCODE) {
... ...
@@ -1380,7 +1415,7 @@ messageToText(message *m)
1380 1380
 		for(; t_line; t_line = t_line->t_next) {
1381 1381
 			unsigned char data[1024];
1382 1382
 			unsigned char *uptr;
1383
-			const char *line = t_line->t_text;
1383
+			const char *line = lineGetData(t_line->t_line);
1384 1384
 
1385 1385
 			if(messageGetEncoding(m) == BASE64) {
1386 1386
 				/*
... ...
@@ -1410,7 +1445,7 @@ messageToText(message *m)
1410 1410
 			if(last == NULL)
1411 1411
 				break;
1412 1412
 
1413
-			last->t_text = data[0] ? strdup((char *)data) : NULL;
1413
+			last->t_line = ((data[0] != '\n') && data[0]) ? lineCreate((char *)data) : NULL;
1414 1414
 
1415 1415
 			if(line && messageGetEncoding(m) == BASE64)
1416 1416
 				if(strchr(line, '='))
... ...
@@ -1418,11 +1453,9 @@ messageToText(message *m)
1418 1418
 		}
1419 1419
 		if(m->base64chars) {
1420 1420
 			unsigned char data[4];
1421
-			unsigned char *ptr;
1422 1421
 
1423 1422
 			memset(data, '\0', sizeof(data));
1424
-			ptr = decode(m, NULL, data, base64, FALSE);
1425
-			if(ptr) {
1423
+			if(decode(m, NULL, data, base64, FALSE)) {
1426 1424
 				if(first == NULL)
1427 1425
 					first = last = cli_malloc(sizeof(text));
1428 1426
 				else {
... ...
@@ -1431,7 +1464,7 @@ messageToText(message *m)
1431 1431
 				}
1432 1432
 
1433 1433
 				if(last != NULL)
1434
-					last->t_text = data[0] ? strdup((char *)data) : NULL;
1434
+					last->t_line = data[0] ? lineCreate((char *)data) : NULL;
1435 1435
 			}
1436 1436
 			m->base64chars = 0;
1437 1437
 		}
... ...
@@ -1642,10 +1675,11 @@ decodeLine(message *m, const char *line, unsigned char *buf, size_t buflen)
1642 1642
 			if(copy == NULL)
1643 1643
 				break;
1644 1644
 
1645
+			squeeze(copy);
1646
+
1645 1647
 			p2 = strchr(copy, '=');
1646 1648
 			if(p2)
1647 1649
 				*p2 = '\0';
1648
-			squeeze(copy);
1649 1650
 
1650 1651
 			/*
1651 1652
 			 * Klez doesn't always put "=" on the last line
... ...
@@ -1714,7 +1748,6 @@ squeeze(char *s)
1714 1714
  * decoded. After the last line is found, decode will be called with in = NULL
1715 1715
  * to flush these out
1716 1716
  */
1717
-#if	1
1718 1717
 static unsigned char *
1719 1718
 decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
1720 1719
 {
... ...
@@ -1866,84 +1899,6 @@ decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(
1866 1866
 	}
1867 1867
 	return out;
1868 1868
 }
1869
-#else
1870
-static unsigned char *
1871
-decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
1872
-{
1873
-	unsigned char b1, b2, b3, b4;
1874
-
1875
-	/*cli_dbgmsg("decode %s (len %d ifFast %d)\n", in, strlen(in), isFast);*/
1876
-	if(isFast)
1877
-		/* Fast decoding if not last line */
1878
-		while(*in) {
1879
-			b1 = (*decoder)(*in++);
1880
-			b2 = (*decoder)(*in++);
1881
-			b3 = (*decoder)(*in++);
1882
-			/*
1883
-			 * Put this line here to help on some compilers which
1884
-			 * can make use of some architecure's ability to
1885
-			 * multiprocess when different variables can be
1886
-			 * updated at the same time - here b3 is used in
1887
-			 * one line, b1/b2 in the next and b4 in the next after
1888
-			 * that, b3 and b4 rely on in but b1/b2 don't
1889
-			 */
1890
-			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1891
-			b4 = (*decoder)(*in++);
1892
-			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
1893
-			*out++ = (b3 << 6) | (b4 & 0x3F);
1894
-		}
1895
-	else
1896
-		/* Slower decoding for last line */
1897
-		while(*in) {
1898
-			int nbytes;
1899
-
1900
-			b1 = (*decoder)(*in++);
1901
-			if(*in == '\0') {
1902
-				b2 = '\0';
1903
-				nbytes = 1;
1904
-			} else {
1905
-				b2 = (*decoder)(*in++);
1906
-				if(*in == '\0') {
1907
-					b3 = '\0';
1908
-					nbytes = 2;
1909
-				} else {
1910
-					b3 = (*decoder)(*in++);
1911
-
1912
-					if(*in == '\0') {
1913
-						b4 = '\0';
1914
-						nbytes = 3;
1915
-					} else {
1916
-						b4 = (*decoder)(*in++);
1917
-						nbytes = 4;
1918
-					}
1919
-				}
1920
-			}
1921
-
1922
-			switch(nbytes) {
1923
-				case 3:
1924
-					b4 = '\0';
1925
-					/* fall through */
1926
-				case 4:
1927
-					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1928
-					*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
1929
-					*out++ = (b3 << 6) | (b4 & 0x3F);
1930
-					break;
1931
-				case 2:
1932
-					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1933
-					*out++ = b2 << 4;
1934
-					break;
1935
-				case 1:
1936
-					*out++ = b1 << 2;
1937
-					break;
1938
-				default:
1939
-					assert(0);
1940
-			}
1941
-			if(nbytes != 4)
1942
-				break;
1943
-		}
1944
-	return out;
1945
-}
1946
-#endif
1947 1869
 
1948 1870
 static unsigned char
1949 1871
 hex(char c)
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: message.h,v $
19
+ * Revision 1.13  2004/08/21 11:57:57  nigelhorne
20
+ * Use line.[ch]
21
+ *
19 22
  * Revision 1.12  2004/07/20 14:35:29  nigelhorne
20 23
  * Some MYDOOM.I were getting through
21 24
  *
... ...
@@ -83,8 +86,9 @@ void	messageAddArguments(message *m, const char *arg);
83 83
 const	char	*messageFindArgument(const message *m, const char *variable);
84 84
 void	messageSetEncoding(message *m, const char *enctype);
85 85
 encoding_type	messageGetEncoding(const message *m);
86
-int	messageAddLine(message *m, const char *line, int takeCopy);
87
-int	messageAddLineAtTop(message *m, const char *line);
86
+int	messageAddLine(message *m, line_t *line);
87
+int	messageAddStr(message *m, const char *data);
88
+int	messageAddStrAtTop(message *m, const char *data);
88 89
 const	text	*messageGetBody(const message *m);
89 90
 void	messageClean(message *m);
90 91
 blob	*messageToBlob(message *m);
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: text.c,v $
19
+ * Revision 1.9  2004/08/21 11:57:57  nigelhorne
20
+ * Use line.[ch]
21
+ *
19 22
  * Revision 1.8  2004/07/20 14:35:29  nigelhorne
20 23
  * Some MYDOOM.I were getting through
21 24
  *
... ...
@@ -33,7 +36,7 @@
33 33
  *
34 34
  */
35 35
 
36
-static	char	const	rcsid[] = "$Id: text.c,v 1.8 2004/07/20 14:35:29 nigelhorne Exp $";
36
+static	char	const	rcsid[] = "$Id: text.c,v 1.9 2004/08/21 11:57:57 nigelhorne Exp $";
37 37
 
38 38
 #if HAVE_CONFIG_H
39 39
 #include "clamav-config.h"
... ...
@@ -45,13 +48,16 @@ static	char	const	rcsid[] = "$Id: text.c,v 1.8 2004/07/20 14:35:29 nigelhorne Ex
45 45
 #include <sys/malloc.h>
46 46
 #else
47 47
 #ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */
48
+#ifndef	C_BSD	/* BSD now uses stdlib.h */
48 49
 #include <malloc.h>
49 50
 #endif
50 51
 #endif
52
+#endif
51 53
 #include <string.h>
52 54
 #include <ctype.h>
53 55
 #include <assert.h>
54 56
 
57
+#include "line.h"
55 58
 #include "mbox.h"
56 59
 #include "blob.h"
57 60
 #include "text.h"
... ...
@@ -62,8 +68,8 @@ textDestroy(text *t_head)
62 62
 {
63 63
 	while(t_head) {
64 64
 		text *t_next = t_head->t_next;
65
-		if(t_head->t_text)
66
-			free(t_head->t_text);
65
+		if(t_head->t_line)
66
+			lineUnlink(t_head->t_line);
67 67
 		free(t_head);
68 68
 		t_head = t_next;
69 69
 	}
... ...
@@ -154,11 +160,10 @@ textCopy(const text *t_head)
154 154
 
155 155
 		assert(last != NULL);
156 156
 
157
-		if(t_head->t_text) {
158
-			last->t_text = strdup(t_head->t_text);
159
-			assert(last->t_text != NULL);
160
-		} else
161
-			last->t_text = NULL;
157
+		if(t_head->t_line)
158
+			last->t_line = lineLink(t_head->t_line);
159
+		else
160
+			last->t_line = NULL;
162 161
 
163 162
 		t_head = t_head->t_next;
164 163
 	}
... ...
@@ -192,11 +197,10 @@ textAdd(text *t_head, const text *t)
192 192
 
193 193
 		assert(t_head != NULL);
194 194
 
195
-		if(t->t_text) {
196
-			t_head->t_text = strdup(t->t_text);
197
-			assert(t_head->t_text != NULL);
198
-		} else
199
-			t_head->t_text = NULL;
195
+		if(t->t_line)
196
+			t_head->t_line = lineLink(t->t_line);
197
+		else
198
+			t_head->t_line = NULL;
200 199
 
201 200
 		t = t->t_next;
202 201
 	}
... ...
@@ -248,16 +252,19 @@ textToBlob(const text *t, blob *b)
248 248
 	}
249 249
 
250 250
 	for(t1 = t; t1; t1 = t1->t_next)
251
-		if(t1->t_text)
252
-			s += strlen(t1->t_text) + 1;
251
+		if(t1->t_line)
252
+			s += strlen(lineGetData(t1->t_line)) + 1;
253 253
 		else
254 254
 			s++;
255 255
 
256 256
 	blobGrow(b, s);
257 257
 
258 258
 	do {
259
-		if(t->t_text)
260
-			blobAddData(b, (unsigned char *)t->t_text, strlen(t->t_text));
259
+		if(t->t_line) {
260
+			const char *l = lineGetData(t->t_line);
261
+
262
+			blobAddData(b, (unsigned char *)l, strlen(l));
263
+		}
261 264
 		blobAddData(b, (unsigned char *)"\n", 1);
262 265
 	} while((t = t->t_next) != NULL);
263 266
 
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: text.h,v $
19
+ * Revision 1.5  2004/08/21 11:57:57  nigelhorne
20
+ * Use line.[ch]
21
+ *
19 22
  * Revision 1.4  2004/07/20 14:35:29  nigelhorne
20 23
  * Some MYDOOM.I were getting through
21 24
  *
... ...
@@ -25,7 +28,7 @@
25 25
  */
26 26
 
27 27
 typedef struct text {
28
-	char	*t_text;	/* NULL if the line is empty */
28
+	line_t	*t_line;	/* NULL if the line is empty */
29 29
 	struct	text	*t_next;
30 30
 } text;
31 31