Browse code

Faster scanning for non MIME messages

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@427 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/03/21 18:43:17
Showing 4 changed files
... ...
@@ -1,3 +1,8 @@
1
+Sun Mar 21 09:51:45 GMT 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	Faster scanning for non MIME messages: only scan the message
4
+  	once for binhex, uuencode, bounces etc.
5
+
1 6
 Sat Mar 20 19:37:11 GMT 2004 (njh)
2 7
 ----------------------------------
3 8
   * libclamav/message.c:	Removed the duplicated code from bounce checks
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.56  2004/03/21 09:41:26  nigelhorne
21
+ * Faster scanning for non MIME messages
22
+ *
20 23
  * Revision 1.55  2004/03/20 17:39:23  nigelhorne
21 24
  * First attempt to handle all bounces
22 25
  *
... ...
@@ -156,7 +159,7 @@
156 156
  * Compilable under SCO; removed duplicate code with message.c
157 157
  *
158 158
  */
159
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.55 2004/03/20 17:39:23 nigelhorne Exp $";
159
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.56 2004/03/21 09:41:26 nigelhorne Exp $";
160 160
 
161 161
 #if HAVE_CONFIG_H
162 162
 #include "clamav-config.h"
... ...
@@ -225,7 +228,6 @@ static	bool	continuationMarker(const char *line);
225 225
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
226 226
 static	void	saveTextPart(message *m, const char *dir);
227 227
 static	bool	saveFile(const blob *b, const char *dir);
228
-static	bool	isAllText(const message *m);
229 228
 
230 229
 /* Maximum number of attachments that we accept */
231 230
 #define	MAX_ATTACHMENTS	10
... ...
@@ -987,7 +989,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
987 987
 						break;
988 988
 					case MESSAGE:
989 989
 						cli_dbgmsg("Found message inside multipart\n");
990
-						if(isAllText(aMessage))
990
+						if(messageIsAllText(aMessage))
991 991
 							continue;
992 992
 
993 993
 						body = parseEmailHeaders(aMessage, rfc821Table);
... ...
@@ -1359,7 +1361,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1359 1359
 					}
1360 1360
 					blobDestroy(b);
1361 1361
 				}
1362
-			} else if((!isAllText(mainMessage)) &&
1362
+			} else if((!messageIsAllText(mainMessage)) &&
1363 1363
 				  ((t_line = bounceBegin(mainMessage)) != NULL)) {
1364 1364
 				/*
1365 1365
 				 * Attempt to save the original (unbounced)
... ...
@@ -1367,31 +1369,9 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1367 1367
 				 * directory and call us again (with any luck)
1368 1368
 				 * having found an e-mail message to handle
1369 1369
 				 */
1370
-
1371
-				/*
1372
-				 * Ignore the blank lines before the message
1373
-				 * proper
1374
-				 */
1375
-				/*while((t_line = t_line->t_next) != NULL)
1376
-					if(strcmp(t_line->t_text, "") != 0)
1377
-						break;*/
1378
-
1379
-				if(t_line == NULL) {
1380
-					cli_dbgmsg("Not found bounce message\n");
1381
-					saveTextPart(mainMessage, dir);
1382
-				} else if((b = blobCreate()) != NULL) {
1370
+				if((b = blobCreate()) != NULL) {
1383 1371
 					cli_dbgmsg("Found a bounce message\n");
1384
-					/*
1385
-					 * Ensure the when any bounce messages
1386
-					 * that have been saved in the
1387
-					 * temporary directory are passed to
1388
-					 * cl_mbox() by inserting a header line
1389
-					 * that scanners.c recognises as a mail
1390
-					 *
1391
-					 * Fix thanks to "Andrey J. Melnikoff
1392
-					 * (TEMHOTA)" <temnota@kmv.ru>
1393
-					 */
1394
-					/*blobAddData(b, (unsigned char *)"Received: by clamd\n", 19);*/
1372
+
1395 1373
 					do {
1396 1374
 						blobAddData(b, (unsigned char *)t_line->t_text, strlen(t_line->t_text));
1397 1375
 						blobAddData(b, (unsigned char *)"\n", 1);
... ...
@@ -1413,7 +1393,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1413 1413
 					 * content encoding statement don't
1414 1414
 					 * bother saving to scan, it's safe
1415 1415
 					 */
1416
-					saveIt = !isAllText(mainMessage);
1416
+					saveIt = !messageIsAllText(mainMessage);
1417 1417
 				else
1418 1418
 					saveIt = TRUE;
1419 1419
 
... ...
@@ -1853,27 +1833,3 @@ saveFile(const blob *b, const char *dir)
1853 1853
 
1854 1854
 	return (close(fd) >= 0);
1855 1855
 }
1856
-
1857
-/*
1858
- * If a message doesn't not contain another message which could be harmful
1859
- * it is deemed to be safe.
1860
- *
1861
- * TODO: ensure nothing can get through this
1862
- *
1863
- * TODO: check to see if we need to
1864
- * find anything else, perhaps anything
1865
- * from the RFC821 table?
1866
- */
1867
-static bool
1868
-isAllText(const message *m)
1869
-{
1870
-	const text *t;
1871
-
1872
-	for(t = messageGetBody(m); t; t = t->t_next)
1873
-		if(strncasecmp(t->t_text,
1874
-			"Content-Transfer-Encoding",
1875
-			strlen("Content-Transfer-Encoding")) == 0)
1876
-				return FALSE;
1877
-
1878
-	return TRUE;
1879
-}
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.46  2004/03/21 09:41:27  nigelhorne
21
+ * Faster scanning for non MIME messages
22
+ *
20 23
  * Revision 1.45  2004/03/20 19:26:48  nigelhorne
21 24
  * Second attempt to handle all bounces
22 25
  *
... ...
@@ -132,7 +135,7 @@
132 132
  * uuencodebegin() no longer static
133 133
  *
134 134
  */
135
-static	char	const	rcsid[] = "$Id: message.c,v 1.45 2004/03/20 19:26:48 nigelhorne Exp $";
135
+static	char	const	rcsid[] = "$Id: message.c,v 1.46 2004/03/21 09:41:27 nigelhorne Exp $";
136 136
 
137 137
 #if HAVE_CONFIG_H
138 138
 #include "clamav-config.h"
... ...
@@ -259,6 +262,7 @@ messageReset(message *m)
259 259
 
260 260
 	memset(m, '\0', sizeof(message));
261 261
 	m->mimeType = NOMIME;
262
+	m->encodingType = NOENCODING;
262 263
 }
263 264
 
264 265
 void
... ...
@@ -668,6 +672,8 @@ messageGetEncoding(const message *m)
668 668
 void
669 669
 messageAddLine(message *m, const char *line)
670 670
 {
671
+	static const char encoding[] = "Content-Transfer-Encoding";
672
+	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
671 673
 	assert(m != NULL);
672 674
 
673 675
 	if(m->body_first == NULL)
... ...
@@ -677,12 +683,38 @@ messageAddLine(message *m, const char *line)
677 677
 		m->body_last = m->body_last->t_next;
678 678
 	}
679 679
 
680
+	if(m->body_last == NULL)
681
+		return;
682
+
680 683
 	m->body_last->t_next = NULL;
681 684
 
682 685
 	m->body_last->t_text = strdup((line) ? line : "");
683 686
 
684 687
 	assert(m->body_last->t_text != NULL);
685 688
 	assert(m->body_first != NULL);
689
+
690
+	/*
691
+	 * See if this line marks the start of a non MIME inclusion that
692
+	 * will need to be scanned
693
+	 */
694
+	if(line) {
695
+		if((m->encoding == NULL) &&
696
+		   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0))
697
+			m->encoding = m->body_last;
698
+		else if((m->bounce == NULL) &&
699
+			(cli_filetype(line, strlen(line)) == CL_MAILFILE))
700
+				m->bounce = m->body_last;
701
+		else if((m->binhex == NULL) &&
702
+			(strncasecmp(line, binhex, sizeof(binhex) - 1) == 0))
703
+				m->binhex = m->body_last;
704
+		else if((m->uuencode == NULL) &&
705
+			((strncasecmp(line, "begin ", 6) == 0) &&
706
+			(isdigit(line[6])) &&
707
+			(isdigit(line[7])) &&
708
+			(isdigit(line[8])) &&
709
+			(line[9] == ' ')))
710
+				m->uuencode = m->body_last;
711
+	}
686 712
 }
687 713
 
688 714
 const text *
... ...
@@ -1115,6 +1147,7 @@ messageToText(const message *m)
1115 1115
 /*
1116 1116
  * Scan to find the UUENCODED message (if any)
1117 1117
  */
1118
+#if	0
1118 1119
 const text *
1119 1120
 uuencodeBegin(const message *m)
1120 1121
 {
... ...
@@ -1138,10 +1171,18 @@ uuencodeBegin(const message *m)
1138 1138
 	}
1139 1139
 	return NULL;
1140 1140
 }
1141
+#else
1142
+const text *
1143
+uuencodeBegin(const message *m)
1144
+{
1145
+	return m->uuencode;
1146
+}
1147
+#endif
1141 1148
 
1142 1149
 /*
1143 1150
  * Scan to find the BINHEX message (if any)
1144 1151
  */
1152
+#if	0
1145 1153
 const text *
1146 1154
 binhexBegin(const message *m)
1147 1155
 {
... ...
@@ -1153,11 +1194,19 @@ binhexBegin(const message *m)
1153 1153
 
1154 1154
 	return NULL;
1155 1155
 }
1156
+#else
1157
+const text *
1158
+binhexBegin(const message *m)
1159
+{
1160
+	return m->binhex;
1161
+}
1162
+#endif
1156 1163
 
1157 1164
 /*
1158 1165
  * Scan to find a bounce message. There is no standard for these, not
1159 1166
  * even a convention, so don't expect this to be foolproof
1160 1167
  */
1168
+#if	0
1161 1169
 const text *
1162 1170
 bounceBegin(const message *m)
1163 1171
 {
... ...
@@ -1169,6 +1218,45 @@ bounceBegin(const message *m)
1169 1169
 
1170 1170
 	return NULL;
1171 1171
 }
1172
+#else
1173
+const text *
1174
+bounceBegin(const message *m)
1175
+{
1176
+	return m->bounce;
1177
+}
1178
+#endif
1179
+
1180
+/*
1181
+ * If a message does not contain another message which could be harmful
1182
+ * it is deemed to be safe.
1183
+ *
1184
+ * TODO: ensure nothing can get through this
1185
+ *
1186
+ * TODO: check to see if we need to
1187
+ * find anything else, perhaps anything
1188
+ * from the RFC821 table?
1189
+ */
1190
+#if	0
1191
+int
1192
+messageIsAllText(const message *m)
1193
+{
1194
+	const text *t;
1195
+
1196
+	for(t = messageGetBody(m); t; t = t->t_next)
1197
+		if(strncasecmp(t->t_text,
1198
+			"Content-Transfer-Encoding",
1199
+			strlen("Content-Transfer-Encoding")) == 0)
1200
+				return 0;
1201
+
1202
+	return 1;
1203
+}
1204
+#else
1205
+int
1206
+messageIsAllText(const message *m)
1207
+{
1208
+	return (m->encoding == NULL);
1209
+}
1210
+#endif
1172 1211
 
1173 1212
 /*
1174 1213
  * Decode a line and add it to a buffer, return the end of the buffer
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: message.h,v $
19
+ * Revision 1.6  2004/03/21 09:41:27  nigelhorne
20
+ * Faster scanning for non MIME messages
21
+ *
19 22
  * Revision 1.5  2004/01/28 10:15:24  nigelhorne
20 23
  * Added support to scan some bounce messages
21 24
  *
... ...
@@ -35,6 +38,14 @@ typedef struct message {
35 35
 	char	**mimeArguments;
36 36
 	char	*mimeDispositionType;	/* probably attachment */
37 37
 	text	*body_first, *body_last;
38
+	/*
39
+	 * Markers for the start of various non MIME messages that could
40
+	 * be included within this message
41
+	 */
42
+	text	*bounce;	/* start of a bounced message */
43
+	text	*binhex;	/* start of a binhex message */
44
+	text	*uuencode;	/* start of a uuencoded message */
45
+	text	*encoding;	/* first Content-Transfer-Encoding line in the body, or NULL */
38 46
 } message;
39 47
 
40 48
 message	*messageCreate(void);
... ...
@@ -59,5 +70,6 @@ text	*messageToText(const message *m);
59 59
 const	text	*uuencodeBegin(const message *m);
60 60
 const	text	*binhexBegin(const message *m);
61 61
 const	text	*bounceBegin(const message *m);
62
+int	messageIsAllText(const message *m);
62 63
 
63 64
 #endif	/*_MESSAGE_H*/