git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@427 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/03/21 18:43:17... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Sun Mar 21 09:51:45 GMT 2004 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: Faster scanning for non MIME messages, only scan the message |
|
4 |
+ once for binhex, uuencode, bounces etc. |
|
5 |
+ |
|
1 | 6 |
Sat Mar 20 19:37:11 GMT 2004 (njh) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/message.c: Removed the duplicated code from bounce checks |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.56 2004/03/21 09:41:26 nigelhorne |
|
21 |
+ * Faster scanning for non MIME messages |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.55 2004/03/20 17:39:23 nigelhorne |
21 | 24 |
* First attempt to handle all bounces |
22 | 25 |
* |
... | ... |
@@ -156,7 +159,7 @@ |
156 | 156 |
* Compilable under SCO; removed duplicate code with message.c |
157 | 157 |
* |
158 | 158 |
*/ |
159 |
-static char const rcsid[] = "$Id: mbox.c,v 1.55 2004/03/20 17:39:23 nigelhorne Exp $"; |
|
159 |
+static char const rcsid[] = "$Id: mbox.c,v 1.56 2004/03/21 09:41:26 nigelhorne Exp $"; |
|
160 | 160 |
|
161 | 161 |
#if HAVE_CONFIG_H |
162 | 162 |
#include "clamav-config.h" |
... | ... |
@@ -225,7 +228,6 @@ static bool continuationMarker(const char *line); |
225 | 225 |
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg); |
226 | 226 |
static void saveTextPart(message *m, const char *dir); |
227 | 227 |
static bool saveFile(const blob *b, const char *dir); |
228 |
-static bool isAllText(const message *m); |
|
229 | 228 |
|
230 | 229 |
/* Maximum number of attachments that we accept */ |
231 | 230 |
#define MAX_ATTACHMENTS 10 |
... | ... |
@@ -987,7 +989,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
987 | 987 |
break; |
988 | 988 |
case MESSAGE: |
989 | 989 |
cli_dbgmsg("Found message inside multipart\n"); |
990 |
- if(isAllText(aMessage)) |
|
990 |
+ if(messageIsAllText(aMessage)) |
|
991 | 991 |
continue; |
992 | 992 |
|
993 | 993 |
body = parseEmailHeaders(aMessage, rfc821Table); |
... | ... |
@@ -1359,7 +1361,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1359 | 1359 |
} |
1360 | 1360 |
blobDestroy(b); |
1361 | 1361 |
} |
1362 |
- } else if((!isAllText(mainMessage)) && |
|
1362 |
+ } else if((!messageIsAllText(mainMessage)) && |
|
1363 | 1363 |
((t_line = bounceBegin(mainMessage)) != NULL)) { |
1364 | 1364 |
/* |
1365 | 1365 |
* Attempt to save the original (unbounced) |
... | ... |
@@ -1367,31 +1369,9 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1367 | 1367 |
* directory and call us again (with any luck) |
1368 | 1368 |
* having found an e-mail message to handle |
1369 | 1369 |
*/ |
1370 |
- |
|
1371 |
- /* |
|
1372 |
- * Ignore the blank lines before the message |
|
1373 |
- * proper |
|
1374 |
- */ |
|
1375 |
- /*while((t_line = t_line->t_next) != NULL) |
|
1376 |
- if(strcmp(t_line->t_text, "") != 0) |
|
1377 |
- break;*/ |
|
1378 |
- |
|
1379 |
- if(t_line == NULL) { |
|
1380 |
- cli_dbgmsg("Not found bounce message\n"); |
|
1381 |
- saveTextPart(mainMessage, dir); |
|
1382 |
- } else if((b = blobCreate()) != NULL) { |
|
1370 |
+ if((b = blobCreate()) != NULL) { |
|
1383 | 1371 |
cli_dbgmsg("Found a bounce message\n"); |
1384 |
- /* |
|
1385 |
- * Ensure the when any bounce messages |
|
1386 |
- * that have been saved in the |
|
1387 |
- * temporary directory are passed to |
|
1388 |
- * cl_mbox() by inserting a header line |
|
1389 |
- * that scanners.c recognises as a mail |
|
1390 |
- * |
|
1391 |
- * Fix thanks to "Andrey J. Melnikoff |
|
1392 |
- * (TEMHOTA)" <temnota@kmv.ru> |
|
1393 |
- */ |
|
1394 |
- /*blobAddData(b, (unsigned char *)"Received: by clamd\n", 19);*/ |
|
1372 |
+ |
|
1395 | 1373 |
do { |
1396 | 1374 |
blobAddData(b, (unsigned char *)t_line->t_text, strlen(t_line->t_text)); |
1397 | 1375 |
blobAddData(b, (unsigned char *)"\n", 1); |
... | ... |
@@ -1413,7 +1393,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1413 | 1413 |
* content encoding statement don't |
1414 | 1414 |
* bother saving to scan, it's safe |
1415 | 1415 |
*/ |
1416 |
- saveIt = !isAllText(mainMessage); |
|
1416 |
+ saveIt = !messageIsAllText(mainMessage); |
|
1417 | 1417 |
else |
1418 | 1418 |
saveIt = TRUE; |
1419 | 1419 |
|
... | ... |
@@ -1853,27 +1833,3 @@ saveFile(const blob *b, const char *dir) |
1853 | 1853 |
|
1854 | 1854 |
return (close(fd) >= 0); |
1855 | 1855 |
} |
1856 |
- |
|
1857 |
-/* |
|
1858 |
- * If a message does not contain another message which could be harmful |
|
1859 |
- * it is deemed to be safe. |
|
1860 |
- * |
|
1861 |
- * TODO: ensure nothing can get through this |
|
1862 |
- * |
|
1863 |
- * TODO: check to see if we need to |
|
1864 |
- * find anything else, perhaps anything |
|
1865 |
- * from the RFC821 table? |
|
1866 |
- */ |
|
1867 |
-static bool |
|
1868 |
-isAllText(const message *m) |
|
1869 |
-{ |
|
1870 |
- const text *t; |
|
1871 |
- |
|
1872 |
- for(t = messageGetBody(m); t; t = t->t_next) |
|
1873 |
- if(strncasecmp(t->t_text, |
|
1874 |
- "Content-Transfer-Encoding", |
|
1875 |
- strlen("Content-Transfer-Encoding")) == 0) |
|
1876 |
- return FALSE; |
|
1877 |
- |
|
1878 |
- return TRUE; |
|
1879 |
-} |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: message.c,v $ |
20 |
+ * Revision 1.46 2004/03/21 09:41:27 nigelhorne |
|
21 |
+ * Faster scanning for non MIME messages |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.45 2004/03/20 19:26:48 nigelhorne |
21 | 24 |
* Second attempt to handle all bounces |
22 | 25 |
* |
... | ... |
@@ -132,7 +135,7 @@ |
132 | 132 |
* uuencodebegin() no longer static |
133 | 133 |
* |
134 | 134 |
*/ |
135 |
-static char const rcsid[] = "$Id: message.c,v 1.45 2004/03/20 19:26:48 nigelhorne Exp $"; |
|
135 |
+static char const rcsid[] = "$Id: message.c,v 1.46 2004/03/21 09:41:27 nigelhorne Exp $"; |
|
136 | 136 |
|
137 | 137 |
#if HAVE_CONFIG_H |
138 | 138 |
#include "clamav-config.h" |
... | ... |
@@ -259,6 +262,7 @@ messageReset(message *m) |
259 | 259 |
|
260 | 260 |
memset(m, '\0', sizeof(message)); |
261 | 261 |
m->mimeType = NOMIME; |
262 |
+ m->encodingType = NOENCODING; |
|
262 | 263 |
} |
263 | 264 |
|
264 | 265 |
void |
... | ... |
@@ -668,6 +672,8 @@ messageGetEncoding(const message *m) |
668 | 668 |
void |
669 | 669 |
messageAddLine(message *m, const char *line) |
670 | 670 |
{ |
671 |
+ static const char encoding[] = "Content-Transfer-Encoding"; |
|
672 |
+ static const char binhex[] = "(This file must be converted with BinHex 4.0)"; |
|
671 | 673 |
assert(m != NULL); |
672 | 674 |
|
673 | 675 |
if(m->body_first == NULL) |
... | ... |
@@ -677,12 +683,38 @@ messageAddLine(message *m, const char *line) |
677 | 677 |
m->body_last = m->body_last->t_next; |
678 | 678 |
} |
679 | 679 |
|
680 |
+ if(m->body_last == NULL) |
|
681 |
+ return; |
|
682 |
+ |
|
680 | 683 |
m->body_last->t_next = NULL; |
681 | 684 |
|
682 | 685 |
m->body_last->t_text = strdup((line) ? line : ""); |
683 | 686 |
|
684 | 687 |
assert(m->body_last->t_text != NULL); |
685 | 688 |
assert(m->body_first != NULL); |
689 |
+ |
|
690 |
+ /* |
|
691 |
+ * See if this line marks the start of a non MIME inclusion that |
|
692 |
+ * will need to be scanned |
|
693 |
+ */ |
|
694 |
+ if(line) { |
|
695 |
+ if((m->encoding == NULL) && |
|
696 |
+ (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0)) |
|
697 |
+ m->encoding = m->body_last; |
|
698 |
+ else if((m->bounce == NULL) && |
|
699 |
+ (cli_filetype(line, strlen(line)) == CL_MAILFILE)) |
|
700 |
+ m->bounce = m->body_last; |
|
701 |
+ else if((m->binhex == NULL) && |
|
702 |
+ (strncasecmp(line, binhex, sizeof(binhex) - 1) == 0)) |
|
703 |
+ m->binhex = m->body_last; |
|
704 |
+ else if((m->uuencode == NULL) && |
|
705 |
+ ((strncasecmp(line, "begin ", 6) == 0) && |
|
706 |
+ (isdigit(line[6])) && |
|
707 |
+ (isdigit(line[7])) && |
|
708 |
+ (isdigit(line[8])) && |
|
709 |
+ (line[9] == ' '))) |
|
710 |
+ m->uuencode = m->body_last; |
|
711 |
+ } |
|
686 | 712 |
} |
687 | 713 |
|
688 | 714 |
const text * |
... | ... |
@@ -1115,6 +1147,7 @@ messageToText(const message *m) |
1115 | 1115 |
/* |
1116 | 1116 |
* Scan to find the UUENCODED message (if any) |
1117 | 1117 |
*/ |
1118 |
+#if 0 |
|
1118 | 1119 |
const text * |
1119 | 1120 |
uuencodeBegin(const message *m) |
1120 | 1121 |
{ |
... | ... |
@@ -1138,10 +1171,18 @@ uuencodeBegin(const message *m) |
1138 | 1138 |
} |
1139 | 1139 |
return NULL; |
1140 | 1140 |
} |
1141 |
+#else |
|
1142 |
+const text * |
|
1143 |
+uuencodeBegin(const message *m) |
|
1144 |
+{ |
|
1145 |
+ return m->uuencode; |
|
1146 |
+} |
|
1147 |
+#endif |
|
1141 | 1148 |
|
1142 | 1149 |
/* |
1143 | 1150 |
* Scan to find the BINHEX message (if any) |
1144 | 1151 |
*/ |
1152 |
+#if 0 |
|
1145 | 1153 |
const text * |
1146 | 1154 |
binhexBegin(const message *m) |
1147 | 1155 |
{ |
... | ... |
@@ -1153,11 +1194,19 @@ binhexBegin(const message *m) |
1153 | 1153 |
|
1154 | 1154 |
return NULL; |
1155 | 1155 |
} |
1156 |
+#else |
|
1157 |
+const text * |
|
1158 |
+binhexBegin(const message *m) |
|
1159 |
+{ |
|
1160 |
+ return m->binhex; |
|
1161 |
+} |
|
1162 |
+#endif |
|
1156 | 1163 |
|
1157 | 1164 |
/* |
1158 | 1165 |
* Scan to find a bounce message. There is no standard for these, not |
1159 | 1166 |
* even a convention, so don't expect this to be foolproof |
1160 | 1167 |
*/ |
1168 |
+#if 0 |
|
1161 | 1169 |
const text * |
1162 | 1170 |
bounceBegin(const message *m) |
1163 | 1171 |
{ |
... | ... |
@@ -1169,6 +1218,45 @@ bounceBegin(const message *m) |
1169 | 1169 |
|
1170 | 1170 |
return NULL; |
1171 | 1171 |
} |
1172 |
+#else |
|
1173 |
+const text * |
|
1174 |
+bounceBegin(const message *m) |
|
1175 |
+{ |
|
1176 |
+ return m->bounce; |
|
1177 |
+} |
|
1178 |
+#endif |
|
1179 |
+ |
|
1180 |
+/* |
|
1181 |
+ * If a message does not contain another message which could be harmful |
|
1182 |
+ * it is deemed to be safe. |
|
1183 |
+ * |
|
1184 |
+ * TODO: ensure nothing can get through this |
|
1185 |
+ * |
|
1186 |
+ * TODO: check to see if we need to |
|
1187 |
+ * find anything else, perhaps anything |
|
1188 |
+ * from the RFC821 table? |
|
1189 |
+ */ |
|
1190 |
+#if 0 |
|
1191 |
+int |
|
1192 |
+messageIsAllText(const message *m) |
|
1193 |
+{ |
|
1194 |
+ const text *t; |
|
1195 |
+ |
|
1196 |
+ for(t = messageGetBody(m); t; t = t->t_next) |
|
1197 |
+ if(strncasecmp(t->t_text, |
|
1198 |
+ "Content-Transfer-Encoding", |
|
1199 |
+ strlen("Content-Transfer-Encoding")) == 0) |
|
1200 |
+ return 0; |
|
1201 |
+ |
|
1202 |
+ return 1; |
|
1203 |
+} |
|
1204 |
+#else |
|
1205 |
+int |
|
1206 |
+messageIsAllText(const message *m) |
|
1207 |
+{ |
|
1208 |
+ return (m->encoding == NULL); |
|
1209 |
+} |
|
1210 |
+#endif |
|
1172 | 1211 |
|
1173 | 1212 |
/* |
1174 | 1213 |
* Decode a line and add it to a buffer, return the end of the buffer |
... | ... |
@@ -16,6 +16,9 @@ |
16 | 16 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
17 | 17 |
* |
18 | 18 |
* $Log: message.h,v $ |
19 |
+ * Revision 1.6 2004/03/21 09:41:27 nigelhorne |
|
20 |
+ * Faster scanning for non MIME messages |
|
21 |
+ * |
|
19 | 22 |
* Revision 1.5 2004/01/28 10:15:24 nigelhorne |
20 | 23 |
* Added support to scan some bounce messages |
21 | 24 |
* |
... | ... |
@@ -35,6 +38,14 @@ typedef struct message { |
35 | 35 |
char **mimeArguments; |
36 | 36 |
char *mimeDispositionType; /* probably attachment */ |
37 | 37 |
text *body_first, *body_last; |
38 |
+ /* |
|
39 |
+ * Markers for the start of various non MIME messages that could |
|
40 |
+ * be included within this message |
|
41 |
+ */ |
|
42 |
+ text *bounce; /* start of a bounced message */ |
|
43 |
+ text *binhex; /* start of a binhex message */ |
|
44 |
+ text *uuencode; /* start of a uuencoded message */ |
|
45 |
+ text *encoding; /* is the non MIME message encoded? */ |
|
38 | 46 |
} message; |
39 | 47 |
|
40 | 48 |
message *messageCreate(void); |
... | ... |
@@ -59,5 +70,6 @@ text *messageToText(const message *m); |
59 | 59 |
const text *uuencodeBegin(const message *m); |
60 | 60 |
const text *binhexBegin(const message *m); |
61 | 61 |
const text *bounceBegin(const message *m); |
62 |
+int messageIsAllText(const message *m); |
|
62 | 63 |
|
63 | 64 |
#endif /*_MESSAGE_H*/ |