git-svn: trunk@2115
Nigel Horne authored on 2006/07/24 21:14:46... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Mon Jul 24 13:13:35 BST 2006 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: Some HTML.Phishing.Bank-598 were not being caught, |
|
4 |
+ reported by Sven |
|
5 |
+ |
|
1 | 6 |
Sun Jul 23 20:13:46 BST 2006 (njh) |
2 | 7 |
---------------------------------- |
3 | 8 |
* clamav-milter: Call res_close() and fix blacklist mutex |
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
17 | 17 |
* MA 02110-1301, USA. |
18 | 18 |
*/ |
19 |
-static char const rcsid[] = "$Id: mbox.c,v 1.323 2006/07/12 21:21:25 njh Exp $"; |
|
19 |
+static char const rcsid[] = "$Id: mbox.c,v 1.324 2006/07/24 12:14:46 njh Exp $"; |
|
20 | 20 |
|
21 | 21 |
#if HAVE_CONFIG_H |
22 | 22 |
#include "clamav-config.h" |
... | ... |
@@ -218,6 +218,7 @@ static bool exportBinhexMessage(const char *dir, message *m); |
218 | 218 |
static int exportBounceMessage(text *start, const mbox_ctx *ctx); |
219 | 219 |
static message *do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx *mctx, message *messageIn, text **tptr); |
220 | 220 |
static int count_quotes(const char *buf); |
221 |
+static bool next_is_folded_header(const text *t); |
|
221 | 222 |
|
222 | 223 |
static void checkURLs(message *m, const char *dir); |
223 | 224 |
#ifdef WITH_CURL |
... | ... |
@@ -1747,16 +1748,9 @@ parseEmailHeaders(message *m, const table_t *rfc821) |
1747 | 1747 |
|
1748 | 1748 |
assert(fullline != NULL); |
1749 | 1749 |
|
1750 |
- if(t->t_next && (t->t_next->t_line != NULL)) |
|
1751 |
- /* |
|
1752 |
- * Section B.2 of RFC822 says TAB or |
|
1753 |
- * SPACE means a continuation of the |
|
1754 |
- * previous entry. |
|
1755 |
- * |
|
1756 |
- * Add all the arguments on the line |
|
1757 |
- */ |
|
1758 |
- if(isblank(lineGetData(t->t_next->t_line)[0])) |
|
1759 |
- continue; |
|
1750 |
+ if(next_is_folded_header(t)) |
|
1751 |
+ /* Add arguments to this line */ |
|
1752 |
+ continue; |
|
1760 | 1753 |
|
1761 | 1754 |
if(count_quotes(fullline) & 1) |
1762 | 1755 |
continue; |
... | ... |
@@ -1978,7 +1972,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
1978 | 1978 |
boundary = messageFindArgument(mainMessage, "boundary"); |
1979 | 1979 |
|
1980 | 1980 |
if(boundary == NULL) { |
1981 |
- cli_warnmsg("Multipart MIME message contains no boundary header\n"); |
|
1981 |
+ cli_warnmsg("Multipart/%s MIME message contains no boundary header\n", |
|
1982 |
+ mimeSubtype); |
|
1982 | 1983 |
/* Broken e-mail message */ |
1983 | 1984 |
mimeType = NOMIME; |
1984 | 1985 |
/* |
... | ... |
@@ -2076,7 +2071,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2076 | 2076 |
* This looks like parseEmailHeaders() - maybe there's |
2077 | 2077 |
* some duplication of code to be cleaned up |
2078 | 2078 |
* |
2079 |
- * We need to create an array rather than just |
|
2079 |
+ * We may need to create an array rather than just |
|
2080 | 2080 |
* save each part as it is found because not all |
2081 | 2081 |
* elements will need scanning, and we don't yet know |
2082 | 2082 |
* which of those elements it will be, except in |
... | ... |
@@ -2168,7 +2163,6 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2168 | 2168 |
} else if(inhead) { /* handling normal headers */ |
2169 | 2169 |
/*int quotes;*/ |
2170 | 2170 |
char *fullline, *ptr; |
2171 |
- const text *next; |
|
2172 | 2171 |
|
2173 | 2172 |
if(line == NULL) { |
2174 | 2173 |
/* |
... | ... |
@@ -2188,12 +2182,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2188 | 2188 |
* |
2189 | 2189 |
* UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg |
2190 | 2190 |
*/ |
2191 |
- next = t_line->t_next; |
|
2191 |
+ const text *next = t_line->t_next; |
|
2192 |
+ |
|
2192 | 2193 |
if(next && next->t_line) { |
2193 | 2194 |
const char *data = lineGetData(next->t_line); |
2194 | 2195 |
|
2195 | 2196 |
if((messageGetEncoding(aMessage) == NOENCODING) && |
2196 |
- (messageGetMimeType(aMessage) == APPLICATION)) |
|
2197 |
+ (messageGetMimeType(aMessage) == APPLICATION) && |
|
2198 |
+ strstr(data, "base64")) { |
|
2197 | 2199 |
/* |
2198 | 2200 |
* Handle this nightmare (note the blank |
2199 | 2201 |
* line in the header and the incorrect |
... | ... |
@@ -2204,11 +2200,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2204 | 2204 |
* r-Encoding: base64 |
2205 | 2205 |
* Content-Disposition: attachment; filename="zipped_files.EXE" |
2206 | 2206 |
*/ |
2207 |
- if(strstr(data, "base64")) { |
|
2208 |
- messageSetEncoding(aMessage, "base64"); |
|
2209 |
- cli_dbgmsg("Ignoring fake end of headers\n"); |
|
2210 |
- continue; |
|
2211 |
- } |
|
2207 |
+ messageSetEncoding(aMessage, "base64"); |
|
2208 |
+ cli_dbgmsg("Ignoring fake end of headers\n"); |
|
2209 |
+ continue; |
|
2210 |
+ } |
|
2212 | 2211 |
if((strncmp(data, "Content", 7) == 0) || |
2213 | 2212 |
(strncmp(data, "filename=", 9) == 0)) { |
2214 | 2213 |
cli_dbgmsg("Ignoring fake end of headers\n"); |
... | ... |
@@ -2263,15 +2258,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2263 | 2263 |
* Content-Type: application/octet-stream; name="foo |
2264 | 2264 |
* " |
2265 | 2265 |
*/ |
2266 |
- next = t_line->t_next; |
|
2267 |
- while(next && next->t_line) { |
|
2268 |
- const char *data = lineGetData(next->t_line); |
|
2269 |
- |
|
2270 |
- /*if((!isspace(data[0])) && |
|
2271 |
- ((quotes & 1) == 0)) |
|
2272 |
- break;*/ |
|
2273 |
- if(!isspace(data[0])) |
|
2274 |
- break; |
|
2266 |
+ while(t_line && next_is_folded_header(t_line)) { |
|
2267 |
+ const char *data; |
|
2268 |
+ |
|
2269 |
+ t_line = t_line->t_next; |
|
2270 |
+ |
|
2271 |
+ data = lineGetData(t_line->t_line); |
|
2275 | 2272 |
|
2276 | 2273 |
if(data[1] == '\0') { |
2277 | 2274 |
/* |
... | ... |
@@ -2296,10 +2288,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx) |
2296 | 2296 |
strcat(fullline, data); |
2297 | 2297 |
|
2298 | 2298 |
/*quotes = count_quotes(data);*/ |
2299 |
- |
|
2300 |
- t_line = next; |
|
2301 |
- next = next->t_next; |
|
2302 | 2299 |
} |
2300 |
+ |
|
2303 | 2301 |
cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n", |
2304 | 2302 |
multiparts, fullline); |
2305 | 2303 |
|
... | ... |
@@ -4516,3 +4506,70 @@ count_quotes(const char *buf) |
4516 | 4516 |
|
4517 | 4517 |
return quotes; |
4518 | 4518 |
} |
4519 |
+ |
|
4520 |
+/* |
|
4521 |
+ * Will the next line be a folded header? See RFC2822 section 2.2.3 |
|
4522 |
+ */ |
|
4523 |
+static bool |
|
4524 |
+next_is_folded_header(const text *t) |
|
4525 |
+{ |
|
4526 |
+ const text *next = t->t_next; |
|
4527 |
+ const char *data, *ptr; |
|
4528 |
+ |
|
4529 |
+ if(next == NULL) |
|
4530 |
+ return FALSE; |
|
4531 |
+ |
|
4532 |
+ if(next->t_line == NULL) |
|
4533 |
+ return FALSE; |
|
4534 |
+ |
|
4535 |
+ data = lineGetData(next->t_line); |
|
4536 |
+ |
|
4537 |
+ /* |
|
4538 |
+ * Section B.2 of RFC822 says TAB or |
|
4539 |
+ * SPACE means a continuation of the |
|
4540 |
+ * previous entry. |
|
4541 |
+ */ |
|
4542 |
+ if(isblank(data[0])) |
|
4543 |
+ return TRUE; |
|
4544 |
+ |
|
4545 |
+ if(strchr(data, '=') == NULL) |
|
4546 |
+ /* |
|
4547 |
+ * Avoid false positives with |
|
4548 |
+ * Content-Type: text/html; |
|
4549 |
+ * Content-Transfer-Encoding: quoted-printable |
|
4550 |
+ */ |
|
4551 |
+ return FALSE; |
|
4552 |
+ |
|
4553 |
+ /* |
|
4554 |
+ * Some are broken and don't fold headers lines |
|
4555 |
+ * correctly as per section 2.2.3 of RFC2822. |
|
4556 |
+ * Generally they miss the white space at |
|
4557 |
+ * the start of the fold line: |
|
4558 |
+ * Content-Type: multipart/related; |
|
4559 |
+ * type="multipart/alternative"; |
|
4560 |
+ * boundary="----=_NextPart_000_006A_01C6AC47.348CB550" |
|
4561 |
+ * should read: |
|
4562 |
+ * Content-Type: multipart/related; |
|
4563 |
+ * type="multipart/alternative"; |
|
4564 |
+ * boundary="----=_NextPart_000_006A_01C6AC47.348CB550" |
|
4565 |
+ * Since we're a virus checker not an RFC |
|
4566 |
+ * verifier we need to handle these |
|
4567 |
+ */ |
|
4568 |
+ data = lineGetData(t->t_line); |
|
4569 |
+ |
|
4570 |
+ ptr = strchr(data, '\0'); |
|
4571 |
+ |
|
4572 |
+ while(--ptr > data) |
|
4573 |
+ switch(*ptr) { |
|
4574 |
+ case ';': |
|
4575 |
+ return TRUE; |
|
4576 |
+ case '\n': |
|
4577 |
+ case ' ': |
|
4578 |
+ case '\r': |
|
4579 |
+ case '\t': |
|
4580 |
+ continue; /* white space at end of line */ |
|
4581 |
+ default: |
|
4582 |
+ return FALSE; |
|
4583 |
+ } |
|
4584 |
+ return FALSE; |
|
4585 |
+} |