Browse code

Some HTML.Phishing.Bank-598 were not being caught

git-svn: trunk@2115

Nigel Horne authored on 2006/07/24 21:14:46
Showing 2 changed files
... ...
@@ -1,3 +1,8 @@
1
+Mon Jul 24 13:13:35 BST 2006 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Some HTML.Phishing.Bank-598 were not being caught,
4
+				reported by Sven
5
+
1 6
 Sun Jul 23 20:13:46 BST 2006 (njh)
2 7
 ----------------------------------
3 8
   * clamav-milter:	Call res_close() and fix blacklist mutex
... ...
@@ -16,7 +16,7 @@
16 16
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 17
  *  MA 02110-1301, USA.
18 18
  */
19
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.323 2006/07/12 21:21:25 njh Exp $";
19
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.324 2006/07/24 12:14:46 njh Exp $";
20 20
 
21 21
 #if HAVE_CONFIG_H
22 22
 #include "clamav-config.h"
... ...
@@ -218,6 +218,7 @@ static	bool	exportBinhexMessage(const char *dir, message *m);
218 218
 static	int	exportBounceMessage(text *start, const mbox_ctx *ctx);
219 219
 static	message	*do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx *mctx, message *messageIn, text **tptr);
220 220
 static	int	count_quotes(const char *buf);
221
+static	bool	next_is_folded_header(const text *t);
221 222
 
222 223
 static	void	checkURLs(message *m, const char *dir);
223 224
 #ifdef	WITH_CURL
... ...
@@ -1747,16 +1748,9 @@ parseEmailHeaders(message *m, const table_t *rfc821)
1747 1747
 
1748 1748
 				assert(fullline != NULL);
1749 1749
 
1750
-				if(t->t_next && (t->t_next->t_line != NULL))
1751
-					/*
1752
-					 * Section B.2 of RFC822 says TAB or
1753
-					 * SPACE means a continuation of the
1754
-					 * previous entry.
1755
-					 *
1756
-					 * Add all the arguments on the line
1757
-					 */
1758
-					if(isblank(lineGetData(t->t_next->t_line)[0]))
1759
-						continue;
1750
+				if(next_is_folded_header(t))
1751
+					/* Add arguments to this line */
1752
+					continue;
1760 1753
 
1761 1754
 				if(count_quotes(fullline) & 1)
1762 1755
 					continue;
... ...
@@ -1978,7 +1972,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
1978 1978
 			boundary = messageFindArgument(mainMessage, "boundary");
1979 1979
 
1980 1980
 			if(boundary == NULL) {
1981
-				cli_warnmsg("Multipart MIME message contains no boundary header\n");
1981
+				cli_warnmsg("Multipart/%s MIME message contains no boundary header\n",
1982
+					mimeSubtype);
1982 1983
 				/* Broken e-mail message */
1983 1984
 				mimeType = NOMIME;
1984 1985
 				/*
... ...
@@ -2076,7 +2071,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2076 2076
 			 * This looks like parseEmailHeaders() - maybe there's
2077 2077
 			 * some duplication of code to be cleaned up
2078 2078
 			 *
2079
-			 * We need to create an array rather than just
2079
+			 * We may need to create an array rather than just
2080 2080
 			 * save each part as it is found because not all
2081 2081
 			 * elements will need scanning, and we don't yet know
2082 2082
 			 * which of those elements it will be, except in
... ...
@@ -2168,7 +2163,6 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2168 2168
 					} else if(inhead) {	/* handling normal headers */
2169 2169
 						/*int quotes;*/
2170 2170
 						char *fullline, *ptr;
2171
-						const text *next;
2172 2171
 
2173 2172
 						if(line == NULL) {
2174 2173
 							/*
... ...
@@ -2188,12 +2182,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2188 2188
 							 *
2189 2189
 							 * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
2190 2190
 							 */
2191
-							next = t_line->t_next;
2191
+							const text *next = t_line->t_next;
2192
+
2192 2193
 							if(next && next->t_line) {
2193 2194
 								const char *data = lineGetData(next->t_line);
2194 2195
 
2195 2196
 								if((messageGetEncoding(aMessage) == NOENCODING) &&
2196
-								   (messageGetMimeType(aMessage) == APPLICATION))
2197
+								   (messageGetMimeType(aMessage) == APPLICATION) &&
2198
+								   strstr(data, "base64")) {
2197 2199
 									/*
2198 2200
 									 * Handle this nightmare (note the blank
2199 2201
 									 * line in the header and the incorrect
... ...
@@ -2204,11 +2200,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2204 2204
 									 * r-Encoding: base64
2205 2205
 									 * Content-Disposition: attachment; filename="zipped_files.EXE"
2206 2206
 									 */
2207
-									if(strstr(data, "base64")) {
2208
-										messageSetEncoding(aMessage, "base64");
2209
-										cli_dbgmsg("Ignoring fake end of headers\n");
2210
-										continue;
2211
-									}
2207
+									messageSetEncoding(aMessage, "base64");
2208
+									cli_dbgmsg("Ignoring fake end of headers\n");
2209
+									continue;
2210
+								}
2212 2211
 								if((strncmp(data, "Content", 7) == 0) ||
2213 2212
 								   (strncmp(data, "filename=", 9) == 0)) {
2214 2213
 									cli_dbgmsg("Ignoring fake end of headers\n");
... ...
@@ -2263,15 +2258,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2263 2263
 						 * Content-Type: application/octet-stream; name="foo
2264 2264
 						 * "
2265 2265
 						 */
2266
-						next = t_line->t_next;
2267
-						while(next && next->t_line) {
2268
-							const char *data = lineGetData(next->t_line);
2269
-
2270
-							/*if((!isspace(data[0])) &&
2271
-							   ((quotes & 1) == 0))
2272
-								break;*/
2273
-							if(!isspace(data[0]))
2274
-								break;
2266
+						while(t_line && next_is_folded_header(t_line)) {
2267
+							const char *data;
2268
+
2269
+							t_line = t_line->t_next;
2270
+
2271
+							data = lineGetData(t_line->t_line);
2275 2272
 
2276 2273
 							if(data[1] == '\0') {
2277 2274
 								/*
... ...
@@ -2296,10 +2288,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx)
2296 2296
 							strcat(fullline, data);
2297 2297
 
2298 2298
 							/*quotes = count_quotes(data);*/
2299
-
2300
-							t_line = next;
2301
-							next = next->t_next;
2302 2299
 						}
2300
+
2303 2301
 						cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
2304 2302
 							multiparts, fullline);
2305 2303
 
... ...
@@ -4516,3 +4506,70 @@ count_quotes(const char *buf)
4516 4516
 
4517 4517
 	return quotes;
4518 4518
 }
4519
+
4520
+/*
4521
+ * Will the next line be a folded header? See RFC2822 section 2.2.3
4522
+ */
4523
+static bool
4524
+next_is_folded_header(const text *t)
4525
+{
4526
+	const text *next = t->t_next;
4527
+	const char *data, *ptr;
4528
+
4529
+	if(next == NULL)
4530
+		return FALSE;
4531
+
4532
+	if(next->t_line == NULL)
4533
+		return FALSE;
4534
+
4535
+	data = lineGetData(next->t_line);
4536
+
4537
+	/*
4538
+	 * Section B.2 of RFC822 says TAB or
4539
+	 * SPACE means a continuation of the
4540
+	 * previous entry.
4541
+	 */
4542
+	if(isblank(data[0]))
4543
+		return TRUE;
4544
+
4545
+	if(strchr(data, '=') == NULL)
4546
+		/*
4547
+		 * Avoid false positives with
4548
+		 *	Content-Type: text/html;
4549
+		 *	Content-Transfer-Encoding: quoted-printable
4550
+		 */
4551
+		return FALSE;
4552
+	
4553
+	/*
4554
+	 * Some are broken and don't fold headers lines
4555
+	 * correctly as per section 2.2.3 of RFC2822.
4556
+	 * Generally they miss the white space at
4557
+	 * the start of the fold line:
4558
+	 *	Content-Type: multipart/related;
4559
+	 *	type="multipart/alternative";
4560
+	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
4561
+	 * should read:
4562
+	 *	Content-Type: multipart/related;
4563
+	 *	 type="multipart/alternative";
4564
+	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
4565
+	 * Since we're a virus checker not an RFC
4566
+	 * verifier we need to handle these
4567
+	 */
4568
+	data = lineGetData(t->t_line);
4569
+
4570
+	ptr = strchr(data, '\0');
4571
+
4572
+	while(--ptr > data)
4573
+		switch(*ptr) {
4574
+			case ';':
4575
+				return TRUE;
4576
+			case '\n':
4577
+			case ' ':
4578
+			case '\r':
4579
+			case '\t':
4580
+				continue;	/* white space at end of line */
4581
+			default:
4582
+				return FALSE;
4583
+		}
4584
+	return FALSE;
4585
+}