Browse code

Some MYDOOM.I were getting through

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@677 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/07/20 23:38:48
Showing 6 changed files
... ...
@@ -1,3 +1,7 @@
1
+Tue Jul 20 15:38:03 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	Some MyDoom.I were getting through
4
+
1 5
 Tue Jul 20 03:26:38 CEST 2004 (tk)
2 6
 ----------------------------------
3 7
   * libclamav: integrate CHM decoder from Trog
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.88  2004/07/20 14:35:29  nigelhorne
21
+ * Some MYDOOM.I were getting through
22
+ *
20 23
  * Revision 1.87  2004/07/19 17:54:40  kojm
21 24
  * Use new pattern matching algorithm. Cleanup.
22 25
  *
... ...
@@ -249,7 +252,7 @@
249 249
  * Compilable under SCO; removed duplicate code with message.c
250 250
  *
251 251
  */
252
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.87 2004/07/19 17:54:40 kojm Exp $";
252
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.88 2004/07/20 14:35:29 nigelhorne Exp $";
253 253
 
254 254
 #if HAVE_CONFIG_H
255 255
 #include "clamav-config.h"
... ...
@@ -1749,7 +1752,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1749 1749
 static int
1750 1750
 boundaryStart(const char *line, const char *boundary)
1751 1751
 {
1752
-	/*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
1752
+	cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);
1753 1753
 	if(line == NULL)
1754 1754
 		return 0;	/* empty line */
1755 1755
 
... ...
@@ -1772,7 +1775,7 @@ boundaryStart(const char *line, const char *boundary)
1772 1772
 	 * that has -1 in it instead of starting --1. This needs some more work.
1773 1773
 	 */
1774 1774
 	if(strstr(line, boundary) != NULL) {
1775
-		cli_dbgmsg("found %s in %s\n", boundary, line);
1775
+		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
1776 1776
 		return 1;
1777 1777
 	}
1778 1778
 	if(*line++ != '-')
... ...
@@ -1790,6 +1793,7 @@ endOfMessage(const char *line, const char *boundary)
1790 1790
 {
1791 1791
 	size_t len;
1792 1792
 
1793
+	cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);
1793 1794
 	if(line == NULL)
1794 1795
 		return 0;
1795 1796
 	if(*line++ != '-')
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.65  2004/07/20 14:35:29  nigelhorne
21
+ * Some MYDOOM.I were getting through
22
+ *
20 23
  * Revision 1.64  2004/07/02 23:00:57  kojm
21 24
  * new method of file type detection; HTML normalisation
22 25
  *
... ...
@@ -189,7 +192,7 @@
189 189
  * uuencodebegin() no longer static
190 190
  *
191 191
  */
192
-static	char	const	rcsid[] = "$Id: message.c,v 1.64 2004/07/02 23:00:57 kojm Exp $";
192
+static	char	const	rcsid[] = "$Id: message.c,v 1.65 2004/07/20 14:35:29 nigelhorne Exp $";
193 193
 
194 194
 #if HAVE_CONFIG_H
195 195
 #include "clamav-config.h"
... ...
@@ -238,8 +241,9 @@ static	char	const	rcsid[] = "$Id: message.c,v 1.64 2004/07/02 23:00:57 kojm Exp
238 238
 
239 239
 typedef enum { FALSE = 0, TRUE = 1 } bool;
240 240
 
241
-static	unsigned char	*decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen);
242
-static unsigned char *decode(const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
241
+static	unsigned char	*decodeLine(message *m, const char *line, unsigned char *buf, size_t buflen);
242
+static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
243
+static	void	squeeze(char *s);
243 244
 static	unsigned	char	hex(char c);
244 245
 static	unsigned	char	base64(char c);
245 246
 static	unsigned	char	uudecode(char c);
... ...
@@ -321,6 +325,8 @@ messageReset(message *m)
321 321
 	if(m->body_first)
322 322
 		textDestroy(m->body_first);
323 323
 
324
+	assert(m->base64chars == 0);
325
+
324 326
 	memset(m, '\0', sizeof(message));
325 327
 	m->mimeType = NOMIME;
326 328
 	m->encodingType = NOENCODING;
... ...
@@ -947,7 +953,7 @@ messageToBlob(message *m)
947 947
 		unsigned char *uptr, *data;
948 948
 		char *ptr;
949 949
 		int bytenumber;
950
-		blob *tmp = blobCreate();
950
+		blob *tmp;
951 951
 
952 952
 		/*
953 953
 		 * Table look up by Thomas Lamy <Thomas.Lamy@in-online.net>
... ...
@@ -965,6 +971,13 @@ messageToBlob(message *m)
965 965
 		/* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
966 966
 		};
967 967
 
968
+		tmp = blobCreate();
969
+
970
+		if(tmp == NULL) {
971
+			blobDestroy(b);
972
+			return NULL;
973
+		}
974
+
968 975
 		/*
969 976
 		 * Decode BinHex4. First create a temporary blob which contains
970 977
 		 * the encoded message. Then decode that blob to the target
... ...
@@ -1077,6 +1090,11 @@ messageToBlob(message *m)
1077 1077
 		if(memchr(data, 0x90, newlen)) {
1078 1078
 			blob *u = blobCreate();	/* uncompressed data */
1079 1079
 
1080
+			if(u == NULL) {
1081
+				blobDestroy(b);
1082
+				blobDestroy(tmp);
1083
+				return NULL;
1084
+			}
1080 1085
 			/*
1081 1086
 			 * Includes compression
1082 1087
 			 */
... ...
@@ -1262,7 +1280,7 @@ messageToBlob(message *m)
1262 1262
 		 * the last byte and should be used as evidence
1263 1263
 		 * of the end of the data. Some mail clients
1264 1264
 		 * annoyingly then put plain text after the '='
1265
-		 * bytes. Sigh
1265
+		 * byte and viruses exploit this bug. Sigh
1266 1266
 		 */
1267 1267
 		/*if(messageGetEncoding(m) == BASE64)
1268 1268
 			if(strchr(line, '='))
... ...
@@ -1270,6 +1288,17 @@ messageToBlob(message *m)
1270 1270
 
1271 1271
 	} while((t_line = t_line->t_next) != NULL);
1272 1272
 
1273
+	/* Verify we have nothing left to flush out */
1274
+	if(m->base64chars) {
1275
+		unsigned char data[4];
1276
+		unsigned char *ptr;
1277
+
1278
+		ptr = decode(m, NULL, data, base64, FALSE);
1279
+		if(ptr)
1280
+			blobAddData(b, data, (size_t)(ptr - data));
1281
+		m->base64chars = 0;
1282
+	}
1283
+
1273 1284
 	return b;
1274 1285
 }
1275 1286
 
... ...
@@ -1278,7 +1307,7 @@ messageToBlob(message *m)
1278 1278
  * The caller must free the returned text
1279 1279
  */
1280 1280
 text *
1281
-messageToText(const message *m)
1281
+messageToText(message *m)
1282 1282
 {
1283 1283
 	text *first = NULL, *last = NULL;
1284 1284
 	const text *t_line;
... ...
@@ -1301,7 +1330,8 @@ messageToText(const message *m)
1301 1301
 			   ((last->t_text = strdup(t_line->t_text)) == NULL)) {
1302 1302
 				if(last)
1303 1303
 					free(last);
1304
-				textDestroy(first);
1304
+				if(first)
1305
+					textDestroy(first);
1305 1306
 				return NULL;
1306 1307
 			}
1307 1308
 		}
... ...
@@ -1325,7 +1355,14 @@ messageToText(const message *m)
1325 1325
 			unsigned char *uptr;
1326 1326
 			const char *line = t_line->t_text;
1327 1327
 
1328
-			if(messageGetEncoding(m) == UUENCODE)
1328
+			if(messageGetEncoding(m) == BASE64) {
1329
+				/*
1330
+				 * ignore blanks - breaks RFC which is
1331
+				 * probably the point!
1332
+				 */
1333
+				if(line == NULL)
1334
+					continue;
1335
+			} else if(messageGetEncoding(m) == UUENCODE)
1329 1336
 				if(strcasecmp(line, "end") == 0)
1330 1337
 					break;
1331 1338
 
... ...
@@ -1481,7 +1518,7 @@ encodingLine(const message *m)
1481 1481
  * len is sizeof(ptr)
1482 1482
  */
1483 1483
 static unsigned char *
1484
-decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen)
1484
+decodeLine(message *m, const char *line, unsigned char *buf, size_t buflen)
1485 1485
 {
1486 1486
 	size_t len;
1487 1487
 	bool softbreak;
... ...
@@ -1513,7 +1550,7 @@ decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen
1513 1513
 				*buf++ = '\n';
1514 1514
 				break;
1515 1515
 			}
1516
-			
1516
+
1517 1517
 			softbreak = FALSE;
1518 1518
 			while(*line) {
1519 1519
 				if(*line == '=') {
... ...
@@ -1562,11 +1599,17 @@ decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen
1562 1562
 			p2 = strchr(copy, '=');
1563 1563
 			if(p2)
1564 1564
 				*p2 = '\0';
1565
+			squeeze(copy);
1566
+
1565 1567
 			/*
1566 1568
 			 * Klez doesn't always put "=" on the last line
1567 1569
 			 */
1568
-			buf = decode(copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
1569
-			/*buf = decode(copy, buf, base64, FALSE);*/
1570
+			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
1571
+			if(p2)
1572
+				/* flush the read ahead bytes */
1573
+				buf = decode(m, NULL, buf, base64, FALSE);
1574
+
1575
+			/*buf = decode(m, copy, buf, base64, FALSE);*/
1570 1576
 
1571 1577
 			free(copy);
1572 1578
 			break;
... ...
@@ -1592,7 +1635,7 @@ decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen
1592 1592
 				 */
1593 1593
 				cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
1594 1594
 			else
1595
-				buf = decode(line, buf, uudecode, (len & 3) == 0);
1595
+				buf = decode(m, line, buf, uudecode, (len & 3) == 0);
1596 1596
 			break;
1597 1597
 	}
1598 1598
 
... ...
@@ -1601,10 +1644,179 @@ decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen
1601 1601
 }
1602 1602
 
1603 1603
 /*
1604
+ * Remove the spaces from the middle of a string. Spaces shouldn't appear
1605
+ * mid string in base64 files, but some broken mail clients ignore such
1606
+ * errors rather than discarding the mail, and virus writers exploit this bug
1607
+ */
1608
static void
squeeze(char *s)
{
	const char *src;
	char *dst;

	/*
	 * Compact the string in place in a single pass: every character that
	 * is not a space is moved down to the next free slot. The previous
	 * implementation called strcpy(s, &s[1]) for each space; source and
	 * destination overlap there, which the C standard leaves undefined,
	 * and the repeated copies were quadratic on lines full of spaces.
	 *
	 * dst never overtakes src, so each byte is read before it can be
	 * overwritten.
	 */
	for(src = dst = s; *src != '\0'; src++)
		if(*src != ' ')
			*dst++ = *src;

	*dst = '\0';
}
1614
+
1615
+/*
1604 1616
  * Returns one byte after the end of the decoded data in "out"
1617
+ *
1618
+ * Update m->base64chars with the last few bytes of data that we haven't
1619
+ * decoded. After the last line is found, decode will be called with in = NULL
1620
+ * to flush these out
1605 1621
  */
1622
+#if	1
1606 1623
 static unsigned char *
1607
-decode(const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
1624
+decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
1625
+{
1626
+	unsigned char b1, b2, b3, b4;
1627
+	unsigned char cb1, cb2, cb3;	/* carried over from last line */
1628
+
1629
+	/*cli_dbgmsg("decode %s (len %d ifFast %d base64chars %d)\n", in,
1630
+		in ? strlen(in) : 0,
1631
+		isFast, m->base64chars);*/
1632
+
1633
+	cb1 = cb2 = cb3 = '\0';
1634
+
1635
+	switch(m->base64chars) {
1636
+		case 3:
1637
+			cb3 = m->base64_3;
1638
+			/* FALLTHROUGH */
1639
+		case 2:
1640
+			cb2 = m->base64_2;
1641
+			/* FALLTHROUGH */
1642
+		case 1:
1643
+			cb1 = m->base64_1;
1644
+			isFast = FALSE;
1645
+			break;
1646
+		default:
1647
+			assert(m->base64chars <= 3);
1648
+	}
1649
+
1650
+	if(isFast)
1651
+		/* Fast decoding if not last line */
1652
+		while(*in) {
1653
+			b1 = (*decoder)(*in++);
1654
+			b2 = (*decoder)(*in++);
1655
+			b3 = (*decoder)(*in++);
1656
+			/*
1657
+			 * Put this line here to help on some compilers which
1658
+			 * can make use of some architecure's ability to
1659
+			 * multiprocess when different variables can be
1660
+			 * updated at the same time - here b3 is used in
1661
+			 * one line, b1/b2 in the next and b4 in the next after
1662
+			 * that, b3 and b4 rely on in but b1/b2 don't
1663
+			 */
1664
+			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1665
+			b4 = (*decoder)(*in++);
1666
+			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
1667
+			*out++ = (b3 << 6) | (b4 & 0x3F);
1668
+		}
1669
+	else {
1670
+		if(in == NULL) {	/* flush */
1671
+			int nbytes = m->base64chars;
1672
+
1673
+			if(nbytes == 0)
1674
+				return out;
1675
+
1676
+			m->base64chars--;
1677
+			b1 = cb1;
1678
+
1679
+			if(m->base64chars) {
1680
+				m->base64chars--;
1681
+				b2 = cb2;
1682
+
1683
+				if(m->base64chars) {
1684
+					m->base64chars--;
1685
+					b3 = cb3;
1686
+					assert(m->base64chars == 0);
1687
+				}
1688
+			}
1689
+
1690
+			switch(nbytes) {
1691
+				case 3:
1692
+					b4 = '\0';
1693
+					/* fall through */
1694
+				case 4:
1695
+					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1696
+					*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
1697
+					*out++ = (b3 << 6) | (b4 & 0x3F);
1698
+					break;
1699
+				case 2:
1700
+					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1701
+					*out++ = b2 << 4;
1702
+					break;
1703
+				case 1:
1704
+					*out++ = b1 << 2;
1705
+					break;
1706
+				default:
1707
+					assert(0);
1708
+			}
1709
+
1710
+		} else while(*in) {
1711
+		/* Slower decoding for last line */
1712
+			int nbytes;
1713
+
1714
+			if(m->base64chars) {
1715
+				m->base64chars--;
1716
+				b1 = cb1;
1717
+			} else
1718
+				b1 = (*decoder)(*in++);
1719
+
1720
+			if(*in == '\0') {
1721
+				b2 = '\0';
1722
+				nbytes = 1;
1723
+			} else {
1724
+				if(m->base64chars) {
1725
+					m->base64chars--;
1726
+					b2 = cb2;
1727
+				} else
1728
+					b2 = (*decoder)(*in++);
1729
+
1730
+				if(*in == '\0') {
1731
+					b3 = '\0';
1732
+					nbytes = 2;
1733
+				} else {
1734
+					if(m->base64chars) {
1735
+						m->base64chars--;
1736
+						b3 = cb3;
1737
+					} else
1738
+						b3 = (*decoder)(*in++);
1739
+
1740
+					if(*in == '\0') {
1741
+						b4 = '\0';
1742
+						nbytes = 3;
1743
+					} else {
1744
+						b4 = (*decoder)(*in++);
1745
+						nbytes = 4;
1746
+					}
1747
+				}
1748
+			}
1749
+
1750
+			switch(nbytes) {
1751
+				case 3:
1752
+					m->base64_3 = b3;
1753
+				case 2:
1754
+					m->base64_2 = b2;
1755
+				case 1:
1756
+					m->base64_1 = b1;
1757
+					break;
1758
+				case 4:
1759
+					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
1760
+					*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
1761
+					*out++ = (b3 << 6) | (b4 & 0x3F);
1762
+					break;
1763
+				default:
1764
+					assert(0);
1765
+			}
1766
+			if(nbytes != 4) {
1767
+				m->base64chars = nbytes;
1768
+				break;
1769
+			}
1770
+		}
1771
+	}
1772
+	return out;
1773
+}
1774
+#else
1775
+static unsigned char *
1776
+decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
1608 1777
 {
1609 1778
 	unsigned char b1, b2, b3, b4;
1610 1779
 
... ...
@@ -1679,6 +1891,7 @@ decode(const char *in, unsigned char *out, unsigned char (*decoder)(char), bool
1679 1679
 		}
1680 1680
 	return out;
1681 1681
 }
1682
+#endif
1682 1683
 
1683 1684
 static unsigned char
1684 1685
 hex(char c)
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: message.h,v $
19
+ * Revision 1.12  2004/07/20 14:35:29  nigelhorne
20
+ * Some MYDOOM.I were getting through
21
+ *
19 22
  * Revision 1.11  2004/05/06 18:01:25  nigelhorne
20 23
  * Force attachments marked as RFC822 messages to be scanned
21 24
  *
... ...
@@ -53,6 +56,9 @@ typedef struct message {
53 53
 	char	**mimeArguments;
54 54
 	char	*mimeDispositionType;	/* probably attachment */
55 55
 	text	*body_first, *body_last;
56
+
57
+	char	base64_1, base64_2, base64_3;
58
+	int	base64chars;
56 59
 	/*
57 60
 	 * Markers for the start of various non MIME messages that could
58 61
 	 * be included within this message
... ...
@@ -82,7 +88,7 @@ int	messageAddLineAtTop(message *m, const char *line);
82 82
 const	text	*messageGetBody(const message *m);
83 83
 void	messageClean(message *m);
84 84
 blob	*messageToBlob(message *m);
85
-text	*messageToText(const message *m);
85
+text	*messageToText(message *m);
86 86
 const	text	*uuencodeBegin(const message *m);
87 87
 const	text	*binhexBegin(const message *m);
88 88
 const	text	*bounceBegin(const message *m);
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: text.c,v $
19
+ * Revision 1.8  2004/07/20 14:35:29  nigelhorne
20
+ * Some MYDOOM.I were getting through
21
+ *
19 22
  * Revision 1.7  2004/06/22 04:08:02  nigelhorne
20 23
  * Optimise empty lines
21 24
  *
... ...
@@ -30,7 +33,7 @@
30 30
  *
31 31
  */
32 32
 
33
-static	char	const	rcsid[] = "$Id: text.c,v 1.7 2004/06/22 04:08:02 nigelhorne Exp $";
33
+static	char	const	rcsid[] = "$Id: text.c,v 1.8 2004/07/20 14:35:29 nigelhorne Exp $";
34 34
 
35 35
 #if HAVE_CONFIG_H
36 36
 #include "clamav-config.h"
... ...
@@ -207,7 +210,7 @@ textAdd(text *t_head, const text *t)
207 207
  * Add a message's content to the end of the current object
208 208
  */
209 209
 text *
210
-textAddMessage(text *aText, const message *aMessage)
210
+textAddMessage(text *aText, message *aMessage)
211 211
 {
212 212
 	assert(aMessage != NULL);
213 213
 
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: text.h,v $
19
+ * Revision 1.4  2004/07/20 14:35:29  nigelhorne
20
+ * Some MYDOOM.I were getting through
21
+ *
19 22
  * Revision 1.3  2004/06/22 04:08:02  nigelhorne
20 23
  * Optimise empty lines
21 24
  *
... ...
@@ -32,5 +35,5 @@ void	textDestroy(text *t_head);
32 32
 text	*textClean(text *t_head);
33 33
 text	*textCopy(const text *t_head);
34 34
 text	*textAdd(text *t_head, const text *t);
35
-text	*textAddMessage(text *aText, const message *aMessage);
35
+text	*textAddMessage(text *aText, message *aMessage);
36 36
 blob	*textToBlob(const text *t, blob *b);