Browse code

New world code now finds all samples

git-svn: trunk@1796

Nigel Horne authored on 2006/01/03 02:40:29
Showing 2 changed files
... ...
@@ -1,3 +1,9 @@
1
+Mon Jan  2 17:38:35 GMT 2006 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	NEW_WORLD code (not enabled by default) now finds all
4
+				malware in my database and is ready for
5
+				testing
6
+
1 7
 Wed Dec 28 13:49:46 GMT 2005 (njh)
2 8
 ----------------------------------
3 9
   * libclamav/mbox.c:	Added patches by Gianluigi Tiesi <sherpya*netfarm.it>
... ...
@@ -15,7 +15,7 @@
15 15
  *  along with this program; if not, write to the Free Software
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  */
18
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.265 2005/12/28 13:49:36 nigelhorne Exp $";
18
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.266 2006/01/02 17:37:57 nigelhorne Exp $";
19 19
 
20 20
 #if HAVE_CONFIG_H
21 21
 #include "clamav-config.h"
... ...
@@ -300,6 +300,8 @@ static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
300 300
 
301 301
 #ifdef	NEW_WORLD
302 302
 
303
+#undef	PARTIAL_DIR
304
+
303 305
 #if HAVE_MMAP
304 306
 #if HAVE_SYS_MMAN_H
305 307
 #include <sys/mman.h>
... ...
@@ -338,6 +340,10 @@ struct scanlist {
338 338
  * TODO: Also all those pmemstr()s are slow, so we need to reduce the number
339 339
  *	and size of data scanned each time, and we fall through to
340 340
  *	cli_parse_mbox() too often
341
+ *
342
+ * FIXME: Fall through on systems without mmap()
343
+ *
344
+ * TODO: PARTIAL_DIR fall through (not supported in the NEW_WORLD yet)
341 345
  */
342 346
 int
343 347
 cli_mbox(const char *dir, int desc, unsigned int options)
... ...
@@ -367,7 +373,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
367 367
 	if(size > 10*1024*1024)
368 368
 		return cli_parse_mbox(dir, desc, options);	/* should be StreamMaxLength, I guess */
369 369
 
370
-	cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");
370
+	/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/
371 371
 #ifdef	PARTIAL_DIR
372 372
 	cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n");
373 373
 #endif
... ...
@@ -422,6 +428,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
422 422
 		assert(scanelem->size <= size);
423 423
 		assert(&q[s - 1] <= last);
424 424
 	}
425
+
425 426
 	q = start;
426 427
 	s = size;
427 428
 	while((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) {
... ...
@@ -440,6 +447,11 @@ cli_mbox(const char *dir, int desc, unsigned int options)
440 440
 			}
441 441
 
442 442
 		cli_dbgmsg("Found quoted-printable\n");
443
+#ifdef	notdef
444
+		/*
445
+		 * The problem with quoted printable is recognising when to stop
446
+		 * parsing
447
+		 */
443 448
 		if(scanelem) {
444 449
 			scanelem->next = cli_malloc(sizeof(struct scanlist));
445 450
 			scanelem = scanelem->next;
... ...
@@ -463,12 +475,21 @@ cli_mbox(const char *dir, int desc, unsigned int options)
463 463
 		}
464 464
 		assert(scanelem->size <= size);
465 465
 		assert(&q[s - 1] <= last);
466
+#else
467
+		if(wasAlloced)
468
+			free(start);
469
+		else
470
+			munmap(start, size);
471
+
472
+		return cli_parse_mbox(dir, desc, options);
473
+#endif
466 474
 	}
467 475
 
468 476
 	if(scanlist == NULL) {
469 477
 		const struct tableinit *tableinit;
470 478
 		bool anyHeadersFound = FALSE;
471 479
 		bool hasuuencode = FALSE;
480
+		cli_file_t type;
472 481
 
473 482
 		/* FIXME: message: There could of course be no decoder needed... */
474 483
 		for(tableinit = rfc821headers; tableinit->key; tableinit++)
... ...
@@ -481,6 +502,12 @@ cli_mbox(const char *dir, int desc, unsigned int options)
481 481
 			/* uuencoded part */
482 482
 			hasuuencode = TRUE;
483 483
 
484
+		type = cli_filetype(start, size);
485
+
486
+		if((type == CL_TYPE_UNKNOWN_TEXT) &&
487
+		   (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
488
+		   	type = CL_TYPE_MAIL;
489
+
484 490
 		if(wasAlloced)
485 491
 			free(start);
486 492
 		else
... ...
@@ -488,11 +515,17 @@ cli_mbox(const char *dir, int desc, unsigned int options)
488 488
 
489 489
 		if(anyHeadersFound || hasuuencode) {
490 490
 			/* TODO: reduce the number of falls through here */
491
-			cli_warnmsg("cli_mbox: uuencode or unknown encoder\n");
492
-			return cli_parse_mbox(dir, desc, options);
491
+			if(hasuuencode)
492
+				cli_dbgmsg("New world - fall back to old uudecoder, type %d\n", type);
493
+			else
494
+				cli_dbgmsg("cli_mbox: unknown encoder, type %d\n", type);
495
+			if(type == CL_TYPE_MAIL)
496
+				return cli_parse_mbox(dir, desc, options);
497
+			cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
498
+			return CL_CLEAN;
493 499
 		}
494 500
 
495
-		cli_warnmsg("cli_mbox: I believe it's plain text which must be clean\n");
501
+		cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
496 502
 		return CL_CLEAN;
497 503
 	}
498 504
 
... ...
@@ -522,6 +555,24 @@ cli_mbox(const char *dir, int desc, unsigned int options)
522 522
 					b64start += 4;
523 523
 					b64size -= 4;
524 524
 					break;
525
+				} else if(memcmp(b64start, "\n \n", 3) == 0) {
526
+					/*
527
+					 * Some viruses are broken and have
528
+					 * one space character at the end of
529
+					 * the headers (LF line endings)
530
+					 */
531
+					b64start += 3;
532
+					b64size -= 3;
533
+					break;
534
+				} else if(memcmp(b64start, "\r\n \r\n", 5) == 0) {
535
+					/*
536
+					 * Some viruses are broken and have
537
+					 * one space character at the end of
538
+					 * the headers (CRLF line endings)
539
+					 */
540
+					b64start += 5;
541
+					b64size -= 5;
542
+					break;
525 543
 				}
526 544
 				b64start++;
527 545
 				b64size--;
... ...
@@ -541,8 +592,14 @@ cli_mbox(const char *dir, int desc, unsigned int options)
541 541
 				line = NULL;
542 542
 
543 543
 				m = messageCreate();
544
-				if(m == NULL)
544
+				if(m == NULL) {
545
+					if(wasAlloced)
546
+						free(start);
547
+					else
548
+						munmap(start, size);
549
+
545 550
 					return CL_EMEM;
551
+				}
546 552
 				messageSetEncoding(m, "base64");
547 553
 
548 554
 				lastline = 0;
... ...
@@ -637,8 +694,14 @@ cli_mbox(const char *dir, int desc, unsigned int options)
637 637
 				cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
638 638
 
639 639
 				m = messageCreate();
640
-				if(m == NULL)
640
+				if(m == NULL) {
641
+					if(wasAlloced)
642
+						free(start);
643
+					else
644
+						munmap(start, size);
645
+
641 646
 					return CL_EMEM;
647
+				}
642 648
 				messageSetEncoding(m, "quoted-printable");
643 649
 
644 650
 				line = NULL;
... ...
@@ -710,6 +773,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
710 710
 	if(ret == 0)
711 711
 		return CL_CLEAN;	/* a lie - but it gets things going */
712 712
 
713
+	cli_dbgmsg("New world - don't know what to do - fall back to old world\n");
713 714
 	/* Fall back for now */
714 715
 	lseek(desc, 0L, SEEK_SET);
715 716
 	return cli_parse_mbox(dir, desc, options);