Browse code

Work on the NEW_WORLD Code

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1473 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2005/04/13 18:10:29
Showing 1 changed file
... ...
@@ -15,7 +15,7 @@
15 15
  *  along with this program; if not, write to the Free Software
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  */
18
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.236 2005/04/07 16:38:37 nigelhorne Exp $";
18
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.237 2005/04/13 09:10:29 nigelhorne Exp $";
19 19
 
20 20
 #if HAVE_CONFIG_H
21 21
 #include "clamav-config.h"
... ...
@@ -329,6 +329,10 @@ struct scanlist {
329 329
  * FIXME: Some mailbox scans are slower with this method. I suspect that it's
330 330
  * because the scan can proceed to the end of the file rather than the end
331 331
  * of the attachment which can mean that later emails are scanned many times
332
+ *
333
+ * TODO: Also all those pmemstr()s are slow, so we need to reduce the number
334
+ *	and size of data scanned each time, and we fall through to
335
+ *	cli_parse_mbox() too often
332 336
  */
333 337
 int
334 338
 cli_mbox(const char *dir, int desc, unsigned int options)
... ...
@@ -402,8 +406,10 @@ cli_mbox(const char *dir, int desc, unsigned int options)
402 402
 			scanelem->size = (size_t)(p - q);
403 403
 			q = p;
404 404
 			s -= scanelem->size;
405
-		} else
406
-			s = scanelem->size = (size_t)(last - scanelem->start) + 1;
405
+		} else {
406
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
407
+			break;
408
+		}
407 409
 		cli_dbgmsg("base64: last %u q %u s %u\n", (unsigned int)last, (unsigned int)q, s);
408 410
 		assert(scanelem->size <= size);
409 411
 		assert(&q[s - 1] <= last);
... ...
@@ -411,6 +417,20 @@ cli_mbox(const char *dir, int desc, unsigned int options)
411 411
 	q = start;
412 412
 	s = size;
413 413
 	while((p = (char *)cli_pmemstr(q, s, "quoted-printable", 16)) != NULL) {
414
+		if(p != q)
415
+			switch(p[-1]) {
416
+				case ' ':
417
+				case ':':
418
+				case '=':	/* wrong but allow it */
419
+					break;
420
+				default:
421
+					s -= (p - q) + 16;
422
+					q = &p[16];
423
+					cli_dbgmsg("Ignore quoted-printable false positive\n");
424
+					cli_dbgmsg("s = %u\n", s);
425
+					continue;	/* false positive */
426
+			}
427
+			
414 428
 		cli_dbgmsg("Found quoted-printable\n");
415 429
 		if(scanelem) {
416 430
 			scanelem->next = cli_malloc(sizeof(struct scanlist));
... ...
@@ -428,8 +448,11 @@ cli_mbox(const char *dir, int desc, unsigned int options)
428 428
 			scanelem->size = (size_t)(p - q);
429 429
 			q = p;
430 430
 			s -= scanelem->size;
431
-		} else
432
-			s = scanelem->size = (size_t)(last - scanelem->start) + 1;
431
+			cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
432
+		} else {
433
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
434
+			break;
435
+		}
433 436
 		assert(scanelem->size <= size);
434 437
 		assert(&q[s - 1] <= last);
435 438
 	}
... ...
@@ -437,6 +460,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
437 437
 	if(scanlist == NULL) {
438 438
 		const struct tableinit *tableinit;
439 439
 		bool anyHeadersFound = FALSE;
440
+		bool hasuuencode = FALSE;
440 441
 
441 442
 		/* FIXME: message: There could of course be no decoder needed... */
442 443
 		for(tableinit = rfc821headers; tableinit->key; tableinit++)
... ...
@@ -445,15 +469,21 @@ cli_mbox(const char *dir, int desc, unsigned int options)
445 445
 				break;
446 446
 			}
447 447
 
448
+		if((!anyHeadersFound) && cli_pmemstr(start, size, "\nbegin ", 7))
449
+			/* uuencoded part */
450
+			hasuuencode = TRUE;
451
+
448 452
 		if(wasAlloced)
449 453
 			free(start);
450 454
 		else
451 455
 			munmap(start, size);
452 456
 
453
-		if(anyHeadersFound) {
454
-			cli_warnmsg("cli_mbox: unknown encoder\n");
457
+		if(anyHeadersFound || hasuuencode) {
458
+			/* TODO: reduce the number of falls through here */
459
+			cli_warnmsg("cli_mbox: uuencode or unknown encoder\n");
455 460
 			return cli_parse_mbox(dir, desc, options);
456 461
 		}
462
+
457 463
 		cli_warnmsg("cli_mbox: I believe it's plain text which must be clean\n");
458 464
 		return CL_CLEAN;
459 465
 	}
... ...
@@ -489,7 +519,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
489 489
 			}
490 490
 
491 491
 			if(b64size > 0L)
492
-				while(!isalnum(*b64start)) {
492
+				while((!isalnum(*b64start)) && (*b64start != '/')) {
493 493
 					if(b64size-- == 0L)
494 494
 						break;
495 495
 					b64start++;
... ...
@@ -505,7 +535,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
505 505
 					return CL_EMEM;
506 506
 				messageSetEncoding(m, "base64");
507 507
 
508
-				while(b64size > 0L) {
508
+				do {
509 509
 					int length = 0;
510 510
 
511 511
 					/*printf("%ld: ", b64size); fflush(stdout);*/
... ...
@@ -535,7 +565,8 @@ cli_mbox(const char *dir, int desc, unsigned int options)
535 535
 					--b64size;
536 536
 					if(strchr(line, '='))
537 537
 						break;
538
-				}
538
+				} while(b64size > 0L);
539
+
539 540
 				free(line);
540 541
 				fb = messageToFileblob(m, dir);
541 542
 				messageDestroy(m);
... ...
@@ -589,7 +620,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
589 589
 
590 590
 				line = NULL;
591 591
 
592
-				while(quotedsize > 0L) {
592
+				do {
593 593
 					int length = 0;
594 594
 
595 595
 					/*printf("%ld: ", quotedsize); fflush(stdout);*/
... ...
@@ -617,7 +648,8 @@ cli_mbox(const char *dir, int desc, unsigned int options)
617 617
 					}
618 618
 					quotedstart = ++ptr;
619 619
 					--quotedsize;
620
-				}
620
+				} while(quotedsize > 0L);
621
+
621 622
 				free(line);
622 623
 				fb = messageToFileblob(m, dir);
623 624
 				messageDestroy(m);
... ...
@@ -650,6 +682,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
650 650
 		return CL_CLEAN;	/* a lie - but it gets things going */
651 651
 
652 652
 	/* Fall back for now */
653
+	lseek(desc, 0L, SEEK_SET);
653 654
 	return cli_parse_mbox(dir, desc, options);
654 655
 }
655 656
 #else