Browse code

NEW_WORLD: Catch phishes

git-svn: trunk@2012

Nigel Horne authored on 2006/06/07 21:30:09
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Wed Jun  7 13:29:22 BST 2006 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	NEW_WORLD: Fast phish detection.
4
+
1 5
 Wed Jun  7 12:11:55 CEST 2006 (tk)
2 6
 ----------------------------------
3 7
   * libclamav/sis.c: fix compilation error on Cygwin, reported by NJH
... ...
@@ -16,7 +16,7 @@
16 16
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 17
  *  MA 02110-1301, USA.
18 18
  */
19
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.310 2006/06/06 21:22:00 njh Exp $";
19
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.311 2006/06/07 12:28:49 njh Exp $";
20 20
 
21 21
 #if HAVE_CONFIG_H
22 22
 #include "clamav-config.h"
... ...
@@ -298,6 +298,8 @@ static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
298 298
 
299 299
 #ifdef	NEW_WORLD
300 300
 
301
+#include "matcher.h"
302
+
301 303
 #undef	PARTIAL_DIR
302 304
 
303 305
 #if HAVE_MMAP
... ...
@@ -335,6 +337,7 @@ static struct map {
335 335
 	struct	map	*next;
336 336
 } *map, *tail;
337 337
 
338
+static	int	save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len);
338 339
 static	void	create_map(const char *begin, const char *end);
339 340
 static	void	add_to_map(const char *offset, const char *word);
340 341
 static	const	char	*find_in_map(const char *offset, const char *word);
... ...
@@ -365,8 +368,6 @@ static	void	free_map(void);
365 365
  * TODO: Add support for systems without mmap()
366 366
  *
367 367
  * TODO: partial_dir fall through
368
- *
369
- * FIXME:	Doesn't catch all phishes
370 368
  */
371 369
 int
372 370
 cli_mbox(const char *dir, int desc, cli_ctx *ctx)
... ...
@@ -377,7 +378,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
377 377
 	struct stat statb;
378 378
 	message *m;
379 379
 	fileblob *fb;
380
-	int ret = 0;
380
+	int ret = CL_CLEAN;
381 381
 	int wasAlloced;
382 382
 	struct scanlist *scanlist, *scanelem;
383 383
 
... ...
@@ -516,9 +517,24 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
516 516
 				break;
517 517
 			}
518 518
 
519
-		if((!anyHeadersFound) && find_in_map(start, "\nbegin "))
519
+		if((!anyHeadersFound) &&
520
+		   ((p = find_in_map(start, "\nbegin ")) != NULL) &&
521
+		   (isuuencodebegin(++p)))
520 522
 			/* uuencoded part */
521 523
 			hasuuencode = TRUE;
524
+		else {
525
+			cli_dbgmsg("Nothing encoded, looking for a text part to save\n");
526
+			ret = save_text(ctx, dir, start, size);
527
+			if(wasAlloced)
528
+				free(start);
529
+			else
530
+				munmap(start, size);
531
+
532
+			free_map();
533
+			if(ret != CL_EFORMAT)
534
+				return ret;
535
+			ret = CL_CLEAN;
536
+		}
522 537
 
523 538
 		free_map();
524 539
 
... ...
@@ -536,15 +552,17 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
536 536
 		if(anyHeadersFound || hasuuencode) {
537 537
 			/* TODO: reduce the number of falls through here */
538 538
 			if(hasuuencode)
539
-				cli_dbgmsg("New world - fall back to old uudecoder, type %d\n", type);
539
+				/* TODO: fast track visa */
540
+				cli_warnmsg("New world - fall back to old uudecoder\n");
540 541
 			else
541
-				cli_dbgmsg("cli_mbox: unknown encoder, type %d\n", type);
542
+				cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type);
542 543
 			if(type == CL_TYPE_MAIL)
543 544
 				return cli_parse_mbox(dir, desc, ctx);
544 545
 			cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
545 546
 			return CL_CLEAN;
546 547
 		}
547 548
 
549
+#if	0	/* I don't believe this is needed any more */
548 550
 		/*
549 551
 		 * The message could be a plain text phish
550 552
 		 * FIXME: Can't get to the option whether we are looking for
... ...
@@ -555,12 +573,11 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
555 555
 		 */
556 556
 		if(type == CL_TYPE_MAIL)
557 557
 			return cli_parse_mbox(dir, desc, ctx);
558
+#endif
558 559
 		cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n",
559 560
 			type);
560 561
 		return CL_CLEAN;
561 562
 	}
562
-	free_map();
563
-
564 563
 #if	0
565 564
 	if(wasAlloced) {
566 565
 		const char *max = NULL;
... ...
@@ -640,6 +657,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
640 640
 
641 641
 				cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
642 642
 				if((fb = fileblobCreate()) == NULL) {
643
+					free_map();
643 644
 					if(wasAlloced)
644 645
 						free(start);
645 646
 					else
... ...
@@ -649,7 +667,8 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
649 649
 				}
650 650
 
651 651
 				tmpfilename = cli_gentemp(dir);
652
-				if(tmpfilename == 0) {
652
+				if(tmpfilename == NULL) {
653
+					free_map();
653 654
 					if(wasAlloced)
654 655
 						free(start);
655 656
 					else
... ...
@@ -665,6 +684,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
665 665
 
666 666
 				m = messageCreate();
667 667
 				if(m == NULL) {
668
+					free_map();
668 669
 					if(wasAlloced)
669 670
 						free(start);
670 671
 					else
... ...
@@ -809,6 +829,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
809 809
 
810 810
 				m = messageCreate();
811 811
 				if(m == NULL) {
812
+					free_map();
812 813
 					if(wasAlloced)
813 814
 						free(start);
814 815
 					else
... ...
@@ -871,6 +892,18 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
871 871
 	}
872 872
 	scanelem = scanlist;
873 873
 
874
+	/*
875
+	 * There could be a phish in the plain text part, so save that
876
+	 * FIXME: Can't get to the option whether we are looking for
877
+	 *	phishes or not, so assume we are, this slows things a
878
+	 *	lot
879
+	 * Should be
880
+	 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
881
+	 */
882
+	ret = save_text(ctx, dir, start, size);
883
+
884
+	free_map();
885
+
874 886
 	while(scanelem) {
875 887
 		struct scanlist *n = scanelem->next;
876 888
 
... ...
@@ -887,15 +920,84 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx)
887 887
 	 * FIXME: Need to run cl_scandir() here and return that value
888 888
 	 */
889 889
 	cli_dbgmsg("cli_mbox: ret = %d\n", ret);
890
-	if(ret == 0)
891
-		return CL_SUCCESS;
890
+	if(ret != CL_EFORMAT)
891
+		return ret;
892 892
 
893
-	cli_dbgmsg("New world - don't know what to do - fall back to old world\n");
893
+	cli_warnmsg("New world - don't know what to do - fall back to old world\n");
894 894
 	/* Fall back for now */
895 895
 	lseek(desc, 0L, SEEK_SET);
896 896
 	return cli_parse_mbox(dir, desc, ctx);
897 897
 }
898 898
 
899
+/*
900
+ * Save a text part - it could contain phish or jscript
901
+ */
902
+static int
903
+save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
904
+{
905
+	const char *p;
906
+
907
+	if((p = find_in_map(start, "\n\n")) || (p = find_in_map(start, "\r\n\r\n"))) {
908
+		const char *q;
909
+		fileblob *fb;
910
+		char *tmpfilename;
911
+
912
+		if(((q = find_in_map(start, "base64")) == NULL) &&
913
+		   ((q = find_in_map(start, "quoted_printable")) == NULL)) {
914
+			cli_dbgmsg("It's all plain text!\n");
915
+			if(*p == '\r')
916
+				p += 4;
917
+			else
918
+				p += 2;
919
+			len -= (p - start);
920
+		} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
921
+		   ((q = find_in_map(p, "base64")) == NULL) &&
922
+		   ((q = find_in_map(p, "quoted-printable")) == NULL))
923
+			cli_dbgmsg("Can't find end of plain text - assume it's all\n");
924
+		else
925
+			len = (size_t)(q - p);
926
+
927
+		if(len < 5) {
928
+			cli_dbgmsg("save_text: Too small\n");
929
+			return CL_EFORMAT;
930
+		}
931
+		if(ctx->scanned)
932
+			*ctx->scanned += len / CL_COUNT_PRECISION;
933
+
934
+		/*
935
+		 * This doesn't work, cli_scanbuff isn't designed to be used
936
+		 *	in this way. It gets the "filetype" wrong and then
937
+		 *	doesn't scan correctly
938
+		 */
939
+		if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, 0) == CL_VIRUS) {
940
+			cli_dbgmsg("save_text: found %s\n", *ctx->virname);
941
+			return CL_VIRUS;
942
+		}
943
+
944
+		fb = fileblobCreate();
945
+		if(fb == NULL)
946
+			return CL_EMEM;
947
+
948
+		tmpfilename = cli_gentemp(dir);
949
+
950
+		if(tmpfilename == NULL) {
951
+			fileblobDestroy(fb);
952
+			return CL_ETMPFILE;
953
+		}
954
+		cli_dbgmsg("save plain bit to %s, %u bytes\n",
955
+			tmpfilename, len);
956
+
957
+		fileblobSetFilename(fb, dir, tmpfilename);
958
+		free(tmpfilename);
959
+
960
+		(void)fileblobAddData(fb, (const unsigned char *)p, len);
961
+		fileblobDestroy(fb);
962
+		return CL_SUCCESS;
963
+	}
964
+	cli_dbgmsg("No text part found to save\n");
965
+	return CL_EFORMAT;
966
+}
967
+
899 968
 static void
900 969
 create_map(const char *begin, const char *end)
901 970
 {
... ...
@@ -906,7 +1008,9 @@ create_map(const char *begin, const char *end)
906 906
 		{	"base64",		6	},
907 907
 		{	"quoted-printable",	16	},
908 908
 		{	"\nbegin ",		7	},
909
-		{	"\nFrom ",		7	},
909
+		{	"\nFrom ",		6	},
910
+		{	"\n\n",			2	},
911
+		{	"\r\n\r\n",		4	},
910 912
 		{	NULL,			0	}
911 913
 	};
912 914