Browse code

Tidy and add mmap test code

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1190 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/12/17 00:29:08
Showing 1 changed file
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.201  2004/12/16 15:29:08  nigelhorne
21
+ * Tidy and add mmap test code
22
+ *
20 23
  * Revision 1.200  2004/12/07 23:08:10  nigelhorne
21 24
  * Fix empty content-type in multipart header
22 25
  *
... ...
@@ -588,7 +591,7 @@
588 588
  * Compilable under SCO; removed duplicate code with message.c
589 589
  *
590 590
  */
591
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.200 2004/12/07 23:08:10 nigelhorne Exp $";
591
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.201 2004/12/16 15:29:08 nigelhorne Exp $";
592 592
 
593 593
 #if HAVE_CONFIG_H
594 594
 #include "clamav-config.h"
... ...
@@ -727,6 +730,7 @@ typedef enum	{ FALSE = 0, TRUE = 1 } bool;
727 727
  */
728 728
 #define	PARTIAL_DIR
729 729
 
730
+static	int	cli_parse_mbox(const char *dir, int desc, unsigned int options);
730 731
 static	message	*parseEmailHeaders(const message *m, const table_t *rfc821Table);
731 732
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
732 733
 static	int	parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options);
... ...
@@ -743,6 +747,9 @@ static	char	*rfc822comments(const char *in);
743 743
 #ifdef	PARTIAL_DIR
744 744
 static	int	rfc1341(message *m, const char *dir);
745 745
 #endif
746
+#ifdef	notdef
747
+static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
748
+#endif
746 749
 
747 750
 static	void	checkURLs(message *m, const char *dir);
748 751
 #ifdef	WITH_CURL
... ...
@@ -863,6 +870,211 @@ static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
863 863
 #define	O_BINARY	0
864 864
 #endif
865 865
 
866
#ifdef	notdef

#if HAVE_MMAP
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#else /* HAVE_SYS_MMAN_H */
#undef HAVE_MMAP
#endif
#endif

#endif	/* notdef */

#if	defined(notdef) && HAVE_MMAP

/*
 * Scan forward from p for the blank line that terminates a block of headers,
 * i.e. a '\n' immediately followed by '\n' or '\r'.
 *
 * last points at the final valid byte of the buffer. No byte after last is
 * ever read.
 *
 * Returns a pointer to the byte after the two characters that were matched.
 * If no blank line is found the result is >= last (at most one past last).
 *
 * The byte that follows a ';' is stepped over without being examined
 * (presumably so that a parameter continuation is not taken for the end of
 * the headers - TODO confirm).
 */
static const char *
cli_mbox_end_of_headers(const char *p, const char *last)
{
	while(p < last) {
		if(*p == ';')
			p++;
		else if(*p == '\n') {
			p++;	/* p <= last here, so the read below is in range */
			if((*p == '\n') || (*p == '\r')) {
				p++;
				break;
			}
		}
		p++;
	}
	return p;
}

/*
 * This could be the future. Instead of parsing and decoding it just decodes.
 * Currently this code is about 50% *slower* than the full blown parser, but
 * that may improve with time.
 * USE IT AT YOUR PERIL, a large number of viruses are not detected with this
 * method, possibly because the decoded files must be exact and not have
 * extra data at the start or end, which this code will produce.
 *
 * The file open on desc is mapped read-only, the strings "base64" and
 * "quoted-printable" are searched for (case sensitively) in the headers and
 * then in the body, and up to 1MB of the data that follows is handed line by
 * line to a message which is then exported to dir with messageToFileblob().
 *
 * dir:		directory passed to messageToFileblob() and cli_parse_mbox()
 * desc:	open file descriptor of the mail to be scanned
 * options:	passed through untouched to cli_parse_mbox()
 *
 * Returns CL_EOPEN if desc can't be fstat'ed, CL_CLEAN for an empty file or
 * when a fileblob was produced, CL_EMEM if memory or the mapping can't be
 * obtained. In every other case (end of headers not found, no known encoding,
 * no data to decode, no fileblob produced) the result is that of the full
 * parser, cli_parse_mbox().
 */
int
cli_mbox(const char *dir, int desc, unsigned int options)
{
	char *start, *line;
	const char *ptr, *buf, *p, *last, *end;
	int decoder;
	long bytesleft;
	size_t size, headersize, bodysize;
	struct stat statb;
	message *m;
	fileblob *fb;

	if(fstat(desc, &statb) < 0)
		return CL_EOPEN;

	size = statb.st_size;

	if(size == 0)
		return CL_CLEAN;

	m = messageCreate();
	if(m == NULL)
		return CL_EMEM;

	start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
	if(start == MAP_FAILED) {
		messageDestroy(m);
		return CL_EMEM;
	}

	cli_dbgmsg("mmap'ed mbox\n");

	last = &start[size - 1];

	/*
	 * Look for the end of the headers
	 */
	ptr = cli_mbox_end_of_headers(start, last);
	if(ptr >= last) {
		cli_dbgmsg("cli_mbox: can't determine the end of the headers\n");
		messageDestroy(m);
		munmap(start, size);
		return cli_parse_mbox(dir, desc, options);
	}

	headersize = (size_t)(ptr - start);
	bodysize = size - headersize;

	/* size_t has no portable pre-C99 conversion, so print as unsigned long */
	cli_dbgmsg("cli_mbox: size=%lu, headersize=%lu, bodysize=%lu\n",
		(unsigned long)size, (unsigned long)headersize,
		(unsigned long)bodysize);

	buf = ptr;
	decoder = 0;	/* bit 0: base64 seen, bit 1: quoted-printable seen */

	/*
	 * Would be nice to have a case insensitive cli_memstr()
	 *
	 * NOTE(review): when the encoding is named in the headers the message
	 * is not told about it with messageSetEncoding(), only when it is
	 * found in the body. That is how the code has always behaved, but it
	 * may be one reason for the poor detection rate - confirm
	 */
	if(cli_pmemstr(start, headersize, "base64", 6)) {
		cli_dbgmsg("Header base64\n");
		decoder |= 1;
	}
	if(cli_pmemstr(start, headersize, "quoted-printable", 16)) {
		cli_dbgmsg("Header quoted-printable\n");
		decoder |= 2;
	}
	if(!(decoder&1)) {
		p = cli_pmemstr(ptr, bodysize, "base64", 6);
		if(p != NULL) {
			cli_dbgmsg("Body base64\n");
			decoder |= 1;
			buf = &p[6];
			messageSetEncoding(m, "base64");
		}
	}
	if(!(decoder&2)) {
		p = cli_pmemstr(ptr, bodysize, "quoted-printable", 16);
		if(p != NULL) {
			cli_dbgmsg("Body quoted-printable\n");
			decoder |= 2;
			/* start decoding after whichever keyword comes first */
			if((p < buf) || (buf == ptr))
				buf = &p[16];
			messageSetEncoding(m, "quoted-printable");
		}
	}

	if(decoder == 0) {
		messageDestroy(m);
		munmap(start, size);
		cli_dbgmsg("cli_mbox: unknown encoder\n");
		return cli_parse_mbox(dir, desc, options);
	}

	/*
	 * Skip the rest of the current line, never running off the end of
	 * the mapping
	 */
	while((buf <= last) && (*buf != '\n'))
		buf++;

	/*
	 * Look for the end of the headers
	 */
	buf = cli_mbox_end_of_headers(buf, last);

	/*
	 * Skip to the first character that could be encoded data. The cast is
	 * needed because isalnum() is undefined for negative char values
	 */
	while((buf <= last) && !isalnum((unsigned char)*buf))
		buf++;

	if(buf > last) {
		cli_dbgmsg("cli_mbox: no encoded data found\n");
		messageDestroy(m);
		munmap(start, size);
		return cli_parse_mbox(dir, desc, options);
	}

	bytesleft = (long)(last - buf) + 1;

	if(bytesleft > 1024*1024)
		bytesleft = 1024*1024;	/* should be MaxStreamSize, I guess */

	cli_dbgmsg("cli_mbox: decoding %ld bytes\n", bytesleft);

	end = &buf[bytesleft];	/* one past the last byte to be decoded */
	line = NULL;

	while(buf < end) {
		const char *eol;
		size_t length;
		char *bigger;

		for(eol = buf; (eol < end) && (*eol != '\n') && (*eol != '\r'); eol++)
			;

		length = (size_t)(eol - buf);

		/*
		 * Keep the old buffer until the new one is known to be good.
		 * Assumes that cli_realloc(), like realloc(), leaves the old
		 * block untouched on failure - TODO confirm
		 */
		bigger = cli_realloc(line, length + 1);
		if(bigger == NULL) {
			free(line);
			munmap(start, size);
			messageDestroy(m);
			return CL_EMEM;
		}
		line = bigger;

		memcpy(line, buf, length);
		line[length] = '\0';

		if(messageAddStr(m, line) < 0)
			break;

		/*
		 * Step over the line terminator (LF, CR or CRLF) without
		 * swallowing a data byte that follows a lone CR
		 */
		if((eol < end) && (*eol == '\r'))
			eol++;
		if((eol < end) && (*eol == '\n'))
			eol++;
		buf = eol;
	}
	free(line);

	munmap(start, size);

	fb = messageToFileblob(m, dir);
	messageDestroy(m);

	if(fb) {
		fileblobDestroy(fb);
		return CL_CLEAN;	/* a lie - but it gets things going */
	}
	return cli_parse_mbox(dir, desc, options);
}
#else
/*
 * Entry point for scanning a mail file: the experimental mmap based decoder
 * is not compiled in, so hand everything straight to the full parser and
 * return its result.
 */
int
cli_mbox(const char *dir, int desc, unsigned int options)
{
	return cli_parse_mbox(dir, desc, options);
}
#endif
1070
+
866 1071
 /*
867 1072
  * TODO: when signal handling is added, need to remove temp files when a
868 1073
  *	signal is received
... ...
@@ -877,8 +1089,8 @@ static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
877 877
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
878 878
  * TODO: Look into TNEF. Is there anything that needs to be done here?
879 879
  */
880
-int
881
-cli_mbox(const char *dir, int desc, unsigned int options)
880
+static int
881
+cli_parse_mbox(const char *dir, int desc, unsigned int options)
882 882
 {
883 883
 	int retcode, i;
884 884
 	message *m, *body;
... ...
@@ -968,7 +1180,11 @@ cli_mbox(const char *dir, int desc, unsigned int options)
968 968
 					}
969 969
 				/*
970 970
 				 * Starting a new message, throw away all the
971
-				 * information about the old one
971
+				 * information about the old one. It would
972
+				 * be best to be able to scan this message
973
+				 * now, but cli_scanfile needs arguments
974
+				 * that haven't been passed here so it can't be
975
+				 * called
972 976
 				 */
973 977
 				m = body;
974 978
 				messageReset(body);
... ...
@@ -980,7 +1196,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
980 980
 				break;
981 981
 		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
982 982
 
983
-		cli_dbgmsg("Deal with email number %d\n", messagenumber);
983
+		cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
984 984
 	} else {
985 985
 		/*
986 986
 		 * It's a single message, parse the headers then the body
... ...
@@ -2348,7 +2564,9 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2348 2348
 				if((fb = fileblobCreate()) != NULL) {
2349 2349
 					cli_dbgmsg("Found a bounce message with no header\n");
2350 2350
 					fileblobSetFilename(fb, dir, "bounce");
2351
-					fileblobAddData(fb, "Received: by clamd (bounce)\n", 28);
2351
+					fileblobAddData(fb,
2352
+						(const unsigned char *)"Received: by clamd (bounce)\n",
2353
+						28);
2352 2354
 
2353 2355
 					fb = textToFileblob(t_line, fb);
2354 2356
 
... ...
@@ -3506,3 +3724,44 @@ print_trace(int use_syslog)
3506 3506
 	free(strings);
3507 3507
 }
3508 3508
 #endif
3509
+
3510
+#ifdef	notdef
3511
/*
 * like cli_memstr - but returns the location of the match
 *
 * Searches the first hs bytes of haystack for the first occurrence of the
 * ns bytes at needle. Both are treated as raw memory: neither needs to be
 * NUL terminated and both may contain NUL bytes.
 *
 * Returns a pointer to the start of the first match, or NULL if there is no
 * match or the needle is longer than the haystack. An empty needle (ns == 0)
 * matches at haystack.
 */
static const char *
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
	const char *pt, *hay;
	size_t n;

	if(hs < ns)
		return NULL;

	if(ns == 0)
		return haystack;

	hay = haystack;
	n = hs;		/* number of bytes left at hay */

	while((pt = memchr(hay, needle[0], n)) != NULL) {
		/*
		 * Pointer difference, not a difference of pointers cast to
		 * int, which truncates where pointers are wider than int
		 */
		n -= (size_t)(pt - hay);
		if(n < ns)
			break;

		if(memcmp(pt, needle, ns) == 0)
			return pt;

		/* no match here, carry on from the next byte (n >= ns >= 1) */
		hay = &pt[1];
		n--;
	}

	return NULL;
}
3549
+#endif