git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1190 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/12/17 00:29:08... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.201 2004/12/16 15:29:08 nigelhorne |
|
21 |
+ * Tidy and add mmap test code |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.200 2004/12/07 23:08:10 nigelhorne |
21 | 24 |
* Fix empty content-type in multipart header |
22 | 25 |
* |
... | ... |
@@ -588,7 +591,7 @@ |
588 | 588 |
* Compilable under SCO; removed duplicate code with message.c |
589 | 589 |
* |
590 | 590 |
*/ |
591 |
-static char const rcsid[] = "$Id: mbox.c,v 1.200 2004/12/07 23:08:10 nigelhorne Exp $"; |
|
591 |
+static char const rcsid[] = "$Id: mbox.c,v 1.201 2004/12/16 15:29:08 nigelhorne Exp $"; |
|
592 | 592 |
|
593 | 593 |
#if HAVE_CONFIG_H |
594 | 594 |
#include "clamav-config.h" |
... | ... |
@@ -727,6 +730,7 @@ typedef enum { FALSE = 0, TRUE = 1 } bool; |
727 | 727 |
*/ |
728 | 728 |
#define PARTIAL_DIR |
729 | 729 |
|
730 |
+static int cli_parse_mbox(const char *dir, int desc, unsigned int options); |
|
730 | 731 |
static message *parseEmailHeaders(const message *m, const table_t *rfc821Table); |
731 | 732 |
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); |
732 | 733 |
static int parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options); |
... | ... |
@@ -743,6 +747,9 @@ static char *rfc822comments(const char *in); |
743 | 743 |
#ifdef PARTIAL_DIR |
744 | 744 |
static int rfc1341(message *m, const char *dir); |
745 | 745 |
#endif |
746 |
+#ifdef notdef |
|
747 |
+static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns); |
|
748 |
+#endif |
|
746 | 749 |
|
747 | 750 |
static void checkURLs(message *m, const char *dir); |
748 | 751 |
#ifdef WITH_CURL |
... | ... |
@@ -863,6 +870,211 @@ static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; |
863 | 863 |
#define O_BINARY 0 |
864 | 864 |
#endif |
865 | 865 |
|
866 |
+#ifdef notdef |
|
867 |
+ |
|
868 |
+#if HAVE_MMAP |
|
869 |
+#if HAVE_SYS_MMAN_H |
|
870 |
+#include <sys/mman.h> |
|
871 |
+#else /* HAVE_SYS_MMAN_H */ |
|
872 |
+#undef HAVE_MMAP |
|
873 |
+#endif |
|
874 |
+#endif |
|
875 |
+ |
|
876 |
+/* |
|
877 |
+ * This could be the future. Instead of parsing and decoding it just decodes. |
|
878 |
+ * Currently this code is about 50% *slower* than the full blown parser, but |
|
879 |
+ * that may improve with time. |
|
880 |
+ * USE IT AT YOUR PERIL, a large number of viruses are not detected with this |
|
881 |
+ * method, possibly because the decoded files must be exact and not have |
|
882 |
+ * extra data at the start or end, which this code will produce. |
|
883 |
+ */ |
|
884 |
+int |
|
885 |
+cli_mbox(const char *dir, int desc, unsigned int options) |
|
886 |
+{ |
|
887 |
+ char *start, *ptr, *line, *buf, *p; |
|
888 |
+ const char *last; |
|
889 |
+ int decoder; |
|
890 |
+ long bytesleft; |
|
891 |
+ size_t size, headersize, bodysize; |
|
892 |
+ struct stat statb; |
|
893 |
+ message *m; |
|
894 |
+ fileblob *fb; |
|
895 |
+ |
|
896 |
+ if(fstat(desc, &statb) < 0) |
|
897 |
+ return CL_EOPEN; |
|
898 |
+ |
|
899 |
+ size = statb.st_size; |
|
900 |
+ |
|
901 |
+ if(size == 0) |
|
902 |
+ return CL_CLEAN; |
|
903 |
+ |
|
904 |
+ m = messageCreate(); |
|
905 |
+ if(m == NULL) |
|
906 |
+ return CL_EMEM; |
|
907 |
+ |
|
908 |
+ start = ptr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0); |
|
909 |
+ if(ptr == MAP_FAILED) { |
|
910 |
+ messageDestroy(m); |
|
911 |
+ return CL_EMEM; |
|
912 |
+ } |
|
913 |
+ |
|
914 |
+ cli_dbgmsg("mmap'ed mbox\n"); |
|
915 |
+ |
|
916 |
+ last = &start[size - 1]; |
|
917 |
+ |
|
918 |
+ /* |
|
919 |
+ * Look for the end of the headers |
|
920 |
+ */ |
|
921 |
+ while(ptr < last) { |
|
922 |
+ if(*ptr == ';') |
|
923 |
+ ptr++; |
|
924 |
+ else if(*ptr == '\n') { |
|
925 |
+ ptr++; |
|
926 |
+ if((*ptr == '\n') || (*ptr == '\r')) { |
|
927 |
+ ptr++; |
|
928 |
+ break; |
|
929 |
+ } |
|
930 |
+ } |
|
931 |
+ ptr++; |
|
932 |
+ } |
|
933 |
+ if(ptr >= last) { |
|
934 |
+ cli_dbgmsg("cli_mbox: can't determine the end of the headers\n"); |
|
935 |
+ messageDestroy(m); |
|
936 |
+ munmap(start, size); |
|
937 |
+ return cli_parse_mbox(dir, desc, options); |
|
938 |
+ } |
|
939 |
+ |
|
940 |
+ headersize = (size_t)(ptr - start); |
|
941 |
+ bodysize = size - headersize; |
|
942 |
+ |
|
943 |
+ cli_dbgmsg("cli_mbox: size=%u, headersize=%u, bodysize=%u\n", size, headersize, bodysize); |
|
944 |
+ |
|
945 |
+ bytesleft = bodysize; |
|
946 |
+ buf = ptr; |
|
947 |
+ decoder = 0; |
|
948 |
+ |
|
949 |
+ /* Would be nice to have a case insensitive cli_memstr() */ |
|
950 |
+ if(cli_pmemstr(start, headersize, "base64", 6)) { |
|
951 |
+ cli_dbgmsg("Header base64\n"); |
|
952 |
+ decoder |= 1; |
|
953 |
+ } |
|
954 |
+ if(cli_pmemstr(start, headersize, "quoted-printable", 16)) { |
|
955 |
+ cli_dbgmsg("Header quoted-printable\n"); |
|
956 |
+ decoder |= 2; |
|
957 |
+ } |
|
958 |
+ if(!(decoder&1)) |
|
959 |
+ if((p = (char *)cli_pmemstr(ptr, bodysize, "base64", 6)) != NULL) { |
|
960 |
+ cli_dbgmsg("Body base64\n"); |
|
961 |
+ decoder |= 1; |
|
962 |
+ buf = &p[6]; |
|
963 |
+ bytesleft = (unsigned long)(last - buf) + 1; |
|
964 |
+ messageSetEncoding(m, "base64"); |
|
965 |
+ } |
|
966 |
+ if(!(decoder&2)) |
|
967 |
+ if((p = (char *)cli_pmemstr(ptr, bodysize, "quoted-printable", 16)) != NULL) { |
|
968 |
+ cli_dbgmsg("Body quoted-printable\n"); |
|
969 |
+ decoder |= 2; |
|
970 |
+ if((p < buf) || (buf == ptr)) { |
|
971 |
+ buf = &p[16]; |
|
972 |
+ bytesleft = (unsigned long)(last - buf) + 1; |
|
973 |
+ } |
|
974 |
+ messageSetEncoding(m, "quoted-printable"); |
|
975 |
+ } |
|
976 |
+ |
|
977 |
+ if(decoder == 0) { |
|
978 |
+ messageDestroy(m); |
|
979 |
+ munmap(start, size); |
|
980 |
+ cli_dbgmsg("cli_mbox: unknown encoder\n"); |
|
981 |
+ return cli_parse_mbox(dir, desc, options); |
|
982 |
+ } |
|
983 |
+ |
|
984 |
+ line = NULL; |
|
985 |
+ |
|
986 |
+ while(*buf != '\n') { |
|
987 |
+ buf++; |
|
988 |
+ bytesleft--; |
|
989 |
+ } |
|
990 |
+ /* |
|
991 |
+ * Look for the end of the headers |
|
992 |
+ */ |
|
993 |
+ while(buf < last) { |
|
994 |
+ if(*buf == ';') { |
|
995 |
+ buf++; |
|
996 |
+ bytesleft--; |
|
997 |
+ } else if(*buf == '\n') { |
|
998 |
+ buf++; |
|
999 |
+ bytesleft--; |
|
1000 |
+ if((*buf == '\n') || (*buf == '\r')) { |
|
1001 |
+ buf++; |
|
1002 |
+ bytesleft--; |
|
1003 |
+ break; |
|
1004 |
+ } |
|
1005 |
+ } |
|
1006 |
+ buf++; |
|
1007 |
+ bytesleft--; |
|
1008 |
+ } |
|
1009 |
+ |
|
1010 |
+ while(!isalnum(*buf)) { |
|
1011 |
+ buf++; |
|
1012 |
+ bytesleft--; |
|
1013 |
+ } |
|
1014 |
+ |
|
1015 |
+ if(bytesleft > 1024*1024) |
|
1016 |
+ bytesleft = 1024*1024; /* should be MaxStreamSize, I guess */ |
|
1017 |
+ |
|
1018 |
+ cli_dbgmsg("cli_mbox: decoding %ld bytes\n", bytesleft); |
|
1019 |
+ |
|
1020 |
+ while(bytesleft > 0) { |
|
1021 |
+ int length = 0; |
|
1022 |
+ |
|
1023 |
+ /*printf("%ld: ", bytesleft); fflush(stdout);*/ |
|
1024 |
+ |
|
1025 |
+ for(ptr = buf; bytesleft && (*ptr != '\n') && (*ptr != '\r'); ptr++) { |
|
1026 |
+ length++; |
|
1027 |
+ --bytesleft; |
|
1028 |
+ } |
|
1029 |
+ |
|
1030 |
+ /*printf("%d: ", length); fflush(stdout);*/ |
|
1031 |
+ |
|
1032 |
+ line = cli_realloc(line, length + 1); |
|
1033 |
+ |
|
1034 |
+ memcpy(line, buf, length); |
|
1035 |
+ line[length] = '\0'; |
|
1036 |
+ |
|
1037 |
+ /*puts(line);*/ |
|
1038 |
+ |
|
1039 |
+ if(messageAddStr(m, line) < 0) |
|
1040 |
+ break; |
|
1041 |
+ |
|
1042 |
+ if((bytesleft > 0) && (*ptr == '\r')) { |
|
1043 |
+ ptr++; |
|
1044 |
+ bytesleft--; |
|
1045 |
+ } |
|
1046 |
+ buf = ++ptr; |
|
1047 |
+ bytesleft--; |
|
1048 |
+ } |
|
1049 |
+ if(line) |
|
1050 |
+ free(line); |
|
1051 |
+ |
|
1052 |
+ munmap(start, size); |
|
1053 |
+ |
|
1054 |
+ fb = messageToFileblob(m, dir); |
|
1055 |
+ messageDestroy(m); |
|
1056 |
+ |
|
1057 |
+ if(fb) { |
|
1058 |
+ fileblobDestroy(fb); |
|
1059 |
+ return CL_CLEAN; /* a lie - but it gets things going */ |
|
1060 |
+ } |
|
1061 |
+ return cli_parse_mbox(dir, desc, options); |
|
1062 |
+} |
|
1063 |
+#else |
|
1064 |
/*
 * Public entry point used when the experimental mmap decoder is not
 * compiled in (notdef is not defined): the work is handed straight to
 * cli_parse_mbox() and its status is returned unchanged.
 */
int
cli_mbox(const char *dir, int desc, unsigned int options)
{
	const int status = cli_parse_mbox(dir, desc, options);

	return status;
}
|
1069 |
+#endif |
|
1070 |
+ |
|
866 | 1071 |
/* |
867 | 1072 |
* TODO: when signal handling is added, need to remove temp files when a |
868 | 1073 |
* signal is received |
... | ... |
@@ -877,8 +1089,8 @@ static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; |
877 | 877 |
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody |
878 | 878 |
* TODO: Look into TNEF. Is there anything that needs to be done here? |
879 | 879 |
*/ |
880 |
-int |
|
881 |
-cli_mbox(const char *dir, int desc, unsigned int options) |
|
880 |
+static int |
|
881 |
+cli_parse_mbox(const char *dir, int desc, unsigned int options) |
|
882 | 882 |
{ |
883 | 883 |
int retcode, i; |
884 | 884 |
message *m, *body; |
... | ... |
@@ -968,7 +1180,11 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
968 | 968 |
} |
969 | 969 |
/* |
970 | 970 |
* Starting a new message, throw away all the |
971 |
- * information about the old one |
|
971 |
+ * information about the old one. It would |
|
972 |
+ * be best to be able to scan this message |
|
973 |
+ * now, but cli_scanfile needs arguments |
|
974 |
+ * that haven't been passed here so it can't be |
|
975 |
+ * called |
|
972 | 976 |
*/ |
973 | 977 |
m = body; |
974 | 978 |
messageReset(body); |
... | ... |
@@ -980,7 +1196,7 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
980 | 980 |
break; |
981 | 981 |
} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL); |
982 | 982 |
|
983 |
- cli_dbgmsg("Deal with email number %d\n", messagenumber); |
|
983 |
+ cli_dbgmsg("Extract attachments from email %d\n", messagenumber); |
|
984 | 984 |
} else { |
985 | 985 |
/* |
986 | 986 |
* It's a single message, parse the headers then the body |
... | ... |
@@ -2348,7 +2564,9 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t |
2348 | 2348 |
if((fb = fileblobCreate()) != NULL) { |
2349 | 2349 |
cli_dbgmsg("Found a bounce message with no header\n"); |
2350 | 2350 |
fileblobSetFilename(fb, dir, "bounce"); |
2351 |
- fileblobAddData(fb, "Received: by clamd (bounce)\n", 28); |
|
2351 |
+ fileblobAddData(fb, |
|
2352 |
+ (const unsigned char *)"Received: by clamd (bounce)\n", |
|
2353 |
+ 28); |
|
2352 | 2354 |
|
2353 | 2355 |
fb = textToFileblob(t_line, fb); |
2354 | 2356 |
|
... | ... |
@@ -3506,3 +3724,44 @@ print_trace(int use_syslog) |
3506 | 3506 |
free(strings); |
3507 | 3507 |
} |
3508 | 3508 |
#endif |
3509 |
+ |
|
3510 |
+#ifdef notdef |
|
3511 |
/*
 * like cli_memstr - but returns the location of the match
 *
 * Searches the hs bytes starting at haystack for the first occurrence of
 * the ns bytes starting at needle. Neither buffer needs to be NUL
 * terminated and both may contain NUL bytes.
 *
 * Returns a pointer to the start of the first match inside haystack, or
 * NULL if there is no match or the needle is longer than the haystack.
 * An empty needle matches at haystack itself.
 */
static const char *
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
	const char *pt, *hay;
	size_t n;

	if(hs < ns)
		return NULL;

	if(ns == 0)
		return haystack;

	hay = haystack;
	n = hs;	/* number of bytes from hay to the end of the haystack */

	/*
	 * A match can only start in the first (n - ns + 1) bytes that are
	 * left, so only those are searched for the needle's first byte.
	 * That also guarantees that memcmp() never reads beyond the haystack.
	 */
	while((pt = memchr(hay, needle[0], n - ns + 1)) != NULL) {
		if(memcmp(pt, needle, ns) == 0)
			return pt;

		/*
		 * Carry on with the byte after the failed candidate. The
		 * distance is a pointer difference, casting the pointers
		 * themselves to int loses bits on LP64 machines.
		 */
		n -= (size_t)(pt - hay) + 1;
		if(n < ns)
			break;
		hay = pt + 1;
	}

	return NULL;
}
|
3549 |
+#endif |