git-svn: trunk@2012
Nigel Horne authored on 2006/06/07 21:30:09... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Wed Jun 7 13:29:22 BST 2006 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: NEW_WORLD: Fast phish detection. |
|
4 |
+ |
|
1 | 5 |
Wed Jun 7 12:11:55 CEST 2006 (tk) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/sis.c: fix compilation error on Cygwin, reported by NJH |
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
17 | 17 |
* MA 02110-1301, USA. |
18 | 18 |
*/ |
19 |
-static char const rcsid[] = "$Id: mbox.c,v 1.310 2006/06/06 21:22:00 njh Exp $"; |
|
19 |
+static char const rcsid[] = "$Id: mbox.c,v 1.311 2006/06/07 12:28:49 njh Exp $"; |
|
20 | 20 |
|
21 | 21 |
#if HAVE_CONFIG_H |
22 | 22 |
#include "clamav-config.h" |
... | ... |
@@ -298,6 +298,8 @@ static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; |
298 | 298 |
|
299 | 299 |
#ifdef NEW_WORLD |
300 | 300 |
|
301 |
+#include "matcher.h" |
|
302 |
+ |
|
301 | 303 |
#undef PARTIAL_DIR |
302 | 304 |
|
303 | 305 |
#if HAVE_MMAP |
... | ... |
@@ -335,6 +337,7 @@ static struct map { |
335 | 335 |
struct map *next; |
336 | 336 |
} *map, *tail; |
337 | 337 |
|
338 |
+static int save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len); |
|
338 | 339 |
static void create_map(const char *begin, const char *end); |
339 | 340 |
static void add_to_map(const char *offset, const char *word); |
340 | 341 |
static const char *find_in_map(const char *offset, const char *word); |
... | ... |
@@ -365,8 +368,6 @@ static void free_map(void); |
365 | 365 |
* TODO: Add support for systems without mmap() |
366 | 366 |
* |
367 | 367 |
* TODO: partial_dir fall through |
368 |
- * |
|
369 |
- * FIXME: Doesn't catch all phishes |
|
370 | 368 |
*/ |
371 | 369 |
int |
372 | 370 |
cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
... | ... |
@@ -377,7 +378,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
377 | 377 |
struct stat statb; |
378 | 378 |
message *m; |
379 | 379 |
fileblob *fb; |
380 |
- int ret = 0; |
|
380 |
+ int ret = CL_CLEAN; |
|
381 | 381 |
int wasAlloced; |
382 | 382 |
struct scanlist *scanlist, *scanelem; |
383 | 383 |
|
... | ... |
@@ -516,9 +517,24 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
516 | 516 |
break; |
517 | 517 |
} |
518 | 518 |
|
519 |
- if((!anyHeadersFound) && find_in_map(start, "\nbegin ")) |
|
519 |
+ if((!anyHeadersFound) && |
|
520 |
+ ((p = find_in_map(start, "\nbegin ")) != NULL) && |
|
521 |
+ (isuuencodebegin(++p))) |
|
520 | 522 |
/* uuencoded part */ |
521 | 523 |
hasuuencode = TRUE; |
524 |
+ else { |
|
525 |
+ cli_dbgmsg("Nothing encoded, looking for a text part to save\n"); |
|
526 |
+ ret = save_text(ctx, dir, start, size); |
|
527 |
+ if(wasAlloced) |
|
528 |
+ free(start); |
|
529 |
+ else |
|
530 |
+ munmap(start, size); |
|
531 |
+ |
|
532 |
+ free_map(); |
|
533 |
+ if(ret != CL_EFORMAT) |
|
534 |
+ return ret; |
|
535 |
+ ret = CL_CLEAN; |
|
536 |
+ } |
|
522 | 537 |
|
523 | 538 |
free_map(); |
524 | 539 |
|
... | ... |
@@ -536,15 +552,17 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
536 | 536 |
if(anyHeadersFound || hasuuencode) { |
537 | 537 |
/* TODO: reduce the number of falls through here */ |
538 | 538 |
if(hasuuencode) |
539 |
- cli_dbgmsg("New world - fall back to old uudecoder, type %d\n", type); |
|
539 |
+ /* TODO: fast track visa */ |
|
540 |
+ cli_warnmsg("New world - fall back to old uudecoder\n"); |
|
540 | 541 |
else |
541 |
- cli_dbgmsg("cli_mbox: unknown encoder, type %d\n", type); |
|
542 |
+ cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type); |
|
542 | 543 |
if(type == CL_TYPE_MAIL) |
543 | 544 |
return cli_parse_mbox(dir, desc, ctx); |
544 | 545 |
cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type); |
545 | 546 |
return CL_CLEAN; |
546 | 547 |
} |
547 | 548 |
|
549 |
+#if 0 /* I don't believe this is needed any more */ |
|
548 | 550 |
/* |
549 | 551 |
* The message could be a plain text phish |
550 | 552 |
* FIXME: Can't get to the option whether we are looking for |
... | ... |
@@ -555,12 +573,11 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
555 | 555 |
*/ |
556 | 556 |
if(type == CL_TYPE_MAIL) |
557 | 557 |
return cli_parse_mbox(dir, desc, ctx); |
558 |
+#endif |
|
558 | 559 |
cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n", |
559 | 560 |
type); |
560 | 561 |
return CL_CLEAN; |
561 | 562 |
} |
562 |
- free_map(); |
|
563 |
- |
|
564 | 563 |
#if 0 |
565 | 564 |
if(wasAlloced) { |
566 | 565 |
const char *max = NULL; |
... | ... |
@@ -640,6 +657,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
640 | 640 |
|
641 | 641 |
cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size); |
642 | 642 |
if((fb = fileblobCreate()) == NULL) { |
643 |
+ free_map(); |
|
643 | 644 |
if(wasAlloced) |
644 | 645 |
free(start); |
645 | 646 |
else |
... | ... |
@@ -649,7 +667,8 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
649 | 649 |
} |
650 | 650 |
|
651 | 651 |
tmpfilename = cli_gentemp(dir); |
652 |
- if(tmpfilename == 0) { |
|
652 |
+ if(tmpfilename == NULL) { |
|
653 |
+ free_map(); |
|
653 | 654 |
if(wasAlloced) |
654 | 655 |
free(start); |
655 | 656 |
else |
... | ... |
@@ -665,6 +684,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
665 | 665 |
|
666 | 666 |
m = messageCreate(); |
667 | 667 |
if(m == NULL) { |
668 |
+ free_map(); |
|
668 | 669 |
if(wasAlloced) |
669 | 670 |
free(start); |
670 | 671 |
else |
... | ... |
@@ -809,6 +829,7 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
809 | 809 |
|
810 | 810 |
m = messageCreate(); |
811 | 811 |
if(m == NULL) { |
812 |
+ free_map(); |
|
812 | 813 |
if(wasAlloced) |
813 | 814 |
free(start); |
814 | 815 |
else |
... | ... |
@@ -871,6 +892,18 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
871 | 871 |
} |
872 | 872 |
scanelem = scanlist; |
873 | 873 |
|
874 |
+ /* |
|
875 |
+ * There could be a phish in the plain text part, so save that |
|
876 |
+ * FIXME: Can't get to the option whether we are looking for |
|
877 |
+ * phishes or not, so assume we are, this slows things a |
|
878 |
+ * lot |
|
879 |
+ * Should be |
|
880 |
+ * if((type == CL_TYPE_MAIL) && (!(no-phishing)) |
|
881 |
+ */ |
|
882 |
+ ret = save_text(ctx, dir, start, size); |
|
883 |
+ |
|
884 |
+ free_map(); |
|
885 |
+ |
|
874 | 886 |
while(scanelem) { |
875 | 887 |
struct scanlist *n = scanelem->next; |
876 | 888 |
|
... | ... |
@@ -887,15 +920,84 @@ cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
887 | 887 |
* FIXME: Need to run cl_scandir() here and return that value |
888 | 888 |
*/ |
889 | 889 |
cli_dbgmsg("cli_mbox: ret = %d\n", ret); |
890 |
- if(ret == 0) |
|
891 |
- return CL_SUCCESS; |
|
890 |
+ if(ret != CL_EFORMAT) |
|
891 |
+ return ret; |
|
892 | 892 |
|
893 |
- cli_dbgmsg("New world - don't know what to do - fall back to old world\n"); |
|
893 |
+ cli_warnmsg("New world - don't know what to do - fall back to old world\n"); |
|
894 | 894 |
/* Fall back for now */ |
895 | 895 |
lseek(desc, 0L, SEEK_SET); |
896 | 896 |
return cli_parse_mbox(dir, desc, ctx); |
897 | 897 |
} |
898 | 898 |
|
899 |
+/* |
|
900 |
+ * Save a text part - it could contain phish or jscript |
|
901 |
+ */ |
|
902 |
+static int |
|
903 |
+save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len) |
|
904 |
+{ |
|
905 |
+ const char *p; |
|
906 |
+ |
|
907 |
+ if((p = find_in_map(start, "\n\n")) || (p = find_in_map(start, "\r\n\r\n"))) { |
|
908 |
+ const char *q; |
|
909 |
+ fileblob *fb; |
|
910 |
+ char *tmpfilename; |
|
911 |
+ |
|
912 |
+ if(((q = find_in_map(start, "base64")) == NULL) && |
|
913 |
+ ((q = find_in_map(start, "quoted_printable")) == NULL)) { |
|
914 |
+ cli_dbgmsg("It's all plain text!\n"); |
|
915 |
+ if(*p == '\r') |
|
916 |
+ p += 4; |
|
917 |
+ else |
|
918 |
+ p += 2; |
|
919 |
+ len -= (p - start); |
|
920 |
+ } else if(((q = find_in_map(p, "\nFrom ")) == NULL) && |
|
921 |
+ ((q = find_in_map(p, "base64")) == NULL) && |
|
922 |
+ ((q = find_in_map(p, "quoted-printable")) == NULL)) |
|
923 |
+ cli_dbgmsg("Can't find end of plain text - assume it's all\n"); |
|
924 |
+ else |
|
925 |
+ len = (size_t)(q - p); |
|
926 |
+ |
|
927 |
+ if(len < 5) { |
|
928 |
+ cli_dbgmsg("save_text: Too small\n"); |
|
929 |
+ return CL_EFORMAT; |
|
930 |
+ } |
|
931 |
+ if(ctx->scanned) |
|
932 |
+ *ctx->scanned += len / CL_COUNT_PRECISION; |
|
933 |
+ |
|
934 |
+ /* |
|
935 |
+ * This doesn't work, cli_scanbuff isn't designed to be used |
|
936 |
+ * in this way. It gets the "filetype" wrong and then |
|
937 |
+ * doesn't scan correctly |
|
938 |
+ */ |
|
939 |
+ if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, 0) == CL_VIRUS) { |
|
940 |
+ cli_dbgmsg("save_text: found %s\n", *ctx->virname); |
|
941 |
+ return CL_VIRUS; |
|
942 |
+ } |
|
943 |
+ |
|
944 |
+ fb = fileblobCreate(); |
|
945 |
+ if(fb == NULL) |
|
946 |
+ return CL_EMEM; |
|
947 |
+ |
|
948 |
+ tmpfilename = cli_gentemp(dir); |
|
949 |
+ |
|
950 |
+ if(tmpfilename == NULL) { |
|
951 |
+ fileblobDestroy(fb); |
|
952 |
+ return CL_ETMPFILE; |
|
953 |
+ } |
|
954 |
+ cli_dbgmsg("save plain bit to %s, %u bytes\n", |
|
955 |
+ tmpfilename, len); |
|
956 |
+ |
|
957 |
+ fileblobSetFilename(fb, dir, tmpfilename); |
|
958 |
+ free(tmpfilename); |
|
959 |
+ |
|
960 |
+ (void)fileblobAddData(fb, (const unsigned char *)p, len); |
|
961 |
+ fileblobDestroy(fb); |
|
962 |
+ return CL_SUCCESS; |
|
963 |
+ } |
|
964 |
+ cli_dbgmsg("No text part found to save\n"); |
|
965 |
+ return CL_EFORMAT; |
|
966 |
+} |
|
967 |
+ |
|
899 | 968 |
static void |
900 | 969 |
create_map(const char *begin, const char *end) |
901 | 970 |
{ |
... | ... |
@@ -906,7 +1008,9 @@ create_map(const char *begin, const char *end) |
906 | 906 |
{ "base64", 6 }, |
907 | 907 |
{ "quoted-printable", 16 }, |
908 | 908 |
{ "\nbegin ", 7 }, |
909 |
- { "\nFrom ", 7 }, |
|
909 |
+ { "\nFrom ", 6 }, |
|
910 |
+ { "\n\n", 2 }, |
|
911 |
+ { "\r\n\r\n", 4 }, |
|
910 | 912 |
{ NULL, 0 } |
911 | 913 |
}; |
912 | 914 |
|