Browse code

bb12380: Added limits to the mailbox parser.

Implemented several maximums in parsing MIME messages to avoid Denial of
Service attempts, and improved the parsing logic to avoid repeatedly
calling the realloc function. These changes address excessively long scan
times for specially crafted files.
This is in response to CVE-2019-15961.
The limits added are:
1. Limit on number of MIME parts per message.
2. Limit on number of header bytes.
3. Limit on number of email headers.
4. Limit on number of line folds.
5. Limit on number of MIME arguments.

Andy Ragusa authored on 2019/11/08 02:10:26
Showing 1 changed file
... ...
@@ -23,7 +23,6 @@
23 23
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 24
  *  MA 02110-1301, USA.
25 25
  */
26
-
27 26
 #if HAVE_CONFIG_H
28 27
 #include "clamav-config.h"
29 28
 #endif
... ...
@@ -204,9 +203,9 @@ typedef struct mbox_ctx {
204 204
 #endif
205 205
 
206 206
 static int cli_parse_mbox(const char *dir, cli_ctx *ctx);
207
-static message *parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821Table, const char *firstLine, const char *dir);
208
-static message *parseEmailHeaders(message *m, const table_t *rfc821Table);
209
-static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
207
+static message *parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821Table, const char *firstLine, const char *dir, cli_ctx *ctx, bool *heuristicFound);
208
+static message *parseEmailHeaders(message *m, const table_t *rfc821Table, bool *heuristicFound);
209
+static int parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound);
210 210
 static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata);
211 211
 static mbox_status parseRootMHTML(mbox_ctx *mctx, message *m, text *t);
212 212
 static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
... ...
@@ -215,7 +214,7 @@ static int boundaryEnd(const char *line, const char *boundary);
215 215
 static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
216 216
 static int getTextPart(message *const messages[], size_t size);
217 217
 static size_t strip(char *buf, int len);
218
-static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
218
+static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound);
219 219
 static int saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
220 220
 static char *rfc2047(const char *in);
221 221
 static char *rfc822comments(const char *in, char *out);
... ...
@@ -236,6 +235,12 @@ static blob *getHrefs(message *m, tag_arguments_t *hrefs);
236 236
 static void hrefs_done(blob *b, tag_arguments_t *hrefs);
237 237
 static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
238 238
 
239
+static bool haveTooManyMIMEPartsPerMessage(size_t mimePartCnt, cli_ctx *ctx);
240
+static bool hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx);
241
+static bool haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx);
242
+static bool haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx);
243
+static bool haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx);
244
+
239 245
 /* Maximum line length according to RFC2821 */
240 246
 #define RFC2821LENGTH 1000
241 247
 
... ...
@@ -284,6 +289,12 @@ static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
284 284
                          */
285 285
 #define KNOWBOT 14      /* Unknown and undocumented format? */
286 286
 
287
+#define HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER (256 * 1024)
288
+#define HEURISTIC_EMAIL_MAX_HEADER_BYTES (1024 * 256)
289
+#define HEURISTIC_EMAIL_MAX_HEADERS 1024
290
+#define HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE 1024
291
+#define HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER 256
292
+
287 293
 static const struct tableinit {
288 294
     const char *key;
289 295
     int value;
... ...
@@ -426,7 +437,7 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
426 426
 		 */
427 427
         bool lastLineWasEmpty;
428 428
         int messagenumber;
429
-        message *m = messageCreate();
429
+        message *m = messageCreate(); /*Create an empty email */
430 430
 
431 431
         if (m == NULL)
432 432
             return CL_EMEM;
... ...
@@ -437,15 +448,20 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
437 437
 
438 438
         do {
439 439
             cli_chomp(buffer);
440
-            /*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
440
+            /*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) */
441 441
             if (lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
442 442
                 cli_dbgmsg("Deal with message number %d\n", messagenumber++);
443 443
                 /*
444 444
 				 * End of a message in the mail box
445 445
 				 */
446
-                body = parseEmailHeaders(m, rfc821);
446
+                bool heuristicFound = FALSE;
447
+                body                = parseEmailHeaders(m, rfc821, &heuristicFound);
447 448
                 if (body == NULL) {
448 449
                     messageReset(m);
450
+                    if (heuristicFound) {
451
+                        retcode = CL_VIRUS;
452
+                        break;
453
+                    }
449 454
                     continue;
450 455
                 }
451 456
                 messageSetCTX(body, ctx);
... ...
@@ -496,7 +512,11 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
496 496
 
497 497
         if (retcode == CL_SUCCESS) {
498 498
             cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
499
-            body = parseEmailHeaders(m, rfc821);
499
+            bool heuristicFound = FALSE;
500
+            body                = parseEmailHeaders(m, rfc821, &heuristicFound);
501
+            if (heuristicFound) {
502
+                retcode = CL_VIRUS;
503
+            }
500 504
         }
501 505
         if (m)
502 506
             messageDestroy(m);
... ...
@@ -523,7 +543,11 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
523 523
 
524 524
         buffer[sizeof(buffer) - 1] = '\0';
525 525
 
526
-        body = parseEmailFile(map, &at, rfc821, buffer, dir);
526
+        bool heuristicFound = FALSE;
527
+        body                = parseEmailFile(map, &at, rfc821, buffer, dir, ctx, &heuristicFound);
528
+        if (heuristicFound) {
529
+            retcode = CL_VIRUS;
530
+        }
527 531
     }
528 532
 
529 533
     if (body) {
... ...
@@ -579,6 +603,253 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
579 579
     return retcode;
580 580
 }
581 581
 
582
+/*TODO: move these to a header.*/
583
+#define DO_STRDUP(buf, var) \
584
+    do {                    \
585
+        var = strdup(buf);  \
586
+        if (NULL == var) {  \
587
+            goto done;      \
588
+        }                   \
589
+    } while (0)
590
+
591
+#define DO_FREE(var)       \
592
+    do {                   \
593
+        if (NULL != var) { \
594
+            free(var);     \
595
+            var = NULL;    \
596
+        }                  \
597
+    } while (0)
598
+
599
+#define DO_MALLOC(var, size) \
600
+    do {                     \
601
+        var = malloc(size);  \
602
+        if (NULL == var) {   \
603
+            goto done;       \
604
+        }                    \
605
+    } while (0)
606
+
607
+#define DO_CALLOC(var, size)                   \
608
+    do {                                       \
609
+        (var) = calloc((size), sizeof *(var)); \
610
+        if (NULL == var) {                     \
611
+            goto done;                         \
612
+        }                                      \
613
+    } while (0)
614
+
615
+#define DO_VERIFY_POINTER(ptr) \
616
+    do {                       \
617
+        if (NULL == ptr) {     \
618
+            goto done;         \
619
+        }                      \
620
+    } while (0)
621
+
622
+#define READ_STRUCT_BUFFER_LEN 1024
623
+typedef struct _ReadStruct {
624
+    char buffer[READ_STRUCT_BUFFER_LEN + 1];
625
+
626
+    size_t bufferLen;
627
+
628
+    struct _ReadStruct *next;
629
+
630
+} ReadStruct;
631
+
632
+static ReadStruct *
633
+appendReadStruct(ReadStruct *rs, const char *const buffer)
634
+{
635
+    if (NULL == rs) {
636
+        assert(rs && "Invalid argument");
637
+        goto done;
638
+    }
639
+
640
+    size_t spaceLeft = (READ_STRUCT_BUFFER_LEN - rs->bufferLen);
641
+
642
+    if (strlen(buffer) > spaceLeft) {
643
+        ReadStruct *next = NULL;
644
+        int part = spaceLeft;
645
+        strncpy(&(rs->buffer[rs->bufferLen]), buffer, part);
646
+        rs->bufferLen += part;
647
+
648
+        DO_CALLOC(next, 1);
649
+
650
+        rs->next = next;
651
+        strcpy(next->buffer, &(buffer[part]));
652
+        next->bufferLen = strlen(&(buffer[part]));
653
+
654
+        rs = next;
655
+    } else {
656
+        strcpy(&(rs->buffer[rs->bufferLen]), buffer);
657
+        rs->bufferLen += strlen(buffer);
658
+    }
659
+
660
+done:
661
+    return rs;
662
+}
663
+
664
+static char *
665
+getMallocedBufferFromList(const ReadStruct *head)
666
+{
667
+
668
+    const ReadStruct *rs = head;
669
+    int bufferLen        = 1;
670
+    char *working        = NULL;
671
+    char *ret            = NULL;
672
+
673
+    while (rs) {
674
+        bufferLen += rs->bufferLen;
675
+        rs = rs->next;
676
+    }
677
+
678
+    DO_MALLOC(working, bufferLen);
679
+
680
+    rs        = head;
681
+    bufferLen = 0;
682
+    while (rs) {
683
+        memcpy(&(working[bufferLen]), rs->buffer, rs->bufferLen);
684
+        bufferLen += rs->bufferLen;
685
+        working[bufferLen] = 0;
686
+        rs                 = rs->next;
687
+    }
688
+
689
+    ret = working;
690
+done:
691
+    if (NULL == ret) {
692
+        DO_FREE(working);
693
+    }
694
+
695
+    return ret;
696
+}
697
+
698
+static void
699
+freeList(ReadStruct *head)
700
+{
701
+    while (head) {
702
+        ReadStruct *rs = head->next;
703
+        DO_FREE(head);
704
+        head = rs;
705
+    }
706
+}
707
+
708
+#ifndef FREELIST_REALLOC
709
+#define FREELIST_REALLOC(head, curr) \
710
+    do {                             \
711
+        if (curr != head) {          \
712
+            freeList(head->next);    \
713
+        }                            \
714
+        head->bufferLen = 0;         \
715
+        head->next      = 0;         \
716
+        curr            = head;      \
717
+    } while (0)
718
+#endif /*FREELIST_REALLOC*/
719
+
720
+/*Check if we have repeated blank lines with only a semicolon at the end.  Semicolon is a delimiter for parameters, 
721
+ * but if there is no data, it isn't a parameter.  Allow the first one because it may be continuation of a previous line 
722
+ * that actually had data in it.*/
723
+static bool
724
+doContinueMultipleEmptyOptions(const char *const line, bool *lastWasOnlySemi)
725
+{
726
+    if (line) {
727
+        size_t i   = 0;
728
+        int doCont = 1;
729
+        for (; i < strlen(line); i++) {
730
+            if (isblank(line[i])) {
731
+            } else if (';' == line[i]) {
732
+            } else {
733
+                doCont = 0;
734
+                break;
735
+            }
736
+        }
737
+
738
+        if (1 == doCont) {
739
+            if (*lastWasOnlySemi) {
740
+                return TRUE;
741
+            }
742
+            *lastWasOnlySemi = TRUE;
743
+        } else {
744
+            *lastWasOnlySemi = FALSE;
745
+        }
746
+    }
747
+    return FALSE;
748
+}
749
+
750
+static bool
751
+hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx)
752
+{
753
+
754
+    if (line) {
755
+        if (isblank(line[0])) {
756
+            (*lineFoldCnt)++;
757
+        } else {
758
+            (*lineFoldCnt) = 0;
759
+        }
760
+
761
+        if ((*lineFoldCnt) >= HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER) {
762
+            if (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) {
763
+                cli_append_virus(ctx, "Heuristics.Email.ExceedsMaxLineFoldCnt");
764
+            }
765
+
766
+            return TRUE;
767
+        }
768
+    }
769
+    return FALSE;
770
+}
771
+
772
+static bool
773
+haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx)
774
+{
775
+
776
+    if (totalLen > HEURISTIC_EMAIL_MAX_HEADER_BYTES) {
777
+        if (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) {
778
+            cli_append_virus(ctx, "Heuristics.Email.ExceedsMaxHeaderBytes");
779
+        }
780
+
781
+        return TRUE;
782
+    }
783
+    return FALSE;
784
+}
785
+
786
+static bool
787
+haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx)
788
+{
789
+
790
+    if (totalHeaderCnt > HEURISTIC_EMAIL_MAX_HEADERS) {
791
+        if (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) {
792
+            cli_append_virus(ctx, "Heuristics.Email.ExceedsMaxEmailHeaders");
793
+        }
794
+
795
+        return TRUE;
796
+    }
797
+    return FALSE;
798
+}
799
+
800
+static bool
801
+haveTooManyMIMEPartsPerMessage(size_t mimePartCnt, cli_ctx *ctx)
802
+{
803
+
804
+    if (mimePartCnt >= HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE) {
805
+        if (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) {
806
+            cli_append_virus(ctx, "Heuristics.Email.ExceedsMaxMIMEPartsPerMessage");
807
+        }
808
+
809
+        return TRUE;
810
+    }
811
+    return FALSE;
812
+}
813
+
814
+static bool
815
+haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx)
816
+{
817
+
818
+    if (argCnt >= HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER) {
819
+        if (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) {
820
+            cli_append_virus(ctx, "Heuristics.Email.ExceedsMaxMIMEArguments");
821
+        }
822
+
823
+        return TRUE;
824
+    }
825
+
826
+    return FALSE;
827
+}
828
+
582 829
 /*
583 830
  * Read in an email message from fin, parse it, and return the message
584 831
  *
... ...
@@ -586,7 +857,7 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
586 586
  * handled ungracefully...
587 587
  */
588 588
 static message *
589
-parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *firstLine, const char *dir)
589
+parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *firstLine, const char *dir, cli_ctx *ctx, bool *heuristicFound)
590 590
 {
591 591
     bool inHeader     = TRUE;
592 592
     bool bodyIsEmpty  = TRUE;
... ...
@@ -594,9 +865,21 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
594 594
     message *ret;
595 595
     bool anyHeadersFound = FALSE;
596 596
     int commandNumber    = -1;
597
-    char *fullline = NULL, *boundary = NULL;
598
-    size_t fulllinelength = 0;
597
+    char *boundary       = NULL;
599 598
     char buffer[RFC2821LENGTH + 1];
599
+    bool lastWasOnlySemi    = FALSE;
600
+    int err                 = 1;
601
+    size_t totalHeaderBytes = 0;
602
+    size_t totalHeaderCnt   = 0;
603
+
604
+    size_t lineFoldCnt = 0;
605
+
606
+    *heuristicFound = FALSE;
607
+
608
+    ReadStruct *head = NULL;
609
+    ReadStruct *curr = NULL;
610
+    DO_CALLOC(head, 1);
611
+    curr = head;
600 612
 
601 613
     cli_dbgmsg("parseEmailFile\n");
602 614
 
... ...
@@ -615,6 +898,15 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
615 615
         else
616 616
             line = buffer;
617 617
 
618
+        if (doContinueMultipleEmptyOptions(line, &lastWasOnlySemi)) {
619
+            continue;
620
+        }
621
+
622
+        if (hitLineFoldCnt(line, &lineFoldCnt, ctx)) {
623
+            *heuristicFound = TRUE;
624
+            break;
625
+        }
626
+
618 627
         /*
619 628
 		 * Don't blank lines which are only spaces from headers,
620 629
 		 * otherwise they'll be treated as the end of header marker
... ...
@@ -627,8 +919,8 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
627 627
             }
628 628
         }
629 629
         if (inHeader) {
630
-            cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
631
-                       buffer, fullline);
630
+            cli_dbgmsg("parseEmailFile: check '%s'\n", buffer);
631
+
632 632
             /*
633 633
 			 * Ensure wide characters are handled where
634 634
 			 * sizeof(char) > 1
... ...
@@ -652,13 +944,30 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
652 652
 					 * content-type line. So we just have
653 653
 					 * to make a best guess. Sigh.
654 654
 					 */
655
-                    if (fullline) {
656
-                        if (parseEmailHeader(ret, fullline, rfc821) < 0)
657
-                            continue;
655
+                    if (head->bufferLen) {
656
+                        char *header     = getMallocedBufferFromList(head);
657
+                        int needContinue = 0;
658
+                        DO_VERIFY_POINTER(header);
659
+
660
+                        totalHeaderCnt++;
661
+                        if (haveTooManyEmailHeaders(totalHeaderCnt, ctx)) {
662
+                            *heuristicFound = TRUE;
663
+                            break;
664
+                        }
665
+                        needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
666
+                        if (*heuristicFound) {
667
+                            DO_FREE(header);
668
+                            break;
669
+                        }
670
+
671
+                        DO_FREE(header);
672
+                        FREELIST_REALLOC(head, curr);
658 673
 
659
-                        free(fullline);
660
-                        fullline = NULL;
674
+                        if (needContinue) {
675
+                            continue;
676
+                        }
661 677
                     }
678
+
662 679
                     if (boundary ||
663 680
                         ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
664 681
                         lastWasBlank = TRUE;
... ...
@@ -666,7 +975,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
666 666
                     }
667 667
                 }
668 668
             }
669
-            if ((line == NULL) && (fullline == NULL)) { /* empty line */
669
+            if ((line == NULL) && (0 == head->bufferLen)) { /* empty line */
670 670
                 /*
671 671
 				 * A blank line signifies the end of
672 672
 				 * the header and the start of the text
... ...
@@ -681,8 +990,9 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
681 681
             } else {
682 682
                 char *ptr;
683 683
                 const char *lookahead;
684
+                bool lineAdded = TRUE;
684 685
 
685
-                if (fullline == NULL) {
686
+                if (0 == head->bufferLen) {
686 687
                     char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
687 688
 
688 689
                     /*
... ...
@@ -715,23 +1025,26 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
715 715
                                 anyHeadersFound = usefulHeader(commandNumber, cmd);
716 716
                             continue;
717 717
                     }
718
-                    fullline       = cli_strdup(line);
719
-                    fulllinelength = strlen(line) + 1;
720
-                    if (!fullline) {
721
-                        if (ret)
718
+                    curr = appendReadStruct(curr, line);
719
+                    if (NULL == curr) {
720
+                        if (ret) {
722 721
                             ret->isTruncated = TRUE;
722
+                        }
723 723
                         break;
724 724
                     }
725 725
                 } else if (line != NULL) {
726
-                    fulllinelength += strlen(line) + 1;
727
-                    ptr = cli_realloc(fullline, fulllinelength);
728
-                    if (ptr == NULL)
729
-                        continue;
730
-                    fullline = ptr;
731
-                    cli_strlcat(fullline, line, fulllinelength);
726
+                    curr = appendReadStruct(curr, line);
727
+                } else {
728
+                    lineAdded = FALSE;
732 729
                 }
733 730
 
734
-                assert(fullline != NULL);
731
+                if (lineAdded) {
732
+                    totalHeaderBytes += strlen(line);
733
+                    if (haveTooManyHeaderBytes(totalHeaderBytes, ctx)) {
734
+                        *heuristicFound = TRUE;
735
+                        break;
736
+                    }
737
+                }
735 738
 
736 739
                 if ((lookahead = fmap_need_off_once(map, *at, 1))) {
737 740
                     /*
... ...
@@ -749,24 +1062,34 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
749 749
 				 * Handle broken headers, where the next
750 750
 				 * line isn't indented by whitespace
751 751
 				 */
752
-                if (fullline[strlen(fullline) - 1] == ';')
753
-                    /* Add arguments to this line */
754
-                    continue;
752
+                {
753
+                    char *header     = getMallocedBufferFromList(head); /*This is the issue */
754
+                    int needContinue = 0;
755
+                    needContinue     = (header[strlen(header) - 1] == ';');
756
+                    if (0 == needContinue) {
757
+                        needContinue = (line && (count_quotes(header) & 1));
758
+                    }
755 759
 
756
-                if (line && (count_quotes(fullline) & 1))
757
-                    continue;
760
+                    if (0 == needContinue) {
761
+                        totalHeaderCnt++;
762
+                        if (haveTooManyEmailHeaders(totalHeaderCnt, ctx)) {
763
+                            *heuristicFound = TRUE;
764
+                            break;
765
+                        }
766
+                        needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
767
+                        if (*heuristicFound) {
768
+                            DO_FREE(header);
769
+                            break;
770
+                        }
771
+                        /*Check total headers here;*/
772
+                    }
758 773
 
759
-                ptr = rfc822comments(fullline, NULL);
760
-                if (ptr) {
761
-                    free(fullline);
762
-                    fullline = ptr;
774
+                    DO_FREE(header);
775
+                    if (needContinue) {
776
+                        continue;
777
+                    }
778
+                    FREELIST_REALLOC(head, curr);
763 779
                 }
764
-
765
-                if (parseEmailHeader(ret, fullline, rfc821) < 0)
766
-                    continue;
767
-
768
-                free(fullline);
769
-                fullline = NULL;
770 780
             }
771 781
         } else if (line && isuuencodebegin(line)) {
772 782
             /*
... ...
@@ -810,19 +1133,17 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
810 810
         }
811 811
     } while (getline_from_mbox(buffer, sizeof(buffer) - 1, map, at) != NULL);
812 812
 
813
-    if (boundary)
814
-        free(boundary);
815
-
816
-    if (fullline) {
817
-        if (*fullline) switch (commandNumber) {
818
-                case CONTENT_TRANSFER_ENCODING:
819
-                case CONTENT_DISPOSITION:
820
-                case CONTENT_TYPE:
821
-                    cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
822
-            }
823
-        free(fullline);
813
+    err = 0;
814
+done:
815
+    if (err) {
816
+        cli_errmsg("parseEmailFile: ERROR parsing file\n");
817
+        ret->isTruncated = TRUE;
824 818
     }
825 819
 
820
+    DO_FREE(boundary);
821
+
822
+    freeList(head);
823
+
826 824
     if (!anyHeadersFound) {
827 825
         /*
828 826
 		 * False positive in believing we have an e-mail when we don't
... ...
@@ -832,6 +1153,12 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
832 832
         return NULL;
833 833
     }
834 834
 
835
+    if (*heuristicFound) {
836
+        messageDestroy(ret);
837
+        cli_dbgmsg("parseEmailFile: found heuristic\n");
838
+        return NULL;
839
+    }
840
+
835 841
     cli_dbgmsg("parseEmailFile: return\n");
836 842
 
837 843
     return ret;
... ...
@@ -846,7 +1173,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
846 846
  * TODO: remove the duplication with parseEmailFile
847 847
  */
848 848
 static message *
849
-parseEmailHeaders(message *m, const table_t *rfc821)
849
+parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
850 850
 {
851 851
     bool inHeader    = TRUE;
852 852
     bool bodyIsEmpty = TRUE;
... ...
@@ -856,9 +1183,14 @@ parseEmailHeaders(message *m, const table_t *rfc821)
856 856
     int commandNumber     = -1;
857 857
     char *fullline        = NULL;
858 858
     size_t fulllinelength = 0;
859
+    bool lastWasOnlySemi  = FALSE;
860
+    size_t lineFoldCnt    = 0;
861
+    size_t totalHeaderCnt = 0;
859 862
 
860 863
     cli_dbgmsg("parseEmailHeaders\n");
861 864
 
865
+    *heuristicFound = FALSE;
866
+
862 867
     if (m == NULL)
863 868
         return NULL;
864 869
 
... ...
@@ -872,6 +1204,15 @@ parseEmailHeaders(message *m, const table_t *rfc821)
872 872
         else
873 873
             line = NULL;
874 874
 
875
+        if (doContinueMultipleEmptyOptions(line, &lastWasOnlySemi)) {
876
+            continue;
877
+        }
878
+
879
+        if (hitLineFoldCnt(line, &lineFoldCnt, m->ctx)) {
880
+            *heuristicFound = TRUE;
881
+            break;
882
+        }
883
+
875 884
         if (inHeader) {
876 885
             cli_dbgmsg("parseEmailHeaders: check '%s'\n",
877 886
                        line ? line : "");
... ...
@@ -889,6 +1230,7 @@ parseEmailHeaders(message *m, const table_t *rfc821)
889 889
                 bodyIsEmpty = TRUE;
890 890
             } else {
891 891
                 char *ptr;
892
+                bool lineAdded = TRUE;
892 893
 
893 894
                 if (fullline == NULL) {
894 895
                     char cmd[RFC2821LENGTH + 1];
... ...
@@ -934,8 +1276,21 @@ parseEmailHeaders(message *m, const table_t *rfc821)
934 934
                         continue;
935 935
                     fullline = ptr;
936 936
                     cli_strlcat(fullline, line, fulllinelength);
937
+                } else {
938
+                    lineAdded = FALSE;
937 939
                 }
938 940
                 assert(fullline != NULL);
941
+                /*continue doesn't seem right here, but that is what is done everywhere else when a malloc fails.*/
942
+                if (NULL == fullline) {
943
+                    continue;
944
+                }
945
+
946
+                if (lineAdded) {
947
+                    if (haveTooManyHeaderBytes(fulllinelength, m->ctx)) {
948
+                        *heuristicFound = TRUE;
949
+                        break;
950
+                    }
951
+                }
939 952
 
940 953
                 if (next_is_folded_header(t))
941 954
                     /* Add arguments to this line */
... ...
@@ -953,8 +1308,17 @@ parseEmailHeaders(message *m, const table_t *rfc821)
953 953
                     fullline = ptr;
954 954
                 }
955 955
 
956
-                if (parseEmailHeader(ret, fullline, rfc821) < 0)
956
+                totalHeaderCnt++;
957
+                if (haveTooManyEmailHeaders(totalHeaderCnt, m->ctx)) {
958
+                    *heuristicFound = TRUE;
959
+                    break;
960
+                }
961
+                if (parseEmailHeader(ret, fullline, rfc821, m->ctx, heuristicFound) < 0) {
957 962
                     continue;
963
+                }
964
+                if (*heuristicFound) {
965
+                    break;
966
+                }
958 967
 
959 968
                 free(fullline);
960 969
                 fullline = NULL;
... ...
@@ -1000,6 +1364,11 @@ parseEmailHeaders(message *m, const table_t *rfc821)
1000 1000
         cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
1001 1001
         return NULL;
1002 1002
     }
1003
+    if (*heuristicFound) {
1004
+        messageDestroy(ret);
1005
+        cli_dbgmsg("parseEmailHeaders: found a heuristic, delete message and stop parsing.\n");
1006
+        return NULL;
1007
+    }
1003 1008
 
1004 1009
     cli_dbgmsg("parseEmailHeaders: return\n");
1005 1010
 
... ...
@@ -1010,9 +1379,9 @@ parseEmailHeaders(message *m, const table_t *rfc821)
1010 1010
  * Handle a header line of an email message
1011 1011
  */
1012 1012
 static int
1013
-parseEmailHeader(message *m, const char *line, const table_t *rfc821)
1013
+parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound)
1014 1014
 {
1015
-    int ret;
1015
+    int ret = -1;
1016 1016
 #ifdef CL_THREAD_SAFE
1017 1017
     char *strptr;
1018 1018
 #endif
... ...
@@ -1035,15 +1404,17 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
1035 1035
         return -1;
1036 1036
 
1037 1037
     copy = rfc2047(line);
1038
-    if (copy == NULL)
1038
+    if (copy == NULL) {
1039 1039
         /* an RFC checker would return -1 here */
1040 1040
         copy = cli_strdup(line);
1041
+        if (NULL == copy) {
1042
+            goto done;
1043
+        }
1044
+    }
1041 1045
 
1042 1046
     tokenseparator[0] = *separator;
1043 1047
     tokenseparator[1] = '\0';
1044 1048
 
1045
-    ret = -1;
1046
-
1047 1049
 #ifdef CL_THREAD_SAFE
1048 1050
     cmd = strtok_r(copy, tokenseparator, &strptr);
1049 1051
 #else
... ...
@@ -1057,7 +1428,7 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
1057 1057
         char *arg = strtok(NULL, "");
1058 1058
 #endif
1059 1059
 
1060
-        if (arg)
1060
+        if (arg) {
1061 1061
             /*
1062 1062
 			 * Found a header such as
1063 1063
 			 * Content-Type: multipart/mixed;
... ...
@@ -1065,9 +1436,12 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
1065 1065
 			 * "multipart/mixed" and cmd to
1066 1066
 			 * be "Content-Type"
1067 1067
 			 */
1068
-            ret = parseMimeHeader(m, cmd, rfc821, arg);
1068
+            ret = parseMimeHeader(m, cmd, rfc821, arg, ctx, heuristicFound);
1069
+        }
1069 1070
     }
1070
-    free(copy);
1071
+done:
1072
+    DO_FREE(copy);
1073
+
1071 1074
     return ret;
1072 1075
 }
1073 1076
 
... ...
@@ -1310,6 +1684,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1310 1310
 #if HAVE_JSON
1311 1311
     json_object *saveobj = mctx->wrkobj;
1312 1312
 #endif
1313
+    bool heuristicFound = FALSE;
1313 1314
 
1314 1315
     cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
1315 1316
                mctx->files);
... ...
@@ -1391,12 +1766,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1391 1391
                 cli_dbgmsg("Not a mime encoded message\n");
1392 1392
                 aText = textAddMessage(aText, mainMessage);
1393 1393
 
1394
-                if (!doPhishingScan)
1394
+                if (!doPhishingScan) {
1395 1395
                     break;
1396
+                }
1396 1397
                 /*
1397 1398
 			 * Fall through: some phishing mails claim they are
1398 1399
 			 * text/plain, when they are in fact html
1399 1400
 			 */
1401
+                /* fall through */
1400 1402
             case TEXT:
1401 1403
                 /* text/plain has been preprocessed as no encoding */
1402 1404
                 if (doPhishingScan) {
... ...
@@ -1605,7 +1982,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1605 1605
 						 * Content-Type: application/octet-stream;
1606 1606
 						 * Content-Transfer-Encoding: base64
1607 1607
 						 */
1608
-                            parseEmailHeader(aMessage, line, mctx->rfc821Table);
1608
+                            parseEmailHeader(aMessage, line, mctx->rfc821Table, mctx->ctx, &heuristicFound);
1609
+                            if (heuristicFound) {
1610
+                                rc = VIRUS;
1611
+                                break;
1612
+                            }
1609 1613
 
1610 1614
                             while (isspace((int)*line))
1611 1615
                                 line++;
... ...
@@ -1750,8 +2131,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1750 1750
                             cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
1751 1751
                                        multiparts, fullline);
1752 1752
 
1753
-                            parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
1753
+                            parseEmailHeader(aMessage, fullline, mctx->rfc821Table, mctx->ctx, &heuristicFound);
1754 1754
                             free(fullline);
1755
+                            if (heuristicFound) {
1756
+                                rc = VIRUS;
1757
+                            }
1755 1758
                         } else if (boundaryEnd(line, boundary)) {
1756 1759
                             /*
1757 1760
 						 * Some viruses put information
... ...
@@ -1828,6 +2212,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1828 1828
 
1829 1829
                 free((char *)boundary);
1830 1830
 
1831
+                if (haveTooManyMIMEPartsPerMessage(multiparts, mctx->ctx)) {
1832
+                    DO_FREE(messages);
1833
+                    rc = VIRUS;
1834
+                    break;
1835
+                }
1836
+
1831 1837
                 /*
1832 1838
 			 * Preprocess. Anything special to be done before
1833 1839
 			 * we handle the multiparts?
... ...
@@ -1906,25 +2296,28 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1906 1906
                         htmltextPart = getTextPart(messages, multiparts);
1907 1907
 
1908 1908
                         if (htmltextPart >= 0 && messages) {
1909
-                            if (messageGetBody(messages[htmltextPart]))
1909
+                            if (messageGetBody(messages[htmltextPart])) {
1910 1910
 
1911 1911
                                 aText = textAddMessage(aText, messages[htmltextPart]);
1912
-                        } else
1912
+                            }
1913
+                        } else {
1913 1914
                             /*
1914 1915
 					 * There isn't an HTML bit. If there's a
1915 1916
 					 * multipart bit, it'll may be in there
1916 1917
 					 * somewhere
1917 1918
 					 */
1918
-                            for (i = 0; i < multiparts; i++)
1919
+                            for (i = 0; i < multiparts; i++) {
1919 1920
                                 if (messageGetMimeType(messages[i]) == MULTIPART) {
1920 1921
                                     aMessage     = messages[i];
1921 1922
                                     htmltextPart = i;
1922 1923
                                     break;
1923 1924
                                 }
1925
+                            }
1926
+                        }
1924 1927
 
1925
-                        if (htmltextPart == -1)
1928
+                        if (htmltextPart == -1) {
1926 1929
                             cli_dbgmsg("No HTML code found to be scanned\n");
1927
-                        else {
1930
+                        } else {
1928 1931
 #if HAVE_JSON
1929 1932
                             /* Send root HTML file for preclassification */
1930 1933
                             if (mctx->ctx->wrkproperty)
... ...
@@ -1950,6 +2343,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1950 1950
 				 * Content-Type: multipart/related;
1951 1951
 				 *	type="multipart/alternative"
1952 1952
 				 */
1953
+                        /* fall through */
1953 1954
                     case DIGEST:
1954 1955
                         /*
1955 1956
 				 * According to section 5.1.5 RFC2046, the
... ...
@@ -1971,6 +2365,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
1971 1971
 				 * virus is broken that way, and anyway we
1972 1972
 				 * wish to scan all of the alternatives
1973 1973
 				 */
1974
+                        /* fall through */
1974 1975
                     case REPORT:
1975 1976
                         /*
1976 1977
 				 * According to section 1 of RFC1892, the
... ...
@@ -2081,7 +2476,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2081 2081
                 rc = FAIL;
2082 2082
                 if ((strcasecmp(mimeSubtype, "rfc822") == 0) ||
2083 2083
                     (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
2084
-                    message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
2084
+                    message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table, &heuristicFound);
2085 2085
                     if (m) {
2086 2086
                         cli_dbgmsg("Decode rfc822\n");
2087 2087
 
... ...
@@ -2096,6 +2491,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2096 2096
                             rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
2097 2097
 
2098 2098
                         messageDestroy(m);
2099
+                    } else if (heuristicFound) {
2100
+                        rc = VIRUS;
2099 2101
                     }
2100 2102
                     break;
2101 2103
                 } else if (strcasecmp(mimeSubtype, "disposition-notification") == 0) {
... ...
@@ -2134,6 +2531,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
2134 2134
 			 * Content-Type: application/unknown;
2135 2135
 			 * so let's try our best to salvage something
2136 2136
 			 */
2137
+                /* fall through */
2137 2138
             case APPLICATION:
2138 2139
                 /*cptr = messageGetMimeSubtype(mainMessage);
2139 2140
 
... ...
@@ -2734,11 +3132,14 @@ strstrip(char *s)
2734 2734
  * Returns 0 for OK, -1 for error
2735 2735
  */
2736 2736
 static int
2737
-parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
2737
+parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound)
2738 2738
 {
2739 2739
     char *copy, *p, *buf;
2740 2740
     const char *ptr;
2741 2741
     int commandNumber;
2742
+    size_t argCnt = 0;
2743
+
2744
+    *heuristicFound = FALSE;
2742 2745
 
2743 2746
     cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
2744 2747
 
... ...
@@ -2746,15 +3147,17 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
2746 2746
     if (copy) {
2747 2747
         commandNumber = tableFind(rfc821Table, copy);
2748 2748
         free(copy);
2749
-    } else
2749
+    } else {
2750 2750
         commandNumber = tableFind(rfc821Table, cmd);
2751
+    }
2751 2752
 
2752 2753
     copy = rfc822comments(arg, NULL);
2753 2754
 
2754
-    if (copy)
2755
+    if (copy) {
2755 2756
         ptr = copy;
2756
-    else
2757
+    } else {
2757 2758
         ptr = arg;
2759
+    }
2758 2760
 
2759 2761
     buf = NULL;
2760 2762
 
... ...
@@ -2889,6 +3292,11 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
2889 2889
                 while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
2890 2890
                     cli_dbgmsg("mimeArgs = '%s'\n", buf);
2891 2891
 
2892
+                    argCnt++;
2893
+                    if (haveTooManyMIMEArguments(argCnt, ctx)) {
2894
+                        *heuristicFound = TRUE;
2895
+                        break;
2896
+                    }
2892 2897
                     messageAddArguments(m, buf);
2893 2898
                 }
2894 2899
             }
... ...
@@ -3267,7 +3675,7 @@ rfc1341(message *m, const char *dir)
3267 3267
 
3268 3268
                 while ((dent = readdir(dd))) {
3269 3269
                     FILE *fin;
3270
-                    char buffer[BUFSIZ], fullname[NAME_MAX + 1];
3270
+                    char buffer[BUFSIZ], fullname[PATH_MAX + 1];
3271 3271
                     int nblanks;
3272 3272
                     STATBUF statb;
3273 3273
                     const char *dentry_idpart;
... ...
@@ -3792,8 +4200,9 @@ static const char *getMimeTypeStr(mime_type mimetype)
3792 3792
     const struct tableinit *entry = mimeTypeStr;
3793 3793
 
3794 3794
     while (entry->key) {
3795
-        if (mimetype == entry->value)
3795
+        if (mimetype == ((mime_type)entry->value)) {
3796 3796
             return entry->key;
3797
+        }
3797 3798
         entry++;
3798 3799
     }
3799 3800
     return "UNKNOWN";
... ...
@@ -3807,8 +4216,9 @@ static const char *getEncTypeStr(encoding_type enctype)
3807 3807
     const struct tableinit *entry = encTypeStr;
3808 3808
 
3809 3809
     while (entry->key) {
3810
-        if (enctype == entry->value)
3810
+        if (enctype == ((encoding_type)entry->value)) {
3811 3811
             return entry->key;
3812
+        }
3812 3813
         entry++;
3813 3814
     }
3814 3815
     return "UNKNOWN";
... ...
@@ -4047,8 +4457,9 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
4047 4047
                 messages[i] = NULL;
4048 4048
             } else {
4049 4049
                 *rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
4050
-                if (mainMessage && (mainMessage != messageIn))
4050
+                if (mainMessage && (mainMessage != messageIn)) {
4051 4051
                     messageDestroy(mainMessage);
4052
+                }
4052 4053
                 mainMessage = NULL;
4053 4054
             }
4054 4055
 #if HAVE_JSON
... ...
@@ -4069,18 +4480,21 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
4069 4069
 
4070 4070
         if (thisobj != NULL) {
4071 4071
             /* attempt to determine container size - prevents incorrect type reporting */
4072
-            if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
4072
+            if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj)) {
4073 4073
                 arrlen = json_object_array_length(arrobj);
4074
+            }
4074 4075
         }
4075 4076
 
4076 4077
 #endif
4077 4078
         if (fb) {
4078 4079
             /* aMessage doesn't always have a ctx set */
4079 4080
             fileblobSetCTX(fb, mctx->ctx);
4080
-            if (fileblobScanAndDestroy(fb) == CL_VIRUS)
4081
+            if (fileblobScanAndDestroy(fb) == CL_VIRUS) {
4081 4082
                 *rc = VIRUS;
4082
-            if (!addToText)
4083
+            }
4084
+            if (!addToText) {
4083 4085
                 mctx->files++;
4086
+            }
4084 4087
         }
4085 4088
 #if HAVE_JSON
4086 4089
         if (thisobj != NULL) {
... ...
@@ -4088,20 +4502,24 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
4088 4088
             const char *dtype  = NULL;
4089 4089
 
4090 4090
             /* attempt to acquire container type */
4091
-            if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
4092
-                if (json_object_array_length(arrobj) > arrlen)
4091
+            if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj)) {
4092
+                if (json_object_array_length(arrobj) > ((int)arrlen)) {
4093 4093
                     entry = json_object_array_get_idx(arrobj, arrlen);
4094
+                }
4095
+            }
4094 4096
             if (entry) {
4095 4097
                 json_object_object_get_ex(entry, "FileType", &entry);
4096
-                if (entry)
4098
+                if (entry) {
4097 4099
                     dtype = json_object_get_string(entry);
4100
+                }
4098 4101
             }
4099 4102
             cli_jsonint(thisobj, "ContainedObjectsIndex", (uint32_t)arrlen);
4100 4103
             cli_jsonstr(thisobj, "ClamAVFileType", dtype ? dtype : "UNKNOWN");
4101 4104
         }
4102 4105
 #endif
4103
-        if (messageContainsVirus(aMessage))
4106
+        if (messageContainsVirus(aMessage)) {
4104 4107
             *rc = VIRUS;
4108
+        }
4105 4109
     }
4106 4110
     messageDestroy(aMessage);
4107 4111
     messages[i] = NULL;
... ...
@@ -4205,5 +4623,7 @@ newline_in_header(const char *line)
4205 4205
     if (strncmp(line, "Date: ", 6) == 0)
4206 4206
         return TRUE;
4207 4207
 
4208
+    cli_dbgmsg("newline_in_header, returning \"%s\"\n", line);
4209
+
4208 4210
     return FALSE;
4209 4211
 }