libclamav/mbox.c
e3aaff8e
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
694e7882
  *
6289eda8
  *  Acknowledgements: Some ideas came from Stephen White <stephen@earth.li>,
  *                    Michael Dankov <misha@btrc.ru>, Gianluigi Tiesi <sherpya@netfarm.it>,
  *                    Everton da Silva Marques, Thomas Lamy <Thomas.Lamy@in-online.net>,
  *                    James Stevens <James@kyzo.com>
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
e3aaff8e
  */
2023340a
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
e3aaff8e
 
 #ifdef CL_THREAD_SAFE
288057e9
 #ifndef _REENTRANT
 #define _REENTRANT /* for Solaris 2.8 */
e3aaff8e
 #endif
98cb5cba
 #endif
e3aaff8e
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <assert.h>
 #include <string.h>
288057e9
 #ifdef HAVE_STRINGS_H
e3aaff8e
 #include <strings.h>
bc6bbeff
 #endif
288057e9
 #ifdef HAVE_STRING_H
e906fef3
 #include <string.h>
 #endif
e3aaff8e
 #include <ctype.h>
 #include <time.h>
 #include <fcntl.h>
288057e9
 #ifdef HAVE_SYS_PARAM_H
d4d14218
 #include <sys/param.h>
bc6bbeff
 #endif
f10460ed
 #include <dirent.h>
a0b21816
 #include <limits.h>
093e013c
 #include <signal.h>
e3aaff8e
 
288057e9
 #ifdef HAVE_UNISTD_H
cd153266
 #include <unistd.h>
 #endif
 
288057e9
 #ifdef CL_THREAD_SAFE
e2875303
 #include <pthread.h>
 #endif
 
694e7882
 #if defined(_WIN32) || defined(_WIN64)
 #define strtok_r strtok_s
 #endif
 
60d8d2c3
 #include "clamav.h"
0f7f7682
 #include "others.h"
 #include "str.h"
 #include "filetypes.h"
e3aaff8e
 #include "mbox.h"
d77ac7de
 #include "dconf.h"
ee1b2a6c
 #include "fmap.h"
ede9939c
 #include "json_api.h"
c2df9f79
 #include "msxml_parser.h"
 
 #if HAVE_LIBXML2
f5412cc9
 #include <libxml/xmlversion.h>
c2df9f79
 #include <libxml/HTMLtree.h>
f5412cc9
 #include <libxml/HTMLparser.h>
c2df9f79
 #include <libxml/xmlreader.h>
 #endif
d77ac7de
 
 #define DCONF_PHISHING mctx->ctx->dconf->phishing
e3aaff8e
 
288057e9
 #ifdef CL_DEBUG
093e013c
 
288057e9
 #if defined(C_LINUX)
093e013c
 #include <features.h>
92dbfae7
 #endif
093e013c
 
02927896
 #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
 #define HAVE_BACKTRACE
 #endif
3f3f9085
 #endif
02927896
 
 #ifdef HAVE_BACKTRACE
 #include <execinfo.h>
694e7882
 
 #ifdef USE_SYSLOG
02927896
 #include <syslog.h>
694e7882
 #endif
02927896
 
288057e9
 static void sigsegv(int sig);
 static void print_trace(int use_syslog);
a9d251e0
 
288057e9
 /*#define	SAVE_TMP */ /* Save the file being worked on in tmp */
02927896
 #endif
 
288057e9
 #if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
e3aaff8e
 #undef strtok_r
 #undef __strtok_r
288057e9
 #define strtok_r(a, b, c) strtok(a, b)
e3aaff8e
 #endif
 
288057e9
 #ifdef HAVE_STDBOOL_H
 #ifdef C_BEOS
89d4073d
 #include "SupportDefs.h"
 #else
edee0700
 #include <stdbool.h>
89d4073d
 #endif
edee0700
 #else
288057e9
 #ifdef FALSE
 typedef unsigned char bool;
edee0700
 #else
288057e9
 typedef enum { FALSE = 0,
                TRUE  = 1 } bool;
e3aaff8e
 #endif
 #endif
 
288057e9
 /*
  * Outcome of parsing/scanning one message; this is the return type of
  * parseEmailBody() and parseRootMHTML(). cli_parse_mbox() translates the
  * value for the final message into a CL_* return code as noted below.
  */
 typedef enum {
     FAIL,                     /* reported as CL_EFORMAT */
     OK,                       /* return code left unchanged */
     OK_ATTACHMENTS_NOT_SAVED, /* treated like OK by cli_parse_mbox() */
     VIRUS,                    /* reported as CL_VIRUS */
     MAXREC,                   /* reported as CL_EMAXREC */
     MAXFILES                  /* reported as CL_EMAXFILES */
ecc3d638
 } mbox_status;
 
9f2024cc
 #ifndef isblank
288057e9
 #define isblank(c) (((c) == ' ') || ((c) == '\t'))
9f2024cc
 #endif
 
288057e9
 #define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
393a6d67
 
c52d991e
 #include "htmlnorm.h"
 
 #include "phishcheck.h"
 
288057e9
 #ifndef _WIN32
081f6473
 #include <sys/time.h>
ea541184
 #include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
29b92a2d
 #if !defined(C_BEOS) && !defined(C_INTERIX)
ea541184
 #include <net/if.h>
 #include <arpa/inet.h>
 #endif
89d4073d
 #endif
f4a02249
 
ea541184
 #include <fcntl.h>
 
f10460ed
 /*
048a88e6
  * Use CL_SCAN_MAIL_PARTIAL_MESSAGE to handle messages covered by section 7.3.2 of RFC1341.
f10460ed
  *	This is experimental code so it is up to YOU to (1) ensure it's secure
cf569541
  * (2) periodically trim the directory of old files
  *
  * If you use the load balancing feature of clamav-milter to run clamd on
fb79b576
  * more than one machine you must make sure that .../partial is on a shared
cf569541
  * network filesystem
f10460ed
  */
d72749e0
 
27948a03
 /*
  * Slows things down a lot and only catches unencoded copies
  * of EICAR within bounces, which don't matter
  */
 //#define	SCAN_UNENCODED_BOUNCES
12bd9764
 
288057e9
 /*
  * Per-scan state that cli_parse_mbox() fills in and passes by pointer to
  * parseEmailBody().
  */
 typedef struct mbox_ctx {
     const char *dir;             /* the dir argument given to cli_parse_mbox() */
     const table_t *rfc821Table;  /* header-name lookup table (file-static rfc821) */
     const table_t *subtypeTable; /* MIME sub-type lookup table (file-static subtype) */
     cli_ctx *ctx;                /* scan context of the caller */
     unsigned int files; /* number of files extracted */
ede9939c
 #if HAVE_JSON
288057e9
     json_object *wrkobj; /* initialised from ctx->wrkproperty */
ede9939c
 #endif
c1fce7f7
 } mbox_ctx;
 
f87a92ca
 /* if supported by the system, use the optimized
  * version of getc, that doesn't do locking,
  * and is possibly implemented entirely as a macro */
 #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
 #define GETC(fp) getc_unlocked(fp)
 #define LOCKFILE(fp) flockfile(fp)
 #define UNLOCKFILE(fp) funlockfile(fp)
 #else
 #define GETC(fp) getc(fp)
 #define LOCKFILE(fp)
 #define UNLOCKFILE(fp)
 #endif
 
288057e9
 static int cli_parse_mbox(const char *dir, cli_ctx *ctx);
 static message *parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821Table, const char *firstLine, const char *dir);
 static message *parseEmailHeaders(message *m, const table_t *rfc821Table);
 static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
02840644
 static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata);
288057e9
 static mbox_status parseRootMHTML(mbox_ctx *mctx, message *m, text *t);
 static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
 static int boundaryStart(const char *line, const char *boundary);
 static int boundaryEnd(const char *line, const char *boundary);
 static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
 static int getTextPart(message *const messages[], size_t size);
 static size_t strip(char *buf, int len);
 static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
 static int saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
 static char *rfc2047(const char *in);
 static char *rfc822comments(const char *in, char *out);
 static int rfc1341(message *m, const char *dir);
 static bool usefulHeader(int commandNumber, const char *cmd);
 static char *getline_from_mbox(char *buffer, size_t len, fmap_t *map, size_t *at);
 static bool isBounceStart(mbox_ctx *mctx, const char *line);
 static bool exportBinhexMessage(mbox_ctx *mctx, message *m);
 static int exportBounceMessage(mbox_ctx *ctx, text *start);
 static const char *getMimeTypeStr(mime_type mimetype);
 static const char *getEncTypeStr(encoding_type enctype);
 static message *do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
 static int count_quotes(const char *buf);
 static bool next_is_folded_header(const text *t);
 static bool newline_in_header(const char *line);
 
 static blob *getHrefs(message *m, tag_arguments_t *hrefs);
 static void hrefs_done(blob *b, tag_arguments_t *hrefs);
 static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
9b4bb8b7
 
9fe789f8
 /* Maximum line length according to RFC2821 */
288057e9
 #define RFC2821LENGTH 1000
e3aaff8e
 
 /* Hashcodes for our hash tables */
288057e9
 #define CONTENT_TYPE 1
 #define CONTENT_TRANSFER_ENCODING 2
 #define CONTENT_DISPOSITION 3
e3aaff8e
 
 /* Mime sub types */
288057e9
 /*
  * Numeric codes stored as the values of the mimeSubtypes[] table below.
  * FAX and X_BFILE are deliberate aliases of MIXED and RELATED.
  */
 #define PLAIN 1
 #define ENRICHED 2
 #define HTML 3
 #define RICHTEXT 4
 #define MIXED 5
 #define ALTERNATIVE 6 /* RFC1521 */
 #define DIGEST 7
 #define SIGNED 8
 #define PARALLEL 9
 #define RELATED 10      /* RFC2387 */
 #define REPORT 11       /* RFC1892 */
 #define APPLEDOUBLE 12  /* Handling of this is only noddy for now */
 #define FAX MIXED       /*                                              \
                          * RFC3458                                      \
                          * Drafts stated to treat it as mixed if it is  \
                          * not known.  This disappeared in the final    \
                          * version (except when talking about           \
                          * voice-message), but it is good enough for us \
                          * since we do no validation of coversheet      \
                          * presence etc. (which also has disappeared    \
                          * in the final version)                        \
                          */
 #define ENCRYPTED 13    /*                                        \
                          * e.g. RFC2015                           \
                          * Content-Type: multipart/encrypted;     \
                          * boundary="nextPart1383049.XCRrrar2yq"; \
                          * protocol="application/pgp-encrypted"   \
                          */
 #define X_BFILE RELATED /*                                           \
                          * BeOS, expects two parts: the file and its \
                          * attributes. The attributes part comes as  \
                          *	Content-Type: application/x-be_attribute  \
                          *		name="foo"                               \
                          * I can't find where it is defined, any     \
                          * pointers would be appreciated. For now    \
                          * we treat it as multipart/related          \
                          */
 #define KNOWBOT 14      /* Unknown and undocumented format? */
 
 /*
  * Static name -> integer tables. Every table ends with a {NULL, 0} entry.
  *  rfc821headers: header names we act on, mapped to the CONTENT_* codes
  *  mimeSubtypes:  MIME sub-type names, mapped to the sub-type codes above
  *  mimeTypeStr:   printable names for the mime_type values
  *  encTypeStr:    printable names for the encoding_type values
  * NOTE(review): presumably the first two are loaded by initialiseTables()
  * and the last two back getMimeTypeStr()/getEncTypeStr() - confirm, neither
  * function body is visible here.
  */
 static const struct tableinit {
     const char *key;
     int value;
e3aaff8e
 } rfc821headers[] = {
288057e9
     /* TODO: make these regular expressions */
     {"Content-Type", CONTENT_TYPE},
     {"Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING},
     {"Content-Disposition", CONTENT_DISPOSITION},
     {NULL, 0}},
   mimeSubtypes[] = {/* see RFC2045 */
                     /* subtypes of Text */
                     {"plain", PLAIN},
                     {"enriched", ENRICHED},
                     {"html", HTML},
                     {"richtext", RICHTEXT},
                     /* subtypes of Multipart */
                     {"mixed", MIXED},
                     {"alternative", ALTERNATIVE},
                     {"digest", DIGEST},
                     {"signed", SIGNED},
                     {"parallel", PARALLEL},
                     {"related", RELATED},
                     {"report", REPORT},
                     {"appledouble", APPLEDOUBLE},
                     {"fax-message", FAX},          /* FAX is an alias of MIXED */
                     {"encrypted", ENCRYPTED},
                     {"x-bfile", X_BFILE},          /* BeOS */
                     {"knowbot", KNOWBOT},          /* ??? */
                     {"knowbot-metadata", KNOWBOT}, /* ??? */
                     {"knowbot-code", KNOWBOT},     /* ??? */
                     {"knowbot-state", KNOWBOT},    /* ??? */
                     {NULL, 0}},
   mimeTypeStr[] = {{"NOMIME", NOMIME}, {"APPLICATION", APPLICATION}, {"AUDIO", AUDIO}, {"IMAGE", IMAGE}, {"MESSAGE", MESSAGE}, {"MULTIPART", MULTIPART}, {"TEXT", TEXT}, {"VIDEO", VIDEO}, {"MEXTENSION", MEXTENSION}, {NULL, 0}}, encTypeStr[] = {{"NOENCODING", NOENCODING}, {"QUOTEDPRINTABLE", QUOTEDPRINTABLE}, {"BASE64", BASE64}, {"EIGHTBIT", EIGHTBIT}, {"BINARY", BINARY}, {"UUENCODE", UUENCODE}, {"YENCODE", YENCODE}, {"EEXTENSION", EEXTENSION}, {"BINHEX", BINHEX}, {NULL, 0}};
 
 #ifdef CL_THREAD_SAFE
 static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
e2875303
 #endif
288057e9
 static table_t *rfc821  = NULL;
 static table_t *subtype = NULL;
e3aaff8e
 
288057e9
 /*
  * Public entry point of the mail parser.
  *
  * dir and ctx are forwarded unchanged to cli_parse_mbox(), whose return
  * code is passed straight back. A NULL dir is rejected up front with
  * CL_ENULLARG (and a debug message) without touching ctx.
  */
 int cli_mbox(const char *dir, cli_ctx *ctx)
 {
     if (dir != NULL)
         return cli_parse_mbox(dir, ctx);

     cli_dbgmsg("cli_mbox called with NULL dir\n");
     return CL_ENULLARG;
 }
 
e3aaff8e
 /*
  * TODO: when signal handling is added, need to remove temp files when a
ef822cfc
  *	signal is received
e3aaff8e
  * TODO: add option to scan in memory not via temp files, perhaps with a
1bfbedd4
  * named pipe or memory mapped file, though this won't work on big e-mails
  * containing many levels of encapsulated messages - it'd just take too much
  * RAM
049a18b9
  * TODO: parse .msg format files
c9b8f252
  * TODO: fully handle AppleDouble format, see
ef822cfc
  *	http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
89670d69
  * TODO: ensure parseEmailHeaders is always called before parseEmailBody
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
7cd9337a
  * TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
9f2024cc
  *	e.g. \0Content-Type: application/binary;
e3aaff8e
  */
f24bf390
 static int
2df29bde
 cli_parse_mbox(const char *dir, cli_ctx *ctx)
e3aaff8e
 {
288057e9
     int retcode;
     message *body;
     char buffer[RFC2821LENGTH + 1];
     mbox_ctx mctx;
     size_t at   = 0;
     fmap_t *map = *ctx->fmap;
 
     cli_dbgmsg("in mbox()\n");
 
     if (!fmap_gets(map, buffer, &at, sizeof(buffer) - 1)) {
         /* empty message */
         return CL_CLEAN;
     }
 #ifdef CL_THREAD_SAFE
     pthread_mutex_lock(&tables_mutex);
e2875303
 #endif
288057e9
     if (rfc821 == NULL) {
         assert(subtype == NULL);
 
         if (initialiseTables(&rfc821, &subtype) < 0) {
             rfc821  = NULL;
             subtype = NULL;
 #ifdef CL_THREAD_SAFE
             pthread_mutex_unlock(&tables_mutex);
e2875303
 #endif
288057e9
             return CL_EMEM;
         }
     }
 #ifdef CL_THREAD_SAFE
     pthread_mutex_unlock(&tables_mutex);
e2875303
 #endif
e3aaff8e
 
288057e9
     retcode = CL_SUCCESS;
     body    = NULL;
e791b5ac
 
288057e9
     mctx.dir          = dir;
     mctx.rfc821Table  = rfc821;
     mctx.subtypeTable = subtype;
     mctx.ctx          = ctx;
     mctx.files        = 0;
ede9939c
 #if HAVE_JSON
288057e9
     mctx.wrkobj = ctx->wrkproperty;
ede9939c
 #endif
c1fce7f7
 
288057e9
     /*
45dc1456
 	 * Is it a UNIX style mbox with more than one
89670d69
 	 * mail message, or just a single mail message?
45dc1456
 	 *
 	 * TODO: It would be better if we called cli_scandir here rather than
 	 * in cli_scanmail. Then we could improve the way mailboxes with more
001ad879
 	 * than one message is handled, e.g. giving a better indication of
 	 * which message within the mailbox is infected
89670d69
 	 */
288057e9
     /*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
     if (strncmp(buffer, "From ", 5) == 0) {
         /*
049a18b9
 		 * Have been asked to check a UNIX style mbox file, which
 		 * may contain more than one e-mail message to decode
f003b79e
 		 *
 		 * It would be far better for scanners.c to do this splitting
 		 * and do this
 		 *	FOR EACH mail in the mailbox
 		 *	DO
 		 *		pass this mail to cli_mbox --
 		 *		scan this file
 		 *		IF this file has a virus quit
 		 *		THEN
 		 *			return CL_VIRUS
 		 *		FI
 		 *	END
 		 * This would remove a problem with this code that it can
 		 * fill up the tmp directory before it starts scanning
e3aaff8e
 		 */
288057e9
         bool lastLineWasEmpty;
         int messagenumber;
         message *m = messageCreate();
 
         if (m == NULL)
             return CL_EMEM;
 
         lastLineWasEmpty = FALSE;
         messagenumber    = 1;
         messageSetCTX(m, ctx);
 
         do {
             cli_chomp(buffer);
             /*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
             if (lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
                 cli_dbgmsg("Deal with message number %d\n", messagenumber++);
                 /*
89670d69
 				 * End of a message in the mail box
e3aaff8e
 				 */
288057e9
                 body = parseEmailHeaders(m, rfc821);
                 if (body == NULL) {
                     messageReset(m);
                     continue;
                 }
                 messageSetCTX(body, ctx);
                 messageDestroy(m);
                 if (messageGetBody(body)) {
                     mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
                     if (rc == FAIL) {
                         messageReset(body);
                         m = body;
                         continue;
                     } else if (rc == VIRUS) {
                         cli_dbgmsg("Message number %d is infected\n",
                                    messagenumber - 1);
                         retcode = CL_VIRUS;
                         m       = NULL;
                         break;
                     }
                 }
                 /*
89670d69
 				 * Starting a new message, throw away all the
f24bf390
 				 * information about the old one. It would
 				 * be best to be able to scan this message
 				 * now, but cli_scanfile needs arguments
 				 * that haven't been passed here so it can't be
 				 * called
e3aaff8e
 				 */
288057e9
                 m = body;
                 messageReset(body);
                 messageSetCTX(body, ctx);
e3aaff8e
 
288057e9
                 cli_dbgmsg("Finished processing message\n");
             } else
                 lastLineWasEmpty = (bool)(buffer[0] == '\0');
4945127a
 
288057e9
             if (isuuencodebegin(buffer)) {
                 /*
4945127a
 				 * Fast track visa to uudecode.
 				 * TODO: binhex, yenc
 				 */
288057e9
                 if (uudecodeFile(m, buffer, dir, map, &at) < 0)
                     if (messageAddStr(m, buffer) < 0)
                         break;
             } else
                 /* at this point, the \n has been removed */
                 if (messageAddStr(m, buffer) < 0)
                 break;
         } while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1));
 
         if (retcode == CL_SUCCESS) {
             cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
             body = parseEmailHeaders(m, rfc821);
         }
         if (m)
             messageDestroy(m);
     } else {
         /*
7e577f26
 		 * It's a single message, parse the headers then the body
4f1d0bfc
 		 */
288057e9
         if (strncmp(buffer, "P I ", 4) == 0)
             /*
69543a9d
 			 * CommuniGate Pro format: ignore headers until
 			 * blank line
 			 */
288057e9
             while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1) &&
                    (strchr("\r\n", buffer[0]) == NULL))
                 ;
         /* getline_from_mbox could be using unlocked_stdio(3),
f87a92ca
 		 * so lock file here */
288057e9
         /*
69543a9d
 		 * Ignore any blank lines at the top of the message
 		 */
288057e9
         while (strchr("\r\n", buffer[0]) &&
                (getline_from_mbox(buffer, sizeof(buffer) - 1, map, &at) != NULL))
             ;
87c9313e
 
288057e9
         buffer[sizeof(buffer) - 1] = '\0';
4b187745
 
288057e9
         body = parseEmailFile(map, &at, rfc821, buffer, dir);
     }
7e577f26
 
288057e9
     if (body) {
         /*
4f1d0bfc
 		 * Write out the last entry in the mailbox
 		 */
288057e9
         if ((retcode == CL_SUCCESS) && messageGetBody(body)) {
             messageSetCTX(body, ctx);
             switch (parseEmailBody(body, NULL, &mctx, 0)) {
                 case OK:
                 case OK_ATTACHMENTS_NOT_SAVED:
                     break;
                 case FAIL:
                     /*
69c62847
 					 * beware: cli_magic_scandesc(),
 					 * changes this into CL_CLEAN, so only
 					 * use it to inform the higher levels
 					 * that we couldn't decode it because
 					 * it isn't an mbox, not to signal
 					 * decoding errors on what *is* a valid
 					 * mbox
 					 */
288057e9
                     retcode = CL_EFORMAT;
                     break;
                 case MAXREC:
                     retcode = CL_EMAXREC;
                     break;
                 case MAXFILES:
                     retcode = CL_EMAXFILES;
                     break;
                 case VIRUS:
                     retcode = CL_VIRUS;
                     break;
             }
         }
 
         if (body->isTruncated && retcode == CL_SUCCESS)
             retcode = CL_EMEM;
         /*
4f1d0bfc
 		 * Tidy up and quit
 		 */
288057e9
         messageDestroy(body);
     }
 
     if ((retcode == CL_CLEAN) && ctx->found_possibly_unwanted &&
         (*ctx->virname == NULL || SCAN_ALLMATCHES)) {
         retcode                      = cli_append_virus(ctx, "Heuristics.Phishing.Email");
         ctx->found_possibly_unwanted = 0;
     }
 
     cli_dbgmsg("cli_mbox returning %d\n", retcode);
 
     return retcode;
e3aaff8e
 }
 
 /*
ddea752e
  * Read in an email message from fin, parse it, and return the message
7e577f26
  *
ddea752e
  * FIXME: files full of new lines and nothing else are
  * handled ungracefully...
  */
 static message *
49cc1e3c
 parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *firstLine, const char *dir)
ddea752e
 {
288057e9
     bool inHeader     = TRUE;
     bool bodyIsEmpty  = TRUE;
     bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
     message *ret;
     bool anyHeadersFound = FALSE;
     int commandNumber    = -1;
     char *fullline = NULL, *boundary = NULL;
     size_t fulllinelength = 0;
     char buffer[RFC2821LENGTH + 1];
 
     cli_dbgmsg("parseEmailFile\n");
 
     ret = messageCreate();
     if (ret == NULL)
         return NULL;
 
     strncpy(buffer, firstLine, sizeof(buffer) - 1);
     do {
         const char *line;
 
         (void)cli_chomp(buffer);
 
         if (buffer[0] == '\0')
             line = NULL;
         else
             line = buffer;
 
         /*
ddea752e
 		 * Don't blank lines which are only spaces from headers,
 		 * otherwise they'll be treated as the end of header marker
 		 */
288057e9
         if (lastWasBlank) {
             lastWasBlank = FALSE;
             if (boundaryStart(buffer, boundary)) {
                 cli_dbgmsg("Found a header line with space that should be blank\n");
                 inHeader = FALSE;
             }
         }
         if (inHeader) {
             cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
                        buffer, fullline);
             /*
2a0041b8
 			 * Ensure wide characters are handled where
 			 * sizeof(char) > 1
 			 */
288057e9
             if (line && isspace(line[0] & 0xFF)) {
                 char copy[sizeof(buffer)];
41b7a56b
 
288057e9
                 strcpy(copy, buffer);
                 strstrip(copy);
                 if (copy[0] == '\0') {
                     /*
4d4166a9
 					 * The header line contains only white
 					 * space. This is not the end of the
 					 * headers according to RFC2822, but
 					 * some MUAs will handle it as though
 					 * it were, and virus writers exploit
 					 * this bug. We can't just break from
 					 * the loop here since that would allow
 					 * other exploits such as inserting a
 					 * white space line before the
 					 * content-type line. So we just have
 					 * to make a best guess. Sigh.
41b7a56b
 					 */
288057e9
                     if (fullline) {
                         if (parseEmailHeader(ret, fullline, rfc821) < 0)
                             continue;
 
                         free(fullline);
                         fullline = NULL;
                     }
                     if (boundary ||
                         ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
                         lastWasBlank = TRUE;
                         continue;
                     }
                 }
             }
             if ((line == NULL) && (fullline == NULL)) { /* empty line */
                 /*
0ed29506
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
288057e9
                 if (!anyHeadersFound)
                     /* Ignore the junk at the top */
                     continue;
5860ae08
 
288057e9
                 cli_dbgmsg("End of header information\n");
                 inHeader    = FALSE;
                 bodyIsEmpty = TRUE;
             } else {
                 char *ptr;
                 const char *lookahead;
ddea752e
 
288057e9
                 if (fullline == NULL) {
                     char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
ddea752e
 
288057e9
                     /*
ddea752e
 					 * Continuation of line we're ignoring?
 					 */
288057e9
                     if (isblank(line[0]))
                         continue;
e107e8cf
 
288057e9
                     /*
ddea752e
 					 * Is this a header we're interested in?
 					 */
288057e9
                     if ((strchr(line, ':') == NULL) ||
                         (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
                         if (strncmp(line, "From ", 5) == 0)
                             anyHeadersFound = TRUE;
                         continue;
                     }
 
                     ptr           = rfc822comments(cmd, out);
                     commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 
                     switch (commandNumber) {
                         case CONTENT_TRANSFER_ENCODING:
                         case CONTENT_DISPOSITION:
                         case CONTENT_TYPE:
                             anyHeadersFound = TRUE;
                             break;
                         default:
                             if (!anyHeadersFound)
                                 anyHeadersFound = usefulHeader(commandNumber, cmd);
                             continue;
                     }
                     fullline       = cli_strdup(line);
                     fulllinelength = strlen(line) + 1;
                     if (!fullline) {
                         if (ret)
                             ret->isTruncated = TRUE;
                         break;
                     }
                 } else if (line != NULL) {
                     fulllinelength += strlen(line) + 1;
                     ptr = cli_realloc(fullline, fulllinelength);
                     if (ptr == NULL)
                         continue;
                     fullline = ptr;
                     cli_strlcat(fullline, line, fulllinelength);
                 }
 
                 assert(fullline != NULL);
 
                 if ((lookahead = fmap_need_off_once(map, *at, 1))) {
                     /*
ddea752e
 					 * Section B.2 of RFC822 says TAB or
 					 * SPACE means a continuation of the
 					 * previous entry.
 					 *
 					 * Add all the arguments on the line
 					 */
288057e9
                     if (isblank(*lookahead))
                         continue;
                 }
ddea752e
 
288057e9
                 /*
11f253d6
 				 * Handle broken headers, where the next
 				 * line isn't indented by whitespace
 				 */
288057e9
                 if (fullline[strlen(fullline) - 1] == ';')
                     /* Add arguments to this line */
                     continue;
 
                 if (line && (count_quotes(fullline) & 1))
                     continue;
 
                 ptr = rfc822comments(fullline, NULL);
                 if (ptr) {
                     free(fullline);
                     fullline = ptr;
                 }
 
                 if (parseEmailHeader(ret, fullline, rfc821) < 0)
                     continue;
 
                 free(fullline);
                 fullline = NULL;
             }
         } else if (line && isuuencodebegin(line)) {
             /*
ae5c693a
 			 * Fast track visa to uudecode.
 			 * TODO: binhex, yenc
 			 */
288057e9
             bodyIsEmpty = FALSE;
             if (uudecodeFile(ret, line, dir, map, at) < 0)
                 if (messageAddStr(ret, line) < 0)
                     break;
         } else {
             if (line == NULL) {
                 /*
c52d991e
 				 * Although this would save time and RAM, some
 				 * phish signatures have been built which need
 				 * the blank lines
 				 */
288057e9
                 if (lastBodyLineWasBlank &&
                     (messageGetMimeType(ret) != TEXT)) {
                     cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
                     continue;
                 }
                 lastBodyLineWasBlank = TRUE;
             } else {
                 if (bodyIsEmpty) {
                     /*
6e3d492a
 					 * Broken message: new line in the
 					 * middle of the headers, so the first
 					 * line of the body is in fact
 					 * the last lines of the header
 					 */
288057e9
                     if (newline_in_header(line))
                         continue;
                     bodyIsEmpty = FALSE;
                 }
                 lastBodyLineWasBlank = FALSE;
             }
 
             if (messageAddStr(ret, line) < 0)
                 break;
         }
     } while (getline_from_mbox(buffer, sizeof(buffer) - 1, map, at) != NULL);
 
     if (boundary)
         free(boundary);
 
     if (fullline) {
         if (*fullline) switch (commandNumber) {
                 case CONTENT_TRANSFER_ENCODING:
                 case CONTENT_DISPOSITION:
                 case CONTENT_TYPE:
                     cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
             }
         free(fullline);
     }
 
     if (!anyHeadersFound) {
         /*
ddea752e
 		 * False positive in believing we have an e-mail when we don't
 		 */
288057e9
         messageDestroy(ret);
         cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
         return NULL;
     }
ddea752e
 
288057e9
     cli_dbgmsg("parseEmailFile: return\n");
ddea752e
 
288057e9
     return ret;
ddea752e
 }
 
 /*
  * The given message contains a raw e-mail.
e06d34dc
  *
94f051b0
  * Returns the message's body with the correct arguments set, empties the
  * given message's contents (note that it isn't destroyed)
ddea752e
  *
  * TODO: remove the duplication with parseEmailFile
7e577f26
  */
e06d34dc
 static message *
2673dc74
 parseEmailHeaders(message *m, const table_t *rfc821)
7e577f26
 {
288057e9
     bool inHeader    = TRUE;
     bool bodyIsEmpty = TRUE;
     text *t;
     message *ret;
     bool anyHeadersFound  = FALSE;
     int commandNumber     = -1;
     char *fullline        = NULL;
     size_t fulllinelength = 0;
 
     cli_dbgmsg("parseEmailHeaders\n");
 
     if (m == NULL)
         return NULL;
 
     ret = messageCreate();
 
     for (t = messageGetBody(m); t; t = t->t_next) {
         const char *line;
 
         if (t->t_line)
             line = lineGetData(t->t_line);
         else
             line = NULL;
 
         if (inHeader) {
             cli_dbgmsg("parseEmailHeaders: check '%s'\n",
                        line ? line : "");
             if (line == NULL) {
                 /*
4e75d9b6
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
288057e9
                 cli_dbgmsg("End of header information\n");
                 if (!anyHeadersFound) {
                     cli_dbgmsg("Nothing interesting in the header\n");
                     break;
                 }
                 inHeader    = FALSE;
                 bodyIsEmpty = TRUE;
             } else {
                 char *ptr;
 
                 if (fullline == NULL) {
                     char cmd[RFC2821LENGTH + 1];
 
                     /*
45aba293
 					 * Continuation of line we're ignoring?
 					 */
288057e9
                     if (isblank(line[0]))
                         continue;
45aba293
 
288057e9
                     /*
45aba293
 					 * Is this a header we're interested in?
 					 */
288057e9
                     if ((strchr(line, ':') == NULL) ||
                         (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
                         if (strncmp(line, "From ", 5) == 0)
                             anyHeadersFound = TRUE;
                         continue;
                     }
 
                     ptr           = rfc822comments(cmd, NULL);
                     commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
                     if (ptr)
                         free(ptr);
 
                     switch (commandNumber) {
                         case CONTENT_TRANSFER_ENCODING:
                         case CONTENT_DISPOSITION:
                         case CONTENT_TYPE:
                             anyHeadersFound = TRUE;
                             break;
                         default:
                             if (!anyHeadersFound)
                                 anyHeadersFound = usefulHeader(commandNumber, cmd);
                             continue;
                     }
                     fullline       = cli_strdup(line);
                     fulllinelength = strlen(line) + 1;
                 } else if (line) {
                     fulllinelength += strlen(line) + 1;
                     ptr = cli_realloc(fullline, fulllinelength);
                     if (ptr == NULL)
                         continue;
                     fullline = ptr;
                     cli_strlcat(fullline, line, fulllinelength);
                 }
                 assert(fullline != NULL);
 
                 if (next_is_folded_header(t))
                     /* Add arguments to this line */
                     continue;
 
                 lineUnlink(t->t_line);
                 t->t_line = NULL;
 
                 if (count_quotes(fullline) & 1)
                     continue;
 
                 ptr = rfc822comments(fullline, NULL);
                 if (ptr) {
                     free(fullline);
                     fullline = ptr;
                 }
 
                 if (parseEmailHeader(ret, fullline, rfc821) < 0)
                     continue;
 
                 free(fullline);
                 fullline = NULL;
             }
         } else {
             if (bodyIsEmpty) {
                 if (line == NULL)
                     /* throw away leading blank lines */
                     continue;
                 /*
0d35f10f
 				 * Broken message: new line in the
 				 * middle of the headers, so the first
 				 * line of the body is in fact
 				 * the last lines of the header
 				 */
288057e9
                 if (newline_in_header(line))
                     continue;
                 bodyIsEmpty = FALSE;
             }
             /*if(t->t_line && isuuencodebegin(t->t_line))
0d35f10f
 				puts("FIXME: add fast visa here");*/
288057e9
             cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
             messageMoveText(ret, t, m);
             break;
         }
     }
 
     if (fullline) {
         if (*fullline) switch (commandNumber) {
                 case CONTENT_TRANSFER_ENCODING:
                 case CONTENT_DISPOSITION:
                 case CONTENT_TYPE:
                     cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
             }
         free(fullline);
     }
 
     if (!anyHeadersFound) {
         /*
4f1d0bfc
 		 * False positive in believing we have an e-mail when we don't
 		 */
288057e9
         messageDestroy(ret);
         cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
         return NULL;
     }
4f1d0bfc
 
288057e9
     cli_dbgmsg("parseEmailHeaders: return\n");
e17491b2
 
288057e9
     return ret;
7e577f26
 }
 
 /*
4c60b74f
  * Handle a header line of an email message
  */
 static int
393a6d67
 parseEmailHeader(message *m, const char *line, const table_t *rfc821)
4c60b74f
 {
288057e9
     int ret;
4c60b74f
 #ifdef CL_THREAD_SAFE
288057e9
     char *strptr;
4c60b74f
 #endif
288057e9
     const char *separator;
     char *cmd, *copy, tokenseparator[2];
4c60b74f
 
288057e9
     cli_dbgmsg("parseEmailHeader '%s'\n", line);
20d3dde9
 
288057e9
     /*
7cd9337a
 	 * In RFC822 the separator between the key a value is a colon,
97867f21
 	 * e.g.	Content-Transfer-Encoding: base64
 	 * However some MUA's are lapse about this and virus writers exploit
22cb38ed
 	 * this hole, so we need to check all known possibilities
97867f21
 	 */
288057e9
     for (separator = ":= "; *separator; separator++)
         if (strchr(line, *separator) != NULL)
             break;
97867f21
 
288057e9
     if (*separator == '\0')
         return -1;
1bfbedd4
 
288057e9
     copy = rfc2047(line);
     if (copy == NULL)
         /* an RFC checker would return -1 here */
         copy = cli_strdup(line);
f2f25418
 
288057e9
     tokenseparator[0] = *separator;
     tokenseparator[1] = '\0';
97867f21
 
288057e9
     ret = -1;
001ad879
 
288057e9
 #ifdef CL_THREAD_SAFE
     cmd = strtok_r(copy, tokenseparator, &strptr);
548a5f96
 #else
288057e9
     cmd = strtok(copy, tokenseparator);
548a5f96
 #endif
4c60b74f
 
288057e9
     if (cmd && (strstrip(cmd) > 0)) {
 #ifdef CL_THREAD_SAFE
         char *arg = strtok_r(NULL, "", &strptr);
548a5f96
 #else
288057e9
         char *arg = strtok(NULL, "");
548a5f96
 #endif
4c60b74f
 
288057e9
         if (arg)
             /*
4c60b74f
 			 * Found a header such as
 			 * Content-Type: multipart/mixed;
 			 * set arg to be
 			 * "multipart/mixed" and cmd to
a9f386ed
 			 * be "Content-Type"
4c60b74f
 			 */
288057e9
             ret = parseMimeHeader(m, cmd, rfc821, arg);
     }
     free(copy);
     return ret;
4c60b74f
 }
 
c2df9f79
 #if HAVE_LIBXML2
 /*
  * Element table handed to cli_msxml_parse_document() by parseRootMHTML():
  * each entry gives an HTML tag name, the name it is reported under and the
  * MSXML_* flags that apply to it.
  */
 static const struct key_entry mhtml_keys[] = {
     /* root html tags for microsoft office document */
     {"html", "RootHTML", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},
 
     {"head", "Head", MSXML_JSON_WRKPTR | MSXML_COMMENT_CB},
     {"meta", "Meta", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
     {"link", "Link", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
     {"script", "Script", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_VALUE}
 };
 /* Number of entries in mhtml_keys */
 static size_t num_mhtml_keys = sizeof(mhtml_keys) / sizeof(mhtml_keys[0]);
 
 /*
  * Element table handed to cli_msxml_parse_document() by parseMHTMLComment()
  * for the xml that is embedded in html comments.
  */
 static const struct key_entry mhtml_comment_keys[] = {
     /* embedded xml tags (comment) for microsoft office document */
     {"o:documentproperties", "DocumentProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},
     {"o:author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:lastauthor", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:lastsaved", "LastSaved", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
     {"o:version", "Version", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
 
     {"o:officedocumentsettings", "DocumentSettings", MSXML_IGNORE_ELEM},
     {"w:worddocument", "WordDocument", MSXML_IGNORE_ELEM},
     {"w:latentstyles", "LatentStyles", MSXML_IGNORE_ELEM}
 };
 /* Number of entries in mhtml_comment_keys */
 static size_t num_mhtml_comment_keys = sizeof(mhtml_comment_keys) / sizeof(mhtml_comment_keys[0]);
 #endif
 
 /*
a6369bc8
  * The related multipart root HTML file comment parsing wrapper.
  *
  * Attempts to leverage msxml parser, cannot operate without LIBXML2.
  * This function is only used for Preclassification JSON.
  */
02840644
 static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata)
a6369bc8
 {
02840644
     cl_error_t ret = CL_SUCCESS;
 
a6369bc8
 #if HAVE_LIBXML2
288057e9
     const char *xmlsrt, *xmlend;
     xmlTextReaderPtr reader;
a6369bc8
 
288057e9
     UNUSEDPARAM(cbdata);
     UNUSEDPARAM(wrkjobj);
a6369bc8
 
288057e9
     xmlend = comment;
     while ((xmlsrt = strstr(xmlend, "<xml>"))) {
         xmlend = strstr(xmlsrt, "</xml>");
         if (xmlend == NULL) {
             cli_dbgmsg("parseMHTMLComment: unbounded xml tag\n");
             break;
         }
a6369bc8
 
288057e9
         reader = xmlReaderForMemory(xmlsrt, xmlend - xmlsrt + 6, "comment.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
         if (!reader) {
             cli_dbgmsg("parseMHTMLComment: cannot initialize xmlReader\n");
a6369bc8
 
 #if HAVE_JSON
288057e9
             if (ctx->wrkproperty != NULL)
                 ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_MEM");
a6369bc8
 #endif
288057e9
             return ret; // libxml2 failed!
         }
 
         /* comment callback is not set to prevent recursion */
         /* TODO: should we separate the key dictionaries? */
         /* TODO: should we use the json object pointer? */
         ret = cli_msxml_parse_document(ctx, reader, mhtml_comment_keys, num_mhtml_comment_keys, MSXML_FLAG_JSON, NULL);
 
         xmlTextReaderClose(reader);
         xmlFreeTextReader(reader);
         if (ret != CL_SUCCESS)
             return ret;
     }
a6369bc8
 #else
288057e9
     UNUSEDPARAM(comment);
     UNUSEDPARAM(ctx);
     UNUSEDPARAM(wrkjobj);
     UNUSEDPARAM(cbdata);
a6369bc8
 
288057e9
     cli_dbgmsg("in parseMHTMLComment\n");
     cli_dbgmsg("parseMHTMLComment: parsing html xml-comments requires libxml2!\n");
a6369bc8
 #endif
02840644
     return ret;
a6369bc8
 }
 
 /*
  * The related multipart root HTML file parsing wrapper.
  *
  * Attempts to leverage msxml parser, cannot operate without LIBXML2 built
  * with HTML support; otherwise only a debug message is logged.
  * This function is only used for Preclassification JSON.
  *
  * mctx: mbox context, supplies the scan context (and the JSON work object)
  * m:    message holding the HTML, used when it is not NULL
  * t:    text holding the HTML, used only when m is NULL
  *
  * Returns OK when there is nothing to parse or parsing was not possible in
  * this build, otherwise the status mapped from cli_msxml_parse_document():
  * OK, MAXREC, MAXFILES, VIRUS or FAIL.
  */
 static mbox_status
 parseRootMHTML(mbox_ctx *mctx, message *m, text *t)
 {
     cli_ctx *ctx = mctx->ctx;
 #if HAVE_LIBXML2
 #ifdef LIBXML_HTML_ENABLED
     struct msxml_ctx mxctx;
     blob *input             = NULL;
     htmlDocPtr htmlDoc      = NULL;
     xmlTextReaderPtr reader = NULL;
     int ret                 = CL_SUCCESS;
     mbox_status rc          = OK;
 #if HAVE_JSON
     json_object *rhtml;
 #endif
 
     cli_dbgmsg("in parseRootMHTML\n");
 
     if (ctx == NULL)
         return OK;
 
     if (m == NULL && t == NULL)
         return OK;
 
     if (m != NULL)
         input = messageToBlob(m, 0);
     else /* t != NULL */
         input = textToBlob(t, NULL, 0);
 
     if (input == NULL)
         return OK;
 
     htmlDoc = htmlReadMemory((char *)input->data, input->len, "mhtml.html", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
     if (htmlDoc == NULL) {
         cli_dbgmsg("parseRootMHTML: cannot initialize read html document\n");
 #if HAVE_JSON
         if (ctx->wrkproperty != NULL)
             ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_HTML_READ");
         if (ret != CL_SUCCESS)
             rc = FAIL;
 #endif
         goto done;
     }
 
 #if HAVE_JSON
     if (mctx->wrkobj) {
         rhtml = cli_jsonobj(mctx->wrkobj, "RootHTML");
         if (rhtml != NULL) {
             /* MHTML-specific properties */
             cli_jsonstr(rhtml, "Encoding", (const char *)htmlGetMetaEncoding(htmlDoc));
             cli_jsonint(rhtml, "CompressMode", xmlGetDocCompressMode(htmlDoc));
         }
     }
 #endif
 
     reader = xmlReaderWalker(htmlDoc);
     if (reader == NULL) {
         cli_dbgmsg("parseRootMHTML: cannot initialize xmlTextReader\n");
 #if HAVE_JSON
         if (ctx->wrkproperty != NULL)
             ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_IO");
         if (ret != CL_SUCCESS)
             rc = FAIL;
 #endif
         /* the document is released on the common exit path below */
         goto done;
     }
 
     memset(&mxctx, 0, sizeof(mxctx));
     /* no scanning callback set */
     mxctx.comment_cb = parseMHTMLComment;
     ret              = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, MSXML_FLAG_JSON | MSXML_FLAG_WALK, &mxctx);
     switch (ret) {
         case CL_SUCCESS:
         case CL_ETIMEOUT:
         case CL_BREAK:
             rc = OK;
             break;
 
         case CL_EMAXREC:
             rc = MAXREC;
             break;
 
         case CL_EMAXFILES:
             rc = MAXFILES;
             break;
 
         case CL_VIRUS:
             rc = VIRUS;
             break;
 
         default:
             rc = FAIL;
             break;
     }
 
 done:
     /* Single exit path: release whatever has been set up so far */
     if (reader != NULL) {
         xmlTextReaderClose(reader);
         xmlFreeTextReader(reader);
     }
     if (htmlDoc != NULL)
         xmlFreeDoc(htmlDoc);
     blobDestroy(input);
     return rc;
 #else  /* LIBXML_HTML_ENABLED */
     UNUSEDPARAM(m);
     UNUSEDPARAM(t);
     cli_dbgmsg("in parseRootMHTML\n");
     cli_dbgmsg("parseRootMHTML: parsing html documents disabled in libxml2!\n");
 
     /* without this the function would end without returning a value */
     return OK;
 #endif /* LIBXML_HTML_ENABLED */
 #else  /* HAVE_LIBXML2 */
     UNUSEDPARAM(m);
     UNUSEDPARAM(t);
     cli_dbgmsg("in parseRootMHTML\n");
     cli_dbgmsg("parseRootMHTML: parsing html documents requires libxml2!\n");
 
     return OK;
 #endif /* HAVE_LIBXML2 */
 }
 
4c60b74f
 /*
e3aaff8e
  * This is a recursive routine.
  *
7e577f26
  * This function parses the body of mainMessage and saves its attachments in dir
  *
e06d34dc
  * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
f12d2498
  * any headers. First time of calling it'll be
  * the whole message. Later it'll be parts of a multipart message
e3aaff8e
  * textIn is the plain text message being built up so far
  */
ecc3d638
 static mbox_status
242ffd7a
 parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
e3aaff8e
 {
288057e9
     mbox_status rc;
     text *aText          = textIn;
     message *mainMessage = messageIn;
     fileblob *fb;
     bool infected                  = FALSE;
     const struct cl_engine *engine = mctx->ctx->engine;
     const int doPhishingScan       = engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
ede9939c
 #if HAVE_JSON
288057e9
     json_object *saveobj = mctx->wrkobj;
ede9939c
 #endif
8386c723
 
288057e9
     cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
                mctx->files);
e3aaff8e
 
288057e9
     /* FIXMELIMITS: this should be better integrated */
     if (engine->maxreclevel)
         /*
724b2bf7
 		 * This is approximate
 		 */
288057e9
         if (recursion_level > engine->maxreclevel) {
a5faab66
 
288057e9
             cli_dbgmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
             return MAXREC;
         }
     if (engine->maxfiles && (mctx->files >= engine->maxfiles)) {
         /*
724b2bf7
 		 * FIXME: This is only approx - it may have already
 		 * been exceeded
 		 */
288057e9
         cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", engine->maxfiles);
         return MAXFILES;
     }
 
     rc = OK;
 
     /* Anything left to be parsed? */
     if (mainMessage && (messageGetBody(mainMessage) != NULL)) {
         mime_type mimeType;
         int subtype, inhead, htmltextPart, inMimeHead, i;
         const char *mimeSubtype;
         char *boundary;
         const text *t_line;
         /*bool isAlternative;*/
         message *aMessage;
         int multiparts     = 0;
         message **messages = NULL; /* parts of a multipart message */
 
         cli_dbgmsg("Parsing mail file\n");
 
         mimeType    = messageGetMimeType(mainMessage);
         mimeSubtype = messageGetMimeSubtype(mainMessage);
ede9939c
 #if HAVE_JSON
288057e9
         if (mctx->wrkobj != NULL) {
             mctx->wrkobj = cli_jsonobj(mctx->wrkobj, "Body");
             cli_jsonstr(mctx->wrkobj, "MimeType", getMimeTypeStr(mimeType));
             cli_jsonstr(mctx->wrkobj, "MimeSubtype", mimeSubtype);
             cli_jsonstr(mctx->wrkobj, "EncodingType", getEncTypeStr(messageGetEncoding(mainMessage)));
             cli_jsonstr(mctx->wrkobj, "Disposition", messageGetDispositionType(mainMessage));
             cli_jsonstr(mctx->wrkobj, "Filename", messageHasFilename(mainMessage) ? messageGetFilename(mainMessage) : "(inline)");
         }
ede9939c
 #endif
e3aaff8e
 
288057e9
         /* pre-process */
         subtype = tableFind(mctx->subtypeTable, mimeSubtype);
         if ((mimeType == TEXT) && (subtype == PLAIN)) {
             /*
e3aaff8e
 			 * This is effectively no encoding, notice that we
 			 * don't check that charset is us-ascii
 			 */
288057e9
             cli_dbgmsg("text/plain: Assume no attachments\n");
             mimeType = NOMIME;
             messageSetMimeSubtype(mainMessage, "");
         } else if ((mimeType == MESSAGE) &&
                    (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
             /*
528c8a2d
 			 * RFC1892/RFC3462: section 2 text/rfc822-headers
 			 * incorrectly sent as message/rfc822-headers
46d375fe
 			 *
 			 * Parse as text/plain, i.e. no mime
528c8a2d
 			 */
288057e9
             cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
             mimeType = NOMIME;
             messageSetMimeSubtype(mainMessage, "");
         } else
             cli_dbgmsg("mimeType = %d\n", (int)mimeType);
 
         switch (mimeType) {
             case NOMIME:
                 cli_dbgmsg("Not a mime encoded message\n");
                 aText = textAddMessage(aText, mainMessage);
 
                 if (!doPhishingScan)
                     break;
                 /*
94aea271
 			 * Fall through: some phishing mails claim they are
 			 * text/plain, when they are in fact html
 			 */
288057e9
             case TEXT:
                 /* text/plain has been preprocessed as no encoding */
                 if (doPhishingScan) {
                     /*
47d9cc65
 				 * It would be better to save and scan the
 				 * file and only checkURLs if it's found to be
 				 * clean
 				 */
288057e9
                     checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
                     /*
8b899010
 				 * There might be html sent without subtype
 				 * html too, so scan them for phishing
 				 */
288057e9
                     if (rc == VIRUS)
                         infected = TRUE;
                 }
                 break;
             case MULTIPART:
                 cli_dbgmsg("Content-type 'multipart' handler\n");
                 boundary = messageFindArgument(mainMessage, "boundary");
e3aaff8e
 
ede9939c
 #if HAVE_JSON
288057e9
                 if (mctx->wrkobj != NULL)
                     cli_jsonstr(mctx->wrkobj, "Boundary", boundary);
ede9939c
 #endif
 
288057e9
                 if (boundary == NULL) {
                     cli_dbgmsg("Multipart/%s MIME message contains no boundary header\n",
                                mimeSubtype);
                     /* Broken e-mail message */
                     mimeType = NOMIME;
                     /*
e2e7ebf5
 				 * The break means that we will still
 				 * check if the file contains a uuencoded file
 				 */
288057e9
                     break;
                 }
e3aaff8e
 
288057e9
                 cli_chomp(boundary);
23889d27
 
288057e9
                 /* Perhaps it should assume mixed? */
                 if (mimeSubtype[0] == '\0') {
                     cli_dbgmsg("Multipart has no subtype assuming alternative\n");
                     mimeSubtype = "alternative";
                     messageSetMimeSubtype(mainMessage, "alternative");
                 }
cb5a87e0
 
288057e9
                 /*
e3aaff8e
 			 * Get to the start of the first message
 			 */
288057e9
                 t_line = messageGetBody(mainMessage);
 
                 if (t_line == NULL) {
                     cli_dbgmsg("Multipart MIME message has no body\n");
                     free((char *)boundary);
                     mimeType = NOMIME;
                     break;
                 }
 
                 do
                     if (t_line->t_line) {
                         if (boundaryStart(lineGetData(t_line->t_line), boundary))
                             break;
                         /*
182bbcc8
 					 * Found a binhex file before
ebe57840
 					 *	the first multipart
b116962d
 					 * TODO: check yEnc
bae9c53f
 					 */
288057e9
                         if (binhexBegin(mainMessage) == t_line) {
                             if (exportBinhexMessage(mctx, mainMessage)) {
                                 /* virus found */
                                 rc       = VIRUS;
                                 infected = TRUE;
                                 break;
                             }
                         } else if (t_line->t_next &&
                                    (encodingLine(mainMessage) == t_line->t_next)) {
                             /*
99c2299d
 						 * We look for the next line
 						 * since later on we'll skip
 						 * over the important line when
 						 * we think it's a blank line
 						 * at the top of the message -
 						 * which it would have been in
 						 * an RFC compliant world
 						 */
288057e9
                             cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
                                        lineGetData(t_line->t_next->t_line));
                             if (messageGetEncoding(mainMessage) == NOENCODING)
                                 break;
                         }
                     }
                 while ((t_line = t_line->t_next) != NULL);
 
                 if (t_line == NULL) {
                     cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
                                boundary);
                     free((char *)boundary);
                     mimeType = NOMIME;
                     /*
e2e7ebf5
 				 * The break means that we will still
182bbcc8
 				 * check if the file contains a yEnc/binhex file
e2e7ebf5
 				 */
288057e9
                     break;
                 }
                 /*
e3aaff8e
 			 * Build up a table of all of the parts of this
 			 * multipart message. Remember, each part may itself
 			 * be a multipart message.
 			 */
288057e9
                 inhead     = 1;
                 inMimeHead = 0;
e3aaff8e
 
288057e9
                 /*
9bccc1e5
 			 * Re-read this variable in case mimeSubtype has changed
 			 */
288057e9
                 subtype = tableFind(mctx->subtypeTable, mimeSubtype);
9bccc1e5
 
288057e9
                 /*
9a729c80
 			 * Parse the mainMessage object and create an array
 			 * of objects called messages, one for each of the
9bccc1e5
 			 * multiparts that mainMessage contains.
3f46285b
 			 *
e06d34dc
 			 * This looks like parseEmailHeaders() - maybe there's
 			 * some duplication of code to be cleaned up
9bccc1e5
 			 *
842c7d49
 			 * We may need to create an array rather than just
9bccc1e5
 			 * save each part as it is found because not all
 			 * elements will need scanning, and we don't yet know
 			 * which of those elements it will be, except in
 			 * the case of mixed, when all parts need to be scanned.
e06d34dc
 			 */
288057e9
                 for (multiparts = 0; t_line && !infected; multiparts++) {
                     int lines = 0;
                     message **m;
                     mbox_status old_rc;
 
                     m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
                     if (m == NULL)
                         break;
                     messages = m;
 
                     aMessage = messages[multiparts] = messageCreate();
                     if (aMessage == NULL) {
                         multiparts--;
                         /* if allocation failed the first time,
f7f59c08
 					 * there's no point in retrying, just
 					 * break out */
288057e9
                         break;
                     }
                     messageSetCTX(aMessage, mctx->ctx);
e3aaff8e
 
288057e9
                     cli_dbgmsg("Now read in part %d\n", multiparts);
e3aaff8e
 
288057e9
                     /*
8ba634a9
 				 * Ignore blank lines. There shouldn't be ANY
 				 * but some viruses insert them
 				 */
288057e9
                     while ((t_line = t_line->t_next) != NULL)
                         if (t_line->t_line &&
                             /*(cli_chomp(t_line->t_text) > 0))*/
                             (strlen(lineGetData(t_line->t_line)) > 0))
                             break;
 
                     if (t_line == NULL) {
                         cli_dbgmsg("Empty part\n");
                         /*
b9ce9639
 					 * Remove this part unless there's
182bbcc8
 					 * a binhex portion somewhere in
b9ce9639
 					 * the complete message that we may
 					 * throw away by mistake if the MIME
 					 * encoding information is incorrect
 					 */
288057e9
                         if (mainMessage &&
                             (binhexBegin(mainMessage) == NULL)) {
                             messageDestroy(aMessage);
                             --multiparts;
                         }
                         continue;
                     }
 
                     do {
                         const char *line = lineGetData(t_line->t_line);
 
                         /*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
fa5661be
 						multiparts, inMimeHead, inhead, boundary, line,
391f7bb3
 						t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
e3aaff8e
 
288057e9
                         if (inMimeHead) { /* continuation line */
                             if (line == NULL) {
                                 /*inhead =*/inMimeHead = 0;
                                 continue;
                             }
                             /*
3a978f7d
 						 * Handle continuation lines
 						 * because the previous line
1eec55a6
 						 * ended with a ; or this line
 						 * starts with a white space
3a978f7d
 						 */
288057e9
                             cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
                                        multiparts, line);
                             /*
3a978f7d
 						 * Handle the case when it
 						 * isn't really a continuation
 						 * line:
 						 * Content-Type: application/octet-stream;
 						 * Content-Transfer-Encoding: base64
 						 */
288057e9
                             parseEmailHeader(aMessage, line, mctx->rfc821Table);
 
                             while (isspace((int)*line))
                                 line++;
 
                             if (*line == '\0') {
                                 inhead = inMimeHead = 0;
                                 continue;
                             }
                             inMimeHead = FALSE;
                             messageAddArgument(aMessage, line);
                         } else if (inhead) { /* handling normal headers */
                             /*int quotes;*/
                             char *fullline, *ptr;
 
                             if (line == NULL) {
                                 /*
7e67e382
 							 * empty line, should the end of the headers,
 							 * but some base64 decoders, e.g. uudeview, are broken
 							 * and will handle this type of entry, decoding the
 							 * base64 content...
 							 * Content-Type: application/octet-stream; name=text.zip
 							 * Content-Transfer-Encoding: base64
 							 * Content-Disposition: attachment; filename="text.zip"
5198de85
 							 *
7e67e382
 							 * Content-Disposition: attachment;
 							 *	filename=text.zip
 							 * Content-Type: application/octet-stream;
 							 *	name=text.zip
 							 * Content-Transfer-Encoding: base64
5198de85
 							 *
7e67e382
 							 * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
 							 */
288057e9
                                 const text *next = t_line->t_next;
842c7d49
 
288057e9
                                 if (next && next->t_line) {
                                     const char *data = lineGetData(next->t_line);
5b76248c
 
288057e9
                                     if ((messageGetEncoding(aMessage) == NOENCODING) &&
                                         (messageGetMimeType(aMessage) == APPLICATION) &&
                                         data && strstr(data, "base64")) {
                                         /*
d72749e0
 									 * Handle this nightmare (note the blank
 									 * line in the header and the incorrect
 									 * content-transfer-encoding header)
 									 *
 									 * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
 									 *
 									 * r-Encoding: base64
 									 * Content-Disposition: attachment; filename="zipped_files.EXE"
 									 */
288057e9
                                         messageSetEncoding(aMessage, "base64");
                                         cli_dbgmsg("Ignoring fake end of headers\n");
                                         continue;
                                     }
                                     if ((strncmp(data, "Content", 7) == 0) ||
                                         (strncmp(data, "filename=", 9) == 0)) {
                                         cli_dbgmsg("Ignoring fake end of headers\n");
                                         continue;
                                     }
                                 }
                                 cli_dbgmsg("Multipart %d: End of header information\n",
                                            multiparts);
                                 inhead = 0;
                                 continue;
                             }
                             if (isspace((int)*line)) {
                                 /*
c76810dc
 							 * The first line is
 							 * continuation line.
 							 * This is tricky
 							 * to handle, but
 							 * all we can do is our
 							 * best
 							 */
288057e9
                                 cli_dbgmsg("Part %d starts with a continuation line\n",
                                            multiparts);
                                 messageAddArgument(aMessage, line);
                                 /*
c76810dc
 							 * Give it a default
 							 * MIME type since
 							 * that may be the
 							 * missing line
 							 *
 							 * Choose application to
 							 * force a save
 							 */
288057e9
                                 if (messageGetMimeType(aMessage) == NOMIME)
                                     messageSetMimeType(aMessage, "application");
                                 continue;
                             }
c76810dc
 
288057e9
                             inMimeHead = FALSE;
b2223aad
 
288057e9
                             assert(strlen(line) <= RFC2821LENGTH);
f1c1300c
 
288057e9
                             fullline = rfc822comments(line, NULL);
                             if (fullline == NULL)
                                 fullline = cli_strdup(line);
2ad0c86e
 
288057e9
                             /*quotes = count_quotes(fullline);*/
f1c1300c
 
288057e9
                             /*
c8a7cef0
 						 * Fold next lines to the end of this
 						 * if they start with a white space
 						 * or if this line has an odd number of quotes:
 						 * Content-Type: application/octet-stream; name="foo
 						 * "
 						 */
288057e9
                             while (t_line && next_is_folded_header(t_line)) {
                                 const char *data;
                                 size_t datasz;
842c7d49
 
288057e9
                                 t_line = t_line->t_next;
842c7d49
 
288057e9
                                 data = lineGetData(t_line->t_line);
f1c1300c
 
288057e9
                                 if (data[1] == '\0') {
                                     /*
f1d57230
 								 * Broken message: the
 								 * blank line at the end
 								 * of the headers isn't blank -
 								 * it contains a space
 								 */
288057e9
                                     cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
                                                multiparts);
                                     inhead = FALSE;
                                     break;
                                 }
f1d57230
 
288057e9
                                 datasz = strlen(fullline) + strlen(data) + 1;
                                 ptr    = cli_realloc(fullline, datasz);
303f9be9
 
288057e9
                                 if (ptr == NULL)
                                     break;
1eec55a6
 
288057e9
                                 fullline = ptr;
                                 cli_strlcat(fullline, data, datasz);
2ad0c86e
 
288057e9
                                 /*quotes = count_quotes(data);*/
                             }
842c7d49
 
288057e9
                             cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
                                        multiparts, fullline);
c8a7cef0
 
288057e9
                             parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
                             free(fullline);
                         } else if (boundaryEnd(line, boundary)) {
                             /*
e3aaff8e
 						 * Some viruses put information
 						 * *after* the end of message,
 						 * which presumably some broken
 						 * mail clients find, so we
 						 * can't assume that this
 						 * is the end of the message
 						 */
288057e9
                             /* t_line = NULL;*/
                             break;
                         } else if (boundaryStart(line, boundary)) {
                             inhead = 1;
                             break;
                         } else {
                             if (messageAddLine(aMessage, t_line->t_line) < 0)
                                 break;
                             lines++;
                         }
                     } while ((t_line = t_line->t_next) != NULL);
 
                     cli_dbgmsg("Part %d has %d lines, rc = %d\n",
                                multiparts, lines, (int)rc);
 
                     /*
9bccc1e5
 				 * Only save in the array of messages if some
 				 * decision will be taken on whether to scan.
 				 * If all parts will be scanned then save to
 				 * file straight away
 				 */
288057e9
                     switch (subtype) {
                         case MIXED:
                         case ALTERNATIVE:
                         case REPORT:
                         case DIGEST:
                         case APPLEDOUBLE:
                         case KNOWBOT:
                         case -1:
                             old_rc      = rc;
                             mainMessage = do_multipart(mainMessage,
                                                        messages, multiparts,
                                                        &rc, mctx, messageIn,
                                                        &aText, recursion_level);
                             if ((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
                                 rc = OK;
                             if (messages[multiparts]) {
                                 messageDestroy(messages[multiparts]);
                                 messages[multiparts] = NULL;
                             }
                             --multiparts;
                             if (rc == VIRUS)
                                 infected = TRUE;
                             break;
 
                         case RELATED:
                         case ENCRYPTED:
                         case SIGNED:
                         case PARALLEL:
                             /* all the subtypes that we handle
f7f59c08
 						 * (all from the switch(tableFind...) below)
 						 * must be listed here */
288057e9
                             break;
                         default:
694e7882
                             /* this is a subtype that we
 						 * don't handle anyway,
f7f59c08
 						 * don't store */
288057e9
                             if (messages[multiparts]) {
                                 messageDestroy(messages[multiparts]);
                                 messages[multiparts] = NULL;
                             }
                             --multiparts;
                     }
                 }
e3aaff8e
 
288057e9
                 free((char *)boundary);
e3aaff8e
 
288057e9
                 /*
c79a2273
 			 * Preprocess. Anything special to be done before
 			 * we handle the multiparts?
9a729c80
 			 */
288057e9
                 switch (subtype) {
                     case KNOWBOT:
                         /* TODO */
                         cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
                         mimeSubtype = "mixed";
                         break;
                     case -1:
                         /*
5d8100cb
 					 * According to section 7.2.6 of
7cd9337a
 					 * RFC1521, unrecognized multiparts
5d8100cb
 					 * should be treated as multipart/mixed.
 					 */
288057e9
                         cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
                         mimeSubtype = "mixed";
                         break;
                 }
9a729c80
 
288057e9
                 /*
7c1eb3bf
 			 * We've finished message we're parsing
 			 */
288057e9
                 if (mainMessage && (mainMessage != messageIn)) {
                     messageDestroy(mainMessage);
                     mainMessage = NULL;
                 }
 
                 cli_dbgmsg("The message has %d parts\n", multiparts);
 
                 if (infected || ((multiparts == 0) && (aText == NULL))) {
                     if (messages) {
                         for (i = 0; i < multiparts; i++)
                             if (messages[i])
                                 messageDestroy(messages[i]);
                         free(messages);
                     }
                     if (aText && (textIn == NULL))
                         textDestroy(aText);
b912eaf2
 
ede9939c
 #if HAVE_JSON
288057e9
                     mctx->wrkobj = saveobj;
ede9939c
 #endif
288057e9
                     /*
ecc3d638
 				 * Nothing to do
9bccc1e5
 				 */
288057e9
                     switch (rc) {
                         case VIRUS:
                             return VIRUS;
                         case MAXREC:
                             return MAXREC;
                         default:
                             return OK_ATTACHMENTS_NOT_SAVED;
                     }
                 }
7c1eb3bf
 
288057e9
                 cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
e3aaff8e
 
288057e9
                 /*
9a729c80
 			 * We now have all the parts of the multipart message
 			 * in the messages array:
 			 *	message *messages[multiparts]
 			 * Let's decide what to do with them all
 			 */
288057e9
                 switch (tableFind(mctx->subtypeTable, mimeSubtype)) {
                     case RELATED:
                         cli_dbgmsg("Multipart related handler\n");
                         /*
294d0774
 				 * Have a look to see if there's HTML code
 				 * which will need scanning
e3aaff8e
 				 */
288057e9
                         aMessage = NULL;
                         assert(multiparts > 0);
e3aaff8e
 
288057e9
                         htmltextPart = getTextPart(messages, multiparts);
e3aaff8e
 
288057e9
                         if (htmltextPart >= 0 && messages) {
                             if (messageGetBody(messages[htmltextPart]))
faa0d267
 
288057e9
                                 aText = textAddMessage(aText, messages[htmltextPart]);
                         } else
                             /*
294d0774
 					 * There isn't an HTML bit. If there's a
 					 * multipart bit, it may be in there
 					 * somewhere
e3aaff8e
 					 */
288057e9
                             for (i = 0; i < multiparts; i++)
                                 if (messageGetMimeType(messages[i]) == MULTIPART) {
                                     aMessage     = messages[i];
                                     htmltextPart = i;
                                     break;
                                 }
 
                         if (htmltextPart == -1)
                             cli_dbgmsg("No HTML code found to be scanned\n");
                         else {
c2df9f79
 #if HAVE_JSON
288057e9
                             /* Send root HTML file for preclassification */
                             if (mctx->ctx->wrkproperty)
                                 parseRootMHTML(mctx, aMessage, aText);
c2df9f79
 #endif
288057e9
                             rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
                             if ((rc == OK) && aMessage) {
                                 assert(aMessage == messages[htmltextPart]);
                                 messageDestroy(aMessage);
                                 messages[htmltextPart] = NULL;
                             } else if (rc == VIRUS) {
                                 infected = TRUE;
                                 break;
                             }
                         }
 
                         /*
e3aaff8e
 				 * The message is confused about the difference
 				 * between alternative and related. Badtrans.B
 				 * suffers from this problem.
 				 *
 				 * Fall through in this case:
 				 * Content-Type: multipart/related;
 				 *	type="multipart/alternative"
 				 */
288057e9
                     case DIGEST:
                         /*
ba867aed
 				 * According to section 5.1.5 RFC2046, the
 				 * default mime type of multipart/digest parts
 				 * is message/rfc822
 				 *
 				 * We consider them as alternative, wrong in
 				 * the strictest sense since they aren't
 				 * alternatives - all parts are valid - but it's
 				 * OK for our needs since it means each part
 				 * will be scanned
 				 */
288057e9
                     case ALTERNATIVE:
                         cli_dbgmsg("Multipart alternative handler\n");
e3aaff8e
 
288057e9
                         /*
e3aaff8e
 				 * Fall through - some clients are broken and
 				 * say alternative instead of mixed. The Klez
5d8100cb
 				 * virus is broken that way, and anyway we
 				 * wish to scan all of the alternatives
e3aaff8e
 				 */
288057e9
                     case REPORT:
                         /*
e3aaff8e
 				 * According to section 1 of RFC1892, the
 				 * syntax of multipart/report is the same
 				 * as multipart/mixed. There are some required
 				 * parameters, but there's no need for us to
 				 * verify that they exist
 				 */
288057e9
                     case ENCRYPTED:
                         /* MUAs without encryption plugins can display as multipart/mixed,
6351aa86
 				 * just scan it*/
288057e9
                     case MIXED:
                     case APPLEDOUBLE: /* not really supported */
                         /*
e3aaff8e
 				 * Look for attachments
 				 *
 				 * Not all formats are supported. If an
 				 * unsupported format turns out to be
 				 * common enough to implement, it is a simple
 				 * matter to add it
 				 */
288057e9
                         if (aText) {
                             if (mainMessage && (mainMessage != messageIn))
                                 messageDestroy(mainMessage);
                             mainMessage = NULL;
                         }
 
                         cli_dbgmsg("Mixed message with %d parts\n", multiparts);
                         for (i = 0; i < multiparts; i++) {
                             mainMessage = do_multipart(mainMessage,
                                                        messages, i, &rc, mctx,
                                                        messageIn, &aText, recursion_level + 1);
                             if (rc == VIRUS) {
                                 infected = TRUE;
                                 break;
                             }
                             if (rc == MAXREC)
                                 break;
                             if (rc == OK_ATTACHMENTS_NOT_SAVED)
                                 rc = OK;
                         }
 
                         /* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
                         break;
                     case SIGNED:
                     case PARALLEL:
                         /*
e3aaff8e
 				 * If we're here it could be because we have a
 				 * multipart/mixed message, consisting of a
 				 * message followed by an attachment. That
 				 * message itself is a multipart/alternative
 				 * message and we need to dig out the plain
 				 * text part of that alternative
 				 */
288057e9
                         if (messages) {
                             htmltextPart = getTextPart(messages, multiparts);
                             if (htmltextPart == -1)
                                 htmltextPart = 0;
                             rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
                         }
                         break;
                     default:
                         assert(0);
                 }
 
                 if (mainMessage && (mainMessage != messageIn))
                     messageDestroy(mainMessage);
 
                 if (aText && (textIn == NULL)) {
                     if ((!infected) && (fb = fileblobCreate()) != NULL) {
                         cli_dbgmsg("Save non mime and/or text/plain part\n");
                         fileblobSetFilename(fb, mctx->dir, "textpart");
                         /*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
                         fileblobSetCTX(fb, mctx->ctx);
                         (void)textToFileblob(aText, fb, 1);
 
                         fileblobDestroy(fb);
                         mctx->files++;
                     }
                     textDestroy(aText);
                 }
 
                 for (i = 0; i < multiparts; i++)
                     if (messages[i])
                         messageDestroy(messages[i]);
 
                 if (messages)
                     free(messages);
b726511f
 
ede9939c
 #if HAVE_JSON
288057e9
                 mctx->wrkobj = saveobj;
ede9939c
 #endif
288057e9
                 return rc;
e3aaff8e
 
288057e9
             case MESSAGE:
                 /*
e3aaff8e
 			 * Check for forbidden encodings
 			 */
288057e9
                 switch (messageGetEncoding(mainMessage)) {
                     case NOENCODING:
                     case EIGHTBIT:
                     case BINARY:
                         break;
                     default:
                         cli_dbgmsg("MIME type 'message' cannot be decoded\n");
                         break;
                 }
                 rc = FAIL;
                 if ((strcasecmp(mimeSubtype, "rfc822") == 0) ||
                     (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
                     message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
                     if (m) {
                         cli_dbgmsg("Decode rfc822\n");
 
                         messageSetCTX(m, mctx->ctx);
 
                         if (mainMessage && (mainMessage != messageIn)) {
                             messageDestroy(mainMessage);
                             mainMessage = NULL;
                         } else
                             messageReset(mainMessage);
                         if (messageGetBody(m))
                             rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
 
                         messageDestroy(m);
                     }
                     break;
                 } else if (strcasecmp(mimeSubtype, "disposition-notification") == 0) {
                     /* RFC 2298 - handle like a normal email */
                     rc = OK;
                     break;
                 } else if (strcasecmp(mimeSubtype, "partial") == 0) {
                     if (mctx->ctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE) {
                         /* RFC1341 message split over many emails */
                         if (rfc1341(mainMessage, mctx->dir) >= 0)
                             rc = OK;
                     } else {
                         cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
                     }
                 } else if (strcasecmp(mimeSubtype, "external-body") == 0)
                     /* TODO */
                     cli_warnmsg("Attempt to send Content-type message/external-body trapped\n");
                 else
                     cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
 
                 if (mainMessage && (mainMessage != messageIn))
                     messageDestroy(mainMessage);
                 if (messages)
                     free(messages);
ede9939c
 #if HAVE_JSON
288057e9
                 mctx->wrkobj = saveobj;
ede9939c
 #endif
288057e9
                 return rc;
e3aaff8e
 
288057e9
             default:
                 cli_dbgmsg("Message received with unknown mime encoding - assume application\n");
                 /*
5b2bcb65
 			 * Some Yahoo emails attach as
 			 * Content-Type: X-unknown/unknown;
 			 * instead of
 			 * Content-Type: application/unknown;
 			 * so let's try our best to salvage something
 			 */
288057e9
             case APPLICATION:
                 /*cptr = messageGetMimeSubtype(mainMessage);
d4d14218
 
4ab382c3
 			if((strcasecmp(cptr, "octet-stream") == 0) ||
aa0210b6
 			   (strcasecmp(cptr, "x-msdownload") == 0)) {*/
288057e9
                 {
                     fb = messageToFileblob(mainMessage, mctx->dir, 1);
 
                     if (fb) {
                         cli_dbgmsg("Saving main message as attachment\n");
                         if (fileblobScanAndDestroy(fb) == CL_VIRUS)
                             rc = VIRUS;
                         mctx->files++;
                         if (mainMessage != messageIn) {
                             messageDestroy(mainMessage);
                             mainMessage = NULL;
                         } else
                             messageReset(mainMessage);
                     }
                 } /*else
aa0210b6
 				cli_warnmsg("Discarded application not sent as attachment\n");*/
288057e9
                 break;
 
             case AUDIO:
             case VIDEO:
             case IMAGE:
                 break;
         }
 
         if (messages) {
             /* "can't happen" */
             cli_warnmsg("messages != NULL\n");
             free(messages);
         }
     }
 
     if (aText && (textIn == NULL)) {
         /* Look for a bounce in the text (non mime encoded) portion */
         const text *t;
         /* isBounceStart() is expensive, reduce the number of calls */
         bool lookahead_definately_is_bounce = FALSE;
 
         for (t = aText; t && (rc != VIRUS); t = t->t_next) {
             const line_t *l = t->t_line;
             const text *lookahead, *topofbounce;
             const char *s;
             bool inheader;
 
             if (l == NULL) {
                 /* assert(lookahead_definately_is_bounce == FALSE) */
                 continue;
             }
 
             if (lookahead_definately_is_bounce)
                 lookahead_definately_is_bounce = FALSE;
             else if (!isBounceStart(mctx, lineGetData(l)))
                 continue;
 
             lookahead = t->t_next;
             if (lookahead) {
                 if (isBounceStart(mctx, lineGetData(lookahead->t_line))) {
                     lookahead_definately_is_bounce = TRUE;
                     /* don't save worthless header lines */
                     continue;
                 }
             } else /* don't save a single liner */
                 break;
 
             /*
6fd711b2
 			 * We've found what looks like the start of a bounce
 			 * message. Only bother saving if it really is a bounce
 			 * message, this helps to speed up scanning of ping-pong
 			 * messages that have lots of bounces within bounces in
 			 * them
 			 */
288057e9
             for (; lookahead; lookahead = lookahead->t_next) {
                 l = lookahead->t_line;
 
                 if (l == NULL)
                     break;
                 s = lineGetData(l);
                 if (strncasecmp(s, "Content-Type:", 13) == 0) {
                     /*
584aadd5
 					 * Don't bother with text/plain or
 					 * text/html
6fd711b2
 					 */
288057e9
                     if (cli_strcasestr(s, "text/plain") != NULL)
                         /*
584aadd5
 						 * Don't bother to save the
 						 * unuseful part, read past
 						 * the headers then we'll go
 						 * on to look for the next
 						 * bounce message
6862efc7
 						 */
288057e9
                         continue;
                     if ((!doPhishingScan) &&
                         (cli_strcasestr(s, "text/html") != NULL))
                         continue;
                     break;
                 }
             }
 
             if (lookahead && (lookahead->t_line == NULL)) {
                 cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
                 t = lookahead;
                 /* look for next bounce message */
                 continue;
             }
 
             /*
6862efc7
 			 * Prescan the bounce message to see if there's likely
 			 * to be anything nasty.
 			 * This algorithm is hand crafted and may be breakable
 			 * so all submissions are welcome. It's best NOT to
 			 * remove this however you may be tempted, because it
 			 * significantly speeds up the scanning of multiple
 			 * bounces (i.e. bounces within many bounces)
 			 */
288057e9
             for (; lookahead; lookahead = lookahead->t_next) {
                 l = lookahead->t_line;
 
                 if (l) {
                     s = lineGetData(l);
                     if ((strncasecmp(s, "Content-Type:", 13) == 0) &&
                         (strstr(s, "multipart/") == NULL) &&
                         (strstr(s, "message/rfc822") == NULL) &&
                         (strstr(s, "text/plain") == NULL))
                         break;
                 }
             }
             if (lookahead == NULL) {
                 cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
                 /* nothing here, move along please */
                 break;
             }
             if ((fb = fileblobCreate()) == NULL)
                 break;
             cli_dbgmsg("Save non mime part bounce message\n");
             fileblobSetFilename(fb, mctx->dir, "bounce");
             fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
             fileblobSetCTX(fb, mctx->ctx);
 
             inheader    = TRUE;
             topofbounce = NULL;
             do {
                 l = t->t_line;
 
                 if (l == NULL) {
                     if (inheader) {
                         inheader    = FALSE;
                         topofbounce = t;
                     }
                 } else {
                     s = lineGetData(l);
                     fileblobAddData(fb, (const unsigned char *)s, strlen(s));
                 }
                 fileblobAddData(fb, (const unsigned char *)"\n", 1);
                 lookahead = t->t_next;
                 if (lookahead == NULL)
                     break;
                 t = lookahead;
                 l = t->t_line;
                 if ((!inheader) && l) {
                     s = lineGetData(l);
                     if (isBounceStart(mctx, s)) {
                         cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
                         lookahead_definately_is_bounce = TRUE;
                         break;
                     }
                 }
             } while (!fileblobInfected(fb));
 
             if (fileblobScanAndDestroy(fb) == CL_VIRUS)
                 rc = VIRUS;
             mctx->files++;
 
             if (topofbounce)
                 t = topofbounce;
         }
         textDestroy(aText);
         aText = NULL;
     }
 
     /*
b0b860f1
 	 * No attachments - scan the text portions, often files
 	 * are hidden in HTML code
 	 */
288057e9
     if (mainMessage && (rc != VIRUS)) {
         text *t_line;
47d9cc65
 
288057e9
         /*
b0b860f1
 		 * Look for uu-encoded main file
e3aaff8e
 		 */
288057e9
         if (mainMessage->body_first != NULL &&
             (encodingLine(mainMessage) != NULL) &&
             ((t_line = bounceBegin(mainMessage)) != NULL))
             rc = (exportBounceMessage(mctx, t_line) == CL_VIRUS) ? VIRUS : OK;
         else {
             bool saveIt;
 
             if (messageGetMimeType(mainMessage) == MESSAGE)
                 /*
b0b860f1
 				 * Quick peek, if the encapsulated
 				 * message has no
 				 * content encoding statement don't
 				 * bother saving to scan, it's safe
f01bbfe8
 				 */
288057e9
                 saveIt = (bool)(encodingLine(mainMessage) != NULL);
             else if (mainMessage->body_last != NULL && (t_line = encodingLine(mainMessage)) != NULL) {
                 /*
b0b860f1
 				 * Some bounces include the message
 				 * body without the headers.
ebe57840
 				 * FIXME: Unfortunately this generates a
b0b860f1
 				 * lot of false positives that a bounce
 				 * has been found when it hasn't.
a7527b1f
 				 */
288057e9
                 if ((fb = fileblobCreate()) != NULL) {
                     cli_dbgmsg("Found a bounce message with no header at '%s'\n",
                                lineGetData(t_line->t_line));
                     fileblobSetFilename(fb, mctx->dir, "bounce");
                     fileblobAddData(fb,
                                     (const unsigned char *)"Received: by clamd (bounce)\n",
                                     28);
 
                     fileblobSetCTX(fb, mctx->ctx);
                     if (fileblobScanAndDestroy(textToFileblob(t_line, fb, 1)) == CL_VIRUS)
                         rc = VIRUS;
                     mctx->files++;
                 }
                 saveIt = FALSE;
             } else
                 /*
b0b860f1
 				 * Save the entire text portion,
 				 * since it may be an HTML file with
a05e6d45
 				 * a JavaScript virus or a phish
b0b860f1
 				 */
288057e9
                 saveIt = TRUE;
 
             if (saveIt) {
                 cli_dbgmsg("Saving text part to scan, rc = %d\n",
                            (int)rc);
                 if (saveTextPart(mctx, mainMessage, 1) == CL_VIRUS)
                     rc = VIRUS;
 
                 if (mainMessage != messageIn) {
                     messageDestroy(mainMessage);
                     mainMessage = NULL;
                 } else
                     messageReset(mainMessage);
             }
         }
     } /*else
 		rc = OK_ATTACHMENTS_NOT_SAVED; */
       /* nothing saved */
 
     if (mainMessage && (mainMessage != messageIn))
         messageDestroy(mainMessage);
 
     if ((rc != FAIL) && infected)
         rc = VIRUS;
a603478f
 
ede9939c
 #if HAVE_JSON
288057e9
     mctx->wrkobj = saveobj;
ede9939c
 #endif
 
288057e9
     cli_dbgmsg("parseEmailBody() returning %d\n", (int)rc);
e3aaff8e
 
288057e9
     return rc;
e3aaff8e
 }
 
 /*
  * Is the current line the start of a new section?
  *
  * New sections start with --boundary
  *
  * line:     NUL-terminated text of the current line; never modified.
  * boundary: the multipart boundary string to look for.
  *
  * Returns 1 if the line is taken to open a new part, 0 otherwise (which
  * includes a NULL or empty line and a NULL boundary).
  *
  * The checks run on a private copy of the line with trailing spaces removed
  * and then passed through cli_chomp() (presumably to drop the line
  * terminator). If that copy cannot be allocated, the caller's line is
  * examined untrimmed instead.
  */
 static int
 boundaryStart(const char *line, const char *boundary)
 {
     const char *ptr;
     const char *k;
     char *out = NULL; /* heap copy from rfc822comments(), if any */
     char *newline;    /* trimmed private copy of line, or line itself */
     size_t len;
     int rc = 0;
     char buf[RFC2821LENGTH + 1];
 
     if (line == NULL || *line == '\0')
         return 0; /* empty line */
     if (boundary == NULL)
         return 0;
 
     newline = strdup(line);
     if (newline != NULL) {
         /*
          * Trim trailing spaces. Index from the length rather than
          * walking a pointer backwards, so that a line made up only
          * of spaces never forms a pointer before the start of the
          * buffer.
          */
         len = strlen(newline);
         while (len > 0 && newline[len - 1] == ' ')
             newline[--len] = '\0';
 
         cli_chomp(newline);
     } else {
         /* Out of memory: best effort on the caller's untrimmed line */
         newline = (char *)line;
     }
 
     /* cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary); */
 
     /* A candidate must begin with '-' or with an RFC822 comment */
     if ((*newline != '-') && (*newline != '('))
         goto done;
 
     if (strchr(newline, '-') == NULL)
         goto done;
 
     /*
      * Strip RFC822 comments. Use the stack buffer only when the text
      * AND its terminating NUL are certain to fit, i.e. the length is
      * strictly less than sizeof(buf); otherwise let rfc822comments()
      * allocate the result, which we must free.
      */
     if (strlen(newline) < sizeof(buf))
         ptr = rfc822comments(newline, buf);
     else
         ptr = out = rfc822comments(newline, NULL);
 
     /* NULL is treated as "nothing was stripped" */
     if (ptr == NULL)
         ptr = newline;
 
     /* Need a leading '-' followed by at least one more character */
     if ((*ptr++ != '-') || (*ptr == '\0'))
         goto done;
 
     /*
      * Gibe.B3 is broken, it has:
      *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
      * but its boundaries look like
      *	------ =_NextPart_000_01C31177.9DC7C000
      * notice the one too few '-'.
      * Presumably this is a deliberate exploitation of a bug in some mail
      * clients.
      *
      * The trouble is that this creates a lot of false positives for
      * boundary conditions, if we're too lax about matches. We do our level
      * best to avoid these false positives. For example if we have
      * boundary="1" we want to ensure that we don't break out of every line
      * that has -1 in it instead of starting --1. This needs some more work.
      *
      * Look with and without RFC822 comments stripped, I've seen some
      * samples where () are taken as comments in boundaries and some where
      * they're not. Irrespective of whatever RFC2822 says, we need to find
      * viruses in both types of mails.
      */
     if ((strstr(&ptr[1], boundary) != NULL) || (strstr(newline, boundary) != NULL)) {
         /*
          * We need to ensure that we don't match --11=-=-=11 when
          * looking for --1=-=-=1 in well behaved headers, that's a
          * false positive problem mentioned above
          *
          * First pass: the comment-stripped text. Step over the
          * leading run of '-' and require the remainder to be
          * exactly the boundary.
          */
         k = ptr;
         do {
             if (strcmp(++k, boundary) == 0) {
                 rc = 1;
                 break;
             }
         } while (*k == '-');
 
         if (rc == 0) {
             /*
              * Second pass: the same test with comments left in
              * place. Scan the trimmed copy, not the caller's
              * line, so that trailing spaces cannot defeat the
              * exact comparison. The text is at least two
              * characters long here, so &newline[2] is in bounds.
              */
             k = &newline[1];
             do {
                 if (strcmp(++k, boundary) == 0) {
                     rc = 1;
                     break;
                 }
             } while (*k == '-');
         }
     } else if (*ptr++ == '-') {
         /* Plain "--boundary": compare ignoring case */
         rc = (strcasecmp(ptr, boundary) == 0);
     }
 
     if (rc == 1)
         cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
 
 done:
     free(out); /* free(NULL) is a no-op */
     if (newline != line)
         free(newline);
 
     return rc;
 }
 
 /*
  * Is the current line the end?
  *
  * The message ends with --boundary--
  *
  * line:     the candidate line, may be NULL or empty
  * boundary: the multipart boundary, without the leading "--"
  *
  * Returns 1 if line starts with "--", followed by boundary (compared case
  * insensitively), followed by "--"; returns 0 otherwise. Only that prefix is
  * examined: anything after the closing "--" is ignored, because some broken
  * mails have white space after the boundary. For the same reason no copy of
  * the line is needed to trim trailing spaces - they can never be part of the
  * prefix that is tested, since that prefix ends in '-'.
  */
 static int
 boundaryEnd(const char *line, const char *boundary)
 {
     size_t len;
     const char *p;

     if (line == NULL || *line == '\0' || boundary == NULL)
         return 0;

     /* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary); */

     /* line[0] is not NUL here, so line[1] is always readable */
     if ((line[0] != '-') || (line[1] != '-'))
         return 0;

     p   = &line[2];
     len = strlen(boundary);
     if (strncasecmp(p, boundary, len) != 0)
         return 0;

     /*
      * The comparison above succeeded over len non-NUL bytes of boundary,
      * so p[len] exists (it may be the NUL). p[len + 1] is only read when
      * p[len] is '-', i.e. not the terminator.
      */
     return (p[len] == '-') && (p[len + 1] == '-');
 }
 
 /*
  * Initialise the various lookup tables
  *
  * Creates *rfc821Table and fills it from rfc821headers, then creates
  * *subtypeTable and fills it from mimeSubtypes.
  *
  * Returns 0 on success. On any failure (table creation or insertion) every
  * table created so far is destroyed, both *rfc821Table and *subtypeTable are
  * set to NULL and -1 is returned.
  */
 static int
 initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
 {
     const struct tableinit *tableinit;

     *subtypeTable = NULL;

     /*
      * Check the result explicitly rather than with assert(): assert() is
      * compiled out when NDEBUG is defined, and a failed creation must not
      * be passed on to tableInsert().
      */
     *rfc821Table = tableCreate();
     if (*rfc821Table == NULL)
         return -1;

     for (tableinit = rfc821headers; tableinit->key; tableinit++)
         if (tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0)
             goto fail;

     *subtypeTable = tableCreate();
     if (*subtypeTable == NULL)
         goto fail;

     for (tableinit = mimeSubtypes; tableinit->key; tableinit++)
         if (tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0)
             goto fail;

     return 0;

 fail:
     tableDestroy(*rfc821Table);
     *rfc821Table = NULL;
     if (*subtypeTable != NULL) {
         tableDestroy(*subtypeTable);
         *subtypeTable = NULL;
     }
     return -1;
 }
 
 /*
  * Pick the part of a multipart message to treat as its text.
  *
  * The first part whose MIME type is TEXT and whose subtype is "html"
  * (case insensitive) wins, since HTML text is the most likely to include a
  * scripting worm. If there is no such part, the index of the last TEXT part
  * seen is returned. NULL entries in messages[] are skipped.
  *
  * If we can't find one, return -1
  */
 static int
 getTextPart(message *const messages[], size_t size)
 {
     int lastText = -1;
     size_t idx   = 0;

     while (idx < size) {
         message *const candidate = messages[idx];

         if ((candidate != NULL) && (messageGetMimeType(candidate) == TEXT)) {
             if (strcasecmp(messageGetMimeSubtype(candidate), "html") == 0)
                 return (int)idx;
             lastText = (int)idx;
         }
         idx++;
     }

     return lastText;
 }
 
 /*
  * strip -
  *	Remove the trailing spaces from a buffer. Don't call this directly,
  * always call strstrip() which is a wrapper to this routine to be used with
  * NUL terminated strings. This code looks a bit strange because of its
  * heritage from code that worked on strings that weren't necessarily NUL
  * terminated.
  * TODO: rewrite for clamAV
  *
  * buf: the NUL terminated buffer to trim in place, may be NULL
  * len: the size of the buffer including the terminating NUL, i.e. it is
  *	sizeof(buf) not strlen(buf). It must be int not size_t because of the
  *	>= 0 test in the loops.
  *
  * Returns its new length (a la strlen). Returns 0 if buf is NULL or len is
  * not positive. If len is larger than strlen(buf) + 1 nothing is trimmed and
  * strlen(buf) is returned.
  *
  * Which bytes are trimmed depends on the platform macros: the
  * UNIX/C_LINUX/C_DARWIN branch removes trailing bytes that are not isgraph()
  * but stops at '\n' and '\r'; the other branch removes trailing NULs and
  * isspace() bytes.
  */
 static size_t
 strip(char *buf, int len)
 {
     char *ptr;
     size_t i;

     if ((buf == NULL) || (len <= 0))
         return 0;

     i = strlen(buf);
     if (len > (int)(i + 1))
         return i;
     ptr = &buf[--len];

 #if defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) /* watch - it may be in shared text area */
     /*
      * Only write when the byte isn't already NUL, the buffer may be in a
      * read only area. The cast to unsigned char is needed because passing
      * a negative char (any 8 bit byte where char is signed) to isgraph()
      * is undefined.
      */
     do {
         if (*ptr)
             *ptr = '\0';
     } while ((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
 #else /* more characters can be displayed on DOS */
     do {
 #ifndef REAL_MODE_DOS
         if (*ptr) /* C8.0 puts into a text area */
 #endif
             *ptr = '\0';
     } while ((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
 #endif
     return ((size_t)(len + 1));
 }
 
 /*
  * strstrip:
  *	Strip a given NUL terminated string in place by handing it, together
  * with its size including the terminating NUL, to strip().
  *
  * Returns what strip() returns, or 0 if s is NULL.
  */
 size_t
 strstrip(char *s)
 {
     size_t bufsize;

     if (s == NULL)
         return 0;

     bufsize = strlen(s) + 1;

     return strip(s, bufsize);
 }
 
30e18caf
 /*
  * Handle one MIME header line that has already been split into the header
  * name (cmd) and its value (arg).
  *
  * RFC822 comments are removed from both before use. cmd is looked up in
  * rfc821Table and, depending on the result:
  *	CONTENT_TYPE			sets the MIME type and subtype of m and
  *					adds the remaining ';' separated fields
  *					as arguments
  *	CONTENT_TRANSFER_ENCODING	sets the encoding of m
  *	CONTENT_DISPOSITION		sets the disposition type of m, adds the
  *					first argument, and adds
  *					"filename=unknown" if m has no filename
  * Any other header is ignored.
  *
  * Returns 0 for OK, -1 for error (memory allocation failure)
  */
 static int
 parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
 {
     char *copy, *p, *buf;
     const char *ptr;
     int commandNumber;

     cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);

     copy = rfc822comments(cmd, NULL);
     if (copy) {
         commandNumber = tableFind(rfc821Table, copy);
         free(copy);
     } else
         commandNumber = tableFind(rfc821Table, cmd);

     /*
      * copy is kept until the end because ptr may point into it, and ptr
      * may be advanced below
      */
     copy = rfc822comments(arg, NULL);
     ptr  = (copy != NULL) ? copy : arg;

     buf = NULL;

     switch (commandNumber) {
         case CONTENT_TYPE:
             /*
              * Fix for non RFC1521 compliant mailers
              * that send content-type: Text instead
              * of content-type: Text/Plain, or
              * just simply "Content-Type:"
              */
             if (arg == NULL)
                 /*
                  * According to section 4 of RFC1521:
                  * "Note also that a subtype specification is
                  * MANDATORY. There are no default subtypes"
                  *
                  * We have to break this and make an assumption
                  * for the subtype because virus writers and
                  * email client writers don't get it right
                  */
                 cli_dbgmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
             else if (strchr(ptr, '/') == NULL)
                 /*
                  * Empty field, such as
                  *	Content-Type:
                  * which I believe is illegal according to
                  * RFC1521
                  */
                 cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
             else {
                 int i;

                 /* big enough for any single ';' separated field of ptr */
                 buf = cli_malloc(strlen(ptr) + 1);
                 if (buf == NULL) {
                     cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
                     free(copy);
                     return -1;
                 }
                 /*
                  * Some clients are broken and
                  * put white space after the ;
                  */
                 if (*arg == '/') {
                     cli_dbgmsg("Content-type '/' received, assuming application/octet-stream\n");
                     messageSetMimeType(m, "application");
                     messageSetMimeSubtype(m, "octet-stream");
                 } else {
                     /*
                      * The content type could be in quotes:
                      *	Content-Type: "multipart/mixed"
                      * FIXME: this is a hack in that ignores
                      *	the quotes, it doesn't handle
                      *	them properly
                      *
                      * The cast is needed because headers can hold
                      * 8 bit bytes, and a negative value must not
                      * be given to isspace()
                      */
                     while (isspace((unsigned char)*ptr))
                         ptr++;
                     if (ptr[0] == '\"')
                         ptr++;

                     if (ptr[0] != '/') {
                         char *s;
 #ifdef CL_THREAD_SAFE
                         char *strptr = NULL;
 #endif

                         s = cli_strtokbuf(ptr, 0, ";", buf);
                         /*
                          * Handle
                          * Content-Type: foo/bar multipart/mixed
                          * and
                          * Content-Type: multipart/mixed foo/bar
                          */
                         if (s && *s) {
                             /* scratch space for the subtype when it is followed by a space */
                             char *buf2 = cli_strdup(buf);

                             if (buf2 == NULL) {
                                 free(copy);
                                 free(buf);
                                 return -1;
                             }
                             for (;;) {
 #ifdef CL_THREAD_SAFE
                                 int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));

                                 s = strtok_r(NULL, ";", &strptr);
 #else
                                 int set = messageSetMimeType(m, strtok(s, "/"));

                                 s = strtok(NULL, ";");
 #endif
                                 if (s == NULL)
                                     break;
                                 if (set) {
                                     /*
                                      * len is the real length of
                                      * the stripped subtype. It
                                      * must be checked before
                                      * looking at the last byte:
                                      * a subtype of only white
                                      * space strips to nothing.
                                      */
                                     size_t len = strstrip(s);

                                     if ((len > 0) && (s[len - 1] == '\"')) {
                                         s[len - 1] = '\0';
                                         len        = strstrip(s);
                                     }
                                     if (len) {
                                         if (strchr(s, ' '))
                                             messageSetMimeSubtype(m,
                                                                   cli_strtokbuf(s, 0, " ", buf2));
                                         else
                                             messageSetMimeSubtype(m, s);
                                     }
                                 }

                                 /* move on to a second type/subtype pair, if any */
                                 while (*s && !isspace((unsigned char)*s))
                                     s++;
                                 if (*s++ == '\0')
                                     break;
                                 if (*s == '\0')
                                     break;
                             }
                             free(buf2);
                         }
                     }
                 }

                 /*
                  * Add in all rest of the arguments.
                  * e.g. if the header is this:
                  * Content-Type:', arg='multipart/mixed; boundary=foo
                  * we find the boundary argument set it
                  */
                 i = 1;
                 while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
                     cli_dbgmsg("mimeArgs = '%s'\n", buf);

                     messageAddArguments(m, buf);
                 }
             }
             break;
         case CONTENT_TRANSFER_ENCODING:
             messageSetEncoding(m, ptr);
             break;
         case CONTENT_DISPOSITION:
             /*
              * ptr is NULL when arg is NULL, which the CONTENT_TYPE case
              * above allows for. There is nothing to tokenise then, but
              * the missing filename is still filled in below.
              */
             if (ptr != NULL) {
                 buf = cli_malloc(strlen(ptr) + 1);
                 if (buf == NULL) {
                     cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
                     free(copy);
                     return -1;
                 }
                 p = cli_strtokbuf(ptr, 0, ";", buf);
                 if (p && *p) {
                     messageSetDispositionType(m, p);
                     messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
                 }
             }
             if (!messageHasFilename(m))
                 /*
                  * Handle this type of header, without
                  * a filename (e.g. some Worm.Torvil.D)
                  *	Content-ID: <nRfkHdrKsAxRU>
                  * Content-Transfer-Encoding: base64
                  * Content-Disposition: attachment
                  */
                 messageAddArgument(m, "filename=unknown");
             break;
         default:
             break;
     }
     free(copy);
     free(buf);

     return 0;
 }
 
e06d34dc
 /*
cca4efe4
  * Save the text portion of the message
  */
7dde984e
 static int
001ad879
 saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
cca4efe4
 {
288057e9
     fileblob *fb;
cca4efe4
 
288057e9
     messageAddArgument(m, "filename=textportion");
     if ((fb = messageToFileblob(m, mctx->dir, destroy_text)) != NULL) {
         /*
cca4efe4
 		 * Save main part to scan that
 		 */
288057e9
         cli_dbgmsg("Saving main message\n");
cca4efe4
 
288057e9
         mctx->files++;
         return fileblobScanAndDestroy(fb);
     }
     return CL_ETMPFILE;
cca4efe4
 }
 
a9714c49
 /*
  * Handle RFC822 comments in headers.
  *
  * in:  the header text, may be NULL
  * out: where to put the text without the comments. If out == NULL a buffer
  *	is allocated and returned, the caller must free the returned buffer.
  *	If out != NULL it must not be in, and it must have room for
  *	strlen(in) + 1 bytes: the output is never longer than the input.
  *
  * Leading white space is skipped. Text in (nested) parentheses is dropped,
  * except inside double quotes where parentheses are copied. A backslash is
  * dropped and the character after it is copied as is (when outside a
  * comment); a backslash that is the last character is kept.
  *
  * Return NULL on error or if the input has no comments, i.e. no '('.
  * See section 3.4.3 of RFC822
  * TODO: handle comments that go on to more than one line
  */
 static char *
 rfc822comments(const char *in, char *out)
 {
     const char *iptr;
     char *optr;
     int backslash, inquote, commentlevel;

     if (in == NULL)
         return NULL;

     if (strchr(in, '(') == NULL)
         return NULL;

     assert(out != in);

     /*
      * Headers can hold 8 bit bytes; the cast stops a negative value
      * being given to isspace()
      */
     while (isspace((unsigned char)*in))
         in++;

     if (out == NULL) {
         out = cli_malloc(strlen(in) + 1);
         if (out == NULL) {
             cli_errmsg("rfc822comments: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
             return NULL;
         }
     }

     backslash = commentlevel = inquote = 0;
     optr                               = out;

     cli_dbgmsg("rfc822comments: contains a comment\n");

     for (iptr = in; *iptr; iptr++) {
         if (backslash) {
             /* the character after a backslash is never special */
             if (commentlevel == 0)
                 *optr++ = *iptr;
             backslash = 0;
             continue;
         }
         switch (*iptr) {
             case '\\':
                 backslash = 1;
                 break;
             case '\"':
                 *optr++ = '\"';
                 inquote = !inquote;
                 break;
             case '(':
                 if (inquote)
                     *optr++ = '(';
                 else
                     commentlevel++;
                 break;
             case ')':
                 if (inquote)
                     *optr++ = ')';
                 else if (commentlevel > 0)
                     commentlevel--;
                 break;
             default:
                 if (commentlevel == 0)
                     *optr++ = *iptr;
                 break;
         }
     }

     if (backslash) /* last character was a single backslash */
         *optr++ = '\\';
     *optr = '\0';

     /*strstrip(out);*/

     cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

     return out;
 }
50df4118
 
 /*
  * Handle RFC2047 encoding, i.e. header text holding encoded words of the
  * form =?charset?encoding?text?= where encoding is 'q' (quoted-printable) or
  * 'b' (base64), in either case. Text outside encoded words is copied as is,
  * the charset is ignored, and each encoded text is decoded by way of a
  * temporary message. A newline at the end of a decoded text is dropped.
  *
  * If in holds no "=?" or no "?=" a copy of in is returned. Decoding stops at
  * the first malformed encoded word, and what has been decoded so far is
  * returned.
  *
  * Returns a malloc'd buffer that the caller must free, or NULL on error
  * (in is NULL, out of memory, or an encoding other than 'q' or 'b')
  */
 static char *
 rfc2047(const char *in)
 {
     char *out, *pout;
     size_t len;

     if (in == NULL)
         return NULL;

     if ((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
         return cli_strdup(in);

     cli_dbgmsg("rfc2047 '%s'\n", in);
     out = cli_malloc(strlen(in) + 1);

     if (out == NULL) {
         cli_errmsg("rfc2047: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
         return NULL;
     }

     pout = out;

     /* For each RFC2047 string */
     while (*in) {
         char encoding, *ptr, *enctext;
         message *m;
         blob *b;

         /* Find next RFC2047 string */
         while (*in) {
             if ((*in == '=') && (in[1] == '?')) {
                 in += 2;
                 break;
             }
             *pout++ = *in++;
         }
         /* Skip over charset, find encoding */
         while ((*in != '?') && *in)
             in++;
         if (*in == '\0')
             break;
         /*
          * The cast is needed because the header can hold 8 bit bytes,
          * and a negative value must not be given to tolower()
          */
         encoding = (char)tolower((unsigned char)*++in);

         if ((encoding != 'q') && (encoding != 'b')) {
             cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
             free(out);
             out = NULL;
             break;
         }
         /* Skip to encoded text */
         if (*++in != '?')
             break;
         if (*++in == '\0')
             break;

         enctext = cli_strdup(in);
         if (enctext == NULL) {
             free(out);
             out = NULL;
             break;
         }
         in = strstr(in, "?=");
         if (in == NULL) {
             free(enctext);
             break;
         }
         in += 2;
         /* enctext is a copy of the text just searched, so this can't fail */
         ptr = strstr(enctext, "?=");
         assert(ptr != NULL);
         *ptr = '\0';
         /*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/

         m = messageCreate();
         if (m == NULL) {
             free(enctext);
             break;
         }
         messageAddStr(m, enctext);
         free(enctext);
         switch (encoding) {
             case 'q':
                 messageSetEncoding(m, "quoted-printable");
                 break;
             case 'b':
                 messageSetEncoding(m, "base64");
                 break;
         }
         b = messageToBlob(m, 1);
         if (b == NULL) {
             messageDestroy(m);
             break;
         }
         len = blobGetDataSize(b);
         cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
                    (const char *)blobGetData(b));
         memcpy(pout, blobGetData(b), len);
         blobDestroy(b);
         messageDestroy(m);
         if (len > 0 && pout[len - 1] == '\n')
             pout += len - 1;
         else
             pout += len;
     }
     if (out == NULL)
         return NULL;

     *pout = '\0';

     cli_dbgmsg("rfc2047 returns '%s'\n", out);
     return out;
 }
 
f10460ed
 /*
  * Helper for rfc1341(): join parts 1 to t of a partial message into one
  * file.
  *
  * m:       the message, used only to read m->ctx->engine->keeptmp
  * dir:     the directory in which the joined file is created
  * pdir:    the directory holding the saved parts
  * id:      the id of the partial message; it is passed to sanitiseName()
  *          and then used as the name of the joined file
  * md5_hex: the hex digest that the names of the parts are built from
  * t:       the number of parts
  *
  * For each part number the part directory is searched for an entry whose
  * name, from its first '_', is "_<md5_hex>-<number>". Its lines are appended
  * to the joined file, except that blank lines at the end of a part aren't
  * copied. Unless keeptmp is set the part is then removed. When keeptmp is
  * set, entries that don't match and are more than 7 days old are removed.
  *
  * Returns 0 for OK, also if pdir can't be opened in which case nothing is
  * done. Returns -1 for error; the joined file is removed unless it was the
  * creation of that file that failed.
  */
 static int
 rfc1341Reassemble(message *m, const char *dir, const char *pdir, char *id, const char *md5_hex, int t)
 {
     DIR *dd;
     FILE *fout;
     char outname[NAME_MAX + 1];
     time_t now;
     int n;

     dd = opendir(pdir);
     if (dd == NULL)
         return 0;

     sanitiseName(id);

     snprintf(outname, sizeof(outname) - 1, "%s" PATHSEP "%s", dir, id);

     cli_dbgmsg("outname: %s\n", outname);

     fout = fopen(outname, "wb");
     if (fout == NULL) {
         cli_errmsg("Can't open '%s' for writing\n", outname);
         closedir(dd);
         return -1;
     }

     time(&now);
     for (n = 1; n <= t; n++) {
         char filename[NAME_MAX + 1];
         struct dirent *dent;

         /* n is never negative here, so %d gives what %u gave */
         snprintf(filename, sizeof(filename), "_%s-%d", md5_hex, n);

         while ((dent = readdir(dd))) {
             FILE *fin;
             char buffer[BUFSIZ], fullname[NAME_MAX + 1];
             int nblanks;
             STATBUF statb;
             const char *dentry_idpart;
             int test_fd;

             if (dent->d_ino == 0)
                 continue;

             if (!strcmp(".", dent->d_name) ||
                 !strcmp("..", dent->d_name))
                 continue;
             snprintf(fullname, sizeof(fullname) - 1,
                      "%s" PATHSEP "%s", pdir, dent->d_name);
             dentry_idpart = strchr(dent->d_name, '_');

             if (!dentry_idpart ||
                 strcmp(filename, dentry_idpart) != 0) {
                 /* not the part we want, see if it is stale */
                 if (!m->ctx->engine->keeptmp)
                     continue;

                 if ((test_fd = open(fullname, O_RDONLY)) < 0)
                     continue;

                 if (FSTAT(test_fd, &statb) < 0) {
                     close(test_fd);
                     continue;
                 }

                 if (now - statb.st_mtime > (time_t)(7 * 24 * 3600)) {
                     if (cli_unlink(fullname)) {
                         close(test_fd);
                         goto fail;
                     }
                 }

                 close(test_fd);
                 continue;
             }

             fin = fopen(fullname, "rb");
             if (fin == NULL) {
                 cli_errmsg("Can't open '%s' for reading\n", fullname);
                 goto fail;
             }
             nblanks = 0;
             while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL) {
                 /*
                  * Ensure that trailing newlines
                  * aren't copied: blank lines are only
                  * counted, and written once a line
                  * that isn't blank follows them
                  */
                 if (buffer[0] == '\n') {
                     nblanks++;
                     continue;
                 }
                 if (nblanks)
                     do {
                         if (putc('\n', fout) == EOF) break;
                     } while (--nblanks > 0);
                 /* nblanks is still set if putc() failed */
                 if (nblanks || fputs(buffer, fout) == EOF) {
                     fclose(fin);
                     goto fail;
                 }
             }
             fclose(fin);

             /* don't unlink if leave temps */
             if (!m->ctx->engine->keeptmp) {
                 if (cli_unlink(fullname))
                     goto fail;
             }
             break;
         }
         rewinddir(dd);
     }
     closedir(dd);
     fclose(fout);

     return 0;

 fail:
     fclose(fout);
     cli_unlink(outname);
     closedir(dd);
     return -1;
 }

 /*
  * Handle partial messages
  *
  * The part held in m is saved with messageSavePartial() in the directory
  * "clamav-partial" of the temporary directory, which is created if needed.
  * If m has a "total" argument that is equal to its "number" argument, the
  * parts are joined into a file in dir.
  * FIXME: this assumes that we receive the parts in order
  *
  * Returns 0 for OK, CL_EMEM if the digest of the id can't be converted to
  * hex, and -1 for any other error including m having no "id" or no "number"
  * argument
  */
 static int
 rfc1341(message *m, const char *dir)
 {
     char *arg, *id, *total, *oldfilename;
     char *number  = NULL;
     char *md5_hex = NULL;
     const char *tmpdir;
     int n;
     int rc = -1;
     size_t arglen;
     char pdir[NAME_MAX + 1];
     unsigned char md5_val[16];

     id = (char *)messageFindArgument(m, "id");
     if (id == NULL)
         return -1;

     tmpdir = cli_gettmpdir();

     snprintf(pdir, sizeof(pdir) - 1, "%s" PATHSEP "clamav-partial", tmpdir);

     /*
      * errno is only looked at when mkdir() has failed, it can hold
      * anything after a call that worked
      */
     if (mkdir(pdir, S_IRWXU) < 0) {
         STATBUF statb;

         if (errno != EEXIST) {
             cli_errmsg("Can't create the directory '%s'\n", pdir);
             goto done;
         }
         if (CLAMSTAT(pdir, &statb) < 0) {
             char err[128];
             cli_errmsg("Partial directory %s: %s\n", pdir,
                        cli_strerror(errno, err, sizeof(err)));
             goto done;
         }
         if (statb.st_mode & 077)
             cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
                         pdir,
 #ifdef ACCESSPERMS
                         (int)(statb.st_mode & ACCESSPERMS)
 #else
                         (int)(statb.st_mode & 0777)
 #endif
             );
     }

     number = (char *)messageFindArgument(m, "number");
     if (number == NULL)
         goto done;

     oldfilename = messageGetFilename(m);

     /* 10 is strlen("filename=") plus the terminating NUL */
     arglen = 10 + strlen(id) + strlen(number);
     arg    = cli_malloc(arglen);
     if (arg) {
         snprintf(arg, arglen, "filename=%s%s", id, number);
         messageAddArgument(m, arg);
         free(arg);
     }

     if (oldfilename) {
         cli_dbgmsg("Must reset to %s\n", oldfilename);
         free(oldfilename);
     }

     n = atoi(number);
     cl_hash_data("md5", id, strlen(id), md5_val, NULL);
     md5_hex = cli_str2hex((const char *)md5_val, 16);

     if (!md5_hex) {
         rc = CL_EMEM;
         goto done;
     }

     if (messageSavePartial(m, pdir, md5_hex, n) < 0)
         goto done;

     total = (char *)messageFindArgument(m, "total");
     cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
     if (total) {
         int t = atoi(total);

         free(total);
         /*
          * If it's the last one - reassemble it
          * FIXME: this assumes that we receive the parts in order
          */
         if ((n == t) && (rfc1341Reassemble(m, dir, pdir, id, md5_hex, t) < 0))
             goto done;
     }
     rc = 0;

 done:
     free(md5_hex);
     free(number);
     free(id);

     return rc;
 }
 
c52d991e
 /*
  * Destroy the blob b, if there is one, and free hrefs with
  * html_tag_arg_free()
  */
 static void
 hrefs_done(blob *b, tag_arguments_t *hrefs)
 {
     if (b != NULL) {
         blobDestroy(b);
     }

     html_tag_arg_free(hrefs);
 }
 
f2b71eb9
 /*
  * extract URLs from static text
  *
  * mem/len: the text to search
  * hrefs:   every URL found is added to this as a "href" tag
  *
  * A URL starts with "http:", "https:" or "ftp:", in either case, and goes on
  * up to the next space, newline or tab, the end of the text, or until it is
  * sizeof(url) - 1 bytes long. No URL is looked for in the last 10 bytes of
  * the text.
  */
 static void extract_text_urls(const unsigned char *mem, size_t len, tag_arguments_t *hrefs)
 {
     char url[1024];
     size_t off;

     for (off = 0; off + 10 < len; off++) {
         int is_url;
         /* check whether this is the start of a URL */
         int32_t proto = cli_readint32(mem + off);

         /* convert to lowercase */
         proto |= 0x20202020;

         /* 'http' and 'ftp:' in little-endian */
         if (proto == 0x70747468) {
             /*
              * "http" must be followed by ':', or by 's' and
              * then ':'. The 's' is at off + 4 and its ':' at
              * off + 5.
              */
             is_url = (mem[off + 4] == ':') ||
                      (((mem[off + 4] | 0x20) == 's') && (mem[off + 5] == ':'));
         } else {
             is_url = (proto == 0x3a707466);
         }

         if (is_url) {
             size_t url_len;

             for (url_len = 4; off + url_len < len && url_len < (sizeof(url) - 1); url_len++) {
                 unsigned char c = mem[off + url_len];
                 /* smart compilers will compile this if into
                  * a single bt + jb instruction */
                 if (c == ' ' || c == '\n' || c == '\t')
                     break;
             }
             memcpy(url, mem + off, url_len);
             url[url_len] = '\0';
             html_tag_arg_add(hrefs, "href", url);
             off += url_len;
         }
     }
 }
 
c52d991e
 /*
  * This used to be part of checkURLs, split out, because phishingScan needs it
  * too, and phishingScan might be used in situations where checkURLs is
  * disabled (see ifdef)
  *
  * The message m is converted to a blob and given to html_normalise_mem() to
  * fill in hrefs. If that finds nothing and hrefs->scanContents is set, the
  * URLs are looked for with extract_text_urls() instead.
  *
  * Returns the blob, or NULL if the message can't be converted, is empty, is
  * larger than 100K, or html_normalise_mem() fails. hrefs is only reset when
  * the size checks have passed.
  */
 static blob *
 getHrefs(message *m, tag_arguments_t *hrefs)
 {
     unsigned char *data;
     size_t datalen;
     blob *body;

     body = messageToBlob(m, 0);
     if (body == NULL)
         return NULL;

     datalen = blobGetDataSize(body);
     if (datalen == 0)
         goto discard;

     /* TODO: make this size customisable */
     if (datalen > 100 * 1024) {
         cli_dbgmsg("Viruses pointed to by URLs not scanned in large message\n");
         goto discard;
     }

     hrefs->count    = 0;
     hrefs->tag      = NULL;
     hrefs->value    = NULL;
     hrefs->contents = NULL;

     cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
     data = blobGetData(body);
     if (!html_normalise_mem(data, (off_t)datalen, NULL, hrefs, m->ctx->dconf))
         goto discard;
     cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

     if ((hrefs->count == 0) && hrefs->scanContents)
         extract_text_urls(data, datalen, hrefs);

     /* TODO: Do we need to call remove_html_comments? */
     return body;

 discard:
     blobDestroy(body);
     return NULL;
 }
 
ad422cc9
 /*
7dde984e
  * validate URLs for phishes
ad422cc9
  * followurls: see if URLs point to malware
  */
c52d991e
 static void
ecc3d638
 checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
c52d991e
 {
288057e9
     blob *b;
     tag_arguments_t hrefs;
c52d991e
 
cd94be7a
     UNUSEDPARAM(is_html);
 
288057e9
     if (*rc == VIRUS)
         return;
7dde984e
 
288057e9
     hrefs.scanContents = mctx->ctx->engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
c52d991e
 
288057e9
     if (!hrefs.scanContents)
         /*
093e013c
 		 * Don't waste time extracting hrefs (parsing html), nobody
 		 * will need it
 		 */
288057e9
         return;
c52d991e
 
288057e9
     hrefs.count = 0;
     hrefs.tag = hrefs.value = NULL;
     hrefs.contents          = NULL;
94aea271
 
288057e9
     b = getHrefs(mainMessage, &hrefs);
     if (b) {
         if (hrefs.scanContents) {
             if (phishingScan(mctx->ctx, &hrefs) == CL_VIRUS) {
                 /*
8affc406
 				 * FIXME: message objects' contents are
 				 *	encapsulated so we should not access
 				 *	the members directly
 				 */
288057e9
                 mainMessage->isInfected = TRUE;
                 *rc                     = VIRUS;
                 cli_dbgmsg("PH:Phishing found\n");
             }
         }
     }
     hrefs_done(b, &hrefs);
c52d991e
 }
 
f2f25418
 #ifdef HAVE_BACKTRACE
4f1d0bfc
 /*
  * Handler for SIGSEGV: put the default handler back, print a backtrace
  * (to syslog as well) and exit with SIGSEGV as the status
  */
 static void
 sigsegv(int sig)
 {
     (void)sig; /* not needed, this only ever handles SIGSEGV */

     /* a second fault, e.g. in print_trace(), gets the default action */
     signal(SIGSEGV, SIG_DFL);

     print_trace(1);

     exit(SIGSEGV);
 }
 
4f1d0bfc
 static void
02927896
 print_trace(int use_syslog)
 {
288057e9
     void *array[10];
     size_t size;
     char **strings;
     size_t i;
     pid_t pid = getpid();
02927896
 
288057e9
     cli_errmsg("Segmentation fault, attempting to print backtrace\n");
d563818f
 
288057e9
     size    = backtrace(array, 10);
     strings = backtrace_symbols(array, size);
02927896
 
288057e9
     cli_errmsg("Backtrace of pid %d:\n", pid);
     if (use_syslog)
         syslog(LOG_ERR, "Backtrace of pid %d:", pid);
02927896
 
288057e9
     for (i = 0; i < size; i++) {
         cli_errmsg("%s\n", strings[i]);
         if (use_syslog)
             syslog(LOG_ERR, "bt[%llu]: %s", (unsigned long long)i, strings[i]);
     }
02927896
 
288057e9
 #ifdef SAVE_TMP
     cli_errmsg("The errant mail file has been saved\n");
d563818f
 #endif
288057e9
     /* #else TODO: dump the current email */
ef822cfc
 
288057e9
     free(strings);
02927896
 }
 #endif
f24bf390
 
6cad6a15
 /* See also clamav-milter */
ddea752e
 static bool
 usefulHeader(int commandNumber, const char *cmd)
 {
288057e9
     switch (commandNumber) {
         case CONTENT_TRANSFER_ENCODING:
         case CONTENT_DISPOSITION:
         case CONTENT_TYPE:
             return TRUE;
         default:
             if (strcasecmp(cmd, "From") == 0)
                 return TRUE;
             if (strcasecmp(cmd, "Received") == 0)
                 return TRUE;
             if (strcasecmp(cmd, "De") == 0)
                 return TRUE;
     }
 
     return FALSE;
ddea752e
 }
 
ae5c693a
 /*
72cf1461
  * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
  */
 static char *
6bcd0c86
 getline_from_mbox(char *buffer, size_t buffer_len, fmap_t *map, size_t *at)
72cf1461
 {
f304dc68
     const char *src, *cursrc;
     char *curbuf;
ee1b2a6c
     size_t i;
6bcd0c86
     size_t input_len = MIN(map->len - *at, buffer_len + 1);
     src = cursrc = fmap_need_off_once(map, *at, input_len);
72cf1461
 
288057e9
     /*	we check for eof from the result of GETC()
694e7882
  *	if(feof(fin))
f87a92ca
 		return NULL;*/
288057e9
     if (!src) {
         cli_dbgmsg("getline_from_mbox: fmap need failed\n");
         return NULL;
ee1b2a6c
     }
288057e9
     if ((buffer_len == 0) || (buffer == NULL)) {
         cli_errmsg("Invalid call to getline_from_mbox(). Refer to https://www.clamav.net/documents/installing-clamav\n");
         return NULL;
ee1b2a6c
     }
72cf1461
 
ee1b2a6c
     curbuf = buffer;
288057e9
 
     for (i = 0; i < buffer_len - 1; i++) {
         char c;
 
         if (!input_len--) {
             if (curbuf == buffer) {
                 /* EOF on first char */
                 return NULL;
             }
             break;
         }
 
         switch ((c = *cursrc++)) {
             case '\0':
                 continue;
             case '\n':
                 *curbuf++ = '\n';
                 if (input_len && *cursrc == '\r') {
                     i++;
                     cursrc++;
                 }
                 break;
             case '\r':
                 *curbuf++ = '\r';
                 if (input_len && *cursrc == '\n') {
                     i++;
                     cursrc++;
                 }
                 break;
             default:
                 *curbuf++ = c;
                 continue;
         }
         break;
ee1b2a6c
     }
     *at += cursrc - src;
     *curbuf = '\0';
288057e9
 
ee1b2a6c
     return buffer;
72cf1461
 }
b2ba24f5
 
a603478f
 /*
  * Is this line a candidate for the start of a bounce message?
  */
b2ba24f5
 static bool
7021b545
 isBounceStart(mbox_ctx *mctx, const char *line)
b2ba24f5
 {
288057e9
     size_t len;
be32043e
 
288057e9
     if (line == NULL)
         return FALSE;
     if (*line == '\0')
         return FALSE;
     /*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
b2ba24f5
 		return FALSE;
 	if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
25071deb
 		return FALSE;*/
b2ba24f5
 
288057e9
     len = strlen(line);
     if ((len < 6) || (len >= 72))
         return FALSE;
be32043e
 
288057e9
     if ((memcmp(line, "From ", 5) == 0) ||
         (memcmp(line, ">From ", 6) == 0)) {
         int numSpaces = 0, numDigits = 0;
 
         line += 4;
 
         do
             if (*line == ' ')
                 numSpaces++;
             else if (isdigit((*line) & 0xFF))
                 numDigits++;
         while (*++line != '\0');
 
         if (numSpaces < 6)
             return FALSE;
         if (numDigits < 11)
             return FALSE;
         return TRUE;
     }
     return (bool)(cli_filetype((const unsigned char *)line, len, mctx->ctx->engine) == CL_TYPE_MAIL);
b2ba24f5
 }
a05e6d45
 
 /*
  * Extract a binhexEncoded message, return if it's found to be infected as we
  *	extract it
  */
 static bool
001ad879
 exportBinhexMessage(mbox_ctx *mctx, message *m)
a05e6d45
 {
288057e9
     bool infected = FALSE;
     fileblob *fb;
a05e6d45
 
288057e9
     if (messageGetEncoding(m) == NOENCODING)
         messageSetEncoding(m, "x-binhex");
a05e6d45
 
288057e9
     fb = messageToFileblob(m, mctx->dir, 0);
a05e6d45
 
288057e9
     if (fb) {
         cli_dbgmsg("Binhex file decoded to %s\n",
                    fileblobGetFilename(fb));
a585329e
 
288057e9
         if (fileblobScanAndDestroy(fb) == CL_VIRUS)
             infected = TRUE;
         mctx->files++;
     } else
         cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
a05e6d45
 
288057e9
     return infected;
a05e6d45
 }
c1fce7f7
 
 /*
7dde984e
  * Locate any bounce message and extract it. Return cl_status
47d9cc65
  */
 static int
001ad879
 exportBounceMessage(mbox_ctx *mctx, text *start)
47d9cc65
 {
288057e9
     int rc = CL_CLEAN;
     text *t;
     fileblob *fb;
47d9cc65
 
288057e9
     /*
47d9cc65
 	 * Attempt to save the original (unbounced)
 	 * message - clamscan will find that in the
 	 * directory and call us again (with any luck)
 	 * having found an e-mail message to handle.
 	 *
 	 * This finds a lot of false positives, the
 	 * search that a content type is in the
 	 * bounce (i.e. it's after the bounce header)
 	 * helps a bit.
 	 *
 	 * messageAddLine
7cd9337a
 	 * optimization could help here, but needs
47d9cc65
 	 * careful thought, do it with line numbers
 	 * would be best, since the current method in
 	 * messageAddLine of checking encoding first
 	 * must remain otherwise non bounce messages
 	 * won't be scanned
 	 */
288057e9
     for (t = start; t; t = t->t_next) {
         const char *txt = lineGetData(t->t_line);
         char cmd[RFC2821LENGTH + 1];
 
         if (txt == NULL)
             continue;
         if (cli_strtokbuf(txt, 0, ":", cmd) == NULL)
             continue;
 
         switch (tableFind(mctx->rfc821Table, cmd)) {
             case CONTENT_TRANSFER_ENCODING:
                 if ((strstr(txt, "7bit") == NULL) &&
                     (strstr(txt, "8bit") == NULL))
                     break;
                 continue;
             case CONTENT_DISPOSITION:
                 break;
             case CONTENT_TYPE:
                 if (strstr(txt, "text/plain") != NULL)
                     t = NULL;
                 break;
             default:
                 if (strcasecmp(cmd, "From") == 0)
                     start = t;
                 else if (strcasecmp(cmd, "Received") == 0)
                     start = t;
                 continue;
         }
         break;
     }
     if (t && ((fb = fileblobCreate()) != NULL)) {
         cli_dbgmsg("Found a bounce message\n");
         fileblobSetFilename(fb, mctx->dir, "bounce");
         fileblobSetCTX(fb, mctx->ctx);
         if (textToFileblob(start, fb, 1) == NULL) {
             cli_dbgmsg("Nothing new to save in the bounce message\n");
             fileblobDestroy(fb);
         } else
             rc = fileblobScanAndDestroy(fb);
         mctx->files++;
     } else
         cli_dbgmsg("Not found a bounce message\n");
 
     return rc;
47d9cc65
 }
 
 /*
ede9939c
  * Get string representation of mimetype
  */
288057e9
 static const char *getMimeTypeStr(mime_type mimetype)
ede9939c
 {
288057e9
     const struct tableinit *entry = mimeTypeStr;
 
     while (entry->key) {
         if (mimetype == entry->value)
             return entry->key;
         entry++;
     }
     return "UNKNOWN";
ede9939c
 }
 
 /*
  * Get string representation of encoding type
  */
288057e9
 static const char *getEncTypeStr(encoding_type enctype)
ede9939c
 {
288057e9
     const struct tableinit *entry = encTypeStr;
 
     while (entry->key) {
         if (enctype == entry->value)
             return entry->key;
         entry++;
     }
     return "UNKNOWN";
ede9939c
 }
 
 /*
  * Handle the ith element of a number of multiparts, e.g. multipart/alternative
  *
  * mainMessage:     the current main message; it may be destroyed here (never
  *                  when it is the same object as messageIn) in which case
  *                  NULL is returned in its place
  * messages:        the array of parts; messages[i] is the part handled. On
  *                  most paths the part is destroyed and messages[i] is set
  *                  to NULL
  * i:               index of the part to handle
  * rc:              in/out status. The part is only processed when *rc is OK
  *                  on entry; set to VIRUS when something infected is found
  * mctx:            the mbox context (scan context, tables, output directory,
  *                  file counter)
  * messageIn:       the message owned by the caller, which must survive
  * tptr:            *tptr is handed to parseEmailBody() for nested multiparts
  * recursion_level: current depth; nested parsing uses recursion_level + 1
  *
  * Returns the main message to carry on with, which may be NULL.
  */
 static message *
ecc3d638
 do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
c1fce7f7
 {
288057e9
     bool addToText = FALSE;
     const char *dtype;
 #ifndef SAVE_TO_DISC
     message *body;
c1fce7f7
 #endif
288057e9
     message *aMessage        = messages[i];
     const int doPhishingScan = mctx->ctx->engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
ede9939c
 #if HAVE_JSON
288057e9
     /* saveobj remembers the caller's working object so it can be put back */
     json_object *thisobj = NULL, *saveobj = mctx->wrkobj;
 
     if (mctx->wrkobj != NULL) {
         /* Record this part in the "Multipart" array of the working object */
         json_object *multiobj = cli_jsonarray(mctx->wrkobj, "Multipart");
         if (multiobj == NULL) {
             cli_errmsg("Cannot get multipart preclass array\n");
             *rc = -1;
             return mainMessage;
         }
 
         /* NOTE(review): this runs before the aMessage == NULL test below */
         thisobj = messageGetJObj(aMessage);
         if (thisobj == NULL) {
             cli_errmsg("Cannot get message preclass object\n");
             *rc = -1;
             return mainMessage;
         }
         if (cli_json_addowner(multiobj, thisobj, NULL, -1) != CL_SUCCESS) {
             cli_errmsg("Cannot assign message preclass object to multipart preclass array\n");
             *rc = -1;
             return mainMessage;
         }
     }
ede9939c
 #endif
 
288057e9
     if (aMessage == NULL) {
ede9939c
 #if HAVE_JSON
288057e9
         if (thisobj != NULL)
             cli_jsonstr(thisobj, "MimeType", "NULL");
ede9939c
 #endif
288057e9
         return mainMessage;
     }
c1fce7f7
 
288057e9
     /* Nothing more is processed once an earlier part has changed the status */
     if (*rc != OK)
         return mainMessage;
69c62847
 
288057e9
     cli_dbgmsg("Mixed message part %d is of type %d\n",
                i, messageGetMimeType(aMessage));
c1fce7f7
 
ede9939c
 #if HAVE_JSON
288057e9
     if (thisobj != NULL) {
         cli_jsonstr(thisobj, "MimeType", getMimeTypeStr(messageGetMimeType(aMessage)));
         cli_jsonstr(thisobj, "MimeSubtype", messageGetMimeSubtype(aMessage));
         cli_jsonstr(thisobj, "EncodingType", getEncTypeStr(messageGetEncoding(aMessage)));
         cli_jsonstr(thisobj, "Disposition", messageGetDispositionType(aMessage));
         cli_jsonstr(thisobj, "Filename", messageHasFilename(aMessage) ? messageGetFilename(aMessage) : "(inline)");
     }
ede9939c
 #endif
 
288057e9
     /*
      * Cases that leave the switch with "break" go on to be saved and scanned
      * as an attachment below; MESSAGE, MULTIPART and unsupported text
      * dispositions return directly from inside the switch.
      */
     switch (messageGetMimeType(aMessage)) {
         case APPLICATION:
         case AUDIO:
         case IMAGE:
         case VIDEO:
             break;
         case NOMIME:
             cli_dbgmsg("No mime headers found in multipart part %d\n", i);
             if (mainMessage) {
                 if (binhexBegin(aMessage)) {
                     cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
 
                     if (exportBinhexMessage(mctx, mainMessage))
                         *rc = VIRUS;
                 }
                 /* Never destroy the caller's own message */
                 if (mainMessage != messageIn)
                     messageDestroy(mainMessage);
                 mainMessage = NULL;
             } else if (aMessage) {
                 if (binhexBegin(aMessage)) {
                     cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
                     if (exportBinhexMessage(mctx, aMessage))
                         *rc = VIRUS;
                     assert(aMessage == messages[i]);
                     messageReset(messages[i]);
                 }
             }
             addToText = TRUE;
             if (messageGetBody(aMessage) == NULL)
                 /*
                  * No plain text version
                  */
288057e9
                 cli_dbgmsg("No plain text alternative\n");
             break;
         case TEXT:
             dtype = messageGetDispositionType(aMessage);
             cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
                        dtype);
             if (strcasecmp(dtype, "attachment") == 0)
                 break;
             if ((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
                 const char *cptr;
 
                 if (mainMessage && (mainMessage != messageIn))
                     messageDestroy(mainMessage);
                 mainMessage = NULL;
                 cptr        = messageGetMimeSubtype(aMessage);
                 cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
                 if ((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
                     (messageGetEncoding(aMessage) == NOENCODING)) {
                     /*
                      * Strictly speaking, a text/plain part
                      * is not an attachment. We pretend it
                      * is so that we can decode and scan it
                      */
288057e9
                     if (!messageHasFilename(aMessage)) {
                         cli_dbgmsg("Adding part to main message\n");
                         addToText = TRUE;
                     } else
                         cli_dbgmsg("Treating inline as attachment\n");
                 } else {
                     const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
                     if (doPhishingScan)
                         checkURLs(aMessage, mctx, rc, is_html);
                     messageAddArgument(aMessage,
                                        "filename=mixedtextportion");
                 }
                 break;
             }
             cli_dbgmsg("Text type %s is not supported\n", dtype);
             return mainMessage;
         case MESSAGE:
             /* Content-Type: message/rfc822 */
             cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
                        messageGetEncoding(aMessage));
 #ifndef SCAN_UNENCODED_BOUNCES
             switch (messageGetEncoding(aMessage)) {
                 case NOENCODING:
                 case EIGHTBIT:
                 case BINARY:
                     if (encodingLine(aMessage) == NULL) {
                         /*
                          * This means that the message
                          * has no attachments
                          *
                          * The test for
                          * messageGetEncoding is needed
                          * since encodingLine won't have
                          * been set if the message
                          * itself has been encoded
                          */
288057e9
                         cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
                         assert(aMessage == messages[i]);
                         messageDestroy(messages[i]);
                         messages[i] = NULL;
                         return mainMessage;
                     }
                     /* FALLTHROUGH */
                 default:
                     cli_dbgmsg("Encoded multipart/message will be scanned\n");
             }
c1fce7f7
 #endif
288057e9
 #if 0
c1fce7f7
 			messageAddStrAtTop(aMessage,
 				"Received: by clamd (message/rfc822)");
 #endif
288057e9
 #ifdef SAVE_TO_DISC
             /*
              * Save this embedded message
              * to a temporary file
              */
288057e9
             if (saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
                 *rc = VIRUS;
             assert(aMessage == messages[i]);
             messageDestroy(messages[i]);
             messages[i] = NULL;
c1fce7f7
 #else
288057e9
             /*
              * Scan in memory, faster but is open to DoS attacks
              * when many nested levels are involved.
              */
288057e9
             body = parseEmailHeaders(aMessage, mctx->rfc821Table);
0072fa21
 
288057e9
             /*
              * We've finished with the
              * original copy of the message,
              * so throw that away and
              * deal with the encapsulated
              * message as a message.
              * This can save a lot of memory
              */
288057e9
             assert(aMessage == messages[i]);
             messageDestroy(messages[i]);
             messages[i]  = NULL;
ede9939c
 #if HAVE_JSON
288057e9
             /* Properties of the embedded message are recorded under this part */
             mctx->wrkobj = thisobj;
ede9939c
 #endif
288057e9
             if (body) {
                 messageSetCTX(body, mctx->ctx);
                 *rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
                 if ((*rc == OK) && messageContainsVirus(body))
                     *rc = VIRUS;
                 messageDestroy(body);
             }
ede9939c
 #if HAVE_JSON
288057e9
             mctx->wrkobj = saveobj;
ede9939c
 #endif
c1fce7f7
 #endif
288057e9
             return mainMessage;
         case MULTIPART:
             /*
              * It's a multi part within a multi part
              * Run the message parser on this bit, it won't
              * be an attachment
              */
288057e9
             cli_dbgmsg("Found multipart inside multipart\n");
ede9939c
 #if HAVE_JSON
288057e9
             mctx->wrkobj = thisobj;
ede9939c
 #endif
288057e9
             if (aMessage) {
                 /*
                  * The headers were parsed when reading in the
                  * whole multipart section
                  */
288057e9
                 *rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
                 cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
                 assert(aMessage == messages[i]);
                 messageDestroy(messages[i]);
                 messages[i] = NULL;
             } else {
                 /* NOTE(review): aMessage was checked for NULL above, so this looks unreachable */
                 *rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
                 if (mainMessage && (mainMessage != messageIn))
                     messageDestroy(mainMessage);
                 mainMessage = NULL;
             }
ede9939c
 #if HAVE_JSON
288057e9
             mctx->wrkobj = saveobj;
ede9939c
 #endif
288057e9
             return mainMessage;
         default:
             cli_dbgmsg("Only text and application attachments are fully supported, type = %d\n",
                        messageGetMimeType(aMessage));
             /* fall through - we may be able to salvage something */
     }
 
     /* Save the part to a file in mctx->dir and scan it, unless a virus has already been found */
     if (*rc != VIRUS) {
         fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
ede9939c
 #if HAVE_JSON
288057e9
         json_object *arrobj;
0efcd558
         size_t arrlen = 0;
ede9939c
 
288057e9
         if (thisobj != NULL) {
             /* attempt to determine container size - prevents incorrect type reporting */
             if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
                 arrlen = json_object_array_length(arrobj);
         }
c1fce7f7
 
ede9939c
 #endif
288057e9
         if (fb) {
             /* aMessage doesn't always have a ctx set */
             fileblobSetCTX(fb, mctx->ctx);
             if (fileblobScanAndDestroy(fb) == CL_VIRUS)
                 *rc = VIRUS;
             /* Parts flagged as belonging to the main text are not counted as files */
             if (!addToText)
                 mctx->files++;
         }
ede9939c
 #if HAVE_JSON
288057e9
         if (thisobj != NULL) {
             json_object *entry = NULL;
             /* This dtype shadows the function-level variable of the same name */
             const char *dtype  = NULL;
 
             /* attempt to acquire container type */
             /* An entry at index arrlen exists only if the array grew since it was measured above */
             if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
                 if (json_object_array_length(arrobj) > arrlen)
                     entry = json_object_array_get_idx(arrobj, arrlen);
             if (entry) {
                 json_object_object_get_ex(entry, "FileType", &entry);
                 if (entry)
                     dtype = json_object_get_string(entry);
             }
0efcd558
             cli_jsonint(thisobj, "ContainedObjectsIndex", (uint32_t)arrlen);
288057e9
             cli_jsonstr(thisobj, "ClamAVFileType", dtype ? dtype : "UNKNOWN");
         }
ede9939c
 #endif
288057e9
         if (messageContainsVirus(aMessage))
             *rc = VIRUS;
     }
     /* The part has been dealt with: release it and clear its slot */
     messageDestroy(aMessage);
     messages[i] = NULL;
c1fce7f7
 
288057e9
     return mainMessage;
c1fce7f7
 }
4f4a8f4a
 
 /*
  * Returns the number of double quote (") characters in the given
  * NUL terminated string
  */
 static int
 count_quotes(const char *buf)
 {
     const char *hit;
     int total = 0;
 
     /* Hop from one quote to the next until strchr() finds no more */
     for (hit = strchr(buf, '\"'); hit != NULL; hit = strchr(hit + 1, '\"'))
         total++;
 
     return total;
 }
842c7d49
 
 /*
  * Will the next line be a folded header? See RFC2822 section 2.2.3
  */
 static bool
 next_is_folded_header(const text *t)
 {
288057e9
     const text *next = t->t_next;
     const char *data, *ptr;
842c7d49
 
288057e9
     if (next == NULL)
         return FALSE;
842c7d49
 
288057e9
     if (next->t_line == NULL)
         return FALSE;
842c7d49
 
288057e9
     data = lineGetData(next->t_line);
842c7d49
 
288057e9
     /*
b912eaf2
 	 * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
842c7d49
 	 * previous entry.
 	 */
288057e9
     if (isblank(data[0]))
         return TRUE;
842c7d49
 
288057e9
     if (strchr(data, '=') == NULL)
         /*
842c7d49
 		 * Avoid false positives with
 		 *	Content-Type: text/html;
 		 *	Content-Transfer-Encoding: quoted-printable
 		 */
288057e9
         return FALSE;
bc6bbeff
 
288057e9
     /*
842c7d49
 	 * Some are broken and don't fold headers lines
 	 * correctly as per section 2.2.3 of RFC2822.
 	 * Generally they miss the white space at
 	 * the start of the fold line:
 	 *	Content-Type: multipart/related;
 	 *	type="multipart/alternative";
 	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * should read:
 	 *	Content-Type: multipart/related;
 	 *	 type="multipart/alternative";
 	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * Since we're a virus checker not an RFC
 	 * verifier we need to handle these
 	 */
288057e9
     data = lineGetData(t->t_line);
 
     ptr = strchr(data, '\0');
 
     while (--ptr > data)
         switch (*ptr) {
             case ';':
                 return TRUE;
             case '\n':
             case ' ':
             case '\r':
             case '\t':
                 continue; /* white space at end of line */
             default:
                 return FALSE;
         }
     return FALSE;
842c7d49
 }
0cf4cea7
 
 /*
  * This routine is called on the first line of the body of
  * an email to handle broken messages that have newlines
  * in the middle of its headers
  */
 static bool
 newline_in_header(const char *line)
 {
288057e9
     cli_dbgmsg("newline_in_header, check \"%s\"\n", line);
0cf4cea7
 
288057e9
     if (strncmp(line, "Message-Id: ", 12) == 0)
         return TRUE;
     if (strncmp(line, "Date: ", 6) == 0)
         return TRUE;
0cf4cea7
 
288057e9
     return FALSE;
0cf4cea7
 }