libclamav/mbox.c
e3aaff8e
 /*
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
6289eda8
  * 
  *  Acknowledgements: Some ideas came from Stephen White <stephen@earth.li>,
  *                    Michael Dankov <misha@btrc.ru>, Gianluigi Tiesi <sherpya@netfarm.it>,
  *                    Everton da Silva Marques, Thomas Lamy <Thomas.Lamy@in-online.net>,
  *                    James Stevens <James@kyzo.com>
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
e3aaff8e
  */
2023340a
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
e3aaff8e
 
 #ifdef CL_THREAD_SAFE
98cb5cba
 #ifndef	_REENTRANT
e3aaff8e
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
98cb5cba
 #endif
e3aaff8e
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <assert.h>
 #include <string.h>
bc6bbeff
 #ifdef	HAVE_STRINGS_H
e3aaff8e
 #include <strings.h>
bc6bbeff
 #endif
e906fef3
 #ifdef	HAVE_STRING_H
 #include <string.h>
 #endif
e3aaff8e
 #include <ctype.h>
 #include <time.h>
 #include <fcntl.h>
bc6bbeff
 #ifdef	HAVE_SYS_PARAM_H
d4d14218
 #include <sys/param.h>
bc6bbeff
 #endif
f10460ed
 #include <dirent.h>
a0b21816
 #include <limits.h>
093e013c
 #include <signal.h>
e3aaff8e
 
cd153266
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 
242bfde8
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 #include <stddef.h>
 #endif
 
e2875303
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
60d8d2c3
 #include "clamav.h"
0f7f7682
 #include "others.h"
 #include "str.h"
 #include "filetypes.h"
e3aaff8e
 #include "mbox.h"
d77ac7de
 #include "dconf.h"
ee1b2a6c
 #include "fmap.h"
ede9939c
 #include "json_api.h"
c2df9f79
 #include "msxml_parser.h"
 
 #if HAVE_LIBXML2
f5412cc9
 #include <libxml/xmlversion.h>
c2df9f79
 #include <libxml/HTMLtree.h>
f5412cc9
 #include <libxml/HTMLparser.h>
c2df9f79
 #include <libxml/xmlreader.h>
 #endif
d77ac7de
 
 #define DCONF_PHISHING mctx->ctx->dconf->phishing
e3aaff8e
 
02927896
 #ifdef	CL_DEBUG
093e013c
 
6670d61d
 #if	defined(C_LINUX)
093e013c
 #include <features.h>
92dbfae7
 #endif
093e013c
 
02927896
 #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
 #define HAVE_BACKTRACE
 #endif
3f3f9085
 #endif
02927896
 
 #ifdef HAVE_BACKTRACE
 #include <execinfo.h>
 #include <syslog.h>
 
 static	void	sigsegv(int sig);
 static	void	print_trace(int use_syslog);
a9d251e0
 
96435bdc
 /*#define	SAVE_TMP */	/* Save the file being worked on in tmp */
02927896
 #endif
 
c2b2d8af
 #if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
e3aaff8e
 #undef strtok_r
 #undef __strtok_r
 #define strtok_r(a,b,c)	strtok(a,b)
 #endif
 
0cf4cea7
 #ifdef	HAVE_STDBOOL_H
89d4073d
 #ifdef	C_BEOS
 #include "SupportDefs.h"
 #else
edee0700
 #include <stdbool.h>
89d4073d
 #endif
edee0700
 #else
 #ifdef	FALSE
 typedef	unsigned	char	bool;
 #else
 typedef enum	{ FALSE = 0, TRUE = 1 } bool;
e3aaff8e
 #endif
 #endif
 
ecc3d638
 /*
  * Outcome of decoding one message, as returned by parseEmailBody() and
  * do_multipart(). cli_parse_mbox() turns the status of the last message
  * into the CL_* code noted against each value.
  */
 typedef	enum {
 	FAIL,	/* cli_parse_mbox() reports CL_EFORMAT */
 	OK,	/* cli_parse_mbox() leaves its return code untouched */
 	OK_ATTACHMENTS_NOT_SAVED,	/* treated exactly like OK by cli_parse_mbox() */
69c62847
 	VIRUS,	/* cli_parse_mbox() reports CL_VIRUS */
001ad879
 	MAXREC,	/* cli_parse_mbox() reports CL_EMAXREC */
 	MAXFILES	/* cli_parse_mbox() reports CL_EMAXFILES */
ecc3d638
 } mbox_status;
 
9f2024cc
 #ifndef isblank
 #define isblank(c)	(((c) == ' ') || ((c) == '\t'))
 #endif
 
9b4bb8b7
 #define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
393a6d67
 
c52d991e
 #include "htmlnorm.h"
 
 #include "phishcheck.h"
 
be4bf7f4
 #ifndef	_WIN32
081f6473
 #include <sys/time.h>
ea541184
 #include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
29b92a2d
 #if !defined(C_BEOS) && !defined(C_INTERIX)
ea541184
 #include <net/if.h>
 #include <arpa/inet.h>
 #endif
89d4073d
 #endif
f4a02249
 
ea541184
 #include <fcntl.h>
 
f10460ed
 /*
d7979d4f
  * Use CL_SCAN_MAIL_PARTIAL_MESSAGE to handle messages covered by section 7.3.2 of RFC1341.
f10460ed
  *	This is experimental code so it is up to YOU to (1) ensure it's secure
cf569541
  * (2) periodically trim the directory of old files
  *
  * If you use the load balancing feature of clamav-milter to run clamd on
fb79b576
  * more than one machine you must make sure that .../partial is on a shared
cf569541
  * network filesystem
f10460ed
  */
ac9b941b
 /*#define	NEW_WORLD*/
d72749e0
 
8c68fcc1
 /*#define	SCAN_UNENCODED_BOUNCES	*//*
12bd9764
 					 * Slows things down a lot and only catches unencoded copies
214621f2
 					 * of EICAR within bounces, which don't matter
12bd9764
 					 */
 
c1fce7f7
 /*
  * Per-scan state handed to the decoding helpers. cli_parse_mbox() fills
  * in every field before the first message is parsed.
  */
 typedef	struct	mbox_ctx {
 	const	char	*dir;	/* the dir argument given to cli_mbox() */
 	const	table_t	*rfc821Table;	/* copy of the file-scope rfc821 table pointer */
 	const	table_t	*subtypeTable;	/* copy of the file-scope subtype table pointer */
 	cli_ctx	*ctx;	/* scan context of the caller */
b5231f5f
 	unsigned	int	files;	/* number of files extracted */
ede9939c
 #if HAVE_JSON
 	json_object *wrkobj;	/* set from ctx->wrkproperty */
 #endif
c1fce7f7
 } mbox_ctx;
 
f87a92ca
 /* if supported by the system, use the optimized
  * version of getc, that doesn't do locking,
  * and is possibly implemented entirely as a macro */
 #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
 #define GETC(fp) getc_unlocked(fp)
 #define LOCKFILE(fp) flockfile(fp)
 #define UNLOCKFILE(fp) funlockfile(fp)
 #else
 #define GETC(fp) getc(fp)
 #define LOCKFILE(fp)
 #define UNLOCKFILE(fp)
 #endif
 
2df29bde
 static	int	cli_parse_mbox(const char *dir, cli_ctx *ctx);
49cc1e3c
 static	message	*parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821Table, const char *firstLine, const char *dir);
2673dc74
 static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table);
4c60b74f
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
a6369bc8
 static	int	parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata);
c2df9f79
 static	mbox_status	parseRootMHTML(mbox_ctx *mctx, message *m, text *t);
ecc3d638
 static	mbox_status	parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
e3aaff8e
 static	int	boundaryStart(const char *line, const char *boundary);
69c62847
 static	int	boundaryEnd(const char *line, const char *boundary);
e3aaff8e
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
 static	int	getTextPart(message *const messages[], size_t size);
 static	size_t	strip(char *buf, int len);
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
001ad879
 static	int	saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
50df4118
 static	char	*rfc2047(const char *in);
d72749e0
 static	char	*rfc822comments(const char *in, char *out);
f10460ed
 static	int	rfc1341(message *m, const char *dir);
ddea752e
 static	bool	usefulHeader(int commandNumber, const char *cmd);
49cc1e3c
 static	char	*getline_from_mbox(char *buffer, size_t len, fmap_t *map, size_t *at);
7021b545
 static	bool	isBounceStart(mbox_ctx *mctx, const char *line);
001ad879
 static	bool	exportBinhexMessage(mbox_ctx *mctx, message *m);
 static	int	exportBounceMessage(mbox_ctx *ctx, text *start);
ede9939c
 static	const	char	*getMimeTypeStr(mime_type mimetype);
 static	const	char	*getEncTypeStr(encoding_type enctype);
ecc3d638
 static	message	*do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
4f4a8f4a
 static	int	count_quotes(const char *buf);
842c7d49
 static	bool	next_is_folded_header(const text *t);
0cf4cea7
 static	bool	newline_in_header(const char *line);
9b4bb8b7
 
ecc3d638
 static	blob	*getHrefs(message *m, tag_arguments_t *hrefs);
 static	void	hrefs_done(blob *b, tag_arguments_t *hrefs);
ad422cc9
 static	void	checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
9b4bb8b7
 
9fe789f8
 /* Maximum line length according to RFC2821 */
85bb253e
 #define	RFC2821LENGTH	1000
e3aaff8e
 
 /* Hashcodes for our hash tables */
 #define	CONTENT_TYPE			1
 #define	CONTENT_TRANSFER_ENCODING	2
 #define	CONTENT_DISPOSITION		3
 
 /* Mime sub types */
 #define	PLAIN		1
 #define	ENRICHED	2
 #define	HTML		3
 #define	RICHTEXT	4
 #define	MIXED		5
946a0ad3
 #define	ALTERNATIVE	6	/* RFC1521*/
e3aaff8e
 #define	DIGEST		7
 #define	SIGNED		8
 #define	PARALLEL	9
 #define	RELATED		10	/* RFC2387 */
 #define	REPORT		11	/* RFC1892 */
c9b8f252
 #define	APPLEDOUBLE	12	/* Handling of this in only noddy for now */
393a6d67
 #define	FAX		MIXED	/*
 				 * RFC3458
 				 * Drafts stated to treat it as mixed if it is
 				 * not known.  This disappeared in the final
 				 * version (except when talking about
 				 * voice-message), but it is good enough for us
 				 * since we do no validation of coversheet
 				 * presence etc. (which also has disappeared
 				 * in the final version)
 				 */
9a729c80
 #define	ENCRYPTED	13	/*
 				 * e.g. RFC2015
 				 * Content-Type: multipart/encrypted;
 				 * boundary="nextPart1383049.XCRrrar2yq";
 				 * protocol="application/pgp-encrypted"
 				 */
6e5d95eb
 #define	X_BFILE		RELATED	/*
 				 * BeOS, expect two parts: the file and its
 				 * attributes. The attributes part comes as
 				 *	Content-Type: application/x-be_attribute
 				 *		name="foo"
 				 * I can't find where it is defined, any
 				 * pointers would be appreciated. For now
 				 * we treat it as multipart/related
 				 */
c79a2273
 #define	KNOWBOT		14	/* Unknown and undocumented format? */
e3aaff8e
 
 /*
  * Static string -> integer tables. Every array ends with a
  * { NULL, 0 } sentinel entry.
  *
  *	rfc821headers	header names mapped to the CONTENT_* codes
  *	mimeSubtypes	MIME subtype names mapped to the subtype codes
  *	mimeTypeStr	printable names for the mime_type values
  *	encTypeStr	printable names for the encoding_type values
  *
  * NOTE(review): rfc821headers and mimeSubtypes are presumably what
  * initialiseTables() loads into the rfc821/subtype lookup tables, and
  * the last two what getMimeTypeStr()/getEncTypeStr() search - confirm
  * against those functions.
  */
 static	const	struct tableinit {
 	const	char	*key;
 	int	value;
 } rfc821headers[] = {
303f9be9
 	/* TODO: make these regular expressions */
5c1150ac
 	{	"Content-Type",			CONTENT_TYPE		},
a9f386ed
 	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
 	{	"Content-Disposition",		CONTENT_DISPOSITION	},
e3aaff8e
 	{	NULL,				0			}
15033cb6
 }, mimeSubtypes[] = {	/* see RFC2045 */
e3aaff8e
 		/* subtypes of Text */
 	{	"plain",	PLAIN		},
 	{	"enriched",	ENRICHED	},
 	{	"html",		HTML		},
 	{	"richtext",	RICHTEXT	},
 		/* subtypes of Multipart */
 	{	"mixed",	MIXED		},
 	{	"alternative",	ALTERNATIVE	},
 	{	"digest",	DIGEST		},
 	{	"signed",	SIGNED		},
 	{	"parallel",	PARALLEL	},
 	{	"related",	RELATED		},
 	{	"report",	REPORT		},
c9b8f252
 	{	"appledouble",	APPLEDOUBLE	},
393a6d67
 	{	"fax-message",	FAX		},
9a729c80
 	{	"encrypted",	ENCRYPTED	},
6e5d95eb
 	{	"x-bfile",	X_BFILE		},	/* BeOS */
c79a2273
 	{	"knowbot",		KNOWBOT		},	/* ??? */
 	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
 	{	"knowbot-code",		KNOWBOT		},	/* ??? */
 	{	"knowbot-state",	KNOWBOT		},	/* ??? */
e3aaff8e
 	{	NULL,		0		}
ede9939c
 }, mimeTypeStr[] = {
 	{	"NOMIME", 	NOMIME		},
 	{	"APPLICATION",	APPLICATION	},
 	{	"AUDIO",	AUDIO		},
 	{	"IMAGE",	IMAGE		},
 	{	"MESSAGE",	MESSAGE		},
 	{	"MULTIPART",	MULTIPART	},
 	{	"TEXT",		TEXT		},
 	{	"VIDEO",	VIDEO		},
 	{	"MEXTENSION",	MEXTENSION	},
 	{	NULL,		0		}
 }, encTypeStr[] = {
 	{	"NOENCODING", 	NOENCODING	},
 	{	"QUOTEDPRINTABLE", 	QUOTEDPRINTABLE	},
 	{	"BASE64", 	BASE64		},
 	{	"EIGHTBIT", 	EIGHTBIT	},
 	{	"BINARY", 	BINARY		},
 	{	"UUENCODE", 	UUENCODE	},
 	{	"YENCODE", 	YENCODE		},
 	{	"EEXTENSION", 	EEXTENSION	},
 	{	"BINHEX", 	BINHEX		},
 	{	NULL,		0		}
e3aaff8e
 };
e2875303
 
 #ifdef	CL_THREAD_SAFE
 static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
61f01c95
 static	table_t *rfc821 = NULL;
 static	table_t *subtype = NULL;
e3aaff8e
 
f24bf390
 int
2df29bde
 cli_mbox(const char *dir, cli_ctx *ctx)
f24bf390
 {
7c56033f
 	if(dir == NULL) {
6351aa86
 		cli_dbgmsg("cli_mbox called with NULL dir\n");
7c56033f
 		return CL_ENULLARG;
 	}
2df29bde
 	return cli_parse_mbox(dir, ctx);
f24bf390
 }
 
e3aaff8e
 /*
  * TODO: when signal handling is added, need to remove temp files when a
ef822cfc
  *	signal is received
e3aaff8e
  * TODO: add option to scan in memory not via temp files, perhaps with a
1bfbedd4
  * named pipe or memory mapped file, though this won't work on big e-mails
  * containing many levels of encapsulated messages - it'd just take too much
  * RAM
049a18b9
  * TODO: parse .msg format files
c9b8f252
  * TODO: fully handle AppleDouble format, see
ef822cfc
  *	http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
89670d69
  * TODO: ensure parseEmailHeaders is always called before parseEmailBody
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
7cd9337a
  * TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
9f2024cc
  *	e.g. \0Content-Type: application/binary;
e3aaff8e
  */
f24bf390
 static int
2df29bde
 cli_parse_mbox(const char *dir, cli_ctx *ctx)
e3aaff8e
 {
bb1e844c
 	int retcode;
ddea752e
 	message *body;
85bb253e
 	char buffer[RFC2821LENGTH + 1];
c1fce7f7
 	mbox_ctx mctx;
ee1b2a6c
 	size_t at = 0;
49cc1e3c
 	fmap_t *map = *ctx->fmap;
e3aaff8e
 
 	cli_dbgmsg("in mbox()\n");
5cdb01fc
 
ee1b2a6c
 	if(!fmap_gets(map, buffer, &at, sizeof(buffer) - 1)) {
049a18b9
 		/* empty message */
ef822cfc
 		return CL_CLEAN;
049a18b9
 	}
e2875303
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&tables_mutex);
 #endif
393a6d67
 	if(rfc821 == NULL) {
 		assert(subtype == NULL);
51fc2aa8
 
393a6d67
 		if(initialiseTables(&rfc821, &subtype) < 0) {
 			rfc821 = NULL;
 			subtype = NULL;
e2875303
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&tables_mutex);
 #endif
ef822cfc
 			return CL_EMEM;
51fc2aa8
 		}
e3aaff8e
 	}
e2875303
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&tables_mutex);
 #endif
e3aaff8e
 
a603478f
 	retcode = CL_SUCCESS;
e791b5ac
 	body = NULL;
 
c1fce7f7
 	mctx.dir = dir;
 	mctx.rfc821Table = rfc821;
 	mctx.subtypeTable = subtype;
 	mctx.ctx = ctx;
001ad879
 	mctx.files = 0;
ede9939c
 #if HAVE_JSON
 	mctx.wrkobj = ctx->wrkproperty;
 #endif
c1fce7f7
 
89670d69
 	/*
45dc1456
 	 * Is it a UNIX style mbox with more than one
89670d69
 	 * mail message, or just a single mail message?
45dc1456
 	 *
 	 * TODO: It would be better if we called cli_scandir here rather than
 	 * in cli_scanmail. Then we could improve the way mailboxes with more
001ad879
 	 * than one message is handled, e.g. giving a better indication of
 	 * which message within the mailbox is infected
89670d69
 	 */
25071deb
 	/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
 	if(strncmp(buffer, "From ", 5) == 0) {
e3aaff8e
 		/*
049a18b9
 		 * Have been asked to check a UNIX style mbox file, which
 		 * may contain more than one e-mail message to decode
f003b79e
 		 *
 		 * It would be far better for scanners.c to do this splitting
 		 * and do this
 		 *	FOR EACH mail in the mailbox
 		 *	DO
 		 *		pass this mail to cli_mbox --
 		 *		scan this file
 		 *		IF this file has a virus quit
 		 *		THEN
 		 *			return CL_VIRUS
 		 *		FI
 		 *	END
 		 * This would remove a problem with this code that it can
 		 * fill up the tmp directory before it starts scanning
e3aaff8e
 		 */
ddea752e
 		bool lastLineWasEmpty;
 		int messagenumber;
 		message *m = messageCreate();
 
ee1b2a6c
 		if(m == NULL)
ddea752e
 			return CL_EMEM;
 
 		lastLineWasEmpty = FALSE;
 		messagenumber = 1;
a603478f
 		messageSetCTX(m, ctx);
e3aaff8e
 
049a18b9
 		do {
89670d69
 			cli_chomp(buffer);
25071deb
 			/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
 			if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
7dde984e
 				cli_dbgmsg("Deal with message number %d\n", messagenumber++);
e3aaff8e
 				/*
89670d69
 				 * End of a message in the mail box
e3aaff8e
 				 */
b2223aad
 				body = parseEmailHeaders(m, rfc821);
4f1d0bfc
 				if(body == NULL) {
 					messageReset(m);
 					continue;
 				}
a603478f
 				messageSetCTX(body, ctx);
89670d69
 				messageDestroy(m);
a603478f
 				if(messageGetBody(body)) {
ecc3d638
 					mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
 					if(rc == FAIL) {
e17491b2
 						messageReset(body);
 						m = body;
 						continue;
ecc3d638
 					} else if(rc == VIRUS) {
a603478f
 						cli_dbgmsg("Message number %d is infected\n",
150c9f33
 							messagenumber-1);
a603478f
 						retcode = CL_VIRUS;
826c9892
 						m = NULL;
a603478f
 						break;
e17491b2
 					}
a603478f
 				}
e3aaff8e
 				/*
89670d69
 				 * Starting a new message, throw away all the
f24bf390
 				 * information about the old one. It would
 				 * be best to be able to scan this message
 				 * now, but cli_scanfile needs arguments
 				 * that haven't been passed here so it can't be
 				 * called
e3aaff8e
 				 */
89670d69
 				m = body;
 				messageReset(body);
a603478f
 				messageSetCTX(body, ctx);
e3aaff8e
 
049a18b9
 				cli_dbgmsg("Finished processing message\n");
89670d69
 			} else
547b89de
 				lastLineWasEmpty = (bool)(buffer[0] == '\0');
4945127a
 
fa5661be
 			if(isuuencodebegin(buffer)) {
5198de85
 				/*
4945127a
 				 * Fast track visa to uudecode.
 				 * TODO: binhex, yenc
 				 */
ee1b2a6c
 			  if(uudecodeFile(m, buffer, dir, map, &at) < 0)
fa5661be
 					if(messageAddStr(m, buffer) < 0)
 						break;
 			} else
69c62847
 				/* at this point, the \n has been removed */
4945127a
 				if(messageAddStr(m, buffer) < 0)
 					break;
ee1b2a6c
 		} while(fmap_gets(map, buffer, &at, sizeof(buffer) - 1));
ddea752e
 
a603478f
 		if(retcode == CL_SUCCESS) {
 			cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
 			body = parseEmailHeaders(m, rfc821);
 		}
 		if(m)
 			messageDestroy(m);
4f1d0bfc
 	} else {
7e577f26
 		/*
 		 * It's a single message, parse the headers then the body
4f1d0bfc
 		 */
69543a9d
 		if(strncmp(buffer, "P I ", 4) == 0)
 			/*
 			 * CommuniGate Pro format: ignore headers until
 			 * blank line
 			 */
ee1b2a6c
 			while(fmap_gets(map, buffer, &at, sizeof(buffer) - 1) &&
69543a9d
 				(strchr("\r\n", buffer[0]) == NULL))
 					;
f87a92ca
 		/* getline_from_mbox could be using unlocked_stdio(3),
 		 * so lock file here */
69543a9d
 		/*
 		 * Ignore any blank lines at the top of the message
 		 */
4f1d0bfc
 		while(strchr("\r\n", buffer[0]) &&
ee1b2a6c
 		      (getline_from_mbox(buffer, sizeof(buffer) - 1, map, &at) != NULL))
87c9313e
 			;
 
9ed148a8
 		buffer[sizeof(buffer) - 1] = '\0';
4b187745
 
ee1b2a6c
 		body = parseEmailFile(map, &at, rfc821, buffer, dir);
4f1d0bfc
 	}
7e577f26
 
4f1d0bfc
 	if(body) {
 		/*
 		 * Write out the last entry in the mailbox
 		 */
a603478f
 		if((retcode == CL_SUCCESS) && messageGetBody(body)) {
 			messageSetCTX(body, ctx);
242ffd7a
 			switch(parseEmailBody(body, NULL, &mctx, 0)) {
96435bdc
 				case OK:
 				case OK_ATTACHMENTS_NOT_SAVED:
 					break;
ecc3d638
 				case FAIL:
69c62847
 					/*
 					 * beware: cli_magic_scandesc(),
 					 * changes this into CL_CLEAN, so only
 					 * use it to inform the higher levels
 					 * that we couldn't decode it because
 					 * it isn't an mbox, not to signal
 					 * decoding errors on what *is* a valid
 					 * mbox
 					 */
a603478f
 					retcode = CL_EFORMAT;
 					break;
69c62847
 				case MAXREC:
 					retcode = CL_EMAXREC;
 					break;
001ad879
 				case MAXFILES:
 					retcode = CL_EMAXFILES;
 					break;
ecc3d638
 				case VIRUS:
a603478f
 					retcode = CL_VIRUS;
 					break;
 			}
 		}
e3aaff8e
 
92012beb
 		if(body->isTruncated && retcode == CL_SUCCESS)
 			retcode = CL_EMEM;
4f1d0bfc
 		/*
 		 * Tidy up and quit
 		 */
 		messageDestroy(body);
 	}
6ad45a29
 	
 	if((retcode == CL_CLEAN) && ctx->found_possibly_unwanted &&
d7979d4f
 	   (*ctx->virname == NULL || SCAN_ALLMATCHES)) {
cbf5017a
 	    retcode = cli_append_virus(ctx, "Heuristics.Phishing.Email");
6ad45a29
 	    ctx->found_possibly_unwanted = 0;
ef5b10e0
 	}
 
e3aaff8e
 	cli_dbgmsg("cli_mbox returning %d\n", retcode);
 
 	return retcode;
 }
 
 /*
ddea752e
  * Read in an email message from fin, parse it, and return the message
7e577f26
  *
ddea752e
  * FIXME: files full of new lines and nothing else are
  * handled ungracefully...
  */
 static message *
49cc1e3c
 parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *firstLine, const char *dir)
ddea752e
 {
 	bool inHeader = TRUE;
6e3d492a
 	bool bodyIsEmpty = TRUE;
06466233
 	bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
ddea752e
 	message *ret;
 	bool anyHeadersFound = FALSE;
 	int commandNumber = -1;
41b7a56b
 	char *fullline = NULL, *boundary = NULL;
ddea752e
 	size_t fulllinelength = 0;
85bb253e
 	char buffer[RFC2821LENGTH + 1];
ddea752e
 
 	cli_dbgmsg("parseEmailFile\n");
 
 	ret = messageCreate();
 	if(ret == NULL)
 		return NULL;
 
69ffb9da
 	strncpy(buffer, firstLine, sizeof(buffer)-1);
ddea752e
 	do {
4f4a8f4a
 		const char *line;
ddea752e
 
 		(void)cli_chomp(buffer);
 
4f4a8f4a
 		if(buffer[0] == '\0')
72cf1461
 			line = NULL;
4f4a8f4a
 		else
 			line = buffer;
ddea752e
 
 		/*
 		 * Don't blank lines which are only spaces from headers,
 		 * otherwise they'll be treated as the end of header marker
 		 */
41b7a56b
 		if(lastWasBlank) {
 			lastWasBlank = FALSE;
 			if(boundaryStart(buffer, boundary)) {
 				cli_dbgmsg("Found a header line with space that should be blank\n");
 				inHeader = FALSE;
 			}
 		}
ddea752e
 		if(inHeader) {
0ed29506
 			cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
1f6e52cb
 				buffer, fullline);
2a0041b8
 			/*
 			 * Ensure wide characters are handled where
 			 * sizeof(char) > 1
 			 */
 			if(line && isspace(line[0] & 0xFF)) {
41b7a56b
 				char copy[sizeof(buffer)];
 
 				strcpy(copy, buffer);
 				strstrip(copy);
 				if(copy[0] == '\0') {
 					/*
4d4166a9
 					 * The header line contains only white
 					 * space. This is not the end of the
 					 * headers according to RFC2822, but
 					 * some MUAs will handle it as though
 					 * it were, and virus writers exploit
 					 * this bug. We can't just break from
 					 * the loop here since that would allow
 					 * other exploits such as inserting a
 					 * white space line before the
 					 * content-type line. So we just have
 					 * to make a best guess. Sigh.
41b7a56b
 					 */
 					if(fullline) {
 						if(parseEmailHeader(ret, fullline, rfc821) < 0)
 							continue;
 
 						free(fullline);
 						fullline = NULL;
 					}
300a8ae9
 					if(boundary ||
 					   ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
41b7a56b
 						lastWasBlank = TRUE;
 						continue;
 					}
 				}
 			}
72cf1461
 			if((line == NULL) && (fullline == NULL)) {	/* empty line */
0ed29506
 				/*
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
 				if(!anyHeadersFound)
 					/* Ignore the junk at the top */
 					continue;
5860ae08
 
0ed29506
 				cli_dbgmsg("End of header information\n");
 				inHeader = FALSE;
 				bodyIsEmpty = TRUE;
ddea752e
 			} else {
 				char *ptr;
f304dc68
 				const char *lookahead;
ddea752e
 
 				if(fullline == NULL) {
85bb253e
 					char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
ddea752e
 
 					/*
 					 * Continuation of line we're ignoring?
 					 */
0ed29506
 					if(isblank(line[0]))
ddea752e
 						continue;
e107e8cf
 
ddea752e
 					/*
 					 * Is this a header we're interested in?
 					 */
72cf1461
 					if((strchr(line, ':') == NULL) ||
 					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
 						if(strncmp(line, "From ", 5) == 0)
ddea752e
 							anyHeadersFound = TRUE;
 						continue;
 					}
 
d72749e0
 					ptr = rfc822comments(cmd, out);
ddea752e
 					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 
 					switch(commandNumber) {
 						case CONTENT_TRANSFER_ENCODING:
 						case CONTENT_DISPOSITION:
 						case CONTENT_TYPE:
 							anyHeadersFound = TRUE;
 							break;
 						default:
 							if(!anyHeadersFound)
 								anyHeadersFound = usefulHeader(commandNumber, cmd);
 							continue;
 					}
0cf4cea7
 					fullline = cli_strdup(line);
72cf1461
 					fulllinelength = strlen(line) + 1;
92012beb
 					if(!fullline) {
 						if(ret)
 							ret->isTruncated = TRUE;
 						break;
 					}
72cf1461
 				} else if(line != NULL) {
1f271616
 					fulllinelength += strlen(line) + 1;
468c0f21
 					ptr = cli_realloc(fullline, fulllinelength);
 					if(ptr == NULL)
 						continue;
 					fullline = ptr;
1f271616
 					cli_strlcat(fullline, line, fulllinelength);
ddea752e
 				}
 
 				assert(fullline != NULL);
 
ee1b2a6c
 				if((lookahead = fmap_need_off_once(map, *at, 1))) {
ddea752e
 					/*
 					 * Section B.2 of RFC822 says TAB or
 					 * SPACE means a continuation of the
 					 * previous entry.
 					 *
 					 * Add all the arguments on the line
 					 */
ee1b2a6c
 					if(isblank(*lookahead))
ddea752e
 						continue;
 				}
 
11f253d6
 				/*
 				 * Handle broken headers, where the next
 				 * line isn't indented by whitespace
 				 */
631baca8
 				if(fullline[strlen(fullline) - 1] == ';')
11f253d6
 					/* Add arguments to this line */
 					continue;
 
4f4a8f4a
 				if(line && (count_quotes(fullline) & 1))
 					continue;
ddea752e
 
d72749e0
 				ptr = rfc822comments(fullline, NULL);
ddea752e
 				if(ptr) {
 					free(fullline);
 					fullline = ptr;
 				}
 
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
 					continue;
 
 				free(fullline);
 				fullline = NULL;
 			}
fa5661be
 		} else if(line && isuuencodebegin(line)) {
ae5c693a
 			/*
 			 * Fast track visa to uudecode.
 			 * TODO: binhex, yenc
 			 */
6e3d492a
 			bodyIsEmpty = FALSE;
ee1b2a6c
 			if(uudecodeFile(ret, line, dir, map, at) < 0)
fa5661be
 				if(messageAddStr(ret, line) < 0)
 					break;
06466233
 		} else {
 			if(line == NULL) {
c52d991e
 				/*
 				 * Although this would save time and RAM, some
 				 * phish signatures have been built which need
 				 * the blank lines
 				 */
 				if(lastBodyLineWasBlank &&
 				  (messageGetMimeType(ret) != TEXT)) {
06466233
 					cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
 					continue;
 				}
 				lastBodyLineWasBlank = TRUE;
6e3d492a
 			} else {
 				if(bodyIsEmpty) {
 					/*
 					 * Broken message: new line in the
 					 * middle of the headers, so the first
 					 * line of the body is in fact
 					 * the last lines of the header
 					 */
0cf4cea7
 					if(newline_in_header(line))
6e3d492a
 						continue;
0cf4cea7
 					bodyIsEmpty = FALSE;
6e3d492a
 				}
06466233
 				lastBodyLineWasBlank = FALSE;
6e3d492a
 			}
06466233
 
72cf1461
 			if(messageAddStr(ret, line) < 0)
ddea752e
 				break;
06466233
 		}
ee1b2a6c
 	} while(getline_from_mbox(buffer, sizeof(buffer) - 1, map, at) != NULL);
ddea752e
 
300a8ae9
 	if(boundary)
 		free(boundary);
 
ddea752e
 	if(fullline) {
 		if(*fullline) switch(commandNumber) {
 			case CONTENT_TRANSFER_ENCODING:
 			case CONTENT_DISPOSITION:
 			case CONTENT_TYPE:
0d35f10f
 				cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
ddea752e
 		}
 		free(fullline);
 	}
 
 	if(!anyHeadersFound) {
 		/*
 		 * False positive in believing we have an e-mail when we don't
 		 */
 		messageDestroy(ret);
 		cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
 		return NULL;
 	}
 
 	cli_dbgmsg("parseEmailFile: return\n");
 
 	return ret;
 }
 
 /*
  * The given message contains a raw e-mail.
e06d34dc
  *
94f051b0
  * Returns the message's body with the correct arguments set, empties the
  * given message's contents (note that it isn't destroyed)
ddea752e
  *
  * TODO: remove the duplication with parseEmailFile
7e577f26
  */
e06d34dc
 static message *
2673dc74
 parseEmailHeaders(message *m, const table_t *rfc821)
7e577f26
 {
e06d34dc
 	bool inHeader = TRUE;
0d35f10f
 	bool bodyIsEmpty = TRUE;
94f051b0
 	text *t;
89670d69
 	message *ret;
4f1d0bfc
 	bool anyHeadersFound = FALSE;
15033cb6
 	int commandNumber = -1;
efb5f16c
 	char *fullline = NULL;
2ad0c86e
 	size_t fulllinelength = 0;
89670d69
 
02927896
 	cli_dbgmsg("parseEmailHeaders\n");
 
89670d69
 	if(m == NULL)
 		return NULL;
 
 	ret = messageCreate();
7e577f26
 
b2223aad
 	for(t = messageGetBody(m); t; t = t->t_next) {
0cf4cea7
 		const char *line;
7e577f26
 
b2223aad
 		if(t->t_line)
0cf4cea7
 			line = lineGetData(t->t_line);
b2223aad
 		else
0cf4cea7
 			line = NULL;
7e577f26
 
28010d29
 		if(inHeader) {
4e75d9b6
 			cli_dbgmsg("parseEmailHeaders: check '%s'\n",
0cf4cea7
 				line ? line : "");
 			if(line == NULL) {
4e75d9b6
 				/*
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
 				cli_dbgmsg("End of header information\n");
ad091acf
 				if(!anyHeadersFound) {
 					cli_dbgmsg("Nothing interesting in the header\n");
 					break;
 				}
0d35f10f
 				inHeader = FALSE;
 				bodyIsEmpty = TRUE;
2ad0c86e
 			} else {
b116962d
 				char *ptr;
 
2ad0c86e
 				if(fullline == NULL) {
85bb253e
 					char cmd[RFC2821LENGTH + 1];
45aba293
 
 					/*
 					 * Continuation of line we're ignoring?
 					 */
0cf4cea7
 					if(isblank(line[0]))
45aba293
 						continue;
 
 					/*
 					 * Is this a header we're interested in?
 					 */
0cf4cea7
 					if((strchr(line, ':') == NULL) ||
 					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
 						if(strncmp(line, "From ", 5) == 0)
b116962d
 							anyHeadersFound = TRUE;
45aba293
 						continue;
b116962d
 					}
45aba293
 
d72749e0
 					ptr = rfc822comments(cmd, NULL);
59921c02
 					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 					if(ptr)
 						free(ptr);
45aba293
 
 					switch(commandNumber) {
 						case CONTENT_TRANSFER_ENCODING:
 						case CONTENT_DISPOSITION:
 						case CONTENT_TYPE:
b116962d
 							anyHeadersFound = TRUE;
45aba293
 							break;
 						default:
ddea752e
 							if(!anyHeadersFound)
 								anyHeadersFound = usefulHeader(commandNumber, cmd);
45aba293
 							continue;
 					}
0cf4cea7
 					fullline = cli_strdup(line);
 					fulllinelength = strlen(line) + 1;
 				} else if(line) {
1f271616
 					fulllinelength += strlen(line) + 1;
468c0f21
 					ptr = cli_realloc(fullline, fulllinelength);
 					if(ptr == NULL)
 						continue;
 					fullline = ptr;
1f271616
 					cli_strlcat(fullline, line, fulllinelength);
2ad0c86e
 				}
ddea752e
 				assert(fullline != NULL);
2ad0c86e
 
842c7d49
 				if(next_is_folded_header(t))
 					/* Add arguments to this line */
 					continue;
2ad0c86e
 
94f051b0
 				lineUnlink(t->t_line);
 				t->t_line = NULL;
 
4f4a8f4a
 				if(count_quotes(fullline) & 1)
2ad0c86e
 					continue;
 
d72749e0
 				ptr = rfc822comments(fullline, NULL);
2ad0c86e
 				if(ptr) {
 					free(fullline);
 					fullline = ptr;
 				}
c77c8809
 
ddea752e
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
 					continue;
28010d29
 
ddea752e
 				free(fullline);
 				fullline = NULL;
f12d2498
 			}
0d35f10f
 		} else {
 			if(bodyIsEmpty) {
0cf4cea7
 				if(line == NULL)
0d35f10f
 					/* throw away leading blank lines */
 					continue;
 				/*
 				 * Broken message: new line in the
 				 * middle of the headers, so the first
 				 * line of the body is in fact
 				 * the last lines of the header
 				 */
0cf4cea7
 				if(newline_in_header(line))
0d35f10f
 					continue;
 				bodyIsEmpty = FALSE;
 			}
 			/*if(t->t_line && isuuencodebegin(t->t_line))
 				puts("FIXME: add fast visa here");*/
faa0d267
 			cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
94f051b0
 			messageMoveText(ret, t, m);
 			break;
0d35f10f
 		}
d879a7b0
 	}
e06d34dc
 
efb5f16c
 	if(fullline) {
2ad0c86e
 		if(*fullline) switch(commandNumber) {
 			case CONTENT_TRANSFER_ENCODING:
 			case CONTENT_DISPOSITION:
 			case CONTENT_TYPE:
aaaae842
 				cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
2ad0c86e
 		}
efb5f16c
 		free(fullline);
 	}
 
4f1d0bfc
 	if(!anyHeadersFound) {
 		/*
 		 * False positive in believing we have an e-mail when we don't
 		 */
 		messageDestroy(ret);
 		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
 		return NULL;
 	}
 
e17491b2
 	cli_dbgmsg("parseEmailHeaders: return\n");
 
e06d34dc
 	return ret;
7e577f26
 }
 
 /*
4c60b74f
  * Handle a header line of an email message
  */
 static int
393a6d67
 parseEmailHeader(message *m, const char *line, const table_t *rfc821)
4c60b74f
 {
001ad879
 	int ret;
4c60b74f
 #ifdef CL_THREAD_SAFE
 	char *strptr;
 #endif
7cd9337a
 	const char *separator;
 	char *cmd, *copy, tokenseparator[2];
4c60b74f
 
20d3dde9
 	cli_dbgmsg("parseEmailHeader '%s'\n", line);
 
97867f21
 	/*
7cd9337a
 	 * In RFC822 the separator between the key a value is a colon,
97867f21
 	 * e.g.	Content-Transfer-Encoding: base64
 	 * However some MUA's are lapse about this and virus writers exploit
22cb38ed
 	 * this hole, so we need to check all known possibilities
97867f21
 	 */
7cd9337a
 	for(separator = ":= "; *separator; separator++)
 		if(strchr(line, *separator) != NULL)
97867f21
 			break;
 
7cd9337a
 	if(*separator == '\0')
1bfbedd4
 		return -1;
 
50df4118
 	copy = rfc2047(line);
 	if(copy == NULL)
8ebe8dbc
 		/* an RFC checker would return -1 here */
0cf4cea7
 		copy = cli_strdup(line);
f2f25418
 
7cd9337a
 	tokenseparator[0] = *separator;
 	tokenseparator[1] = '\0';
97867f21
 
001ad879
 	ret = -1;
 
548a5f96
 #ifdef	CL_THREAD_SAFE
7cd9337a
 	cmd = strtok_r(copy, tokenseparator, &strptr);
548a5f96
 #else
7cd9337a
 	cmd = strtok(copy, tokenseparator);
548a5f96
 #endif
4c60b74f
 
3499d81e
 	if(cmd && (strstrip(cmd) > 0)) {
548a5f96
 #ifdef	CL_THREAD_SAFE
4c60b74f
 		char *arg = strtok_r(NULL, "", &strptr);
548a5f96
 #else
 		char *arg = strtok(NULL, "");
 #endif
4c60b74f
 
 		if(arg)
 			/*
 			 * Found a header such as
 			 * Content-Type: multipart/mixed;
 			 * set arg to be
 			 * "multipart/mixed" and cmd to
a9f386ed
 			 * be "Content-Type"
4c60b74f
 			 */
393a6d67
 			ret = parseMimeHeader(m, cmd, rfc821, arg);
4c60b74f
 	}
50df4118
 	free(copy);
4c60b74f
 	return ret;
 }
 
c2df9f79
 #if HAVE_LIBXML2
 /*
  * Key dictionaries handed to cli_msxml_parse_document() when a
  * multipart/related root HTML part is parsed. Each entry is
  * { element name, JSON key, MSXML_* flags }.
  */
 static const struct key_entry mhtml_keys[] = {
 	/* root html tags for microsoft office document */
 	{	"html",			"RootHTML",		MSXML_JSON_ROOT | MSXML_JSON_ATTRIB	},
 
 	{	"head",			"Head",			MSXML_JSON_WRKPTR | MSXML_COMMENT_CB	},
 	{	"meta",			"Meta",			MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB	},
 	{	"link",			"Link",			MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB	},
 	{	"script",		"Script",		MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_VALUE		}
 };
 /* Element count; const because it never changes after compilation */
 static const size_t num_mhtml_keys = sizeof(mhtml_keys) / sizeof(mhtml_keys[0]);
 
 static const struct key_entry mhtml_comment_keys[] = {
 	/* embedded xml tags (comment) for microsoft office document */
 	{	"o:documentproperties",	"DocumentProperties",	MSXML_JSON_ROOT | MSXML_JSON_ATTRIB	},
 	{	"o:author",		"Author",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:lastauthor",		"LastAuthor",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:revision",		"Revision",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:totaltime",		"TotalTime",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:created",		"Created",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:lastsaved",		"LastSaved",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:pages",		"Pages",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:words",		"Words",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:characters",		"Characters",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:company",		"Company",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:lines",		"Lines",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:paragraphs",		"Paragraphs",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:characterswithspaces",	"CharactersWithSpaces",	MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 	{	"o:version",		"Version",		MSXML_JSON_WRKPTR | MSXML_JSON_VALUE	},
 
 	{	"o:officedocumentsettings",	"DocumentSettings",	MSXML_IGNORE_ELEM	},
 	{	"w:worddocument",	"WordDocument",		MSXML_IGNORE_ELEM	},
 	{	"w:latentstyles",	"LatentStyles",		MSXML_IGNORE_ELEM	}
 };
 /* Element count; const because it never changes after compilation */
 static const size_t num_mhtml_comment_keys = sizeof(mhtml_comment_keys) / sizeof(mhtml_comment_keys[0]);
 #endif
 
 /*
  * The related multipart root HTML file comment parsing wrapper.
  *
  * Attempts to leverage msxml parser, cannot operate without LIBXML2.
  * This function is only used for Preclassification JSON.
  *
  * Every "<xml>" ... "</xml>" span found in the comment text is fed to
  * cli_msxml_parse_document() with the mhtml_comment_keys dictionary.
  * Scanning stops at the first "<xml>" that has no closing tag.
  *
  * comment  text to search for embedded xml spans
  * ctx      scan context handed to the msxml parser
  * wrkjobj  unused
  * cbdata   unused
  *
  * Returns CL_SUCCESS when all spans were parsed (or none were found, or
  * libxml2 is unavailable). Returns the msxml parser's status as soon as
  * one span fails to parse. If the xml reader cannot be created, returns
  * the result of cli_json_parse_error() when a JSON property object is
  * available and CL_SUCCESS otherwise.
  */
 static int
 parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata)
 {
 #if HAVE_LIBXML2
 	const char *xmlsrt, *xmlend;
 	xmlTextReaderPtr reader;
 	int ret = CL_SUCCESS;
 
 	UNUSEDPARAM(cbdata);
 	UNUSEDPARAM(wrkjobj);
 
 	xmlend = comment;
 	while ((xmlsrt = strstr(xmlend, "<xml>"))) {
 		xmlend = strstr(xmlsrt, "</xml>");
 		if (xmlend == NULL) {
 			cli_dbgmsg("parseMHTMLComment: unbounded xml tag\n");
 			break;
 		}
 
 		/* +6 == strlen("</xml>"): include the closing tag in the buffer */
 		reader = xmlReaderForMemory(xmlsrt, xmlend-xmlsrt+6, "comment.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
 		if (!reader) {
 			cli_dbgmsg("parseMHTMLComment: cannot initialize xmlReader\n");
 
 #if HAVE_JSON
 			if (ctx->wrkproperty != NULL)
 				ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_MEM");
 #endif
 			return ret; // libxml2 failed!
 		}
 
 		/* comment callback is not set to prevent recursion */
 		/* TODO: should we separate the key dictionaries? */
 		/* TODO: should we use the json object pointer? */
 		ret = cli_msxml_parse_document(ctx, reader, mhtml_comment_keys, num_mhtml_comment_keys, MSXML_FLAG_JSON, NULL);
 
 		xmlTextReaderClose(reader);
 		xmlFreeTextReader(reader);
 		if (ret != CL_SUCCESS)
 			return ret;
 	}
 #else
 	UNUSEDPARAM(comment);
 	UNUSEDPARAM(ctx);
 	UNUSEDPARAM(wrkjobj);
 	UNUSEDPARAM(cbdata);
 
 	cli_dbgmsg("in parseMHTMLComment\n");
 	cli_dbgmsg("parseMHTMLComment: parsing html xml-comments requires libxml2!\n");
 #endif
 	return CL_SUCCESS;
 }
 
 /*
  * The related multipart root HTML file parsing wrapper.
  *
  * Attempts to leverage msxml parser, cannot operate without LIBXML2.
  * This function is only used for Preclassification JSON.
  *
  * The content of m (or of t when m is NULL) is converted to a blob, read
  * as an HTML document by libxml2 and walked by cli_msxml_parse_document()
  * using the mhtml_keys dictionary, with parseMHTMLComment() installed as
  * the comment callback.
  *
  * mctx  mbox context; supplies the scan context and the JSON work object
  * m     message to parse, may be NULL
  * t     text to parse when m is NULL, may be NULL
  *
  * Returns OK when there is nothing to parse, when libxml2 (or its HTML
  * support) is not available, or when the msxml parser reports
  * CL_SUCCESS, CL_ETIMEOUT or CL_BREAK. Returns MAXREC, MAXFILES or VIRUS
  * for CL_EMAXREC, CL_EMAXFILES and CL_VIRUS respectively, and FAIL for
  * any other parser status or when recording a JSON parse error fails.
  */
 static mbox_status
 parseRootMHTML(mbox_ctx *mctx, message *m, text *t)
 {
 #if HAVE_LIBXML2
 #ifdef LIBXML_HTML_ENABLED
 	cli_ctx *ctx = mctx->ctx;
 	struct msxml_ctx mxctx;
 	blob *input = NULL;
 	htmlDocPtr htmlDoc;
 	xmlTextReaderPtr reader;
 	int ret = CL_SUCCESS;
 	mbox_status rc = OK;
 #if HAVE_JSON
 	json_object *rhtml;
 #endif
 
 	cli_dbgmsg("in parseRootMHTML\n");
 
 	if (ctx == NULL)
 		return OK;
 
 	if (m == NULL && t == NULL)
 		return OK;
 
 	if (m != NULL)
 		input = messageToBlob(m, 0);
 	else /* t != NULL */
 		input = textToBlob(t, NULL, 0);
 
 	if (input == NULL)
 		return OK;
 
 	htmlDoc = htmlReadMemory((char*)input->data, input->len, "mhtml.html", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
 	if (htmlDoc == NULL) {
 		cli_dbgmsg("parseRootMHTML: cannot initialize read html document\n");
 #if HAVE_JSON
 		if (ctx->wrkproperty != NULL)
 			ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_HTML_READ");
 		if (ret != CL_SUCCESS)
 			rc = FAIL;
 #endif
 		blobDestroy(input);
 		return rc;
 	}
 
 #if HAVE_JSON
 	if (mctx->wrkobj) {
 		rhtml = cli_jsonobj(mctx->wrkobj, "RootHTML");
 		if (rhtml != NULL) {
 			/* MHTML-specific properties */
 			cli_jsonstr(rhtml, "Encoding", (const char*)htmlGetMetaEncoding(htmlDoc));
 			cli_jsonint(rhtml, "CompressMode", xmlGetDocCompressMode(htmlDoc));
 		}
 	}
 #endif
 
 	reader = xmlReaderWalker(htmlDoc);
 	if (reader == NULL) {
 		cli_dbgmsg("parseRootMHTML: cannot initialize xmlTextReader\n");
 #if HAVE_JSON
 		if (ctx->wrkproperty != NULL)
 			ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_IO");
 		if (ret != CL_SUCCESS)
 			rc = FAIL;
 #endif
 		/* the document is still ours to release, as on the normal exit path */
 		xmlFreeDoc(htmlDoc);
 		blobDestroy(input);
 		return rc;
 	}
 
 	memset(&mxctx, 0, sizeof(mxctx));
 	/* no scanning callback set */
 	mxctx.comment_cb = parseMHTMLComment;
 	ret = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, MSXML_FLAG_JSON | MSXML_FLAG_WALK, &mxctx);
 	/* translate the parser status into an mbox_status */
 	switch (ret) {
 	case CL_SUCCESS:
 	case CL_ETIMEOUT:
 	case CL_BREAK:
 		rc = OK;
 		break;
 
 	case CL_EMAXREC:
 		rc = MAXREC;
 		break;
 
 	case CL_EMAXFILES:
 		rc = MAXFILES;
 		break;
 
 	case CL_VIRUS:
 		rc = VIRUS;
 		break;
 
 	default:
 		rc = FAIL;
 		break;
 	}
 
 	/* release the reader before the document it walks */
 	xmlTextReaderClose(reader);
 	xmlFreeTextReader(reader);
 	xmlFreeDoc(htmlDoc);
 	blobDestroy(input);
 	return rc;
 #else  /* LIBXML_HTML_ENABLED */
 	UNUSEDPARAM(mctx);
 	UNUSEDPARAM(m);
 	UNUSEDPARAM(t);
 	cli_dbgmsg("in parseRootMHTML\n");
 	cli_dbgmsg("parseRootMHTML: parsing html documents disabled in libxml2!\n");
 
 	/* without this the function would end with no return value */
 	return OK;
 #endif /* LIBXML_HTML_ENABLED */
 #else  /* HAVE_LIBXML2 */
 	UNUSEDPARAM(mctx);
 	UNUSEDPARAM(m);
 	UNUSEDPARAM(t);
 	cli_dbgmsg("in parseRootMHTML\n");
 	cli_dbgmsg("parseRootMHTML: parsing html documents requires libxml2!\n");
 
 	return OK;
 #endif /* HAVE_LIBXML2 */
 }
 
4c60b74f
 /*
e3aaff8e
  * This is a recursive routine.
  *
7e577f26
  * This function parses the body of mainMessage and saves its attachments in dir
  *
e06d34dc
  * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
f12d2498
  * any headers. First time of calling it'll be
  * the whole message. Later it'll be parts of a multipart message
e3aaff8e
  * textIn is the plain text message being built up so far
  */
ecc3d638
 static mbox_status
242ffd7a
 parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
e3aaff8e
 {
42eebd87
 	mbox_status rc;
2673dc74
 	text *aText = textIn;
 	message *mainMessage = messageIn;
b0b860f1
 	fileblob *fb;
a603478f
 	bool infected = FALSE;
724b2bf7
 	const struct cl_engine *engine = mctx->ctx->engine;
 	const int doPhishingScan = engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
ede9939c
 #if HAVE_JSON
 	json_object *saveobj = mctx->wrkobj;
 #endif
8386c723
 
001ad879
 	cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
 		mctx->files);
e3aaff8e
 
724b2bf7
 	/* FIXMELIMITS: this should be better integrated */
 	if(engine->maxreclevel)
 		/*
 		 * This is approximate
 		 */
 		if(recursion_level > engine->maxreclevel) {
a5faab66
 
6351aa86
 				cli_dbgmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
d91ab809
 				return MAXREC;
001ad879
 			}
724b2bf7
 	if(engine->maxfiles && (mctx->files >= engine->maxfiles)) {
 		/*
 		 * FIXME: This is only approx - it may have already
 		 * been exceeded
 		 */
 		cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", engine->maxfiles);
 		return MAXFILES;
72910996
 	}
242ffd7a
 
f7bf6fd2
 	rc = OK;
 
e3aaff8e
 	/* Anything left to be parsed? */
d4d14218
 	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
e3aaff8e
 		mime_type mimeType;
2673dc74
 		int subtype, inhead, htmltextPart, inMimeHead, i;
95e11e5a
 		const char *mimeSubtype;
6351aa86
 		char *boundary;
e3aaff8e
 		const text *t_line;
98cb5cba
 		/*bool isAlternative;*/
e3aaff8e
 		message *aMessage;
2673dc74
 		int multiparts = 0;
 		message **messages = NULL;	/* parts of a multipart message */
e3aaff8e
 
049a18b9
 		cli_dbgmsg("Parsing mail file\n");
 
e3aaff8e
 		mimeType = messageGetMimeType(mainMessage);
 		mimeSubtype = messageGetMimeSubtype(mainMessage);
ede9939c
 #if HAVE_JSON
 		if (mctx->wrkobj != NULL) {
 			mctx->wrkobj = cli_jsonobj(mctx->wrkobj, "Body");
 			cli_jsonstr(mctx->wrkobj, "MimeType", getMimeTypeStr(mimeType));
 			cli_jsonstr(mctx->wrkobj, "MimeSubtype", mimeSubtype);
 			cli_jsonstr(mctx->wrkobj, "EncodingType", getEncTypeStr(messageGetEncoding(mainMessage)));
 			cli_jsonstr(mctx->wrkobj, "Disposition", messageGetDispositionType(mainMessage));
 			cli_jsonstr(mctx->wrkobj, "Filename", messageHasFilename(mainMessage) ?
 				    messageGetFilename(mainMessage): "(inline)");
 		}
 #endif
e3aaff8e
 
528c8a2d
 		/* pre-process */
c1fce7f7
 		subtype = tableFind(mctx->subtypeTable, mimeSubtype);
7c5a7a47
 		if((mimeType == TEXT) && (subtype == PLAIN)) {
e3aaff8e
 			/*
 			 * This is effectively no encoding, notice that we
 			 * don't check that charset is us-ascii
 			 */
7cd9337a
 			cli_dbgmsg("text/plain: Assume no attachments\n");
e3aaff8e
 			mimeType = NOMIME;
ae5c693a
 			messageSetMimeSubtype(mainMessage, "");
528c8a2d
 		} else if((mimeType == MESSAGE) &&
 			  (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
 			/*
 			 * RFC1892/RFC3462: section 2 text/rfc822-headers
 			 * incorrectly sent as message/rfc822-headers
46d375fe
 			 *
 			 * Parse as text/plain, i.e. no mime
528c8a2d
 			 */
 			cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
46d375fe
 			mimeType = NOMIME;
ae5c693a
 			messageSetMimeSubtype(mainMessage, "");
a05e6d45
 		} else
8affc406
 			cli_dbgmsg("mimeType = %d\n", (int)mimeType);
049a18b9
 
e3aaff8e
 		switch(mimeType) {
 		case NOMIME:
6fd711b2
 			cli_dbgmsg("Not a mime encoded message\n");
e3aaff8e
 			aText = textAddMessage(aText, mainMessage);
faa0d267
 
647a4f8d
 			if(!doPhishingScan)
 				break;
94aea271
 			/*
 			 * Fall through: some phishing mails claim they are
 			 * text/plain, when they are in fact html
 			 */
e3aaff8e
 		case TEXT:
8a892c3b
 			/* text/plain has been preprocessed as no encoding */
6a4dd9dc
 			if(doPhishingScan) {
47d9cc65
 				/*
 				 * It would be better to save and scan the
 				 * file and only checkURLs if it's found to be
 				 * clean
 				 */
8b899010
 				checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
 				/*
 				 * There might be html sent without subtype
 				 * html too, so scan them for phishing
 				 */
ecc3d638
 				if(rc == VIRUS)
8b899010
 					infected = TRUE;
c52d991e
 			}
e3aaff8e
 			break;
 		case MULTIPART:
c8a7cef0
 			cli_dbgmsg("Content-type 'multipart' handler\n");
e3aaff8e
 			boundary = messageFindArgument(mainMessage, "boundary");
 
ede9939c
 #if HAVE_JSON
ee6fb5fb
                         if (mctx->wrkobj != NULL)
                             cli_jsonstr(mctx->wrkobj, "Boundary", boundary);
ede9939c
 #endif
 
e3aaff8e
 			if(boundary == NULL) {
e25960d2
 				cli_dbgmsg("Multipart/%s MIME message contains no boundary header\n",
842c7d49
 					mimeSubtype);
e2e7ebf5
 				/* Broken e-mail message */
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
 				 * check if the file contains a uuencoded file
 				 */
 				break;
e3aaff8e
 			}
 
ede9939c
 			cli_chomp(boundary);
23889d27
 
c79a2273
 			/* Perhaps it should assume mixed? */
cb5a87e0
 			if(mimeSubtype[0] == '\0') {
6351aa86
 				cli_dbgmsg("Multipart has no subtype assuming alternative\n");
cb5a87e0
 				mimeSubtype = "alternative";
 				messageSetMimeSubtype(mainMessage, "alternative");
 			}
 
e3aaff8e
 			/*
 			 * Get to the start of the first message
 			 */
20d3dde9
 			t_line = messageGetBody(mainMessage);
 
 			if(t_line == NULL) {
6351aa86
 				cli_dbgmsg("Multipart MIME message has no body\n");
20d3dde9
 				free((char *)boundary);
 				mimeType = NOMIME;
 				break;
 			}
 
 			do
bae9c53f
 				if(t_line->t_line) {
 					if(boundaryStart(lineGetData(t_line->t_line), boundary))
 						break;
 					/*
182bbcc8
 					 * Found a binhex file before
ebe57840
 					 *	the first multipart
b116962d
 					 * TODO: check yEnc
bae9c53f
 					 */
182bbcc8
 					if(binhexBegin(mainMessage) == t_line) {
001ad879
 						if(exportBinhexMessage(mctx, mainMessage)) {
a05e6d45
 							/* virus found */
ecc3d638
 							rc = VIRUS;
47d9cc65
 							infected = TRUE;
a05e6d45
 							break;
b116962d
 						}
42eebd87
 					} else if(t_line->t_next &&
a5faab66
 						 (encodingLine(mainMessage) == t_line->t_next)) {
99c2299d
 						/*
 						 * We look for the next line
 						 * since later on we'll skip
 						 * over the important line when
 						 * we think it's a blank line
 						 * at the top of the message -
 						 * which it would have been in
 						 * an RFC compliant world
 						 */
42eebd87
 						cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
 							lineGetData(t_line->t_next->t_line));
99c2299d
 						if(messageGetEncoding(mainMessage) == NOENCODING)
 							break;
b116962d
 					}
bae9c53f
 				}
20d3dde9
 			while((t_line = t_line->t_next) != NULL);
e3aaff8e
 
 			if(t_line == NULL) {
47d9cc65
 				cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
 					boundary);
5a642650
 				free((char *)boundary);
e2e7ebf5
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
182bbcc8
 				 * check if the file contains a yEnc/binhex file
e2e7ebf5
 				 */
 				break;
e3aaff8e
 			}
 			/*
 			 * Build up a table of all of the parts of this
 			 * multipart message. Remember, each part may itself
 			 * be a multipart message.
 			 */
 			inhead = 1;
 			inMimeHead = 0;
 
e06d34dc
 			/*
9bccc1e5
 			 * Re-read this variable in case mimeSubtype has changed
 			 */
 			subtype = tableFind(mctx->subtypeTable, mimeSubtype);
 
 			/*
9a729c80
 			 * Parse the mainMessage object and create an array
 			 * of objects called messages, one for each of the
9bccc1e5
 			 * multiparts that mainMessage contains.
3f46285b
 			 *
e06d34dc
 			 * This looks like parseEmailHeaders() - maybe there's
 			 * some duplication of code to be cleaned up
9bccc1e5
 			 *
842c7d49
 			 * We may need to create an array rather than just
9bccc1e5
 			 * save each part as it is found because not all
 			 * elements will need scanning, and we don't yet know
 			 * which of those elements it will be, except in
 			 * the case of mixed, when all parts need to be scanned.
e06d34dc
 			 */
9bccc1e5
 			for(multiparts = 0; t_line && !infected; multiparts++) {
56ae62e2
 				int lines = 0;
1a74d4df
 				message **m;
69c62847
 				mbox_status old_rc;
56ae62e2
 
1a74d4df
 				m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
f12d2498
 				if(m == NULL)
1a74d4df
 					break;
 				messages = m;
b726511f
 
e3aaff8e
 				aMessage = messages[multiparts] = messageCreate();
3f3f9085
 				if(aMessage == NULL) {
 					multiparts--;
f7f59c08
 					/* if allocation failed the first time,
 					 * there's no point in retrying, just
 					 * break out */
 					break;
3f3f9085
 				}
c1fce7f7
 				messageSetCTX(aMessage, mctx->ctx);
e3aaff8e
 
 				cli_dbgmsg("Now read in part %d\n", multiparts);
 
8ba634a9
 				/*
 				 * Ignore blank lines. There shouldn't be ANY
 				 * but some viruses insert them
 				 */
02927896
 				while((t_line = t_line->t_next) != NULL)
b2223aad
 					if(t_line->t_line &&
 					   /*(cli_chomp(t_line->t_text) > 0))*/
 					   (strlen(lineGetData(t_line->t_line)) > 0))
d79597e3
 						break;
8ba634a9
 
 				if(t_line == NULL) {
 					cli_dbgmsg("Empty part\n");
b9ce9639
 					/*
 					 * Remove this part unless there's
182bbcc8
 					 * a binhex portion somewhere in
b9ce9639
 					 * the complete message that we may
 					 * throw away by mistake if the MIME
 					 * encoding information is incorrect
 					 */
9bccc1e5
 					if(mainMessage &&
 					   (binhexBegin(mainMessage) == NULL)) {
b9ce9639
 						messageDestroy(aMessage);
 						--multiparts;
 					}
8ba634a9
 					continue;
 				}
 
 				do {
b2223aad
 					const char *line = lineGetData(t_line->t_line);
e3aaff8e
 
fa5661be
 					/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
 						multiparts, inMimeHead, inhead, boundary, line,
391f7bb3
 						t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
e3aaff8e
 
f1c1300c
 					if(inMimeHead) {	/* continuation line */
02927896
 						if(line == NULL) {
59b99810
 							/*inhead =*/ inMimeHead = 0;
02927896
 							continue;
 						}
3a978f7d
 						/*
 						 * Handle continuation lines
 						 * because the previous line
1eec55a6
 						 * ended with a ; or this line
 						 * starts with a white space
3a978f7d
 						 */
1eec55a6
 						cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
 							multiparts, line);
3a978f7d
 						/*
 						 * Handle the case when it
 						 * isn't really a continuation
 						 * line:
 						 * Content-Type: application/octet-stream;
 						 * Content-Transfer-Encoding: base64
 						 */
c1fce7f7
 						parseEmailHeader(aMessage, line, mctx->rfc821Table);
3a978f7d
 
e3aaff8e
 						while(isspace((int)*line))
 							line++;
 
 						if(*line == '\0') {
 							inhead = inMimeHead = 0;
 							continue;
 						}
0ed29506
 						inMimeHead = FALSE;
e3aaff8e
 						messageAddArgument(aMessage, line);
f1c1300c
 					} else if(inhead) {	/* handling normal headers */
4f4a8f4a
 						/*int quotes;*/
c8a7cef0
 						char *fullline, *ptr;
2ad0c86e
 
02927896
 						if(line == NULL) {
7e67e382
 							/*
 							 * empty line, should be the end of the headers,
 							 * but some base64 decoders, e.g. uudeview, are broken
 							 * and will handle this type of entry, decoding the
 							 * base64 content...
 							 * Content-Type: application/octet-stream; name=text.zip
 							 * Content-Transfer-Encoding: base64
 							 * Content-Disposition: attachment; filename="text.zip"
5198de85
 							 *
7e67e382
 							 * Content-Disposition: attachment;
 							 *	filename=text.zip
 							 * Content-Type: application/octet-stream;
 							 *	name=text.zip
 							 * Content-Transfer-Encoding: base64
5198de85
 							 *
7e67e382
 							 * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
 							 */
842c7d49
 							const text *next = t_line->t_next;
 
7e67e382
 							if(next && next->t_line) {
 								const char *data = lineGetData(next->t_line);
5b76248c
 
 								if((messageGetEncoding(aMessage) == NOENCODING) &&
842c7d49
 								   (messageGetMimeType(aMessage) == APPLICATION) &&
faa0d267
 								   data && strstr(data, "base64")) {
d72749e0
 									/*
 									 * Handle this nightmare (note the blank
 									 * line in the header and the incorrect
 									 * content-transfer-encoding header)
 									 *
 									 * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
 									 *
 									 * r-Encoding: base64
 									 * Content-Disposition: attachment; filename="zipped_files.EXE"
 									 */
842c7d49
 									messageSetEncoding(aMessage, "base64");
 									cli_dbgmsg("Ignoring fake end of headers\n");
 									continue;
 								}
5cdb01fc
 								if((strncmp(data, "Content", 7) == 0) ||
 								   (strncmp(data, "filename=", 9) == 0)) {
7e67e382
 									cli_dbgmsg("Ignoring fake end of headers\n");
 									continue;
 								}
 							}
59b99810
 							cli_dbgmsg("Multipart %d: End of header information\n",
 								multiparts);
e3aaff8e
 							inhead = 0;
 							continue;
 						}
c76810dc
 						if(isspace((int)*line)) {
 							/*
 							 * The first line is
 							 * continuation line.
 							 * This is tricky
 							 * to handle, but
 							 * all we can do is our
 							 * best
 							 */
 							cli_dbgmsg("Part %d starts with a continuation line\n",
 								multiparts);
 							messageAddArgument(aMessage, line);
 							/*
 							 * Give it a default
 							 * MIME type since
 							 * that may be the
 							 * missing line
 							 *
 							 * Choose application to
 							 * force a save
 							 */
 							if(messageGetMimeType(aMessage) == NOMIME)
 								messageSetMimeType(aMessage, "application");
 							continue;
 						}
 
c8a7cef0
 						inMimeHead = FALSE;
b2223aad
 
85bb253e
 						assert(strlen(line) <= RFC2821LENGTH);
f1c1300c
 
d72749e0
 						fullline = rfc822comments(line, NULL);
c8a7cef0
 						if(fullline == NULL)
0cf4cea7
 							fullline = cli_strdup(line);
2ad0c86e
 
4f4a8f4a
 						/*quotes = count_quotes(fullline);*/
f1c1300c
 
c8a7cef0
 						/*
 						 * Fold next lines to the end of this
 						 * if they start with a white space
 						 * or if this line has an odd number of quotes:
 						 * Content-Type: application/octet-stream; name="foo
 						 * "
 						 */
842c7d49
 						while(t_line && next_is_folded_header(t_line)) {
 							const char *data;
ede9939c
 							size_t datasz;
842c7d49
 
 							t_line = t_line->t_next;
 
 							data = lineGetData(t_line->t_line);
f1c1300c
 
f1d57230
 							if(data[1] == '\0') {
 								/*
 								 * Broken message: the
 								 * blank line at the end
 								 * of the headers isn't blank -
 								 * it contains a space
 								 */
 								cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
 									multiparts);
 								inhead = FALSE;
 								break;
 							}
 
ede9939c
 							datasz = strlen(fullline) + strlen(data) + 1;
1f271616
 							ptr = cli_realloc(fullline, datasz);
303f9be9
 
c8a7cef0
 							if(ptr == NULL)
 								break;
1eec55a6
 
c8a7cef0
 							fullline = ptr;
1f271616
 							cli_strlcat(fullline, data, datasz);
2ad0c86e
 
4f4a8f4a
 							/*quotes = count_quotes(data);*/
1eec55a6
 						}
842c7d49
 
c8a7cef0
 						cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
 							multiparts, fullline);
 
c1fce7f7
 						parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
c8a7cef0
 						free(fullline);
69c62847
 					} else if(boundaryEnd(line, boundary)) {
e3aaff8e
 						/*
 						 * Some viruses put information
 						 * *after* the end of message,
 						 * which presumably some broken
 						 * mail clients find, so we
 						 * can't assume that this
 						 * is the end of the message
 						 */
 						/* t_line = NULL;*/
 						break;
391f7bb3
 					} else if(boundaryStart(line, boundary)) {
 						inhead = 1;
 						break;
56ae62e2
 					} else {
b2223aad
 						if(messageAddLine(aMessage, t_line->t_line) < 0)
1a74d4df
 							break;
56ae62e2
 						lines++;
 					}
8ba634a9
 				} while((t_line = t_line->t_next) != NULL);
 
69c62847
 				cli_dbgmsg("Part %d has %d lines, rc = %d\n",
8affc406
 					multiparts, lines, (int)rc);
9bccc1e5
 
 				/*
 				 * Only save in the array of messages if some
 				 * decision will be taken on whether to scan.
 				 * If all parts will be scanned then save to
 				 * file straight away
 				 */
 				switch(subtype) {
 					case MIXED:
 					case ALTERNATIVE:
 					case REPORT:
 					case DIGEST:
 					case APPLEDOUBLE:
 					case KNOWBOT:
 					case -1:
69c62847
 						old_rc = rc;
9bccc1e5
 						mainMessage = do_multipart(mainMessage,
 							messages, multiparts,
 							&rc, mctx, messageIn,
5684fccf
 							&aText, recursion_level);
69c62847
 						if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
 							rc = OK;
a7a2e2d4
 						if(messages[multiparts]) {
 							messageDestroy(messages[multiparts]);
 							messages[multiparts] = NULL;
 						}
9bccc1e5
 						--multiparts;
ecc3d638
 						if(rc == VIRUS)
9bccc1e5
 							infected = TRUE;
 						break;
f7f59c08
 
 					case RELATED:
 					case ENCRYPTED:
 					case SIGNED:
 					case PARALLEL:
 						/* all the subtypes that we handle
 						 * (all from the switch(tableFind...) below)
 						 * must be listed here */
 						break;
 					default:
 						/* this is a subtype that we 
 						 * don't handle anyway, 
 						 * don't store */
 						if(messages[multiparts]) {
 							messageDestroy(messages[multiparts]);
 							messages[multiparts] = NULL;
 						}
 						--multiparts;
9bccc1e5
 				}
e3aaff8e
 			}
 
 			free((char *)boundary);
 
7c1eb3bf
 			/*
c79a2273
 			 * Preprocess. Anything special to be done before
 			 * we handle the multiparts?
9a729c80
 			 */
9bccc1e5
 			switch(subtype) {
c79a2273
 				case KNOWBOT:
 					/* TODO */
 					cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
 					mimeSubtype = "mixed";
 					break;
5d8100cb
 				case -1:
 					/*
 					 * According to section 7.2.6 of
7cd9337a
 					 * RFC1521, unrecognized multiparts
5d8100cb
 					 * should be treated as multipart/mixed.
 					 */
38d07186
 					cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
5d8100cb
 					mimeSubtype = "mixed";
 					break;
c79a2273
 			}
9a729c80
 
 			/*
7c1eb3bf
 			 * We've finished with the message we're parsing
 			 */
 			if(mainMessage && (mainMessage != messageIn)) {
 				messageDestroy(mainMessage);
 				mainMessage = NULL;
ad9c6836
 			}
e3aaff8e
 
9bccc1e5
 			cli_dbgmsg("The message has %d parts\n", multiparts);
 
a585329e
 			if(infected || ((multiparts == 0) && (aText == NULL))) {
b912eaf2
 				if(messages) {
 					for(i = 0; i < multiparts; i++)
 						if(messages[i])
 							messageDestroy(messages[i]);
b726511f
 					free(messages);
b912eaf2
 				}
584aadd5
 				if(aText && (textIn == NULL))
 					textDestroy(aText);
b912eaf2
 
ede9939c
 #if HAVE_JSON
 				mctx->wrkobj = saveobj;
 #endif
9bccc1e5
 				/*
ecc3d638
 				 * Nothing to do
9bccc1e5
 				 */
69c62847
 				switch(rc) {
 					case VIRUS: return VIRUS;
 					case MAXREC: return MAXREC;
 					default: return OK_ATTACHMENTS_NOT_SAVED;
 				}
b726511f
 			}
7c1eb3bf
 
393a6d67
 			cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
e3aaff8e
 
9a729c80
 			/*
 			 * We now have all the parts of the multipart message
 			 * in the messages array:
 			 *	message *messages[multiparts]
 			 * Let's decide what to do with them all
 			 */
c1fce7f7
 			switch(tableFind(mctx->subtypeTable, mimeSubtype)) {
e3aaff8e
 			case RELATED:
e06d34dc
 				cli_dbgmsg("Multipart related handler\n");
e3aaff8e
 				/*
294d0774
 				 * Have a look to see if there's HTML code
 				 * which will need scanning
e3aaff8e
 				 */
 				aMessage = NULL;
 				assert(multiparts > 0);
 
d4d14218
 				htmltextPart = getTextPart(messages, multiparts);
e3aaff8e
 
f7f59c08
 				if(htmltextPart >= 0 && messages) {
73ddf91f
 					if(messageGetBody(messages[htmltextPart]))
faa0d267
 
73ddf91f
 						aText = textAddMessage(aText, messages[htmltextPart]);
 				} else
e3aaff8e
 					/*
294d0774
 					 * There isn't an HTML bit. If there's a
 					 * multipart bit, it may be in there
 					 * somewhere
e3aaff8e
 					 */
 					for(i = 0; i < multiparts; i++)
 						if(messageGetMimeType(messages[i]) == MULTIPART) {
 							aMessage = messages[i];
d4d14218
 							htmltextPart = i;
e3aaff8e
 							break;
 						}
 
59da5a4f
 				if(htmltextPart == -1)
16037392
 					cli_dbgmsg("No HTML code found to be scanned\n");
59da5a4f
 				else {
c2df9f79
 #if HAVE_JSON
 					/* Send root HTML file for preclassification */
 					if (mctx->ctx->wrkproperty)
 						parseRootMHTML(mctx, aMessage, aText);
 #endif
242ffd7a
 					rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
99f7771b
 					if((rc == OK) && aMessage) {
59da5a4f
 						assert(aMessage == messages[htmltextPart]);
99f7771b
 						messageDestroy(aMessage);
59da5a4f
 						messages[htmltextPart] = NULL;
94f051b0
 					} else if(rc == VIRUS) {
a585329e
 						infected = TRUE;
 						break;
 					}
59da5a4f
 				}
e3aaff8e
 
 				/*
 				 * The message is confused about the difference
 				 * between alternative and related. Badtrans.B
 				 * suffers from this problem.
 				 *
 				 * Fall through in this case:
 				 * Content-Type: multipart/related;
 				 *	type="multipart/alternative"
 				 */
ba867aed
 			case DIGEST:
 				/*
 				 * According to section 5.1.5 RFC2046, the
 				 * default mime type of multipart/digest parts
 				 * is message/rfc822
 				 *
 				 * We consider them as alternative, wrong in
 				 * the strictest sense since they aren't
 				 * alternatives - all parts a valid - but it's
 				 * OK for our needs since it means each part
 				 * will be scanned
 				 */
e3aaff8e
 			case ALTERNATIVE:
 				cli_dbgmsg("Multipart alternative handler\n");
 
 				/*
 				 * Fall through - some clients are broken and
 				 * say alternative instead of mixed. The Klez
5d8100cb
 				 * virus is broken that way, and anyway we
 				 * wish to scan all of the alternatives
e3aaff8e
 				 */
 			case REPORT:
 				/*
 				 * According to section 1 of RFC1892, the
 				 * syntax of multipart/report is the same
 				 * as multipart/mixed. There are some required
 				 * parameters, but there's no need for us to
 				 * verify that they exist
 				 */
6351aa86
 			case ENCRYPTED:
 				/* MUAs without encryption plugins can display as multipart/mixed,
 				 * just scan it*/
e3aaff8e
 			case MIXED:
c9b8f252
 			case APPLEDOUBLE:	/* not really supported */
e3aaff8e
 				/*
 				 * Look for attachments
 				 *
 				 * Not all formats are supported. If an
 				 * unsupported format turns out to be
 				 * common enough to implement, it is a simple
 				 * matter to add it
 				 */
ad9c6836
 				if(aText) {
 					if(mainMessage && (mainMessage != messageIn))
 						messageDestroy(mainMessage);
e3aaff8e
 					mainMessage = NULL;
ad9c6836
 				}
e3aaff8e
 
 				cli_dbgmsg("Mixed message with %d parts\n", multiparts);
 				for(i = 0; i < multiparts; i++) {
c1fce7f7
 					mainMessage = do_multipart(mainMessage,
 						messages, i, &rc, mctx,
242ffd7a
 						messageIn, &aText, recursion_level + 1);
ecc3d638
 					if(rc == VIRUS) {
a603478f
 						infected = TRUE;
 						break;
 					}
69c62847
 					if(rc == MAXREC)
 						break;
c89cebe8
 					if (rc == OK_ATTACHMENTS_NOT_SAVED)
 					    rc = OK;
e3aaff8e
 				}
 
242ffd7a
 				/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
e3aaff8e
 				break;
 			case SIGNED:
 			case PARALLEL:
 				/*
 				 * If we're here it could be because we have a
 				 * multipart/mixed message, consisting of a
 				 * message followed by an attachment. That
 				 * message itself is a multipart/alternative
 				 * message and we need to dig out the plain
 				 * text part of that alternative
 				 */
f7f59c08
 				if(messages) {
 					htmltextPart = getTextPart(messages, multiparts);
 					if(htmltextPart == -1)
 						htmltextPart = 0;
 					rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
 				}
e3aaff8e
 				break;
 			default:
5d8100cb
 				assert(0);
e3aaff8e
 			}
 
ad9c6836
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
 
6e84cebb
 			if(aText && (textIn == NULL)) {
a603478f
 				if((!infected) && (fb = fileblobCreate()) != NULL) {
c8a7cef0
 					cli_dbgmsg("Save non mime and/or text/plain part\n");
c1fce7f7
 					fileblobSetFilename(fb, mctx->dir, "textpart");
e097c0dd
 					/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
c1fce7f7
 					fileblobSetCTX(fb, mctx->ctx);
3f46285b
 					(void)textToFileblob(aText, fb, 1);
6e84cebb
 
 					fileblobDestroy(fb);
001ad879
 					mctx->files++;
6e84cebb
 				}
049a18b9
 				textDestroy(aText);
6e84cebb
 			}
049a18b9
 
b116962d
 			for(i = 0; i < multiparts; i++)
 				if(messages[i])
 					messageDestroy(messages[i]);
 
b726511f
 			if(messages)
 				free(messages);
 
ede9939c
 #if HAVE_JSON
 			mctx->wrkobj = saveobj;
 #endif
e3aaff8e
 			return rc;
 
 		case MESSAGE:
 			/*
 			 * Check for forbidden encodings
 			 */
 			switch(messageGetEncoding(mainMessage)) {
 				case NOENCODING:
 				case EIGHTBIT:
 				case BINARY:
 					break;
 				default:
6351aa86
 					cli_dbgmsg("MIME type 'message' cannot be decoded\n");
e3aaff8e
 					break;
 			}
ecc3d638
 			rc = FAIL;
049a18b9
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
c1fce7f7
 				message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
c693116d
 				if(m) {
af3c6acb
 					cli_dbgmsg("Decode rfc822\n");
c693116d
 
c1fce7f7
 					messageSetCTX(m, mctx->ctx);
a603478f
 
93d41ee4
 					if(mainMessage && (mainMessage != messageIn)) {
 						messageDestroy(mainMessage);
 						mainMessage = NULL;
59da5a4f
 					} else
 						messageReset(mainMessage);
c693116d
 					if(messageGetBody(m))
242ffd7a
 						rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
c693116d
 
 					messageDestroy(m);
 				}
e3aaff8e
 				break;
5a15955b
 			} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
12f3689d
 				/* RFC 2298 - handle like a normal email */
ecc3d638
 				rc = OK;
12f3689d
 				break;
5a15955b
 			} else if(strcasecmp(mimeSubtype, "partial") == 0) {
d7979d4f
 				if(mctx->ctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE) {
4270f93b
 					/* RFC1341 message split over many emails */
 					if(rfc1341(mainMessage, mctx->dir) >= 0)
 						rc = OK;
 				} else {
 					cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
 				}
f10460ed
 			} else if(strcasecmp(mimeSubtype, "external-body") == 0)
22080fa5
 				/* TODO */
1405207a
 				cli_warnmsg("Attempt to send Content-type message/external-body trapped\n");
5a642650
 			else
6fcf5624
 				cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
e3aaff8e
 
f10460ed
 
ad9c6836
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
b726511f
 			if(messages)
 				free(messages);
ede9939c
 #if HAVE_JSON
 			mctx->wrkobj = saveobj;
 #endif
f10460ed
 			return rc;
e3aaff8e
 
5b2bcb65
 		default:
1405207a
 			cli_dbgmsg("Message received with unknown mime encoding - assume application\n");
5b2bcb65
 			/*
 			 * Some Yahoo emails attach as
 			 * Content-Type: X-unknown/unknown;
 			 * instead of
 			 * Content-Type: application/unknown;
 			 * so let's try our best to salvage something
 			 */
e3aaff8e
 		case APPLICATION:
4ab382c3
 			/*cptr = messageGetMimeSubtype(mainMessage);
d4d14218
 
4ab382c3
 			if((strcasecmp(cptr, "octet-stream") == 0) ||
aa0210b6
 			   (strcasecmp(cptr, "x-msdownload") == 0)) {*/
 			{
2673dc74
 				fb = messageToFileblob(mainMessage, mctx->dir, 1);
e3aaff8e
 
0e5a0129
 				if(fb) {
 					cli_dbgmsg("Saving main message as attachment\n");
a585329e
 					if(fileblobScanAndDestroy(fb) == CL_VIRUS)
 						rc = VIRUS;
001ad879
 					mctx->files++;
fbb7262e
 					if(mainMessage != messageIn) {
 						messageDestroy(mainMessage);
 						mainMessage = NULL;
 					} else
 						messageReset(mainMessage);
e3aaff8e
 				}
aa0210b6
 			} /*else
 				cli_warnmsg("Discarded application not sent as attachment\n");*/
e3aaff8e
 			break;
 
 		case AUDIO:
 		case VIDEO:
 		case IMAGE:
 			break;
 		}
2673dc74
 
b912eaf2
 		if(messages) {
 			/* "can't happen" */
fc809c9e
 			cli_warnmsg("messages != NULL\n");
2673dc74
 			free(messages);
b912eaf2
 		}
e3aaff8e
 	}
 
f12d2498
 	if(aText && (textIn == NULL)) {
6fd711b2
 		/* Look for a bounce in the text (non mime encoded) portion */
 		const text *t;
93b4a7b1
 		/* isBounceStart() is expensive, reduce the number of calls */
 		bool lookahead_definately_is_bounce = FALSE;
b116962d
 
a585329e
 		for(t = aText; t && (rc != VIRUS); t = t->t_next) {
6fd711b2
 			const line_t *l = t->t_line;
ebe57840
 			const text *lookahead, *topofbounce;
6fd711b2
 			const char *s;
ebe57840
 			bool inheader;
b116962d
 
93b4a7b1
 			if(l == NULL) {
 				/* assert(lookahead_definately_is_bounce == FALSE) */
6fd711b2
 				continue;
93b4a7b1
 			}
6fd711b2
 
93b4a7b1
 			if(lookahead_definately_is_bounce)
 				lookahead_definately_is_bounce = FALSE;
7021b545
 			else if(!isBounceStart(mctx, lineGetData(l)))
6fd711b2
 				continue;
 
a7a2e2d4
 			lookahead = t->t_next;
 			if(lookahead) {
7021b545
 				if(isBounceStart(mctx, lineGetData(lookahead->t_line))) {
93b4a7b1
 					lookahead_definately_is_bounce = TRUE;
a7a2e2d4
 					/* don't save worthless header lines */
 					continue;
93b4a7b1
 				}
a7a2e2d4
 			} else	/* don't save a single liner */
 				break;
 
6fd711b2
 			/*
 			 * We've found what looks like the start of a bounce
 			 * message. Only bother saving if it really is a bounce
 			 * message, this helps to speed up scanning of ping-pong
 			 * messages that have lots of bounces within bounces in
 			 * them
 			 */
a7a2e2d4
 			for(; lookahead; lookahead = lookahead->t_next) {
6fd711b2
 				l = lookahead->t_line;
 
 				if(l == NULL)
 					break;
 				s = lineGetData(l);
584aadd5
 				if(strncasecmp(s, "Content-Type:", 13) == 0) {
6fd711b2
 					/*
584aadd5
 					 * Don't bother with text/plain or
 					 * text/html
6fd711b2
 					 */
e906fef3
 					if(cli_strcasestr(s, "text/plain") != NULL)
6862efc7
 						/*
584aadd5
 						 * Don't bother to save the
 						 * unuseful part, read past
 						 * the headers then we'll go
 						 * on to look for the next
 						 * bounce message
6862efc7
 						 */
584aadd5
 						continue;
 					if((!doPhishingScan) &&
e906fef3
 					   (cli_strcasestr(s, "text/html") != NULL))
584aadd5
 						continue;
 					break;
 				}
6fd711b2
 			}
 
 			if(lookahead && (lookahead->t_line == NULL)) {
 				cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
 				t = lookahead;
 				/* look for next bounce message */
 				continue;
 			}
 
6862efc7
 			/*
 			 * Prescan the bounce message to see if there's likely
 			 * to be anything nasty.
 			 * This algorithm is hand crafted and may be breakable
 			 * so all submissions are welcome. It's best NOT to
 			 * remove this however you may be tempted, because it
 			 * significantly speeds up the scanning of multiple
 			 * bounces (i.e. bounces within many bounces)
 			 */
 			for(; lookahead; lookahead = lookahead->t_next) {
 				l = lookahead->t_line;
 
 				if(l) {
 					s = lineGetData(l);
 					if((strncasecmp(s, "Content-Type:", 13) == 0) &&
 					   (strstr(s, "multipart/") == NULL) &&
 					   (strstr(s, "message/rfc822") == NULL) &&
 					   (strstr(s, "text/plain") == NULL))
 						break;
 				}
 			}
 			if(lookahead == NULL) {
7c56033f
 				cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
6862efc7
 				/* nothing here, move along please */
 				break;
 			}
ebe57840
 			if((fb = fileblobCreate()) == NULL)
 				break;
 			cli_dbgmsg("Save non mime part bounce message\n");
c1fce7f7
 			fileblobSetFilename(fb, mctx->dir, "bounce");
95e11e5a
 			fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
c1fce7f7
 			fileblobSetCTX(fb, mctx->ctx);
ebe57840
 
 			inheader = TRUE;
 			topofbounce = NULL;
01c99f53
 			do {
ebe57840
 				l = t->t_line;
 
 				if(l == NULL) {
 					if(inheader) {
 						inheader = FALSE;
 						topofbounce = t;
 					}
 				} else {
 					s = lineGetData(l);
95e11e5a
 					fileblobAddData(fb, (const unsigned char *)s, strlen(s));
ebe57840
 				}
95e11e5a
 				fileblobAddData(fb, (const unsigned char *)"\n", 1);
ebe57840
 				lookahead = t->t_next;
 				if(lookahead == NULL)
 					break;
 				t = lookahead;
 				l = t->t_line;
 				if((!inheader) && l) {
 					s = lineGetData(l);
7021b545
 					if(isBounceStart(mctx, s)) {
b2ba24f5
 						cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
be32043e
 						lookahead_definately_is_bounce = TRUE;
ebe57840
 						break;
 					}
 				}
a585329e
 			} while(!fileblobInfected(fb));
 
 			if(fileblobScanAndDestroy(fb) == CL_VIRUS)
 				rc = VIRUS;
001ad879
 			mctx->files++;
ebe57840
 
 			if(topofbounce)
 				t = topofbounce;
6fd711b2
 		}
f12d2498
 		textDestroy(aText);
 		aText = NULL;
 	}
 
b0b860f1
 	/*
 	 * No attachments - scan the text portions, often files
 	 * are hidden in HTML code
 	 */
ecc3d638
 	if(mainMessage && (rc != VIRUS)) {
47d9cc65
 		text *t_line;
 
e3aaff8e
 		/*
b0b860f1
 		 * Look for uu-encoded main file
e3aaff8e
 		 */
f75c04b0
 		if(mainMessage->body_first != NULL &&
 			(encodingLine(mainMessage) != NULL) &&
 			((t_line = bounceBegin(mainMessage)) != NULL))
001ad879
 			rc = (exportBounceMessage(mctx, t_line) == CL_VIRUS) ? VIRUS : OK;
7dde984e
 		else {
b0b860f1
 			bool saveIt;
d4d14218
 
b0b860f1
 			if(messageGetMimeType(mainMessage) == MESSAGE)
f01bbfe8
 				/*
b0b860f1
 				 * Quick peek, if the encapsulated
 				 * message has no
 				 * content encoding statement don't
 				 * bother saving to scan, it's safe
f01bbfe8
 				 */
47d9cc65
 				saveIt = (bool)(encodingLine(mainMessage) != NULL);
7cf2a701
 			else if(mainMessage->body_last != NULL && (t_line = encodingLine(mainMessage)) != NULL) {
a7527b1f
 				/*
b0b860f1
 				 * Some bounces include the message
 				 * body without the headers.
ebe57840
 				 * FIXME: Unfortunately this generates a
b0b860f1
 				 * lot of false positives that a bounce
 				 * has been found when it hasn't.
a7527b1f
 				 */
b0b860f1
 				if((fb = fileblobCreate()) != NULL) {
ae5c693a
 					cli_dbgmsg("Found a bounce message with no header at '%s'\n",
 						lineGetData(t_line->t_line));
c1fce7f7
 					fileblobSetFilename(fb, mctx->dir, "bounce");
f24bf390
 					fileblobAddData(fb,
 						(const unsigned char *)"Received: by clamd (bounce)\n",
 						28);
cca4efe4
 
7dde984e
 					fileblobSetCTX(fb, mctx->ctx);
 					if(fileblobScanAndDestroy(textToFileblob(t_line, fb, 1)) == CL_VIRUS)
 						rc = VIRUS;
001ad879
 					mctx->files++;
5c1150ac
 				}
b0b860f1
 				saveIt = FALSE;
2673dc74
 			} else
b0b860f1
 				/*
 				 * Save the entire text portion,
 				 * since it it may be an HTML file with
a05e6d45
 				 * a JavaScript virus or a phish
b0b860f1
 				 */
 				saveIt = TRUE;
e3aaff8e
 
b0b860f1
 			if(saveIt) {
7dde984e
 				cli_dbgmsg("Saving text part to scan, rc = %d\n",
8affc406
 					(int)rc);
001ad879
 				if(saveTextPart(mctx, mainMessage, 1) == CL_VIRUS)
7dde984e
 					rc = VIRUS;
 
59da5a4f
 				if(mainMessage != messageIn) {
 					messageDestroy(mainMessage);
 					mainMessage = NULL;
 				} else
 					messageReset(mainMessage);
049a18b9
 			}
e3aaff8e
 		}
69c62847
 	} /*else
96435bdc
 		rc = OK_ATTACHMENTS_NOT_SAVED; */	/* nothing saved */
e3aaff8e
 
ad9c6836
 	if(mainMessage && (mainMessage != messageIn))
 		messageDestroy(mainMessage);
 
ecc3d638
 	if((rc != FAIL) && infected)
 		rc = VIRUS;
a603478f
 
ede9939c
 #if HAVE_JSON
 	mctx->wrkobj = saveobj;
 #endif
 
8affc406
 	cli_dbgmsg("parseEmailBody() returning %d\n", (int)rc);
e3aaff8e
 
e06d34dc
 	return rc;
e3aaff8e
 }
 
 /*
  * Is the current line the start of a new section?
  *
  * New sections start with --boundary
  */
 static int
 boundaryStart(const char *line, const char *boundary)
 {
95e11e5a
 	const char *ptr;
 	char *out;
df8806fd
 	int rc;
85bb253e
 	char buf[RFC2821LENGTH + 1];
23889d27
     char *newline;
2ad0c86e
 
6b579c8d
 	if(line == NULL || *line == '\0')
0e4e16d4
 		return 0;	/* empty line */
300a8ae9
 	if(boundary == NULL)
 		return 0;
0e4e16d4
 
23889d27
     newline = strdup(line);
     if (!(newline))
cd94be7a
         newline = (char *)line;
23889d27
 
6b579c8d
     if (newline != line && strlen(line)) {
23889d27
         char *p;
         /* Trim trailing spaces */
77326673
         p = newline + strlen(line) - 1;
26f42733
         while (p >= newline && *p == ' ')
23889d27
             *(p--) = '\0';
     }
 
     if (newline != line)
         cli_chomp(newline);
2ad0c86e
 
d00281e1
 	/* cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary); */
23889d27
 
 	if((*newline != '-') && (*newline != '(')) {
         if (newline != line)
             free(newline);
ebe57840
 		return 0;
23889d27
     }
ebe57840
 
23889d27
 	if(strchr(newline, '-') == NULL) {
         if (newline != line)
             free(newline);
ebe57840
 		return 0;
23889d27
     }
ebe57840
 
23889d27
 	if(strlen(newline) <= sizeof(buf)) {
d72749e0
 		out = NULL;
23889d27
 		ptr = rfc822comments(newline, buf);
d72749e0
 	} else
23889d27
 		ptr = out = rfc822comments(newline, NULL);
d72749e0
 
2ad0c86e
 	if(ptr == NULL)
23889d27
 		ptr = newline;
2ad0c86e
 
a9d251e0
 	if((*ptr++ != '-') || (*ptr == '\0')) {
d72749e0
 		if(out)
 			free(out);
23889d27
         if (newline != line)
             free(newline);
 
0e4e16d4
 		return 0;
2ad0c86e
 	}
0e4e16d4
 
e3aaff8e
 	/*
0e4e16d4
 	 * Gibe.B3 is broken, it has:
e3aaff8e
 	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
 	 * but it's boundaries look like
 	 *	------ =_NextPart_000_01C31177.9DC7C000
0e4e16d4
 	 * notice the one too few '-'.
 	 * Presumably this is a deliberate exploitation of a bug in some mail
 	 * clients.
 	 *
 	 * The trouble is that this creates a lot of false positives for
 	 * boundary conditions, if we're too lax about matches. We do our level
 	 * best to avoid these false positives. For example if we have
 	 * boundary="1" we want to ensure that we don't break out of every line
 	 * that has -1 in it instead of starting --1. This needs some more work.
7c56033f
 	 *
 	 * Look with and without RFC822 comments stripped, I've seen some
 	 * samples where () are taken as comments in boundaries and some where
69c62847
 	 * they're not. Irrespective of whatever RFC2822 says, we need to find
 	 * viruses in both types of mails.
e3aaff8e
 	 */
23889d27
 	if((strstr(&ptr[1], boundary) != NULL) || (strstr(newline, boundary) != NULL)) {
69c62847
 		const char *k = ptr;
 
 		/*
 		 * We need to ensure that we don't match --11=-=-=11 when
 		 * looking for --1=-=-=1 in well behaved headers, that's a
 		 * false positive problem mentioned above
 		 */
 		rc = 0;
 		do
 			if(strcmp(++k, boundary) == 0) {
 				rc = 1;
 				break;
 			}
 		while(*k == '-');
 		if(rc == 0) {
 			k = &line[1];
 			do
 				if(strcmp(++k, boundary) == 0) {
 					rc = 1;
 					break;
 				}
 			while(*k == '-');
 		}
 	} else if(*ptr++ != '-')
 		rc = 0;
df8806fd
 	else
79179da5
 		rc = (strcasecmp(ptr, boundary) == 0);
df8806fd
 
d72749e0
 	if(out)
 		free(out);
df8806fd
 
69c62847
 	if(rc == 1)
df8806fd
 		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
 
23889d27
     if (newline != line)
         free(newline);
 
df8806fd
 	return rc;
e3aaff8e
 }
 
 /*
  * Is the current line the end?
  *
  * The message ends with --boundary--
  *
  * The boundary is compared without regard to case. Anything after the
  * closing "--" is ignored, because some broken mails have white space (or
  * worse) after the boundary.
  *
  * Returns 1 if line is the final boundary, 0 if it isn't, or if line is
  * NULL/empty or boundary is NULL
  */
 static int
 boundaryEnd(const char *line, const char *boundary)
 {
 	const char *p;
 	size_t len;
 
 	if(line == NULL || *line == '\0')
 		return 0;
 	if(boundary == NULL)
 		return 0;
 
 	/* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary); */
 
 	/* line[1] is safe to read, line[0] is known not to be the NUL */
 	if((line[0] != '-') || (line[1] != '-'))
 		return 0;
 	p = &line[2];
 
 	len = strlen(boundary);
 	if(strncasecmp(p, boundary, len) != 0)
 		return 0;
 
 	/*
 	 * All len characters matched, so p[len] exists (it may be the NUL),
 	 * and p[len + 1] is only looked at when p[len] is a '-'.
 	 * There is no need to trim trailing blanks first: they can only
 	 * follow the closing "--", so they never change the answer
 	 */
 	return (p[len] == '-') && (p[len + 1] == '-');
 }
 
 /*
  * Initialise the various lookup tables
  *
  * *rfc821Table is filled from rfc821headers and *subtypeTable from
  * mimeSubtypes; both arrays end with an entry whose key is NULL.
  *
  * Returns 0 for OK, in which case the caller owns both tables.
  * Returns -1 if a table can't be created or filled; anything created so far
  * is destroyed and both *rfc821Table and *subtypeTable are set to NULL
  */
 static int
 initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
 {
 	const struct tableinit *tableinit;
 
 	*rfc821Table = NULL;
 	*subtypeTable = NULL;
 
 	/*
 	 * Initialise the various look up tables
 	 *
 	 * Creation failures are reported to the caller rather than being
 	 * asserted, so that release builds (NDEBUG) don't go on to use a NULL
 	 * table
 	 */
 	*rfc821Table = tableCreate();
 	if(*rfc821Table == NULL)
 		return -1;
 
 	for(tableinit = rfc821headers; tableinit->key; tableinit++)
 		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0)
 			goto fail;
 
 	*subtypeTable = tableCreate();
 	if(*subtypeTable == NULL)
 		goto fail;
 
 	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
 		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0)
 			goto fail;
 
 	return 0;
 
 fail:
 	if(*rfc821Table) {
 		tableDestroy(*rfc821Table);
 		*rfc821Table = NULL;
 	}
 	if(*subtypeTable) {
 		tableDestroy(*subtypeTable);
 		*subtypeTable = NULL;
 	}
 	return -1;
 }
 
 /*
  * Choose which of the parts of a multipart message holds the text.
  *
  * If there's a text/html part, the index of the first one found is
  * returned, since HTML text is most likely to include a scripting worm.
  * Otherwise the index of the last text part of any other subtype is
  * returned. Empty (NULL) slots in messages[] are skipped.
  *
  * If we can't find a text part at all, return -1
  */
 static int
 getTextPart(message *const messages[], size_t size)
 {
 	int lastText = -1;
 	size_t idx = 0;
 
 	while(idx < size) {
 		message *const part = messages[idx];
 
 		if((part != NULL) && (messageGetMimeType(part) == TEXT)) {
 			/* HTML wins outright, no need to look any further */
 			if(strcasecmp(messageGetMimeSubtype(part), "html") == 0)
 				return (int)idx;
 			lastText = (int)idx;
 		}
 		idx++;
 	}
 
 	return lastText;
 }
 
 /*
  * strip -
  *	Remove the trailing spaces from a buffer. Don't call this directly,
  * always call strstrip() which is a wrapper to this routine to be used with
  * NUL terminated strings. This code looks a bit strange because of its
  * heritage from code that worked on strings that weren't necessarily NUL
  * terminated.
  * TODO: rewrite for clamAV
  *
  * Returns its new length (a la strlen). Returns 0 if buf is NULL or
  * len <= 0. If len is larger than strlen(buf) + 1 the buffer is left alone
  * and strlen(buf) is returned.
  *
  * On the UNIX/Linux/Darwin build, stripping stops at (and keeps) a '\n' or
  * '\r'; on other builds all isspace() characters and NULs are removed.
  *
  * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
  *	not strlen(buf)
  */
 static size_t
 strip(char *buf, int len)
 {
 	char *ptr;
 	size_t i;
 
 	if((buf == NULL) || (len <= 0))
 		return 0;
 
 	i = strlen(buf);
 	if(len > (int)(i + 1))
 		return i;
 	ptr = &buf[--len];	/* last byte of the buffer, usually the NUL */
 
 #if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
 	/*
 	 * The <ctype.h> functions need a value representable as an
 	 * unsigned char, a plain char may be negative for 8 bit data
 	 */
 	do
 		if(*ptr)
 			*ptr = '\0';
 	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
 #else	/* more characters can be displayed on DOS */
 	do
 #ifndef	REAL_MODE_DOS
 		if(*ptr)	/* C8.0 puts into a text area */
 #endif
 			*ptr = '\0';
 	while((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
 #endif
 	/* len is now the index of the last character kept, or -1 if none */
 	return((size_t)(len + 1));
 }
 
 /*
  * strstrip:
  *	Strip the trailing white space from a NUL terminated string, in place.
  *	Returns the new length of the string, or 0 if s is NULL
  */
 size_t
 strstrip(char *s)
 {
 	/* strip() wants the size of the buffer, i.e. including the NUL */
 	return (s != (char *)NULL) ? strip(s, strlen(s) + 1) : 0;
 }
 
30e18caf
 /*
  * Act on one MIME header: cmd is the header name and arg is its value.
  *
  * cmd is looked up in rfc821Table once any RFC822 comments have been
  * removed from it. Only CONTENT_TYPE, CONTENT_TRANSFER_ENCODING and
  * CONTENT_DISPOSITION are handled, the result is stored in m. Any other
  * header is silently ignored.
  *
  * Returns 0 for OK, -1 for error (a working buffer couldn't be allocated)
  */
e3aaff8e
 static int
 parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
 {
30e18caf
 	char *copy, *p, *buf;
38d07186
 	const char *ptr;
8b3563f2
 	int commandNumber;
4f1d0bfc
 
e3aaff8e
 	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
8b3563f2
 
d72749e0
 	/*
 	 * rfc822comments() returns NULL if there were no comments to remove
 	 * (or on error), in which case the header name is used as it stands
 	 */
 	copy = rfc822comments(cmd, NULL);
38d07186
 	if(copy) {
 		commandNumber = tableFind(rfc821Table, copy);
 		free(copy);
f017fbdd
 	} else
 		commandNumber = tableFind(rfc821Table, cmd);
8b3563f2
 
d72749e0
 	/* From here on copy, if set, holds arg without its comments */
 	copy = rfc822comments(arg, NULL);
e3aaff8e
 
38d07186
 	/* ptr is the value to parse; it is NULL if arg is NULL */
 	if(copy)
 		ptr = copy;
 	else
 		ptr = arg;
e75e1ad1
 
30e18caf
 	/* scratch buffer for cli_strtokbuf(), freed at the end */
 	buf = NULL;
 
8b3563f2
 	switch(commandNumber) {
e3aaff8e
 		case CONTENT_TYPE:
 			/*
 			 * Fix for non RFC1521 compliant mailers
 			 * that send content-type: Text instead
 			 * of content-type: Text/Plain, or
 			 * just simply "Content-Type:"
 			 */
5e394e73
 			if(arg == NULL)
69543a9d
 				/*
 				 * According to section 4 of RFC1521:
 				 * "Note also that a subtype specification is
 				 * MANDATORY. There are no default subtypes"
 				 *
1eec55a6
 				 * We have to break this and make an assumption
69543a9d
 				 * for the subtype because virus writers and
 				 * email client writers don't get it right
 				 */
6351aa86
 				 cli_dbgmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
38d07186
 			else if(strchr(ptr, '/') == NULL)
69543a9d
 				/*
 				 * Empty field, such as
 				 *	Content-Type:
 				 * which I believe is illegal according to
 				 * RFC1521
 				 */
38d07186
 				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
e3aaff8e
 			else {
8037334b
 				int i;
6d312569
 
30e18caf
 				/* big enough for any field of ptr */
 				buf = cli_malloc(strlen(ptr) + 1);
 				if(buf == NULL) {
059ca614
                     cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
30e18caf
 					if(copy)
 						free(copy);
 					return -1;
 				}
e3aaff8e
 				/*
 				 * Some clients are broken and
 				 * put white space after the ;
 				 */
2625d6a0
 				/* note that this looks at arg, comments and all, not ptr */
 				if(*arg == '/') {
6351aa86
 					cli_dbgmsg("Content-type '/' received, assuming application/octet-stream\n");
2625d6a0
 					messageSetMimeType(m, "application");
 					messageSetMimeSubtype(m, "octet-stream");
 				} else {
 					/*
0960ff5e
 					 * The content type could be in quotes:
 					 *	Content-Type: "multipart/mixed"
 					 * FIXME: this is a hack in that ignores
 					 *	the quotes, it doesn't handle
 					 *	them properly
2625d6a0
 					 */
38d07186
 					/*
 					 * NOTE(review): *ptr is a plain char, which
 					 * may be negative when passed to isspace()
 					 */
 					while(isspace(*ptr))
 						ptr++;
 					if(ptr[0] == '\"')
 						ptr++;
0960ff5e
 
38d07186
 					if(ptr[0] != '/') {
f017fbdd
 						char *s;
38d07186
 #ifdef CL_THREAD_SAFE
1adb6fa8
 						char *strptr = NULL;
38d07186
 #endif
f017fbdd
 
30e18caf
 						/*
 						 * presumably this copies everything before
 						 * the first ';' (the type/subtype) into buf
 						 * - verify against cli_strtokbuf()
 						 */
 						s = cli_strtokbuf(ptr, 0, ";", buf);
0960ff5e
 						/*
6d312569
 						 * Handle
 						 * Content-Type: foo/bar multipart/mixed
 						 * and
 						 * Content-Type: multipart/mixed foo/bar
0960ff5e
 						 */
584aadd5
 						if(s && *s) {
 							/* second scratch buffer, for the subtype */
 							char *buf2 = cli_strdup(buf);
 
 							if(buf2 == NULL) {
 								if(copy)
 									free(copy);
 								free(buf);
 								return -1;
 							}
 							/*
 							 * Each pass takes one "type/subtype"
 							 * pair off the front of s, which is
 							 * chopped up in place by strtok
 							 */
 							for(;;) {
548a5f96
 #ifdef	CL_THREAD_SAFE
584aadd5
 								int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
548a5f96
 #else
584aadd5
 								int set = messageSetMimeType(m, strtok(s, "/"));
548a5f96
 #endif
6d312569
 
548a5f96
 								/* s becomes what follows the '/' */
 #ifdef	CL_THREAD_SAFE
584aadd5
 								s = strtok_r(NULL, ";", &strptr);
548a5f96
 #else
584aadd5
 								s = strtok(NULL, ";");
 #endif
 								if(s == NULL)
 									break;
 								/* only take the subtype if the type was taken */
 								if(set) {
 									/*
 									 * NOTE(review): if strstrip()
 									 * returns 0 (s was nothing but
 									 * white space) len wraps round
 									 * and s[len] is the byte before
 									 * s. A one character subtype
 									 * gives len == 0 and so is not
 									 * set - confirm both are meant
 									 */
 									size_t len = strstrip(s) - 1;
 									/* drop a closing quote */
 									if(s[len] == '\"') {
 										s[len] = '\0';
 										len = strstrip(s);
 									}
 									if(len) {
 										/* the subtype is the first word only */
 										if(strchr(s, ' '))
 											messageSetMimeSubtype(m,
 												cli_strtokbuf(s, 0, " ", buf2));
 										else
 											messageSetMimeSubtype(m, s);
 									}
6d312569
 								}
 
584aadd5
 								/*
 								 * Move on to the word after the
 								 * subtype, if there is one, that's
 								 * the next type/subtype to try
 								 */
 								while(*s && !isspace(*s))
 									s++;
 								if(*s++ == '\0')
 									break;
 								if(*s == '\0')
 									break;
 							}
 							free(buf2);
0960ff5e
 						}
f017fbdd
 					}
2625d6a0
 				}
e3aaff8e
 
 				/*
20d3dde9
 				 * Add in all the rest of the arguments.
 				 * e.g. if the header is this:
 				 * Content-Type:', arg='multipart/mixed; boundary=foo
 				 * we find the boundary argument set it
e3aaff8e
 				 */
8037334b
 				/* field 0 was the type/subtype, start after it */
 				i = 1;
94f051b0
 				while(cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
 					cli_dbgmsg("mimeArgs = '%s'\n", buf);
8037334b
 
94f051b0
 					messageAddArguments(m, buf);
6d312569
 				}
e3aaff8e
 			}
 			break;
 		case CONTENT_TRANSFER_ENCODING:
38d07186
 			messageSetEncoding(m, ptr);
e3aaff8e
 			break;
 		case CONTENT_DISPOSITION:
30e18caf
 			/*
 			 * NOTE(review): unlike CONTENT_TYPE there is no
 			 * check for arg == NULL here, ptr would then be
 			 * NULL in strlen() - confirm callers never do that
 			 */
 			buf = cli_malloc(strlen(ptr) + 1);
 			if(buf == NULL) {
059ca614
                 cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
30e18caf
 				if(copy)
 					free(copy);
 				return -1;
 			}
 			/*
 			 * The first field is the disposition type, the
 			 * second is handed on as an argument
 			 */
 			p = cli_strtokbuf(ptr, 0, ";", buf);
faa0d267
 			if(p && *p) {
 				messageSetDispositionType(m, p);
 				messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
548a5f96
 			}
be32043e
 			if(!messageHasFilename(m))
ab84808e
 				/*
 				 * Handle this type of header, without
 				 * a filename (e.g. some Worm.Torvil.D)
 				 *	Content-ID: <nRfkHdrKsAxRU>
 				 * Content-Transfer-Encoding: base64
 				 * Content-Disposition: attachment
 				 */
 				messageAddArgument(m, "filename=unknown");
e3aaff8e
 	}
38d07186
 	if(copy)
 		free(copy);
30e18caf
 	if(buf)
 		free(buf);
e3aaff8e
 
4f1d0bfc
 	return 0;
e3aaff8e
 }
 
e06d34dc
 /*
cca4efe4
  * Save the text portion of the message
  */
7dde984e
 static int
001ad879
 saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
cca4efe4
 {
0e5a0129
 	fileblob *fb;
cca4efe4
 
 	messageAddArgument(m, "filename=textportion");
001ad879
 	if((fb = messageToFileblob(m, mctx->dir, destroy_text)) != NULL) {
cca4efe4
 		/*
 		 * Save main part to scan that
 		 */
c77c8809
 		cli_dbgmsg("Saving main message\n");
cca4efe4
 
001ad879
 		mctx->files++;
7dde984e
 		return fileblobScanAndDestroy(fb);
cca4efe4
 	}
7dde984e
 	return CL_ETMPFILE;
cca4efe4
 }
 
a9714c49
 /*
  * Handle RFC822 comments in headers.
  *
  * Copies in to out without its comments, that is without anything between
  * round brackets (which may be nested) unless the brackets are inside double
  * quotes. A backslash quotes the character after it: the character is
  * copied (when outside a comment) and the backslash itself is dropped,
  * except for a lone backslash at the very end. White space at the start of
  * in is skipped.
  *
  * If out == NULL, return a buffer without the comments, the caller must free
  *	the returned buffer
  * If out != NULL, it must not be in and must have room for strlen(in) + 1
  *	bytes, out is returned
  * Return NULL on error or if the input has no comments, in which case out
  *	has not been written to.
  * See section 3.4.3 of RFC822
  * TODO: handle comments that go on to more than one line
  */
 static char *
 rfc822comments(const char *in, char *out)
 {
 	const char *iptr;
 	char *optr;
 	int backslash = 0, inquote = 0, commentlevel = 0;
 
 	if(in == NULL)
 		return NULL;
 
 	if(strchr(in, '(') == NULL)
 		return NULL;	/* nothing to do */
 
 	assert(out != in);
 
 	/*
 	 * The <ctype.h> functions need a value representable as an unsigned
 	 * char, a plain char may be negative for 8 bit data
 	 */
 	while(isspace((unsigned char)*in))
 		in++;
 
 	if(out == NULL) {
 		out = cli_malloc(strlen(in) + 1);
 		if(out == NULL) {
 			cli_errmsg("rfc822comments: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
 			return NULL;
 		}
 	}
 
 	optr = out;
 
 	cli_dbgmsg("rfc822comments: contains a comment\n");
 
 	for(iptr = in; *iptr; iptr++) {
 		if(backslash) {
 			/* quoted character, taken literally */
 			if(commentlevel == 0)
 				*optr++ = *iptr;
 			backslash = 0;
 			continue;
 		}
 		switch(*iptr) {
 			case '\\':
 				backslash = 1;
 				break;
 			case '\"':
 				/* quotes are kept, even inside a comment */
 				*optr++ = '\"';
 				inquote = !inquote;
 				break;
 			case '(':
 				if(inquote)
 					*optr++ = '(';
 				else
 					commentlevel++;
 				break;
 			case ')':
 				if(inquote)
 					*optr++ = ')';
 				else if(commentlevel > 0)
 					commentlevel--;
 				/* an unmatched ')' is dropped */
 				break;
 			default:
 				if(commentlevel == 0)
 					*optr++ = *iptr;
 				break;
 		}
 	}
 
 	if(backslash)	/* last character was a single backslash */
 		*optr++ = '\\';
 	*optr = '\0';
 
 	/*strstrip(out);*/
 
 	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);
 
 	return out;
 }
50df4118
 
 /*
  * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error
  */
 static char *
 rfc2047(const char *in)
 {
 	char *out, *pout;
 	size_t len;
 
4de5fffd
 	if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
0cf4cea7
 		return cli_strdup(in);
50df4118
 
 	cli_dbgmsg("rfc2047 '%s'\n", in);
 	out = cli_malloc(strlen(in) + 1);
 
241e7eb1
 	if(out == NULL) {
059ca614
         cli_errmsg("rfc2047: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
50df4118
 		return NULL;
241e7eb1
     }
50df4118
 
 	pout = out;
 
 	/* For each RFC2047 string */
 	while(*in) {
cf569541
 		char encoding, *ptr, *enctext;
50df4118
 		message *m;
 		blob *b;
 
 		/* Find next RFC2047 string */
 		while(*in) {
 			if((*in == '=') && (in[1] == '?')) {
 				in += 2;
 				break;
 			}
 			*pout++ = *in++;
 		}
 		/* Skip over charset, find encoding */
 		while((*in != '?') && *in)
 			in++;
 		if(*in == '\0')
 			break;
 		encoding = *++in;
3471518d
 		encoding = (char)tolower(encoding);
50df4118
 
 		if((encoding != 'q') && (encoding != 'b')) {
6fcf5624
 			cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
1b00d9a4
 			free(out);
 			out = NULL;
50df4118
 			break;
 		}
 		/* Skip to encoded text */
 		if(*++in != '?')
 			break;
 		if(*++in == '\0')
 			break;
 
0cf4cea7
 		enctext = cli_strdup(in);
cf569541
 		if(enctext == NULL) {
 			free(out);
 			out = NULL;
 			break;
 		}
50df4118
 		in = strstr(in, "?=");
cf569541
 		if(in == NULL) {
 			free(enctext);
50df4118
 			break;
cf569541
 		}
50df4118
 		in += 2;
 		ptr = strstr(enctext, "?=");
 		assert(ptr != NULL);
 		*ptr = '\0';
 		/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
 
 		m = messageCreate();
c77c8809
 		if(m == NULL)
50df4118
 			break;
564b3e07
 		messageAddStr(m, enctext);
cf569541
 		free(enctext);
767f16ab
 		switch(encoding) {
50df4118
 			case 'q':
 				messageSetEncoding(m, "quoted-printable");
 				break;
 			case 'b':
 				messageSetEncoding(m, "base64");
 				break;
 		}
2673dc74
 		b = messageToBlob(m, 1);
39c89d14
                 if (b == NULL) {
                     messageDestroy(m);
                     break;
                 }
50df4118
 		len = blobGetDataSize(b);
95e11e5a
 		cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
8affc406
 			(const char *)blobGetData(b));
50df4118
 		memcpy(pout, blobGetData(b), len);
 		blobDestroy(b);
 		messageDestroy(m);
d1100be3
 		if(len > 0 && pout[len - 1] == '\n')
50df4118
 			pout += len - 1;
 		else
 			pout += len;
 
 	}
ec8e31fa
 	if(out == NULL)
 		return NULL;
 
 	*pout = '\0';
50df4118
 
ec8e31fa
 	cli_dbgmsg("rfc2047 returns '%s'\n", out);
50df4118
 	return out;
 }
 
f10460ed
 /*
  * Handle partial messages
  */
 /*
  * m:   message carrying the "id", "number" and (optionally) "total"
  *      arguments of a partial message
  * dir: directory in which the reassembled message is written
  *
  * The part is saved with messageSavePartial() under
  * <tmpdir>/clamav-partial, keyed by the MD5 of the id. When "number" equals
  * "total" the directory is searched for parts 1..total (entries whose name,
  * from the first '_' on, is "_<md5>-<n>") and they are concatenated into
  * <dir>/<sanitised id>, dropping blank lines at the end of each part.
  *
  * Returns 0 on success, CL_EMEM if the MD5 hex string cannot be allocated,
  * -1 on any other failure.
  */
 static int
 rfc1341(message *m, const char *dir)
 {
 	char *arg, *id, *number, *total, *oldfilename;
 	const char *tmpdir;
 	int n;
 	char pdir[NAME_MAX + 1];
 	unsigned char md5_val[16];
 	char *md5_hex;

 	id = (char *)messageFindArgument(m, "id");
 	if(id == NULL)
 		return -1;

 	tmpdir = cli_gettmpdir();

 	snprintf(pdir, sizeof(pdir) - 1, "%s"PATHSEP"clamav-partial", tmpdir);

 	/*
 	 * errno is only meaningful when mkdir() has failed, so look at it
 	 * inside the failure branch only; a stale EEXIST left by an earlier
 	 * call must not be mistaken for "directory already existed"
 	 */
 	if(mkdir(pdir, S_IRWXU) < 0) {
 		STATBUF statb;

 		if(errno != EEXIST) {
 			cli_errmsg("Can't create the directory '%s'\n", pdir);
 			free(id);
 			return -1;
 		}

 		if(CLAMSTAT(pdir, &statb) < 0) {
 			char err[128];
 			cli_errmsg("Partial directory %s: %s\n", pdir,
 				cli_strerror(errno, err, sizeof(err)));
 			free(id);
 			return -1;
 		}
 		/* any group/other permission bit makes the directory suspect */
 		if(statb.st_mode & 077)
 			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
 				pdir,
 #ifdef	ACCESSPERMS
 				(int)(statb.st_mode&ACCESSPERMS)
 #else
 				(int)(statb.st_mode & 0777)
 #endif
 			);
 	}

 	number = (char *)messageFindArgument(m, "number");
 	if(number == NULL) {
 		free(id);
 		return -1;
 	}

 	oldfilename = messageGetFilename(m);

 	/* 10 = strlen("filename=") + terminating NUL */
 	arg = cli_malloc(10 + strlen(id) + strlen(number));
 	if(arg) {
 		sprintf(arg, "filename=%s%s", id, number);
 		messageAddArgument(m, arg);
 		free(arg);
 	}

 	if(oldfilename) {
 		cli_dbgmsg("Must reset to %s\n", oldfilename);
 		free(oldfilename);
 	}

 	n = atoi(number);
 	cl_hash_data("md5", id, strlen(id), md5_val, NULL);
 	md5_hex = cli_str2hex((const char*)md5_val, 16);

 	if(!md5_hex) {
 		free(id);
 		free(number);
 		return CL_EMEM;
 	}

 	if(messageSavePartial(m, pdir, md5_hex, n) < 0) {
 		free(md5_hex);
 		free(id);
 		free(number);
 		return -1;
 	}

 	total = (char *)messageFindArgument(m, "total");
 	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
 	if(total) {
 		int t = atoi(total);
 		DIR *dd = NULL;

 		free(total);
 		/*
 		 * If it's the last one - reassemble it
 		 * FIXME: this assumes that we receive the parts in order
 		 */
 		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
 			FILE *fout;
 			char outname[NAME_MAX + 1];
 			time_t now;

 			sanitiseName(id);

 			snprintf(outname, sizeof(outname) - 1, "%s"PATHSEP"%s", dir, id);

 			cli_dbgmsg("outname: %s\n", outname);

 			fout = fopen(outname, "wb");
 			if(fout == NULL) {
 				cli_errmsg("Can't open '%s' for writing", outname);
 				free(id);
 				free(number);
 				free(md5_hex);
 				closedir(dd);
 				return -1;
 			}

 			time(&now);
 			for(n = 1; n <= t; n++) {
 				char filename[NAME_MAX + 1];
 				struct dirent *dent;
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 				union {
 					struct dirent d;
 					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
 				} result;
 #endif

 				/* n is an int, so print it with %d */
 				snprintf(filename, sizeof(filename), "_%s-%d", md5_hex, n);

 #ifdef HAVE_READDIR_R_3
 				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
 #elif defined(HAVE_READDIR_R_2)
 				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
 #else	/*!HAVE_READDIR_R*/
 				while((dent = readdir(dd))) {
 #endif
 					FILE *fin;
 					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
 					int nblanks;
 					STATBUF statb;
 					const char *dentry_idpart;
 					int test_fd;

 					if(dent->d_ino == 0)
 						continue;

 					if(!strcmp(".",dent->d_name) ||
 							!strcmp("..", dent->d_name))
 						continue;
 					snprintf(fullname, sizeof(fullname) - 1,
 						"%s"PATHSEP"%s", pdir, dent->d_name);
 					dentry_idpart = strchr(dent->d_name, '_');

 					if(!dentry_idpart ||
 							strcmp(filename, dentry_idpart) != 0) {
 						/*
 						 * Not the part we want. When
 						 * temporary files are being
 						 * kept, remove it if it is
 						 * more than a week old
 						 */
 						if(!m->ctx->engine->keeptmp)
 							continue;

 						if((test_fd = open(fullname, O_RDONLY)) < 0)
 							continue;

 						if(FSTAT(test_fd, &statb) < 0) {
 							close(test_fd);
 							continue;
 						}

 						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600)) {
 							if(cli_unlink(fullname)) {
 								cli_unlink(outname);
 								fclose(fout);
 								free(md5_hex);
 								free(id);
 								free(number);
 								closedir(dd);
 								close(test_fd);
 								return -1;
 							}
 						}

 						close(test_fd);
 						continue;
 					}

 					fin = fopen(fullname, "rb");
 					if(fin == NULL) {
 						cli_errmsg("Can't open '%s' for reading", fullname);
 						fclose(fout);
 						cli_unlink(outname);
 						free(md5_hex);
 						free(id);
 						free(number);
 						closedir(dd);
 						return -1;
 					}
 					nblanks = 0;
 					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
 						/*
 						 * Ensure that trailing newlines
 						 * aren't copied: blank lines are
 						 * counted and only written once
 						 * a non-blank line follows
 						 */
 						if(buffer[0] == '\n')
 							nblanks++;
 						else {
 							if(nblanks)
 								do {
 									if(putc('\n', fout) == EOF)
 										break;
 								} while(--nblanks > 0);
 							/* nblanks is still non-zero if putc failed */
 							if(nblanks || fputs(buffer, fout) == EOF) {
 								fclose(fin);
 								fclose(fout);
 								cli_unlink(outname);
 								free(md5_hex);
 								free(id);
 								free(number);
 								closedir(dd);
 								return -1;
 							}
 						}
 					fclose(fin);

 					/* don't unlink if leave temps */
 					if(!m->ctx->engine->keeptmp) {
 						if(cli_unlink(fullname)) {
 							fclose(fout);
 							cli_unlink(outname);
 							free(md5_hex);
 							free(id);
 							free(number);
 							closedir(dd);
 							return -1;
 						}
 					}
 					break;
 				}
 				/* start the directory scan afresh for the next part */
 				rewinddir(dd);
 			}
 			closedir(dd);
 			fclose(fout);
 		}
 	}
 	free(number);
 	free(id);
 	free(md5_hex);

 	return 0;
 }
 
c52d991e
 /*
  * Release what getHrefs() handed out: the blob, when there is one, and the
  * contents of the tag argument list
  */
 static void
 hrefs_done(blob *b, tag_arguments_t *hrefs)
 {
 	if(b != NULL) {
 		blobDestroy(b);
 	}

 	html_tag_arg_free(hrefs);
 }
 
f2b71eb9
 /* extract URLs from static text */
 /*
  * Scan 'len' bytes at 'mem' for "http:", "https:" and "ftp:" (any letter
  * case) and add each URL found to 'hrefs' as an "href" argument. A URL runs
  * up to the next space, newline or tab, and is cut to fit the local buffer.
  * The last 10 bytes of the buffer are never considered as a URL start.
  */
 static void extract_text_urls(const unsigned char *mem, size_t len, tag_arguments_t *hrefs)
 {
     char url[1024];
     size_t off;

     for (off = 0; off + 10 < len; off++) {
         size_t url_len;
         int is_url;
         /* check whether this is the start of a URL */
         int32_t proto = cli_readint32(mem + off);

         /* convert to lowercase */
         proto |= 0x20202020;

         /* 'http' or 'ftp:' in little-endian */
         if (proto == 0x70747468) {
             /*
              * "http" must be followed by ':' or by "s:". The 's' sits at
              * offset 4 and the ':' at offset 5; the test used to look at
              * offsets 5 and 6, so "https:" never matched. Both offsets
              * are in range because off + 10 < len.
              */
             is_url = (mem[off + 4] == ':') ||
                      (((mem[off + 4] | 0x20) == 's') && (mem[off + 5] == ':'));
         } else {
             is_url = (proto == 0x3a707466);
         }

         if (!is_url)
             continue;

         for (url_len = 4; off + url_len < len && url_len < (sizeof(url) - 1); url_len++) {
             unsigned char c = mem[off + url_len];
             /* smart compilers will compile this if into
              * a single bt + jb instruction */
             if (c == ' ' || c == '\n' || c == '\t')
                 break;
         }
         memcpy(url, mem + off, url_len);
         url[url_len] = '\0';
         html_tag_arg_add(hrefs, "href", url);
         /* carry on after the URL just stored */
         off += url_len;
     }
 }
 
c52d991e
 /*
  * This used to be part of checkURLs, split out, because phishingScan needs it
  * too, and phishingScan might be used in situations where checkURLs is
  * disabled (see ifdef)
  */
 /*
  * Decode message 'm' to a blob and collect its links into 'hrefs'.
  *
  * Returns the blob (to be released with hrefs_done()), or NULL when the
  * message cannot be decoded, is empty, is larger than 100KB, or
  * html_normalise_mem() reports failure.
  */
 static blob *
 getHrefs(message *m, tag_arguments_t *hrefs)
 {
 	blob *body;
 	unsigned char *data;
 	size_t size;

 	body = messageToBlob(m, 0);
 	if(body == NULL)
 		return NULL;

 	size = blobGetDataSize(body);
 	if(size == 0) {
 		blobDestroy(body);
 		return NULL;
 	}

 	/* TODO: make this size customisable */
 	if(size > 100*1024) {
 		cli_dbgmsg("Viruses pointed to by URLs not scanned in large message\n");
 		blobDestroy(body);
 		return NULL;
 	}

 	/* start from an empty list */
 	hrefs->count = 0;
 	hrefs->tag = NULL;
 	hrefs->value = NULL;
 	hrefs->contents = NULL;

 	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
 	data = blobGetData(body);
 	if(!html_normalise_mem(data, (off_t)size, NULL, hrefs,m->ctx->dconf)) {
 		blobDestroy(body);
 		return NULL;
 	}
 	cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

 	/* no links found in the markup: look for bare URLs in the text */
 	if((hrefs->count == 0) && hrefs->scanContents)
 		extract_text_urls(data, size, hrefs);

 	/* TODO: Do we need to call remove_html_comments? */
 	return body;
 }
 
ad422cc9
 /*
7dde984e
  * validate URLs for phishes
ad422cc9
  * followurls: see if URLs point to malware
  */
c52d991e
 static void
ecc3d638
 checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
c52d991e
 {
94aea271
 	blob *b;
ad422cc9
 	tag_arguments_t hrefs;
c52d991e
 
cd94be7a
     UNUSEDPARAM(is_html);
 
7dde984e
 	if(*rc == VIRUS)
 		return;
 
d77ac7de
 	hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
c52d991e
 
94aea271
 	if(!hrefs.scanContents)
093e013c
 		/*
 		 * Don't waste time extracting hrefs (parsing html), nobody
 		 * will need it
 		 */
c52d991e
 		return;
 
94aea271
 	hrefs.count = 0;
 	hrefs.tag = hrefs.value = NULL;
 	hrefs.contents = NULL;
 
 	b = getHrefs(mainMessage, &hrefs);
 	if(b) {
d77ac7de
 		if(hrefs.scanContents) {
08402afa
 			if(phishingScan(mctx->ctx, &hrefs) == CL_VIRUS) {
8affc406
 				/*
 				 * FIXME: message objects' contents are
 				 *	encapsulated so we should not access
 				 *	the members directly
 				 */
94aea271
 				mainMessage->isInfected = TRUE;
ecc3d638
 				*rc = VIRUS;
94aea271
 				cli_dbgmsg("PH:Phishing found\n");
 			}
 		}
 	}
 	hrefs_done(b,&hrefs);
c52d991e
 }
 
f2f25418
 #ifdef HAVE_BACKTRACE
4f1d0bfc
 /*
  * SIGSEGV handler: log a backtrace (to syslog as well) and terminate with
  * SIGSEGV as the exit status
  */
 static void
 sigsegv(int sig)
 {
 	(void)sig;	/* the signal number itself is not needed */

 	/* back to the default action before doing anything else */
 	signal(SIGSEGV, SIG_DFL);

 	print_trace(1);

 	exit(SIGSEGV);
 }
 
4f1d0bfc
 /*
  * Log up to 10 frames of the current call stack with cli_errmsg(), and
  * with syslog(LOG_ERR) as well when use_syslog is non-zero
  */
 static void
 print_trace(int use_syslog)
 {
 	void *array[10];
 	size_t size;
 	char **strings;
 	size_t i;
 	pid_t pid = getpid();

 	cli_errmsg("Segmentation fault, attempting to print backtrace\n");

 	size = backtrace(array, 10);
 	strings = backtrace_symbols(array, size);

 	/* pid_t need not be int, so convert explicitly for %d */
 	cli_errmsg("Backtrace of pid %d:\n", (int)pid);
 	if(use_syslog)
 		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

 	/*
 	 * backtrace_symbols() returns NULL when it cannot allocate the
 	 * array of names; in that case there is nothing to list
 	 */
 	if(strings != NULL)
 		for(i = 0; i < size; i++) {
 			cli_errmsg("%s\n", strings[i]);
 			if(use_syslog)
 				syslog(LOG_ERR, "bt[%llu]: %s", (unsigned long long)i, strings[i]);
 		}

 #ifdef	SAVE_TMP
 	cli_errmsg("The errant mail file has been saved\n");
 #endif
 	/* #else TODO: dump the current email */

 	/* one free() releases the whole array of names; free(NULL) is harmless */
 	free(strings);
 }
 #endif
f24bf390
 
6cad6a15
 /* See also clamav-milter */
ddea752e
 /*
  * Is this a header worth keeping? True for the three Content-* commands
  * and for headers named "From", "Received" or "De" (case-insensitive)
  */
 static bool
 usefulHeader(int commandNumber, const char *cmd)
 {
 	if((commandNumber == CONTENT_TRANSFER_ENCODING) ||
 	   (commandNumber == CONTENT_DISPOSITION) ||
 	   (commandNumber == CONTENT_TYPE))
 		return TRUE;

 	if((strcasecmp(cmd, "From") == 0) ||
 	   (strcasecmp(cmd, "Received") == 0) ||
 	   (strcasecmp(cmd, "De") == 0))
 		return TRUE;

 	return FALSE;
 }
 
ae5c693a
 /*
72cf1461
  * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
  */
 static char *
6bcd0c86
 getline_from_mbox(char *buffer, size_t buffer_len, fmap_t *map, size_t *at)
72cf1461
 {
f304dc68
     const char *src, *cursrc;
     char *curbuf;
ee1b2a6c
     size_t i;
6bcd0c86
     size_t input_len = MIN(map->len - *at, buffer_len + 1);
     src = cursrc = fmap_need_off_once(map, *at, input_len);
72cf1461
 
f87a92ca
 /*	we check for eof from the result of GETC()
  *	if(feof(fin)) 
 		return NULL;*/
ee1b2a6c
     if(!src) {
 	cli_dbgmsg("getline_from_mbox: fmap need failed\n");
 	return NULL;
     }
6bcd0c86
     if((buffer_len == 0) || (buffer == NULL)) {
964a1e73
 	cli_errmsg("Invalid call to getline_from_mbox(). Refer to https://www.clamav.net/documents/installing-clamav\n");
ee1b2a6c
 	return NULL;
     }
72cf1461
 
ee1b2a6c
     curbuf = buffer;
 	
6bcd0c86
     for(i=0; i<buffer_len-1; i++) {
 	char c;
72cf1461
 
6bcd0c86
 	if(!input_len--) {
 	    if(curbuf == buffer) {
 		/* EOF on first char */
72cf1461
 		return NULL;
6bcd0c86
 	    }
 	    break;
72cf1461
 	}
 
6bcd0c86
 	switch((c = *cursrc++)) {
ee1b2a6c
 	case '\0':
 	    continue;
 	case '\n':
 	    *curbuf++ = '\n';
6bcd0c86
 	    if(input_len && *cursrc == '\r') {
ee1b2a6c
 		i++;
 		cursrc++;
 	    }
 	    break;
 	case '\r':
 	    *curbuf++ = '\r';
6bcd0c86
 	    if(input_len && *cursrc == '\n') {
ee1b2a6c
 		i++;
 		cursrc++;
 	    }
 	    break;
 	default:
 	    *curbuf++ = c;
 	    continue;
72cf1461
 	}
ee1b2a6c
 	break;
     }
     *at += cursrc - src;
     *curbuf = '\0';
     
     return buffer;
72cf1461
 }
b2ba24f5
 
a603478f
 /*
  * Is this line a candidate for the start of a bounce message?
  */
b2ba24f5
 static bool
7021b545
 isBounceStart(mbox_ctx *mctx, const char *line)
b2ba24f5
 {
be32043e
 	size_t len;
 
b2ba24f5
 	if(line == NULL)
 		return FALSE;
 	if(*line == '\0')
 		return FALSE;
25071deb
 	/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
b2ba24f5
 		return FALSE;
 	if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
25071deb
 		return FALSE;*/
b2ba24f5
 
be32043e
 	len = strlen(line);
 	if((len < 6) || (len >= 72))
 		return FALSE;
 
 	if((memcmp(line, "From ", 5) == 0) ||
 	   (memcmp(line, ">From ", 6) == 0)) {
b2ba24f5
 		int numSpaces = 0, numDigits = 0;
 
d563818f
 		line += 4;
 
b2ba24f5
 		do
 			if(*line == ' ')
 				numSpaces++;
8affc406
 			else if(isdigit((*line) & 0xFF))
b2ba24f5
 				numDigits++;
 		while(*++line != '\0');
 
 		if(numSpaces < 6)
 			return FALSE;
 		if(numDigits < 11)
 			return FALSE;
164ba3b8
 		return TRUE;
b2ba24f5
 	}
1f6e52cb
 	return (bool)(cli_filetype((const unsigned char *)line, len, mctx->ctx->engine) == CL_TYPE_MAIL);
b2ba24f5
 }
a05e6d45
 
 /*
  * Extract a binhexEncoded message, return if it's found to be infected as we
  *	extract it
  */
 static bool
001ad879
 exportBinhexMessage(mbox_ctx *mctx, message *m)
a05e6d45
 {
 	bool infected = FALSE;
 	fileblob *fb;
 
 	if(messageGetEncoding(m) == NOENCODING)
 		messageSetEncoding(m, "x-binhex");
 
001ad879
 	fb = messageToFileblob(m, mctx->dir, 0);
a05e6d45
 
 	if(fb) {
 		cli_dbgmsg("Binhex file decoded to %s\n",
 			fileblobGetFilename(fb));
a585329e
 
 		if(fileblobScanAndDestroy(fb) == CL_VIRUS)
 			infected = TRUE;
001ad879
 		mctx->files++;
a05e6d45
 	} else
001ad879
 		cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
a05e6d45
 
 	return infected;
 }
c1fce7f7
 
 /*
7dde984e
  * Locate any bounce message and extract it. Return cl_status
47d9cc65
  */
 static int
001ad879
 exportBounceMessage(mbox_ctx *mctx, text *start)
47d9cc65
 {
7dde984e
 	int rc = CL_CLEAN;
47d9cc65
 	text *t;
 	fileblob *fb;
 
 	/*
 	 * Attempt to save the original (unbounced)
 	 * message - clamscan will find that in the
 	 * directory and call us again (with any luck)
 	 * having found an e-mail message to handle.
 	 *
 	 * This finds a lot of false positives, the
 	 * search that a content type is in the
 	 * bounce (i.e. it's after the bounce header)
 	 * helps a bit.
 	 *
 	 * messageAddLine
7cd9337a
 	 * optimization could help here, but needs
47d9cc65
 	 * careful thought, do it with line numbers
 	 * would be best, since the current method in
 	 * messageAddLine of checking encoding first
 	 * must remain otherwise non bounce messages
 	 * won't be scanned
 	 */
 	for(t = start; t; t = t->t_next) {
 		const char *txt = lineGetData(t->t_line);
ecc3d638
 		char cmd[RFC2821LENGTH + 1];
47d9cc65
 
 		if(txt == NULL)
 			continue;
 		if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
 			continue;
 
 		switch(tableFind(mctx->rfc821Table, cmd)) {
 			case CONTENT_TRANSFER_ENCODING:
 				if((strstr(txt, "7bit") == NULL) &&
 				   (strstr(txt, "8bit") == NULL))
 					break;
 				continue;
 			case CONTENT_DISPOSITION:
 				break;
 			case CONTENT_TYPE:
 				if(strstr(txt, "text/plain") != NULL)
 					t = NULL;
 				break;
 			default:
 				if(strcasecmp(cmd, "From") == 0)
 					start = t;
 				else if(strcasecmp(cmd, "Received") == 0)
 					start = t;
 				continue;
 		}
 		break;
 	}
 	if(t && ((fb = fileblobCreate()) != NULL)) {
 		cli_dbgmsg("Found a bounce message\n");
 		fileblobSetFilename(fb, mctx->dir, "bounce");
7dde984e
 		fileblobSetCTX(fb, mctx->ctx);
 		if(textToFileblob(start, fb, 1) == NULL) {
47d9cc65
 			cli_dbgmsg("Nothing new to save in the bounce message\n");
7dde984e
 			fileblobDestroy(fb);
 		} else
 			rc = fileblobScanAndDestroy(fb);
001ad879
 		mctx->files++;
47d9cc65
 	} else
 		cli_dbgmsg("Not found a bounce message\n");
 
 	return rc;
 }
 
 /*
ede9939c
  * Get string representation of mimetype
  */
 /*
  * Reverse lookup in mimeTypeStr: the key of the first entry whose value is
  * 'mimetype', or "UNKNOWN" when the end of the table (NULL key) is reached
  */
 static	const	char	*getMimeTypeStr(mime_type mimetype)
 {
 	const struct tableinit *row;

 	for(row = mimeTypeStr; row->key != NULL; row++)
 		if(row->value == mimetype)
 			return row->key;

 	return "UNKNOWN";
 }
 
 /*
  * Get string representation of encoding type
  */
 /*
  * Reverse lookup in encTypeStr: the key of the first entry whose value is
  * 'enctype', or "UNKNOWN" when the end of the table (NULL key) is reached
  */
 static	const	char	*getEncTypeStr(encoding_type enctype)
 {
 	const struct tableinit *row;

 	for(row = encTypeStr; row->key != NULL; row++)
 		if(row->value == enctype)
 			return row->key;

 	return "UNKNOWN";
 }
 
 /*
c1fce7f7
  * Handle the ith element of a number of multiparts, e.g. multipart/alternative
  */
 /*
  * do_multipart: process messages[i], one part of a multipart message.
  *
  * mainMessage     - the current main message; some branches destroy it
  *                   (never when it is messageIn) and carry on with NULL
  * messages        - the array of parts; messages[i] is destroyed and set
  *                   to NULL on most paths through this function
  * i               - index of the part to handle
  * rc              - in/out status; set to VIRUS when something infected
  *                   is found, nothing is done if it is not OK on entry
  * mctx            - mbox context (output dir, lookup tables, scan ctx,
  *                   file counter, JSON work object)
  * messageIn       - the caller's own message, which must not be destroyed
  * tptr            - *tptr is handed to parseEmailBody() for a nested
  *                   multipart
  * recursion_level - nesting depth; recursion is entered with level + 1
  *
  * Returns mainMessage, which may have become NULL.
  */
 static message *
ecc3d638
 do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
c1fce7f7
 {
 	bool addToText = FALSE;
 	const char *dtype;
 #ifndef	SAVE_TO_DISC
 	message *body;
 #endif
 	message *aMessage = messages[i];
d77ac7de
 	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
ede9939c
 #if HAVE_JSON
 	const char *mtype = NULL;
c44d7e7a
 	/* saveobj remembers the JSON work object so it can be restored after recursing */
 	json_object *thisobj = NULL, *saveobj = mctx->wrkobj;
ede9939c
 
 	if (mctx->wrkobj != NULL) {
c44d7e7a
 		json_object *multiobj = cli_jsonarray(mctx->wrkobj, "Multipart");
ede9939c
 		if (multiobj == NULL) {
 			cli_errmsg("Cannot get multipart preclass array\n");
 			*rc = -1;
 			return mainMessage;
 		}
d77ac7de
 
 		/* NOTE(review): aMessage has not been checked for NULL yet at this point - confirm messageGetJObj() copes */
ede9939c
 		thisobj = messageGetJObj(aMessage);
 		if (thisobj == NULL) {
 			cli_errmsg("Cannot get message preclass object\n");
 			*rc = -1;
 			return mainMessage;
 		}
 		if (cli_json_addowner(multiobj, thisobj, NULL, -1) != CL_SUCCESS) {
 			cli_errmsg("Cannot assign message preclass object to multipart preclass array\n");
 			*rc = -1;
 			return mainMessage;
 		}
 	}
 #endif
 
 	if(aMessage == NULL) {
 #if HAVE_JSON
 		if (thisobj != NULL)
 			cli_jsonstr(thisobj, "MimeType", "NULL");
 #endif
c1fce7f7
 		return mainMessage;
ede9939c
 	}
c1fce7f7
 
69c62847
 	/* an earlier part has already changed the status: leave this one alone */
 	if(*rc != OK)
 		return mainMessage;
 
c1fce7f7
 	cli_dbgmsg("Mixed message part %d is of type %d\n",
 		i, messageGetMimeType(aMessage));
 
ede9939c
 #if HAVE_JSON
 	if (thisobj != NULL) {
 		cli_jsonstr(thisobj, "MimeType", getMimeTypeStr(messageGetMimeType(aMessage)));
 		cli_jsonstr(thisobj, "MimeSubtype", messageGetMimeSubtype(aMessage));
 		cli_jsonstr(thisobj, "EncodingType", getEncTypeStr(messageGetEncoding(aMessage)));
 		cli_jsonstr(thisobj, "Disposition", messageGetDispositionType(aMessage));
 		/*
 		 * NOTE(review): rfc1341() in this file frees the result of
 		 * messageGetFilename(), so it looks heap allocated; it is not
 		 * freed here - confirm whether this leaks
 		 */
 		cli_jsonstr(thisobj, "Filename", messageHasFilename(aMessage) ?
 			    messageGetFilename(aMessage): "(inline)");
 	}
 #endif
 
c1fce7f7
 	/*
 	 * Cases that "break" fall through to the common save-and-scan code
 	 * after the switch; cases that "return" have dealt with the part
 	 * themselves
 	 */
 	switch(messageGetMimeType(aMessage)) {
 		case APPLICATION:
 		case AUDIO:
 		case IMAGE:
 		case VIDEO:
 			break;
 		case NOMIME:
 			cli_dbgmsg("No mime headers found in multipart part %d\n", i);
 			if(mainMessage) {
 				if(binhexBegin(aMessage)) {
 					cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
 
001ad879
 					if(exportBinhexMessage(mctx, mainMessage))
5684fccf
 						*rc = VIRUS;
c1fce7f7
 				}
 				if(mainMessage != messageIn)
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 			} else if(aMessage) {
 				if(binhexBegin(aMessage)) {
 					cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
001ad879
 					if(exportBinhexMessage(mctx, aMessage))
5684fccf
 						*rc = VIRUS;
c1fce7f7
 					assert(aMessage == messages[i]);
 					messageReset(messages[i]);
 				}
 			}
 			addToText = TRUE;
 			if(messageGetBody(aMessage) == NULL)
 				/*
 				 * No plain text version
 				 */
 				cli_dbgmsg("No plain text alternative\n");
 			break;
 		case TEXT:
 			dtype = messageGetDispositionType(aMessage);
 			cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
 				dtype);
 			if(strcasecmp(dtype, "attachment") == 0)
 				break;
 			if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
 				const char *cptr;
 
 				if(mainMessage && (mainMessage != messageIn))
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 				cptr = messageGetMimeSubtype(aMessage);
 				cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
 				if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
be32043e
 				   (messageGetEncoding(aMessage) == NOENCODING)) {
c1fce7f7
 					/*
be32043e
 					 * Strictly speaking, a text/plain part
 					 * is not an attachment. We pretend it
 					 * is so that we can decode and scan it
c1fce7f7
 					 */
be32043e
 					if(!messageHasFilename(aMessage)) {
c1fce7f7
 						cli_dbgmsg("Adding part to main message\n");
 						addToText = TRUE;
be32043e
 					} else
 						cli_dbgmsg("Treating inline as attachment\n");
c1fce7f7
 				} else {
56aea026
 					const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
6a4dd9dc
 					if(doPhishingScan)
56aea026
 						checkURLs(aMessage, mctx, rc, is_html);
c1fce7f7
 					messageAddArgument(aMessage,
 						"filename=mixedtextportion");
 				}
 				break;
 			}
 			/* neither attachment nor inline: the part is left in messages[i] untouched */
 			cli_dbgmsg("Text type %s is not supported\n", dtype);
 			return mainMessage;
 		case MESSAGE:
 			/* Content-Type: message/rfc822 */
 			cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
 				messageGetEncoding(aMessage));
 #ifndef	SCAN_UNENCODED_BOUNCES
 			switch(messageGetEncoding(aMessage)) {
 				case NOENCODING:
 				case EIGHTBIT:
 				case BINARY:
 					if(encodingLine(aMessage) == NULL) {
 						/*
 						 * This means that the message
 						 * has no attachments
 						 *
 						 * The test for
 						 * messageGetEncoding is needed
 						 * since encodingLine won't have
 						 * been set if the message
 						 * itself has been encoded
 						 */
 						cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
 						assert(aMessage == messages[i]);
 						messageDestroy(messages[i]);
 						messages[i] = NULL;
 						return mainMessage;
 					}
 					/* FALLTHROUGH */
 				default:
 					cli_dbgmsg("Encoded multipart/message will be scanned\n");
 			}
 #endif
 #if	0
 			messageAddStrAtTop(aMessage,
 				"Received: by clamd (message/rfc822)");
 #endif
 #ifdef	SAVE_TO_DISC
 			/*
 			 * Save this embedded message
 			 * to a temporary file
 			 */
001ad879
 			if(saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
7dde984e
 				*rc = VIRUS;
c1fce7f7
 			assert(aMessage == messages[i]);
 			messageDestroy(messages[i]);
 			messages[i] = NULL;
 #else
 			/*
b912eaf2
 			 * Scan in memory, faster but is open to DoS attacks
 			 * when many nested levels are involved.
c1fce7f7
 			 */
0072fa21
 			body = parseEmailHeaders(aMessage, mctx->rfc821Table);
 
c1fce7f7
 			/*
7cd9337a
 			 * We've finished with the
c1fce7f7
 			 * original copy of the message,
 			 * so throw that away and
 			 * deal with the encapsulated
 			 * message as a message.
 			 * This can save a lot of memory
 			 */
 			assert(aMessage == messages[i]);
 			messageDestroy(messages[i]);
 			messages[i] = NULL;
ede9939c
 #if HAVE_JSON
 			/* JSON produced while recursing hangs off this part's object */
 			mctx->wrkobj = thisobj;
 #endif
c1fce7f7
 			if(body) {
0072fa21
 				messageSetCTX(body, mctx->ctx);
 				*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
69c62847
 				if((*rc == OK) && messageContainsVirus(body))
5684fccf
 					*rc = VIRUS;
c1fce7f7
 				messageDestroy(body);
 			}
ede9939c
 #if HAVE_JSON
 			mctx->wrkobj = saveobj;
 #endif
c1fce7f7
 #endif
 			return mainMessage;
 		case MULTIPART:
 			/*
 			 * It's a multi part within a multi part
 			 * Run the message parser on this bit, it won't
 			 * be an attachment
 			 */
 			cli_dbgmsg("Found multipart inside multipart\n");
ede9939c
 #if HAVE_JSON
 			mctx->wrkobj = thisobj;
 #endif
c1fce7f7
 			/* aMessage was checked against NULL near the top, so the else branch below looks unreachable */
 			if(aMessage) {
 				/*
 				 * The headers were parsed when reading in the
 				 * whole multipart section
 				 */
242ffd7a
 				*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
8affc406
 				cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
c1fce7f7
 				assert(aMessage == messages[i]);
 				messageDestroy(messages[i]);
 				messages[i] = NULL;
 			} else {
242ffd7a
 				*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
c1fce7f7
 				if(mainMessage && (mainMessage != messageIn))
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 			}
ede9939c
 #if HAVE_JSON
 			mctx->wrkobj = saveobj;
 #endif
c1fce7f7
 			return mainMessage;
 		default:
6351aa86
 			cli_dbgmsg("Only text and application attachments are fully supported, type = %d\n",
c1fce7f7
 				messageGetMimeType(aMessage));
5b2bcb65
 			/* fall through - we may be able to salvage something */
c1fce7f7
 	}
 
a585329e
 	/* common tail: decode the part to a file under mctx->dir and scan it */
 	if(*rc != VIRUS) {
cfeeb327
 		fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
ede9939c
 #if HAVE_JSON
 		json_object *arrobj;
 		int arrlen = 0;
 
 		if (thisobj != NULL) {
 			/* attempt to determine container size - prevents incorrect type reporting */
 			if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
 				arrlen = json_object_array_length(arrobj);
 		}
c1fce7f7
 
ede9939c
 #endif
cfeeb327
 		if(fb) {
 			/* aMessage doesn't always have a ctx set */
 			fileblobSetCTX(fb, mctx->ctx);
 			if(fileblobScanAndDestroy(fb) == CL_VIRUS)
 				*rc = VIRUS;
 			/* parts flagged addToText are not counted as files */
 			if (!addToText)
001ad879
 				mctx->files++;
c1fce7f7
 		}
ede9939c
 #if HAVE_JSON
 		if (thisobj != NULL) {
 			json_object *entry = NULL;
 			const char *dtype = NULL;
 
 			/* attempt to acquire container type */
 			/* an entry at index arrlen exists only if the array grew since arrlen was read above */
 			if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj))
 				if (json_object_array_length(arrobj) > arrlen)
 					entry = json_object_array_get_idx(arrobj, arrlen);
 			if (entry) {
 				json_object_object_get_ex(entry, "FileType", &entry);
 				if (entry)
 					dtype = json_object_get_string(entry);
 			}
 			cli_jsonint(thisobj, "ContainedObjectsIndex", arrlen);
 			cli_jsonstr(thisobj, "ClamAVFileType", dtype ? dtype : "UNKNOWN");
 		}
 #endif
a585329e
 		if(messageContainsVirus(aMessage))
 			*rc = VIRUS;
c1fce7f7
 	}
 	messageDestroy(aMessage);
 	messages[i] = NULL;
 
 	return mainMessage;
 }
4f4a8f4a
 
 /*
  * Returns the number of quote characters in the given string
  */
 /*
  * Count the double quote characters in the NUL-terminated string 'buf'
  */
 static int
 count_quotes(const char *buf)
 {
 	const char *p;
 	int total = 0;

 	for(p = buf; *p != '\0'; p++)
 		if(*p == '\"')
 			total++;

 	return total;
 }
842c7d49
 
 /*
  * Will the next line be a folded header? See RFC2822 section 2.2.3
  */
 /*
  * t: the current header line; t->t_next is the line being judged.
  *
  * Returns TRUE when the next line starts with a space or tab, or when it
  * contains '=' and the current line ends (ignoring trailing white space)
  * with ';'. Returns FALSE otherwise, including when there is no next line
  * or a line has no data.
  */
 static bool
 next_is_folded_header(const text *t)
 {
 	const text *next = t->t_next;
 	const char *data;
 	size_t i;

 	if(next == NULL)
 		return FALSE;

 	if(next->t_line == NULL)
 		return FALSE;

 	data = lineGetData(next->t_line);
 	if(data == NULL)
 		return FALSE;

 	/*
 	 * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
 	 * previous entry. The cast is needed because <ctype.h> functions are
 	 * undefined for negative plain char values.
 	 */
 	if(isblank((unsigned char)data[0]))
 		return TRUE;

 	if(strchr(data, '=') == NULL)
 		/*
 		 * Avoid false positives with
 		 *	Content-Type: text/html;
 		 *	Content-Transfer-Encoding: quoted-printable
 		 */
 		return FALSE;

 	/*
 	 * Some are broken and don't fold headers lines
 	 * correctly as per section 2.2.3 of RFC2822.
 	 * Generally they miss the white space at
 	 * the start of the fold line:
 	 *	Content-Type: multipart/related;
 	 *	type="multipart/alternative";
 	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * should read:
 	 *	Content-Type: multipart/related;
 	 *	 type="multipart/alternative";
 	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * Since we're a virus checker not an RFC
 	 * verifier we need to handle these
 	 */
 	if(t->t_line == NULL)
 		return FALSE;

 	data = lineGetData(t->t_line);
 	if(data == NULL)
 		return FALSE;

 	/*
 	 * Walk back from the last character to, but not including, the
 	 * first one. Using an index instead of a pointer avoids stepping
 	 * before the start of the string when the line is empty.
 	 */
 	for(i = strlen(data); i > 1; i--)
 		switch(data[i - 1]) {
 			case ';':
 				return TRUE;
 			case '\n':
 			case ' ':
 			case '\r':
 			case '\t':
 				continue;	/* white space at end of line */
 			default:
 				return FALSE;
 		}
 	return FALSE;
 }
0cf4cea7
 
 /*
  * This routine is called on the first line of the body of
  * an email to handle broken messages that have newlines
  * in the middle of its headers
  */
 /*
  * Does 'line' start with "Message-Id: " or "Date: " (exact case)? Such a
  * line at the top of a body means the headers were split by a stray newline
  */
 static bool
 newline_in_header(const char *line)
 {
 	cli_dbgmsg("newline_in_header, check \"%s\"\n", line);

 	if((strncmp(line, "Message-Id: ", 12) == 0) ||
 	   (strncmp(line, "Date: ", 6) == 0))
 		return TRUE;

 	return FALSE;
 }