libclamav/mbox.c
b151ef55
 /*
3163dc8e
  *  Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk>
b151ef55
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
30738099
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
b151ef55
  */
e84f1e26
 static	char	const	rcsid[] = "$Id: mbox.c,v 1.380 2007/02/13 19:47:37 njh Exp $";
e1bbfed7
 
 #ifdef	_MSC_VER
 #include <winsock.h>	/* only needed in CL_EXPERIMENTAL */
 #endif
8b242bb9
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
b151ef55
 
 #ifndef	CL_DEBUG
897fd9c7
 #define	NDEBUG	/* map CLAMAV debug onto standard */
b151ef55
 #endif
 
 #ifdef CL_THREAD_SAFE
f5e9abc8
 #ifndef	_REENTRANT
b151ef55
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
f5e9abc8
 #endif
b151ef55
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <assert.h>
 #include <string.h>
40d54f7f
 #ifdef	HAVE_STRINGS_H
b151ef55
 #include <strings.h>
40d54f7f
 #endif
b151ef55
 #include <ctype.h>
 #include <time.h>
 #include <fcntl.h>
40d54f7f
 #ifdef	HAVE_SYS_PARAM_H
0bcad2b1
 #include <sys/param.h>
40d54f7f
 #endif
 #include "clamav.h"
 #ifndef	C_WINDOWS
9a7398ee
 #include <dirent.h>
40d54f7f
 #endif
67a25177
 #include <limits.h>
52634964
 #include <signal.h>
b151ef55
 
8d97b60a
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 
a77dc192
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 #include <stddef.h>
 #endif
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
5c86c162
 #include "others.h"
 #include "defaults.h"
 #include "str.h"
 #include "filetypes.h"
b151ef55
 #include "mbox.h"
 
98685ac1
 #ifdef	CL_DEBUG
52634964
 
7f5e7448
 #if	defined(C_LINUX) || defined(C_CYGWIN)
52634964
 #include <features.h>
7f5e7448
 #endif
52634964
 
98685ac1
 #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
 #define HAVE_BACKTRACE
 #endif
89e9a596
 #endif
98685ac1
 
 #ifdef HAVE_BACKTRACE
 #include <execinfo.h>
 #include <syslog.h>
 
 static	void	sigsegv(int sig);
 static	void	print_trace(int use_syslog);
 #endif
 
07cbf822
 #if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
b151ef55
 #undef strtok_r
 #undef __strtok_r
 #define strtok_r(a,b,c)	strtok(a,b)
 #endif
 
4db74788
 #ifdef	HAVE_STDBOOL_H
0a266df3
 #ifdef	C_BEOS
 #include "SupportDefs.h"
 #else
a4f8f199
 #include <stdbool.h>
0a266df3
 #endif
a4f8f199
 #else
 #ifdef	FALSE
 typedef	unsigned	char	bool;
 #else
 typedef enum	{ FALSE = 0, TRUE = 1 } bool;
b151ef55
 #endif
 #endif
 
2345b4cd
 /*
  * Status code used by the body parsing routines: it is the return type of
  * parseEmailBody() and is handed by pointer to do_multipart() and checkURLs().
  * NOTE(review): the meanings noted against each value are inferred from the
  * names only - confirm against the code that sets them
  */
 typedef	enum {
 	FAIL,	/* presumably: parsing failed */
 	OK,	/* presumably: parsed without incident */
 	OK_ATTACHMENTS_NOT_SAVED,	/* presumably: parsed, but nothing was written out */
 	VIRUS	/* presumably: a virus was found while parsing */
 } mbox_status;
 
eddb5dbb
 #ifndef isblank
 #define isblank(c)	(((c) == ' ') || ((c) == '\t'))
 #endif
 
3fa72383
 #define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
49674596
 
90343a0f
 #ifndef CL_EXPERIMENTAL
e9bdeb72
 /*
f3ec89d2
  * Code does exist to run FOLLOWURLS on systems without libcurl, however that
e9bdeb72
  * is not recommended so it is not compiled by default
cbe29191
  *
dd35cb90
  * On Solaris, when using the GNU C compiler, the clamAV build system uses the
  * Sun supplied ld instead of the GNU ld causing an error. Therefore you cannot
  * use WITH_CURL on Solaris with gcc, you must configure with
  * "--without-libcurl". I don't know if it works with Sun's own compiler
  *
cbe29191
  * Fails to link on Solaris 10 with this error:
c8bc44d6
  *      Undefined			first referenced
  *  symbol				in file
  *  __floatdidf				/opt/sfw/lib/libcurl.s
e9bdeb72
  */
dbca666a
 #if	defined(C_SOLARIS) && defined(__GNUC__)
cbe29191
 #undef	WITH_CURL
 #endif
90343a0f
 #endif
cbe29191
 
8247ea69
 #if	defined(WITH_CURL) || defined(CL_EXPERIMENTAL)
f52d7358
 #define	FOLLOWURLS	5	/*
 				 * Maximum number of URLs scanned in a message
b1a3a05b
 				 * part. Helps to prevent Dialer.gen-45 and
 				 * Trojan.WinREG.Zapchast which are often
 				 * dispatched by emails which point to it. If
f52d7358
 				 * not defined, don't check any URLs
 				 */
8247ea69
 #endif
3fa72383
 
8247ea69
 #ifdef	FOLLOWURLS
7133ee9d
 #include "htmlnorm.h"
 #endif
 
 #ifdef CL_EXPERIMENTAL
 #include "phishcheck.h"
 #endif
 
3eb12bae
 #ifdef	FOLLOWURLS
da812a6a
 
e1bbfed7
 #ifndef	C_WINDOWS
 #include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
0a266df3
 #ifndef	C_BEOS
e1bbfed7
 #include <net/if.h>
 #include <arpa/inet.h>
 #endif
0a266df3
 #endif
468ef563
 
 #ifndef	C_WINDOWS
 #define	closesocket(s)	close(s)
 #endif
 
 #ifdef	CL_EXPERIMENTAL	/* dropping curl support */
 
e1bbfed7
 #include <fcntl.h>
 #ifndef	C_WINDOWS
 #include <sys/time.h>
 #endif
 
 #ifndef HAVE_IN_PORT_T
 typedef	unsigned	short	in_port_t;
 #endif
 
 #ifndef HAVE_IN_ADDR_T
 typedef	unsigned	int	in_addr_t;
 #endif
 
 #if	(!defined(EALREADY)) && (defined(WSAEALREADY))
 #define EALREADY	WSAEALREADY
 #endif
 #if	(!defined(EINPROGRESS)) && (defined(WSAEINPROGRESS))
 #define EINPROGRESS	WSAEINPROGRESS
 #endif
 #if	(!defined(EISCONN)) && (defined(WSAEISCONN))
 #define EISCONN	WSAEISCONN
 #endif
 
 #else
 
da812a6a
 #ifdef	WITH_CURL	/* Set in configure */
 /*
  * To build with WITH_CURL:
  * LDFLAGS=`curl-config --libs` ./configure ...
  */
88771ffa
 #include <curl/curl.h>
6736d46f
 
 /*
4e23105e
  * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and I have seen
  *	7.10 segfault, later versions can be flakey as well)
6736d46f
  * untested)
f3ec89d2
  *
  * Even 7.15 crashes, valgrind shows this:
  *	==2835== Warning: client switching stacks?  SP change: 0xBEB0FD2C --> 0xD0678F0
 *	==2835==          to suppress, use: --max-stackframe=1314225092 or greater
 
  *	==2835== Invalid write of size 4
  *	==2835==    at 0x40F67BD: Curl_resolv (in /usr/lib/libcurl.so.3.0.0)
  *	==2835==  Address 0xD0678F4 is on thread 1's stack
  *	==2835== Can't extend stack to 0xD067390 during signal delivery for thread 1:
  *	==2835==   no stack segment
  *	==2835==
  *	==2835== Process terminating with default action of signal 11 (SIGSEGV)
  *	==2835==  Access not within mapped region at address 0xD067390
  *	==2835==    at 0x40F67BD: Curl_resolv (in /usr/lib/libcurl.so.3.0.0)
  *
  * This bug has been reported upstream, however they claim that the bug
12cc3b41
  *	does not exist :-(. I have received reports that 7.15.5 suffers from the
  *	same problem in Curl_resolv
  *
  * TODO: Drop curl and do it ourselves
6736d46f
  */
89343098
 #if	(LIBCURL_VERSION_NUM < 0x070B00)
6736d46f
 #undef	WITH_CURL	/* also undef FOLLOWURLS? */
 #endif
 
ca31cc3d
 #else
 #error	"FOLLOWURLS without CURL is no longer supported"
 
6736d46f
 #endif	/*WITH_CURL*/
 
e1bbfed7
 #endif	/* CL_EXPERIMENTAL */
 
3eb12bae
 #else	/*!FOLLOWURLS*/
 #undef	WITH_CURL
6736d46f
 #endif	/*FOLLOWURLS*/
88771ffa
 
9a7398ee
 /*
c29ebe66
  * Define this to handle messages covered by section 7.3.2 of RFC1341.
9a7398ee
  *	This is experimental code so it is up to YOU to (1) ensure it's secure
291ac47f
  * (2) periodically trim the directory of old files
  *
  * If you use the load balancing feature of clamav-milter to run clamd on
d85c1fad
  * more than one machine you must make sure that .../partial is on a shared
291ac47f
  * network filesystem
9a7398ee
  */
40d54f7f
 #ifndef	C_WINDOWS	/* TODO: when opendir() is done */
d85c1fad
 #define	PARTIAL_DIR
40d54f7f
 #endif
9a7398ee
 
0491b8a4
 /*#define	NEW_WORLD*/
273cd2bb
 
3e25e41c
 /*#define	SCAN_UNENCODED_BOUNCES	*//*
04b698ac
 					 * Slows things down a lot and only catches unencoded copies
74c59ced
 					 * of EICAR within bounces, which don't matter
04b698ac
 					 */
 
d9bde711
 /*
  * Bundle of state handed to the body parsing routines: parseEmailBody(),
  * do_multipart() and checkURLs() take an mbox_ctx *, exportBounceMessage()
  * takes a const mbox_ctx *
  */
 typedef	struct	mbox_ctx {
 	const	char	*dir;	/* NOTE(review): presumably the directory that decoded parts are saved in - confirm */
 	const	table_t	*rfc821Table;	/* NOTE(review): presumably the table built from rfc821headers[] - confirm */
 	const	table_t	*subtypeTable;	/* NOTE(review): presumably the table built from mimeSubtypes[] - confirm */
 	cli_ctx	*ctx;	/* the libclamav scan context */
 } mbox_ctx;
 
5c86c162
 static	int	cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx);
b3a5cdd8
 static	message	*parseEmailFile(FILE *fin, const table_t *rfc821Table, const char *firstLine, const char *dir);
985cc85e
 static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table);
8c0250d5
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
2345b4cd
 static	mbox_status	parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
b151ef55
 static	int	boundaryStart(const char *line, const char *boundary);
 static	int	endOfMessage(const char *line, const char *boundary);
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
 static	int	getTextPart(message *const messages[], size_t size);
 static	size_t	strip(char *buf, int len);
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
0e01c158
 static	void	saveTextPart(message *m, const char *dir, int destroy_text);
0674e2af
 static	char	*rfc2047(const char *in);
273cd2bb
 static	char	*rfc822comments(const char *in, char *out);
9a7398ee
 #ifdef	PARTIAL_DIR
 static	int	rfc1341(message *m, const char *dir);
 #endif
82933497
 static	bool	usefulHeader(int commandNumber, const char *cmd);
b7afd2bf
 static	char	*getline_from_mbox(char *buffer, size_t len, FILE *fin);
b65d2aad
 static	bool	isBounceStart(const char *line);
195e3683
 static	bool	exportBinhexMessage(const char *dir, message *m);
 static	int	exportBounceMessage(text *start, const mbox_ctx *ctx);
2345b4cd
 static	message	*do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
dd2131c9
 static	int	count_quotes(const char *buf);
0066d39b
 static	bool	next_is_folded_header(const text *t);
4db74788
 static	bool	newline_in_header(const char *line);
3fa72383
 
2345b4cd
 static	void	checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
7133ee9d
 
 #ifdef CL_EXPERIMENTAL
2345b4cd
 static	void	do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs);
 static	blob	*getHrefs(message *m, tag_arguments_t *hrefs);
 static	void	hrefs_done(blob *b, tag_arguments_t *hrefs);
7133ee9d
 #endif
 
48130f7e
 #if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
314ff77b
 /*
  * Argument block for getURL(): getURL() takes a struct arg * when
  * CL_THREAD_SAFE is not defined, and a void * (presumably pointing at the
  * same structure) when it is.
  * The curl handle is only present when CL_EXPERIMENTAL is not defined.
  * NOTE(review): the field meanings noted below are inferred from the names -
  * confirm against getURL()
  */
 struct arg {
90343a0f
 #ifdef	CL_EXPERIMENTAL
b53134f5
 	char *url;	/* presumably the URL to fetch */
 #else
e1bbfed7
 	CURL *curl;	/* libcurl handle */
a95c894a
 	const char *url;	/* presumably the URL to fetch */
b53134f5
 #endif
a95c894a
 	const char *dir;	/* presumably the directory to save into */
314ff77b
 	char *filename;	/* presumably the name of the file to save as */
 };
 #ifdef	CL_THREAD_SAFE
 static	void	*getURL(void *a);
 #else
 static	void	*getURL(struct arg *arg);
 #endif
3fa72383
 #endif
 
b151ef55
 /* Maximum line length according to RFC2821 (which obsoletes RFC821) */
2683ac8b
 #define	RFC2821LENGTH	1000
b151ef55
 
 /* Hashcodes for our hash tables */
 #define	CONTENT_TYPE			1
 #define	CONTENT_TRANSFER_ENCODING	2
 #define	CONTENT_DISPOSITION		3
 
 /* Mime sub types */
 #define	PLAIN		1
 #define	ENRICHED	2
 #define	HTML		3
 #define	RICHTEXT	4
 #define	MIXED		5
a9541ab3
 #define	ALTERNATIVE	6	/* RFC1521*/
b151ef55
 #define	DIGEST		7
 #define	SIGNED		8
 #define	PARALLEL	9
 #define	RELATED		10	/* RFC2387 */
 #define	REPORT		11	/* RFC1892 */
fdc8a467
 #define	APPLEDOUBLE	12	/* Handling of this in only noddy for now */
49674596
 #define	FAX		MIXED	/*
 				 * RFC3458
 				 * Drafts stated to treat it as mixed if it is
 				 * not known.  This disappeared in the final
 				 * version (except when talking about
 				 * voice-message), but it is good enough for us
 				 * since we do no validation of coversheet
 				 * presence etc. (which also has disappeared
 				 * in the final version)
 				 */
b62a19da
 #define	ENCRYPTED	13	/*
 				 * e.g. RFC2015
 				 * Content-Type: multipart/encrypted;
 				 * boundary="nextPart1383049.XCRrrar2yq";
 				 * protocol="application/pgp-encrypted"
 				 */
db09f781
 #define	X_BFILE		RELATED	/*
 				 * BeOS, expects two parts: the file and its
 				 * attributes. The attributes part comes as
 				 *	Content-Type: application/x-be_attribute
 				 *		name="foo"
 				 * I can't find where it is defined, any
 				 * pointers would be appreciated. For now
 				 * we treat it as multipart/related
 				 */
cbc2eaa9
 #define	KNOWBOT		14	/* Unknown and undocumented format? */
b151ef55
 
 /*
  * Static key/value tables, each terminated by an entry with a NULL key:
  *	rfc821headers maps the MIME header names onto the CONTENT_* codes
  *	mimeSubtypes maps MIME subtype names onto the subtype codes above
  * Several names may share one code (FAX is MIXED, X_BFILE is RELATED and all
  * of the knowbot variants are KNOWBOT).
  * NOTE(review): presumably these are loaded into table_t's by
  * initialiseTables() - confirm
  */
 static	const	struct tableinit {
 	const	char	*key;
 	int	value;
 } rfc821headers[] = {
68badbc1
 	/* TODO: make these regular expressions */
b759d5eb
 	{	"Content-Type",			CONTENT_TYPE		},
39ff42ee
 	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
 	{	"Content-Disposition",		CONTENT_DISPOSITION	},
b151ef55
 	{	NULL,				0			}
4fc38d69
 }, mimeSubtypes[] = {	/* see RFC2045 */
b151ef55
 		/* subtypes of Text */
 	{	"plain",	PLAIN		},
 	{	"enriched",	ENRICHED	},
 	{	"html",		HTML		},
 	{	"richtext",	RICHTEXT	},
 		/* subtypes of Multipart */
 	{	"mixed",	MIXED		},
 	{	"alternative",	ALTERNATIVE	},
 	{	"digest",	DIGEST		},
 	{	"signed",	SIGNED		},
 	{	"parallel",	PARALLEL	},
 	{	"related",	RELATED		},
 	{	"report",	REPORT		},
fdc8a467
 	{	"appledouble",	APPLEDOUBLE	},
49674596
 	{	"fax-message",	FAX		},
b62a19da
 	{	"encrypted",	ENCRYPTED	},
db09f781
 	{	"x-bfile",	X_BFILE		},	/* BeOS */
cbc2eaa9
 	{	"knowbot",		KNOWBOT		},	/* ??? */
 	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
 	{	"knowbot-code",		KNOWBOT		},	/* ??? */
 	{	"knowbot-state",	KNOWBOT		},	/* ??? */
b151ef55
 	{	NULL,		0		}
 };
8a88fb93
 
 #ifdef	CL_THREAD_SAFE
 static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
b151ef55
 
0dbec6b9
 #ifndef	O_BINARY
 #define	O_BINARY	0
 #endif
 
273cd2bb
 #ifdef	NEW_WORLD
00615ec9
 
b4f31b38
 #include "matcher.h"
 
f5eb4324
 #undef	PARTIAL_DIR
 
00615ec9
 #if HAVE_MMAP
 #if HAVE_SYS_MMAN_H
 #include <sys/mman.h>
 #else /* HAVE_SYS_MMAN_H */
 #undef HAVE_MMAP
 #endif
2455f7a7
 #else	/*HAVE_MMAP*/
 #undef	NEW_WORLD
 #endif
00615ec9
 #endif
 
2455f7a7
 #ifdef	NEW_WORLD
725bd592
 /*
  * Files larger than this are scanned with the old method, should be
  *	StreamMaxLength, I guess
  * If NW_MAX_FILE_SIZE is not defined, all files go through the
  *	new method. This definition is for machines very tight on RAM, or
  *	with large StreamMaxLength values
  */
 #define	MAX_ALLOCATION	134217728	/* see libclamav/others.c */
 #define	NW_MAX_FILE_SIZE	MAX_ALLOCATION
 
273cd2bb
 /*
  * One encoded section of the mailbox that cli_mbox() has found and will
  * decode. The sections are kept in a singly linked list
  */
 struct scanlist {
725bd592
 	const	char	*start;	/* first byte after the word naming the encoding */
 	size_t	size;	/* number of bytes in the section, counted from start */
 	encoding_type	decoder;	/* only BASE64 and QUOTEDPRINTABLE for now */
 	struct	scanlist *next;	/* next section, NULL at the end of the list */
273cd2bb
 };
 
725bd592
 /*
  * Linked list recording where words of interest occur in the mailbox. It is
  * built by create_map(), searched by find_in_map() and released by free_map().
  * NOTE(review): "tail" is presumably the last element, kept so that
  * add_to_map() can append cheaply - confirm
  */
 static struct map {
 	const	char	*offset;	/* sorted */
 	const	char	*word;	/* the word found at offset */
 	struct	map	*next;
 } *map, *tail;
 
b4f31b38
 static	int	save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len);
725bd592
 static	void	create_map(const char *begin, const char *end);
 static	void	add_to_map(const char *offset, const char *word);
 static	const	char	*find_in_map(const char *offset, const char *word);
 static	void	free_map(void);
 
00615ec9
 /*
  * This could be the future. Instead of parsing and decoding it just decodes.
273cd2bb
  *
00615ec9
  * USE IT AT YOUR PERIL, a large number of viruses are not detected with this
  * method, possibly because the decoded files must be exact and not have
  * extra data at the start or end, which this code will produce.
53ee0b60
  *
273cd2bb
  * Currently only supports base64 and quoted-printable
  *
  * You may also see a lot of warnings. For the moment it falls back to old
  *	world mode if it doesn't know what to do - that'll be removed.
  * The code is untidy...
  *
  * FIXME: Some mailbox scans are slower with this method. I suspect that it's
  * because the scan can proceed to the end of the file rather than the end
  * of the attachment which can mean than later emails are scanned many times
b1455eb2
  *
53d7f9e8
  * FIXME: quoted printable doesn't know when to stop, so size related virus
  *	matching breaks
  *
725bd592
  * TODO: Fall through to cli_parse_mbox() too often
f5eb4324
  *
2455f7a7
  * TODO: Add support for systems without mmap()
f5eb4324
  *
  * TODO: partial_dir fall through
f3ec89d2
  *
  * FIXME: Some EICAR gets through
00615ec9
  */
 int
5c86c162
 cli_mbox(const char *dir, int desc, cli_ctx *ctx)
00615ec9
 {
2455f7a7
 	char *start, *ptr, *line;
 	const char *last, *p, *q;
8496a349
 	size_t size;
00615ec9
 	struct stat statb;
 	message *m;
 	fileblob *fb;
b4f31b38
 	int ret = CL_CLEAN;
8c2e0f32
 	int wasAlloced;
273cd2bb
 	struct scanlist *scanlist, *scanelem;
00615ec9
 
f60a8d41
 	if(dir == NULL) {
 		cli_warnmsg("cli_mbox called with NULL dir\n");
 		return CL_ENULLARG;
 	}
00615ec9
 	if(fstat(desc, &statb) < 0)
 		return CL_EOPEN;
 
 	size = statb.st_size;
 
 	if(size == 0)
 		return CL_CLEAN;
 
725bd592
 #ifdef	NW_MAX_FILE_SIZE
 	if(size > NW_MAX_FILE_SIZE)
5c86c162
 		return cli_parse_mbox(dir, desc, ctx);
725bd592
 #endif
00615ec9
 
f5eb4324
 	/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/
53d7f9e8
 #ifdef	PARTIAL_DIR
 	cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n");
 #endif
0d9e07a9
 
273cd2bb
 	start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
 	if(start == MAP_FAILED)
00615ec9
 		return CL_EMEM;
 
 	cli_dbgmsg("mmap'ed mbox\n");
 
273cd2bb
 	ptr = cli_malloc(size);
 	if(ptr) {
 		memcpy(ptr, start, size);
8c2e0f32
 		munmap(start, size);
273cd2bb
 		start = ptr;
725bd592
 		wasAlloced = 1;
8c2e0f32
 	} else
 		wasAlloced = 0;
 
725bd592
 	/* last points to the last *valid* address in the array */
 	last = &start[size - 1];
 
 	create_map(start, last);
 
273cd2bb
 	scanelem = scanlist = NULL;
 	q = start;
725bd592
 	/*
 	 * FIXME: mismatch of const char * and char * here and in later calls
 	 *	to find_in_map()
 	 */
 	while((p = find_in_map(q, "base64")) != NULL) {
273cd2bb
 		cli_dbgmsg("Found base64\n");
 		if(scanelem) {
 			scanelem->next = cli_malloc(sizeof(struct scanlist));
 			scanelem = scanelem->next;
 		} else
 			scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
 		scanelem->next = NULL;
 		scanelem->decoder = BASE64;
 		q = scanelem->start = &p[6];
725bd592
 		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
 		   ((p = find_in_map(q, "base64")) != NULL) ||
 		   ((p = find_in_map(q, "quoted-printable")) != NULL)) {
64ff0d49
 			scanelem->size = (size_t)(p - q);
273cd2bb
 			q = p;
b1455eb2
 		} else {
 			scanelem->size = (size_t)(last - scanelem->start) + 1;
 			break;
 		}
8496a349
 		cli_dbgmsg("base64: last %u q %u\n", (unsigned int)last, (unsigned int)q);
273cd2bb
 		assert(scanelem->size <= size);
00615ec9
 	}
f5eb4324
 
273cd2bb
 	q = start;
725bd592
 	while((p = find_in_map(q, "quoted-printable")) != NULL) {
b1455eb2
 		if(p != q)
 			switch(p[-1]) {
 				case ' ':
 				case ':':
 				case '=':	/* wrong but allow it */
 					break;
 				default:
 					q = &p[16];
 					cli_dbgmsg("Ignore quoted-printable false positive\n");
 					continue;	/* false positive */
 			}
795d3afe
 
273cd2bb
 		cli_dbgmsg("Found quoted-printable\n");
f5eb4324
 #ifdef	notdef
 		/*
 		 * The problem with quoted printable is recognising when to stop
 		 * parsing
 		 */
273cd2bb
 		if(scanelem) {
 			scanelem->next = cli_malloc(sizeof(struct scanlist));
 			scanelem = scanelem->next;
 		} else
 			scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
 		scanelem->next = NULL;
 		scanelem->decoder = QUOTEDPRINTABLE;
 		q = scanelem->start = &p[16];
8496a349
 		cli_dbgmsg("qp: last %u q %u\n", (unsigned int)last, (unsigned int)q);
725bd592
 		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
 		   ((p = find_in_map(q, "quoted-printable")) != NULL) ||
 		   ((p = find_in_map(q, "base64")) != NULL)) {
64ff0d49
 			scanelem->size = (size_t)(p - q);
273cd2bb
 			q = p;
b1455eb2
 			cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
 		} else {
 			scanelem->size = (size_t)(last - scanelem->start) + 1;
 			break;
 		}
273cd2bb
 		assert(scanelem->size <= size);
f5eb4324
 #else
 		if(wasAlloced)
 			free(start);
 		else
 			munmap(start, size);
 
725bd592
 		free_map();
5c86c162
 		return cli_parse_mbox(dir, desc, ctx);
f5eb4324
 #endif
00615ec9
 	}
 
273cd2bb
 	if(scanlist == NULL) {
 		const struct tableinit *tableinit;
 		bool anyHeadersFound = FALSE;
b1455eb2
 		bool hasuuencode = FALSE;
f5eb4324
 		cli_file_t type;
273cd2bb
 
 		/* FIXME: message: There could of course be no decoder needed... */
 		for(tableinit = rfc821headers; tableinit->key; tableinit++)
725bd592
 			if(find_in_map(start, tableinit->key)) {
273cd2bb
 				anyHeadersFound = TRUE;
 				break;
 			}
 
b4f31b38
 		if((!anyHeadersFound) &&
 		   ((p = find_in_map(start, "\nbegin ")) != NULL) &&
 		   (isuuencodebegin(++p)))
b1455eb2
 			/* uuencoded part */
 			hasuuencode = TRUE;
b4f31b38
 		else {
 			cli_dbgmsg("Nothing encoded, looking for a text part to save\n");
 			ret = save_text(ctx, dir, start, size);
 			if(wasAlloced)
 				free(start);
 			else
 				munmap(start, size);
 
 			free_map();
 			if(ret != CL_EFORMAT)
 				return ret;
 			ret = CL_CLEAN;
 		}
b1455eb2
 
725bd592
 		free_map();
 
f5eb4324
 		type = cli_filetype(start, size);
 
 		if((type == CL_TYPE_UNKNOWN_TEXT) &&
 		   (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
725bd592
 			type = CL_TYPE_MAIL;
f5eb4324
 
8c2e0f32
 		if(wasAlloced)
 			free(start);
 		else
 			munmap(start, size);
00615ec9
 
b1455eb2
 		if(anyHeadersFound || hasuuencode) {
 			/* TODO: reduce the number of falls through here */
f5eb4324
 			if(hasuuencode)
b4f31b38
 				/* TODO: fast track visa */
 				cli_warnmsg("New world - fall back to old uudecoder\n");
f5eb4324
 			else
b4f31b38
 				cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type);
f5eb4324
 			if(type == CL_TYPE_MAIL)
5c86c162
 				return cli_parse_mbox(dir, desc, ctx);
f5eb4324
 			cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
 			return CL_CLEAN;
53ee0b60
 		}
b1455eb2
 
b4f31b38
 #if	0	/* I don't believe this is needed any more */
55d19b52
 		/*
 		 * The message could be a plain text phish
 		 * FIXME: Can't get to the option whether we are looking for
 		 *	phishes or not, so assume we are, this slows things a
 		 *	lot
 		 * Should be
 		 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
 		 */
 		if(type == CL_TYPE_MAIL)
5c86c162
 			return cli_parse_mbox(dir, desc, ctx);
b4f31b38
 #endif
55d19b52
 		cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n",
 			type);
273cd2bb
 		return CL_CLEAN;
 	}
725bd592
 #if	0
 	if(wasAlloced) {
 		const char *max = NULL;
 
 		for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
 			const char *end = &scanelem->start[scanelem->size];
 
 			if(end > max)
 				max = end;
 		}
 
 		if(max < last)
 			printf("could free %d bytes\n", (int)(last - max));
 	}
 #endif
273cd2bb
 
 	for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
 		if(scanelem->decoder == BASE64) {
725bd592
 			const char *b64start = scanelem->start;
 			size_t b64size = scanelem->size;
273cd2bb
 
 			cli_dbgmsg("b64size = %lu\n", b64size);
b917322c
 			while((*b64start != '\n') && (*b64start != '\r')) {
53ee0b60
 				b64start++;
 				b64size--;
273cd2bb
 			}
 			/*
 			 * Look for the end of the headers
 			 */
 			while(b64start < last) {
 				if(*b64start == ';') {
53ee0b60
 					b64start++;
 					b64size--;
b917322c
 				} else if((memcmp(b64start, "\n\n", 2) == 0) ||
 					  (memcmp(b64start, "\r\r", 2) == 0)) {
 					b64start += 2;
 					b64size -= 2;
 					break;
 				} else if(memcmp(b64start, "\r\n\r\n", 4) == 0) {
 					b64start += 4;
 					b64size -= 4;
 					break;
f5eb4324
 				} else if(memcmp(b64start, "\n \n", 3) == 0) {
 					/*
 					 * Some viruses are broken and have
 					 * one space character at the end of
 					 * the headers
 					 */
 					b64start += 3;
 					b64size -= 3;
 					break;
 				} else if(memcmp(b64start, "\r\n \r\n", 5) == 0) {
 					/*
 					 * Some viruses are broken and have
 					 * one space character at the end of
 					 * the headers
 					 */
 					b64start += 5;
 					b64size -= 5;
 					break;
53ee0b60
 				}
6c4485f9
 				b64start++;
273cd2bb
 				b64size--;
6c4485f9
 			}
53ee0b60
 
273cd2bb
 			if(b64size > 0L)
b1455eb2
 				while((!isalnum(*b64start)) && (*b64start != '/')) {
273cd2bb
 					if(b64size-- == 0L)
 						break;
 					b64start++;
 				}
 
 			if(b64size > 0L) {
56c1b4ff
 				int lastline;
c8bc44d6
 				char *tmpfilename;
 				unsigned char *uptr;
 
273cd2bb
 				cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
56c1b4ff
 				if((fb = fileblobCreate()) == NULL) {
b4f31b38
 					free_map();
c8bc44d6
 					if(wasAlloced)
 						free(start);
 					else
 						munmap(start, size);
 
 					return CL_EMEM;
 				}
 
56c1b4ff
 				tmpfilename = cli_gentemp(dir);
b4f31b38
 				if(tmpfilename == NULL) {
 					free_map();
c8bc44d6
 					if(wasAlloced)
 						free(start);
 					else
 						munmap(start, size);
56c1b4ff
 					fileblobDestroy(fb);
c8bc44d6
 
56c1b4ff
 					return CL_EMEM;
c8bc44d6
 				}
56c1b4ff
 				fileblobSetFilename(fb, dir, tmpfilename);
 				free(tmpfilename);
c8bc44d6
 
273cd2bb
 				line = NULL;
53ee0b60
 
273cd2bb
 				m = messageCreate();
f5eb4324
 				if(m == NULL) {
b4f31b38
 					free_map();
f5eb4324
 					if(wasAlloced)
 						free(start);
 					else
 						munmap(start, size);
56c1b4ff
 					fileblobDestroy(fb);
f5eb4324
 
273cd2bb
 					return CL_EMEM;
f5eb4324
 				}
273cd2bb
 				messageSetEncoding(m, "base64");
53ee0b60
 
b65d2aad
 				messageSetCTX(m, ctx);
 				fileblobSetCTX(fb, ctx);
 
b917322c
 				lastline = 0;
b1455eb2
 				do {
c8bc44d6
 					int length = 0, datalen;
b917322c
 					char *newline, *equal;
c8bc44d6
 					unsigned char *bigbuf, *data;
 					unsigned char smallbuf[1024];
55d19b52
 					const char *cptr;
53ee0b60
 
273cd2bb
 					/*printf("%ld: ", b64size); fflush(stdout);*/
53ee0b60
 
55d19b52
 					for(cptr = b64start; b64size && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
273cd2bb
 						length++;
 						--b64size;
 					}
53ee0b60
 
273cd2bb
 					/*printf("%d: ", length); fflush(stdout);*/
53ee0b60
 
beb70346
 					newline = cli_realloc(line, length + 1);
 					if(newline == NULL)
 						break;
 					line = newline;
53ee0b60
 
273cd2bb
 					memcpy(line, b64start, length);
 					line[length] = '\0';
53ee0b60
 
b917322c
 					equal = strchr(line, '=');
 					if(equal) {
 						lastline++;
 						*equal = '\0';
 					}
273cd2bb
 					/*puts(line);*/
53ee0b60
 
c8bc44d6
 #if	0
273cd2bb
 					if(messageAddStr(m, line) < 0)
 						break;
c8bc44d6
 #endif
56c1b4ff
 					if(length >= (int)sizeof(smallbuf)) {
c8bc44d6
 						datalen = length + 2;
 						data = bigbuf = cli_malloc(datalen);
 						if(data == NULL)
 							break;
 					} else {
 						bigbuf = NULL;
 						data = smallbuf;
 						datalen = sizeof(data) - 1;
 					}
 					uptr = decodeLine(m, BASE64, line, data, datalen);
 
 					if(uptr == NULL) {
 						if(bigbuf)
 							free(bigbuf);
 						break;
 					}
 					/*cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));*/
56c1b4ff
 					datalen = fileblobAddData(fb, data, (size_t)(uptr - data));
c8bc44d6
 					if(bigbuf)
 						free(bigbuf);
53ee0b60
 
56c1b4ff
 					if(datalen < 0)
 						break;
ea916d6a
 					if(fileblobContainsVirus(fb))
a4b13e7a
 						break;
56c1b4ff
 
55d19b52
 					if((b64size > 0) && (*cptr == '\r')) {
 						b64start = ++cptr;
b917322c
 						--b64size;
 					}
55d19b52
 					if((b64size > 0) && (*cptr == '\n')) {
 						b64start = ++cptr;
273cd2bb
 						--b64size;
 					}
b917322c
 					if(lastline)
273cd2bb
 						break;
b1455eb2
 				} while(b64size > 0L);
 
c8bc44d6
 				if(m->base64chars) {
 					unsigned char data[4];
 
 					uptr = base64Flush(m, data);
 					if(uptr) {
 						/*cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));*/
56c1b4ff
 						(void)fileblobAddData(fb, data, (size_t)(uptr - data));
c8bc44d6
 					}
 				}
56c1b4ff
 				if(fb)
 					fileblobDestroy(fb);
 				else
ea916d6a
 					ret = -1;
c8bc44d6
 
 				messageDestroy(m);
 				free(line);
00615ec9
 			}
273cd2bb
 		} else if(scanelem->decoder == QUOTEDPRINTABLE) {
725bd592
 			const char *quotedstart = scanelem->start;
 			size_t quotedsize = scanelem->size;
53ee0b60
 
273cd2bb
 			cli_dbgmsg("quotedsize = %lu\n", quotedsize);
 			while(*quotedstart != '\n') {
53ee0b60
 				quotedstart++;
 				quotedsize--;
273cd2bb
 			}
 			/*
 			 * Look for the end of the headers
 			 */
 			while(quotedstart < last) {
 				if(*quotedstart == ';') {
53ee0b60
 					quotedstart++;
 					quotedsize--;
b917322c
 				} else if((*quotedstart == '\n') || (*quotedstart == '\r')) {
273cd2bb
 					quotedstart++;
 					quotedsize--;
 					if((*quotedstart == '\n') || (*quotedstart == '\r')) {
 						quotedstart++;
 						quotedsize--;
 						break;
 					}
53ee0b60
 				}
273cd2bb
 				quotedstart++;
 				quotedsize--;
53ee0b60
 			}
00615ec9
 
273cd2bb
 			while(!isalnum(*quotedstart)) {
 				quotedstart++;
 				quotedsize--;
 			}
00615ec9
 
273cd2bb
 			if(quotedsize > 0L) {
 				cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
00615ec9
 
273cd2bb
 				m = messageCreate();
f5eb4324
 				if(m == NULL) {
b4f31b38
 					free_map();
f5eb4324
 					if(wasAlloced)
 						free(start);
 					else
 						munmap(start, size);
 
273cd2bb
 					return CL_EMEM;
f5eb4324
 				}
273cd2bb
 				messageSetEncoding(m, "quoted-printable");
b65d2aad
 				messageSetCTX(m, ctx);
00615ec9
 
273cd2bb
 				line = NULL;
00615ec9
 
b1455eb2
 				do {
273cd2bb
 					int length = 0;
beb70346
 					char *newline;
55d19b52
 					const char *cptr;
00615ec9
 
273cd2bb
 					/*printf("%ld: ", quotedsize); fflush(stdout);*/
00615ec9
 
55d19b52
 					for(cptr = quotedstart; quotedsize && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
273cd2bb
 						length++;
 						--quotedsize;
 					}
00615ec9
 
273cd2bb
 					/*printf("%d: ", length); fflush(stdout);*/
00615ec9
 
beb70346
 					newline = cli_realloc(line, length + 1);
 					if(newline == NULL)
 						break;
 					line = newline;
00615ec9
 
273cd2bb
 					memcpy(line, quotedstart, length);
 					line[length] = '\0';
00615ec9
 
273cd2bb
 					/*puts(line);*/
00615ec9
 
273cd2bb
 					if(messageAddStr(m, line) < 0)
 						break;
53ee0b60
 
55d19b52
 					if((quotedsize > 0) && (*cptr == '\r')) {
 						quotedstart = ++cptr;
b917322c
 						--quotedsize;
 					}
55d19b52
 					if((quotedsize > 0) && (*cptr == '\n')) {
 						quotedstart = ++cptr;
273cd2bb
 						--quotedsize;
 					}
b1455eb2
 				} while(quotedsize > 0L);
 
273cd2bb
 				free(line);
985cc85e
 				fb = messageToFileblob(m, dir, 1);
273cd2bb
 				messageDestroy(m);
53ee0b60
 
ea916d6a
 				if(fb)
273cd2bb
 					fileblobDestroy(fb);
ea916d6a
 				else
 					ret = -1;
273cd2bb
 			}
00615ec9
 		}
 	}
273cd2bb
 	scanelem = scanlist;
 
b4f31b38
 	/*
 	 * There could be a phish in the plain text part, so save that
 	 * FIXME: Can't get to the option whether we are looking for
 	 *	phishes or not, so assume we are, this slows things a
 	 *	lot
 	 * Should be
 	 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
 	 */
 	ret = save_text(ctx, dir, start, size);
 
 	free_map();
 
273cd2bb
 	while(scanelem) {
 		struct scanlist *n = scanelem->next;
 
 		free(scanelem);
 		scanelem = n;
 	}
00615ec9
 
8c2e0f32
 	if(wasAlloced)
 		free(start);
 	else
 		munmap(start, size);
00615ec9
 
8c2e0f32
 	/*
 	 * FIXME: Need to run cl_scandir() here and return that value
 	 */
c8bc44d6
 	cli_dbgmsg("cli_mbox: ret = %d\n", ret);
b4f31b38
 	if(ret != CL_EFORMAT)
 		return ret;
53ee0b60
 
b4f31b38
 	cli_warnmsg("New world - don't know what to do - fall back to old world\n");
273cd2bb
 	/* Fall back for now */
b1455eb2
 	lseek(desc, 0L, SEEK_SET);
5c86c162
 	return cli_parse_mbox(dir, desc, ctx);
00615ec9
 }
725bd592
 
b4f31b38
 /*
  * Save a text part - it could contain phish or jscript
  *
  * start and len describe the buffer that the word map was built from. The
  * text after the first blank line is scanned with cli_scanbuff() and then
  * written to a new file in dir.
  * Returns CL_EFORMAT if there is no blank line or the text is shorter than
  *	5 bytes, CL_VIRUS if cli_scanbuff() finds something, CL_EMEM or
  *	CL_ETMPFILE if the file can't be set up, otherwise CL_SUCCESS
  */
 static int
 save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
 {
 	const char *p, *q;
 	fileblob *fb;
 	char *tmpfilename;
 
 	/* The text starts at the first blank line, i.e. the end of the headers */
 	if(((p = find_in_map(start, "\n\n")) == NULL) &&
 	   ((p = find_in_map(start, "\r\n\r\n")) == NULL)) {
 		cli_dbgmsg("No text part found to save\n");
 		return CL_EFORMAT;
 	}
 
 	if((find_in_map(start, "base64") == NULL) &&
 	   (find_in_map(start, "quoted-printable") == NULL)) {
 		cli_dbgmsg("It's all plain text!\n");
 		if(*p == '\r')
 			p += 4;
 		else
 			p += 2;
 		if((size_t)(p - start) > len)
 			return CL_EFORMAT;
 		len -= (size_t)(p - start);
 	} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
 		  ((q = find_in_map(p, "base64")) == NULL) &&
 		  ((q = find_in_map(p, "quoted-printable")) == NULL)) {
 		cli_dbgmsg("Can't find end of plain text - assume it's all\n");
 		/*
 		 * Only the bytes from p onwards are left, using the
 		 * full length would read beyond the end of the buffer
 		 */
 		if((size_t)(p - start) > len)
 			return CL_EFORMAT;
 		len -= (size_t)(p - start);
 	} else
 		len = (size_t)(q - p);
 
 	if(len < 5) {
 		cli_dbgmsg("save_text: Too small\n");
 		return CL_EFORMAT;
 	}
 	if(ctx->scanned)
 		*ctx->scanned += len / CL_COUNT_PRECISION;
 
 	/*
 	 * This doesn't work, cli_scanbuff isn't designed to be used
 	 *	in this way. It gets the "filetype" wrong and then
 	 *	doesn't scan correctly
 	 */
 	if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS) {
 		cli_dbgmsg("save_text: found %s\n", *ctx->virname);
 		return CL_VIRUS;
 	}
 
 	fb = fileblobCreate();
 	if(fb == NULL)
 		return CL_EMEM;
 
 	tmpfilename = cli_gentemp(dir);
 
 	if(tmpfilename == NULL) {
 		fileblobDestroy(fb);
 		return CL_ETMPFILE;
 	}
 	cli_dbgmsg("save plain bit to %s, %u bytes\n",
 		tmpfilename, (unsigned int)len);
 
 	fileblobSetFilename(fb, dir, tmpfilename);
 	free(tmpfilename);
 
 	(void)fileblobAddData(fb, (const unsigned char *)p, len);
 	fileblobDestroy(fb);
 	return CL_SUCCESS;
 }
 
725bd592
 /*
  * Walk the buffer [begin, end) one byte at a time and record in the word
  * map every position at which one of the keywords below starts. The
  * comparison ignores case and at most one keyword (the first that matches
  * in table order) is recorded per position.
  *
  * If a map already exists a warning is issued and it is freed first.
  */
 static void
 create_map(const char *begin, const char *end)
 {
 	static const struct keyword {
 		const char *text;
 		int length;
 	} keywords[] = {
 		{	"base64",		6	},
 		{	"quoted-printable",	16	},
 		{	"\nbegin ",		7	},
 		{	"\nFrom ",		6	},
 		{	"\n\n",			2	},
 		{	"\r\n\r\n",		4	}
 	};
 	const char *cursor;
 
 	if(map != NULL) {
 		cli_warnmsg("create_map called without free_map\n");
 		free_map();
 	}
 
 	for(cursor = begin; cursor < end; cursor++) {
 		size_t i;
 
 		for(i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
 			const struct keyword *kw = &keywords[i];
 
 			/* only compare when the keyword fits before end */
 			if(((end - cursor) >= kw->length) &&
 			   (strncasecmp(cursor, kw->text, kw->length) == 0)) {
 				add_to_map(cursor, kw->text);
 				break;
 			}
 		}
 	}
 }
 
 /* To sort map, assume 'offset' is presented in sorted order */
 static void
 add_to_map(const char *offset, const char *word)
 {
 	if(map) {
 		tail->next = cli_malloc(sizeof(struct map));	/* FIXME: verify */
 		tail = tail->next;
 	} else
 		map = tail = cli_malloc(sizeof(struct map));	/* FIXME: verify */
 
 	tail->offset = offset;
 	tail->word = word;
 	tail->next = NULL;
 }
 
 /*
  * Search the word map, in list order, for the first entry that lies at or
  * after 'offset' and whose keyword equals 'word' (ignoring case).
  *
  * Returns the position stored in that entry, or NULL if there is none.
  */
 static const char *
 find_in_map(const char *offset, const char *word)
 {
 	const struct map *entry = map;
 
 	while(entry != NULL) {
 		if((entry->offset >= offset) &&
 		   (strcasecmp(word, entry->word) == 0))
 			return entry->offset;
 		entry = entry->next;
 	}
 
 	return NULL;
 }
 
 static void
 free_map(void)
 {
 	while(map) {
 		struct map *next = map->next;
 
 		free(map);
 		map = next;
 	}
 	map = NULL;
 }
 
 #else	/*!NEW_WORLD*/
00615ec9
 int
5c86c162
 cli_mbox(const char *dir, int desc, cli_ctx *ctx)
00615ec9
 {
f60a8d41
 	if(dir == NULL) {
 		cli_warnmsg("cli_mbox called with NULL dir\n");
 		return CL_ENULLARG;
 	}
5c86c162
 	return cli_parse_mbox(dir, desc, ctx);
00615ec9
 }
 #endif
 
b151ef55
 /*
  * TODO: when signal handling is added, need to remove temp files when a
7d3d11d0
  *	signal is received
b151ef55
  * TODO: add option to scan in memory not via temp files, perhaps with a
74b5c349
  * named pipe or memory mapped file, though this won't work on big e-mails
  * containing many levels of encapsulated messages - it'd just take too much
  * RAM
c6259ac5
  * TODO: parse .msg format files
fdc8a467
  * TODO: fully handle AppleDouble format, see
7d3d11d0
  *	http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
f54a8635
  * TODO: ensure parseEmailHeaders is always called before parseEmailBody
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
eddb5dbb
  * TODO: Handle unepected NUL bytes in header lines which stop strcmp()s:
  *	e.g. \0Content-Type: application/binary;
b151ef55
  */
00615ec9
 static int
5c86c162
 cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx)
b151ef55
 {
c6259ac5
 	int retcode, i;
82933497
 	message *body;
b151ef55
 	FILE *fd;
2683ac8b
 	char buffer[RFC2821LENGTH + 1];
d9bde711
 	mbox_ctx mctx;
0e3b08fc
 #ifdef HAVE_BACKTRACE
98685ac1
 	void (*segv)(int);
 #endif
49674596
 	static table_t *rfc821, *subtype;
11466e82
 #ifdef	CL_DEBUG
 	char tmpfilename[16];
 	int tmpfd;
 #endif
e1bbfed7
 #if	defined(FOLLOWURLS) && (!defined(CL_EXPERIMENTAL))
12cc3b41
 	static int initialised = 0;
 #ifdef	CL_THREAD_SAFE
 	static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
 #endif
b151ef55
 
8ba16c7b
 #ifdef	NEW_WORLD
 	cli_dbgmsg("fall back to old world\n");
 #else
b151ef55
 	cli_dbgmsg("in mbox()\n");
8ba16c7b
 #endif
b151ef55
 
e1bbfed7
 #if	defined(FOLLOWURLS) && (!defined(CL_EXPERIMENTAL))
12cc3b41
 	if(ctx->options&CL_SCAN_MAILURL) {
 #ifdef	CL_THREAD_SAFE
 		pthread_mutex_lock(&init_mutex);
 #endif
 		if(!initialised) {
 			if(curl_global_init(CURL_GLOBAL_ALL) != 0) {
 #ifdef	CL_THREAD_SAFE
 				pthread_mutex_unlock(&init_mutex);
 #endif
 				cli_warnmsg("curl_global_init failed, disabling mail-follow-urls");
 				ctx->options &= ~CL_SCAN_MAILURL;
 			}
 			initialised = 1;
 		}
 #ifdef	CL_THREAD_SAFE
 		pthread_mutex_unlock(&init_mutex);
 #endif
 	}
 #endif
 
c6259ac5
 	i = dup(desc);
 	if((fd = fdopen(i, "rb")) == NULL) {
 		cli_errmsg("Can't open descriptor %d\n", desc);
 		close(i);
7d3d11d0
 		return CL_EOPEN;
c6259ac5
 	}
b1a3a05b
 	rewind(fd);	/* bug 240 */
11466e82
 #ifdef	CL_DEBUG
 	/*
 	 * Copy the incoming mail for debugging, so that if it falls over
 	 * we have a copy of the offending email. This is debugging code
 	 * that you shouldn't of course install in a live environment. I am
 	 * not interested in hearing about security issues with this section
 	 * of the parser.
 	 */
 	strcpy(tmpfilename, "/tmp/mboxXXXXXX");
 	tmpfd = mkstemp(tmpfilename);
 	if(tmpfd < 0) {
 		perror(tmpfilename);
 		cli_errmsg("Can't make debugging file\n");
 	} else {
 		FILE *tmpfp = fdopen(tmpfd, "w");
 
 		if(tmpfp) {
 			while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
 				fputs(buffer, tmpfp);
 			fclose(tmpfp);
 			rewind(fd);
 		} else
 			cli_errmsg("Can't fdopen debugging file\n");
 	}
 #endif
802c37fc
 	if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
c6259ac5
 		/* empty message */
 		fclose(fd);
11466e82
 #ifdef	CL_DEBUG
 		unlink(tmpfilename);
 #endif
7d3d11d0
 		return CL_CLEAN;
c6259ac5
 	}
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&tables_mutex);
 #endif
49674596
 	if(rfc821 == NULL) {
 		assert(subtype == NULL);
7b8fb055
 
49674596
 		if(initialiseTables(&rfc821, &subtype) < 0) {
 			rfc821 = NULL;
 			subtype = NULL;
8a88fb93
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&tables_mutex);
 #endif
7b8fb055
 			fclose(fd);
11466e82
 #ifdef	CL_DEBUG
 			unlink(tmpfilename);
 #endif
7d3d11d0
 			return CL_EMEM;
7b8fb055
 		}
b151ef55
 	}
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&tables_mutex);
 #endif
b151ef55
 
89e9a596
 #ifdef HAVE_BACKTRACE
98685ac1
 	segv = signal(SIGSEGV, sigsegv);
 #endif
 
b65d2aad
 	retcode = CL_SUCCESS;
301d2cc0
 	body = NULL;
 
d9bde711
 	mctx.dir = dir;
 	mctx.rfc821Table = rfc821;
 	mctx.subtypeTable = subtype;
 	mctx.ctx = ctx;
 
f54a8635
 	/*
3edf8bda
 	 * Is it a UNIX style mbox with more than one
f54a8635
 	 * mail message, or just a single mail message?
3edf8bda
 	 *
 	 * TODO: It would be better if we called cli_scandir here rather than
 	 * in cli_scanmail. Then we could improve the way mailboxes with more
 	 * than one message is handled, e.g. stopping parsing when an infected
 	 * message is stopped, and giving a better indication of which message
 	 * within the mailbox is infected
f54a8635
 	 */
e39c0901
 	/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
 	if(strncmp(buffer, "From ", 5) == 0) {
b151ef55
 		/*
c6259ac5
 		 * Have been asked to check a UNIX style mbox file, which
 		 * may contain more than one e-mail message to decode
53ee0b60
 		 *
 		 * It would be far better for scanners.c to do this splitting
 		 * and do this
 		 *	FOR EACH mail in the mailbox
 		 *	DO
 		 *		pass this mail to cli_mbox --
 		 *		scan this file
 		 *		IF this file has a virus quit
 		 *		THEN
 		 *			return CL_VIRUS
 		 *		FI
 		 *	END
 		 * This would remove a problem with this code that it can
 		 * fill up the tmp directory before it starts scanning
b151ef55
 		 */
82933497
 		bool lastLineWasEmpty;
 		int messagenumber;
 		message *m = messageCreate();
 
 		if(m == NULL) {
 			fclose(fd);
 #ifdef HAVE_BACKTRACE
 			signal(SIGSEGV, segv);
 #endif
11466e82
 #ifdef	CL_DEBUG
 			unlink(tmpfilename);
 #endif
82933497
 			return CL_EMEM;
 		}
 
 		lastLineWasEmpty = FALSE;
 		messagenumber = 1;
b65d2aad
 		messageSetCTX(m, ctx);
b151ef55
 
c6259ac5
 		do {
f54a8635
 			cli_chomp(buffer);
e39c0901
 			/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
 			if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
bf497d0a
 				cli_dbgmsg("Deal with email number %d\n", messagenumber++);
b151ef55
 				/*
f54a8635
 				 * End of a message in the mail box
b151ef55
 				 */
de617e3e
 				body = parseEmailHeaders(m, rfc821);
f2b068fb
 				if(body == NULL) {
 					messageReset(m);
 					continue;
 				}
b65d2aad
 				messageSetCTX(body, ctx);
f54a8635
 				messageDestroy(m);
b65d2aad
 				if(messageGetBody(body)) {
2345b4cd
 					mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
 					if(rc == FAIL) {
09ccd6e0
 						messageReset(body);
 						m = body;
 						continue;
2345b4cd
 					} else if(rc == VIRUS) {
b65d2aad
 						cli_dbgmsg("Message number %d is infected\n",
 							messagenumber);
 						retcode = CL_VIRUS;
3295a626
 						m = NULL;
b65d2aad
 						break;
09ccd6e0
 					}
b65d2aad
 				}
b151ef55
 				/*
f54a8635
 				 * Starting a new message, throw away all the
00615ec9
 				 * information about the old one. It would
 				 * be best to be able to scan this message
 				 * now, but cli_scanfile needs arguments
 				 * that haven't been passed here so it can't be
 				 * called
b151ef55
 				 */
f54a8635
 				m = body;
 				messageReset(body);
b65d2aad
 				messageSetCTX(body, ctx);
b151ef55
 
c6259ac5
 				cli_dbgmsg("Finished processing message\n");
f54a8635
 			} else
a66ca28a
 				lastLineWasEmpty = (bool)(buffer[0] == '\0');
d1a6ea81
 
8f1f6383
 			if(isuuencodebegin(buffer)) {
64ff0d49
 				/*
d1a6ea81
 				 * Fast track visa to uudecode.
 				 * TODO: binhex, yenc
 				 */
165d8543
 				if(uudecodeFile(m, buffer, dir, fd) < 0)
8f1f6383
 					if(messageAddStr(m, buffer) < 0)
 						break;
 			} else
d1a6ea81
 				if(messageAddStr(m, buffer) < 0)
 					break;
802c37fc
 		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
bf497d0a
 
82933497
 		fclose(fd);
 
b65d2aad
 		if(retcode == CL_SUCCESS) {
 			cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
 			body = parseEmailHeaders(m, rfc821);
 		}
 		if(m)
 			messageDestroy(m);
f2b068fb
 	} else {
7fca6080
 		/*
 		 * It's a single message, parse the headers then the body
f2b068fb
 		 */
f1c33aa0
 		if(strncmp(buffer, "P I ", 4) == 0)
 			/*
 			 * CommuniGate Pro format: ignore headers until
 			 * blank line
 			 */
802c37fc
 			while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
f1c33aa0
 				(strchr("\r\n", buffer[0]) == NULL))
 					;
 		/*
 		 * Ignore any blank lines at the top of the message
 		 */
f2b068fb
 		while(strchr("\r\n", buffer[0]) &&
b7afd2bf
 		     (getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
6b93ea0c
 			;
 
a78256af
 		buffer[sizeof(buffer) - 1] = '\0';
802c37fc
 
b3a5cdd8
 		body = parseEmailFile(fd, rfc821, buffer, dir);
82933497
 		fclose(fd);
f2b068fb
 	}
7fca6080
 
f2b068fb
 	if(body) {
 		/*
 		 * Write out the last entry in the mailbox
 		 */
b65d2aad
 		if((retcode == CL_SUCCESS) && messageGetBody(body)) {
 			messageSetCTX(body, ctx);
ddd53493
 			switch(parseEmailBody(body, NULL, &mctx, 0)) {
2345b4cd
 				case FAIL:
b65d2aad
 					retcode = CL_EFORMAT;
 					break;
2345b4cd
 				case VIRUS:
b65d2aad
 					retcode = CL_VIRUS;
 					break;
 			}
 		}
b151ef55
 
f2b068fb
 		/*
 		 * Tidy up and quit
 		 */
 		messageDestroy(body);
 	}
b151ef55
 
 	cli_dbgmsg("cli_mbox returning %d\n", retcode);
 
89e9a596
 #ifdef HAVE_BACKTRACE
98685ac1
 	signal(SIGSEGV, segv);
 #endif
 
11466e82
 #ifdef	CL_DEBUG
 	unlink(tmpfilename);
 #endif
b151ef55
 	return retcode;
 }
 
 /*
82933497
  * Read in an email message from fin, parse it, and return the message
7fca6080
  *
82933497
  * FIXME: files full of new lines and nothing else are
  * handled ungracefully...
  */
 static message *
b3a5cdd8
 parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir)
82933497
 {
 	bool inHeader = TRUE;
c5c3b7c7
 	bool bodyIsEmpty = TRUE;
2c405268
 	bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
82933497
 	message *ret;
 	bool anyHeadersFound = FALSE;
 	int commandNumber = -1;
7e0afd07
 	char *fullline = NULL, *boundary = NULL;
82933497
 	size_t fulllinelength = 0;
2683ac8b
 	char buffer[RFC2821LENGTH + 1];
82933497
 
 	cli_dbgmsg("parseEmailFile\n");
 
 	ret = messageCreate();
 	if(ret == NULL)
 		return NULL;
 
 	strcpy(buffer, firstLine);
 	do {
dd2131c9
 		const char *line;
82933497
 
 		(void)cli_chomp(buffer);
 
dd2131c9
 		if(buffer[0] == '\0')
795d3afe
 			line = NULL;
dd2131c9
 		else
 			line = buffer;
82933497
 
 		/*
 		 * Don't blank lines which are only spaces from headers,
 		 * otherwise they'll be treated as the end of header marker
 		 */
7e0afd07
 		if(lastWasBlank) {
 			lastWasBlank = FALSE;
 			if(boundaryStart(buffer, boundary)) {
 				cli_dbgmsg("Found a header line with space that should be blank\n");
 				inHeader = FALSE;
 			}
 		}
82933497
 		if(inHeader) {
a6d794c7
 			cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
 				buffer ? buffer : "", fullline);
795d3afe
 			if(line && isspace(line[0])) {
7e0afd07
 				char copy[sizeof(buffer)];
 
 				strcpy(copy, buffer);
 				strstrip(copy);
 				if(copy[0] == '\0') {
 					/*
097d9527
 					 * The header line contains only white
 					 * space. This is not the end of the
 					 * headers according to RFC2822, but
 					 * some MUAs will handle it as though
 					 * it were, and virus writers exploit
 					 * this bug. We can't just break from
 					 * the loop here since that would allow
 					 * other exploits such as inserting a
 					 * white space line before the
 					 * content-type line. So we just have
 					 * to make a best guess. Sigh.
7e0afd07
 					 */
 					if(fullline) {
 						if(parseEmailHeader(ret, fullline, rfc821) < 0)
 							continue;
 
 						free(fullline);
 						fullline = NULL;
 					}
67fac7ee
 					if(boundary ||
 					   ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
7e0afd07
 						lastWasBlank = TRUE;
 						continue;
 					}
 				}
 			}
795d3afe
 			if((line == NULL) && (fullline == NULL)) {	/* empty line */
a6d794c7
 				/*
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
 				if(!anyHeadersFound)
 					/* Ignore the junk at the top */
 					continue;
0c9750fa
 
a6d794c7
 				cli_dbgmsg("End of header information\n");
 				inHeader = FALSE;
 				bodyIsEmpty = TRUE;
82933497
 			} else {
 				char *ptr;
cdfc0f05
 				int lookahead;
82933497
 
 				if(fullline == NULL) {
2683ac8b
 					char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
82933497
 
 					/*
 					 * Continuation of line we're ignoring?
 					 */
a6d794c7
 					if(isblank(line[0]))
82933497
 						continue;
 
 					/*
 					 * Is this a header we're interested in?
 					 */
795d3afe
 					if((strchr(line, ':') == NULL) ||
 					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
 						if(strncmp(line, "From ", 5) == 0)
82933497
 							anyHeadersFound = TRUE;
 						continue;
 					}
 
273cd2bb
 					ptr = rfc822comments(cmd, out);
82933497
 					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 
 					switch(commandNumber) {
 						case CONTENT_TRANSFER_ENCODING:
 						case CONTENT_DISPOSITION:
 						case CONTENT_TYPE:
 							anyHeadersFound = TRUE;
 							break;
 						default:
 							if(!anyHeadersFound)
 								anyHeadersFound = usefulHeader(commandNumber, cmd);
 							continue;
 					}
4db74788
 					fullline = cli_strdup(line);
795d3afe
 					fulllinelength = strlen(line) + 1;
 				} else if(line != NULL) {
 					fulllinelength += strlen(line);
beb70346
 					ptr = cli_realloc(fullline, fulllinelength);
 					if(ptr == NULL)
 						continue;
 					fullline = ptr;
795d3afe
 					strcat(fullline, line);
82933497
 				}
 
 				assert(fullline != NULL);
 
 				lookahead = getc(fin);
 				if(lookahead != EOF) {
 					ungetc(lookahead, fin);
 
 					/*
 					 * Section B.2 of RFC822 says TAB or
 					 * SPACE means a continuation of the
 					 * previous entry.
 					 *
 					 * Add all the arguments on the line
 					 */
eddb5dbb
 					if(isblank(lookahead))
82933497
 						continue;
 				}
 
14ed0d40
 				/*
 				 * Handle broken headers, where the next
 				 * line isn't indented by whitespace
 				 */
 				if(fullline[fulllinelength - 2] == ';')
 					/* Add arguments to this line */
 					continue;
 
dd2131c9
 				if(line && (count_quotes(fullline) & 1))
 					continue;
82933497
 
273cd2bb
 				ptr = rfc822comments(fullline, NULL);
82933497
 				if(ptr) {
 					free(fullline);
 					fullline = ptr;
 				}
 
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
 					continue;
 
 				free(fullline);
 				fullline = NULL;
 			}
8f1f6383
 		} else if(line && isuuencodebegin(line)) {
b3a5cdd8
 			/*
 			 * Fast track visa to uudecode.
 			 * TODO: binhex, yenc
 			 */
c5c3b7c7
 			bodyIsEmpty = FALSE;
165d8543
 			if(uudecodeFile(ret, line, dir, fin) < 0)
8f1f6383
 				if(messageAddStr(ret, line) < 0)
 					break;
2c405268
 		} else {
 			if(line == NULL) {
7133ee9d
 				/*
 				 * Although this would save time and RAM, some
 				 * phish signatures have been built which need
 				 * the blank lines
 				 */
 				if(lastBodyLineWasBlank &&
 				  (messageGetMimeType(ret) != TEXT)) {
2c405268
 					cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
 					continue;
 				}
 				lastBodyLineWasBlank = TRUE;
c5c3b7c7
 			} else {
 				if(bodyIsEmpty) {
 					/*
 					 * Broken message: new line in the
 					 * middle of the headers, so the first
 					 * line of the body is in fact
 					 * the last lines of the header
 					 */
4db74788
 					if(newline_in_header(line))
c5c3b7c7
 						continue;
4db74788
 					bodyIsEmpty = FALSE;
c5c3b7c7
 				}
2c405268
 				lastBodyLineWasBlank = FALSE;
c5c3b7c7
 			}
2c405268
 
795d3afe
 			if(messageAddStr(ret, line) < 0)
82933497
 				break;
2c405268
 		}
b7afd2bf
 	} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
82933497
 
67fac7ee
 	if(boundary)
 		free(boundary);
 
82933497
 	if(fullline) {
 		if(*fullline) switch(commandNumber) {
 			case CONTENT_TRANSFER_ENCODING:
 			case CONTENT_DISPOSITION:
 			case CONTENT_TYPE:
ed92b9c2
 				cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
82933497
 		}
 		free(fullline);
 	}
 
 	if(!anyHeadersFound) {
 		/*
 		 * False positive in believing we have an e-mail when we don't
 		 */
 		messageDestroy(ret);
 		cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
 		return NULL;
 	}
 
 	messageClean(ret);
 
 	cli_dbgmsg("parseEmailFile: return\n");
 
 	return ret;
 }
 
 /*
  * The given message contains a raw e-mail.
68be129f
  *
  * Returns the message's body with the correct arguments set
735377bc
  *
  * The downside of this approach is that for a short time we have two copies
  * of the message in memory, the upside is that it makes for easier parsing
  * of encapsulated messages, and in the long run uses less memory in those
  * scenarios
82933497
  *
  * TODO: remove the duplication with parseEmailFile
7fca6080
  */
68be129f
 static message *
985cc85e
 parseEmailHeaders(message *m, const table_t *rfc821)
7fca6080
 {
68be129f
 	bool inHeader = TRUE;
ed92b9c2
 	bool bodyIsEmpty = TRUE;
de617e3e
 	const text *t;
f54a8635
 	message *ret;
f2b068fb
 	bool anyHeadersFound = FALSE;
4fc38d69
 	int commandNumber = -1;
d768ac5a
 	char *fullline = NULL;
ad642304
 	size_t fulllinelength = 0;
f54a8635
 
98685ac1
 	cli_dbgmsg("parseEmailHeaders\n");
 
f54a8635
 	if(m == NULL)
 		return NULL;
 
 	ret = messageCreate();
7fca6080
 
de617e3e
 	for(t = messageGetBody(m); t; t = t->t_next) {
4db74788
 		const char *line;
7fca6080
 
de617e3e
 		if(t->t_line)
4db74788
 			line = lineGetData(t->t_line);
de617e3e
 		else
4db74788
 			line = NULL;
7fca6080
 
b4cb4486
 		if(inHeader) {
663e5963
 			cli_dbgmsg("parseEmailHeaders: check '%s'\n",
4db74788
 				line ? line : "");
 			if(line == NULL) {
663e5963
 				/*
 				 * A blank line signifies the end of
 				 * the header and the start of the text
 				 */
 				cli_dbgmsg("End of header information\n");
f52d7358
 				if(!anyHeadersFound) {
 					cli_dbgmsg("Nothing interesting in the header\n");
 					break;
 				}
ed92b9c2
 				inHeader = FALSE;
 				bodyIsEmpty = TRUE;
ad642304
 			} else {
0856891e
 				char *ptr;
 
ad642304
 				if(fullline == NULL) {
2683ac8b
 					char cmd[RFC2821LENGTH + 1];
9180b8bb
 
 					/*
 					 * Continuation of line we're ignoring?
 					 */
4db74788
 					if(isblank(line[0]))
9180b8bb
 						continue;
 
 					/*
 					 * Is this a header we're interested in?
 					 */
4db74788
 					if((strchr(line, ':') == NULL) ||
 					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
 						if(strncmp(line, "From ", 5) == 0)
0856891e
 							anyHeadersFound = TRUE;
9180b8bb
 						continue;
0856891e
 					}
9180b8bb
 
273cd2bb
 					ptr = rfc822comments(cmd, NULL);
a1c924f9
 					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 					if(ptr)
 						free(ptr);
9180b8bb
 
 					switch(commandNumber) {
 						case CONTENT_TRANSFER_ENCODING:
 						case CONTENT_DISPOSITION:
 						case CONTENT_TYPE:
0856891e
 							anyHeadersFound = TRUE;
9180b8bb
 							break;
 						default:
82933497
 							if(!anyHeadersFound)
 								anyHeadersFound = usefulHeader(commandNumber, cmd);
9180b8bb
 							continue;
 					}
4db74788
 					fullline = cli_strdup(line);
 					fulllinelength = strlen(line) + 1;
 				} else if(line) {
 					fulllinelength += strlen(line);
beb70346
 					ptr = cli_realloc(fullline, fulllinelength);
 					if(ptr == NULL)
 						continue;
 					fullline = ptr;
4db74788
 					strcat(fullline, line);
ad642304
 				}
3a0946f5
 
82933497
 				assert(fullline != NULL);
ad642304
 
0066d39b
 				if(next_is_folded_header(t))
 					/* Add arguments to this line */
 					continue;
ad642304
 
dd2131c9
 				if(count_quotes(fullline) & 1)
ad642304
 					continue;
 
273cd2bb
 				ptr = rfc822comments(fullline, NULL);
ad642304
 				if(ptr) {
 					free(fullline);
 					fullline = ptr;
 				}
37819555
 
82933497
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
 					continue;
b4cb4486
 
82933497
 				free(fullline);
 				fullline = NULL;
d32343c3
 			}
ed92b9c2
 		} else {
 			if(bodyIsEmpty) {
4db74788
 				if(line == NULL)
ed92b9c2
 					/* throw away leading blank lines */
 					continue;
 				/*
 				 * Broken message: new line in the
 				 * middle of the headers, so the first
 				 * line of the body is in fact
 				 * the last lines of the header
 				 */
4db74788
 				if(newline_in_header(line))
ed92b9c2
 					continue;
 				bodyIsEmpty = FALSE;
 			}
 			/*if(t->t_line && isuuencodebegin(t->t_line))
 				puts("FIXME: add fast visa here");*/
4db74788
 			/*cli_dbgmsg("Add line to body '%s'\n", line);*/
de617e3e
 			if(messageAddLine(ret, t->t_line) < 0)
80a8c7d8
 				break;
ed92b9c2
 		}
ffd59a3e
 	}
68be129f
 
d768ac5a
 	if(fullline) {
ad642304
 		if(*fullline) switch(commandNumber) {
 			case CONTENT_TRANSFER_ENCODING:
 			case CONTENT_DISPOSITION:
 			case CONTENT_TYPE:
17bd4b65
 				cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
ad642304
 		}
d768ac5a
 		free(fullline);
 	}
 
f2b068fb
 	if(!anyHeadersFound) {
 		/*
 		 * False positive in believing we have an e-mail when we don't
 		 */
 		messageDestroy(ret);
 		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
 		return NULL;
 	}
 
4465fb04
 	messageClean(ret);
 
09ccd6e0
 	cli_dbgmsg("parseEmailHeaders: return\n");
 
68be129f
 	return ret;
7fca6080
 }
 
 /*
8c0250d5
  * Handle a header line of an email message
  */
 static int
49674596
 parseEmailHeader(message *m, const char *line, const table_t *rfc821)
8c0250d5
 {
de509b8e
 	char *cmd;
8c0250d5
 	int ret = -1;
 #ifdef CL_THREAD_SAFE
 	char *strptr;
 #endif
31b05bcb
 	const char *separater;
0674e2af
 	char *copy, tokenseparater[2];
8c0250d5
 
0704dad8
 	cli_dbgmsg("parseEmailHeader '%s'\n", line);
 
31b05bcb
 	/*
 	 * In RFC822 the separater between the key a value is a colon,
 	 * e.g.	Content-Transfer-Encoding: base64
 	 * However some MUA's are lapse about this and virus writers exploit
 	 * this hole, so we need to check all known possiblities
 	 */
 	for(separater = ":= "; *separater; separater++)
 		if(strchr(line, *separater) != NULL)
 			break;
 
 	if(*separater == '\0')
74b5c349
 		return -1;
 
0674e2af
 	copy = rfc2047(line);
 	if(copy == NULL)
bd6146af
 		/* an RFC checker would return -1 here */
4db74788
 		copy = cli_strdup(line);
d1382234
 
31b05bcb
 	tokenseparater[0] = *separater;
 	tokenseparater[1] = '\0';
 
897fd9c7
 #ifdef	CL_THREAD_SAFE
31b05bcb
 	cmd = strtok_r(copy, tokenseparater, &strptr);
897fd9c7
 #else
 	cmd = strtok(copy, tokenseparater);
 #endif
8c0250d5
 
3a0ef2ee
 	if(cmd && (strstrip(cmd) > 0)) {
897fd9c7
 #ifdef	CL_THREAD_SAFE
8c0250d5
 		char *arg = strtok_r(NULL, "", &strptr);
897fd9c7
 #else
 		char *arg = strtok(NULL, "");
 #endif
8c0250d5
 
 		if(arg)
 			/*
 			 * Found a header such as
 			 * Content-Type: multipart/mixed;
 			 * set arg to be
 			 * "multipart/mixed" and cmd to
39ff42ee
 			 * be "Content-Type"
8c0250d5
 			 */
49674596
 			ret = parseMimeHeader(m, cmd, rfc821, arg);
8c0250d5
 	}
0674e2af
 	free(copy);
8c0250d5
 	return ret;
 }
 
 /*
b151ef55
  * This is a recursive routine.
3edf8bda
  * FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION
b151ef55
  *
7fca6080
  * This function parses the body of mainMessage and saves its attachments in dir
  *
68be129f
  * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
d32343c3
  * any headers. First time of calling it'll be
  * the whole message. Later it'll be parts of a multipart message
b151ef55
  * textIn is the plain text message being built up so far
  */
2345b4cd
 static mbox_status
ddd53493
 parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
b151ef55
 {
a15ad8f3
 	mbox_status rc;
985cc85e
 	text *aText = textIn;
 	message *mainMessage = messageIn;
565c449d
 	fileblob *fb;
b65d2aad
 	bool infected = FALSE;
7133ee9d
 #ifdef CL_EXPERIMENTAL
19a2af6d
 	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS; /* || (mctx->ctx->options&CL_SCAN_PHISHING_GA_TRAIN) || (mctx->ctx->options&CL_SCAN_PHISHING_GA);  kept here for the GA MERGE */
7133ee9d
 #endif
aea1b159
 
565c449d
 	cli_dbgmsg("in parseEmailBody\n");
b151ef55
 
5e45f438
 	if(mctx->ctx->limits->maxmailrec) {
 		const cli_ctx *ctx = mctx->ctx;	/* needed for BLOCKMAX :-( */
 
52f670eb
 		/*
 		 * This is approximate
 		 */
a8310427
 		if(recursion_level > ctx->limits->maxmailrec) {
 
 			cli_warnmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
 			if(BLOCKMAX) {
 				if(ctx->virname)
 					*ctx->virname = "MIME.RecursionLimit";
 				return VIRUS;
 			} else
 				return OK_ATTACHMENTS_NOT_SAVED;
ddd53493
 		}
5e45f438
 	}
ddd53493
 
3c533c6f
 	rc = OK;
 
b151ef55
 	/* Anything left to be parsed? */
0bcad2b1
 	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
b151ef55
 		mime_type mimeType;
985cc85e
 		int subtype, inhead, htmltextPart, inMimeHead, i;
dbca666a
 		const char *mimeSubtype;
 		char *protocol, *boundary;
b151ef55
 		const text *t_line;
f5e9abc8
 		/*bool isAlternative;*/
b151ef55
 		message *aMessage;
985cc85e
 		int multiparts = 0;
 		message **messages = NULL;	/* parts of a multipart message */
b151ef55
 
c6259ac5
 		cli_dbgmsg("Parsing mail file\n");
 
b151ef55
 		mimeType = messageGetMimeType(mainMessage);
 		mimeSubtype = messageGetMimeSubtype(mainMessage);
 
20917083
 		/* pre-process */
d9bde711
 		subtype = tableFind(mctx->subtypeTable, mimeSubtype);
5eeffbb9
 		if((mimeType == TEXT) && (subtype == PLAIN)) {
b151ef55
 			/*
 			 * This is effectively no encoding, notice that we
 			 * don't check that charset is us-ascii
 			 */
 			cli_dbgmsg("assume no encoding\n");
 			mimeType = NOMIME;
b3a5cdd8
 			messageSetMimeSubtype(mainMessage, "");
20917083
 		} else if((mimeType == MESSAGE) &&
 			  (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
 			/*
 			 * RFC1892/RFC3462: section 2 text/rfc822-headers
 			 * incorrectly sent as message/rfc822-headers
ef5eba29
 			 *
 			 * Parse as text/plain, i.e. no mime
20917083
 			 */
 			cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
ef5eba29
 			mimeType = NOMIME;
b3a5cdd8
 			messageSetMimeSubtype(mainMessage, "");
f3ec89d2
 		} else
 			cli_dbgmsg("mimeType = %d\n", mimeType);
c6259ac5
 
b151ef55
 		switch(mimeType) {
 		case NOMIME:
22f3b19b
 			cli_dbgmsg("Not a mime encoded message\n");
b151ef55
 			aText = textAddMessage(aText, mainMessage);
7133ee9d
 #ifdef CL_EXPERIMENTAL
d996b7e8
 			if(!doPhishingScan)
 				break;
89343098
 			/*
 			 * Fall through: some phishing mails claim they are
 			 * text/plain, when they are in fact html
 			 */
e0e62fa1
 #else
 			break;
7133ee9d
 #endif
b151ef55
 		case TEXT:
3497daca
 			/* text/plain has been preprocessed as no encoding */
7133ee9d
 #ifdef CL_EXPERIMENTAL
e0e62fa1
 			if((subtype == HTML) || doPhishingScan) {
7133ee9d
 #else
d9bde711
 			if((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML))
7133ee9d
 #endif
195e3683
 				/*
 				 * It would be better to save and scan the
 				 * file and only checkURLs if it's found to be
 				 * clean
 				 */
e0e62fa1
 				checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
7133ee9d
 #ifdef CL_EXPERIMENTAL
e0e62fa1
 				/*
 				 * There might be html sent without subtype
 				 * html too, so scan them for phishing
 				 */
2345b4cd
 				if(rc == VIRUS)
e0e62fa1
 					infected = TRUE;
7133ee9d
 			}
 #endif
b151ef55
 			break;
 		case MULTIPART:
10c3ed55
 			cli_dbgmsg("Content-type 'multipart' handler\n");
b151ef55
 			boundary = messageFindArgument(mainMessage, "boundary");
 
 			if(boundary == NULL) {
0066d39b
 				cli_warnmsg("Multipart/%s MIME message contains no boundary header\n",
 					mimeSubtype);
2227f20e
 				/* Broken e-mail message */
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
 				 * check if the file contains a uuencoded file
 				 */
 				break;
b151ef55
 			}
 
cbc2eaa9
 			/* Perhaps it should assume mixed? */
93002b48
 			if(mimeSubtype[0] == '\0') {
 				cli_warnmsg("Multipart has no subtype assuming alternative\n");
 				mimeSubtype = "alternative";
 				messageSetMimeSubtype(mainMessage, "alternative");
 			}
 
b151ef55
 			/*
 			 * Get to the start of the first message
 			 */
0704dad8
 			t_line = messageGetBody(mainMessage);
 
 			if(t_line == NULL) {
 				cli_warnmsg("Multipart MIME message has no body\n");
 				free((char *)boundary);
 				mimeType = NOMIME;
 				break;
 			}
 
 			do
74ca33e9
 				if(t_line->t_line) {
 					if(boundaryStart(lineGetData(t_line->t_line), boundary))
 						break;
 					/*
2add0ed7
 					 * Found a binhex file before
4b54f2e0
 					 *	the first multipart
0856891e
 					 * TODO: check yEnc
74ca33e9
 					 */
2add0ed7
 					if(binhexBegin(mainMessage) == t_line) {
195e3683
 						if(exportBinhexMessage(mctx->dir, mainMessage)) {
f3ec89d2
 							/* virus found */
2345b4cd
 							rc = VIRUS;
195e3683
 							infected = TRUE;
f3ec89d2
 							break;
0856891e
 						}
a15ad8f3
 					} else if(t_line->t_next &&
a8310427
 						 (encodingLine(mainMessage) == t_line->t_next)) {
9f43cc75
 						/*
 						 * We look for the next line
 						 * since later on we'll skip
 						 * over the important line when
 						 * we think it's a blank line
 						 * at the top of the message -
 						 * which it would have been in
 						 * an RFC compliant world
 						 */
a15ad8f3
 						cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
 							lineGetData(t_line->t_next->t_line));
9f43cc75
 						if(messageGetEncoding(mainMessage) == NOENCODING)
 							break;
0856891e
 					}
74ca33e9
 				}
0704dad8
 			while((t_line = t_line->t_next) != NULL);
b151ef55
 
 			if(t_line == NULL) {
195e3683
 				cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
 					boundary);
bf8ea488
 				/*
 				 * Free added by Thomas Lamy
 				 * <Thomas.Lamy@in-online.net>
 				 */
 				free((char *)boundary);
2227f20e
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
2add0ed7
 				 * check if the file contains a yEnc/binhex file
2227f20e
 				 */
 				break;
b151ef55
 			}
 			/*
 			 * Build up a table of all of the parts of this
 			 * multipart message. Remember, each part may itself
 			 * be a multipart message.
 			 */
 			inhead = 1;
 			inMimeHead = 0;
 
68be129f
 			/*
7b45c8e3
 			 * Re-read this variable in case mimeSubtype has changed
 			 */
 			subtype = tableFind(mctx->subtypeTable, mimeSubtype);
 
 			/*
b62a19da
 			 * Parse the mainMessage object and create an array
 			 * of objects called messages, one for each of the
7b45c8e3
 			 * multiparts that mainMessage contains.
0e01c158
 			 *
68be129f
 			 * This looks like parseEmailHeaders() - maybe there's
 			 * some duplication of code to be cleaned up
7b45c8e3
 			 *
0066d39b
 			 * We may need to create an array rather than just
7b45c8e3
 			 * save each part as it is found because not all
 			 * elements will need scanning, and we don't yet know
 			 * which of those elements it will be, except in
 			 * the case of mixed, when all parts need to be scanned.
68be129f
 			 */
7b45c8e3
 			for(multiparts = 0; t_line && !infected; multiparts++) {
26564cf5
 				int lines = 0;
79e432d2
 				message **m;
26564cf5
 
79e432d2
 				m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
d32343c3
 				if(m == NULL)
79e432d2
 					break;
 				messages = m;
6613d595
 
b151ef55
 				aMessage = messages[multiparts] = messageCreate();
89e9a596
 				if(aMessage == NULL) {
 					multiparts--;
 					continue;
 				}
d9bde711
 				messageSetCTX(aMessage, mctx->ctx);
b151ef55
 
 				cli_dbgmsg("Now read in part %d\n", multiparts);
 
0bf1353d
 				/*
 				 * Ignore blank lines. There shouldn't be ANY
 				 * but some viruses insert them
 				 */
98685ac1
 				while((t_line = t_line->t_next) != NULL)
de617e3e
 					if(t_line->t_line &&
 					   /*(cli_chomp(t_line->t_text) > 0))*/
 					   (strlen(lineGetData(t_line->t_line)) > 0))
784e2335
 						break;
0bf1353d
 
 				if(t_line == NULL) {
 					cli_dbgmsg("Empty part\n");
61db35a1
 					/*
 					 * Remove this part unless there's
2add0ed7
 					 * a binhex portion somewhere in
61db35a1
 					 * the complete message that we may
 					 * throw away by mistake if the MIME
 					 * encoding information is incorrect
 					 */
7b45c8e3
 					if(mainMessage &&
 					   (binhexBegin(mainMessage) == NULL)) {
61db35a1
 						messageDestroy(aMessage);
 						--multiparts;
 					}
0bf1353d
 					continue;
 				}
 
 				do {
de617e3e
 					const char *line = lineGetData(t_line->t_line);
b151ef55
 
8f1f6383
 					/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
 						multiparts, inMimeHead, inhead, boundary, line,
30fb8a0b
 						t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
b151ef55
 
e0377124
 					if(inMimeHead) {	/* continuation line */
98685ac1
 						if(line == NULL) {
699fafc3
 							/*inhead =*/ inMimeHead = 0;
98685ac1
 							continue;
 						}
7baeb4a6
 						/*
 						 * Handle continuation lines
 						 * because the previous line
21cd233d
 						 * ended with a ; or this line
 						 * starts with a white space
7baeb4a6
 						 */
21cd233d
 						cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
 							multiparts, line);
7baeb4a6
 						/*
 						 * Handle the case when it
 						 * isn't really a continuation
 						 * line:
 						 * Content-Type: application/octet-stream;
 						 * Content-Transfer-Encoding: base64
 						 */
d9bde711
 						parseEmailHeader(aMessage, line, mctx->rfc821Table);
7baeb4a6
 
b151ef55
 						while(isspace((int)*line))
 							line++;
 
 						if(*line == '\0') {
 							inhead = inMimeHead = 0;
 							continue;
 						}
a6d794c7
 						inMimeHead = FALSE;
b151ef55
 						messageAddArgument(aMessage, line);
e0377124
 					} else if(inhead) {	/* handling normal headers */
dd2131c9
 						/*int quotes;*/
10c3ed55
 						char *fullline, *ptr;
ad642304
 
98685ac1
 						if(line == NULL) {
9cb47b80
 							/*
 							 * empty line, should the end of the headers,
 							 * but some base64 decoders, e.g. uudeview, are broken
 							 * and will handle this type of entry, decoding the
 							 * base64 content...
 							 * Content-Type: application/octet-stream; name=text.zip
 							 * Content-Transfer-Encoding: base64
 							 * Content-Disposition: attachment; filename="text.zip"
64ff0d49
 							 *
9cb47b80
 							 * Content-Disposition: attachment;
 							 *	filename=text.zip
 							 * Content-Type: application/octet-stream;
 							 *	name=text.zip
 							 * Content-Transfer-Encoding: base64
64ff0d49
 							 *
9cb47b80
 							 * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
 							 */
0066d39b
 							const text *next = t_line->t_next;
 
9cb47b80
 							if(next && next->t_line) {
 								const char *data = lineGetData(next->t_line);
6c4485f9
 
 								if((messageGetEncoding(aMessage) == NOENCODING) &&
0066d39b
 								   (messageGetMimeType(aMessage) == APPLICATION) &&
 								   strstr(data, "base64")) {
273cd2bb
 									/*
 									 * Handle this nightmare (note the blank
 									 * line in the header and the incorrect
 									 * content-transfer-encoding header)
 									 *
 									 * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
 									 *
 									 * r-Encoding: base64
 									 * Content-Disposition: attachment; filename="zipped_files.EXE"
 									 */
0066d39b
 									messageSetEncoding(aMessage, "base64");
 									cli_dbgmsg("Ignoring fake end of headers\n");
 									continue;
 								}
11466e82
 								if((strncmp(data, "Content", 7) == 0) ||
 								   (strncmp(data, "filename=", 9) == 0)) {
9cb47b80
 									cli_dbgmsg("Ignoring fake end of headers\n");
 									continue;
 								}
 							}
699fafc3
 							cli_dbgmsg("Multipart %d: End of header information\n",
 								multiparts);
b151ef55
 							inhead = 0;
 							continue;
 						}
a64bf87e
 						if(isspace((int)*line)) {
 							/*
 							 * The first line is
 							 * continuation line.
 							 * This is tricky
 							 * to handle, but
 							 * all we can do is our
 							 * best
 							 */
 							cli_dbgmsg("Part %d starts with a continuation line\n",
 								multiparts);
 							messageAddArgument(aMessage, line);
 							/*
 							 * Give it a default
 							 * MIME type since
 							 * that may be the
 							 * missing line
 							 *
 							 * Choose application to
 							 * force a save
 							 */
 							if(messageGetMimeType(aMessage) == NOMIME)
 								messageSetMimeType(aMessage, "application");
 							continue;
 						}
 
10c3ed55
 						inMimeHead = FALSE;
de617e3e
 
2683ac8b
 						assert(strlen(line) <= RFC2821LENGTH);
e0377124
 
273cd2bb
 						fullline = rfc822comments(line, NULL);
10c3ed55
 						if(fullline == NULL)
4db74788
 							fullline = cli_strdup(line);
ad642304
 
dd2131c9
 						/*quotes = count_quotes(fullline);*/
e0377124
 
10c3ed55
 						/*
 						 * Fold next lines to the end of this
 						 * if they start with a white space
 						 * or if this line has an odd number of quotes:
 						 * Content-Type: application/octet-stream; name="foo
 						 * "
 						 */
0066d39b
 						while(t_line && next_is_folded_header(t_line)) {
 							const char *data;
 
 							t_line = t_line->t_next;
 
 							data = lineGetData(t_line->t_line);
e0377124
 
359926e7
 							if(data[1] == '\0') {
 								/*
 								 * Broken message: the
 								 * blank line at the end
 								 * of the headers isn't blank -
 								 * it contains a space
 								 */
 								cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
 									multiparts);
 								inhead = FALSE;
 								break;
 							}
 
10c3ed55
 							ptr = cli_realloc(fullline,
 								strlen(fullline) + strlen(data) + 1);
68badbc1
 
10c3ed55
 							if(ptr == NULL)
 								break;
21cd233d
 
10c3ed55
 							fullline = ptr;
 							strcat(fullline, data);
ad642304
 
dd2131c9
 							/*quotes = count_quotes(data);*/
21cd233d
 						}
0066d39b
 
10c3ed55
 						cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
 							multiparts, fullline);
 
d9bde711
 						parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
10c3ed55
 						free(fullline);
b151ef55
 					} else if(endOfMessage(line, boundary)) {
 						/*
 						 * Some viruses put information
 						 * *after* the end of message,
 						 * which presumably some broken
 						 * mail clients find, so we
 						 * can't assume that this
 						 * is the end of the message
 						 */
 						/* t_line = NULL;*/
 						break;
30fb8a0b
 					} else if(boundaryStart(line, boundary)) {
 						inhead = 1;
 						break;
26564cf5
 					} else {
de617e3e
 						if(messageAddLine(aMessage, t_line->t_line) < 0)
79e432d2
 							break;
26564cf5
 						lines++;
 					}
0bf1353d
 				} while((t_line = t_line->t_next) != NULL);
 
26564cf5
 				cli_dbgmsg("Part %d has %d lines\n",
 					multiparts, lines);
7b45c8e3
 
 				/*
 				 * Only save in the array of messages if some
 				 * decision will be taken on whether to scan.
 				 * If all parts will be scanned then save to
 				 * file straight away
 				 */
 				switch(subtype) {
 					case MIXED:
 					case ALTERNATIVE:
 					case REPORT:
 					case DIGEST:
 					case APPLEDOUBLE:
 					case KNOWBOT:
 					case -1:
 						mainMessage = do_multipart(mainMessage,
 							messages, multiparts,
 							&rc, mctx, messageIn,
52f670eb
 							&aText, recursion_level);
7b45c8e3
 						--multiparts;
2345b4cd
 						if(rc == VIRUS)
7b45c8e3
 							infected = TRUE;
 						break;
 					default:
 						messageClean(aMessage);
 				}
b151ef55
 			}
 
 			free((char *)boundary);
 
6638be41
 			/*
cbc2eaa9
 			 * Preprocess. Anything special to be done before
 			 * we handle the multiparts?
b62a19da
 			 */
7b45c8e3
 			switch(subtype) {
cbc2eaa9
 				case KNOWBOT:
 					/* TODO */
 					cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
 					mimeSubtype = "mixed";
 					break;
e279f3ea
 				case -1:
 					/*
 					 * According to section 7.2.6 of
 					 * RFC1521, unrecognised multiparts
 					 * should be treated as multipart/mixed.
 					 */
0cb54827
 					cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
e279f3ea
 					mimeSubtype = "mixed";
 					break;
cbc2eaa9
 			}
b62a19da
 
 			/*
6638be41
 			 * We've finished message we're parsing
 			 */
 			if(mainMessage && (mainMessage != messageIn)) {
 				messageDestroy(mainMessage);
 				mainMessage = NULL;
2250ea69
 			}
b151ef55
 
7b45c8e3
 			cli_dbgmsg("The message has %d parts\n", multiparts);
 
 			if(((multiparts == 0) || infected) && (aText == NULL)) {
038c52be
 				if(messages) {
 					for(i = 0; i < multiparts; i++)
 						if(messages[i])
 							messageDestroy(messages[i]);
6613d595
 					free(messages);
038c52be
 				}
 
7b45c8e3
 				/*
 				 * FIXME: we could return 2 here when we have
 				 * saved stuff earlier
2345b4cd
 				 *
 				 * Nothing to do
7b45c8e3
 				 */
2345b4cd
 				return (rc == VIRUS) ? VIRUS : OK_ATTACHMENTS_NOT_SAVED;
6613d595
 			}
6638be41
 
49674596
 			cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
b151ef55
 
b62a19da
 			/*
 			 * We now have all the parts of the multipart message
 			 * in the messages array:
 			 *	message *messages[multiparts]
 			 * Let's decide what to do with them all
 			 */
d9bde711
 			switch(tableFind(mctx->subtypeTable, mimeSubtype)) {
b151ef55
 			case RELATED:
68be129f
 				cli_dbgmsg("Multipart related handler\n");
b151ef55
 				/*
295e425f
 				 * Have a look to see if there's HTML code
 				 * which will need scanning
b151ef55
 				 */
 				aMessage = NULL;
 				assert(multiparts > 0);
 
0bcad2b1
 				htmltextPart = getTextPart(messages, multiparts);
b151ef55
 
0bcad2b1
 				if(htmltextPart >= 0)
 					aText = textAddMessage(aText, messages[htmltextPart]);
b151ef55
 				else
 					/*
295e425f
 					 * There isn't an HTML bit. If there's a
 					 * multipart bit, it'll may be in there
 					 * somewhere
b151ef55
 					 */
 					for(i = 0; i < multiparts; i++)
 						if(messageGetMimeType(messages[i]) == MULTIPART) {
 							aMessage = messages[i];
0bcad2b1
 							htmltextPart = i;
b151ef55
 							break;
 						}
 
74c6f514
 				if(htmltextPart == -1)
a3d2da70
 					cli_dbgmsg("No HTML code found to be scanned\n");
74c6f514
 				else {
ddd53493
 					rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
2345b4cd
 					if(rc == OK) {
74c6f514
 						assert(aMessage == messages[htmltextPart]);
 						messageDestroy(aMessage);
 						messages[htmltextPart] = NULL;
 					}
 				}
b151ef55
 
 				/*
 				 * Fixed based on an idea from Stephen White <stephen@earth.li>
 				 * The message is confused about the difference
 				 * between alternative and related. Badtrans.B
 				 * suffers from this problem.
 				 *
 				 * Fall through in this case:
 				 * Content-Type: multipart/related;
 				 *	type="multipart/alternative"
 				 */
f5e9abc8
 				/*
 				 * Changed to always fall through based on
 				 * an idea from Michael Dankov <misha@btrc.ru>
 				 * that some viruses are completely confused
 				 * about the difference between related
 				 * and mixed
 				 */
 				/*cptr = messageFindArgument(mainMessage, "type");
b151ef55
 				if(cptr == NULL)
 					break;
 				isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
 				free((char *)cptr);
 				if(!isAlternative)
f5e9abc8
 					break;*/
d28e1902
 			case DIGEST:
 				/*
 				 * According to section 5.1.5 RFC2046, the
 				 * default mime type of multipart/digest parts
 				 * is message/rfc822
 				 *
 				 * We consider them as alternative, wrong in
 				 * the strictest sense since they aren't
 				 * alternatives - all parts a valid - but it's
 				 * OK for our needs since it means each part
 				 * will be scanned
 				 */
b151ef55
 			case ALTERNATIVE:
 				cli_dbgmsg("Multipart alternative handler\n");
 
 				/*
 				 * Fall through - some clients are broken and
 				 * say alternative instead of mixed. The Klez
e279f3ea
 				 * virus is broken that way, and anyway we
 				 * wish to scan all of the alternatives
b151ef55
 				 */
 			case REPORT:
 				/*
 				 * According to section 1 of RFC1892, the
 				 * syntax of multipart/report is the same
 				 * as multipart/mixed. There are some required
 				 * parameters, but there's no need for us to
 				 * verify that they exist
 				 */
 			case MIXED:
fdc8a467
 			case APPLEDOUBLE:	/* not really supported */
b151ef55
 				/*
 				 * Look for attachments
 				 *
 				 * Not all formats are supported. If an
 				 * unsupported format turns out to be
 				 * common enough to implement, it is a simple
 				 * matter to add it
 				 */
2250ea69
 				if(aText) {
 					if(mainMessage && (mainMessage != messageIn))
 						messageDestroy(mainMessage);
b151ef55
 					mainMessage = NULL;
2250ea69
 				}
b151ef55
 
 				cli_dbgmsg("Mixed message with %d parts\n", multiparts);
 				for(i = 0; i < multiparts; i++) {
d9bde711
 					mainMessage = do_multipart(mainMessage,
 						messages, i, &rc, mctx,
ddd53493
 						messageIn, &aText, recursion_level + 1);
2345b4cd
 					if(rc == VIRUS) {
b65d2aad
 						infected = TRUE;
 						break;
 					}
b151ef55
 				}
 
ddd53493
 				/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
b151ef55
 				break;
 			case SIGNED:
 			case PARALLEL:
 				/*
 				 * If we're here it could be because we have a
 				 * multipart/mixed message, consisting of a
 				 * message followed by an attachment. That
 				 * message itself is a multipart/alternative
 				 * message and we need to dig out the plain
 				 * text part of that alternative
 				 */
0bcad2b1
 				htmltextPart = getTextPart(messages, multiparts);
 				if(htmltextPart == -1)
 					htmltextPart = 0;
b151ef55
 
ddd53493
 				rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
b151ef55
 				break;
b62a19da
 			case ENCRYPTED:
37032143
 				rc = FAIL;	/* Not yet handled */
cbc2eaa9
 				protocol = (char *)messageFindArgument(mainMessage, "protocol");
b62a19da
 				if(protocol) {
 					if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
 						/* RFC2015 */
 						cli_warnmsg("PGP encoded attachment not scanned\n");
2345b4cd
 						rc = OK_ATTACHMENTS_NOT_SAVED;
b62a19da
 					} else
a363da65
 						cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", protocol);
b62a19da
 					free(protocol);
 				} else
23e1c37c
 					cli_dbgmsg("Encryption method missing protocol name\n");
b62a19da
 
 				break;
b151ef55
 			default:
e279f3ea
 				assert(0);
b151ef55
 			}
 
2250ea69
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
 
c29ebe66
 			if(aText && (textIn == NULL)) {
b65d2aad
 				if((!infected) && (fb = fileblobCreate()) != NULL) {
10c3ed55
 					cli_dbgmsg("Save non mime and/or text/plain part\n");
d9bde711
 					fileblobSetFilename(fb, mctx->dir, "textpart");
b5053e3f
 					/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
d9bde711
 					fileblobSetCTX(fb, mctx->ctx);
0e01c158
 					(void)textToFileblob(aText, fb, 1);
c29ebe66
 
 					fileblobDestroy(fb);
 				}
c6259ac5
 				textDestroy(aText);
c29ebe66
 			}
c6259ac5
 
0856891e
 			for(i = 0; i < multiparts; i++)
 				if(messages[i])
 					messageDestroy(messages[i]);
 
6613d595
 			if(messages)
 				free(messages);
 
b151ef55
 			return rc;
 
 		case MESSAGE:
 			/*
 			 * Check for forbidden encodings
 			 */
 			switch(messageGetEncoding(mainMessage)) {
 				case NOENCODING:
 				case EIGHTBIT:
 				case BINARY:
 					break;
 				default:
c6259ac5
 					cli_warnmsg("MIME type 'message' cannot be decoded\n");
b151ef55
 					break;
 			}
2345b4cd
 			rc = FAIL;
c6259ac5
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
d9bde711
 				message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
bad123c6
 				if(m) {
56896211
 					cli_dbgmsg("Decode rfc822\n");
bad123c6
 
d9bde711
 					messageSetCTX(m, mctx->ctx);
b65d2aad
 
4465fb04
 					if(mainMessage && (mainMessage != messageIn)) {
 						messageDestroy(mainMessage);
 						mainMessage = NULL;
74c6f514
 					} else
 						messageReset(mainMessage);
bad123c6
 					if(messageGetBody(m))
ddd53493
 						rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
bad123c6
 
 					messageDestroy(m);
 				}
b151ef55
 				break;
f8c25c7a
 			} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
d6e30cce
 				/* RFC 2298 - handle like a normal email */
2345b4cd
 				rc = OK;
d6e30cce
 				break;
f8c25c7a
 			} else if(strcasecmp(mimeSubtype, "partial") == 0) {
9a7398ee
 #ifdef	PARTIAL_DIR
 				/* RFC1341 message split over many emails */
d9bde711
 				if(rfc1341(mainMessage, mctx->dir) >= 0)
2345b4cd
 					rc = OK;
9a7398ee
 #else
e94471f4
 				cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
9a7398ee
 #endif
 			} else if(strcasecmp(mimeSubtype, "external-body") == 0)
e94471f4
 				/* TODO */
b151ef55
 				cli_warnmsg("Attempt to send Content-type message/external-body trapped");
bf8ea488
 			else
a363da65
 				cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
b151ef55
 
9a7398ee
 
2250ea69
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
6613d595
 			if(messages)
 				free(messages);
9a7398ee
 			return rc;
b151ef55
 
 		case APPLICATION:
23e1c37c
 			/*cptr = messageGetMimeSubtype(mainMessage);
0bcad2b1
 
23e1c37c
 			if((strcasecmp(cptr, "octet-stream") == 0) ||
04421a14
 			   (strcasecmp(cptr, "x-msdownload") == 0)) {*/
 			{
985cc85e
 				fb = messageToFileblob(mainMessage, mctx->dir, 1);
b151ef55
 
1e06e1ab
 				if(fb) {
 					cli_dbgmsg("Saving main message as attachment\n");
 					fileblobDestroy(fb);
43756987
 					if(mainMessage != messageIn) {
 						messageDestroy(mainMessage);
 						mainMessage = NULL;
 					} else
 						messageReset(mainMessage);
b151ef55
 				}
04421a14
 			} /*else
 				cli_warnmsg("Discarded application not sent as attachment\n");*/
b151ef55
 			break;
 
 		case AUDIO:
 		case VIDEO:
 		case IMAGE:
 			break;
 
 		default:
 			cli_warnmsg("Message received with unknown mime encoding");
 			break;
 		}
985cc85e
 
038c52be
 		if(messages) {
 			/* "can't happen" */
b01f527d
 			cli_warnmsg("messages != NULL, report to http://bugs.clamav.net\n");
985cc85e
 			free(messages);
038c52be
 		}
b151ef55
 	}
 
d32343c3
 	if(aText && (textIn == NULL)) {
22f3b19b
 		/* Look for a bounce in the text (non mime encoded) portion */
 		const text *t;
0856891e
 
22f3b19b
 		for(t = aText; t; t = t->t_next) {
 			const line_t *l = t->t_line;
4b54f2e0
 			const text *lookahead, *topofbounce;
22f3b19b
 			const char *s;
4b54f2e0
 			bool inheader;
0856891e
 
22f3b19b
 			if(l == NULL)
 				continue;
 
b65d2aad
 			if(!isBounceStart(lineGetData(l)))
22f3b19b
 				continue;
 
 			/*
 			 * We've found what looks like the start of a bounce
 			 * message. Only bother saving if it really is a bounce
 			 * message, this helps to speed up scanning of ping-pong
 			 * messages that have lots of bounces within bounces in
 			 * them
 			 */
 			for(lookahead = t->t_next; lookahead; lookahead = lookahead->t_next) {
 				l = lookahead->t_line;
 
 				if(l == NULL)
 					break;
 				s = lineGetData(l);
 				if(strncasecmp(s, "Content-Type:", 13) == 0)
 					/*
 					 * Don't bother with plain/text or
 					 * plain/html
 					 */
 					if(strstr(s, "text/") == NULL)
0d9e07a9
 						/*
 						 * Don't bother to save the unuseful
 						 * part
 						 */
22f3b19b
 						break;
 			}
 
 			if(lookahead && (lookahead->t_line == NULL)) {
 				cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
 				t = lookahead;
 				/* look for next bounce message */
 				continue;
 			}
 
0d9e07a9
 			/*
 			 * Prescan the bounce message to see if there's likely
 			 * to be anything nasty.
 			 * This algorithm is hand crafted and may be breakable
 			 * so all submissions are welcome. It's best NOT to
 			 * remove this however you may be tempted, because it
 			 * significantly speeds up the scanning of multiple
 			 * bounces (i.e. bounces within many bounces)
 			 */
 			for(; lookahead; lookahead = lookahead->t_next) {
 				l = lookahead->t_line;
 
 				if(l) {
 					s = lineGetData(l);
 					if((strncasecmp(s, "Content-Type:", 13) == 0) &&
 					   (strstr(s, "multipart/") == NULL) &&
 					   (strstr(s, "message/rfc822") == NULL) &&
 					   (strstr(s, "text/plain") == NULL))
 						break;
 				}
 			}
 			if(lookahead == NULL) {
f60a8d41
 				cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
0d9e07a9
 				/* nothing here, move along please */
 				break;
 			}
4b54f2e0
 			if((fb = fileblobCreate()) == NULL)
 				break;
 			cli_dbgmsg("Save non mime part bounce message\n");
d9bde711
 			fileblobSetFilename(fb, mctx->dir, "bounce");
dbca666a
 			fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
d9bde711
 			fileblobSetCTX(fb, mctx->ctx);
4b54f2e0
 
 			inheader = TRUE;
 			topofbounce = NULL;
a4b13e7a
 			do {
4b54f2e0
 				l = t->t_line;
 
 				if(l == NULL) {
 					if(inheader) {
 						inheader = FALSE;
 						topofbounce = t;
 					}
 				} else {
 					s = lineGetData(l);
dbca666a
 					fileblobAddData(fb, (const unsigned char *)s, strlen(s));
4b54f2e0
 				}
dbca666a
 				fileblobAddData(fb, (const unsigned char *)"\n", 1);
4b54f2e0
 				lookahead = t->t_next;
 				if(lookahead == NULL)
 					break;
 				t = lookahead;
 				l = t->t_line;
 				if((!inheader) && l) {
 					s = lineGetData(l);
b65d2aad
 					if(isBounceStart(s)) {
133dcdcd
 						cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
4b54f2e0
 						break;
 					}
 				}
a4b13e7a
 			} while(!fileblobContainsVirus(fb));
4b54f2e0
 
 			fileblobDestroy(fb);
 			if(topofbounce)
 				t = topofbounce;
 			/*
 			 * Don't do this - it slows bugs.txt
 			 */
 			/*if(mainMessage)
 				mainMessage->bounce = NULL;*/
22f3b19b
 		}
d32343c3
 		textDestroy(aText);
 		aText = NULL;
 	}
 
565c449d
 	/*
 	 * No attachments - scan the text portions, often files
 	 * are hidden in HTML code
 	 */
2345b4cd
 	if(mainMessage && (rc != VIRUS)) {
195e3683
 		text *t_line;
 
b151ef55
 		/*
565c449d
 		 * Look for uu-encoded main file
b151ef55
 		 */
2add0ed7
 		if((encodingLine(mainMessage) != NULL) &&
195e3683
 		   ((t_line = bounceBegin(mainMessage)) != NULL)) {
 			if(exportBounceMessage(t_line, mctx))
2345b4cd
 				rc = OK;
565c449d
 		} else {
 			bool saveIt;
0bcad2b1
 
565c449d
 			if(messageGetMimeType(mainMessage) == MESSAGE)
15c8cace
 				/*
565c449d
 				 * Quick peek, if the encapsulated
 				 * message has no
 				 * content encoding statement don't
 				 * bother saving to scan, it's safe
15c8cace
 				 */
195e3683
 				saveIt = (bool)(encodingLine(mainMessage) != NULL);
565c449d
 			else if((t_line = encodingLine(mainMessage)) != NULL) {
92915cee
 				/*
565c449d
 				 * Some bounces include the message
 				 * body without the headers.
4b54f2e0
 				 * FIXME: Unfortunately this generates a
565c449d
 				 * lot of false positives that a bounce
 				 * has been found when it hasn't.
92915cee
 				 */
565c449d
 				if((fb = fileblobCreate()) != NULL) {
b3a5cdd8
 					cli_dbgmsg("Found a bounce message with no header at '%s'\n",
 						lineGetData(t_line->t_line));
d9bde711
 					fileblobSetFilename(fb, mctx->dir, "bounce");
00615ec9
 					fileblobAddData(fb,
 						(const unsigned char *)"Received: by clamd (bounce)\n",
 						28);
5a01973c
 
a4b13e7a
 					/*fileblobSetCTX(fb, ctx);*/
89343098
 					fileblobDestroy(textToFileblob(t_line, fb, 1));
b759d5eb
 				}
565c449d
 				saveIt = FALSE;
985cc85e
 			} else
565c449d
 				/*
 				 * Save the entire text portion,
 				 * since it it may be an HTML file with
f3ec89d2
 				 * a JavaScript virus or a phish
565c449d
 				 */
 				saveIt = TRUE;
b151ef55
 
565c449d
 			if(saveIt) {
 				cli_dbgmsg("Saving text part to scan\n");
0e01c158
 				saveTextPart(mainMessage, mctx->dir, 1);
74c6f514
 				if(mainMessage != messageIn) {
 					messageDestroy(mainMessage);
 					mainMessage = NULL;
 				} else
 					messageReset(mainMessage);
2345b4cd
 				rc = OK;
c6259ac5
 			}
b151ef55
 		}
565c449d
 	} else
2345b4cd
 		rc = OK_ATTACHMENTS_NOT_SAVED;	/* nothing saved */
b151ef55
 
2250ea69
 	if(mainMessage && (mainMessage != messageIn))
 		messageDestroy(mainMessage);
 
2345b4cd
 	if((rc != FAIL) && infected)
 		rc = VIRUS;
b65d2aad
 
68be129f
 	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
b151ef55
 
68be129f
 	return rc;
b151ef55
 }
 
 /*
  * Is the current line the start of a new section?
  *
  * New sections start with --boundary
  */
 static int
 boundaryStart(const char *line, const char *boundary)
 {
dbca666a
 	const char *ptr;
 	char *out;
2ed1bc5a
 	int rc;
2683ac8b
 	char buf[RFC2821LENGTH + 1];
ad642304
 
80a8c7d8
 	if(line == NULL)
 		return 0;	/* empty line */
67fac7ee
 	if(boundary == NULL)
 		return 0;
80a8c7d8
 
c29ebe66
 	/*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
ad642304
 
4b54f2e0
 	if((*line != '-') && (*line != '('))
 		return 0;
 
 	if(strchr(line, '-') == NULL)
 		return 0;
 
273cd2bb
 	if(strlen(line) <= sizeof(buf)) {
 		out = NULL;
 		ptr = rfc822comments(line, buf);
 	} else
dbca666a
 		ptr = out = rfc822comments(line, NULL);
273cd2bb
 
ad642304
 	if(ptr == NULL)
dbca666a
 		ptr = line;
ad642304
 
 	if(*ptr++ != '-') {
273cd2bb
 		if(out)
 			free(out);
80a8c7d8
 		return 0;
ad642304
 	}
80a8c7d8
 
b151ef55
 	/*
80a8c7d8
 	 * Gibe.B3 is broken, it has:
b151ef55
 	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
 	 * but it's boundaries look like
 	 *	------ =_NextPart_000_01C31177.9DC7C000
80a8c7d8
 	 * notice the one too few '-'.
 	 * Presumably this is a deliberate exploitation of a bug in some mail
 	 * clients.
 	 *
 	 * The trouble is that this creates a lot of false positives for
 	 * boundary conditions, if we're too lax about matches. We do our level
 	 * best to avoid these false positives. For example if we have
 	 * boundary="1" we want to ensure that we don't break out of every line
 	 * that has -1 in it instead of starting --1. This needs some more work.
f60a8d41
 	 *
 	 * Look with and without RFC822 comments stripped, I've seen some
 	 * samples where () are taken as comments in boundaries and some where
 	 * they're not. Irrespective of whatever RFC2822 says we need to find
 	 * viruses in both types of mails
b151ef55
 	 */
f60a8d41
 	if((strstr(ptr, boundary) != NULL) || (strstr(line, boundary) != NULL))
2345b4cd
 		rc = OK;
2ed1bc5a
 	else if(*ptr++ != '-')
2345b4cd
 		rc = FAIL;
2ed1bc5a
 	else
4ce9c996
 		rc = (strcasecmp(ptr, boundary) == 0);
2ed1bc5a
 
273cd2bb
 	if(out)
 		free(out);
2ed1bc5a
 
2345b4cd
 	if(rc == OK)
2ed1bc5a
 		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
 
 	return rc;
b151ef55
 }
 
 /*
  * Is the current line the end?
  *
  * The message ends with --boundary--
  *
  * line:	text of the current line, NULL for an empty line
  * boundary:	boundary string of the enclosing multipart
  *
  * Returns 1 if line starts with "--", then boundary (compared without
  * regard to case), then "--"; anything after that is ignored. Returns 0
  * otherwise, including when either argument is NULL.
  */
 static int
 endOfMessage(const char *line, const char *boundary)
 {
 	size_t len;
 
 	if(line == NULL)
 		return 0;
 	/* as in boundaryStart(), a missing boundary can never match */
 	if(boundary == NULL)
 		return 0;
 	/*cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);*/
 	if(*line++ != '-')
 		return 0;
 	if(*line++ != '-')
 		return 0;
 	len = strlen(boundary);
 	if(strncasecmp(line, boundary, len) != 0)
 		return 0;
 	/*
 	 * Use < rather than == because some broken mails have white
 	 * space after the boundary
 	 */
 	if(strlen(line) < (len + 2))
 		return 0;
 	/* step over the boundary text to the closing "--" */
 	line = &line[len];
 	if(*line++ != '-')
 		return 0;
 	return *line == '-';
 }
 
 /*
  * Initialise the various lookup tables
  *
  * rfc821Table:	receives a new table filled from rfc821headers
  * subtypeTable:	receives a new table filled from mimeSubtypes
  *
  * Returns 0 on success. On failure returns -1 after destroying any table
  * created here; *rfc821Table is then NULL, and *subtypeTable is NULL if it
  * was its creation or population that failed.
  */
 static int
 initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
 {
 	const struct tableinit *tableinit;
 
 	/*
 	 * Check the result of tableCreate() explicitly: this file defines
 	 * NDEBUG unless CL_DEBUG is set, so assert() is compiled out of
 	 * normal builds and a NULL table would be handed to tableInsert()
 	 */
 	*rfc821Table = tableCreate();
 	if(*rfc821Table == NULL)
 		return -1;
 
 	for(tableinit = rfc821headers; tableinit->key; tableinit++)
 		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
 			tableDestroy(*rfc821Table);
 			*rfc821Table = NULL;
 			return -1;
 		}
 
 	*subtypeTable = tableCreate();
 	if(*subtypeTable == NULL) {
 		/* don't leave the caller holding half of the pair */
 		tableDestroy(*rfc821Table);
 		*rfc821Table = NULL;
 		return -1;
 	}
 
 	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
 		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
 			tableDestroy(*rfc821Table);
 			tableDestroy(*subtypeTable);
 			*rfc821Table = NULL;
 			*subtypeTable = NULL;
 			return -1;
 		}
 
 	return 0;
 }
 
 /*
  * Choose the part of a multipart message that holds the text.
  *
  * If there's a HTML text part its index is returned straight away, since
  * HTML text is most likely to include a scripting worm. Otherwise the index
  * of the last part of type TEXT is returned.
  *
  * If we can't find one, return -1
  */
 static int
 getTextPart(message *const messages[], size_t size)
 {
 	size_t i;
 	int textpart = -1;

 	for(i = 0; i < size; i++) {
 		/*
 		 * An assert() here would be compiled out unless CL_DEBUG is
 		 * set, so skip empty slots explicitly
 		 */
 		if(messages[i] == NULL)
 			continue;
 		if(messageGetMimeType(messages[i]) == TEXT) {
 			if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
 				return (int)i;
 			textpart = (int)i;
 		}
 	}
 	return textpart;
 }
 
 /*
  * strip -
  *	Remove the trailing spaces from a buffer. Don't call this directly,
  * always call strstrip() which is a wrapper to this routine to be used with
  * NUL terminated strings. This code looks a bit strange because of its
  * heritage from code that worked on strings that weren't necessarily NUL
  * terminated.
  * TODO: rewrite for clamAV
  *
  * Returns its new length (a la strlen). Returns 0 if buf is NULL or
  * len <= 0, and leaves the buffer alone (returning strlen(buf)) if len is
  * larger than the string plus its terminating NUL.
  *
  * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
  *	not strlen(buf)
  */
 static size_t
 strip(char *buf, int len)
 {
 	register char *ptr;
 	register size_t i;

 	if((buf == NULL) || (len <= 0))
 		return 0;

 	i = strlen(buf);
 	if(len > (int)(i + 1))
 		return i;
 	ptr = &buf[--len];

 	/*
 	 * The <ctype.h> tests are only defined for values that fit in an
 	 * unsigned char (or EOF), hence the casts: plain char may be signed
 	 */
 #if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
 	do
 		if(*ptr)
 			*ptr = '\0';
 	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
 #else	/* more characters can be displayed on DOS */
 	do
 #ifndef	REAL_MODE_DOS
 		if(*ptr)	/* C8.0 puts into a text area */
 #endif
 			*ptr = '\0';
 	while((--len >= 0) && ((*--ptr == '\0') || (isspace((unsigned char)*ptr))));
 #endif
 	return((size_t)(len + 1));
 }
 
 /*
  * strstrip:
  *	Strip a given NUL terminated string in place using strip().
  *	Returns the new length of the string, or 0 if s is NULL
  */
 size_t
 strstrip(char *s)
 {
 	int size;

 	if(s == NULL)
 		return 0;

 	/* strip() takes the size of the buffer, i.e. the NUL is included */
 	size = (int)strlen(s) + 1;

 	return strip(s, size);
 }
 
 /*
  * Parse one MIME header, where cmd is the name of the field and arg is its
  * value, and store what was found in m.
  *
  * Content-Type, Content-Transfer-Encoding and Content-Disposition are
  * handled, any other header is ignored. RFC822 comments are removed from
  * cmd and arg before they are looked at. arg may be NULL.
  *
  * Always returns 0
  */
 static int
 parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
 {
 	char *copy, *p;
 	const char *ptr;
 	int commandNumber;

 	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, (arg) ? arg : "");

 	copy = rfc822comments(cmd, NULL);
 	if(copy) {
 		commandNumber = tableFind(rfc821Table, copy);
 		free(copy);
 	} else
 		commandNumber = tableFind(rfc821Table, cmd);

 	/* ptr is the value without comments, copy is freed on the way out */
 	copy = rfc822comments(arg, NULL);

 	if(copy)
 		ptr = copy;
 	else
 		ptr = arg;

 	switch(commandNumber) {
 		case CONTENT_TYPE:
 			/*
 			 * Fix for non RFC1521 compliant mailers
 			 * that send content-type: Text instead
 			 * of content-type: Text/Plain, or
 			 * just simply "Content-Type:"
 			 */
 			if(arg == NULL)
 				/*
 				 * Empty field, such as
 				 *	Content-Type:
 				 * which I believe is illegal according to
 				 * RFC1521
 				 */
 				 cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
 			else if(strchr(ptr, '/') == NULL)
 				/*
 				 * According to section 4 of RFC1521:
 				 * "Note also that a subtype specification is
 				 * MANDATORY. There are no default subtypes"
 				 *
 				 * We have to break this and make an assumption
 				 * for the subtype because virus writers and
 				 * email client writers don't get it right
 				 */
 				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
 			else {
 				int i;
 				char *mimeArgs;	/* RHS of the ; */

 				if(*arg == '/') {
 					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
 					messageSetMimeType(m, "application");
 					messageSetMimeSubtype(m, "octet-stream");
 				} else {
 					/*
 					 * The content type could be in quotes:
 					 *	Content-Type: "multipart/mixed"
 					 * FIXME: this is a hack in that ignores
 					 *	the quotes, it doesn't handle
 					 *	them properly
 					 */
 					while(isspace((unsigned char)*ptr))
 						ptr++;
 					if(ptr[0] == '\"')
 						ptr++;

 					if(ptr[0] != '/') {
 						char *s;
 						char *mimeType;	/* LHS of the ; */
 #ifdef CL_THREAD_SAFE
 						char *strptr = NULL;
 #endif

 						s = mimeType = cli_strtok(ptr, 0, ";");
 						/*
 						 * Handle
 						 * Content-Type: foo/bar multipart/mixed
 						 * and
 						 * Content-Type: multipart/mixed foo/bar
 						 */
 						if(s && *s) for(;;) {
 #ifdef	CL_THREAD_SAFE
 							int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
 #else
 							int set = messageSetMimeType(m, strtok(s, "/"));
 #endif

 							/*
 							 * Stephen White <stephen@earth.li>
 							 * Some clients put space after
 							 * the mime type but before
 							 * the ;
 							 */
 #ifdef	CL_THREAD_SAFE
 							s = strtok_r(NULL, ";", &strptr);
 #else
 							s = strtok(NULL, ";");
 #endif
 							if(s == NULL)
 								break;
 							if(set) {
 								/*
 								 * Test the length before
 								 * indexing: strstrip()
 								 * returns 0 for a blank
 								 * subtype and len - 1
 								 * would wrap around
 								 */
 								size_t len = strstrip(s);

 								if(len && (s[len - 1] == '\"')) {
 									s[len - 1] = '\0';
 									len = strstrip(s);
 								}
 								if(len) {
 									if(strchr(s, ' ')) {
 										char *t = cli_strtok(s, 0, " ");

 										messageSetMimeSubtype(m, t);
 										free(t);
 									} else
 										messageSetMimeSubtype(m, s);
 								}
 							}

 							/* is there another type/subtype after white space? */
 							while(*s && !isspace((unsigned char)*s))
 								s++;
 							if(*s++ == '\0')
 								break;
 							if(*s == '\0')
 								break;
 						}
 						if(mimeType)
 							free(mimeType);
 					}
 				}

 				/*
 				 * Add in all rest of the the arguments.
 				 * e.g. if the header is this:
 				 * Content-Type:', arg='multipart/mixed; boundary=foo
 				 * we find the boundary argument set it
 				 */
 				i = 1;
 				while((mimeArgs = cli_strtok(ptr, i++, ";")) != NULL) {
 					cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);

 					messageAddArguments(m, mimeArgs);
 					free(mimeArgs);
 				}
 			}
 			break;
 		case CONTENT_TRANSFER_ENCODING:
 			/* nothing to set if the header came without a value */
 			if(ptr)
 				messageSetEncoding(m, ptr);
 			break;
 		case CONTENT_DISPOSITION:
 			if(ptr) {
 				p = cli_strtok(ptr, 0, ";");
 				if(p) {
 					if(*p) {
 						messageSetDispositionType(m, p);
 						free(p);
 						p = cli_strtok(ptr, 1, ";");
 						if(p)
 							messageAddArgument(m, p);
 					}
 					free(p);
 				}
 			}
 			if((p = (char *)messageFindArgument(m, "filename")) == NULL)
 				/*
 				 * Handle this type of header, without
 				 * a filename (e.g. some Worm.Torvil.D)
 				 *	Content-ID: <nRfkHdrKsAxRU>
 				 * Content-Transfer-Encoding: base64
 				 * Content-Disposition: attachment
 				 */
 				messageAddArgument(m, "filename=unknown");
 			else
 				free(p);
 			break;
 	}
 	if(copy)
 		free(copy);

 	return 0;
 }
 
68be129f
 /*
5a01973c
  * Save the text portion of the message
  */
 static void
0e01c158
 saveTextPart(message *m, const char *dir, int destroy_text)
5a01973c
 {
1e06e1ab
 	fileblob *fb;
5a01973c
 
 	messageAddArgument(m, "filename=textportion");
0e01c158
 	if((fb = messageToFileblob(m, dir, destroy_text)) != NULL) {
5a01973c
 		/*
 		 * Save main part to scan that
 		 */
37819555
 		cli_dbgmsg("Saving main message\n");
5a01973c
 
1e06e1ab
 		fileblobDestroy(fb);
5a01973c
 	}
 }
 
90905415
 /*
  * Handle RFC822 comments in headers.
  * If out == NULL, return a buffer without the comments, the caller must free
  *	the returned buffer.
  * If out != NULL the result is written there instead and out is returned; it
  *	must not be the same buffer as in. The result is never longer than in.
  * Return NULL on error or if the input has no comments (no '(' in it), in
  *	which case nothing is written to out.
  * See section 3.4.3 of RFC822
  * TODO: handle comments that go on to more than one line
  */
 static char *
 rfc822comments(const char *in, char *out)
 {
 	const char *iptr;
 	char *optr;
 	int backslash = 0, inquote = 0, commentlevel = 0;

 	if((in == NULL) || (strchr(in, '(') == NULL))
 		return NULL;

 	assert(out != in);

 	if(out == NULL) {
 		out = cli_malloc(strlen(in) + 1);
 		if(out == NULL)
 			return NULL;
 	}

 	optr = out;

 	cli_dbgmsg("rfc822comments: contains a comment\n");

 	for(iptr = in; *iptr; iptr++) {
 		const char c = *iptr;

 		if(backslash) {
 			/* the character after a backslash is taken literally */
 			backslash = 0;
 			if(commentlevel == 0)
 				*optr++ = c;
 		} else if(c == '\\')
 			backslash = 1;
 		else if(c == '\"') {
 			*optr++ = c;
 			inquote = !inquote;
 		} else if((c == '(') || (c == ')')) {
 			/* brackets only delimit comments outside of quotes */
 			if(inquote)
 				*optr++ = c;
 			else if(c == '(')
 				commentlevel++;
 			else if(commentlevel > 0)
 				commentlevel--;
 		} else if(commentlevel == 0)
 			*optr++ = c;
 	}

 	if(backslash)	/* last character was a single backslash */
 		*optr++ = '\\';
 	*optr = '\0';

 	/*strstrip(out);*/

 	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

 	return out;
 }
0674e2af
 
 /*
  * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error
  */
 static char *
 rfc2047(const char *in)
 {
 	char *out, *pout;
 	size_t len;
 
95f98162
 	if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
4db74788
 		return cli_strdup(in);
0674e2af
 
 	cli_dbgmsg("rfc2047 '%s'\n", in);
 	out = cli_malloc(strlen(in) + 1);
 
 	if(out == NULL)
 		return NULL;
 
 	pout = out;
 
 	/* For each RFC2047 string */
 	while(*in) {
291ac47f
 		char encoding, *ptr, *enctext;
0674e2af
 		message *m;
 		blob *b;
 
 		/* Find next RFC2047 string */
 		while(*in) {
 			if((*in == '=') && (in[1] == '?')) {
 				in += 2;
 				break;
 			}
 			*pout++ = *in++;
 		}
 		/* Skip over charset, find encoding */
 		while((*in != '?') && *in)
 			in++;
 		if(*in == '\0')
 			break;
 		encoding = *++in;
 		encoding = tolower(encoding);
 
 		if((encoding != 'q') && (encoding != 'b')) {
a363da65
 			cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
c3400886
 			free(out);
 			out = NULL;
0674e2af
 			break;
 		}
 		/* Skip to encoded text */
 		if(*++in != '?')
 			break;
 		if(*++in == '\0')
 			break;
 
4db74788
 		enctext = cli_strdup(in);
291ac47f
 		if(enctext == NULL) {
 			free(out);
 			out = NULL;
 			break;
 		}
0674e2af
 		in = strstr(in, "?=");
291ac47f
 		if(in == NULL) {
 			free(enctext);
0674e2af
 			break;
291ac47f
 		}
0674e2af
 		in += 2;
 		ptr = strstr(enctext, "?=");
 		assert(ptr != NULL);
 		*ptr = '\0';
 		/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
 
 		m = messageCreate();
37819555
 		if(m == NULL)
0674e2af
 			break;
321d5c00
 		messageAddStr(m, enctext);
291ac47f
 		free(enctext);
4d9c0ca8
 		switch(encoding) {
0674e2af
 			case 'q':
 				messageSetEncoding(m, "quoted-printable");
 				break;
 			case 'b':
 				messageSetEncoding(m, "base64");
 				break;
 		}
985cc85e
 		b = messageToBlob(m, 1);
0674e2af
 		len = blobGetDataSize(b);
dbca666a
 		cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
 			blobGetData(b));
0674e2af
 		memcpy(pout, blobGetData(b), len);
 		blobDestroy(b);
 		messageDestroy(m);
 		if(pout[len - 1] == '\n')
 			pout += len - 1;
 		else
 			pout += len;
 
 	}
5e5a162c
 	if(out == NULL)
 		return NULL;
 
 	*pout = '\0';
0674e2af
 
5e5a162c
 	cli_dbgmsg("rfc2047 returns '%s'\n", out);
0674e2af
 	return out;
 }
 
9a7398ee
 #ifdef	PARTIAL_DIR
 /*
  * Handle partial messages
  *
  * The part is saved with messageToFileblob() into the directory
  * "clamav-partial" beneath the temporary directory, after its filename
  * argument has been set to the "id" argument followed by the "number"
  * argument. If there is a "total" argument and number equals total, parts
  * 1 to total are looked for in that directory and joined together into the
  * file dir/id.
  *
  * Returns 0 on success, -1 on failure
  */
 static int
 rfc1341(message *m, const char *dir)
 {
 	fileblob *fb;
 	char *arg, *id, *number, *total, *oldfilename;
 	const char *tmpdir;
 	char pdir[NAME_MAX + 1];

 	id = (char *)messageFindArgument(m, "id");
 	if(id == NULL)
 		return -1;

 #ifdef  C_CYGWIN
 	if((tmpdir = getenv("TEMP")) == (char *)NULL)
 		if((tmpdir = getenv("TMP")) == (char *)NULL)
 			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 				tmpdir = "C:\\";
 #else
 	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 		if((tmpdir = getenv("TMP")) == (char *)NULL)
 			if((tmpdir = getenv("TEMP")) == (char *)NULL)
 #ifdef	P_tmpdir
 				tmpdir = P_tmpdir;
 #else
 				tmpdir = "/tmp";
 #endif
 #endif

 	snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);

 	/* id is ours to free from here on, including when we give up early */
 	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
 		cli_errmsg("Can't create the directory '%s'\n", pdir);
 		free(id);
 		return -1;
 	} else {
 		struct stat statb;

 		if(stat(pdir, &statb) < 0) {
 			cli_errmsg("Can't stat the directory '%s'\n", pdir);
 			free(id);
 			return -1;
 		}
 		if(statb.st_mode & 077)
 			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
 				pdir, (int)(statb.st_mode & 0777));
 	}

 	number = (char *)messageFindArgument(m, "number");
 	if(number == NULL) {
 		free(id);
 		return -1;
 	}

 	oldfilename = (char *)messageFindArgument(m, "filename");
 	if(oldfilename == NULL)
 		oldfilename = (char *)messageFindArgument(m, "name");

 	/* 10 is strlen("filename=") plus the terminating NUL */
 	arg = cli_malloc(10 + strlen(id) + strlen(number));
 	if(arg) {
 		sprintf(arg, "filename=%s%s", id, number);
 		messageAddArgument(m, arg);
 		free(arg);
 	}

 	if(oldfilename) {
 		cli_warnmsg("Must reset to %s\n", oldfilename);
 		free(oldfilename);
 	}

 	if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
 		free(id);
 		free(number);
 		return -1;
 	}

 	fileblobDestroy(fb);

 	total = (char *)messageFindArgument(m, "total");
 	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
 	if(total) {
 		/*
 		 * NOTE(review): number and total come from the mail and
 		 * aren't range checked, a huge total makes the loop below
 		 * run for a very long time
 		 */
 		int n = atoi(number);
 		int t = atoi(total);
 		DIR *dd = NULL;

 		free(total);
 		/*
 		 * If it's the last one - reassemble it
 		 * FIXME: this assumes that we receive the parts in order
 		 */
 		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
 			FILE *fout;
 			char outname[NAME_MAX + 1];
 			time_t now;

 			sanitiseName(id);

 			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

 			cli_dbgmsg("outname: %s\n", outname);

 			fout = fopen(outname, "wb");
 			if(fout == NULL) {
 				cli_errmsg("Can't open '%s' for writing\n", outname);
 				free(id);
 				free(number);
 				closedir(dd);
 				return -1;
 			}

 			time(&now);
 			for(n = 1; n <= t; n++) {
 				char filename[NAME_MAX + 1];
 				const struct dirent *dent;
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 				union {
 					struct dirent d;
 					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
 				} result;
 #endif

 				/* the name that part n starts with */
 				snprintf(filename, sizeof(filename), "%s%d", id, n);

 #ifdef HAVE_READDIR_R_3
 				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
 #elif defined(HAVE_READDIR_R_2)
 				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
 #else	/*!HAVE_READDIR_R*/
 				while((dent = readdir(dd))) {
 #endif
 					FILE *fin;
 					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
 					int nblanks;
 					struct stat statb;
 					extern short cli_leavetemps_flag;

 #ifndef  C_CYGWIN
 					if(dent->d_ino == 0)
 						continue;
 #endif

 					snprintf(fullname, sizeof(fullname) - 1,
 						"%s/%s", pdir, dent->d_name);

 					if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
 						/*
 						 * Not the part we're after,
 						 * but remove it if it is more
 						 * than a week old
 						 */
 						if(!cli_leavetemps_flag)
 							continue;
 						if(stat(fullname, &statb) < 0)
 							continue;
 						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
 							if(unlink(fullname) >= 0)
 								cli_warnmsg("removed old RFC1341 file %s\n", fullname);
 						continue;
 					}

 					fin = fopen(fullname, "rb");
 					if(fin == NULL) {
 						cli_errmsg("Can't open '%s' for reading\n", fullname);
 						fclose(fout);
 						unlink(outname);
 						free(id);
 						free(number);
 						closedir(dd);
 						return -1;
 					}
 					nblanks = 0;
 					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
 						/*
 						 * Ensure that trailing newlines
 						 * aren't copied
 						 */
 						if(buffer[0] == '\n')
 							nblanks++;
 						else {
 							if(nblanks)
 								do
 									putc('\n', fout);
 								while(--nblanks > 0);
 							fputs(buffer, fout);
 						}
 					fclose(fin);

 					/* don't unlink if leave temps */
 					if(!cli_leavetemps_flag)
 						unlink(fullname);
 					break;
 				}
 				rewinddir(dd);
 			}
 			closedir(dd);
 			fclose(fout);
 		}
 	}
 	free(number);
 	free(id);

 	return 0;
 }
 #endif
 
7133ee9d
 #ifdef CL_EXPERIMENTAL
 /*
  * Finished with the hrefs: destroy the blob, if there is one, and free
  * the tag arguments
  */
 static void
 hrefs_done(blob *b, tag_arguments_t *hrefs)
 {
 	if(b != NULL)
 		blobDestroy(b);

 	html_tag_arg_free(hrefs);
 }
 
 /*
  * This used to be part of checkURLs, split out, because phishingScan needs it
  * too, and phishingScan might be used in situations where checkURLs is
  * disabled (see ifdef)
  *
  * Turns m into a blob and runs html_normalise_mem() over it to fill in
  * hrefs. Returns the blob, which the caller must destroy, or NULL if there
  * is no blob, it is empty, it is larger than 100K, or html_normalise_mem()
  * fails.
  */
 static blob *
 getHrefs(message *m, tag_arguments_t *hrefs)
 {
 	blob *b;
 	size_t len;

 	b = messageToBlob(m, 0);
 	if(b == NULL)
 		return NULL;

 	len = blobGetDataSize(b);
 	if(len == 0)
 		goto fail;

 	/* TODO: make this size customisable */
 	if(len > 100*1024) {
 		cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
 		goto fail;
 	}

 	hrefs->count = 0;
 	hrefs->tag = hrefs->value = NULL;
 	hrefs->contents = NULL;

 	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
 	if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs))
 		goto fail;
 	cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

 	/* TODO: Do we need to call remove_html_comments? */
 	return b;

 fail:
 	blobDestroy(b);
 	return NULL;
 }
 
 static void
2345b4cd
 checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
7133ee9d
 {
89343098
 	tag_arguments_t hrefs;
 	blob *b;
7133ee9d
 
19a2af6d
 	/* aCaB: stripped GA related stuff */
 	hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS;
7133ee9d
 
 #if    (!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0)
89343098
 	if(!hrefs.scanContents)
52634964
 		/*
 		 * Don't waste time extracting hrefs (parsing html), nobody
 		 * will need it
 		 */
7133ee9d
 		return;
 #endif
 
89343098
 	hrefs.count = 0;
 	hrefs.tag = hrefs.value = NULL;
 	hrefs.contents = NULL;
 
 	b = getHrefs(mainMessage, &hrefs);
 	if(b) {
2345b4cd
 		if(hrefs.scanContents /*mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS*/) {
89343098
 			if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
 				mainMessage->isInfected = TRUE;
2345b4cd
 				*rc = VIRUS;
89343098
 				cli_dbgmsg("PH:Phishing found\n");
 			}
 		}
2345b4cd
 		if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS))
89343098
 			do_checkURLs(mainMessage, mctx->dir, &hrefs);
 	}
 	hrefs_done(b,&hrefs);
7133ee9d
 }
 
 #if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
 /*
  * Download the http:// URLs listed in hrefs into dir, at most FOLLOWURLS
  * of them, each one only once. With CL_THREAD_SAFE every download is done
  * by getURL() in a thread of its own and all of the threads are waited for
  * before returning, otherwise getURL() is called directly.
  * m is not used.
  */
 static void
 do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs)
 {
 	table_t *t;
 	int i, n;
 #ifdef	CL_THREAD_SAFE
 	pthread_t tid[FOLLOWURLS];
 	struct arg args[FOLLOWURLS];
 #endif

 	t = tableCreate();
 	if(t == NULL)
 		return;

 	n = 0;

 	for(i = 0; i < hrefs->count; i++) {
 		const char *url = (const char *)hrefs->value[i];

 		/*
 		 * TODO: If it's an image source, it'd be nice to note beacons
 		 *	where width="0" height="0", which needs support from
 		 *	the HTML normalise code
 		 */
 		if(strncasecmp("http://", url, 7) == 0) {
 			char *ptr;
 #ifndef	CL_THREAD_SAFE
 			struct arg arg;
 #endif
 			char name[NAME_MAX + 1];

 			if(tableFind(t, url) == 1) {
 				cli_dbgmsg("URL %s already downloaded\n", url);
 				continue;
 			}
 			/*
 			 * What about foreign character spoofing?
 			 * It would be useful be able to check if url
 			 *	is the same as the text displayed, e.g.
 			 *	<a href="http://dodgy.biz">www.paypal.com</a>
 			 *	but that needs support from HTML normalise
 			 */
 			if(strchr(url, '%') && strchr(url, '@'))
 				cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);

 			if(n == FOLLOWURLS) {
 				cli_warnmsg("URL %s will not be scanned\n", url);
 				break;
 			}

 			(void)tableInsert(t, url, 1);
 			cli_dbgmsg("Downloading URL %s to be scanned\n", url);
 			/* the file is named after the URL, with / turned into _ */
 			strncpy(name, url, sizeof(name) - 1);
 			name[sizeof(name) - 1] = '\0';
 			for(ptr = name; *ptr; ptr++)
 				if(*ptr == '/')
 					*ptr = '_';

 #ifdef	CL_THREAD_SAFE
 			args[n].dir = dir;
 			args[n].url = cli_strdup(url);
 			args[n].filename = cli_strdup(name);
 			/*
 			 * Only count this slot if the thread really started,
 			 * otherwise tid[n] isn't valid for the pthread_join
 			 * below and getURL could be given a NULL string
 			 */
 			if((args[n].url == NULL) || (args[n].filename == NULL) ||
 			   (pthread_create(&tid[n], NULL, getURL, &args[n]) != 0)) {
 				cli_warnmsg("URL %s will not be scanned\n", url);
 				free(args[n].filename);
 				free(args[n].url);
 				continue;
 			}
 #else
 			arg.url = cli_strdup(url);
 			if(arg.url == NULL) {
 				cli_warnmsg("URL %s will not be scanned\n", url);
 				continue;
 			}
 			arg.dir = dir;
 			arg.filename = name;
 			getURL(&arg);
 			free(arg.url);
 #endif
 			++n;
 		}
 	}
 	tableDestroy(t);

 #ifdef	CL_THREAD_SAFE
 	assert(n <= FOLLOWURLS);
 	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
 	while(--n >= 0) {
 		pthread_join(tid[n], NULL);
 		free(args[n].filename);
 		free(args[n].url);
 	}
 #endif
 }
 #else
 /*
  * Version used when FOLLOWURLS isn't defined or isn't greater than 0:
  * no URL is followed, so there is nothing to do
  */
 static void
 do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs)
 {
 }
 #endif
 
 #else	/*!CL_EXPERIMENTAL*/
 
f52d7358
 #if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
c5ed8336
 static void
2345b4cd
 checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html)
c5ed8336
 {
985cc85e
 	blob *b = messageToBlob(m, 0);
c5ed8336
 	size_t len;
6b93ea0c
 	table_t *t;
6da40aa1
 	int i, n;
314ff77b
 #if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
f52d7358
 	pthread_t tid[FOLLOWURLS];
 	struct arg args[FOLLOWURLS];
314ff77b
 #endif
6da40aa1
 	tag_arguments_t hrefs;
c5ed8336
 
 	if(b == NULL)
 		return;
 
 	len = blobGetDataSize(b);
 
e94471f4
 	if(len == 0) {
 		blobDestroy(b);
3eb12bae
 		return;
e94471f4
 	}
3eb12bae
 
6b93ea0c
 	/* TODO: make this size customisable */
 	if(len > 100*1024) {
 		cli_warnmsg("Viruses pointed to by URL not scanned in large message\n");
 		blobDestroy(b);
e94471f4
 		return;
6b93ea0c
 	}
 
 	t = tableCreate();
3a0ef2ee
 	if(t == NULL) {
 		blobDestroy(b);
 		return;
 	}
3eb12bae
 
a2d786fc
 	hrefs.count = 0;
 	hrefs.tag = hrefs.value = NULL;
6da40aa1
 
 	cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
3a0ef2ee
 	if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
7d3d11d0
 		blobDestroy(b);
 		tableDestroy(t);
 		return;
3a0ef2ee
 	}
 	cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
 
e745ac7e
 	/* TODO: Do we need to call remove_html_comments? */
6b93ea0c
 
6da40aa1
 	n = 0;
 
 	for(i = 0; i < hrefs.count; i++) {
a77dc192
 		const char *url = (const char *)hrefs.value[i];
6da40aa1
 
5927cd86
 		/*
 		 * TODO: If it's an image source, it'd be nice to note beacons
 		 *	where width="0" height="0", which needs support from
 		 *	the HTML normalise code
 		 */
6da40aa1
 		if(strncasecmp("http://", url, 7) == 0) {
 			char *ptr;
314ff77b
 #ifdef	WITH_CURL
 #ifndef	CL_THREAD_SAFE
 			struct arg arg;
 #endif
 
 #else	/*!WITH_CURL*/
bf6f653d
 #ifdef	CL_THREAD_SAFE
 			static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
6b93ea0c
 			struct stat statb;
 			char cmd[512];
314ff77b
 #endif	/*WITH_CURL*/
a95c894a
 			char name[NAME_MAX + 1];
6da40aa1
 
 			if(tableFind(t, url) == 1) {
 				cli_dbgmsg("URL %s already downloaded\n", url);
f2b068fb
 				continue;
 			}
5927cd86
 			/*
 			 * What about foreign character spoofing?
 			 * It would be useful be able to check if url
 			 *	is the same as the text displayed, e.g.
 			 *	<a href="http://dodgy.biz">www.paypal.com</a>
 			 *	but that needs support from HTML normalise
 			 */
 			if(strchr(url, '%') && strchr(url, '@'))
2959f29c
 				cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);
5927cd86
 
eddb5dbb
 			if(n == FOLLOWURLS) {
 				cli_warnmsg("URL %s will not be scanned\n", url);
 				break;
 			}
 
6da40aa1
 			(void)tableInsert(t, url, 1);
 			cli_dbgmsg("Downloading URL %s to be scanned\n", url);
2176c0e5
 			strncpy(name, url, sizeof(name) - 1);
39d09964
 			name[sizeof(name) - 1] = '\0';
6da40aa1
 			for(ptr = name; *ptr; ptr++)
 				if(*ptr == '/')
 					*ptr = '_';
c5ed8336
 
da812a6a
 #ifdef	WITH_CURL
314ff77b
 #ifdef	CL_THREAD_SAFE
12cc3b41
 			args[n].curl = curl_easy_init();
 			if(args[n].curl == NULL) {
 				cli_errmsg("curl_easy_init failed\n");
 				continue;
 			}
c017364c
 			args[n].dir = mctx->dir;
a95c894a
 			args[n].url = url;
4db74788
 			args[n].filename = cli_strdup(name);
314ff77b
 			pthread_create(&tid[n], NULL, getURL, &args[n]);
 #else
12cc3b41
 			/* easy isn't the word I'd use... */
 			arg.curl = curl_easy_init();
 			if(arg.curl == NULL) {
 				cli_errmsg("curl_easy_init failed\n");
 				continue;
 			}
6da40aa1
 			arg.url = url;
c017364c
 			arg.dir = mctx->dir;
314ff77b
 			arg.filename = name;
 			getURL(&arg);
12cc3b41
 			curl_easy_cleanup(arg.curl);
314ff77b
 #endif
 
ca31cc3d
 #else	/*!WITH_CURL*/
 			cli_warnmsg("The use of mail-follow-urls without CURL being installed is deprecated\n");
3fa72383
 			/*
 			 * TODO: maximum size and timeouts
 			 */
c017364c
 			len = sizeof(cmd) - 26 - strlen(mctx->dir) - strlen(name);
2176c0e5
 #ifdef	CL_DEBUG
c017364c
 			snprintf(cmd, sizeof(cmd) - 1, "GET -t10 \"%.*s\" >%s/%s", len, url, mctx->dir, name);
2176c0e5
 #else
c017364c
 			snprintf(cmd, sizeof(cmd) - 1, "GET -t10 \"%.*s\" >%s/%s 2>/dev/null", len, url, mctx->dir, name);
2176c0e5
 #endif
 			cmd[sizeof(cmd) - 1] = '\0';
 
c5ed8336
 			cli_dbgmsg("%s\n", cmd);
bf6f653d
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_lock(&system_mutex);
 #endif
c5ed8336
 			system(cmd);
bf6f653d
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&system_mutex);
 #endif
c017364c
 			snprintf(cmd, sizeof(cmd), "%s/%s", mctx->dir, name);
bf6f653d
 			if(stat(cmd, &statb) >= 0)
 				if(statb.st_size == 0) {
6da40aa1
 					cli_warnmsg("URL %s failed to download\n", url);
bf6f653d
 					/*
 					 * Don't bother scanning an empty file
 					 */
 					(void)unlink(cmd);
 				}
3fa72383
 #endif
314ff77b
 			++n;
c5ed8336
 		}
 	}
 	blobDestroy(b);
f2b068fb
 	tableDestroy(t);
314ff77b
 
 #if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
f52d7358
 	assert(n <= FOLLOWURLS);
314ff77b
 	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
 	while(--n >= 0) {
 		pthread_join(tid[n], NULL);
 		free(args[n].filename);
12cc3b41
 		curl_easy_cleanup(args[n].curl);
314ff77b
 	}
 #endif
a95c894a
 	html_tag_arg_free(&hrefs);
c5ed8336
 }
 
7133ee9d
 #else
 
 /*
  * Version used without CL_EXPERIMENTAL when FOLLOWURLS isn't defined or
  * isn't greater than 0: no URL is followed, so there is nothing to do
  */
 static void
7e503f45
 checkURLs(message *m, mbox_ctx *mctx, mbox_status* rc, int is_html)
7133ee9d
 {
 }
 #endif
e0e62fa1
 #endif /* CL_EXPERIMENTAL */
7133ee9d
 
12cc3b41
 #if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
90ad7db0
 /*
  * Includes some Win32 patches by Gianluigi Tiesi <sherpya@netfarm.it>
8d7066f4
  *
  * FIXME: Often WMF exploits work by sending people an email directing them
  *	to a page which displays a picture containing the exploit. This is not
  *	currently found, since only the HTML on the referred page is downloaded.
  *	It would be useful to scan the HTML for references to pictures and
  *	download them for scanning. But that will hit performance so there is
  *	an issue here.
90ad7db0
  */
52634964
 
90343a0f
 #if	defined(CL_EXPERIMENTAL) || (!defined(WITH_CURL))
 
256a86cb
 /*
  * Removing the reliance on libcurl
  * Includes some of the freshclam hacks by Everton da Silva Marques
  * <everton.marques@gmail.com>
  */
 /*
  * Fallbacks for systems whose headers don't supply these macros.
  *
  * timercmp: compare two struct timeval pointers with the operator op.
  *	The seconds decide, unless they are equal in which case the
  *	microseconds do.
  */
 #ifndef timercmp
 # define timercmp(tvp, uvp, op)				\
 	(((tvp)->tv_sec == (uvp)->tv_sec) ?		\
 		((tvp)->tv_usec op (uvp)->tv_usec) :	\
 		((tvp)->tv_sec op (uvp)->tv_sec))
 #endif /* timercmp */

 /*
  * timersub: store *tvp - *uvp in *vvp, borrowing a second when the
  *	microseconds would otherwise be negative
  */
 #ifndef timersub
 # define timersub(tvp, uvp, vvp)					\
 	do {								\
 		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
 		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
 		if((vvp)->tv_usec < 0) {				\
 			--(vvp)->tv_sec;				\
 			(vvp)->tv_usec += 1000000;			\
 		}							\
 	} while(0)
 #endif /* timersub */
 
63bbf0ec
 /*
  * Forward declarations of the networking helpers that are defined later
  * on in this file.
  * NOTE(review): going by the names these put a socket into non-blocking
  * mode and back, connect with a timeout in seconds, fetch a pending
  * socket error and look up a host name using a caller supplied buffer -
  * confirm against the definitions
  */
 static	long	nonblock_fcntl(int sock);
 static	void	restore_fcntl(int sock, long fcntl_flags);
 static	int	nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs);
 static	int	connect_error(int sock);
6feb4a59
 static	int	my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len);
256a86cb
 
b53134f5
 /*
  * NOTE(review): presumably the number of select() failures and of
  * pointless wake-ups tolerated while waiting for a non-blocking connect -
  * confirm against nonblock_connect()
  */
 #define NONBLOCK_SELECT_MAX_FAILURES	3
 #define NONBLOCK_MAX_BOGUS_LOOPS	10
63bbf0ec
 
256a86cb
 static void *
 #ifdef	CL_THREAD_SAFE
 getURL(void *a)
 #else
 getURL(struct arg *arg)
 #endif
 {
 	FILE *fp;
 #ifdef	CL_THREAD_SAFE
 	struct arg *arg = (struct arg *)a;
 #endif
 	const char *url = arg->url;
 	const char *dir = arg->dir;
 	const char *filename = arg->filename;
 	char fout[NAME_MAX + 1];
e1bbfed7
 #ifdef	C_WINDOWS
 	SOCKET sd;
 #else
 	int sd;
 #endif
 	int n;
256a86cb
 	struct sockaddr_in server;
e1bbfed7
 #ifdef	HAVE_IN_ADDR_T
256a86cb
 	in_addr_t ip;
e1bbfed7
 #else
90780776
 	unsigned int ip;
e1bbfed7
 #endif
256a86cb
 	char buf[BUFSIZ];
 	char site[BUFSIZ];
63bbf0ec
 	in_port_t port;
 	static in_port_t default_port;
 	static int tcp;
27fb3ba8
 	int doingsite, firstpacket;
256a86cb
 	char *ptr;
b53134f5
 	int flags, via_proxy;
256a86cb
 	const char *proxy;
 
6feb4a59
 	if(strlen(url) > (sizeof(site) - 1)) {
 		cli_dbgmsg("Ignoring long URL \"%s\"\n", url);
 		return NULL;
 	}
 
256a86cb
 	snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename);
 
 	fp = fopen(fout, "wb");
 
 	if(fp == NULL) {
 		cli_errmsg("Can't open '%s' for writing", fout);
 		return NULL;
 	}
6feb4a59
 	cli_dbgmsg("Saving %s to %s\n", url, fout);
63bbf0ec
 
0a266df3
 #ifndef	C_BEOS
63bbf0ec
 	if(tcp == 0) {
 		const struct protoent *proto = getprotobyname("tcp");
 
 		if(proto == NULL) {
 			cli_warnmsg("Unknown prototol tcp, check /etc/protocols\n");
 			fclose(fp);
 			return NULL;
 		}
 		tcp = proto->p_proto;
e1bbfed7
 #ifndef	C_WINDOWS
27fb3ba8
 		endprotoent();
e1bbfed7
 #endif
63bbf0ec
 	}
0a266df3
 #endif
63bbf0ec
 	if(default_port == 0) {
 		const struct servent *servent = getservbyname("http", "tcp");
 
 		if(servent)
 			default_port = (in_port_t)ntohs(servent->s_port);
 		else
 			default_port = 80;
e84f1e26
 #if	!defined(C_WINDOWS) && !defined(C_BEOS)
63bbf0ec
 		endservent();
e1bbfed7
 #endif
63bbf0ec
 	}
 	port = default_port;
 
6feb4a59
 	doingsite = 1;
 	ptr = site;
 
256a86cb
 	proxy = getenv("http_proxy");	/* FIXME: handle no_proxy */
b53134f5
 
 	via_proxy = (proxy && *proxy);
 
 	if(via_proxy) {
256a86cb
 		if(strncasecmp(proxy, "http://", 7) != 0) {
 			cli_warnmsg("Unsupported proxy protocol\n");
 			fclose(fp);
 			return NULL;
 		}
 
6feb4a59
 		cli_dbgmsg("Getting %s via %s\n", url, proxy);
 
256a86cb
 		proxy += 7;
 		while(*proxy) {
 			if(doingsite && (*proxy == ':')) {
 				port = 0;
 				while(isdigit(*++proxy)) {
 					port *= 10;
 					port += *proxy - '0';
 				}
 				continue;
 			}
 			if(doingsite && (*proxy == '/')) {
 				proxy++;
 				break;
 			}
 			*ptr++ = *proxy++;
 		}
 	} else {
 		cli_dbgmsg("Getting %s\n", url);
 
 		if(strncasecmp(url, "http://", 7) != 0) {
 			cli_warnmsg("Unsupported protocol\n");
 			fclose(fp);
 			return NULL;
 		}
 
 		url += 7;
 		while(*url) {
 			if(doingsite && (*url == ':')) {
 				port = 0;
 				while(isdigit(*++url)) {
 					port *= 10;
 					port += *url - '0';
 				}
 				continue;
 			}
 			if(doingsite && (*url == '/')) {
 				url++;
 				break;
 			}
 			*ptr++ = *url++;
 		}
6feb4a59
 	}
 	*ptr = '\0';
256a86cb
 
6feb4a59
 	memset((char *)&server, '\0', sizeof(struct sockaddr_in));
 	server.sin_family = AF_INET;
 	server.sin_port = (in_port_t)htons(port);
256a86cb
 
6feb4a59
 	ip = inet_addr(site);
256a86cb
 #ifdef	INADDR_NONE
6feb4a59
 	if(ip == INADDR_NONE) {
256a86cb
 #else
6feb4a59
 	if(ip == (in_addr_t)-1) {
256a86cb
 #endif
6feb4a59
 		struct hostent h;
27fb3ba8
 
 		if((my_r_gethostbyname(site, &h, buf, sizeof(buf)) != 0) ||
 		   (h.h_addr_list == NULL) ||
 		   (h.h_addr == NULL)) {
6feb4a59
 			cli_dbgmsg("Unknown host %s\n", site);
256a86cb
 			fclose(fp);
 			return NULL;
 		}
 
6feb4a59
 		memcpy((char *)&ip, h.h_addr, sizeof(ip));
256a86cb
 	}
6feb4a59
 	server.sin_addr.s_addr = ip;
 	if((sd = socket(AF_INET, SOCK_STREAM, tcp)) < 0) {
 		fclose(fp);
 		return NULL;
 	}
 	flags = nonblock_fcntl(sd);
 	if(nonblock_connect(sd, (struct sockaddr *)&server, sizeof(struct sockaddr_in), 5) < 0) {
e1bbfed7
 		closesocket(sd);
6feb4a59
 		fclose(fp);
 		return NULL;
 	}
 
 	restore_fcntl(sd, flags);
 	/*
 	 * TODO: consider HTTP/1.1
 	 */
b53134f5
 	if(via_proxy)
6feb4a59
 		snprintf(buf, sizeof(buf) - 1,
 			"GET %s HTTP/1.0\nUser-Agent: www.clamav.net\n\n", url);
 	else
 		snprintf(buf, sizeof(buf) - 1,
 			"GET /%s HTTP/1.0\nUser-Agent: www.clamav.net\n\n", url);
256a86cb
 
90343a0f
 	/*cli_dbgmsg("%s", buf);*/
b53134f5
 
e1bbfed7
 	if(send(sd, buf, (int)strlen(buf), 0) < 0) {
 		closesocket(sd);
256a86cb
 		fclose(fp);
 		return NULL;
 	}
 
e1bbfed7
 #ifdef	SHUT_WR
256a86cb
 	shutdown(sd, SHUT_WR);
e1bbfed7
 #else
 	shutdown(sd, 1);
 #endif
256a86cb
 
27fb3ba8
 	firstpacket = 1;
 
256a86cb
 	for(;;) {
 		fd_set set;
 		struct timeval tv;
 
 		FD_ZERO(&set);
 		FD_SET(sd, &set);
 
 		tv.tv_sec = 30;	/* FIXME: make this customisable */
 		tv.tv_usec = 0;
 
 		if(select(sd + 1, &set, NULL, NULL, &tv) < 0) {
 			if(errno == EINTR)
 				continue;
e1bbfed7
 			closesocket(sd);
256a86cb
 			fclose(fp);
 			return NULL;
 		}
 		if(!FD_ISSET(sd, &set)) {
 			fclose(fp);
e1bbfed7
 			closesocket(sd);
256a86cb
 			return NULL;
 		}
 		n = recv(sd, buf, BUFSIZ, 0);
27fb3ba8
 
256a86cb
 		if(n < 0) {
 			fclose(fp);
e1bbfed7
 			closesocket(sd);
256a86cb
 			return NULL;
 		}
 		if(n == 0)
 			break;
f0187ba2
 
 		/*
 		 * FIXME: Handle header in more than one packet
 		 */
27fb3ba8
 		if(firstpacket) {
 			char *statusptr;
 
 			buf[n] = '\0';
 
 			statusptr = cli_strtok(buf, 1, " ");
 
 			if(statusptr) {
 				int status = atoi(statusptr);
 
 				cli_dbgmsg("HTTP status %d\n", status);
 
 				free(statusptr);
b53134f5
 
 				if((status == 301) || (status == 302)) {
 					char *location;
 
 					location = strstr(buf, "\nLocation: ");
 
 					if(location) {
 						char *end;
 
 						fclose(fp);
e1bbfed7
 						closesocket(sd);
b53134f5
 						unlink(fout);
 
 						location += 11;
 						free(arg->url);
 						end = location;
 						while(*end && (*end != '\n'))
 							end++;
 						*end = '\0';
4db74788
 						arg->url = cli_strdup(location);
b53134f5
 						cli_dbgmsg("Redirecting to %s\n", arg->url);
 						return getURL(arg);
 					}
 				}
27fb3ba8
 			}
f0187ba2
 			/*
 			 * Don't write the HTTP header
 			 */
 			ptr = strstr(buf, "\n\n");
 			if(ptr != NULL) {
 				ptr += 2;
 				n -= (int)(ptr - buf);
 			} else
 				ptr = buf;
 
27fb3ba8
 			firstpacket = 0;
f0187ba2
 		} else
 			ptr = buf;
27fb3ba8
 
f0187ba2
 		if(fwrite(ptr, n, 1, fp) != 1) {
256a86cb
 			cli_warnmsg("Error writing %d bytes to %s\n",
 				n, fout);
 			break;
 		}
 	}
 
 	fclose(fp);
e1bbfed7
 	closesocket(sd);
256a86cb
 	return NULL;
 }
 
6feb4a59
 /*
  * Re-entrant host name lookup, using whichever gethostbyname_r() variant
  * configure found, or gethostbyname() guarded by a mutex when there is none.
  * Have a copy here because r_gethostbyname is in shared not libclamav :-(
  *
  * hostname	name to resolve
  * hp		filled in with the result
  * buf, len	scratch space handed to gethostbyname_r()
  *
  * Returns 0 on success, non-zero (-1 or the resolver's h_errno value) on
  * failure.
  */
 static int
 my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
 {
 #if	defined(HAVE_GETHOSTBYNAME_R_6)
 	/* e.g. Linux */
 	struct hostent *hp2 = NULL;
 	int ret = -1;
 
 	if((hostname == NULL) || (hp == NULL))
 		return -1;
 	/*
 	 * This variant returns 0 on success and a (positive) error number
 	 * on failure, and leaves the result pointer NULL when no entry was
 	 * found, so testing for a negative return is not enough
 	 */
 	if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) ||
 	   (hp2 == NULL))
 		return (ret != 0) ? ret : -1;
 #elif	defined(HAVE_GETHOSTBYNAME_R_5)
 	/* e.g. BSD, Solaris, Cygwin */
 	/*
 	 * Configure doesn't work on BeOS. We need -lnet to link, but configure
 	 * doesn't add it, so you need to do something like
 	 *	LIBS=-lnet ./configure --enable-cache --disable-clamav
 	 */
 	int ret = -1;
 
 	if((hostname == NULL) || (hp == NULL))
 		return -1;
 	if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
 		return ret;
 #elif	defined(HAVE_GETHOSTBYNAME_R_3)
 	/* e.g. HP/UX, AIX */
 	if((hostname == NULL) || (hp == NULL))
 		return -1;
 	/*
 	 * NOTE(review): &hp is a struct hostent **, the three argument
 	 * variant presumably wants hp itself - confirm on such a platform
 	 */
 	if(gethostbyname_r(hostname, &hp, (struct hostent_data *)buf) < 0)
 		return h_errno;
 #else
 	/* Single thread the code e.g. VS2005 */
 	struct hostent *hp2;
 #ifdef  CL_THREAD_SAFE
 	static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
 
 	if((hostname == NULL) || (hp == NULL))
 		return -1;
 #ifdef  CL_THREAD_SAFE
 	pthread_mutex_lock(&hostent_mutex);
 #endif
 	if((hp2 = gethostbyname(hostname)) == NULL) {
 #ifdef  CL_THREAD_SAFE
 		pthread_mutex_unlock(&hostent_mutex);
 #endif
 		return h_errno;
 	}
 	/*
 	 * This is a shallow copy: the pointers inside *hp still refer to
 	 * the storage owned by gethostbyname()
 	 */
 	memcpy(hp, hp2, sizeof(struct hostent));
 #ifdef  CL_THREAD_SAFE
 	pthread_mutex_unlock(&hostent_mutex);
 #endif
 
 #endif
 	return 0;
 }
 
256a86cb
 /*
  * Put sock into non-blocking mode.
  *
  * Returns the file status flags that were in force before the change, so
  * that they can be handed to restore_fcntl() later. A negative value means
  * the flags could not be read, in which case the socket is left alone.
  * On platforms without F_GETFL nothing is done and 0 is returned.
  */
 static long
 nonblock_fcntl(int sock)
 {
 #ifndef	F_GETFL
 	return 0L;
 #else
 	const long saved = fcntl(sock, F_GETFL, 0);
 
 	if(saved < 0) {
 		cli_warnmsg("nonblock_fcntl: saving: fcntl(%d, F_GETFL): errno=%d: %s\n",
 			sock, errno, strerror(errno));
 		return saved;
 	}
 
 	if(fcntl(sock, F_SETFL, saved | O_NONBLOCK) != 0)
 		cli_warnmsg("nonblock_fcntl: fcntl(%d, F_SETFL, O_NONBLOCK): errno=%d: %s\n",
 			sock, errno, strerror(errno));
 
 	return saved;
 #endif
 }
 
 /*
  * Undo nonblock_fcntl(): put back the file status flags it returned.
  * A value of -1 (flags were never read) is ignored, as is the whole call
  * on platforms without F_SETFL.
  */
 static void
 restore_fcntl(int sock, long fcntl_flags)
 {
 #ifdef	F_SETFL
 	if(fcntl_flags == -1)
 		return;
 
 	if(fcntl(sock, F_SETFL, fcntl_flags) != 0)
 		cli_warnmsg("restore_fcntl: restoring: fcntl(%d, F_SETFL): errno=%d: %s\n",
 			sock, errno, strerror(errno));
 #endif
 }
 
 /*
  * connect() a socket that may be in non-blocking mode, waiting at most
  * secs seconds for the connection to complete.
  *
  * Returns 0 once connected, -1 on failure or timeout.
  */
 static int
 nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs)
 {
 	/* How many more select() errors are put up with */
 	int failures_left = NONBLOCK_SELECT_MAX_FAILURES;
 	/* How many more times select() may come back with nothing ready */
 	int idle_left = NONBLOCK_MAX_BOGUS_LOOPS;
 	/* Absolute time at which to give up */
 	struct timeval deadline;
 	/* Highest fd in the set, plus 1 */
 	const int nfds = sock + 1;
 
 	gettimeofday(&deadline, 0);
 	deadline.tv_sec += secs;
 
 	/* Start the (possibly) non-blocking connect() */
 	if(connect(sock, addr, addrlen) == 0)
 		return connect_error(sock);
 	else {
 		const int e = errno;
 
 		cli_dbgmsg("DEBUG nonblock_connect: connect(): fd=%d errno=%d: %s\n",
 			sock, e, strerror(e));
 
 		if(e == EISCONN)
 			return 0;	/* connected */
 
 		if((e != EALREADY) && (e != EINPROGRESS)) {
 			cli_warnmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n",
 				sock, e, strerror(e));
 			return -1;	/* failed */
 		}
 		/* otherwise the connection is in progress: wait for it */
 	}
 
 	for(;;) {
 		fd_set wfds;
 		struct timeval now, remaining;
 		int ready;
 
 		/* Out of time? */
 		gettimeofday(&now, 0);
 		if(timercmp(&now, &deadline, >)) {
 			cli_warnmsg("connect timing out (%d secs)\n",
 				secs);
 			return -1;
 		}
 
 		/* remaining = deadline - now */
 		timersub(&deadline, &now, &remaining);
 
 		/* Wait for sock, and only sock, to become writable */
 		FD_ZERO(&wfds);
 		FD_SET(sock, &wfds);
 
 		ready = select(nfds, 0, &wfds, 0, &remaining);
 		if(ready < 0) {
 			cli_warnmsg("nonblock_connect: select() failure %d: errno=%d: %s\n",
 				failures_left, errno, strerror(errno));
 			if(--failures_left >= 0)
 				continue;	/* try again */
 			return -1;
 		}
 
 		cli_dbgmsg("DEBUG nonblock_connect: select = %d\n", ready);
 
 		if(ready != 0)
 			return connect_error(sock);
 
 		/* select() came back without anything to do */
 		if(--idle_left < 0) {
 			cli_warnmsg("nonblock_connect: giving up due to excessive bogus loops\n");
 			return -1;
 		}
 	}
 }
 
 /*
  * Find out whether the connect() on sock succeeded by reading the socket's
  * pending error (SO_ERROR).
  *
  * Returns 0 if there is no pending error, -1 if there is one or if it
  * could not be read. Without SO_ERROR success is assumed.
  */
 static int
 connect_error(int sock)
 {
 #ifdef	SO_ERROR
 	int optval = 0;
 	socklen_t optlen = sizeof(optval);
 
 	/*
 	 * If getsockopt() itself fails optval is not filled in, so the
 	 * state of the connection is unknown: treat that as a failure
 	 */
 	if(getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen) < 0) {
 		const int e = errno;
 
 		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d errno=%d: %s\n",
 			sock, e, strerror(e));
 		return -1;
 	}
 
 	if(optval)
 		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d error=%d: %s\n",
 			sock, optval, strerror(optval));
 
 	return optval ? -1 : 0;
 #else
 	return 0;
 #endif
 }
 
 #else
 
52634964
 /*
  * Set to 1 by curlsegv(). It is written from a signal handler, so it has
  * to be a volatile sig_atomic_t.
  */
 static	volatile	sig_atomic_t	curl_has_segfaulted;
 /*
  * In spite of numerous bug reports, curl is still buggy :-(
  *	For a fuller explanation, read the long comment at the top, including
  *	the valgrind evidence
  *
  * SIGSEGV handler installed around curl_easy_perform(): it only records
  * that the signal was delivered.
  */
 static void
 curlsegv(int sig)
 {
 	(void)sig;	/* unused */
 
 	curl_has_segfaulted = 1;
 }
 
314ff77b
 static void *
 #ifdef	CL_THREAD_SAFE
 getURL(void *a)
 #else
 getURL(struct arg *arg)
 #endif
3fa72383
 {
 	FILE *fp;
6b93ea0c
 	struct curl_slist *headers;
314ff77b
 #ifdef	CL_THREAD_SAFE
 	struct arg *arg = (struct arg *)a;
 #endif
 	const char *url = arg->url;
 	const char *dir = arg->dir;
12cc3b41
 	CURL *curl = arg->curl;
314ff77b
 	const char *filename = arg->filename;
0cb54827
 	char fout[NAME_MAX + 1];
12cc3b41
 	void (*oldsegv)(int);
3edf8bda
 #ifdef	CURLOPT_ERRORBUFFER
aea1b159
 	char errorbuffer[CURL_ERROR_SIZE + 1];
277de80f
 #elif	(LIBCURL_VERSION_NUM >= 0x070C00)
90ad7db0
 	CURLcode res = CURLE_OK;
3edf8bda
 #endif
3fa72383
 
6b93ea0c
 	(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net");
 
3163dc8e
 	if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0) {
aea1b159
 		cli_errmsg("%s: curl_easy_setopt failed\n", url);
314ff77b
 		return NULL;
3163dc8e
 	}
6b93ea0c
 
aea1b159
 	snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename);
3fa72383
 
12cc3b41
 	cli_dbgmsg("Saving %s to %s\n", url, fout);
90ad7db0
 	fp = fopen(fout, "wb");
3fa72383
 
 	if(fp == NULL) {
138b73f6
 		cli_errmsg("Can't open '%s' for writing", fout);
314ff77b
 		return NULL;
3fa72383
 	}
05ea2522
 #ifdef	CURLOPT_WRITEDATA
314ff77b
 	if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) {
 		fclose(fp);
 		return NULL;
 	}
05ea2522
 #else
 	if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) {
 		fclose(fp);
 		return NULL;
 	}
 #endif
314ff77b
 
6b93ea0c
 	/*
3eb12bae
 	 * If an item is in squid's cache get it from there (TCP_HIT/200)
6b93ea0c
 	 * by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200)
 	 */
 	headers = curl_slist_append(NULL, "Pragma:");
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
3fa72383
 
6b93ea0c
 	/* These should be customisable */
 	curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30);
 	curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
49674596
 #ifdef	CURLOPT_MAXFILESIZE
 	curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024);
 #endif
3fa72383
 
314ff77b
 #ifdef  CL_THREAD_SAFE
c07de365
 #ifdef	CURLOPT_DNS_USE_GLOBAL_CACHE
12cc3b41
 	/* Apparently this is depracated */
 	/*curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);*/
 #endif
 #endif
 
 #ifdef  CL_THREAD_SAFE
 #ifdef	CURLOPT_NOSIGNAL
 	curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
314ff77b
 #endif
c07de365
 #endif
02406150
 
 	/*
 	 * Prevent password: prompting with older versions
 	 * FIXME: a better username?
 	 */
66df01fa
 	curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
02406150
 
314ff77b
 	/*
 	 * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked"
 	 * from gethostbyaddr_r within this. It may be a bug in libcurl
 	 * rather than this code, but I need to check, see Curl_resolv()
 	 * If pushed really hard it will sometimes say
 	 * Conditional jump or move depends on uninitialised value(s) and
 	 * quit. But the program seems to work OK without valgrind...
e3174677
 	 * Perhaps Curl_resolv() isn't thread safe?
 	 *
 	 * I have seen segfaults in version 7.12.3. Version 7.14 seems OK.
314ff77b
 	 */
8386482b
 	/*
aa479b7d
 	 * On some C libraries (notably with FC3, glibc-2.3.3-74) you get a
3163dc8e
 	 * memory leak here in getaddrinfo(), see
0856891e
 	 *	https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
8386482b
 	 */
12cc3b41
 	curl_has_segfaulted = 0;
 	oldsegv = signal(SIGSEGV, curlsegv);
3edf8bda
 #ifdef	CURLOPT_ERRORBUFFER
277de80f
 	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorbuffer);
 
89785561
 	if(curl_easy_perform(curl) != CURLE_OK)
3edf8bda
 		cli_warnmsg("URL %s failed to download: %s\n", url, errorbuffer);
643847d9
 #elif	(LIBCURL_VERSION_NUM >= 0x070C00)
89785561
 	if((res = curl_easy_perform(curl)) != CURLE_OK)
90ad7db0
 		cli_warnmsg("URL %s failed to download: %s\n", url,
 			curl_easy_strerror(res));
643847d9
 #else
277de80f
 	if(curl_easy_perform(curl) != CURLE_OK)
643847d9
 		cli_warnmsg("URL %s failed to download\n", url);
3edf8bda
 #endif
6b93ea0c
 
 	fclose(fp);
9c0ac287
 	curl_slist_free_all(headers);
314ff77b
 
12cc3b41
 	if(curl_has_segfaulted)
 		cli_warnmsg("Libcurl has segfaulted on '%s'\n", url);
 
 	signal(SIGSEGV, oldsegv);
314ff77b
 	return NULL;
3fa72383
 }
256a86cb
 #endif
52634964
 
3fa72383
 #endif
90343a0f
 
d1382234
 #ifdef HAVE_BACKTRACE
f2b068fb
 /*
  * SIGSEGV handler: log a backtrace through syslog, then terminate the
  * process with exit status SIGSEGV.
  */
 static void
98685ac1
 sigsegv(int sig)
 {
 	/*
 	 * Put the default action back before doing anything else -
 	 * presumably so that a further fault while printing the trace
 	 * ends the process instead of re-entering this handler
 	 */
 	signal(SIGSEGV, SIG_DFL);
d1382234
 	print_trace(1);	/* non-zero: send the trace to syslog */
98685ac1
 	exit(SIGSEGV);
 }
 
f2b068fb
 /*
  * Log a backtrace (at most 10 frames) of the calling thread.
  *
  * use_syslog	non-zero: write to syslog at LOG_ERR
  *		zero: write through cli_dbgmsg()
  */
 static void
 print_trace(int use_syslog)
 {
 	void *array[10];
 	int size, i;
 	char **strings;
 	const pid_t pid = getpid();
 
 	size = backtrace(array, 10);
 	strings = backtrace_symbols(array, size);
 
 	if(use_syslog == 0)
 		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
 	else
 		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);
 
 	/* backtrace_symbols() returns NULL if it can't allocate memory */
 	if(strings == NULL)
 		return;
 
 	for(i = 0; i < size; i++)
 		if(use_syslog)
 			syslog(LOG_ERR, "bt[%u]: %s", (unsigned int)i, strings[i]);
 		else
 			cli_dbgmsg("%s\n", strings[i]);
 
 	/* TODO: dump the current email */
 
 	/* Only the array is allocated, not the strings it points to */
 	free(strings);
 }
 #endif
00615ec9
 
409f1680
 /* See also clamav-milter */
82933497
 static bool
 usefulHeader(int commandNumber, const char *cmd)
 {
 	switch(commandNumber) {
 		case CONTENT_TRANSFER_ENCODING:
 		case CONTENT_DISPOSITION:
 		case CONTENT_TYPE:
 			return TRUE;
 		default:
 			if(strcasecmp(cmd, "From") == 0)
 				return TRUE;
409f1680
 			if(strcasecmp(cmd, "Received") == 0)
82933497
 				return TRUE;
409f1680
 			if(strcasecmp(cmd, "De") == 0)
82933497
 				return TRUE;
 	}
 
 	return FALSE;
 }
 
b3a5cdd8
 /*
  * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
  *
  * Reads one line from fin into buffer (which holds len bytes). Whatever
  * line ending was found is stored as a single '\n'. A line that is too long
  * for the buffer is cut; the remainder is returned by the following calls.
  *
  * Returns buffer, or NULL if fin was already at end of file, on a read
  * error, or if the arguments are unusable. When end of file is hit by the
  * first read an empty string is returned, and NULL by the call after that.
  */
 static char *
 getline_from_mbox(char *buffer, size_t len, FILE *fin)
 {
 	char *ret;
 
 	if(feof(fin))
 		return NULL;
 
 	/*
 	 * At least two bytes are needed, one for a character and one for
 	 * the terminating NUL: with a single byte a line ending would be
 	 * stored and the NUL would then land outside the buffer
 	 */
 	if((len < 2) || (buffer == NULL)) {
 		cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs\n");
 		return NULL;
 	}
 
 	ret = buffer;
 
 	do {
 		int c = getc(fin);
 
 		if(ferror(fin))
 			return NULL;
 
 		switch(c) {
 			case '\n':
 				/* swallow the '\r' of a "\n\r" pair */
 				*buffer++ = '\n';
 				c = getc(fin);
 				if((c != '\r') && !feof(fin))
 					ungetc(c, fin);
 				break;
 			default:
 				*buffer++ = (char)c;
 				continue;	/* next character, if there's room */
 			case EOF:
 				break;
 			case '\r':
 				/* swallow the '\n' of a "\r\n" pair */
 				*buffer++ = '\n';
 				c = getc(fin);
 				if((c != '\n') && !feof(fin))
 					ungetc(c, fin);
 				break;
 		}
 		break;	/* end of line or end of file */
 	} while(--len > 1);
 
 	/*
 	 * len is at least 1 here, so there is always room for the NUL
 	 */
 	*buffer = '\0';
 
 	if(len == 1)
 		/* overflows will have appeared on separate lines */
 		cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");
 
 	return ret;
 }
133dcdcd
 
b65d2aad
 /*
  * Is this line a candidate for the start of a bounce message?
  */
133dcdcd
 static bool
b65d2aad
 isBounceStart(const char *line)
133dcdcd
 {
 	if(line == NULL)
 		return FALSE;
 	if(*line == '\0')
 		return FALSE;
e39c0901
 	/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
133dcdcd
 		return FALSE;
 	if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
e39c0901
 		return FALSE;*/
a4f8f199
 	if(cli_filetype((const unsigned char *)line, strlen(line)) != CL_TYPE_MAIL)
133dcdcd
 		return FALSE;
 
 	if((strncmp(line, "From ", 5) == 0) ||
 	   (strncmp(line, ">From ", 6) == 0)) {
 		int numSpaces = 0, numDigits = 0;
 
 		do
 			if(*line == ' ')
 				numSpaces++;
 			else if(isdigit(*line))
 				numDigits++;
 		while(*++line != '\0');
 
 		if(numSpaces < 6)
 			return FALSE;
 		if(numDigits < 11)
 			return FALSE;
 	}
 	return TRUE;
 }
f3ec89d2
 
 /*
  * Extract a binhexEncoded message, return if it's found to be infected as we
  *	extract it
  */
 static bool
195e3683
 exportBinhexMessage(const char *dir, message *m)
f3ec89d2
 {
 	bool infected = FALSE;
 	fileblob *fb;
 
 	if(messageGetEncoding(m) == NOENCODING)
 		messageSetEncoding(m, "x-binhex");
 
985cc85e
 	fb = messageToFileblob(m, dir, 0);
f3ec89d2
 
 	if(fb) {
 		if(fileblobContainsVirus(fb))
 			infected = TRUE;
 
 		cli_dbgmsg("Binhex file decoded to %s\n",
 			fileblobGetFilename(fb));
 		fileblobDestroy(fb);
 	} else
 		cli_errmsg("Couldn't decode binhex file to %s\n", dir);
 
 	return infected;
 }
d9bde711
 
 /*
  * Locate any bounce message and extract it. Return 1 if anything found
  *
  * The lines from start onwards are searched for a header that suggests
  * an embedded message; if one is seen, the text from the most recent
  * From/Received header (or from start) is saved to a file called
  * "bounce" in mctx->dir.
  */
 static int
 exportBounceMessage(text *start, const mbox_ctx *mctx)
 {
 	text *cur;
 	fileblob *blob;
 	int saved;
 
 	/*
 	 * Attempt to save the original (unbounced) message - clamscan will
 	 * find that in the directory and call us again (with any luck)
 	 * having found an e-mail message to handle.
 	 *
 	 * This finds a lot of false positives, the search that a content
 	 * type is in the bounce (i.e. it's after the bounce header) helps
 	 * a bit.
 	 *
 	 * messageAddLine optimisation could help here, but needs careful
 	 * thought, do it with line numbers would be best, since the current
 	 * method in messageAddLine of checking encoding first must remain
 	 * otherwise non bounce messages won't be scanned
 	 */
 	for(cur = start; cur != NULL; cur = cur->t_next) {
 		char cmd[RFC2821LENGTH + 1];
 		const char *txt = lineGetData(cur->t_line);
 
 		if((txt == NULL) || (cli_strtokbuf(txt, 0, ":", cmd) == NULL))
 			continue;
 
 		switch(tableFind(mctx->rfc821Table, cmd)) {
 			case CONTENT_TRANSFER_ENCODING:
 				/* 7bit and 8bit say nothing, keep looking */
 				if((strstr(txt, "7bit") != NULL) ||
 				   (strstr(txt, "8bit") != NULL))
 					continue;
 				break;
 			case CONTENT_DISPOSITION:
 				break;
 			case CONTENT_TYPE:
 				/* plain text: give up, there's no bounce */
 				if(strstr(txt, "text/plain") != NULL)
 					cur = NULL;
 				break;
 			default:
 				/* remember where the latest header block starts */
 				if((strcasecmp(cmd, "From") == 0) ||
 				   (strcasecmp(cmd, "Received") == 0))
 					start = cur;
 				continue;
 		}
 		break;	/* the search is over, one way or the other */
 	}
 
 	if((cur == NULL) || ((blob = fileblobCreate()) == NULL)) {
 		cli_dbgmsg("Not found a bounce message\n");
 		return 0;
 	}
 
 	cli_dbgmsg("Found a bounce message\n");
 	fileblobSetFilename(blob, mctx->dir, "bounce");
 	/*fileblobSetCTX(blob, mctx->ctx);*/
 
 	saved = (textToFileblob(start, blob, 1) != NULL);
 	if(!saved)
 		cli_dbgmsg("Nothing new to save in the bounce message\n");
 
 	fileblobDestroy(blob);
 
 	return saved;
 }
 
 /*
  * Handle the ith element of a number of multiparts, e.g. multipart/alternative
  *
  * What is done with messages[i] depends on its MIME type: it is added to
  * the text list *tptr, saved to a file in mctx->dir, or (for message and
  * multipart types) parsed as a message of its own at recursion_level + 1.
  * In most cases the part is destroyed and messages[i] is set to NULL
  * before returning.
  *
  * *rc is set to VIRUS when a virus is detected here, and is overwritten
  * with the result of parseEmailBody() for nested parts.
  *
  * Returns mainMessage, or NULL when mainMessage was given up here; in that
  * case it has also been destroyed, unless it is the caller's messageIn.
  */
 static message *
2345b4cd
 do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
d9bde711
 {
 	bool addToText = FALSE;
 	const char *dtype;
 #ifndef	SAVE_TO_DISC
 	message *body;
 #endif
 	message *aMessage = messages[i];
 
 	if(aMessage == NULL)
 		return mainMessage;
 
 	cli_dbgmsg("Mixed message part %d is of type %d\n",
 		i, messageGetMimeType(aMessage));
 
 	switch(messageGetMimeType(aMessage)) {
 		case APPLICATION:
 		case AUDIO:
 		case IMAGE:
 		case VIDEO:
 			/* saved to a file by the code after the switch */
 			break;
 		case NOMIME:
 			cli_dbgmsg("No mime headers found in multipart part %d\n", i);
 			if(mainMessage) {
 				if(binhexBegin(aMessage)) {
 					cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
 
195e3683
 					if(exportBinhexMessage(mctx->dir, mainMessage))
52f670eb
 						*rc = VIRUS;
d9bde711
 				}
 				if(mainMessage != messageIn)
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 			} else if(aMessage) {
 				if(binhexBegin(aMessage)) {
 					cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
195e3683
 					if(exportBinhexMessage(mctx->dir, aMessage))
52f670eb
 						*rc = VIRUS;
d9bde711
 					assert(aMessage == messages[i]);
 					messageReset(messages[i]);
 				}
 			}
 			addToText = TRUE;
 			if(messageGetBody(aMessage) == NULL)
 				/*
 				 * No plain text version
 				 */
 				cli_dbgmsg("No plain text alternative\n");
 			break;
 		case TEXT:
 			dtype = messageGetDispositionType(aMessage);
 			cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
 				dtype);
 			if(strcasecmp(dtype, "attachment") == 0)
 				break;
 			if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
 				const char *cptr;
 
 				if(mainMessage && (mainMessage != messageIn))
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 				cptr = messageGetMimeSubtype(aMessage);
 				cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
 				if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
 					  (messageGetEncoding(aMessage) == NOENCODING)) {
 					char *filename;
 					/*
 					 * Strictly speaking
 					 * a text/plain part is
 					 * not an attachment. We
 					 * pretend it is so that
 					 * we can decode and
 					 * scan it
 					 */
 					filename = (char *)messageFindArgument(aMessage, "filename");
 					if(filename == NULL)
 						filename = (char *)messageFindArgument(aMessage, "name");
 
 					if(filename == NULL) {
 						cli_dbgmsg("Adding part to main message\n");
 						addToText = TRUE;
 					} else {
 						cli_dbgmsg("Treating %s as attachment\n",
 							filename);
 						free(filename);
 					}
 				} else {
60f1af8b
 					const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
 					if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
 						checkURLs(aMessage, mctx, rc, 1);
12cc3b41
 #ifdef	CL_EXPERIMENTAL
19a2af6d
 					else if(mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS)
60f1af8b
 						checkURLs(aMessage, mctx, rc, is_html);
12cc3b41
 #endif
d9bde711
 					messageAddArgument(aMessage,
 						"filename=mixedtextportion");
 				}
 				break;
 			}
 			cli_dbgmsg("Text type %s is not supported\n", dtype);
 			return mainMessage;
 		case MESSAGE:
 			/* Content-Type: message/rfc822 */
 			cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
 				messageGetEncoding(aMessage));
 #ifndef	SCAN_UNENCODED_BOUNCES
 			switch(messageGetEncoding(aMessage)) {
 				case NOENCODING:
 				case EIGHTBIT:
 				case BINARY:
 					if(encodingLine(aMessage) == NULL) {
 						/*
 						 * This means that the message
 						 * has no attachments
 						 *
 						 * The test for
 						 * messageGetEncoding is needed
 						 * since encodingLine won't have
 						 * been set if the message
 						 * itself has been encoded
 						 */
 						cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
 						assert(aMessage == messages[i]);
 						messageDestroy(messages[i]);
 						messages[i] = NULL;
 						return mainMessage;
 					}
 					/* FALLTHROUGH */
 				default:
 					cli_dbgmsg("Encoded multipart/message will be scanned\n");
 			}
 #endif
 #if	0
 			messageAddStrAtTop(aMessage,
 				"Received: by clamd (message/rfc822)");
 #endif
 #ifdef	SAVE_TO_DISC
 			/*
 			 * Save this embedded message
 			 * to a temporary file
 			 */
0e01c158
 			saveTextPart(aMessage, mctx->dir, 1);
d9bde711
 			assert(aMessage == messages[i]);
 			messageDestroy(messages[i]);
 			messages[i] = NULL;
 #else
 			/*
038c52be
 			 * Scan in memory, faster but is open to DoS attacks
 			 * when many nested levels are involved.
d9bde711
 			 */
24e78b19
 			body = parseEmailHeaders(aMessage, mctx->rfc821Table);
 
d9bde711
 			/*
 			 * We've fininished with the
 			 * original copy of the message,
 			 * so throw that away and
 			 * deal with the encapsulated
 			 * message as a message.
 			 * This can save a lot of memory
 			 */
 			assert(aMessage == messages[i]);
 			messageDestroy(messages[i]);
 			messages[i] = NULL;
 			if(body) {
24e78b19
 				messageSetCTX(body, mctx->ctx);
 				*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
d9bde711
 				if(messageContainsVirus(body))
52f670eb
 					*rc = VIRUS;
d9bde711
 				messageDestroy(body);
 			}
 #endif
 			return mainMessage;
 		case MULTIPART:
 			/*
 			 * It's a multi part within a multi part
 			 * Run the message parser on this bit, it won't
 			 * be an attachment
 			 */
 			cli_dbgmsg("Found multipart inside multipart\n");
 			/*
 			 * NOTE(review): aMessage was checked against NULL on
 			 * entry, so the else branch looks unreachable
 			 */
 			if(aMessage) {
 				/*
 				 * The headers were parsed when reading in the
 				 * whole multipart section
 				 */
ddd53493
 				*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
d9bde711
 				cli_dbgmsg("Finished recursion\n");
 				assert(aMessage == messages[i]);
 				messageDestroy(messages[i]);
 				messages[i] = NULL;
 			} else {
ddd53493
 				*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
d9bde711
 				if(mainMessage && (mainMessage != messageIn))
 					messageDestroy(mainMessage);
 				mainMessage = NULL;
 			}
 			return mainMessage;
 		default:
 			cli_warnmsg("Only text and application attachments are supported, type = %d\n",
 				messageGetMimeType(aMessage));
 			return mainMessage;
 	}
 
 	/*
 	 * Only the cases that left the switch with a break get here: the
 	 * part either joins the text list or is written out as a file
 	 */
 	if(addToText) {
 		cli_dbgmsg("Adding to non mime-part\n");
 		*tptr = textAdd(*tptr, messageGetBody(aMessage));
 	} else {
985cc85e
 		fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
d9bde711
 
 		if(fb) {
 			if(fileblobContainsVirus(fb))
52f670eb
 				*rc = VIRUS;
d9bde711
 			fileblobDestroy(fb);
 		}
 	}
 	if(messageContainsVirus(aMessage))
52f670eb
 		*rc = VIRUS;
d9bde711
 	/* The part has been dealt with: free it and empty its slot */
 	messageDestroy(aMessage);
 	messages[i] = NULL;
 
 	return mainMessage;
 }
dd2131c9
 
 /*
  * Count the double quote characters (") in the NUL terminated string buf
  */
 static int
 count_quotes(const char *buf)
 {
 	const char *p;
 	int total = 0;
 
 	for(p = buf; *p != '\0'; p++)
 		if(*p == '\"')
 			total++;
 
 	return total;
 }
0066d39b
 
 /*
  * Will the next line be a folded header? See RFC2822 section 2.2.3
  *
  * t is the current header line. Returns TRUE if the line after it is
  * either a proper continuation (it starts with a space or tab), or looks
  * like a broken one: it contains an '=' and the current line ends, apart
  * from trailing white space, with a ';'.
  */
 static bool
 next_is_folded_header(const text *t)
 {
 	const text *next = t->t_next;
 	const char *data;
 	size_t i;
 
 	if(next == NULL)
 		return FALSE;
 
 	if(next->t_line == NULL)
 		return FALSE;
 
 	data = lineGetData(next->t_line);
 
 	/*
 	 * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
 	 * previous entry.
 	 * The cast stops 8 bit characters reaching isblank() as negative
 	 * values
 	 */
 	if(isblank((unsigned char)data[0]))
 		return TRUE;
 
 	if(strchr(data, '=') == NULL)
 		/*
 		 * Avoid false positives with
 		 *	Content-Type: text/html;
 		 *	Content-Transfer-Encoding: quoted-printable
 		 */
 		return FALSE;
 
 	/*
 	 * Some are broken and don't fold headers lines
 	 * correctly as per section 2.2.3 of RFC2822.
 	 * Generally they miss the white space at
 	 * the start of the fold line:
 	 *	Content-Type: multipart/related;
 	 *	type="multipart/alternative";
 	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * should read:
 	 *	Content-Type: multipart/related;
 	 *	 type="multipart/alternative";
 	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
 	 * Since we're a virus checker not an RFC
 	 * verifier we need to handle these
 	 */
 	if(t->t_line == NULL)
 		/* an empty current line has nothing to continue */
 		return FALSE;
 
 	data = lineGetData(t->t_line);
 
 	/*
 	 * Walk back from the end of the current line, skipping white space.
 	 * The first character of the line is deliberately not looked at.
 	 * Indexing, rather than decrementing a pointer, means an empty line
 	 * never yields a pointer before the start of the string
 	 */
 	for(i = strlen(data); i > 1; i--)
 		switch(data[i - 1]) {
 			case ';':
 				return TRUE;
 			case '\n':
 			case ' ':
 			case '\r':
 			case '\t':
 				continue;	/* white space at end of line */
 			default:
 				return FALSE;
 		}
 	return FALSE;
 }
4db74788
 
 /*
  * Called on the first line of the body of an email to cope with broken
  * messages that have newlines in the middle of their headers.
  *
  * Returns TRUE if line starts with "Message-Id: " or "Date: " (exact
  * case), i.e. it is really a header that has strayed into the body.
  */
 static bool
 newline_in_header(const char *line)
 {
 	cli_dbgmsg("newline_in_header, check \"%s\"\n", line);
 
 	return ((strncmp(line, "Message-Id: ", 12) == 0) ||
 		(strncmp(line, "Date: ", 6) == 0)) ? TRUE : FALSE;
 }