libclamav/message.c
b151ef55
 /*
70b54406
  *  Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk>
b151ef55
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
30738099
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
b151ef55
  */
4db74788
 static	char	const	rcsid[] = "$Id: message.c,v 1.193 2007/01/07 21:30:49 njh Exp $";
8b242bb9
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
b151ef55
 
 #ifndef	CL_DEBUG
0b08b624
 #define	NDEBUG	/* map CLAMAV debug onto standard */
b151ef55
 #endif
 
 #ifdef CL_THREAD_SAFE
dd8a7e90
 #ifndef	_REENTRANT
b151ef55
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
dd8a7e90
 #endif
b151ef55
 
f0146bc6
 #ifdef	C_DARWIN
b151ef55
 #include <sys/types.h>
 #endif
 #include <stdlib.h>
 #include <string.h>
40d54f7f
 #ifdef	HAVE_STRINGS_H
b151ef55
 #include <strings.h>
40d54f7f
 #endif
b151ef55
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
5c86c162
 #include "others.h"
 #include "str.h"
 #include "filetypes.h"
 
b151ef55
 #include "mbox.h"
 
06bce849
 #ifndef isblank
 #define isblank(c)	(((c) == ' ') || ((c) == '\t'))
 #endif
 
53ee0b60
 #define	RFC2045LENGTH	76	/* maximum number of characters on a line */
 
4db74788
 #ifdef	HAVE_STDBOOL_H
a4f8f199
 #include <stdbool.h>
 #else
 #ifdef	FALSE
 typedef	unsigned	char	bool;
 #else
 typedef enum	{ FALSE = 0, TRUE = 1 } bool;
 #endif
 #endif
b151ef55
 
de617e3e
 /*
  * Forward declarations of the helpers that are private to this file.
  */
 static	void	messageIsEncoding(message *m);
285a69b4
 static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
fb405afc
 static	void	sanitiseBase64(char *s);
f5d6e670
 #ifdef	__GNUC__
 /*
  * With GCC the single character decoders are declared with the "const"
  * function attribute.
  */
 static	unsigned	char	hex(char c)	__attribute__((const));
 static	unsigned	char	base64(char c)	__attribute__((const));
 static	unsigned	char	uudecode(char c)	__attribute__((const));
 #else
b151ef55
 static	unsigned	char	hex(char c);
 static	unsigned	char	base64(char c);
 static	unsigned	char	uudecode(char c);
f5d6e670
 #endif
b151ef55
 static	const	char	*messageGetArgument(const message *m, int arg);
985cc85e
 /*
  * messageExport is driven entirely through the callbacks it is given:
  * create/destroy an output object, name it, append data to it, export
  * text to it and attach a scan context
  */
 static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
b4cb4486
 static	int	usefulArg(const char *arg);
e24738dc
 static	void	messageDedup(message *m);
5e5a162c
 static	char	*rfc2231(const char *in);
b329234a
 static	int	simil(const char *str1, const char *str2);
b151ef55
 
 /*
  * These maps are ordered in decreasing likelihood of their appearance
  * in an e-mail. Probably these should be in a table...
  *
  * encoding_map pairs the text that can appear in a
  * Content-Transfer-Encoding header with the encoding_type used internally.
  * The list is terminated by an entry whose string is NULL;
  * messageSetEncoding() walks it until it reaches that entry.
  * Entries commented as "incorrect" are not valid names but are accepted
  * anyway.
  */
 static	const	struct	encoding_map {
 	const	char	*string;
 	encoding_type	type;
da850706
 } encoding_map[] = {	/* rfc2045 */
b151ef55
 	{	"7bit",			NOENCODING	},
b759d5eb
 	{	"text/plain",		NOENCODING	},
da850706
 	{	"quoted-printable",	QUOTEDPRINTABLE	},	/* rfc2045 */
98685ac1
 	{	"base64",		BASE64		},	/* rfc2045 */
b151ef55
 	{	"8bit",			EIGHTBIT	},
bb2432d7
 	{	"binary",		BINARY		},
af66c329
 	{	"x-uuencode",		UUENCODE	},	/* uuencode(5) */
00f95393
 	{	"x-yencode",		YENCODE		},
fef5ad63
 	{	"x-binhex",		BINHEX		},
7ea0c270
 	{	"us-ascii",		NOENCODING	},	/* incorrect */
ef3cf57d
 	{	"x-uue",		UUENCODE	},	/* incorrect */
a4c3d0a3
 	{	"uuencode",		UUENCODE	},	/* incorrect */
6ba88eb8
 	{	NULL,			NOENCODING	}	/* end of list marker */
b151ef55
 };
 
 /*
  * Primary MIME types (the part of Content-Type before the '/') and the
  * mime_type each one maps to. The list is terminated by an entry whose
  * string is NULL. Nothing writes to the table and it is only walked
  * through a pointer to const, so it is declared const in the same way as
  * encoding_map.
  */
 static	const	struct	mime_map {
 	const	char	*string;
 	mime_type	type;
 } mime_map[] = {
 	{	"text",			TEXT		},
 	{	"multipart",		MULTIPART	},
 	{	"application",		APPLICATION	},
 	{	"audio",		AUDIO		},
 	{	"image",		IMAGE		},
 	{	"message",		MESSAGE		},
 	{	"video",		VIDEO		},
 	{	NULL,			TEXT		}	/* end of list marker */
 };
 
fe6ce0ba
 /*
  * See RFC2045, section 6.8, table 1
  *
  * Lookup table indexed by character code, 16 entries per row.
  * 'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives 62
  * and '/' gives 63. The padding character '=' gives 0. Every other
  * character, including everything from 128 upwards, gives 255.
  * NOTE(review): presumably 255 is treated as "not a base64 character" by
  * base64(), which is not shown here - confirm
  */
d17de037
 static const unsigned char base64Table[256] = {
5ae253d2
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
 	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
 	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
 	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
 	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
 	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
 };
 
b151ef55
 message *
 messageCreate(void)
 {
 	message *m = (message *)cli_calloc(1, sizeof(message));
 
0e3b08fc
 	if(m)
bbf43447
 		m->mimeType = NOMIME;
b151ef55
 
 	return m;
 }
 
 /*
  * Dispose of a message: messageReset() releases what the message holds,
  * then the structure itself is freed. m must not be used afterwards.
  */
 void
 messageDestroy(message *m)
 {
 	assert(m != NULL);
 
 	messageReset(m);	/* contents first... */
 	free(m);		/* ...then the container */
 }
 
 /*
  * Release everything the message refers to (subtype, disposition type,
  * arguments, body text and the list of encoding types) and put the
  * structure back into the state messageCreate() leaves it in: all zero
  * apart from a MIME type of NOMIME.
  */
 void
 messageReset(message *m)
 {
 	assert(m != NULL);
 
 	/* free(NULL) does nothing, so unset strings need no special case */
 	free(m->mimeSubtype);
 	free(m->mimeDispositionType);
 
 	if(m->mimeArguments != NULL) {
 		int n = 0;
 
 		while(n < m->numberOfArguments)
 			free(m->mimeArguments[n++]);
 		free(m->mimeArguments);
 	}
 
 	if(m->body_first != NULL)
 		textDestroy(m->body_first);
 
 	assert(m->base64chars == 0);
 
 	if(m->encodingTypes != NULL) {
 		assert(m->numberOfEncTypes > 0);
 		free(m->encodingTypes);
 	}
 
 	memset(m, '\0', sizeof(message));
 	m->mimeType = NOMIME;
 }
 
de509b8e
 /*
bb2432d7
  * Handle the Content-Type header. The syntax is in RFC1341.
2bcec72b
  * Return success (1) or failure (0). Failure only happens when it's an
  * unknown type and we've already received a known type, or we've received an
  * empty type. If we receive an unknown type by itself we default to application
de509b8e
  */
2bcec72b
 int
b151ef55
 messageSetMimeType(message *mess, const char *type)
 {
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
1f8eb426
 	const struct mime_map *m;
dad64ecb
 	int typeval;
1f8eb426
 	static table_t *mime_table;
b151ef55
 
 	assert(mess != NULL);
4a46b8a2
 	if(type == NULL) {
 		cli_warnmsg("Empty content-type field\n");
 		return 0;
 	}
b151ef55
 
 	cli_dbgmsg("messageSetMimeType: '%s'\n", type);
 
 	/* Ignore leading spaces */
82189c76
 	while(!isalpha(*type))
b151ef55
 		if(*type++ == '\0')
2bcec72b
 			return 0;
b151ef55
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&mime_mutex);
 #endif
dad64ecb
 	if(mime_table == NULL) {
 		mime_table = tableCreate();
8a88fb93
 		if(mime_table == NULL) {
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&mime_mutex);
 #endif
2bcec72b
 			return 0;
8a88fb93
 		}
dad64ecb
 
 		for(m = mime_map; m->string; m++)
 			if(!tableInsert(mime_table, m->string, m->type)) {
 				tableDestroy(mime_table);
8a88fb93
 				mime_table = NULL;
 #ifdef	CL_THREAD_SAFE
 				pthread_mutex_unlock(&mime_mutex);
 #endif
2bcec72b
 				return 0;
dad64ecb
 			}
 	}
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&mime_mutex);
 #endif
dad64ecb
 
 	typeval = tableFind(mime_table, type);
 
2bcec72b
 	if(typeval != -1) {
f0146bc6
 		mess->mimeType = (mime_type)typeval;
2bcec72b
 		return 1;
1f8eb426
 	}
 	if(mess->mimeType == NOMIME) {
b151ef55
 		if(strncasecmp(type, "x-", 2) == 0)
 			mess->mimeType = MEXTENSION;
9fc8173e
 		else {
27a375f2
 			/*
 			 * Based on a suggestion by James Stevens
 			 *	<James@kyzo.com>
 			 * Force scanning of strange messages
 			 */
9fc8173e
 			if(strcasecmp(type, "plain") == 0) {
79879cfb
 				cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
9fc8173e
 				mess->mimeType = TEXT;
 			} else {
 				/*
 				 * Don't handle broken e-mail probably sending
 				 *	Content-Type: plain/text
 				 * instead of
 				 *	Content-Type: text/plain
 				 * as an attachment
 				 */
1f8eb426
 				int highestSimil = 0, t = -1;
 				const char *closest = NULL;
 
 				for(m = mime_map; m->string; m++) {
 					const int s = simil(m->string, type);
 
 					if(s > highestSimil) {
 						highestSimil = s;
 						closest = m->string;
 						t = m->type;
 					}
 				}
 				if(highestSimil >= 50) {
 					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n",
 						type, closest, highestSimil);
388072d8
 					mess->mimeType = (mime_type)t;
1f8eb426
 				} else {
a363da65
 					cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
1f8eb426
 					mess->mimeType = APPLICATION;
 				}
9fc8173e
 			}
27a375f2
 		}
2bcec72b
 		return 1;
b151ef55
 	}
2bcec72b
 	return 0;
b151ef55
 }
 
 /*
  * Report the primary MIME type currently recorded for the message
  */
 mime_type
 messageGetMimeType(const message *m)
 {
 	assert(m != NULL);
 	return m->mimeType;
 }
 
 /*
  * Replace the message's MIME subtype with a copy of subtype.
  * A NULL subtype is stored as the empty string.
  */
 void
 messageSetMimeSubtype(message *m, const char *subtype)
 {
 	const char *value = subtype;
 
 	assert(m != NULL);
 
 	if(value == NULL) {
 		/*
 		 * Handle broken content-type lines, e.g.
 		 *	Content-Type: text/
 		 */
 		cli_dbgmsg("Empty content subtype\n");
 		value = "";
 	}
 
 	/* drop any earlier value; free(NULL) does nothing */
 	free(m->mimeSubtype);
 
 	m->mimeSubtype = cli_strdup(value);
 }
 
 const char *
 messageGetMimeSubtype(const message *m)
 {
b329234a
 	return (m->mimeSubtype) ? m->mimeSubtype : "";
b151ef55
 }
 
 /*
  * Replace the message's Content-Disposition type.
  *
  *	m		message to update
  *	disptype	new value, may be NULL
  *
  * Leading white space is skipped and the stored copy is passed through
  * strstrip(). If disptype is NULL, empty or nothing but white space the
  * message ends up with no disposition type.
  */
 void
 messageSetDispositionType(message *m, const char *disptype)
 {
 	assert(m != NULL);
 
 	if(m->mimeDispositionType)
 		free(m->mimeDispositionType);
 	m->mimeDispositionType = NULL;
 
 	if(disptype == NULL)
 		return;
 
 	/*
 	 * It's broken for there to be an entry such as "Content-Disposition:"
 	 * However some spam and viruses are rather broken, it's a sign
 	 * that something is wrong if we get that - maybe we should force a
 	 * scan of this part
 	 *
 	 * isspace() is undefined for negative values, which a plain char
 	 * with the top bit set can be, hence the cast. It is false for
 	 * '\0' so the loop stops at the end of the string
 	 */
 	while(isspace((unsigned char)*disptype))
 		disptype++;
 
 	if(*disptype == '\0')
 		return;
 
 	m->mimeDispositionType = cli_strdup(disptype);
 	if(m->mimeDispositionType)
 		strstrip(m->mimeDispositionType);
 }
 
 const char *
 messageGetDispositionType(const message *m)
 {
b329234a
 	return (m->mimeDispositionType) ? m->mimeDispositionType : "";
b151ef55
 }
 
 /*
  * Store one "name=value" header argument in the message.
  *
  *	m	message to add to
  *	arg	the argument; NULL, empty and all-blank arguments are
  *		silently ignored, as are arguments that usefulArg() rejects
  *		and arguments that are already stored (compared without
  *		regard to case)
  *
  * What is stored is the result of rfc2231(arg), not arg itself, so the
  * caller keeps ownership of arg.
  *
  * TODO:
  *	Arguments are held on a per message basis, they should be held on
  * a per section basis. Otherwise what happens if two sections have two
  * different values for charset? Probably doesn't matter for the use this
  * code will be given, but will need fixing if this code is used elsewhere
  */
 void
 messageAddArgument(message *m, const char *arg)
 {
 	int offset;
 
 	assert(m != NULL);
 
 	if(arg == NULL)
 		return;	/* Note: this is not an error condition */
 
 	/* the cast keeps isspace() defined for bytes with the top bit set */
 	while(isspace((unsigned char)*arg))
 		arg++;
 
 	if(*arg == '\0')
 		/* Empty argument? Probably a broken mail client... */
 		return;
 
 	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);
 
 	if(!usefulArg(arg))
 		return;
 
 	/* look for a free (NULL) slot, giving up if it's a duplicate */
 	for(offset = 0; offset < m->numberOfArguments; offset++)
 		if(m->mimeArguments[offset] == NULL)
 			break;
 		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
 			return;	/* already in there */
 
 	if(offset == m->numberOfArguments) {
 		/* no free slot, grow the array by one */
 		char **ptr;
 
 		m->numberOfArguments++;
 		ptr = (char **)cli_realloc(m->mimeArguments, m->numberOfArguments * sizeof(char *));
 		if(ptr == NULL) {
 			m->numberOfArguments--;
 			return;
 		}
 		m->mimeArguments = ptr;
 	}
 
 	arg = m->mimeArguments[offset] = rfc2231(arg);
 
 	/*
 	 * NOTE(review): rfc2231() is not shown here; it returns a char *
 	 * that messageReset() later frees, so presumably it allocates and
 	 * can return NULL when out of memory - confirm.
 	 * A NULL result leaves the slot empty, which the search above and
 	 * messageGetArgument() both cope with, but it must not be handed
 	 * to strncasecmp()
 	 */
 	if(arg == NULL)
 		return;
 
 	/*
 	 * This is terribly broken from an RFC point of view but is useful
 	 * for catching viruses which have a filename but no type of
 	 * mime. By pretending defaulting to an application rather than
 	 * to nomime we can ensure they're saved and scanned
 	 */
 	if((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0))
 		if(messageGetMimeType(m) == NOMIME) {
 			cli_dbgmsg("Force mime encoding to application\n");
 			messageSetMimeType(m, "application");
 		}
 }
 
 /*
  * Add in all the arguments.
  * Cope with:
  *	name="foo bar.doc"
  *	charset=foo name=bar
  *
  *	m	message the arguments are added to
  *	s	the argument part of a header line, must not be NULL
  *
  * Arguments are separated by white space and/or ';'. Each one found is
  * rebuilt as "key=value" (without quotes) and handed to
  * messageAddArgument(). Parsing stops, keeping what has been added so far,
  * at the first argument that has neither '=' nor ':', that has no value,
  * or whose opening quote is never closed, and also when memory for the
  * copy of a quoted argument's key can't be allocated.
  */
 void
 messageAddArguments(message *m, const char *s)
 {
 	const char *string = s;
 
 	/* check before the string is printed, %s of NULL is undefined */
 	assert(string != NULL);
 
 	cli_dbgmsg("Add arguments '%s'\n", string);
 
 	while(*string) {
 		const char *key, *cptr;
 		char *data, *field;
 
 		/*
 		 * The casts in this function keep isspace() defined for
 		 * bytes with the top bit set
 		 */
 		if(isspace((unsigned char)*string) || (*string == ';')) {
 			string++;
 			continue;
 		}
 
 		/* key is at the first character of "key=value", never empty */
 		key = string;
 
 		data = strchr(string, '=');
 
 		/*
 		 * Some spam breaks RFC2045 by using ':' instead of '='
 		 * e.g.:
 		 *	Content-Type: text/html; charset:ISO-8859-1
 		 * should be:
 		 *	Content-type: text/html; charset=ISO-8859-1
 		 *
 		 * We give up with lines that are completely broken because
 		 * we don't have ESP and don't know what was meant to be there.
 		 * It's unlikely to really be a problem.
 		 */
 		if(data == NULL)
 			data = strchr(string, ':');
 
 		if(data == NULL) {
 			/*
 			 * Completely broken, give up
 			 */
 			cli_dbgmsg("Can't parse header \"%s\"\n", s);
 			return;
 		}
 
 		string = &data[1];
 
 		/*
 		 * Handle white space to the right of the equals sign
 		 * This breaks RFC2045 which has:
 		 *	parameter := attribute "=" value
 		 *	attribute := token   ; case-insensitive
 		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
 		 *		or tspecials>
 		 * But too many MUAs ignore this
 		 */
 		while(isspace((unsigned char)*string))
 			string++;
 
 		/* cptr is the first character of the value */
 		cptr = string++;
 
 		if(*cptr == '"') {
 			char *kcopy, *ptr;
 
 			/*
 			 * The field is in quotes, so look for the
 			 * closing quotes
 			 */
 			kcopy = cli_strdup(key);
 
 			if(kcopy == NULL)
 				return;
 
 			/*
 			 * Cut the copy down to just the key. The separator
 			 * found above is in the copy as well, so ptr can't
 			 * be NULL here
 			 */
 			ptr = strchr(kcopy, '=');
 			if(ptr == NULL)
 				ptr = strchr(kcopy, ':');
 			*ptr = '\0';
 
 			/* carry on after the closing quote next time round */
 			string = strchr(++cptr, '"');
 
 			if(string == NULL) {
 				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
 				string = "";
 			} else
 				string++;
 
 			if(!usefulArg(kcopy)) {
 				free(kcopy);
 				continue;
 			}
 
 			data = cli_strdup(cptr);
 
 			ptr = (data) ? strchr(data, '"') : NULL;
 			if(ptr == NULL) {
 				/*
 				 * Weird e-mail header such as:
 				 * Content-Type: application/octet-stream; name="
 				 * "
 				 * Content-Transfer-Encoding: base64
 				 * Content-Disposition: attachment; filename="
 				 * "
 				 *
 				 * TODO: the file should still be saved and
 				 * virus checked
 				 */
 				cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
 				if(data)
 					free(data);
 				free(kcopy);
 				return;
 			}
 
 			*ptr = '\0';
 
 			/* room for key, '=', value and the terminator */
 			field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2);
 			if(field) {
 				strcat(field, "=");
 				strcat(field, data);
 			} else
 				free(kcopy);
 			free(data);
 		} else {
 			size_t len;
 
 			if(*cptr == '\0') {
 				cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
 				return;
 			}
 
 			/*
 			 * The field is not in quotes, so look for the closing
 			 * white space
 			 */
 			while((*string != '\0') && !isspace((unsigned char)*string))
 				string++;
 
 			/* copy everything from the key to the end of the value */
 			len = (size_t)(string - key) + 1;
 			field = cli_malloc(len);
 
 			if(field) {
 				memcpy(field, key, len - 1);
 				field[len - 1] = '\0';
 			}
 		}
 		if(field) {
 			/* messageAddArgument stores its own version */
 			messageAddArgument(m, field);
 			free(field);
 		}
 	}
 }
 
 static const char *
 messageGetArgument(const message *m, int arg)
 {
 	assert(m != NULL);
 	assert(arg >= 0);
c6259ac5
 	assert(arg < m->numberOfArguments);
b151ef55
 
b329234a
 	return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
b151ef55
 }
 
 /*
  * Find a MIME variable from the header and return a COPY to the value of that
  * variable. The caller must free the copy
  */
 const char *
 messageFindArgument(const message *m, const char *variable)
 {
 	int i;
dad64ecb
 	size_t len;
b151ef55
 
 	assert(m != NULL);
 	assert(variable != NULL);
 
dad64ecb
 	len = strlen(variable);
 
c6259ac5
 	for(i = 0; i < m->numberOfArguments; i++) {
b151ef55
 		const char *ptr;
 
 		ptr = messageGetArgument(m, i);
 		if((ptr == NULL) || (*ptr == '\0'))
dad64ecb
 			continue;
b151ef55
 #ifdef	CL_DEBUG
 		cli_dbgmsg("messageFindArgument: compare %d bytes of %s with %s\n",
 			len, variable, ptr);
 #endif
 		if(strncasecmp(ptr, variable, len) == 0) {
 			ptr = &ptr[len];
 			while(isspace(*ptr))
 				ptr++;
752c34b9
 			if(*ptr != '=') {
133dcdcd
 				cli_warnmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
752c34b9
 				return NULL;
 			}
b151ef55
 			if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
bf8ea488
 				/* Remove any quote characters */
4db74788
 				char *ret = cli_strdup(++ptr);
bf8ea488
 				char *p;
 
bbf43447
 				if(ret == NULL)
 					return NULL;
 
bf8ea488
 				/*
 				 * Thomas Lamy <Thomas.Lamy@in-online.net>:
 				 * fix un-quoting of boundary strings from
 				 * header, occurs if boundary was given as
 				 *	'boundary="_Test_";'
 				 *
 				 * At least two quotes in string, assume
 				 * quoted argument
 				 * end string at next quote
 				 */
53ee0b60
 				if((p = strchr(ret, '"')) != NULL) {
 					ret[strlen(ret) - 1] = '\0';
bf8ea488
 					*p = '\0';
53ee0b60
 				}
bbf43447
 				return ret;
b151ef55
 			}
4db74788
 			return cli_strdup(ptr);
b151ef55
 		}
 	}
bbf43447
 	return NULL;
b151ef55
 }
 
 /*
  * Handle the Content-Transfer-Encoding header.
  *
  *	m	message whose list of encoding types is added to
  *	enctype	the header's value, must not be NULL; may name more than
  *		one encoding separated by spaces or tabs
  *
  * Each name that exactly matches an entry in encoding_map has that entry's
  * type appended to m->encodingTypes, unless it's already in the list.
  * A name that matches nothing is replaced by the closest entry if simil()
  * rates it at 50 or more, otherwise both base64 and quoted-printable are
  * enabled.
  */
 void
 messageSetEncoding(message *m, const char *enctype)
 {
 	const struct encoding_map *e;
 	int i;
 	char *type;
 
 	assert(m != NULL);
 	assert(enctype != NULL);
 
 	/*
 	 * The casts in this function keep the ctype calls defined for
 	 * bytes with the top bit set
 	 */
 	while(isblank((unsigned char)*enctype))
 		enctype++;
 
 	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
 
 	/* would otherwise be split into "8" and "bit" below */
 	if(strcasecmp(enctype, "8 bit") == 0) {
 		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
 		enctype = "8bit";
 	}
 
 	/*
 	 * Iterate through
 	 *	Content-Transfer-Encoding: base64 binary
 	 * cli_strtok's fieldno counts from 0
 	 */
 	i = 0;
 	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
 		int highestSimil = 0;
 		const char *closest = NULL;
 		/* the same for every entry, so work it out once */
 		const int lowertype = tolower((unsigned char)type[0]);
 
 		for(e = encoding_map; e->string; e++) {
 			int sim;
 
 			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
 				/*
 				 * simil is expensive, I'm yet to encounter only
 				 * one example of a missent encoding when the
 				 * first character was wrong, so lets assume no
 				 * match to save the call.
 				 *
 				 * That example was quoted-printable sent as
 				 * X-quoted-printable.
 				 */
 				continue;
 
 			if(strcmp(e->string, "uuencode") == 0)
 				/*
 				 * No need to test here - fast track visa will have
 				 * handled uuencoded files
 				 */
 				continue;
 
 			sim = simil(type, e->string);
 
 			if(sim == 100) {
 				/* exact match */
 				int j;
 				encoding_type *et;
 
 				for(j = 0; j < m->numberOfEncTypes; j++)
 					if(m->encodingTypes[j] == e->type)
 						break;
 
 				if(j < m->numberOfEncTypes) {
 					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
 						type);
 					break;
 				}
 
 				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
 				if(et == NULL)
 					break;
 
 				m->encodingTypes = et;
 				m->encodingTypes[m->numberOfEncTypes++] = e->type;
 
 				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
 				break;
 			} else if(sim > highestSimil) {
 				closest = e->string;
 				highestSimil = sim;
 			}
 		}
 
 		/* only true if the loop ran off the end: nothing matched */
 		if(e->string == NULL) {
 			/*
 			 * The stated encoding type is illegal, so we
 			 * use a best guess of what it should be.
 			 *
 			 * 50% is arbitrary. For example 7bi will match as
 			 * 66% certain to be 7bit
 			 */
 			if(highestSimil >= 50) {
 				/* the format uses %u, so hand it an unsigned */
 				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
 					type, closest, (unsigned int)highestSimil);
 				messageSetEncoding(m, closest);
 			} else {
 				cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
 				/*
 				 * Err on the side of safety, enable all
 				 * decoding modules
 				 */
 				messageSetEncoding(m, "base64");
 				messageSetEncoding(m, "quoted-printable");
 			}
 		}
 
 		free(type);
 	}
 }
 
 encoding_type
 messageGetEncoding(const message *m)
 {
 	assert(m != NULL);
0e3b08fc
 
 	if(m->numberOfEncTypes == 0)
 		return NOENCODING;
 	return m->encodingTypes[0];
b151ef55
 }
 
de617e3e
 /*
  * Add the given line to the end of the given message.
  *
  *	m	message to add to
  *	line	line to add; if it's NULL, or has no data, a blank line
  *		is added instead
  *
  * The message takes what lineLink() returns rather than line itself.
  * Returns 1 on success, or -1 if there's no memory for the new entry, in
  * which case the message is left exactly as it was.
  */
 int
 messageAddLine(message *m, line_t *line)
 {
 	text *node;
 
 	assert(m != NULL);
 
 	/*
 	 * Allocate first and link afterwards, so that a failure can't
 	 * leave body_last set to NULL while body_first still points to
 	 * the existing lines
 	 */
 	node = (text *)cli_malloc(sizeof(text));
 	if(node == NULL)
 		return -1;
 
 	node->t_next = NULL;
 	node->t_line = NULL;
 
 	if(m->body_first == NULL)
 		m->body_first = node;
 	else
 		m->body_last->t_next = node;
 	m->body_last = node;
 
 	if(line && lineGetData(line)) {
 		node->t_line = lineLink(line);
 
 		/* looks at the line that has just been added */
 		messageIsEncoding(m);
 	}
 
 	return 1;
 }
 
b151ef55
 /*
edb35c0a
  * Add the given line to the end of the given message
ffd59a3e
  * If needed a copy of the given line is taken which the caller must free
edb35c0a
  * Line must not be terminated by a \n
b151ef55
  */
4c927f11
 int
321d5c00
 messageAddStr(message *m, const char *data)
b151ef55
 {
c1e96196
 	line_t *repeat = NULL;
 
b151ef55
 	assert(m != NULL);
 
c1e96196
 	if(data) {
321d5c00
 		if(*data == '\0')
 			data = NULL;
 		else {
 			/*
 			 * If it's only white space, just store one space to
 			 * save memory. You must store something since it may
 			 * be a header line
 			 */
 			int iswhite = 1;
 			const char *p;
c1e96196
 
321d5c00
 			for(p = data; *p; p++)
5e28cd2b
 				if(((*p) & 0x80) || !isspace(*p)) {
321d5c00
 					iswhite = 0;
 					break;
 				}
 			if(iswhite) {
 				/*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
 				data = " ";
a78256af
 			}
c1e96196
 		}
 	}
 
b151ef55
 	if(m->body_first == NULL)
 		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
 	else {
e24738dc
 		assert(m->body_last != NULL);
af66c329
 		if((data == NULL) && (m->body_last->t_line == NULL))
24c897dc
 			/*
 			 * Although this would save time and RAM, some
 			 * phish signatures have been built which need the
 			 * blank lines
 			 */
 			if(messageGetMimeType(m) != TEXT)
 				/* don't save two blank lines in sucession */
 				return 1;
af66c329
 
b151ef55
 		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
e24738dc
 		if(m->body_last->t_next == NULL) {
 			messageDedup(m);
 			m->body_last->t_next = (text *)cli_malloc(sizeof(text));
 			if(m->body_last->t_next == NULL) {
 				cli_errmsg("messageAddStr: out of memory\n");
 				return -1;
 			}
 		}
 
c1e96196
 		if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
 			repeat = m->body_last->t_line;
b151ef55
 		m->body_last = m->body_last->t_next;
 	}
 
e24738dc
 	if(m->body_last == NULL) {
 		cli_errmsg("messageAddStr: out of memory\n");
4c927f11
 		return -1;
e24738dc
 	}
f5a4d7e8
 
b151ef55
 	m->body_last->t_next = NULL;
 
de617e3e
 	if(data && *data) {
c1e96196
 		if(repeat)
 			m->body_last->t_line = lineLink(repeat);
399e1865
 		else {
e24738dc
 			m->body_last->t_line = lineCreate(data);
 
 			if(m->body_last->t_line == NULL) {
399e1865
 				messageDedup(m);
 				m->body_last->t_line = lineCreate(data);
de617e3e
 
399e1865
 				if(m->body_last->t_line == NULL) {
 					cli_errmsg("messageAddStr: out of memory\n");
 					return -1;
 				}
 			}
 			/* cli_chomp(m->body_last->t_text); */
5ae253d2
 			messageIsEncoding(m);
399e1865
 		}
98685ac1
 	} else
de617e3e
 		m->body_last->t_line = NULL;
98685ac1
 
4c927f11
 	return 1;
b151ef55
 }
 
ffd59a3e
 /*
edb35c0a
  * Add the given line to the start of the given message
  * A copy of the given line is taken which the caller must free
  * Line must not be terminated by a \n
  */
 int
de617e3e
 messageAddStrAtTop(message *m, const char *data)
edb35c0a
 {
 	text *oldfirst;
 
 	assert(m != NULL);
 
 	if(m->body_first == NULL)
de617e3e
 		return messageAddLine(m, lineCreate(data));
bbf43447
 
edb35c0a
 	oldfirst = m->body_first;
 	m->body_first = (text *)cli_malloc(sizeof(text));
 	if(m->body_first == NULL) {
 		m->body_first = oldfirst;
 		return -1;
 	}
 
 	m->body_first->t_next = oldfirst;
de617e3e
 	m->body_first->t_line = lineCreate((data) ? data : "");
edb35c0a
 
de617e3e
 	if(m->body_first->t_line == NULL) {
 		cli_errmsg("messageAddStrAtTop: out of memory\n");
edb35c0a
 		return -1;
 	}
 	return 1;
 }
 
 /*
  * See if the last line marks the start of a non MIME inclusion that
  * will need to be scanned. If it does, remember where it is in the
  * message. Only the first line of each kind is remembered, and a line is
  * only ever counted as one kind: the tests are made in the order
  * encoding, bounce, binhex, yenc.
  *
  * The last line of the message must have data.
  */
 static void
 messageIsEncoding(message *m)
 {
 	static const char cte_header[] = "Content-Transfer-Encoding";
 	static const char binhex_banner[] = "(This file must be converted with BinHex 4.0)";
 	const char *latest = lineGetData(m->body_last->t_line);
 
 	/* not enough matches to warrant this test */
 	/*if(lineGetRefCount(m->body_last->t_line) > 1) {
 		return;
 	}*/
 
 	/* a Content-Transfer-Encoding line, but not one that says 7bit */
 	if((m->encoding == NULL) &&
 	   (strncasecmp(latest, cte_header, sizeof(cte_header) - 1) == 0) &&
 	   (strstr(latest, "7bit") == NULL)) {
 		m->encoding = m->body_last;
 		return;
 	}
 
 	/* a Received: line that cli_filetype takes to be mail */
 	if((m->bounce == NULL) &&
 	   (strncasecmp(latest, "Received: ", 10) == 0) &&
 	   (cli_filetype((const unsigned char *)latest, strlen(latest)) == CL_TYPE_MAIL)) {
 		m->bounce = m->body_last;
 		return;
 	}
 
 	/* Not needed with fast track visa technology */
 	/*else if((m->uuencode == NULL) && isuuencodebegin(line))
 		m->uuencode = m->body_last;*/
 
 	/*
 	 * Look for close matches for BinHex, but
 	 * simil() is expensive so only do it if it's
 	 * likely to be found
 	 */
 	if((m->binhex == NULL) &&
 	   strstr(latest, "BinHex") &&
 	   (simil(latest, binhex_banner) > 90)) {
 		m->binhex = m->body_last;
 		return;
 	}
 
 	if((m->yenc == NULL) && (strncmp(latest, "=ybegin line=", 13) == 0))
 		m->yenc = m->body_last;
 }
 
 /*
ffd59a3e
  * Returns a pointer to the body of the message. Note that it does NOT return
  * a copy of the data
  */
985cc85e
 text *
 messageGetBody(message *m)
b151ef55
 {
 	assert(m != NULL);
ffd59a3e
 	return m->body_first;
b151ef55
 }
 
 /*
  * Clean up the message by removing trailing spaces and blank lines.
  * The work is done by textClean(); if it returns a new end for the body
  * that is remembered as the last line of the message.
  */
 void
 messageClean(message *m)
 {
 	text *tail = textClean(m->body_first);
 
 	if(tail != NULL)
 		m->body_last = tail;
 }
 
 /*
e6b25cd3
  * Export a message using the given export routines
0d252351
  *
  * TODO: It really should export into an array, one
  * for each encoding algorithm. However, what it does is it returns the
  * last item that was exported. That's sufficient for now.
b151ef55
  */
a446de17
 static void *
985cc85e
 messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text)
b151ef55
 {
e6b25cd3
 	void *ret;
985cc85e
 	text *t_line;
dd8a7e90
 	char *filename;
0e3b08fc
 	int i;
b151ef55
 
 	assert(m != NULL);
 
0e3b08fc
 	if(messageGetBody(m) == NULL)
 		return NULL;
 
e6b25cd3
 	ret = (*create)();
b151ef55
 
e6b25cd3
 	if(ret == NULL)
02c9dc2a
 		return NULL;
b151ef55
 
802c37fc
 	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
 
0e3b08fc
 	if((t_line = binhexBegin(m)) != NULL) {
a42dba7d
 		unsigned char byte;
40d54f7f
 		size_t newlen = 0L, len, dataforklen, resourceforklen, l;
a4c3d0a3
 		unsigned char *data;
bbf43447
 		char *ptr;
bb5d6279
 		int bytenumber;
285a69b4
 		blob *tmp;
bc75e1d1
 
 		/*
 		 * Table look up by Thomas Lamy <Thomas.Lamy@in-online.net>
 		 * HQX conversion table - illegal chars are 0xff
 		 */
 		const unsigned char hqxtbl[] = {
 			     /*   00   01   02   03   04   05   06   07   08   09   0a   0b   0c   0d   0e   0f */
 		/* 00-0f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 10-1f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 20-2f */	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
 		/* 30-3f */	0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 40-4f */	0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
 		/* 50-5f */	0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
 		/* 60-6f */	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
 		/* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
 		};
a42dba7d
 
9b2be218
 		cli_dbgmsg("messageExport: decode binhex\n");
a42dba7d
 		/*
 		 * Decode BinHex4. First create a temporary blob which contains
 		 * the encoded message. Then decode that blob to the target
0bf1353d
 		 * blob, free the temporary blob and return the target one
bc75e1d1
 		 *
fdb974a5
 		 * FIXME: EICAR isn't detected: should create 3 files in fork
 		 *	format: .info, .data and .rsrc. This is needed for
 		 *	position dependant detection such as EICAR
 		 *
bc75e1d1
 		 * See RFC1741
a42dba7d
 		 */
a4c3d0a3
 		while(((t_line = t_line->t_next) != NULL) &&
 		      (t_line->t_line == NULL))
 			;
a42dba7d
 
985cc85e
 		tmp = textToBlob(t_line, NULL,
 			((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);
 
a4c3d0a3
 		if(tmp == NULL) {
985cc85e
 			/*
 			 * FIXME: We've probably run out of memory during the
 			 * text to blob.
 			 * TODO: if m->numberOfEncTypes == 1 we could delete
 			 * the text object as we decode it
 			 */
 			cli_warnmsg("Couldn't start binhex parser\n");
e6b25cd3
 			(*destroy)(ret);
bc75e1d1
 			return NULL;
 		}
bb5d6279
 
a4c3d0a3
 		data = blobGetData(tmp);
a42dba7d
 
a4c3d0a3
 		if(data == NULL) {
 			cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
bbf43447
 			blobDestroy(tmp);
e6b25cd3
 			(*destroy)(ret);
bbf43447
 			return NULL;
 		}
a4c3d0a3
 		len = blobGetDataSize(tmp);
a42dba7d
 
a4c3d0a3
 		if(data[0] == ':') {
 			unsigned char *uptr;
 			/* 7 bit (ala RFC1741) */
a42dba7d
 
a4c3d0a3
 			/*
 			 * FIXME: this is dirty code, modification of the
 			 * contents of a member of the blob object should be
 			 * done through blob.c
 			 *
 			 * Convert 7 bit data into 8 bit
 			 */
 			cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);
bb5d6279
 
a4c3d0a3
 			uptr = cli_malloc(len);
 			if(uptr == NULL) {
 				blobDestroy(tmp);
 				(*destroy)(ret);
 				return NULL;
bc75e1d1
 			}
a4c3d0a3
 			memcpy(uptr, data, len);
 			bytenumber = 0;
83ec020f
 
bc75e1d1
 			/*
a4c3d0a3
 			 * uptr now contains the encoded (7bit) data - len bytes long
 			 * data will contain the unencoded (8bit) data
bc75e1d1
 			 */
a4c3d0a3
 			for(l = 1; l < len; l++) {
 				unsigned char c = uptr[l];
 
 				if(c == ':')
bc75e1d1
 					break;
a4c3d0a3
 
 				if((c == '\n') || (c == '\r'))
 					continue;
 
 				if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
 					cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
bc75e1d1
 					break;
a4c3d0a3
 				}
 				c = hqxtbl[c];
 				assert(c <= 63);
 
 				/*
 				 * These masks probably aren't needed, but
 				 * they're here to verify the code is correct
 				 */
 				switch(bytenumber) {
 					case 0:
 						data[newlen] = (c << 2) & 0xFC;
 						bytenumber = 1;
 						break;
 					case 1:
 						data[newlen++] |= (c >> 4) & 0x3;
 						data[newlen] = (c << 4) & 0xF0;
 						bytenumber = 2;
 						break;
 					case 2:
 						data[newlen++] |= (c >> 2) & 0xF;
 						data[newlen] = (c << 6) & 0xC0;
 						bytenumber = 3;
 						break;
 					case 3:
 						data[newlen++] |= c & 0x3F;
 						bytenumber = 0;
 						break;
 				}
bc75e1d1
 			}
bb5d6279
 
a4c3d0a3
 			cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
bc75e1d1
 
a4c3d0a3
 			/*
 			 * Throw away the old encoded (7bit) data
 			 * data now points to the encoded (8bit) data - newlen bytes
 			 *
 			 * The data array may contain repetitive characters
 			 */
 			free(uptr);
 		} else {
a363da65
 			cli_warnmsg("HQX8 messages not yet supported - if you believe this file contains a virus, submit it to www.clamav.net\n");
a4c3d0a3
 			newlen = len;
 		}
bc75e1d1
 
 		/*
 		 * Uncompress repetitive characters
 		 */
 		if(memchr(data, 0x90, newlen)) {
 			blob *u = blobCreate();	/* uncompressed data */
 
285a69b4
 			if(u == NULL) {
e6b25cd3
 				(*destroy)(ret);
285a69b4
 				blobDestroy(tmp);
 				return NULL;
 			}
bc75e1d1
 			/*
 			 * Includes compression
 			 */
 			for(l = 0L; l < newlen; l++) {
 				unsigned char c = data[l];
a42dba7d
 
 				/*
bc75e1d1
 				 * TODO: handle the case where the first byte
 				 * is 0x90
a42dba7d
 				 */
bc75e1d1
 				blobAddData(u, &c, 1);
 
 				if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
 					int count;
 
 					l += 2;
 					count = data[l];
 
 					if(count == 0) {
 						c = 0x90;
 						blobAddData(u, &c, 1);
ffd59a3e
 					} else {
0d252351
 #ifdef	CL_DEBUG
 						cli_dbgmsg("uncompress HQX7 at 0x%06x: %d repetitive bytes\n", l, count);
 #endif
ffd59a3e
 						blobGrow(u, count);
bc75e1d1
 						while(--count > 0)
 							blobAddData(u, &c, 1);
ffd59a3e
 					}
a42dba7d
 				}
 			}
3fbd1711
 			blobDestroy(tmp);
bc75e1d1
 			tmp = u;
 			data = blobGetData(tmp);
 			len = blobGetDataSize(tmp);
 			cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
 		} else {
 			len = newlen;
 			cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
 				len);
a42dba7d
 		}
dad64ecb
 		if(len == 0) {
 			cli_warnmsg("Discarding empty binHex attachment\n");
e6b25cd3
 			(*destroy)(ret);
dad64ecb
 			blobDestroy(tmp);
 			return NULL;
 		}
a42dba7d
 
 		/*
bc75e1d1
 		 * The blob tmp now contains the uncompressed data
 		 * of len bytes, i.e. the repetitive bytes have been removed
 		 */
 
 		/*
 		 * Parse the header
 		 *
a42dba7d
 		 * TODO: set filename argument in message as well
 		 */
 		byte = data[0];
6afdc3ab
 		if(byte >= len) {
e6b25cd3
 			(*destroy)(ret);
6afdc3ab
 			blobDestroy(tmp);
 			return NULL;
 		}
a42dba7d
 		filename = cli_malloc(byte + 1);
bbf43447
 		if(filename == NULL) {
e6b25cd3
 			(*destroy)(ret);
bbf43447
 			blobDestroy(tmp);
 			return NULL;
 		}
bc75e1d1
 		memcpy(filename, &data[1], byte);
a42dba7d
 		filename[byte] = '\0';
e6b25cd3
 		(*setFilename)(ret, dir, filename);
dad64ecb
 		/*ptr = cli_malloc(strlen(filename) + 6);*/
 		ptr = cli_malloc(byte + 6);
bbf43447
 		if(ptr) {
 			sprintf(ptr, "name=%s", filename);
 			messageAddArgument(m, ptr);
 			free(ptr);
 		}
a42dba7d
 
 		/*
 		 * skip over length, filename, version, type, creator and flags
 		 */
 		byte = 1 + byte + 1 + 4 + 4 + 2;
83ec020f
 
 		/*
 		 * Set len to be the data fork length
 		 */
64b0fff6
 		dataforklen = ((data[byte] << 24) & 0xFF000000) |
582808c3
 			((data[byte + 1] << 16) & 0xFF0000) |
 			((data[byte + 2] << 8) & 0xFF00) |
 			(data[byte + 3] & 0xFF);
bc75e1d1
 
64b0fff6
 		resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) |
582808c3
 			((data[byte + 5] << 16) & 0xFF0000) |
 			((data[byte + 6] << 8) & 0xFF00) |
 			(data[byte + 7] & 0xFF);
64b0fff6
 
 		cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n",
 			filename, dataforklen, resourceforklen);
bc75e1d1
 
 		free((char *)filename);
a42dba7d
 
 		/*
 		 * Skip over data fork length, resource fork length and CRC
 		 */
 		byte += 10;
 
dad64ecb
 		l = blobGetDataSize(tmp) - byte;
db42f46e
 
64b0fff6
 		if(l < dataforklen) {
db42f46e
 			cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
64b0fff6
 				dataforklen, l);
 			dataforklen = l;
db42f46e
 		}
55274fda
 		if(setCTX && m->ctx)
 			(*setCTX)(ret, m->ctx);
 
64b0fff6
 		(*addData)(ret, &data[byte], dataforklen);
a42dba7d
 
 		blobDestroy(tmp);
 
fd969c26
 		if(destroy_text)
 			m->binhex = NULL;
fef5ad63
 
fd969c26
 		if((m->numberOfEncTypes == 0) ||
 		   ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) {
fef5ad63
 			cli_dbgmsg("Finished exporting binhex file\n");
 			return ret;
 		}
0e3b08fc
 	}
 
 	if(m->numberOfEncTypes == 0) {
 		/*
 		 * Fast copy
 		 */
fd969c26
 		cli_dbgmsg("messageExport: Entering fast copy mode\n");
 
6a91c55b
 		filename = (char *)messageFindArgument(m, "filename");
b151ef55
 		if(filename == NULL) {
6a91c55b
 			filename = (char *)messageFindArgument(m, "name");
b151ef55
 
 			if(filename == NULL) {
c93e52c1
 				cli_dbgmsg("Unencoded attachment sent with no filename\n");
3b6eace4
 				messageAddArgument(m, "name=attachment");
0e3b08fc
 			} else
bbf43447
 				/*
 				 * Some virus attachments don't say how they've
 				 * been encoded. We assume base64
 				 */
 				messageSetEncoding(m, "base64");
b151ef55
 		}
 
63f87938
 		(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
b151ef55
 
5eeffbb9
 		if(filename)
 			free((char *)filename);
b151ef55
 
2add0ed7
 		if(m->numberOfEncTypes == 0)
985cc85e
 			return exportText(messageGetBody(m), ret, destroy_text);
b151ef55
 	}
 
55274fda
 	if(setCTX && m->ctx)
 		(*setCTX)(ret, m->ctx);
 
0e3b08fc
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		encoding_type enctype = m->encodingTypes[i];
d17de037
 		size_t size;
0e3b08fc
 
0d252351
 		if(i > 0) {
 			void *newret;
 
 			newret = (*create)();
 			if(newret == NULL) {
 				cli_errmsg("Not all decoding algorithms were run\n");
 				return ret;
 			}
 			(*destroy)(ret);
 			ret = newret;
 		}
ad642304
 		cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype);
b151ef55
 		/*
0e3b08fc
 		 * Find the filename to decode
b151ef55
 		 */
2add0ed7
 		if(((enctype == YENCODE) && yEncBegin(m)) || ((i == 0) && yEncBegin(m))) {
00f95393
 			/*
 			 * TODO: handle multipart yEnc encoded files
 			 */
 			t_line = yEncBegin(m);
bb2432d7
 			filename = (char *)lineGetData(t_line->t_line);
00f95393
 
 			if((filename = strstr(filename, " name=")) != NULL) {
4db74788
 				filename = cli_strdup(&filename[6]);
00f95393
 				if(filename) {
 					cli_chomp(filename);
 					strstrip(filename);
 					cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
 				}
5eeffbb9
 			}
00f95393
 
63f87938
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
5eeffbb9
 			if(filename) {
 				free((char *)filename);
 				filename = NULL;
 			}
00f95393
 			t_line = t_line->t_next;
 			enctype = YENCODE;
74ca33e9
 			m->yenc = NULL;
0e3b08fc
 		} else {
2add0ed7
 			if(enctype == UUENCODE) {
 				/*
 				 * The body will have been stripped out by the fast track visa
 				 * system. Treat as plain/text, which means we'll still scan
 				 * for funnies outside of the uuencoded portion.
 				 */
56896211
 				cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
2add0ed7
 				enctype = m->encodingTypes[i] = NOENCODING;
 			}
0e3b08fc
 			filename = (char *)messageFindArgument(m, "filename");
 			if(filename == NULL) {
 				filename = (char *)messageFindArgument(m, "name");
 
 				if(filename == NULL) {
 					cli_dbgmsg("Attachment sent with no filename\n");
 					messageAddArgument(m, "name=attachment");
 				} else if(enctype == NOENCODING)
 					/*
24c897dc
 					 * Some virus attachments don't say how
 					 * they've been encoded. We assume
 					 * base64.
2add0ed7
 					 *
24c897dc
 					 * FIXME: don't do this if it's a fall
 					 * through from uuencode
0e3b08fc
 					 */
 					messageSetEncoding(m, "base64");
 			}
 
63f87938
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
0e3b08fc
 
 			t_line = messageGetBody(m);
 		}
ad642304
 
00f95393
 		if(filename)
 			free((char *)filename);
0e3b08fc
 
 		/*
c93e52c1
 		 * t_line should now point to the first (encoded) line of the
 		 * message
0e3b08fc
 		 */
 		if(t_line == NULL) {
 			cli_warnmsg("Empty attachment not saved\n");
 			(*destroy)(ret);
 			return NULL;
 		}
 
 		if(enctype == NOENCODING) {
ce73653f
 			/*
0e3b08fc
 			 * Fast copy
ce73653f
 			 */
985cc85e
 			if(i == m->numberOfEncTypes - 1) {
 				/* last one */
 				(void)exportText(t_line, ret, destroy_text);
 				break;
 			}
 			(void)exportText(t_line, ret, 0);
0e3b08fc
 			continue;
ce73653f
 		}
b151ef55
 
d17de037
 		size = 0;
0e3b08fc
 		do {
0d252351
 			unsigned char smallbuf[1024];
 			unsigned char *uptr, *data;
0e3b08fc
 			const char *line = lineGetData(t_line->t_line);
0d252351
 			unsigned char *bigbuf;
 			size_t datasize;
b151ef55
 
2add0ed7
 			if(enctype == YENCODE) {
00f95393
 				if(line == NULL)
 					continue;
5eeffbb9
 				if(strncmp(line, "=yend ", 6) == 0)
00f95393
 					break;
0e3b08fc
 			}
 
0d252351
 			/*
 			 * Add two bytes for '\n' and '\0'
 			 */
 			datasize = (line) ? strlen(line) + 2 : 0;
02c9dc2a
 
23e1c37c
 			if(datasize >= sizeof(smallbuf))
 				data = bigbuf = (unsigned char *)cli_malloc(datasize);
0d252351
 			else {
 				bigbuf = NULL;
 				data = smallbuf;
 				datasize = sizeof(smallbuf);
 			}
02c9dc2a
 
23e1c37c
 			uptr = decodeLine(m, enctype, line, data, datasize);
0d252351
 			if(uptr == NULL) {
 				if(data == bigbuf)
 					free(data);
 				break;
 			}
1e06e1ab
 
d17de037
 			if(uptr != data) {
af66c329
 				assert((size_t)(uptr - data) < datasize);
0e3b08fc
 				(*addData)(ret, data, (size_t)(uptr - data));
d17de037
 				size += (size_t)(uptr - data);
 			}
02c9dc2a
 
0d252351
 			if(data == bigbuf)
 				free(data);
 
0e3b08fc
 			/*
da850706
 			 * According to RFC2045, '=' is used to pad out
0e3b08fc
 			 * the last byte and should be used as evidence
 			 * of the end of the data. Some mail clients
 			 * annoyingly then put plain text after the '='
 			 * byte and viruses exploit this bug. Sigh
 			 */
 			/*if(enctype == BASE64)
 				if(strchr(line, '='))
 					break;*/
0e01c158
 			if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
 				lineUnlink(t_line->t_line);
 				t_line->t_line = NULL;
 			}
0e3b08fc
 		} while((t_line = t_line->t_next) != NULL);
d17de037
 
8f465848
 		cli_dbgmsg("Exported %u bytes using enctype %d\n", size, enctype);
752c34b9
 
f98d4ab4
 		/* Verify we have nothing left to flush out */
 		if(m->base64chars) {
 			unsigned char data[4];
 			unsigned char *ptr;
285a69b4
 
c8bc44d6
 			ptr = base64Flush(m, data);
f98d4ab4
 			if(ptr)
 				(*addData)(ret, data, (size_t)(ptr - data));
 		}
285a69b4
 	}
 
e6b25cd3
 	return ret;
 }
 
c8bc44d6
 /*
  * Emit any base64 characters still buffered in the message.
  *
  * m:	message whose carried-over base64 state is flushed
  * buf:	where the decoded trailing bytes are written
  *
  * Returns the value handed back by decode() (one past the last byte written
  * to buf), or NULL when nothing was buffered. m->base64chars is reset to 0
  * after a flush.
  */
 unsigned char *
 base64Flush(message *m, unsigned char *buf)
 {
 	unsigned char *end;

 	cli_dbgmsg("%u trailing bytes to export\n", m->base64chars);

 	if(m->base64chars == 0)
 		return NULL;	/* nothing carried over */

 	/* in == NULL asks decode() to flush the carried characters */
 	end = decode(m, NULL, buf, base64, FALSE);
 	m->base64chars = 0;

 	return end;
 }
 
e6b25cd3
 /*
  * Decode and transfer the contents of the message into a fileblob
  * The caller must free the returned fileblob
  */
 fileblob *
985cc85e
 messageToFileblob(message *m, const char *dir, int destroy)
e6b25cd3
 {
985cc85e
 	fileblob *fb;
 
a446de17
 	cli_dbgmsg("messageToFileblob\n");
6fe0da47
 	fb = messageExport(m, dir,
 		(void *(*)(void))fileblobCreate,
 		(void(*)(void *))fileblobDestroy,
 		(void(*)(void *, const char *, const char *))fileblobSetFilename,
 		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
 		(void *(*)(text *, void *, int))textToFileblob,
 		(void(*)(void *, cli_ctx *))fileblobSetCTX,
 		destroy);
985cc85e
 	if(destroy && m->body_first) {
 		textDestroy(m->body_first);
 		m->body_first = m->body_last = NULL;
 	}
 	return fb;
e6b25cd3
 }
 
 /*
aea1b159
  * Decode and transfer the contents of the message into a closed blob
e6b25cd3
  * The caller must free the returned blob
  */
 blob *
985cc85e
 messageToBlob(message *m, int destroy)
e6b25cd3
 {
fd969c26
 	blob *b;
 
 	cli_dbgmsg("messageToBlob\n");
 
 	b = messageExport(m, NULL,
6fe0da47
 		(void *(*)(void))blobCreate,
 		(void(*)(void *))blobDestroy,
 		(void(*)(void *, const char *, const char *))blobSetFilename,
 		(void(*)(void *, const unsigned char *, size_t))blobAddData,
 		(void *(*)(text *, void *, int))textToBlob,
 		(void(*)(void *, cli_ctx *))NULL,
 		destroy);
985cc85e
 
 	if(destroy && m->body_first) {
 		textDestroy(m->body_first);
 		m->body_first = m->body_last = NULL;
 	}
 	return b;
b151ef55
 }
 
 /*
  * Decode and transfer the contents of the message into a text area
ffd59a3e
  * The caller must free the returned text
b151ef55
  */
 text *
285a69b4
 messageToText(message *m)
b151ef55
 {
0e3b08fc
 	int i;
b151ef55
 	text *first = NULL, *last = NULL;
 	const text *t_line;
 
 	assert(m != NULL);
 
0e3b08fc
 	if(m->numberOfEncTypes == 0) {
b151ef55
 		/*
 		 * Fast copy
 		 */
 		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
 			else {
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
 
de617e3e
 			if(last == NULL) {
285a69b4
 				if(first)
 					textDestroy(first);
cea95096
 				return NULL;
 			}
0b08b624
 			if(t_line->t_line)
 				last->t_line = lineLink(t_line->t_line);
 			else
 				last->t_line = NULL;	/* empty line */
b151ef55
 		}
0e3b08fc
 		if(last)
 			last->t_next = NULL;
 
 		return first;
 	}
 	/*
 	 * Scan over the data a number of times once for each claimed encoding
 	 * type
 	 */
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		const encoding_type enctype = m->encodingTypes[i];
 
 		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
 			i, enctype);
eac601be
 
 		switch(enctype) {
 			case NOENCODING:
 			case BINARY:
 			case EIGHTBIT:
 				/*
 				 * Fast copy
 				 */
 				for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 					if(first == NULL)
 						first = last = cli_malloc(sizeof(text));
 					else {
 						last->t_next = cli_malloc(sizeof(text));
 						last = last->t_next;
 					}
 
 					if(last == NULL) {
 						if(first) {
 							last->t_next = NULL;
 							textDestroy(first);
 						}
 						return NULL;
 					}
 					if(t_line->t_line)
 						last->t_line = lineLink(t_line->t_line);
 					else
 						last->t_line = NULL;	/* empty line */
0e3b08fc
 				}
eac601be
 				continue;
 			case UUENCODE:
b01f527d
 				cli_errmsg("messageToText: Unexpected attempt to handle uuencoded file - report to http://bugs.clamav.net\n");
2add0ed7
 				if(first) {
 					last->t_next = NULL;
 					textDestroy(first);
0e3b08fc
 				}
2add0ed7
 				return NULL;
eac601be
 			case YENCODE:
 				t_line = yEncBegin(m);
00f95393
 
eac601be
 				if(t_line == NULL) {
 					/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
 					if(first) {
 						last->t_next = NULL;
 						textDestroy(first);
 					}
 					return NULL;
15bfc2e4
 				}
eac601be
 				t_line = t_line->t_next;
 			default:
 				if((i == 0) && binhexBegin(m))
 					cli_warnmsg("Binhex messages not supported yet.\n");
 				t_line = messageGetBody(m);
a42dba7d
 		}
b151ef55
 
27a375f2
 		for(; t_line; t_line = t_line->t_next) {
 			unsigned char data[1024];
 			unsigned char *uptr;
de617e3e
 			const char *line = lineGetData(t_line->t_line);
27a375f2
 
2add0ed7
 			if(enctype == BASE64)
285a69b4
 				/*
 				 * ignore blanks - breaks RFC which is
 				 * probably the point!
 				 */
 				if(line == NULL)
 					continue;
752c34b9
 
0d252351
 			assert((line == NULL) || (strlen(line) <= sizeof(data)));
 
0e3b08fc
 			uptr = decodeLine(m, enctype, line, data, sizeof(data));
b151ef55
 
27a375f2
 			if(uptr == NULL)
 				break;
b151ef55
 
27a375f2
 			assert(uptr <= &data[sizeof(data)]);
 
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
 			else {
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
b151ef55
 
98685ac1
 			if(last == NULL)
bbf43447
 				break;
752c34b9
 
290ba18f
 			/*
 			 * If the decoded line is the same as the encoded
 			 * there's no need to take a copy, just link it.
 			 * Note that the comparison is done without the
 			 * trailing newline that the decoding routine may have
 			 * added - that's why there's a strncmp rather than a
 			 * strcmp - that'd be bad for MIME decoders, but is OK
 			 * for AV software
 			 */
5eeffbb9
 			if((data[0] == '\n') || (data[0] == '\0'))
 				last->t_line = NULL;
d16754aa
 			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
74ca33e9
 #ifdef	CL_DEBUG
290ba18f
 				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
74ca33e9
 #endif
290ba18f
 				last->t_line = lineLink(t_line->t_line);
 			} else
5eeffbb9
 				last->t_line = lineCreate((char *)data);
98685ac1
 
0e3b08fc
 			if(line && enctype == BASE64)
752c34b9
 				if(strchr(line, '='))
 					break;
27a375f2
 		}
82348395
 		if(m->base64chars) {
 			unsigned char data[4];
 
4b0e970e
 			memset(data, '\0', sizeof(data));
5eeffbb9
 			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
82348395
 				if(first == NULL)
 					first = last = cli_malloc(sizeof(text));
 				else {
 					last->t_next = cli_malloc(sizeof(text));
 					last = last->t_next;
 				}
 
 				if(last != NULL)
5eeffbb9
 					last->t_line = lineCreate((char *)data);
82348395
 			}
 			m->base64chars = 0;
 		}
b151ef55
 	}
 
 	if(last)
 		last->t_next = NULL;
 
 	return first;
 }
 
985cc85e
 /*
  * Return the yEnc marker stored in the message (m->yenc), which is NULL
  * when no marker is set.
  */
 text *
 yEncBegin(message *m)
 {
 	text *marker = m->yenc;

 	return marker;
 }
 
b151ef55
 /*
a42dba7d
  * Scan to find the BINHEX message (if any)
  */
f5a4d7e8
 #if	0
985cc85e
 const text *
 binhexBegin(message *m)
a42dba7d
 {
 	const text *t_line;
 
 	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
 		if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
 			return t_line;
 
 	return NULL;
 }
f5a4d7e8
 #else
985cc85e
 text *
 binhexBegin(message *m)
f5a4d7e8
 {
 	return m->binhex;
 }
 #endif
a42dba7d
 
 /*
5a01973c
  * Scan to find a bounce message. There is no standard for these, not
  * even a convention, so don't expect this to be foolproof
  */
f5a4d7e8
 #if	0
985cc85e
 text *
 bounceBegin(message *m)
5a01973c
 {
 	const text *t_line;
bb5d6279
 
20fa2f53
 	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
06d4e856
 		if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
20fa2f53
 			return t_line;
5a01973c
 
 	return NULL;
 }
f5a4d7e8
 #else
985cc85e
 text *
 bounceBegin(message *m)
f5a4d7e8
 {
 	return m->bounce;
 }
 #endif
 
 /*
  * If a message doesn't not contain another message which could be harmful
  * it is deemed to be safe.
  *
  * TODO: ensure nothing can get through this
  *
  * TODO: check to see if we need to
  * find anything else, perhaps anything
  * from the RFC821 table?
  */
 #if	0
 int
 messageIsAllText(const message *m)
 {
 	const text *t;
 
 	for(t = messageGetBody(m); t; t = t->t_next)
 		if(strncasecmp(t->t_text,
 			"Content-Transfer-Encoding",
 			strlen("Content-Transfer-Encoding")) == 0)
 				return 0;
 
 	return 1;
 }
 #else
985cc85e
 text *
 encodingLine(message *m)
f5a4d7e8
 {
627465e7
 	return m->encoding;
f5a4d7e8
 }
 #endif
5a01973c
 
a446de17
 /*
  * Forget the encoding, bounce and binhex markers of the message by setting
  * each of them to NULL.
  */
 void
 messageClearMarkers(message *m)
 {
 	m->binhex = NULL;
 	m->bounce = NULL;
 	m->encoding = NULL;
 }
 
5a01973c
 /*
b151ef55
  * Decode a line and add it to a buffer, return the end of the buffer
27a375f2
  * to help appending callers. There is no new line at the end of "line"
eaacc2de
  *
  * len is sizeof(ptr)
b151ef55
  */
d1a6ea81
 unsigned char *
0e3b08fc
 decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
b151ef55
 {
2a3ceff4
 	size_t len, reallen;
bf8ea488
 	bool softbreak;
23e1c37c
 	char *p2, *copy;
53ee0b60
 	char base64buf[RFC2045LENGTH + 1];
b151ef55
 
15bfc2e4
 	/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
0d252351
 
b151ef55
 	assert(m != NULL);
eaacc2de
 	assert(buf != NULL);
b151ef55
 
0e3b08fc
 	switch(et) {
ee576466
 		case BINARY:
 			/*
 			 * TODO: find out what this is, encoded as binary??
 			 */
 			/* fall through */
b151ef55
 		case NOENCODING:
 		case EIGHTBIT:
c6259ac5
 		default:	/* unknown encoding type - try our best */
963e073f
 			if(line)	/* empty line? */
abac42dd
 				buf = (unsigned char *)cli_strrcpy((char *)buf, line);
b151ef55
 			/* Put the new line back in */
abac42dd
 			return (unsigned char *)cli_strrcpy((char *)buf, "\n");
b151ef55
 
 		case QUOTEDPRINTABLE:
98685ac1
 			if(line == NULL) {	/* empty line */
 				*buf++ = '\n';
 				break;
 			}
285a69b4
 
da850706
 			softbreak = FALSE;
23e1c37c
 			while(buflen && *line) {
da850706
 				if(*line == '=') {
 					unsigned char byte;
 
 					if((*++line == '\0') || (*line == '\n')) {
 						softbreak = TRUE;
 						/* soft line break */
 						break;
 					}
 
 					byte = hex(*line);
 
 					if((*++line == '\0') || (*line == '\n')) {
 						/*
 						 * broken e-mail, not
 						 * adhering to RFC2045
 						 */
 						*buf++ = byte;
 						break;
 					}
 
582808c3
 					/*
 					 * Fix by Torok Edvin
 					 * <edwintorok@gmail.com>
 					 * Handle messages that use a broken
 					 * quoted-printable encoding of
 					 * href=\"http://, instead of =3D
 					 */
 					if(byte != '=') {
 						byte <<= 4;
 						byte += hex(*line);
 					} else
 						line -= 2;
 
da850706
 					*buf++ = byte;
 				} else
 					*buf++ = *line;
23e1c37c
 				++line;
 				--buflen;
da850706
 			}
bf8ea488
 			if(!softbreak)
 				/* Put the new line back in */
eaacc2de
 				*buf++ = '\n';
b151ef55
 			break;
 
 		case BASE64:
98685ac1
 			if(line == NULL)
 				break;
752c34b9
 			/*
da850706
 			 * RFC2045 sets the maximum length to 76 bytes
752c34b9
 			 * but many e-mail clients ignore that
 			 */
53ee0b60
 			if(strlen(line) < sizeof(base64buf)) {
 				strcpy(base64buf, line);
 				copy = base64buf;
 			} else {
4db74788
 				copy = cli_strdup(line);
53ee0b60
 				if(copy == NULL)
 					break;
 			}
bbf43447
 
752c34b9
 			p2 = strchr(copy, '=');
b151ef55
 			if(p2)
 				*p2 = '\0';
285a69b4
 
d17de037
 			sanitiseBase64(copy);
 
b151ef55
 			/*
 			 * Klez doesn't always put "=" on the last line
 			 */
285a69b4
 			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
752c34b9
 
53ee0b60
 			if(copy != base64buf)
 				free(copy);
b151ef55
 			break;
 
 		case UUENCODE:
b3a5cdd8
 			assert(m->base64chars == 0);
 
98685ac1
 			if((line == NULL) || (*line == '\0'))	/* empty line */
3c52fb18
 				break;
b151ef55
 			if(strcasecmp(line, "end") == 0)
 				break;
64ff0d49
 			if(isuuencodebegin(line))
 				break;
b151ef55
 
 			if((line[0] & 0x3F) == ' ')
 				break;
 
af66c329
 			/*
 			 * reallen contains the number of bytes that were
 			 *	encoded
 			 */
2a3ceff4
 			reallen = (size_t)uudecode(*line++);
af66c329
 			if(reallen <= 0)
 				break;
 			if(reallen > 62)
2a3ceff4
 				break;
 			len = strlen(line);
b151ef55
 
af66c329
 			if((len > buflen) || (reallen > len))
eaacc2de
 				/*
 				 * In practice this should never occur since
 				 * the maximum length of a uuencoded line is
 				 * 62 characters
 				 */
8dc9ee9e
 				cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
2a3ceff4
 			else {
 				(void)decode(m, line, buf, uudecode, (len & 3) == 0);
 				buf = &buf[reallen];
 			}
b3a5cdd8
 			m->base64chars = 0;	/* this happens with broken uuencoded files */
b151ef55
 			break;
00f95393
 		case YENCODE:
 			if((line == NULL) || (*line == '\0'))	/* empty line */
 				break;
 			if(strncmp(line, "=yend ", 6) == 0)
 				break;
 
 			while(*line)
 				if(*line == '=') {
 					if(*++line == '\0')
 						break;
 					*buf++ = ((*line++ - 64) & 255);
 				} else
 					*buf++ = ((*line++ - 42) & 255);
 			break;
b151ef55
 	}
 
eaacc2de
 	*buf = '\0';
 	return buf;
b151ef55
 }
 
6ba88eb8
 /*
fb405afc
  * Remove the non base64 characters such as spaces from a string. Spaces
  * shouldn't appear mid string in base64 files, but some broken mail clients
  * ignore such errors rather than discarding the mail, and virus writers
  * exploit this bug
285a69b4
  */
 static void
fb405afc
 sanitiseBase64(char *s)
285a69b4
 {
15bfc2e4
 	/*cli_dbgmsg("sanitiseBase64 '%s'\n", s);*/
fb405afc
 	for(; *s; s++)
15bfc2e4
 		if(base64Table[(unsigned int)(*s & 0xFF)] == 255) {
fb405afc
 			char *p1;
e982ca83
 
fb405afc
 			for(p1 = s; p1[0] != '\0'; p1++)
 				p1[0] = p1[1];
811e3356
 			--s;
fb405afc
 		}
285a69b4
 }
 
 /*
  * Turn groups of four encoded characters into three bytes.
  *
  * m:		message, holds the characters carried over between calls
  * in:		NUL terminated encoded data, or NULL to flush
  * out:		where the decoded bytes are written
  * decoder:	converts one encoded character into its value
  * isFast:	TRUE to use the unchecked loop; it reads four characters at
  *		a time without looking for the NUL in between, so the caller
  *		must only ask for it when the length of in is a multiple of 4
  *
  * Returns one byte after the end of the decoded data in "out"
  *
  * Update m->base64chars with the last few bytes of data that we haven't
  * decoded. After the last line is found, decode will be called with in = NULL
  * to flush these out
  */
b151ef55
 static unsigned char *
285a69b4
 decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
 {
 	unsigned char b1, b2, b3, b4;
 	unsigned char cb1, cb2, cb3;	/* carried over from last line */
 
15bfc2e4
 	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
285a69b4
 		in ? strlen(in) : 0,
f0146bc6
 		isFast, m->base64chars);*/
285a69b4
 
 	cb1 = cb2 = cb3 = '\0';
 
 	/*
 	 * Pick up the values left over by the previous call. When there are
 	 * any, the fast loop can't be used since the first group is made up
 	 * of carried values as well as new characters
 	 */
 	switch(m->base64chars) {
 		case 3:
 			cb3 = m->base64_3;
 			/* FALLTHROUGH */
 		case 2:
 			cb2 = m->base64_2;
 			/* FALLTHROUGH */
 		case 1:
 			cb1 = m->base64_1;
 			isFast = FALSE;
 			break;
 		default:
 			assert(m->base64chars <= 3);
 	}
 
 	if(isFast)
 		/* Fast decoding if not last line */
 		while(*in) {
 			b1 = (*decoder)(*in++);
 			b2 = (*decoder)(*in++);
 			b3 = (*decoder)(*in++);
 			/*
 			 * Put this line here to help on some compilers which
 			 * can make use of some architecture's ability to
 			 * multiprocess when different variables can be
 			 * updated at the same time - here b3 is used in
 			 * one line, b1/b2 in the next and b4 in the next after
 			 * that, b3 and b4 rely on in but b1/b2 don't
 			 */
 			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 			b4 = (*decoder)(*in++);
 			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 			*out++ = (b3 << 6) | (b4 & 0x3F);
 		}
0d252351
 	else if(in == NULL) {	/* flush */
 		int nbytes;
 
 		if(m->base64chars == 0)
 			return out;
285a69b4
 
0d252351
 		cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
87901cab
 			isalnum(cb1) ? cb1 : '@',
 			isalnum(cb2) ? cb2 : '@',
 			isalnum(cb3) ? cb3 : '@');
285a69b4
 
 		/*
 		 * Use up the carried values one at a time, nbytes ends up as
 		 * the number of them that take part in the output
 		 */
0d252351
 		m->base64chars--;
 		b1 = cb1;
 		nbytes = 1;
d17de037
 
0d252351
 		if(m->base64chars) {
285a69b4
 			m->base64chars--;
0d252351
 			b2 = cb2;
285a69b4
 
 			if(m->base64chars) {
c8e1ad63
 				nbytes = 2;
285a69b4
 				m->base64chars--;
0d252351
 				b3 = cb3;
362fe28f
 				nbytes = 3;
0d252351
 			} else if(b2)
c8e1ad63
 				nbytes = 2;
0d252351
 		}
285a69b4
 
 		/* nbytes is 1, 2 or 3 here, nothing above sets it to 4 */
0d252351
 		switch(nbytes) {
 			case 3:
 				b4 = '\0';
 				/* fall through */
 			case 4:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
70b54406
 				if((nbytes == 4) || b3)
 					*out++ = (b3 << 6) | (b4 & 0x3F);
0d252351
 				break;
 			case 2:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
c8e1ad63
 				if((b2 << 4) & 0xFF)
 					*out++ = b2 << 4;
0d252351
 				break;
 			case 1:
 				*out++ = b1 << 2;
 				break;
 			default:
 				assert(0);
 		}
 	/*
 	 * Careful decoding: every value comes from the carried ones first
 	 * and then from the input, the end of the input is looked for before
 	 * each character is read
 	 */
 	} else while(*in) {
 		int nbytes;
285a69b4
 
0d252351
 		if(m->base64chars) {
 			m->base64chars--;
 			b1 = cb1;
 		} else
 			b1 = (*decoder)(*in++);
285a69b4
 
0d252351
 		if(*in == '\0') {
 			b2 = '\0';
 			nbytes = 1;
 		} else {
285a69b4
 			if(m->base64chars) {
 				m->base64chars--;
0d252351
 				b2 = cb2;
285a69b4
 			} else
0d252351
 				b2 = (*decoder)(*in++);
285a69b4
 
 			if(*in == '\0') {
0d252351
 				b3 = '\0';
 				nbytes = 2;
285a69b4
 			} else {
 				if(m->base64chars) {
 					m->base64chars--;
0d252351
 					b3 = cb3;
285a69b4
 				} else
0d252351
 					b3 = (*decoder)(*in++);
285a69b4
 
 				if(*in == '\0') {
0d252351
 					b4 = '\0';
 					nbytes = 3;
285a69b4
 				} else {
0d252351
 					b4 = (*decoder)(*in++);
 					nbytes = 4;
285a69b4
 				}
 			}
0d252351
 		}
285a69b4
 
 		/*
 		 * A full group is written out, an incomplete one is saved in
 		 * the message for the next call (cases 3 and 2 fall through
 		 * so that all the values read are saved)
 		 */
0d252351
 		switch(nbytes) {
 			case 3:
 				m->base64_3 = b3;
 			case 2:
 				m->base64_2 = b2;
 			case 1:
 				m->base64_1 = b1;
285a69b4
 				break;
0d252351
 			case 4:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 				*out++ = (b3 << 6) | (b4 & 0x3F);
 				break;
 			default:
 				assert(0);
 		}
 		/* An incomplete group means the input has been used up */
 		if(nbytes != 4) {
 			m->base64chars = nbytes;
 			break;
285a69b4
 		}
 	}
 	return out;
 }
b151ef55
 
 /*
  * Convert one hexadecimal digit (either case) into its value, 0 to 15.
  *
  * Anything else is reported with cli_dbgmsg() and '=' is returned, see
  * below.
  *
  * The digits are tested by range rather than with isdigit(): c is a plain
  * char and may be negative, which the <ctype.h> functions do not accept.
  */
 static unsigned char
 hex(char c)
 {
 	if((c >= '0') && (c <= '9'))
 		return c - '0';
 	if((c >= 'A') && (c <= 'F'))
 		return c - 'A' + 10;
 	if((c >= 'a') && (c <= 'f'))
 		return c - 'a' + 10;
 	cli_dbgmsg("Illegal hex character '%c'\n", c);

 	/*
 	 * Some mails (notably some spam) break RFC2045 by failing to encode
 	 * the '=' character
 	 */
 	return '=';
 }
 
5ae253d2
 static unsigned char
 base64(char c)
 {
15bfc2e4
 	const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)];
5ae253d2
 
 	if(ret == 255) {
0d252351
 		/*cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);*/
5ae253d2
 		return 63;
 	}
 	return ret;
 }
b151ef55
 
 /*
  * Convert one uuencoded character into its value by taking away the
  * value of the space character.
  */
 static unsigned char
 uudecode(char c)
 {
 	const int value = c - ' ';

 	return (unsigned char)value;
 }
b4cb4486
 
 /*
  * Tell if an argument is one that is looked for later on.
  *
  * These are the only arguments we're interested in.
  * Do 'fgrep messageFindArgument *.c' if you don't believe me!
  * It's probably not good doing this since each time a new
  * messageFindArgument is added I need to remember to look here,
  * but it can save a lot of memory...
  *
  * The comparison ignores case and only looks at the start of arg.
  * Returns 1 if arg is wanted, 0 (after a debug message) if it is not.
  */
 static int
 usefulArg(const char *arg)
 {
 	static const struct {
 		const char *prefix;
 		size_t length;
 	} wanted[] = {
 		{ "name", 4 },
 		{ "filename", 8 },
 		{ "boundary", 8 },
 		{ "protocol", 8 },
 		{ "id", 2 },
 		{ "number", 6 },
 		{ "total", 5 },
 		{ "type", 4 }
 	};
 	size_t k;

 	for(k = 0; k < sizeof(wanted) / sizeof(wanted[0]); k++)
 		if(strncasecmp(arg, wanted[k].prefix, wanted[k].length) == 0)
 			return 1;

 	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
 	return 0;
 }
e24738dc
 
b65d2aad
 /*
  * Attach a scan context to a message: stores ctx in m->ctx.
  * The pointer is stored as-is; no copy is made here.
  */
 void
 messageSetCTX(message *m, cli_ctx *ctx)
 {
 	m->ctx = ctx;
 }
 
 /*
  * Report whether the message has been flagged as infected.
  * Returns TRUE when m->isInfected is set, FALSE otherwise.
  */
 int
 messageContainsVirus(const message *m)
 {
 	if(m->isInfected)
 		return TRUE;
 
 	return FALSE;
 }
 
e24738dc
 /*
  * We've run out of memory. Try to recover some by deduping the message:
  * body lines with identical text are made to share one line_t through
  * lineUnlink()/lineLink().
  *
  * The scan stops once roughly 100,000 bytes have been reclaimed, and the
  * node at which it stopped is stored in m->dedupedThisFar.
  *
  * FIXME: this can take a long time. The real solution is for system admins
  *	to refrain from setting ulimits too low, then this routine won't be
  *	called
  */
 static void
 messageDedup(message *m)
 {
 	const text *t1;
 	size_t saved = 0;
 
 	cli_dbgmsg("messageDedup\n");
 
 	/*
 	 * NOTE(review): m->dedupedThisFar is recorded on exit, but the scan
 	 * has always restarted from body_first (an earlier assignment from
 	 * dedupedThisFar was immediately overwritten by this loop). The
 	 * restart is kept so that lines added since the last call are still
 	 * compared with earlier ones; resuming would also need confirmation
 	 * that the stored node cannot be freed between calls.
 	 */
 	for(t1 = m->body_first; t1; t1 = t1->t_next) {
 		const char *d1;
 		text *t2;
 		line_t *l1;
 		unsigned int r1;
 		size_t len1;
 
 		if(saved >= 100*1000)
 			break;	/* that's enough */
 		l1 = t1->t_line;
 		if(l1 == NULL)
 			continue;
 		d1 = lineGetData(l1);
 		len1 = strlen(d1);
 		if(len1 < 8)
 			continue;	/* wouldn't recover many bytes */
 
 		r1 = (unsigned int)lineGetRefCount(l1);
 		if(r1 == 255)
 			continue;	/* reference count is at its ceiling */
 		/*
 		 * We don't want to foul up any pointers
 		 */
 		if(t1 == m->encoding)
 			continue;
 		if(t1 == m->bounce)
 			continue;
 		if(t1 == m->binhex)
 			continue;
 		if(t1 == m->yenc)
 			continue;
 
 		for(t2 = t1->t_next; t2; t2 = t2->t_next) {
 			const char *d2;
 			line_t *l2 = t2->t_line;
 
 			if(l2 == NULL)
 				continue;
 			d2 = lineGetData(l2);
 			if(d1 == d2)
 				/* already linked */
 				continue;
 			if(strcmp(d1, d2) == 0) {
 				/*
 				 * Only count the bytes as reclaimed when
 				 * lineUnlink() reports NULL for the duplicate
 				 */
 				if(lineUnlink(l2) == NULL)
 					saved += len1 + 1;
 				t2->t_line = lineLink(l1);
 				if(t2->t_line == NULL) {
 					cli_errmsg("messageDedup: out of memory\n");
 					return;
 				}
 				if(++r1 == 255)
 					break;
 			}
 		}
 	}
 
 	/* size_t has no portable C89 conversion; print it as unsigned long */
 	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
 	m->dedupedThisFar = t1;
 }
b329234a
 
 /*
  * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error.
  *
  * - If the input contains "*0*=" (an encoded continuation), or contains
  *   neither "*0=" nor "*=", a plain copy of the input is returned.
  * - For "name*0=value" the value is copied with %XX sequences decoded.
  * - For "name*=charset'language'value" the two quote-delimited fields are
  *   skipped and the value is copied with %XX sequences decoded.
  * - If the closing quotes are never found an empty string is returned.
  *
  * In the converted cases the result has the form "name=value".
  *
  * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
  *	 protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
  */
 static char *
 rfc2231(const char *in)
 {
 	const char *ptr;
 	char *ret, *out;
 	size_t prefixlen;
 	enum { LANGUAGE, CHARSET, CONTENTS } field;
 
 	if(strstr(in, "*0*=") != NULL) {
 		cli_warnmsg("RFC2231 parameter continuations are not yet handled\n");
 		return cli_strdup(in);
 	}
 
 	ptr = strstr(in, "*0=");
 	if(ptr != NULL)
 		/*
 		 * Parameter continuation, with no continuation
 		 * Thunderbird 1.5 (and possibly other versions) does this
 		 */
 		field = CONTENTS;
 	else {
 		ptr = strstr(in, "*=");
 		field = LANGUAGE;
 	}
 
 	if(ptr == NULL)	/* quick return */
 		return cli_strdup(in);
 
 	cli_dbgmsg("rfc2231 '%s'\n", in);
 
 	/*
 	 * The output is never longer than the input: the "*=" / "*0=" marker
 	 * shrinks to "=" and every %XX sequence shrinks to one byte
 	 */
 	ret = cli_malloc(strlen(in) + 1);
 
 	if(ret == NULL)
 		return NULL;
 
 	/*
 	 * Copy the parameter name (everything before the '*'). "in" is left
 	 * untouched so that the diagnostics below can show the whole header.
 	 */
 	prefixlen = (size_t)(ptr - in);
 	memcpy(ret, in, prefixlen);
 	out = &ret[prefixlen];
 
 	*out++ = '=';
 
 	/* step over the marker, up to and including its '=' */
 	while(*ptr++ != '=')
 		;
 
 	/*
 	 * We don't do anything with the language and character set, just skip
 	 * over them!
 	 */
 	while(*ptr) {
 		switch(field) {
 			case LANGUAGE:
 				if(*ptr == '\'')
 					field = CHARSET;
 				break;
 			case CHARSET:
 				if(*ptr == '\'')
 					field = CONTENTS;
 				break;
 			case CONTENTS:
 				if(*ptr == '%') {
 					unsigned char byte;
 
 					/* '%' at the very end: nothing to decode */
 					if((*++ptr == '\0') || (*ptr == '\n'))
 						break;
 
 					byte = hex(*ptr);
 
 					/* only one digit followed the '%' */
 					if((*++ptr == '\0') || (*ptr == '\n')) {
 						*out++ = byte;
 						break;
 					}
 
 					byte <<= 4;
 					byte += hex(*ptr);
 					*out++ = byte;
 				} else
 					*out++ = *ptr;
 				break;
 		}
 		if(*ptr++ == '\0')
 			/*
 			 * Incorrect message that has just one character after
 			 * a '%'.
 			 * FIXME: stash something in out that would, for example
 			 *	treat %2 as %02, assuming field == CONTENTS
 			 */
 			break;
 	}
 
 	if(field != CONTENTS) {
 		free(ret);
 		cli_warnmsg("Invalid RFC2231 header: '%s'\n", in);
 		return cli_strdup("");
 	}
 
 	*out = '\0';
 
 	cli_dbgmsg("rfc2231 returns '%s'\n", ret);
 
 	return ret;
 }
 
 /*
b329234a
  * common/simil:
  *	From Computing Magazine 20/8/92
  * Returns a percentage from 0 to 100 - how similar are 2 strings?
  * 100 for exact match, < 0 for error
  */
 /*
  * Node of the singly linked stack that simil() uses to hold the string
  * fragments it still has to compare. push() stores a cli_strdup() copy of
  * the string in d1; pop() copies it out and frees both copy and node.
  */
 struct	pstr_list {	/* internal stack */
 	char	*d1;	/* string held by this node */
 	struct	pstr_list	*next;	/* node below this one, NULL at the bottom */
 };
 
 /*
  * Status codes used by simil(), push() and pop(). They are all negative,
  * so they cannot be mistaken for a similarity score.
  */
 #define	OUT_OF_MEMORY	(-2)	/* an allocation failed */
 #define	FAILURE	(-3)	/* pop() was called on an empty stack */
 #define	SUCCESS	(-4)	/* push() or pop() worked */
 #define	ARRAY_OVERFLOW	(-5)	/* simil(): a string is longer than MAX_PATTERN_SIZ - 1 */
 typedef	struct	pstr_list	ELEMENT1;
 typedef	ELEMENT1		*LINK1;
 
 /* Helpers for simil(), defined after it */
 static	int	push(LINK1 *top, const char *string);
 static	int	pop(LINK1 *top, char *buffer);
 static	unsigned	int	compare(char *ls1, char **rs1, char *ls2, char **rs2);
 
4bdd7a93
 /* Size of simil()'s work buffers, including the terminating NUL */
 #define	MAX_PATTERN_SIZ	50	/* maximum string lengths */
b329234a
 
 static int
 simil(const char *str1, const char *str2)
 {
 	LINK1 top = NULL;
 	unsigned int score = 0;
40d54f7f
 	size_t common, total;
 	size_t len1, len2;
b329234a
 	char *rs1 = NULL, *rs2 = NULL;
 	char *s1, *s2;
4db74788
 	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
b329234a
 
 	if(strcasecmp(str1, str2) == 0)
 		return 100;
 
4db74788
 	if((s1 = cli_strdup(str1)) == NULL)
b329234a
 		return OUT_OF_MEMORY;
4db74788
 	if((s2 = cli_strdup(str2)) == NULL) {
b329234a
 		free(s1);
 		return OUT_OF_MEMORY;
 	}
 
 	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
 		free(s1);
 		free(s2);
 		return ARRAY_OVERFLOW;
 	}
 
 	total += len2;
 
63f87938
 	if((push(&top, s1) == OUT_OF_MEMORY) ||
 	   (push(&top, s2) == OUT_OF_MEMORY)) {
 		free(s1);
 		free(s2);
b329234a
 		return OUT_OF_MEMORY;
63f87938
 	}
b329234a
 
 	while(pop(&top, ls2) == SUCCESS) {
 		pop(&top, ls1);
 		common = compare(ls1, &rs1, ls2, &rs2);
 		if(common > 0) {
40d54f7f
 			score += (unsigned int)common;
b329234a
 			len1 = strlen(ls1);
 			len2 = strlen(ls2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 			len1 = strlen(rs1);
 			len2 = strlen(rs2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 		}
 	}
 	free(s1);
 	free(s2);
 	return (total > 0) ? ((score * 200) / total) : 0;
 }
 
 /*
  * Find the longest run of characters, compared case-insensitively, that
  * the two strings have in common.
  *
  * If a common run is found both strings are modified in place: each is
  * terminated where the run starts, so that ls1 and ls2 become the text to
  * the left of the run, and *rs1 / *rs2 are set to point just past the run
  * (the text to the right of it) inside the original buffers.
  * If the strings have nothing in common, nothing is modified and *rs1 and
  * *rs2 are left untouched.
  *
  * Returns the length of the common run, 0 if there is none.
  */
 static unsigned int
 compare(char *ls1, char **rs1, char *ls2, char **rs2)
 {
 	unsigned int common, maxchars = 0;
 	char *s1, *s2;
 	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
 	char *cs1, *cs2, *start1, *end1, *end2;
 
 	end1 = ls1 + strlen(ls1);
 	end2 = ls2 + strlen(ls2);
 
 	/* end1 shrinks as longer runs are found, so it is re-read every pass */
 	for(start1 = ls1; start1 < end1; start1++) {
 		s1 = start1;
 		s2 = ls2;
 
 		while(s1 < end1 && s2 < end2) {
 			/*
 			 * tolower() is only defined for unsigned char values;
 			 * plain char may be negative for bytes >= 0x80
 			 */
 			if(tolower((unsigned char)*s1) != tolower((unsigned char)*s2)) {
 				s2++;
 				continue;
 			}
 
 			/* measure the run that starts here */
 			cs1 = s1;
 			cs2 = s2;
 			common = 0;
 			do {
 				if(s1 == end1 || s2 == end2)
 					break;
 				s1++;
 				s2++;
 				common++;
 			} while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));
 
 			if(common > maxchars) {
 				unsigned int diff = common - maxchars;
 
 				maxchars = common;
 				maxs1 = cs1;
 				maxs2 = cs2;
 				maxe1 = s1;
 				maxe2 = s2;
 				/*
 				 * Pull both search limits in by the amount the
 				 * best run has just grown
 				 */
 				end1 -= diff;
 				end2 -= diff;
 			} else
 				/* not an improvement: rewind s1, s2 stays advanced */
 				s1 -= common;
 		}
 	}
 	/* maxchars is non-zero exactly when the max* pointers have been set */
 	if(maxchars > 0) {
 		*maxs1 = '\0';
 		*maxs2 = '\0';
 		*rs1 = maxe1;
 		*rs2 = maxe2;
 	}
 	return maxchars;
 }
 
 /*
  * Push a copy of string onto the stack whose head is *top.
  *
  * Returns SUCCESS, or OUT_OF_MEMORY if either the node or the copy of the
  * string could not be allocated; in that case the stack is left unchanged.
  */
 static int
 push(LINK1 *top, const char *string)
 {
 	LINK1 element;
 
 	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
 		return OUT_OF_MEMORY;
 	if((element->d1 = cli_strdup(string)) == NULL) {
 		/* don't leak the node when the string copy fails */
 		free(element);
 		return OUT_OF_MEMORY;
 	}
 	element->next = *top;
 	*top = element;
 
 	return SUCCESS;
 }
 
 /*
  * Remove the top entry of the stack whose head is *top and copy its string
  * into buffer. The copy is unbounded (strcpy), so buffer must be large
  * enough for any string that was pushed.
  *
  * Returns SUCCESS, or FAILURE if the stack is empty (buffer is then left
  * untouched).
  */
 static int
 pop(LINK1 *top, char *buffer)
 {
 	LINK1 head = *top;
 
 	if(head == NULL)
 		return FAILURE;
 
 	(void)strcpy(buffer, head->d1);
 	*top = head->next;
 	free(head->d1);
 	free((char *)head);
 
 	return SUCCESS;
 }
64ff0d49
 
 /*
  * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
  *
  * The line must start with a lower case 'b', be at least 10 characters
  * long, and have the form "begin DDD " where "begin" is matched without
  * regard to case and each D is a decimal digit.
  *
  * Returns 1 if the line matches, 0 otherwise.
  */
 int
 isuuencodebegin(const char *line)
 {
 	if(line[0] != 'b')	/* quick check */
 		return 0;
 
 	if(strlen(line) < 10)
 		return 0;
 
 	/*
 	 * isdigit() is only defined for unsigned char values; plain char may
 	 * be negative for bytes >= 0x80, so convert first
 	 */
 	return (strncasecmp(line, "begin ", 6) == 0) &&
 		isdigit((unsigned char)line[6]) &&
 		isdigit((unsigned char)line[7]) &&
 		isdigit((unsigned char)line[8]) && (line[9] == ' ');
 }