libclamav/message.c
e3aaff8e
 /*
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
73ddf91f
  *
  * TODO: Optimise messageExport, decodeLine, messageIsEncoding
e3aaff8e
  */
6d6e8271
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
e3aaff8e
 
 #ifdef CL_THREAD_SAFE
406b1800
 #ifndef	_REENTRANT
e3aaff8e
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
406b1800
 #endif
e3aaff8e
 
cd11ef39
 #ifdef	C_DARWIN
e3aaff8e
 #include <sys/types.h>
 #endif
 #include <stdlib.h>
 #include <string.h>
bc6bbeff
 #ifdef	HAVE_STRINGS_H
e3aaff8e
 #include <strings.h>
bc6bbeff
 #endif
e3aaff8e
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 
e2875303
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
0f7f7682
 #include "others.h"
 #include "str.h"
 #include "filetypes.h"
 
e3aaff8e
 #include "mbox.h"
60d8d2c3
 #include "clamav.h"
e83019ae
 #include "json_api.h"
e3aaff8e
 
2d773a31
 #ifndef isblank
 #define isblank(c)	(((c) == ' ') || ((c) == '\t'))
 #endif
 
f003b79e
 #define	RFC2045LENGTH	76	/* maximum number of characters on a line */
 
0cf4cea7
 #ifdef	HAVE_STDBOOL_H
edee0700
 #include <stdbool.h>
 #else
 #ifdef	FALSE
 typedef	unsigned	char	bool;
 #else
 typedef enum	{ FALSE = 0, TRUE = 1 } bool;
 #endif
 #endif
e3aaff8e
 
be32043e
 static	int	messageHasArgument(const message *m, const char *variable);
b2223aad
 static	void	messageIsEncoding(message *m);
0c0894b8
 static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
9e1dc6e8
 static	void	sanitiseBase64(char *s);
a765e132
 #ifdef	__GNUC__
 static	unsigned	char	hex(char c)	__attribute__((const));
 static	unsigned	char	base64(char c)	__attribute__((const));
 static	unsigned	char	uudecode(char c)	__attribute__((const));
 #else
e3aaff8e
 static	unsigned	char	hex(char c);
 static	unsigned	char	base64(char c);
 static	unsigned	char	uudecode(char c);
a765e132
 #endif
e3aaff8e
 static	const	char	*messageGetArgument(const message *m, int arg);
2673dc74
 static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
28010d29
 static	int	usefulArg(const char *arg);
86355dc2
 static	void	messageDedup(message *m);
ec8e31fa
 static	char	*rfc2231(const char *in);
1602f612
 static	int	simil(const char *str1, const char *str2);
e3aaff8e
 
 /*
7cd9337a
  * These maps are ordered in decreasing likelihood of their appearance
c7b69776
  * in an e-mail. Probably these should be in a table...
e3aaff8e
  */
 /*
  * Names that may appear in a Content-Transfer-Encoding header and the
  * encoding_type that each one selects. messageSetEncoding() walks this
  * table from the top until it reaches the entry whose string is NULL.
  * Entries commented "incorrect" are not proper encoding names but are
  * listed here so that they are still recognised.
  */
 static	const	struct	encoding_map {
 	const	char	*string;	/* encoding name, NULL in the last entry */
 	encoding_type	type;	/* encoding selected by that name */
4685e392
 } encoding_map[] = {	/* rfc2045 */
e3aaff8e
 	{	"7bit",			NOENCODING	},
5c1150ac
 	{	"text/plain",		NOENCODING	},
4685e392
 	{	"quoted-printable",	QUOTEDPRINTABLE	},	/* rfc2045 */
02927896
 	{	"base64",		BASE64		},	/* rfc2045 */
e3aaff8e
 	{	"8bit",			EIGHTBIT	},
83e42783
 	{	"binary",		BINARY		},
5be8beb8
 	{	"x-uuencode",		UUENCODE	},	/* uuencode(5) */
16394c6d
 	{	"x-yencode",		YENCODE		},
86d24ebe
 	{	"x-binhex",		BINHEX		},
1c2d9d58
 	{	"us-ascii",		NOENCODING	},	/* incorrect */
0e523db2
 	{	"x-uue",		UUENCODE	},	/* incorrect */
5ee8f96d
 	{	"uuencode",		UUENCODE	},	/* incorrect */
c43c9798
 	{	NULL,			NOENCODING	}	/* end of table marker */
e3aaff8e
 };
 
f7cd5fbf
 /*
  * Primary MIME type names and the mime_type each one maps to.
  * messageSetMimeType() loads these entries into its lookup table and also
  * scans them with simil() when it has to guess at an unknown type name.
  * The entry whose string is NULL ends the table.
  */
 static	const	struct	mime_map {
e3aaff8e
 	const	char	*string;	/* type name, NULL in the last entry */
 	mime_type	type;	/* value stored in message.mimeType */
 } mime_map[] = {
 	{	"text",			TEXT		},
 	{	"multipart",		MULTIPART	},
 	{	"application",		APPLICATION	},
 	{	"audio",		AUDIO		},
 	{	"image",		IMAGE		},
 	{	"message",		MESSAGE		},
 	{	"video",		VIDEO		},
c43c9798
 	{	NULL,			TEXT		}	/* end of table marker */
e3aaff8e
 };
 
6e2ba331
 /*
  * See RFC2045, section 6.8, table 1
  */
32c9b306
 /*
  * 256 entry table holding the 6 bit value of each base64 alphabet character:
  * 'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives 62
  * and '/' gives 63. The padding character '=' is stored as 0 and every
  * other position holds 255.
  * NOTE(review): presumably indexed by the input byte and 255 means "not a
  * base64 character" - the code that reads the table is not in this part of
  * the file, confirm there.
  */
 static const unsigned char base64Table[256] = {
621a667a
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
 	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
 	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
 	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
 	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
 	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
 };
 
e3aaff8e
 message *
 messageCreate(void)
 {
 	message *m = (message *)cli_calloc(1, sizeof(message));
 
c7b69776
 	if(m)
843e1da6
 		m->mimeType = NOMIME;
e3aaff8e
 
 	return m;
 }
 
 /*
  * Release everything the message owns by way of messageReset(), then free
  * the message structure itself. m must not be NULL.
  */
 void
 messageDestroy(message *m)
 {
 	assert(m != NULL);

 	/* drop the members first, the container last */
 	messageReset(m);
 	free(m);
 }
 
 /*
  * Free every member owned by the message, zero the whole structure and
  * set its MIME type back to NOMIME. The message structure itself is not
  * freed. m must not be NULL.
  */
 void
 messageReset(message *m)
 {
 	int slot;

 	assert(m != NULL);

 	/* free(NULL) does nothing, so the two strings need no NULL test */
 	free(m->mimeSubtype);
 	free(m->mimeDispositionType);

 	if(m->mimeArguments != NULL) {
 		for(slot = 0; slot < m->numberOfArguments; ++slot)
 			free(m->mimeArguments[slot]);
 		free(m->mimeArguments);
 	}

 	if(m->body_first != NULL)
 		textDestroy(m->body_first);

 	assert(m->base64chars == 0);

 	if(m->encodingTypes != NULL) {
 		assert(m->numberOfEncTypes > 0);
 		free(m->encodingTypes);
 	}

 #if HAVE_JSON
 	if(m->jobj != NULL)
 		cli_json_delobj(m->jobj);
 #endif

 	/* wipe every field before marking the type as not yet known */
 	memset(m, '\0', sizeof(message));
 	m->mimeType = NOMIME;
 }
 
0960ff5e
 /*
83e42783
  * Handle the Content-Type header. The syntax is in RFC1341.
53bfac08
  * Return success (1) or failure (0). Failure only happens when it's an
  * unknown type and we've already received a known type, or we've received an
  * empty type. If we receive an unknown type by itself we default to application
0960ff5e
  */
53bfac08
 int
e3aaff8e
 messageSetMimeType(message *mess, const char *type)
 {
e2875303
 #ifdef	CL_THREAD_SAFE
 	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
721aaeb4
 	const struct mime_map *m;
9425e7ce
 	int typeval;
721aaeb4
 	static table_t *mime_table;
e3aaff8e
 
 	assert(mess != NULL);
f749115c
 	if(type == NULL) {
6351aa86
 		cli_dbgmsg("Empty content-type field\n");
f749115c
 		return 0;
 	}
e3aaff8e
 
 	cli_dbgmsg("messageSetMimeType: '%s'\n", type);
 
 	/* Ignore leading spaces */
127d2f98
 	while(!isalpha(*type))
e3aaff8e
 		if(*type++ == '\0')
53bfac08
 			return 0;
e3aaff8e
 
e2875303
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&mime_mutex);
 #endif
9425e7ce
 	if(mime_table == NULL) {
 		mime_table = tableCreate();
e2875303
 		if(mime_table == NULL) {
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&mime_mutex);
 #endif
53bfac08
 			return 0;
e2875303
 		}
9425e7ce
 
 		for(m = mime_map; m->string; m++)
 			if(!tableInsert(mime_table, m->string, m->type)) {
 				tableDestroy(mime_table);
e2875303
 				mime_table = NULL;
 #ifdef	CL_THREAD_SAFE
 				pthread_mutex_unlock(&mime_mutex);
 #endif
53bfac08
 				return 0;
9425e7ce
 			}
 	}
e2875303
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&mime_mutex);
 #endif
9425e7ce
 
 	typeval = tableFind(mime_table, type);
 
53bfac08
 	if(typeval != -1) {
cd11ef39
 		mess->mimeType = (mime_type)typeval;
53bfac08
 		return 1;
721aaeb4
 	}
 	if(mess->mimeType == NOMIME) {
e3aaff8e
 		if(strncasecmp(type, "x-", 2) == 0)
 			mess->mimeType = MEXTENSION;
0356cdc0
 		else {
0ae75a8d
 			/*
 			 * Force scanning of strange messages
 			 */
0356cdc0
 			if(strcasecmp(type, "plain") == 0) {
f9dc65f3
 				cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
0356cdc0
 				mess->mimeType = TEXT;
 			} else {
 				/*
 				 * Don't handle broken e-mail probably sending
 				 *	Content-Type: plain/text
 				 * instead of
 				 *	Content-Type: text/plain
 				 * as an attachment
 				 */
721aaeb4
 				int highestSimil = 0, t = -1;
 				const char *closest = NULL;
 
 				for(m = mime_map; m->string; m++) {
 					const int s = simil(m->string, type);
 
 					if(s > highestSimil) {
 						highestSimil = s;
 						closest = m->string;
 						t = m->type;
 					}
 				}
 				if(highestSimil >= 50) {
9fe789f8
 					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%d%% certainty)\n",
8affc406
 						type, closest,
9fe789f8
 						highestSimil);
b9a141ea
 					mess->mimeType = (mime_type)t;
721aaeb4
 				} else {
6fcf5624
 					cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
721aaeb4
 					mess->mimeType = APPLICATION;
 				}
0356cdc0
 			}
0ae75a8d
 		}
53bfac08
 		return 1;
e3aaff8e
 	}
53bfac08
 	return 0;
e3aaff8e
 }
 
 /*
  * Return the MIME type currently recorded in the message.
  * m must not be NULL.
  */
 mime_type
 messageGetMimeType(const message *m)
 {
 	mime_type current;

 	assert(m != NULL);

 	current = m->mimeType;
 	return current;
 }
 
 /*
  * Replace the message's MIME subtype with a copy of the given string.
  * A NULL subtype is stored as the empty string.
  */
 void
 messageSetMimeSubtype(message *m, const char *subtype)
 {
 	const char *value = subtype;

 	assert(m != NULL);

 	if(value == NULL) {
 		/*
 		 * Handle broken content-type lines, e.g.
 		 *	Content-Type: text/
 		 */
 		cli_dbgmsg("Empty content subtype\n");
 		value = "";
 	}

 	/* free(NULL) does nothing, so no test for an earlier value is needed */
 	free(m->mimeSubtype);

 	m->mimeSubtype = cli_strdup(value);
 }
 
 const char *
 messageGetMimeSubtype(const message *m)
 {
1602f612
 	return (m->mimeSubtype) ? m->mimeSubtype : "";
e3aaff8e
 }
 
 /*
  * Replace the message's Content-Disposition type.
  *
  * Any earlier value is freed. A NULL, empty or all white space disptype
  * leaves the disposition type unset (NULL). Otherwise a copy of disptype,
  * without its leading white space and passed through strstrip(), is stored.
  */
 void
 messageSetDispositionType(message *m, const char *disptype)
 {
 	assert(m != NULL);

 	free(m->mimeDispositionType);
 	m->mimeDispositionType = NULL;

 	if(disptype == NULL)
 		return;

 	/*
 	 * It's broken for there to be an entry such as "Content-Disposition:"
 	 * However some spam and viruses are rather broken, it's a sign
 	 * that something is wrong if we get that - maybe we should force a
 	 * scan of this part
 	 *
 	 * The character is cast to unsigned char because isspace() is
 	 * undefined for negative values, which a plain char holds for any
 	 * byte with the top bit set. The loop stops at the terminator
 	 * since isspace('\0') is false.
 	 */
 	while(isspace((unsigned char)*disptype))
 		disptype++;

 	if(*disptype == '\0')
 		return;

 	m->mimeDispositionType = cli_strdup(disptype);
 	if(m->mimeDispositionType)
 		strstrip(m->mimeDispositionType);
 }
 
 const char *
 messageGetDispositionType(const message *m)
 {
1602f612
 	return (m->mimeDispositionType) ? m->mimeDispositionType : "";
e3aaff8e
 }
 
 /*
  * TODO:
  *	Arguments are held on a per message basis, they should be held on
  * a per section basis. Otherwise what happens if two sections have two
  * different values for charset? Probably doesn't matter for the use this
  * code will be given, but will need fixing if this code is used elsewhere
  */
 void
 messageAddArgument(message *m, const char *arg)
 {
 	int offset;
 	char *p;

 	assert(m != NULL);

 	if(arg == NULL)
 		return;	/* Note: this is not an error condition */

 	/*
 	 * Skip leading white space. The cast is required: isspace() is
 	 * undefined for negative values and header bytes may have the top
 	 * bit set.
 	 */
 	while(isspace((unsigned char)*arg))
 		arg++;

 	if(*arg == '\0')
 		/* Empty argument? Probably a broken mail client... */
 		return;

 	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);

 	if(!usefulArg(arg))
 		return;

 	/*
 	 * Look for a free (NULL) slot to reuse, and stop if an identical
 	 * argument has already been stored
 	 */
 	for(offset = 0; offset < m->numberOfArguments; offset++)
 		if(m->mimeArguments[offset] == NULL)
 			break;
 		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
 			return;	/* already in there */

 	if(offset == m->numberOfArguments) {
 		/* no free slot, grow the array by one entry */
 		char **q;

 		m->numberOfArguments++;
 		q = (char **)cli_realloc(m->mimeArguments, m->numberOfArguments * sizeof(char *));
 		if(q == NULL) {
 			m->numberOfArguments--;
 			return;
 		}
 		m->mimeArguments = q;
 	}

 	/* the slot is filled in even on failure so that it is never left unset */
 	p = m->mimeArguments[offset] = rfc2231(arg);
 	if(!p) {
 		/* problem inside rfc2231() */
 		cli_dbgmsg("messageAddArgument, error from rfc2231()\n");
 		return;
 	}

 	if(strchr(p, '=') == NULL) {
 		if(strncmp(p, "filename", 8) == 0) {
 			/*
 			 * FIXME: Bounce message handling is corrupting the in
 			 * core copies of headers
 			 */
 			if(strlen(p) > 8) {
 				cli_dbgmsg("Possible data corruption fixed\n");
 				p[8] = '=';
 			} else {
 				cli_dbgmsg("Possible data corruption not fixed\n");
 			}
 		} else {
 			/* not of the form key=value, discard it and free the slot */
 			if(*p)
 				cli_dbgmsg("messageAddArgument, '%s' contains no '='\n", p);
 			free(m->mimeArguments[offset]);
 			m->mimeArguments[offset] = NULL;
 			return;
 		}
 	}

 	/*
 	 * This is terribly broken from an RFC point of view but is useful
 	 * for catching viruses which have a filename but no type of
 	 * mime. By defaulting to an application rather than
 	 * to nomime we can ensure they're saved and scanned
 	 */
 	if((strncasecmp(p, "filename=", 9) == 0) || (strncasecmp(p, "name=", 5) == 0))
 		if(messageGetMimeType(m) == NOMIME) {
 			cli_dbgmsg("Force mime encoding to application\n");
 			messageSetMimeType(m, "application");
 		}
 }
 
 /*
  * Add in all the arguments.
  * Cope with:
  *	name="foo bar.doc"
  *	charset=foo name=bar
  */
 void
 messageAddArguments(message *m, const char *s)
 {
 	const char *string = s;

 	assert(string != NULL);

 	cli_dbgmsg("Add arguments '%s'\n", string);

 	/*
 	 * Each pass handles one "key=value" (or "key:value") item and hands
 	 * it to messageAddArgument() as a single "key=value" string.
 	 * All ctype calls cast to unsigned char since isspace() is undefined
 	 * for negative values.
 	 */
 	while(*string) {
 		const char *key, *cptr;
 		char *data, *field;

 		if(isspace((unsigned char)*string) || (*string == ';')) {
 			string++;
 			continue;
 		}

 		key = string;

 		data = strchr(string, '=');

 		/*
 		 * Some spam breaks RFC2045 by using ':' instead of '='
 		 * e.g.:
 		 *	Content-Type: text/html; charset:ISO-8859-1
 		 * should be:
 		 *	Content-type: text/html; charset=ISO-8859-1
 		 *
 		 * We give up with lines that are completely broken because
 		 * we don't have ESP and don't know what was meant to be there.
 		 * It's unlikely to really be a problem.
 		 */
 		if(data == NULL)
 			data = strchr(string, ':');

 		if(data == NULL) {
 			/*
 			 * Completely broken, give up
 			 */
 			cli_dbgmsg("Can't parse header \"%s\"\n", s);
 			return;
 		}

 		string = &data[1];

 		/*
 		 * Handle white space to the right of the equals sign
 		 * This breaks RFC2045 which has:
 		 *	parameter := attribute "=" value
 		 *	attribute := token   ; case-insensitive
 		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
 		 *		or tspecials>
 		 * But too many MUAs ignore this
 		 */
 		while(isspace((unsigned char)*string))
 			string++;

 		/* cptr marks the first character of the value */
 		cptr = string;

 		if(*string)
 			string++;

 		if(*cptr == '"') {
 			char *ptr, *kcopy;
 			size_t datasz;

 			/*
 			 * The field is in quotes, so look for the
 			 * closing quotes
 			 */
 			kcopy = cli_strdup(key);

 			if(kcopy == NULL)
 				return;

 			/* cut the private copy down to just the key name */
 			ptr = strchr(kcopy, '=');
 			if(ptr == NULL) {
 				ptr = strchr(kcopy, ':');
 				if(ptr == NULL) {
 					cli_dbgmsg("Can't parse header \"%s\"\n", s);
 					free(kcopy);
 					return;
 				}
 			}

 			*ptr = '\0';

 			string = strchr(++cptr, '"');

 			if(string == NULL) {
 				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
 				string = "";
 			} else
 				string++;

 			if(!usefulArg(kcopy)) {
 				free(kcopy);
 				continue;
 			}

 			data = cli_strdup(cptr);

 			if(!data) {
 				cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a missed virus, report it to bugs@clamav.net\n", s);
 				/*
 				 * Release the key copy. key itself points
 				 * into the caller's string and must never be
 				 * passed to free()
 				 */
 				free(kcopy);
 				return;
 			}

 			/*
 			 * End the value at the closing quote. If there is
 			 * none we have a weird e-mail header such as:
 			 * Content-Type: application/octet-stream; name="
 			 * "
 			 * Content-Transfer-Encoding: base64
 			 * Content-Disposition: attachment; filename="
 			 * "
 			 *
 			 * and use the end of line as data.
 			 */
 			ptr = strchr(data, '"');
 			if(ptr != NULL)
 				*ptr = '\0';

 			/* room for key, '=', value and the terminator */
 			datasz = strlen(kcopy) + strlen(data) + 2;
 			field = cli_realloc(kcopy, datasz);
 			if(field) {
 				cli_strlcat(field, "=", datasz);
 				cli_strlcat(field, data, datasz);
 			} else {
 				free(kcopy);
 			}
 			free(data);
 		} else {
 			size_t len;

 			if(*cptr == '\0') {
 				cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
 				return;
 			}

 			/*
 			 * The field is not in quotes, so look for the closing
 			 * white space
 			 */
 			while((*string != '\0') && !isspace((unsigned char)*string))
 				string++;

 			/* copy everything from the key up to the end of the value */
 			len = (size_t)(string - key) + 1;
 			field = cli_malloc(len);

 			if(field) {
 				memcpy(field, key, len - 1);
 				field[len - 1] = '\0';
 			}
 		}
 		if(field) {
 			messageAddArgument(m, field);
 			free(field);
 		}
 	}
 }
 
 static const char *
 messageGetArgument(const message *m, int arg)
 {
 	assert(m != NULL);
 	assert(arg >= 0);
049a18b9
 	assert(arg < m->numberOfArguments);
e3aaff8e
 
1602f612
 	return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
e3aaff8e
 }
 
 /*
  * Find a MIME variable from the header and return a COPY to the value of that
  * variable. The caller must free the copy
  */
95e11e5a
 char *
e3aaff8e
 messageFindArgument(const message *m, const char *variable)
 {
 	int i;
9425e7ce
 	size_t len;
e3aaff8e
 
 	assert(m != NULL);
 	assert(variable != NULL);
 
9425e7ce
 	len = strlen(variable);
 
049a18b9
 	for(i = 0; i < m->numberOfArguments; i++) {
e3aaff8e
 		const char *ptr;
 
 		ptr = messageGetArgument(m, i);
 		if((ptr == NULL) || (*ptr == '\0'))
9425e7ce
 			continue;
e3aaff8e
 #ifdef	CL_DEBUG
95e11e5a
 		cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
 			(unsigned long)len, variable, ptr);
e3aaff8e
 #endif
 		if(strncasecmp(ptr, variable, len) == 0) {
 			ptr = &ptr[len];
 			while(isspace(*ptr))
 				ptr++;
28c29d59
 			if(*ptr != '=') {
6351aa86
 				cli_dbgmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
28c29d59
 				return NULL;
 			}
f84289de
                         ptr++;
                         if((strlen(ptr) > 1) && (*ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
5a642650
 				/* Remove any quote characters */
0cf4cea7
 				char *ret = cli_strdup(++ptr);
5a642650
 				char *p;
 
843e1da6
 				if(ret == NULL)
 					return NULL;
 
5a642650
 				/*
 				 * fix un-quoting of boundary strings from
 				 * header, occurs if boundary was given as
 				 *	'boundary="_Test_";'
 				 *
 				 * At least two quotes in string, assume
 				 * quoted argument
 				 * end string at next quote
 				 */
f003b79e
 				if((p = strchr(ret, '"')) != NULL) {
 					ret[strlen(ret) - 1] = '\0';
5a642650
 					*p = '\0';
f003b79e
 				}
843e1da6
 				return ret;
e3aaff8e
 			}
0cf4cea7
 			return cli_strdup(ptr);
e3aaff8e
 		}
 	}
843e1da6
 	return NULL;
e3aaff8e
 }
 
ba74b333
 /*
  * Return a copy of the value of the "filename" argument, or failing that
  * of the "name" argument, as produced by messageFindArgument().
  * Returns NULL if neither lookup yields a value.
  */
 char *
 messageGetFilename(const message *m)
 {
 	char *found = messageFindArgument(m, "filename");

 	if(found == NULL)
 		found = messageFindArgument(m, "name");

 	return found;
 }
 
be32043e
 /*
  * Returns true (1) if the message holds an argument whose name is the given
  * variable (compared without regard to case) followed, after optional white
  * space, by '='. Returns false (0) otherwise.
  * A stored argument that merely starts with the name is skipped and the
  * search carries on with the remaining arguments.
  */
 static int
 messageHasArgument(const message *m, const char *variable)
 {
 	int i;
 	size_t len;

 	assert(m != NULL);
 	assert(variable != NULL);

 	len = strlen(variable);

 	for(i = 0; i < m->numberOfArguments; i++) {
 		const char *ptr;

 		ptr = messageGetArgument(m, i);
 		if((ptr == NULL) || (*ptr == '\0'))
 			continue;
 #ifdef	CL_DEBUG
 		cli_dbgmsg("messageHasArgument: compare %lu bytes of %s with %s\n",
 			(unsigned long)len, variable, ptr);
 #endif
 		if(strncasecmp(ptr, variable, len) != 0)
 			continue;

 		ptr = &ptr[len];
 		/* isspace() is undefined for negative values, hence the cast */
 		while(isspace((unsigned char)*ptr))
 			ptr++;
 		if(*ptr == '=')
 			return 1;

 		/*
 		 * Only the start of this argument's name matched, a later
 		 * argument may still be the one wanted
 		 */
 		cli_dbgmsg("messageHasArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
 	}
 	return 0;
 }
 
 /*
  * Returns 1 if messageHasArgument() reports a "filename" or a "file"
  * argument in the message, otherwise 0.
  */
 int
 messageHasFilename(const message *m)
 {
 	if(messageHasArgument(m, "filename"))
 		return 1;

 	return messageHasArgument(m, "file") ? 1 : 0;
 }
 
e3aaff8e
 /*
  * Record the encoding mechanism(s) named in a Content-Transfer-Encoding
  * value. The value may name several mechanisms separated by spaces or tabs;
  * each one found in encoding_map is appended to m->encodingTypes unless it
  * is already there. A name that is not in the map is replaced by the closest
  * map entry if simil() scores it at 50 or more, otherwise both base64 and
  * quoted-printable are enabled.
  */
 void
 messageSetEncoding(message *m, const char *enctype)
 {
 	const struct encoding_map *e;
 	int i;
 	char *type;

 	assert(m != NULL);
 	assert(enctype != NULL);

 	/*
 	 * The ctype calls in this function cast to unsigned char because
 	 * they are undefined for negative values and header bytes may have
 	 * the top bit set.
 	 */
 	while(isblank((unsigned char)*enctype))
 		enctype++;

 	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);

 	if(strcasecmp(enctype, "8 bit") == 0) {
 		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
 		enctype = "8bit";
 	}

 	/*
 	 * Iterate through
 	 *	Content-Transfer-Encoding: base64 binary
 	 * cli_strtok's fieldno counts from 0
 	 */
 	i = 0;
 	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
 		int highestSimil = 0;
 		const char *closest = NULL;
 		/* the same for every map entry, so work it out once */
 		const int lowertype = tolower((unsigned char)type[0]);

 		for(e = encoding_map; e->string; e++) {
 			int sim;

 			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
 				/*
 				 * simil is expensive and I have met only
 				 * one example of a missent encoding where the
 				 * first character was wrong, so let's assume no
 				 * match to save the call.
 				 *
 				 * That example was quoted-printable sent as
 				 * X-quoted-printable.
 				 */
 				continue;

 			if(strcmp(e->string, "uuencode") == 0)
 				/*
 				 * No need to test here - fast track visa will have
 				 * handled uuencoded files
 				 */
 				continue;

 			sim = simil(type, e->string);

 			if(sim == 100) {
 				int j;
 				encoding_type *et;

 				for(j = 0; j < m->numberOfEncTypes; j++)
 					if(m->encodingTypes[j] == e->type)
 						break;

 				if(j < m->numberOfEncTypes) {
 					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
 						type);
 					break;
 				}

 				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
 				if(et == NULL)
 					break;

 				m->encodingTypes = et;
 				m->encodingTypes[m->numberOfEncTypes++] = e->type;

 				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
 				break;
 			} else if(sim > highestSimil) {
 				closest = e->string;
 				highestSimil = sim;
 			}
 		}

 		/* the loop only runs off the end of the map when nothing matched */
 		if(e->string == NULL) {
 			/*
 			 * The stated encoding type is illegal, so we
 			 * use a best guess of what it should be.
 			 *
 			 * 50% is arbitrary. For example 7bi will match as
 			 * 66% certain to be 7bit
 			 */
 			if(highestSimil >= 50) {
 				/* highestSimil is an int, so it is printed with %d */
 				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%d%% certainty)\n",
 					type, closest, highestSimil);
 				messageSetEncoding(m, closest);
 			} else {
 				cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
 				/*
 				 * Err on the side of safety, enable all
 				 * decoding modules
 				 */
 				messageSetEncoding(m, "base64");
 				messageSetEncoding(m, "quoted-printable");
 			}
 		}

 		free(type);
 	}
 }
 
 encoding_type
 messageGetEncoding(const message *m)
 {
 	assert(m != NULL);
c7b69776
 
 	if(m->numberOfEncTypes == 0)
 		return NOENCODING;
 	return m->encodingTypes[0];
e3aaff8e
 }
 
b2223aad
 /*
  * Append a new node to the end of the message body.
  *
  * If line is non-NULL and has data, the node refers to it through
  * lineLink() and the line is checked by messageIsEncoding(); otherwise
  * the node is stored with no line.
  *
  * Returns 1 on success, -1 if the node cannot be allocated. On failure
  * the existing body, including m->body_last, is left exactly as it was.
  */
 int
 messageAddLine(message *m, line_t *line)
 {
 	text *node;

 	assert(m != NULL);

 	/*
 	 * Allocate before touching the list. Assigning the result of the
 	 * allocation straight to the tail pointers would lose the end of
 	 * the list when the allocation fails.
 	 */
 	node = (text *)cli_malloc(sizeof(text));
 	if(node == NULL) {
 		cli_errmsg("messageAddLine: out of memory for m->body_last\n");
 		return -1;
 	}

 	node->t_next = NULL;
 	node->t_line = NULL;

 	if(m->body_first == NULL)
 		m->body_first = node;
 	else
 		m->body_last->t_next = node;
 	m->body_last = node;

 	if(line && lineGetData(line)) {
 		node->t_line = lineLink(line);

 		/* messageIsEncoding() reads the line just stored at body_last */
 		if(node->t_line != NULL)
 			messageIsEncoding(m);
 	}

 	return 1;
 }
 
e3aaff8e
 /*
3e69b5be
  * Add the given line to the end of the given message
d879a7b0
  * If needed a copy of the given line is taken which the caller must free
3e69b5be
  * Line must not be terminated by a \n
e3aaff8e
  */
12f5aef2
 int
564b3e07
 messageAddStr(message *m, const char *data)
e3aaff8e
 {
381b67a7
 	line_t *repeat = NULL;
 
e3aaff8e
 	assert(m != NULL);
 
381b67a7
 	if(data) {
564b3e07
 		if(*data == '\0')
 			data = NULL;
 		else {
 			/*
 			 * If it's only white space, just store one space to
 			 * save memory. You must store something since it may
 			 * be a header line
 			 */
 			int iswhite = 1;
 			const char *p;
381b67a7
 
564b3e07
 			for(p = data; *p; p++)
86a4070b
 				if(((*p) & 0x80) || !isspace(*p)) {
564b3e07
 					iswhite = 0;
 					break;
 				}
 			if(iswhite) {
 				/*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
 				data = " ";
9ed148a8
 			}
381b67a7
 		}
 	}
 
e3aaff8e
 	if(m->body_first == NULL)
 		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
 	else {
86355dc2
 		assert(m->body_last != NULL);
5be8beb8
 		if((data == NULL) && (m->body_last->t_line == NULL))
fca571cb
 			/*
 			 * Although this would save time and RAM, some
 			 * phish signatures have been built which need the
 			 * blank lines
 			 */
 			if(messageGetMimeType(m) != TEXT)
7cd9337a
 				/* don't save two blank lines in succession */
fca571cb
 				return 1;
5be8beb8
 
e3aaff8e
 		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
86355dc2
 		if(m->body_last->t_next == NULL) {
 			messageDedup(m);
 			m->body_last->t_next = (text *)cli_malloc(sizeof(text));
 			if(m->body_last->t_next == NULL) {
 				cli_errmsg("messageAddStr: out of memory\n");
 				return -1;
 			}
 		}
 
381b67a7
 		if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
 			repeat = m->body_last->t_line;
e3aaff8e
 		m->body_last = m->body_last->t_next;
 	}
 
86355dc2
 	if(m->body_last == NULL) {
 		cli_errmsg("messageAddStr: out of memory\n");
12f5aef2
 		return -1;
86355dc2
 	}
ae3bda56
 
e3aaff8e
 	m->body_last->t_next = NULL;
 
b2223aad
 	if(data && *data) {
381b67a7
 		if(repeat)
 			m->body_last->t_line = lineLink(repeat);
79ef4f70
 		else {
86355dc2
 			m->body_last->t_line = lineCreate(data);
 
 			if(m->body_last->t_line == NULL) {
79ef4f70
 				messageDedup(m);
 				m->body_last->t_line = lineCreate(data);
b2223aad
 
79ef4f70
 				if(m->body_last->t_line == NULL) {
 					cli_errmsg("messageAddStr: out of memory\n");
 					return -1;
 				}
 			}
 			/* cli_chomp(m->body_last->t_text); */
621a667a
 			messageIsEncoding(m);
79ef4f70
 		}
02927896
 	} else
b2223aad
 		m->body_last->t_line = NULL;
02927896
 
12f5aef2
 	return 1;
e3aaff8e
 }
 
d879a7b0
 /*
3e69b5be
  * Add the given line to the start of the given message
  * A copy of the given line is taken which the caller must free
  * Line must not be terminated by a \n
  */
 int
b2223aad
 messageAddStrAtTop(message *m, const char *data)
3e69b5be
 {
 	text *oldfirst;
 
 	assert(m != NULL);
 
 	if(m->body_first == NULL)
b2223aad
 		return messageAddLine(m, lineCreate(data));
843e1da6
 
3e69b5be
 	oldfirst = m->body_first;
 	m->body_first = (text *)cli_malloc(sizeof(text));
 	if(m->body_first == NULL) {
 		m->body_first = oldfirst;
 		return -1;
 	}
 
 	m->body_first->t_next = oldfirst;
b2223aad
 	m->body_first->t_line = lineCreate((data) ? data : "");
3e69b5be
 
b2223aad
 	if(m->body_first->t_line == NULL) {
 		cli_errmsg("messageAddStrAtTop: out of memory\n");
3e69b5be
 		return -1;
 	}
 	return 1;
 }
 
 /*
94f051b0
  * Put the contents of the given text at the end of the current object.
  * Can be used either to move a text object into a message, or to move a
  * message's text into another message only moving from a given offset.
  * The given text emptied; it can be used again if needed, though be warned that
  * it will have an empty line at the start.
  * Returns -1 for failure, 0 for success
  */
 int
 messageMoveText(message *m, text *t, message *old_message)
 {
 	int status = 0;

 	if(m->body_first != NULL) {
 		/* m already has a body: add t after its last node */
 		m->body_last = textMove(m->body_last, t);
 		if(m->body_last == NULL) {
 			status = -1;
 			m->body_last = m->body_first;
 		}
 	} else if(old_message == NULL) {
 		/* m is empty and t stands alone */
 		m->body_first = textMove(NULL, t);
 		m->body_last = m->body_first;
 		if(m->body_first == NULL)
 			return -1;
 	} else {
 		text *cursor;

 		/*
 		 * t is within old_message which is about to be
 		 * destroyed
 		 */
 		assert(old_message->body_first != NULL);

 		m->body_first = t;

 		/* free the nodes of old_message that come before t */
 		cursor = old_message->body_first;
 		while(cursor != t) {
 			text *following = cursor->t_next;

 			if(cursor->t_line) {
 				lineUnlink(cursor->t_line);
 				cursor->t_line = NULL;
 			}

 			free(cursor);
 			cursor = following;

 			if(cursor == NULL) {
 				cli_dbgmsg("messageMoveText sanity check: t not within old_message\n");
 				return -1;
 			}
 		}
 		assert(old_message->body_last->t_next == NULL);

 		/* m takes over the rest of the list */
 		m->body_last = old_message->body_last;
 		old_message->body_first = NULL;
 		old_message->body_last = NULL;

 		/* Do any pointers need to be reset? */
 		if(!old_message->bounce && !old_message->encoding &&
 		   !old_message->binhex && !old_message->yenc)
 			return 0;

 		/* yes, so go through the moved lines again from the top */
 		m->body_last = m->body_first;
 	}

 	/* advance to the real end of the body, checking each line on the way */
 	while(m->body_last->t_next != NULL) {
 		m->body_last = m->body_last->t_next;
 		if(m->body_last->t_line != NULL)
 			messageIsEncoding(m);
 	}

 	return status;
 }
 
 /*
b2223aad
  * See if the last line marks the start of a non MIME inclusion that
  * will need to be scanned
  */
 static void
 messageIsEncoding(message *m)
 {
 	static const char encoding[] = "Content-Transfer-Encoding";
 	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
 	text *last = m->body_last;
 	const char *line = lineGetData(last->t_line);

 	/*
 	 * The tests are tried in order and at most one marker is set per
 	 * call. Each marker is only set while it is still NULL.
 	 */

 	/* a Content-Transfer-Encoding line that does not mention 7bit */
 	if((m->encoding == NULL) &&
 	   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
 	   (strstr(line, "7bit") == NULL)) {
 		m->encoding = last;
 		return;
 	}

 	/* a "Received: " line that cli_filetype() takes to be mail */
 	if((m->bounce == NULL) && m->ctx &&
 	   (strncasecmp(line, "Received: ", 10) == 0) &&
 	   (cli_filetype((const unsigned char *)line, strlen(line), m->ctx->engine) == CL_TYPE_MAIL)) {
 		m->bounce = last;
 		return;
 	}

 	/* uuencode is not looked for here, fast track visa handles it */

 	/*
 	 * Look for close matches for BinHex, but
 	 * simil() is expensive so only do it if it's
 	 * likely to be found
 	 */
 	if((m->binhex == NULL) &&
 	   strstr(line, "BinHex") &&
 	   (simil(line, binhex) > 90)) {
 		m->binhex = last;
 		return;
 	}

 	/* start of a yEnc block */
 	if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
 		m->yenc = last;
 }
 
 /*
d879a7b0
  * Returns a pointer to the body of the message. Note that it does NOT return
  * a copy of the data
  */
2673dc74
 text *
 messageGetBody(message *m)
e3aaff8e
 {
 	assert(m != NULL);
d879a7b0
 	return m->body_first;
e3aaff8e
 }
 
 /*
78e302e1
  * Export a message using the given export routines
e7aa5e3d
  *
  * TODO: It really should export into an array, one
  * for each encoding algorithm. However, what it does is it returns the
  * last item that was exported. That's sufficient for now.
e3aaff8e
  */
09e05292
 static void *
2673dc74
 messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text)
e3aaff8e
 {
78e302e1
 	void *ret;
2673dc74
 	text *t_line;
406b1800
 	char *filename;
c7b69776
 	int i;
e3aaff8e
 
 	assert(m != NULL);
 
c7b69776
 	if(messageGetBody(m) == NULL)
 		return NULL;
 
78e302e1
 	ret = (*create)();
e3aaff8e
 
78e302e1
 	if(ret == NULL)
c81143fc
 		return NULL;
e3aaff8e
 
4b187745
 	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
 
c7b69776
 	if(m->numberOfEncTypes == 0) {
 		/*
 		 * Fast copy
 		 */
c691512c
 		cli_dbgmsg("messageExport: Entering fast copy mode\n");
 
ba74b333
 #if	0
 		filename = messageGetFilename(m);
 
 		if(filename == NULL) {
 			cli_dbgmsg("Unencoded attachment sent with no filename\n");
 			messageAddArgument(m, "name=attachment");
 		} else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0))
 			/*
 			 * Some virus attachments don't say how they've
 			 * been encoded. We assume base64
 			 */
 			messageSetEncoding(m, "base64");
 #else
2a4b5c6e
 		filename = (char *)messageFindArgument(m, "filename");
e3aaff8e
 		if(filename == NULL) {
2a4b5c6e
 			filename = (char *)messageFindArgument(m, "name");
e3aaff8e
 
 			if(filename == NULL) {
af780d0c
 				cli_dbgmsg("Unencoded attachment sent with no filename\n");
ec83932f
 				messageAddArgument(m, "name=attachment");
c7b69776
 			} else
843e1da6
 				/*
 				 * Some virus attachments don't say how they've
75cc6fb0
 				 * been encoded. We assume base64.
 				 * RFC says encoding should be 7-bit.
843e1da6
 				 */
75cc6fb0
 				messageSetEncoding(m, "7-bit");
e3aaff8e
 		}
ba74b333
 #endif
e3aaff8e
 
bea62be3
 		(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
e3aaff8e
 
7c5a7a47
 		if(filename)
 			free((char *)filename);
e3aaff8e
 
182bbcc8
 		if(m->numberOfEncTypes == 0)
2673dc74
 			return exportText(messageGetBody(m), ret, destroy_text);
e3aaff8e
 	}
 
3e55fc76
 	if(setCTX && m->ctx)
 		(*setCTX)(ret, m->ctx);
 
c7b69776
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		encoding_type enctype = m->encodingTypes[i];
32c9b306
 		size_t size;
c7b69776
 
e7aa5e3d
 		if(i > 0) {
 			void *newret;
 
 			newret = (*create)();
 			if(newret == NULL) {
f2d79ab3
 				cli_dbgmsg("Not all decoding algorithms were run\n");
e7aa5e3d
 				return ret;
 			}
 			(*destroy)(ret);
 			ret = newret;
 		}
8affc406
 		cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype);
e3aaff8e
 		/*
c7b69776
 		 * Find the filename to decode
e3aaff8e
 		 */
73ddf91f
 		if(((enctype == YENCODE) || (i == 0)) && yEncBegin(m)) {
95e11e5a
 			const char *f;
 
16394c6d
 			/*
 			 * TODO: handle multipart yEnc encoded files
 			 */
 			t_line = yEncBegin(m);
95e11e5a
 			f = lineGetData(t_line->t_line);
16394c6d
 
95e11e5a
 			if((filename = strstr(f, " name=")) != NULL) {
0cf4cea7
 				filename = cli_strdup(&filename[6]);
16394c6d
 				if(filename) {
 					cli_chomp(filename);
 					strstrip(filename);
 					cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
 				}
7c5a7a47
 			}
16394c6d
 
bea62be3
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
7c5a7a47
 			if(filename) {
 				free((char *)filename);
 				filename = NULL;
 			}
16394c6d
 			t_line = t_line->t_next;
 			enctype = YENCODE;
bae9c53f
 			m->yenc = NULL;
c7b69776
 		} else {
182bbcc8
 			if(enctype == UUENCODE) {
 				/*
73ddf91f
 				 * The body will have been stripped out by the
 				 * fast track visa system. Treat as plain/text,
 				 * which means we'll still scan for funnies
 				 * outside of the uuencoded portion.
182bbcc8
 				 */
af3c6acb
 				cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
182bbcc8
 				enctype = m->encodingTypes[i] = NOENCODING;
 			}
ba74b333
 			filename = messageGetFilename(m);
 
c7b69776
 			if(filename == NULL) {
ba74b333
 				cli_dbgmsg("Attachment sent with no filename\n");
 				messageAddArgument(m, "name=attachment");
 			} else if(enctype == NOENCODING)
 				/*
 				 * Some virus attachments don't say how
 				 * they've been encoded. We assume
 				 * base64.
 				 *
 				 * FIXME: don't do this if it's a fall
 				 * through from uuencode
 				 */
 				messageSetEncoding(m, "base64");
c7b69776
 
bea62be3
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
c7b69776
 
 			t_line = messageGetBody(m);
 		}
2ad0c86e
 
16394c6d
 		if(filename)
 			free((char *)filename);
c7b69776
 
 		/*
af780d0c
 		 * t_line should now point to the first (encoded) line of the
 		 * message
c7b69776
 		 */
 		if(t_line == NULL) {
6351aa86
 			cli_dbgmsg("Empty attachment not saved\n");
c7b69776
 			(*destroy)(ret);
 			return NULL;
 		}
 
 		if(enctype == NOENCODING) {
de101a82
 			/*
c7b69776
 			 * Fast copy
de101a82
 			 */
2673dc74
 			if(i == m->numberOfEncTypes - 1) {
 				/* last one */
 				(void)exportText(t_line, ret, destroy_text);
 				break;
 			}
 			(void)exportText(t_line, ret, 0);
c7b69776
 			continue;
de101a82
 		}
e3aaff8e
 
32c9b306
 		size = 0;
c7b69776
 		do {
e7aa5e3d
 			unsigned char smallbuf[1024];
 			unsigned char *uptr, *data;
c7b69776
 			const char *line = lineGetData(t_line->t_line);
e7aa5e3d
 			unsigned char *bigbuf;
 			size_t datasize;
e3aaff8e
 
182bbcc8
 			if(enctype == YENCODE) {
16394c6d
 				if(line == NULL)
 					continue;
7c5a7a47
 				if(strncmp(line, "=yend ", 6) == 0)
16394c6d
 					break;
c7b69776
 			}
 
e7aa5e3d
 			/*
 			 * Add two bytes for '\n' and '\0'
 			 */
 			datasize = (line) ? strlen(line) + 2 : 0;
c81143fc
 
4ab382c3
 			if(datasize >= sizeof(smallbuf))
 				data = bigbuf = (unsigned char *)cli_malloc(datasize);
e7aa5e3d
 			else {
 				bigbuf = NULL;
 				data = smallbuf;
 				datasize = sizeof(smallbuf);
 			}
c81143fc
 
4ab382c3
 			uptr = decodeLine(m, enctype, line, data, datasize);
e7aa5e3d
 			if(uptr == NULL) {
 				if(data == bigbuf)
 					free(data);
 				break;
 			}
0e5a0129
 
32c9b306
 			if(uptr != data) {
5be8beb8
 				assert((size_t)(uptr - data) < datasize);
c7b69776
 				(*addData)(ret, data, (size_t)(uptr - data));
32c9b306
 				size += (size_t)(uptr - data);
 			}
c81143fc
 
e7aa5e3d
 			if(data == bigbuf)
 				free(data);
 
c7b69776
 			/*
4685e392
 			 * According to RFC2045, '=' is used to pad out
c7b69776
 			 * the last byte and should be used as evidence
 			 * of the end of the data. Some mail clients
 			 * annoyingly then put plain text after the '='
 			 * byte and viruses exploit this bug. Sigh
 			 */
 			/*if(enctype == BASE64)
 				if(strchr(line, '='))
 					break;*/
3f46285b
 			if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
 				lineUnlink(t_line->t_line);
 				t_line->t_line = NULL;
 			}
c7b69776
 		} while((t_line = t_line->t_next) != NULL);
32c9b306
 
95e11e5a
 		cli_dbgmsg("Exported %lu bytes using enctype %d\n",
8affc406
 			(unsigned long)size, (int)enctype);
28c29d59
 
cdd91f4f
 		/* Verify we have nothing left to flush out */
 		if(m->base64chars) {
 			unsigned char data[4];
 			unsigned char *ptr;
0c0894b8
 
a9ecf619
 			ptr = base64Flush(m, data);
cdd91f4f
 			if(ptr)
 				(*addData)(ret, data, (size_t)(ptr - data));
 		}
0c0894b8
 	}
 
78e302e1
 	return ret;
 }
 
a9ecf619
 /*
  * Decode the base64 characters which are still being carried over in the
  * message (m->base64chars of them) into buf, and reset the carry count.
  *
  * Returns what decode() returned, or NULL if there was nothing to flush.
  */
 unsigned char *
 base64Flush(message *m, unsigned char *buf)
 {
 	unsigned char *end;
 
 	cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);
 
 	if(m->base64chars == 0)
 		return NULL;
 
 	end = decode(m, NULL, buf, base64, FALSE);
 	m->base64chars = 0;
 
 	return end;
 }
 
4270f93b
 /*
  * Export one part of a partial message through fileblobPartialSet. The
  * name handed to the export is built from dir, the current time, md5id
  * and the part number.
  *
  * Returns CL_SUCCESS, or CL_EFORMAT if nothing could be exported.
  */
 int messageSavePartial(message *m, const char *dir, const char *md5id, unsigned part)
 {
 	char fullname[1024];
 	unsigned long time_val;
 	fileblob *fb;
 
 	cli_dbgmsg("messageSavePartial\n");
 
 	time_val = time(NULL);
 	snprintf(fullname, sizeof(fullname), "%s"PATHSEP"clamav-partial-%lu_%s-%u", dir, time_val, md5id, part);
 
 	/* the message text is left alone (destroy_text is 0) */
 	fb = messageExport(m, fullname,
 		(void *(*)(void))fileblobCreate,
 		(void(*)(void *))fileblobDestroy,
 		(void(*)(void *, const char *, const char *))fileblobPartialSet,
 		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
 		(void *(*)(text *, void *, int))textToFileblob,
 		(void(*)(void *, cli_ctx *))fileblobSetCTX,
 		0);
 
 	if(fb == NULL)
 		return CL_EFORMAT;
 
 	fileblobDestroy(fb);
 
 	return CL_SUCCESS;
 }
 
78e302e1
 /*
  * Decode and transfer the contents of the message into a fileblob
  * The caller must free the returned fileblob
  */
 fileblob *
2673dc74
 messageToFileblob(message *m, const char *dir, int destroy)
78e302e1
 {
2673dc74
 	fileblob *fb;
 
09e05292
 	cli_dbgmsg("messageToFileblob\n");
5c8a1012
 	fb = messageExport(m, dir,
 		(void *(*)(void))fileblobCreate,
 		(void(*)(void *))fileblobDestroy,
 		(void(*)(void *, const char *, const char *))fileblobSetFilename,
 		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
 		(void *(*)(text *, void *, int))textToFileblob,
 		(void(*)(void *, cli_ctx *))fileblobSetCTX,
 		destroy);
2673dc74
 	if(destroy && m->body_first) {
 		textDestroy(m->body_first);
 		m->body_first = m->body_last = NULL;
 	}
 	return fb;
78e302e1
 }
 
 /*
8386c723
  * Decode and transfer the contents of the message into a closed blob
78e302e1
  * The caller must free the returned blob
  */
 blob *
2673dc74
 messageToBlob(message *m, int destroy)
78e302e1
 {
c691512c
 	blob *b;
 
 	cli_dbgmsg("messageToBlob\n");
 
 	b = messageExport(m, NULL,
5c8a1012
 		(void *(*)(void))blobCreate,
 		(void(*)(void *))blobDestroy,
 		(void(*)(void *, const char *, const char *))blobSetFilename,
 		(void(*)(void *, const unsigned char *, size_t))blobAddData,
 		(void *(*)(text *, void *, int))textToBlob,
 		(void(*)(void *, cli_ctx *))NULL,
 		destroy);
2673dc74
 
 	if(destroy && m->body_first) {
 		textDestroy(m->body_first);
 		m->body_first = m->body_last = NULL;
 	}
 	return b;
e3aaff8e
 }
 
 /*
  * Decode and transfer the contents of the message into a text area
d879a7b0
  * The caller must free the returned text
e3aaff8e
  */
 text *
0c0894b8
 messageToText(message *m)
e3aaff8e
 {
c7b69776
 	int i;
e3aaff8e
 	text *first = NULL, *last = NULL;
 	const text *t_line;
 
 	assert(m != NULL);
 
c7b69776
 	if(m->numberOfEncTypes == 0) {
e3aaff8e
 		/*
 		 * Fast copy
 		 */
 		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
 			else {
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
 
b2223aad
 			if(last == NULL) {
0c0894b8
 				if(first)
 					textDestroy(first);
963c6ae7
 				return NULL;
 			}
420df63c
 			if(t_line->t_line)
 				last->t_line = lineLink(t_line->t_line);
 			else
 				last->t_line = NULL;	/* empty line */
e3aaff8e
 		}
c7b69776
 		if(last)
 			last->t_next = NULL;
 
 		return first;
 	}
 	/*
 	 * Scan over the data a number of times once for each claimed encoding
 	 * type
 	 */
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		const encoding_type enctype = m->encodingTypes[i];
 
 		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
8affc406
 			i, (int)enctype);
7ae8fbfb
 
 		switch(enctype) {
 			case NOENCODING:
 			case BINARY:
 			case EIGHTBIT:
 				/*
 				 * Fast copy
 				 */
 				for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 					if(first == NULL)
 						first = last = cli_malloc(sizeof(text));
92012beb
 					else if (last) {
7ae8fbfb
 						last->t_next = cli_malloc(sizeof(text));
 						last = last->t_next;
 					}
 
 					if(last == NULL) {
 						if(first) {
 							textDestroy(first);
 						}
 						return NULL;
 					}
 					if(t_line->t_line)
 						last->t_line = lineLink(t_line->t_line);
 					else
 						last->t_line = NULL;	/* empty line */
c7b69776
 				}
7ae8fbfb
 				continue;
 			case UUENCODE:
fc809c9e
 				cli_warnmsg("messageToText: Unexpected attempt to handle uuencoded file\n");
182bbcc8
 				if(first) {
92012beb
 					if(last)
 						last->t_next = NULL;
182bbcc8
 					textDestroy(first);
c7b69776
 				}
182bbcc8
 				return NULL;
7ae8fbfb
 			case YENCODE:
 				t_line = yEncBegin(m);
16394c6d
 
7ae8fbfb
 				if(t_line == NULL) {
 					/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
 					if(first) {
92012beb
 						if(last)
 							last->t_next = NULL;
7ae8fbfb
 						textDestroy(first);
 					}
 					return NULL;
11926f3b
 				}
7ae8fbfb
 				t_line = t_line->t_next;
 			default:
 				if((i == 0) && binhexBegin(m))
 					cli_warnmsg("Binhex messages not supported yet.\n");
 				t_line = messageGetBody(m);
130bc08c
 		}
e3aaff8e
 
0ae75a8d
 		for(; t_line; t_line = t_line->t_next) {
 			unsigned char data[1024];
 			unsigned char *uptr;
b2223aad
 			const char *line = lineGetData(t_line->t_line);
0ae75a8d
 
182bbcc8
 			if(enctype == BASE64)
0c0894b8
 				/*
 				 * ignore blanks - breaks RFC which is
 				 * probably the point!
 				 */
 				if(line == NULL)
 					continue;
28c29d59
 
e7aa5e3d
 			assert((line == NULL) || (strlen(line) <= sizeof(data)));
 
c7b69776
 			uptr = decodeLine(m, enctype, line, data, sizeof(data));
e3aaff8e
 
0ae75a8d
 			if(uptr == NULL)
 				break;
e3aaff8e
 
0ae75a8d
 			assert(uptr <= &data[sizeof(data)]);
 
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
92012beb
 			else if (last) {
0ae75a8d
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
e3aaff8e
 
02927896
 			if(last == NULL)
843e1da6
 				break;
28c29d59
 
ab4038b4
 			/*
 			 * If the decoded line is the same as the encoded
 			 * there's no need to take a copy, just link it.
 			 * Note that the comparison is done without the
 			 * trailing newline that the decoding routine may have
 			 * added - that's why there's a strncmp rather than a
 			 * strcmp - that'd be bad for MIME decoders, but is OK
 			 * for AV software
 			 */
7c5a7a47
 			if((data[0] == '\n') || (data[0] == '\0'))
 				last->t_line = NULL;
8f1a49cd
 			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
bae9c53f
 #ifdef	CL_DEBUG
ab4038b4
 				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
bae9c53f
 #endif
ab4038b4
 				last->t_line = lineLink(t_line->t_line);
 			} else
7c5a7a47
 				last->t_line = lineCreate((char *)data);
02927896
 
c7b69776
 			if(line && enctype == BASE64)
28c29d59
 				if(strchr(line, '='))
 					break;
0ae75a8d
 		}
3de8ba3d
 		if(m->base64chars) {
 			unsigned char data[4];
 
df3abd1f
 			memset(data, '\0', sizeof(data));
7c5a7a47
 			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
3de8ba3d
 				if(first == NULL)
 					first = last = cli_malloc(sizeof(text));
92012beb
 				else if (last) {
3de8ba3d
 					last->t_next = cli_malloc(sizeof(text));
 					last = last->t_next;
 				}
 
 				if(last != NULL)
7c5a7a47
 					last->t_line = lineCreate((char *)data);
3de8ba3d
 			}
 			m->base64chars = 0;
 		}
e3aaff8e
 	}
 
 	if(last)
 		last->t_next = NULL;
 
 	return first;
 }
 
2673dc74
 /*
  * Return the line recorded in the message as the start of the yEnc
  * encoded data (m->yenc).
  */
 text *
 yEncBegin(message *m)
 {
 	text *marker = m->yenc;
 
 	return marker;
 }
 
e3aaff8e
 /*
130bc08c
  * Scan to find the BINHEX message (if any)
  */
ae3bda56
 #if	0
2673dc74
 const text *
 binhexBegin(message *m)
130bc08c
 {
 	const text *t_line;
 
 	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
 		if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
 			return t_line;
 
 	return NULL;
 }
ae3bda56
 #else
2673dc74
 text *
 binhexBegin(message *m)
ae3bda56
 {
 	return m->binhex;
 }
 #endif
130bc08c
 
 /*
cca4efe4
  * Scan to find a bounce message. There is no standard for these, not
  * even a convention, so don't expect this to be foolproof
  */
ae3bda56
 #if	0
2673dc74
 text *
 bounceBegin(message *m)
cca4efe4
 {
 	const text *t_line;
1892da50
 
86cf20d6
 	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
3805ebcb
 		if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
86cf20d6
 			return t_line;
cca4efe4
 
 	return NULL;
 }
ae3bda56
 #else
2673dc74
 text *
 bounceBegin(message *m)
ae3bda56
 {
 	return m->bounce;
 }
 #endif
 
 /*
  * If a message doesn't not contain another message which could be harmful
  * it is deemed to be safe.
  *
  * TODO: ensure nothing can get through this
  *
  * TODO: check to see if we need to
  * find anything else, perhaps anything
  * from the RFC821 table?
  */
 #if	0
 int
 messageIsAllText(const message *m)
 {
 	const text *t;
 
 	for(t = messageGetBody(m); t; t = t->t_next)
 		if(strncasecmp(t->t_text,
 			"Content-Transfer-Encoding",
 			strlen("Content-Transfer-Encoding")) == 0)
 				return 0;
 
 	return 1;
 }
 #else
2673dc74
 text *
 encodingLine(message *m)
ae3bda56
 {
61485e09
 	return m->encoding;
ae3bda56
 }
 #endif
cca4efe4
 
 /*
e3aaff8e
  * Decode a line and add it to a buffer, return the end of the buffer
0ae75a8d
  * to help appending callers. There is no new line at the end of "line"
9b9fcfc5
  *
  * buflen is the size of buf in bytes
e3aaff8e
  */
4945127a
 /*
  * m		message being decoded; it holds the base64/uuencode carry
  *		over state that decode() reads and updates
  * et		how the line has been encoded
  * line		the encoded line, NULL stands for an empty line
  * buf		where the decoded data is put, must not be NULL
  * buflen	size of buf. It only limits the number of characters read
  *		from a quoted-printable line and is checked against the
  *		length of a uuencoded line; the other encodings do not
  *		look at it
  *
  * For the encodings that end with a break, a '\0' is put after the decoded
  * data and a pointer to that '\0' is returned. The copy-through encodings
  * return whatever cli_strrcpy() returns after adding the "\n" (presumably
  * the new end of the string - confirm against cli_strrcpy).
  */
 unsigned char *
c7b69776
 decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
e3aaff8e
 {
a2959b28
 	size_t len, reallen;
5a642650
 	bool softbreak;
4ab382c3
 	char *p2, *copy;
f003b79e
 	char base64buf[RFC2045LENGTH + 1];
e3aaff8e
 
11926f3b
 	/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
e7aa5e3d
 
e3aaff8e
 	assert(m != NULL);
9b9fcfc5
 	assert(buf != NULL);
e3aaff8e
 
c7b69776
 	switch(et) {
cc96e455
 		case BINARY:
 			/*
 			 * TODO: find out what this is, encoded as binary??
 			 */
 			/* fall through */
e3aaff8e
 		case NOENCODING:
 		case EIGHTBIT:
049a18b9
 		default:	/* unknown encoding type - try our best */
 			/* Copy through: no bounds check against buflen here */
50ecf683
 			if(line)	/* empty line? */
9b133473
 				buf = (unsigned char *)cli_strrcpy((char *)buf, line);
e3aaff8e
 			/* Put the new line back in */
9b133473
 			return (unsigned char *)cli_strrcpy((char *)buf, "\n");
e3aaff8e
 
 		case QUOTEDPRINTABLE:
02927896
 			if(line == NULL) {	/* empty line */
 				*buf++ = '\n';
 				break;
 			}
0c0894b8
 
4685e392
 			softbreak = FALSE;
 			/*
 			 * buflen is decremented once per pass of the loop,
 			 * whether the pass consumed one character or an
 			 * "=XX" triplet
 			 */
4ab382c3
 			while(buflen && *line) {
4685e392
 				if(*line == '=') {
 					unsigned char byte;
 
 					if((*++line == '\0') || (*line == '\n')) {
 						softbreak = TRUE;
 						/* soft line break */
 						break;
 					}
 
 					/* first (high) hex digit */
 					byte = hex(*line);
 
 					if((*++line == '\0') || (*line == '\n')) {
 						/*
 						 * broken e-mail, not
 						 * adhering to RFC2045
 						 */
 						*buf++ = byte;
 						break;
 					}
 
ce30bbe0
 					/*
 					 * Handle messages that use a broken
 					 * quoted-printable encoding of
 					 * href=\"http://, instead of =3D
 					 */
 					/*
 					 * hex() hands back '=' for a character
 					 * that isn't a hex digit. In that case
 					 * step back to the '=' so that it is
 					 * output as it stands and the characters
 					 * after it are handled as plain text.
 					 */
9fe789f8
 					if(byte != '=')
 						byte = (byte << 4) | hex(*line);
 					else
ce30bbe0
 						line -= 2;
 
4685e392
 					*buf++ = byte;
 				} else
 					*buf++ = *line;
4ab382c3
 				++line;
 				--buflen;
4685e392
 			}
 			/* a soft break joins this line to the next one */
5a642650
 			if(!softbreak)
 				/* Put the new line back in */
9b9fcfc5
 				*buf++ = '\n';
e3aaff8e
 			break;
 
 		case BASE64:
02927896
 			if(line == NULL)
 				break;
28c29d59
 			/*
4685e392
 			 * RFC2045 sets the maximum length to 76 bytes
28c29d59
 			 * but many e-mail clients ignore that
 			 */
 			/*
 			 * Work on a private copy, since the line is about to
 			 * be modified: on the stack if it fits, else on the
 			 * heap
 			 */
f003b79e
 			if(strlen(line) < sizeof(base64buf)) {
 				strcpy(base64buf, line);
 				copy = base64buf;
 			} else {
0cf4cea7
 				copy = cli_strdup(line);
f003b79e
 				if(copy == NULL)
 					break;
 			}
843e1da6
 
 			/* Anything from the first '=' onwards is dropped */
28c29d59
 			p2 = strchr(copy, '=');
e3aaff8e
 			if(p2)
 				*p2 = '\0';
0c0894b8
 
32c9b306
 			sanitiseBase64(copy);
 
e3aaff8e
 			/*
 			 * Klez doesn't always put "=" on the last line
 			 */
 			/*
 			 * The fast decoder is only asked for when there was
 			 * no padding and the length is a multiple of 4
 			 */
0c0894b8
 			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
28c29d59
 
f003b79e
 			if(copy != base64buf)
 				free(copy);
e3aaff8e
 			break;
 
 		case UUENCODE:
ae5c693a
 			assert(m->base64chars == 0);
 
02927896
 			if((line == NULL) || (*line == '\0'))	/* empty line */
f6ded658
 				break;
e3aaff8e
 			if(strcasecmp(line, "end") == 0)
 				break;
5198de85
 			if(isuuencodebegin(line))
 				break;
e3aaff8e
 
 			/* a length character which stands for no data */
 			if((line[0] & 0x3F) == ' ')
 				break;
 
5be8beb8
 			/*
 			 * reallen contains the number of bytes that were
 			 *	encoded
 			 */
a2959b28
 			reallen = (size_t)uudecode(*line++);
 			/* reallen is unsigned, so this only catches zero */
5be8beb8
 			if(reallen <= 0)
 				break;
 			if(reallen > 62)
a2959b28
 				break;
 			/* line now points at the data after the length character */
 			len = strlen(line);
e3aaff8e
 
5be8beb8
 			if((len > buflen) || (reallen > len))
9b9fcfc5
 				/*
 				 * In practice this should never occur since
 				 * the maximum length of a uuencoded line is
 				 * 62 characters
 				 */
6351aa86
 				cli_dbgmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
a2959b28
 			else {
 				/*
 				 * Only the reallen bytes that the line claims
 				 * to hold are kept, whatever decode() wrote
 				 */
 				(void)decode(m, line, buf, uudecode, (len & 3) == 0);
 				buf = &buf[reallen];
 			}
ae5c693a
 			m->base64chars = 0;	/* this happens with broken uuencoded files */
e3aaff8e
 			break;
16394c6d
 		case YENCODE:
 			if((line == NULL) || (*line == '\0'))	/* empty line */
 				break;
 			if(strncmp(line, "=yend ", 6) == 0)
 				break;
 
 			/*
 			 * '=' escapes the character after it, which has 64
 			 * taken off; every other character has 42 taken off.
 			 * An '=' at the very end of the line is ignored.
 			 */
 			while(*line)
 				if(*line == '=') {
 					if(*++line == '\0')
 						break;
 					*buf++ = ((*line++ - 64) & 255);
 				} else
 					*buf++ = ((*line++ - 42) & 255);
 			break;
e3aaff8e
 	}
 
 	/* Terminate the output; the '\0' itself is not counted */
9b9fcfc5
 	*buf = '\0';
 	return buf;
e3aaff8e
 }
 
c43c9798
 /*
9e1dc6e8
  * Remove the non base64 characters such as spaces from a string. Spaces
  * shouldn't appear mid string in base64 files, but some broken mail clients
  * ignore such errors rather than discarding the mail, and virus writers
  * exploit this bug
0c0894b8
  */
 static void
9e1dc6e8
 sanitiseBase64(char *s)
0c0894b8
 {
9fe789f8
 	cli_dbgmsg("sanitiseBase64 '%s'\n", s);
 	while(*s)
11926f3b
 		if(base64Table[(unsigned int)(*s & 0xFF)] == 255) {
9e1dc6e8
 			char *p1;
17dd811e
 
9e1dc6e8
 			for(p1 = s; p1[0] != '\0'; p1++)
 				p1[0] = p1[1];
9fe789f8
 		} else
 			s++;
0c0894b8
 }
 
 /*
c43c9798
  * Returns one byte after the end of the decoded data in "out"
0c0894b8
  *
  * Update m->base64chars with the last few bytes of data that we haven't
  * decoded. After the last line is found, decode will be called with in = NULL
  * to flush these out
c43c9798
  */
e3aaff8e
 /*
  * Turn groups of four encoded characters into three bytes each.
  *
  * m		message holding the characters carried over from the last
  *		call (base64chars of them, in base64_1 to base64_3)
  * in		the encoded characters, or NULL to flush the carry over
  * out		where the decoded bytes are put
  * decoder	maps one encoded character onto its value, the callers in
  *		this file pass base64 or uudecode
  * isFast	the caller promises whole groups of four; it is ignored
  *		when characters have been carried over
  *
  * Returns one byte after the end of the decoded data in out.
  */
 static unsigned char *
0c0894b8
 decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
 {
 	unsigned char b1, b2, b3, b4;
 	unsigned char cb1, cb2, cb3;	/* carried over from last line */
 
11926f3b
 	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
0c0894b8
 		in ? strlen(in) : 0,
cd11ef39
 		isFast, m->base64chars);*/
0c0894b8
 
 	cb1 = cb2 = cb3 = '\0';
 
 	/*
 	 * Pick up what was carried over. With anything carried over the
 	 * input can't be lined up on groups of four, so no fast decode.
 	 */
 	switch(m->base64chars) {
 		case 3:
 			cb3 = m->base64_3;
 			/* FALLTHROUGH */
 		case 2:
 			cb2 = m->base64_2;
 			/* FALLTHROUGH */
 		case 1:
 			cb1 = m->base64_1;
 			isFast = FALSE;
 			break;
 		default:
 			assert(m->base64chars <= 3);
 	}
 
 	if(isFast)
 		/* Fast decoding if not last line */
 		/*
 		 * Four characters are read per pass and the end of the
 		 * string is only tested at the top of a pass
 		 */
 		while(*in) {
 			b1 = (*decoder)(*in++);
 			b2 = (*decoder)(*in++);
 			b3 = (*decoder)(*in++);
 			/*
 			 * Put this line here to help on some compilers which
7cd9337a
 			 * can make use of some architecture's ability to
0c0894b8
 			 * multiprocess when different variables can be
 			 * updated at the same time - here b3 is used in
 			 * one line, b1/b2 in the next and b4 in the next after
 			 * that, b3 and b4 rely on in but b1/b2 don't
 			 */
 			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 			b4 = (*decoder)(*in++);
 			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 			*out++ = (b3 << 6) | (b4 & 0x3F);
 		}
e7aa5e3d
 	else if(in == NULL) {	/* flush */
 		int nbytes;
 
 		if(m->base64chars == 0)
 			return out;
0c0894b8
 
e7aa5e3d
 		cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
e414082f
 			isalnum(cb1) ? cb1 : '@',
 			isalnum(cb2) ? cb2 : '@',
 			isalnum(cb3) ? cb3 : '@');
0c0894b8
 
 		/*
 		 * Use up the carried over values, counting in nbytes how
 		 * many of them take part in the output. A second value of
 		 * zero with no third value is not counted.
 		 */
e7aa5e3d
 		m->base64chars--;
 		b1 = cb1;
 		nbytes = 1;
32c9b306
 
e7aa5e3d
 		if(m->base64chars) {
0c0894b8
 			m->base64chars--;
e7aa5e3d
 			b2 = cb2;
0c0894b8
 
 			if(m->base64chars) {
afe4ae14
 				nbytes = 2;
0c0894b8
 				m->base64chars--;
e7aa5e3d
 				b3 = cb3;
b1ea0d64
 				nbytes = 3;
e7aa5e3d
 			} else if(b2)
afe4ae14
 				nbytes = 2;
e7aa5e3d
 		}
0c0894b8
 
 		/* nbytes is 1, 2 or 3 here, it is never set to 4 above */
e7aa5e3d
 		switch(nbytes) {
 			case 3:
 				b4 = '\0';
 				/* fall through */
 			case 4:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 				/* the third byte is only output if b3 has bits for it */
62b7686d
 				if((nbytes == 4) || (b3&0x3))
d746152b
 					*out++ = (b3 << 6) | (b4 & 0x3F);
e7aa5e3d
 				break;
 			case 2:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				/* likewise for the second byte and b2 */
afe4ae14
 				if((b2 << 4) & 0xFF)
 					*out++ = b2 << 4;
e7aa5e3d
 				break;
 			case 1:
 				*out++ = b1 << 2;
 				break;
 			default:
 				assert(0);
 		}
 	/*
 	 * Slow decode: each group takes carried over values first, then
 	 * characters from in. nbytes counts how many values the group got
 	 * before the input ran out.
 	 */
 	} else while(*in) {
 		int nbytes;
0c0894b8
 
e7aa5e3d
 		if(m->base64chars) {
 			m->base64chars--;
 			b1 = cb1;
 		} else
 			b1 = (*decoder)(*in++);
0c0894b8
 
e7aa5e3d
 		if(*in == '\0') {
 			b2 = '\0';
 			nbytes = 1;
 		} else {
0c0894b8
 			if(m->base64chars) {
 				m->base64chars--;
e7aa5e3d
 				b2 = cb2;
0c0894b8
 			} else
e7aa5e3d
 				b2 = (*decoder)(*in++);
0c0894b8
 
 			if(*in == '\0') {
e7aa5e3d
 				b3 = '\0';
 				nbytes = 2;
0c0894b8
 			} else {
 				if(m->base64chars) {
 					m->base64chars--;
e7aa5e3d
 					b3 = cb3;
0c0894b8
 				} else
e7aa5e3d
 					b3 = (*decoder)(*in++);
0c0894b8
 
 				if(*in == '\0') {
e7aa5e3d
 					b4 = '\0';
 					nbytes = 3;
0c0894b8
 				} else {
e7aa5e3d
 					b4 = (*decoder)(*in++);
 					nbytes = 4;
0c0894b8
 				}
 			}
e7aa5e3d
 		}
0c0894b8
 
e7aa5e3d
 		switch(nbytes) {
30e18caf
 			case 4:
 				/* a whole group: output it and go round again */
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 				*out++ = (b3 << 6) | (b4 & 0x3F);
 				continue;
 			/*
 			 * Part of a group: nothing is output, the values are
 			 * saved in the message for the next call
 			 */
e7aa5e3d
 			case 3:
 				m->base64_3 = b3;
 				/* FALLTHROUGH */
 			case 2:
 				m->base64_2 = b2;
 				/* FALLTHROUGH */
 			case 1:
 				m->base64_1 = b1;
30e18caf
 				m->base64chars = nbytes;
e7aa5e3d
 				break;
 			default:
 				assert(0);
 		}
30e18caf
 		break;	/* nbytes != 4 => EOL */
0c0894b8
 	}
 	return out;
 }
e3aaff8e
 
 /*
  * Return the value (0 to 15) of a hexadecimal digit, in either case.
  * Anything else is reported and '=' is returned, which is how callers
  * tell that the character was not a hex digit.
  */
 static unsigned char
 hex(char c)
 {
 	/*
 	 * c comes straight out of a mail and may be negative where char
 	 * is signed; the <ctype.h> routines must not be given that
 	 */
 	if(isdigit((unsigned char)c))
 		return c - '0';
 	if((c >= 'A') && (c <= 'F'))
 		return c - 'A' + 10;
 	if((c >= 'a') && (c <= 'f'))
 		return c - 'a' + 10;
 	cli_dbgmsg("Illegal hex character '%c'\n", c);
 
 	/*
 	 * Some mails (notably some spam) break RFC2045 by failing to encode
 	 * the '=' character
 	 */
 	return '=';
 }
 
621a667a
 static unsigned char
 base64(char c)
 {
11926f3b
 	const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)];
621a667a
 
 	if(ret == 255) {
e7aa5e3d
 		/*cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);*/
621a667a
 		return 63;
 	}
 	return ret;
 }
e3aaff8e
 
 /*
  * Return the value of a uuencoded character: its distance from ' ',
  * cut down to an unsigned char.
  */
 static unsigned char
 uudecode(char c)
 {
 	const int offset = c - ' ';
 
 	return (unsigned char)offset;
 }
28010d29
 
 /*
  * These are the only arguments we're interested in.
  * Do 'fgrep messageFindArgument *.c' if you don't believe me!
  * It's probably not good doing this since each time a new
  * messageFindArgument is added I need to remember to look here,
  * but it can save a lot of memory...
  *
  * Returns 1 if arg starts, in any case, with one of the wanted names,
  * 0 (after reporting it) if it doesn't.
  */
 static int
 usefulArg(const char *arg)
 {
 	static const struct {
 		const char *name;
 		size_t len;
 	} wanted[] = {
 		{ "name", 4 },
 		{ "filename", 8 },
 		{ "boundary", 8 },
 		{ "protocol", 8 },
 		{ "id", 2 },
 		{ "number", 6 },
 		{ "total", 5 },
 		{ "type", 4 }
 	};
 	size_t i;
 
 	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
 		if(strncasecmp(arg, wanted[i].name, wanted[i].len) == 0)
 			return 1;
 
 	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
 	return 0;
 }
86355dc2
 
a603478f
 /*
  * Record in the message the scan context which it is to use.
  */
 void
 messageSetCTX(message *m, cli_ctx *ctx)
 {
 	cli_ctx *context = ctx;
 
 	m->ctx = context;
 }
 
 /*
  * Returns TRUE if the message has been flagged as infected, FALSE if not.
  */
 int
 messageContainsVirus(const message *m)
 {
 	if(m->isInfected)
 		return TRUE;
 
 	return FALSE;
 }
 
86355dc2
 /*
  * We've run out of memory. Try to recover some by
  * deduping the message: body lines with the same text as an earlier line
  * are made to share that earlier line's data.
  *
  * FIXME: this can take a long time. The real solution is for system admins
  *	to refrain from setting ulimits too low, then this routine won't be
  *	called
  *
  * NOTE(review): the scan always starts at m->body_first. m->dedupedThisFar
  * is written at the end but nothing here resumes from it - confirm
  * whether resuming was intended.
  */
 static void
 messageDedup(message *m)
 {
 	const size_t enough = 100*1000;	/* stop once this much is saved */
 	const text *master;
 	size_t saved = 0;
 
 	cli_dbgmsg("messageDedup\n");
 
 	for(master = m->body_first; master && (saved < enough); master = master->t_next) {
 		line_t *shared = master->t_line;
 		const char *data;
 		unsigned int refs;
 		text *other;
 
 		if(shared == NULL)
 			continue;
 
 		data = lineGetData(shared);
 		if(strlen(data) < 8)
 			continue;	/* wouldn't recover many bytes */
 
 		/* 255 is presumably the most references a line can have */
 		refs = (unsigned int)lineGetRefCount(shared);
 		if(refs == 255)
 			continue;
 
 		/*
 		 * We don't want to foul up any pointers
 		 */
 		if((master == m->encoding) || (master == m->bounce) ||
 		   (master == m->binhex) || (master == m->yenc))
 			continue;
 
 		for(other = master->t_next; other; other = other->t_next) {
 			line_t *twin = other->t_line;
 			const char *twindata;
 
 			if(twin == NULL)
 				continue;
 
 			twindata = lineGetData(twin);
 
 			/* skip lines already linked and lines that differ */
 			if((data == twindata) || (strcmp(data, twindata) != 0))
 				continue;
 
 			if(lineUnlink(twin) == NULL)
 				saved += strlen(data) + 1;
 
 			other->t_line = lineLink(shared);
 			if(other->t_line == NULL) {
 				cli_errmsg("messageDedup: out of memory\n");
 				return;
 			}
 
 			if(++refs == 255)
 				break;
 		}
 	}
 
 	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
 	m->dedupedThisFar = master;
 }
1602f612
 
 /*
  * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error (out of memory).
  *
  * in:	a NUL terminated "name=value" style parameter
  *
  * What is returned depends on what is found in the parameter:
  *	"*0*="	Parameter continuations. These aren't decoded: the text
  *		before the '=' is copied (dropping anything enclosed in a
  *		pair of '*'s) and "=rfc2231failure" is appended.
  *	"*0="	Everything after the '=' is contents; %XX sequences in it
  *		are decoded.
  *	"*="	The two apostrophe terminated fields after the '=' are
  *		skipped, and %XX sequences in the remainder are decoded.
  *		If both apostrophes aren't found an empty string is
  *		returned.
  *	none	A copy of the input with the top bit of each byte cleared.
  *
  * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
  *	 protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
  */
 static char *
 rfc2231(const char *in)
 {
 	const char *ptr, *src;
 	char *ret, *out;
 	enum { LANGUAGE, CHARSET, CONTENTS } field;
 
 	if(strstr(in, "*0*=") != NULL) {
 		char *p;
 
 		/*
 		 * Don't handle continuations, decode what we can.
 		 * The 16 extra bytes leave room for "=rfc2231failure" and
 		 * the terminating NUL.
 		 */
 		p = ret = cli_malloc(strlen(in) + 16);
 		if(ret == NULL) {
 			cli_errmsg("rfc2231: out of memory, unable to proceed\n");
 			return NULL;
 		}
 
 		do {
 			switch(*in) {
 				default:
 					*p++ = *in++;
 					continue;
 				case '*':
 					/* skip up to and including the next '*' */
 					do
 						in++;
 					while((*in != '*') && *in);
 					if(*in) {
 						in++;
 						continue;
 					}
 					break;
 				case '=':
 					strcpy(p, "=rfc2231failure");
 					p += strlen("=rfc2231failure");
 					break;
 			}
 			break;
 		} while(*in);
 		*p = '\0';
 
 		cli_dbgmsg("RFC2231 parameter continuations are not yet handled, returning \"%s\"\n",
 			ret);
 		return ret;
 	}
 
 	ptr = strstr(in, "*0=");
 	if(ptr != NULL)
 		/*
 		 * Parameter continuation, with no continuation
 		 * Thunderbird 1.5 (and possibly other versions) does this
 		 */
 		field = CONTENTS;
 	else {
 		ptr = strstr(in, "*=");
 		field = LANGUAGE;
 	}
 
 	if(ptr == NULL) {	/* quick return */
 		out = ret = cli_strdup(in);
 		if(ret == NULL)
 			/* don't walk a buffer we failed to allocate */
 			return NULL;
 		while(*out)
 			*out++ &= 0x7F;
 		return ret;
 	}
 
 	cli_dbgmsg("rfc2231 '%s'\n", in);
 
 	/*
 	 * The output is never longer than the input: the "*=" or "*0="
 	 * marker becomes a single '=' and each input byte after it produces
 	 * at most one output byte
 	 */
 	ret = cli_malloc(strlen(in) + 1);
 
 	if(ret == NULL) {
 		cli_errmsg("rfc2231: out of memory for ret\n");
 		return NULL;
 	}
 
 	/*
 	 * Copy everything before the marker. A separate cursor is used so
 	 * that "in" still refers to the whole parameter in the messages
 	 * below
 	 */
 	out = ret;
 	for(src = in; src != ptr; src++)
 		*out++ = *src;
 
 	*out++ = '=';
 
 	/* step over the marker, leaving ptr just after its '=' */
 	while(*ptr++ != '=')
 		;
 
 	/*
 	 * We don't do anything with the language and character set, just skip
 	 * over them!
 	 */
 	while(*ptr) {
 		switch(field) {
 			case LANGUAGE:
 				if(*ptr == '\'')
 					field = CHARSET;
 				break;
 			case CHARSET:
 				if(*ptr == '\'')
 					field = CONTENTS;
 				break;
 			case CONTENTS:
 				if(*ptr == '%') {
 					unsigned char byte;
 
 					/* '%' at the very end: nothing to decode */
 					if((*++ptr == '\0') || (*ptr == '\n'))
 						break;
 
 					byte = hex(*ptr);
 
 					/* only one digit followed the '%' */
 					if((*++ptr == '\0') || (*ptr == '\n')) {
 						*out++ = byte;
 						break;
 					}
 
 					byte <<= 4;
 					byte += hex(*ptr);
 					*out++ = byte;
 				} else
 					*out++ = *ptr;
 		}
 		if(*ptr++ == '\0')
 			/*
 			 * Incorrect message that has just one character after
 			 * a '%'.
 			 * FIXME: stash something in out that would, for example
 			 *	treat %2 as %02, assuming field == CONTENTS
 			 */
 			break;
 	}
 
 	if(field != CONTENTS) {
 		/* never saw both apostrophes, so there are no contents */
 		free(ret);
 		cli_dbgmsg("Invalid RFC2231 header: '%s'\n", in);
 		return cli_strdup("");
 	}
 
 	*out = '\0';
 
 	cli_dbgmsg("rfc2231 returns '%s'\n", ret);
 
 	return ret;
 }
 
 /*
1602f612
  * common/simil:
  *	From Computing Magazine 20/8/92
  * Returns a percentage from 0 to 100 - how similar are 2 strings?
  * 100 for exact match, < 0 for error
  */
 /*
  * One entry of the stack of strings that simil() works through; entries
  * are added by push() and removed by pop().
  */
 struct	pstr_list {	/* internal stack */
 	char	*d1;	/* the stacked string: allocated by push(), freed by pop() */
 	struct	pstr_list	*next;	/* the entry underneath, NULL at the bottom */
 };
 
 /*
  * Status codes used by simil(), push() and pop(). All of them are
  * negative, whereas a score returned by simil() is never negative.
  */
 #define	OUT_OF_MEMORY	(-2)	/* an allocation failed */
 #define	FAILURE	(-3)	/* pop() was called on an empty stack */
 #define	SUCCESS	(-4)	/* push() or pop() did its job */
 #define	ARRAY_OVERFLOW	(-5)	/* a string given to simil() is too long */
 typedef	struct	pstr_list	ELEMENT1;	/* a stack entry */
 typedef	ELEMENT1		*LINK1;	/* pointer to a stack entry; NULL is an empty stack */
 
 static	int	push(LINK1 *top, const char *string);
 static	int	pop(LINK1 *top, char *buffer);
 static	unsigned	int	compare(char *ls1, char **rs1, char *ls2, char **rs2);
 
1e8911e5
 /*
  * Size of the work buffers in simil(). Strings that are longer than
  * MAX_PATTERN_SIZ - 1 characters once stripped are rejected with
  * ARRAY_OVERFLOW.
  */
 #define	MAX_PATTERN_SIZ	50	/* maximum string lengths */
1602f612
 
 static int
 simil(const char *str1, const char *str2)
 {
 	LINK1 top = NULL;
 	unsigned int score = 0;
bc6bbeff
 	size_t common, total;
 	size_t len1, len2;
1602f612
 	char *rs1 = NULL, *rs2 = NULL;
 	char *s1, *s2;
0cf4cea7
 	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
1602f612
 
 	if(strcasecmp(str1, str2) == 0)
 		return 100;
 
0cf4cea7
 	if((s1 = cli_strdup(str1)) == NULL)
1602f612
 		return OUT_OF_MEMORY;
0cf4cea7
 	if((s2 = cli_strdup(str2)) == NULL) {
1602f612
 		free(s1);
 		return OUT_OF_MEMORY;
 	}
 
 	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
 		free(s1);
 		free(s2);
 		return ARRAY_OVERFLOW;
 	}
 
 	total += len2;
 
bea62be3
 	if((push(&top, s1) == OUT_OF_MEMORY) ||
 	   (push(&top, s2) == OUT_OF_MEMORY)) {
 		free(s1);
 		free(s2);
1602f612
 		return OUT_OF_MEMORY;
bea62be3
 	}
1602f612
 
 	while(pop(&top, ls2) == SUCCESS) {
 		pop(&top, ls1);
 		common = compare(ls1, &rs1, ls2, &rs2);
 		if(common > 0) {
bc6bbeff
 			score += (unsigned int)common;
1602f612
 			len1 = strlen(ls1);
 			len2 = strlen(ls2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 			len1 = strlen(rs1);
 			len2 = strlen(rs2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 		}
 	}
 	free(s1);
 	free(s2);
 	return (total > 0) ? ((score * 200) / total) : 0;
 }
 
 /*
  * Look for the longest run of characters, ignoring case, that ls1 and ls2
  * have in common.
  *
  * Returns the length of that run, or 0 if the strings share no characters.
  *
  * When a run is found both strings are modified: each has a NUL written
  * where its copy of the run starts, so that ls1 and ls2 become the text to
  * the left of the run, and *rs1 and *rs2 are set to point just after the
  * run, i.e. at the text to the right of it.
  * When nothing is found the strings, *rs1 and *rs2 are left alone.
  */
 static unsigned int
 compare(char *ls1, char **rs1, char *ls2, char **rs2)
 {
 	unsigned int common, maxchars = 0;
 	char *s1, *s2;
 	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
 	char *cs1, *cs2, *start1, *end1, *end2;
 
 	end1 = ls1 + strlen(ls1);
 	end2 = ls2 + strlen(ls2);
 
 	/* try each position in ls1 as the start of a run */
 	for(start1 = ls1; start1 < end1; start1++) {
 		s1 = start1;
 		s2 = ls2;
 
 		while(s1 < end1 && s2 < end2) {
 			/*
 			 * The casts matter: handing tolower() a negative
 			 * char (a byte with the top bit set, where char is
 			 * signed) is undefined behaviour
 			 */
 			if(tolower((unsigned char)*s1) != tolower((unsigned char)*s2)) {
 				s2++;
 				continue;
 			}
 
 			/* found a match, see how far it goes */
 			cs1 = s1;
 			cs2 = s2;
 			common = 0;
 			do {
 				if(s1 == end1 || s2 == end2)
 					break;
 				s1++;
 				s2++;
 				common++;
 			} while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));
 
 			if(common > maxchars) {
 				unsigned int diff = common - maxchars;
 
 				/* the best so far, remember where it is */
 				maxchars = common;
 				maxs1 = cs1;
 				maxs2 = cs2;
 				maxe1 = s1;
 				maxe2 = s2;
 				/* pull both search limits in by the improvement */
 				end1 -= diff;
 				end2 -= diff;
 			} else
 				/* no better: put s1 back, s2 stays where it is */
 				s1 -= common;
 		}
 	}
 
 	/*
 	 * The first match that is found always sets maxchars to at least 1,
 	 * so this is the same as asking if anything at all matched
 	 */
 	if(maxchars > 0) {
 		*maxs1 = '\0';
 		*maxs2 = '\0';
 		*rs1 = maxe1;
 		*rs2 = maxe2;
 	}
 	return maxchars;
 }
 
 /*
  * Put a copy of string on the top of the stack.
  *
  * top:		the stack, updated to refer to the new entry
  * string:	the text to stack, it is duplicated so the caller keeps its own
  *
  * Returns SUCCESS, or OUT_OF_MEMORY if either allocation failed, in which
  * case the stack is unchanged.
  */
 static int
 push(LINK1 *top, const char *string)
 {
 	LINK1 node = (LINK1)cli_malloc(sizeof(ELEMENT1));
 
 	if(node == NULL)
 		return OUT_OF_MEMORY;
 
 	node->d1 = cli_strdup(string);
 	if(node->d1 == NULL) {
 		/* don't leave a half built entry behind */
 		free(node);
 		return OUT_OF_MEMORY;
 	}
 
 	/* link the new entry in above the old top */
 	node->next = *top;
 	*top = node;
 
 	return SUCCESS;
 }
 
 /*
  * Take the top entry off the stack.
  *
  * top:		the stack, updated to refer to the entry underneath
  * buffer:	receives the stacked string. No length is passed in, so the
  *		caller must supply a buffer big enough for anything that
  *		was pushed
  *
  * Returns SUCCESS, or FAILURE if the stack is empty, in which case buffer
  * is not touched.
  */
 static int
 pop(LINK1 *top, char *buffer)
 {
 	LINK1 head = *top;
 
 	if(head == NULL)
 		return FAILURE;
 
 	(void)strcpy(buffer, head->d1);
 	*top = head->next;
 
 	/* the entry and its string were both allocated by push() */
 	free(head->d1);
 	free((char *)head);
 
 	return SUCCESS;
 }
5198de85
 
 /*
  * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
  *
  * line:	a NUL terminated line of text
  *
  * Returns 1 if the line starts with "begin ", three digits and a space,
  * otherwise 0. The first character must be a lower case 'b', the rest of
  * the word "begin" is matched without regard to case.
  */
 int
 isuuencodebegin(const char *line)
 {
 	if(line[0] != 'b')	/* quick check */
 		return 0;
 
 	/*
 	 * There's no need to find the length of the line first: the tests
 	 * are evaluated from left to right and stop at the first failure,
 	 * and a NUL fails every one of them, so nothing beyond the end of
 	 * the string is looked at.
 	 * The casts matter: handing isdigit() a negative char (a byte with
 	 * the top bit set, where char is signed) is undefined behaviour.
 	 */
 	return (strncasecmp(line, "begin ", 6) == 0) &&
 		isdigit((unsigned char)line[6]) &&
 		isdigit((unsigned char)line[7]) &&
 		isdigit((unsigned char)line[8]) &&
 		(line[9] == ' ');
 }
e83019ae
 
 #if HAVE_JSON
 /*
  * Return the JSON object that belongs to the message, making one with
  * cli_jsonobj(NULL, NULL) the first time that it's asked for.
  *
  * Returns NULL if m is NULL. Whatever cli_jsonobj() hands back is stored in
  * the message and returned as it is.
  */
 json_object *messageGetJObj(message *m)
 {
 	json_object *obj;
 
 	if(m == NULL)
 		return NULL;
 
 	obj = m->jobj;
 	if(obj == NULL) {
 		/* nothing there yet, create it and remember it */
 		obj = cli_jsonobj(NULL, NULL);
 		m->jobj = obj;
 	}
 
 	return obj;
 }
 #endif