/*
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 *
 *  Authors: Nigel Horne
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 * TODO: Optimise messageExport, decodeLine, messageIsEncoding
 */
static	char	const	rcsid[] = "$Id: message.c,v 1.195 2007/02/12 20:46:09 njh Exp $";

#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#ifdef CL_THREAD_SAFE
#ifndef	_REENTRANT
#define	_REENTRANT	/* for Solaris 2.8 */
#endif
#endif

#ifdef	C_DARWIN
#include <sys/types.h>
#endif
#include <stdlib.h>
#include <string.h>
#ifdef	HAVE_STRINGS_H
#include <strings.h>
#endif
#include <assert.h>
#include <ctype.h>
#include <stdio.h>

#ifdef	CL_THREAD_SAFE
#include <pthread.h>
#endif

#include "others.h"
#include "str.h"
#include "filetypes.h"

#include "mbox.h"

#ifndef isblank
#define isblank(c)	(((c) == ' ') || ((c) == '\t'))
#endif

#define	RFC2045LENGTH	76	/* maximum number of characters on a line */

#ifdef	HAVE_STDBOOL_H
#include <stdbool.h>
#else
#ifdef	FALSE
typedef	unsigned	char	bool;
#else
typedef enum	{ FALSE = 0, TRUE = 1 } bool;
#endif
#endif

/*
 * Forward declarations of the helpers that are private to this file.
 * hex(), base64() and uudecode() map a single character to a value and are
 * marked "const" for GCC so that repeated calls can be optimised.
 */
static	int	messageHasArgument(const message *m, const char *variable);
static	void	messageIsEncoding(message *m);
static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
static	void	sanitiseBase64(char *s);
#ifdef	__GNUC__
static	unsigned	char	hex(char c)	__attribute__((const));
static	unsigned	char	base64(char c)	__attribute__((const));
static	unsigned	char	uudecode(char c)	__attribute__((const));
#else
static	unsigned	char	hex(char c);
static	unsigned	char	base64(char c);
static	unsigned	char	uudecode(char c);
#endif
static	const	char	*messageGetArgument(const message *m, int arg);
static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
static	int	usefulArg(const char *arg);
static	void	messageDedup(message *m);
static	char	*rfc2231(const char *in);
static	int	simil(const char *str1, const char *str2);

/*
 * Maps the names that may appear in a Content-Transfer-Encoding header to
 * the decoder to be used. The entries are ordered in decreasing likelihood
 * of their appearance in an e-mail. Entries marked "incorrect" are not valid
 * encoding names but are accepted anyway. The table is terminated by an
 * entry whose string is NULL.
 */
static	const	struct	encoding_map {
	const	char	*string;
	encoding_type	type;
} encoding_map[] = {	/* rfc2045 */
	{	"7bit",			NOENCODING	},
	{	"text/plain",		NOENCODING	},
	{	"quoted-printable",	QUOTEDPRINTABLE	},	/* rfc2045 */
	{	"base64",		BASE64		},	/* rfc2045 */
	{	"8bit",			EIGHTBIT	},
	{	"binary",		BINARY		},
	{	"x-uuencode",		UUENCODE	},	/* uuencode(5) */
	{	"x-yencode",		YENCODE		},
	{	"x-binhex",		BINHEX		},
	{	"us-ascii",		NOENCODING	},	/* incorrect */
	{	"x-uue",		UUENCODE	},	/* incorrect */
	{	"uuencode",		UUENCODE	},	/* incorrect */
	{	NULL,			NOENCODING	}
};

/*
 * Maps the primary part of a Content-Type (the text before the '/') to its
 * mime_type value. The table is terminated by an entry whose string is NULL;
 * messageSetMimeType() loads it into a lookup table on first use.
 */
static	const	struct	mime_map {
	const	char	*string;
	mime_type	type;
} mime_map[] = {
	{	"text",			TEXT		},
	{	"multipart",		MULTIPART	},
	{	"application",		APPLICATION	},
	{	"audio",		AUDIO		},
	{	"image",		IMAGE		},
	{	"message",		MESSAGE		},
	{	"video",		VIDEO		},
	{	NULL,			TEXT		}
};

/*
 * Base64 decoding table, indexed by character code.
 * See RFC2045, section 6.8, table 1
 *
 * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and
 * '/' to 63. The padding character '=' maps to 0. Every other character
 * maps to 255.
 */
static const unsigned char base64Table[256] = {
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
};

/*
 * Allocate a new message with cli_calloc() and mark its MIME type as not
 * yet known (NOMIME).
 * Returns the new message, or NULL if the allocation failed.
 */
message *
messageCreate(void)
{
	message *fresh = (message *)cli_calloc(1, sizeof(message));

	if(fresh == NULL)
		return NULL;

	fresh->mimeType = NOMIME;
	return fresh;
}

/*
 * Dispose of a message: messageReset() releases everything the message
 * holds, then the structure itself is freed. m must not be NULL.
 */
void
messageDestroy(message *m)
{
	assert(m != NULL);

	messageReset(m);	/* frees the members */
	free(m);		/* frees the container */
}

/*
 * Return a message to its freshly created state: the subtype, disposition
 * type, argument list, body text and encoding list are released, the
 * structure is zeroed and the MIME type is set back to NOMIME.
 * m must not be NULL and must have no pending base64 characters.
 */
void
messageReset(message *m)
{
	assert(m != NULL);

	/* free(NULL) is a no-op, so these need no guard */
	free(m->mimeSubtype);
	free(m->mimeDispositionType);

	if(m->mimeArguments != NULL) {
		int n;

		for(n = 0; n < m->numberOfArguments; n++)
			free(m->mimeArguments[n]);
		free(m->mimeArguments);
	}

	if(m->body_first != NULL)
		textDestroy(m->body_first);

	assert(m->base64chars == 0);

	if(m->encodingTypes != NULL) {
		assert(m->numberOfEncTypes > 0);
		free(m->encodingTypes);
	}

	memset(m, '\0', sizeof(message));
	m->mimeType = NOMIME;
}

/*
 * Handle the Content-Type header. The syntax is in RFC1341.
 * Return success (1) or failure (0). Failure only happens when it's an
 * unknown type and we've already received a known type, or we've received an
 * empty type. If we receive an unknown type by itself we default to application
 */
int
messageSetMimeType(message *mess, const char *type)
{
#ifdef	CL_THREAD_SAFE
	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
	const struct mime_map *m;
	int typeval;
	static table_t *mime_table;

	assert(mess != NULL);
	if(type == NULL) {
		cli_dbgmsg("Empty content-type field\n");
		return 0;
	}

	cli_dbgmsg("messageSetMimeType: '%s'\n", type);

	/* Ignore leading spaces */
	while(!isalpha(*type))
		if(*type++ == '\0')
			return 0;

#ifdef	CL_THREAD_SAFE
	pthread_mutex_lock(&mime_mutex);
#endif
	if(mime_table == NULL) {
		mime_table = tableCreate();
		if(mime_table == NULL) {
#ifdef	CL_THREAD_SAFE
			pthread_mutex_unlock(&mime_mutex);
#endif
			return 0;
		}

		for(m = mime_map; m->string; m++)
			if(!tableInsert(mime_table, m->string, m->type)) {
				tableDestroy(mime_table);
				mime_table = NULL;
#ifdef	CL_THREAD_SAFE
				pthread_mutex_unlock(&mime_mutex);
#endif
				return 0;
			}
	}
#ifdef	CL_THREAD_SAFE
	pthread_mutex_unlock(&mime_mutex);
#endif

	typeval = tableFind(mime_table, type);

	if(typeval != -1) {
		mess->mimeType = (mime_type)typeval;
		return 1;
	}
	if(mess->mimeType == NOMIME) {
		if(strncasecmp(type, "x-", 2) == 0)
			mess->mimeType = MEXTENSION;
		else {
			/*
			 * Force scanning of strange messages
			 */
			if(strcasecmp(type, "plain") == 0) {
				cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
				mess->mimeType = TEXT;
			} else {
				/*
				 * Don't handle broken e-mail probably sending
				 *	Content-Type: plain/text
				 * instead of
				 *	Content-Type: text/plain
				 * as an attachment
				 */
				int highestSimil = 0, t = -1;
				const char *closest = NULL;

				for(m = mime_map; m->string; m++) {
					const int s = simil(m->string, type);

					if(s > highestSimil) {
						highestSimil = s;
						closest = m->string;
						t = m->type;
					}
				}
				if(highestSimil >= 50) {
					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%d%% certainty)\n",
						type, closest,
						highestSimil);
					mess->mimeType = (mime_type)t;
				} else {
					cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
					mess->mimeType = APPLICATION;
				}
			}
		}
		return 1;
	}
	return 0;
}

/*
 * Return the MIME type currently recorded for the message.
 * m must not be NULL.
 */
mime_type
messageGetMimeType(const message *m)
{
	mime_type current;

	assert(m != NULL);

	current = m->mimeType;
	return current;
}

/*
 * Replace the message's MIME subtype with a copy of subtype.
 * A NULL subtype is stored as the empty string, which copes with broken
 * content-type lines such as
 *	Content-Type: text/
 * m must not be NULL.
 */
void
messageSetMimeSubtype(message *m, const char *subtype)
{
	const char *value = subtype;

	assert(m != NULL);

	if(value == NULL) {
		cli_dbgmsg("Empty content subtype\n");
		value = "";
	}

	/* drop the previous subtype, if any, before storing the new copy */
	free(m->mimeSubtype);
	m->mimeSubtype = cli_strdup(value);
}

/*
 * Return the message's MIME subtype, or the empty string if none is set.
 * The returned pointer is not a copy.
 */
const char *
messageGetMimeSubtype(const message *m)
{
	if(m->mimeSubtype == NULL)
		return "";

	return m->mimeSubtype;
}

/*
 * Replace the message's Content-Disposition type.
 *
 * Any previous value is freed. If disptype is NULL, or contains only white
 * space, the disposition type is left unset (NULL). Otherwise a copy of
 * disptype, with leading white space skipped and strstrip() applied, is
 * stored.
 * m must not be NULL.
 */
void
messageSetDispositionType(message *m, const char *disptype)
{
	assert(m != NULL);

	free(m->mimeDispositionType);
	m->mimeDispositionType = NULL;

	if(disptype == NULL)
		return;

	/*
	 * It's broken for there to be an entry such as "Content-Disposition:"
	 * However some spam and viruses are rather broken, it's a sign
	 * that something is wrong if we get that - maybe we should force a
	 * scan of this part
	 *
	 * The cast matters: a negative char must not be handed to isspace()
	 */
	while(isspace((unsigned char)*disptype))
		disptype++;

	if(*disptype == '\0')
		return;

	m->mimeDispositionType = cli_strdup(disptype);
	if(m->mimeDispositionType)
		strstrip(m->mimeDispositionType);
}

/*
 * Return the message's Content-Disposition type, or the empty string if
 * none is set. The returned pointer is not a copy.
 */
const char *
messageGetDispositionType(const message *m)
{
	if(m->mimeDispositionType == NULL)
		return "";

	return m->mimeDispositionType;
}

/*
 * Add one "variable=value" MIME argument to the message.
 *
 * Leading white space in arg is skipped. Nothing is stored when arg is
 * NULL or empty, when usefulArg() rejects it, when the same argument
 * (compared case insensitively) is already held, when rfc2231() yields no
 * string, or when the result contains no '='.
 * An argument starting with "filename=" or "name=" forces the MIME type of
 * a message that has none to application.
 *
 * TODO:
 *	Arguments are held on a per message basis, they should be held on
 * a per section basis. Otherwise what happens if two sections have two
 * different values for charset? Probably doesn't matter for the use this
 * code will be given, but will need fixing if this code is used elsewhere
 */
void
messageAddArgument(message *m, const char *arg)
{
	int offset;
	char *p;

	assert(m != NULL);

	if(arg == NULL)
		return;	/* Note: this is not an error condition */

	/* cast: a negative char must not be handed to isspace() */
	while(isspace((unsigned char)*arg))
		arg++;

	if(*arg == '\0')
		/* Empty argument? Probably a broken mail client... */
		return;

	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);

	if(!usefulArg(arg))
		return;

	/* Look for a free slot, giving up if the argument is already held */
	for(offset = 0; offset < m->numberOfArguments; offset++)
		if(m->mimeArguments[offset] == NULL)
			break;
		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
			return;	/* already in there */

	if(offset == m->numberOfArguments) {
		/* No free slot: grow the array by one entry */
		char **q = (char **)cli_realloc(m->mimeArguments, (m->numberOfArguments + 1) * sizeof(char *));

		if(q == NULL)
			return;
		m->mimeArguments = q;
		m->numberOfArguments++;
	}

	p = m->mimeArguments[offset] = rfc2231(arg);

	if(p == NULL) {
		/*
		 * rfc2231() gave us nothing (presumably out of memory).
		 * The slot already holds NULL, so it stays free
		 */
		cli_dbgmsg("messageAddArgument, error from rfc2231()\n");
		return;
	}

	if(strchr(p, '=') == NULL) {
		/*
		 * Only patch in the '=' when there is a character to replace.
		 * If p is exactly "filename" then p[8] is the terminating NUL
		 * and overwriting it would leave the string unterminated
		 */
		if((strncmp(p, "filename", 8) == 0) && (p[8] != '\0')) {
			/*
			 * FIXME: Bounce message handling is corrupting the in
			 * core copies of headers
			 */
			cli_dbgmsg("Possible data corruption fixed\n");
			p[8] = '=';
		} else {
			if(*p)
				cli_dbgmsg("messageAddArgument, '%s' contains no '='\n", p);
			free(p);
			m->mimeArguments[offset] = NULL;
			return;
		}
	}

	/*
	 * This is terribly broken from an RFC point of view but is useful
	 * for catching viruses which have a filename but no type of
	 * mime. By pretending defaulting to an application rather than
	 * to nomime we can ensure they're saved and scanned
	 */
	if((strncasecmp(p, "filename=", 9) == 0) || (strncasecmp(p, "name=", 5) == 0))
		if(messageGetMimeType(m) == NOMIME) {
			cli_dbgmsg("Force mime encoding to application\n");
			messageSetMimeType(m, "application");
		}
}

/*
 * Add in all the arguments found in the header text s, handing each
 * "key=value" pair to messageAddArgument().
 * Cope with:
 *	name="foo bar.doc"
 *	charset=foo name=bar
 * White space and ';' between arguments are skipped. Parsing stops at the
 * first argument that can't be understood.
 * s must not be NULL.
 */
void
messageAddArguments(message *m, const char *s)
{
	const char *string = s;

	/* check before the string is handed to the debug message */
	assert(string != NULL);

	cli_dbgmsg("Add arguments '%s'\n", string);

	while(*string) {
		const char *key, *cptr;
		char *data, *field;

		if(isspace(*string & 0xff) || (*string == ';')) {
			string++;
			continue;
		}

		key = string;

		data = strchr(string, '=');

		/*
		 * Some spam breaks RFC2045 by using ':' instead of '='
		 * e.g.:
		 *	Content-Type: text/html; charset:ISO-8859-1
		 * should be:
		 *	Content-type: text/html; charset=ISO-8859-1
		 *
		 * We give up with lines that are completely broken because
		 * we don't have ESP and don't know what was meant to be there.
		 * It's unlikely to really be a problem.
		 */
		if(data == NULL)
			data = strchr(string, ':');

		if(data == NULL) {
			/*
			 * Completely broken, give up
			 */
			cli_dbgmsg("Can't parse header \"%s\"\n", s);
			return;
		}

		string = &data[1];

		/*
		 * Handle white space to the right of the equals sign
		 * This breaks RFC2045 which has:
		 *	parameter := attribute "=" value
		 *	attribute := token   ; case-insensitive
		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
		 *		or tspecials>
		 * But too many MUAs ignore this
		 *
		 * isspace() is false for NUL so the loop stops at the end of
		 * the string. The cast keeps 8 bit data away from isspace()
		 */
		while(isspace((unsigned char)*string))
			string++;

		/*
		 * cptr is the first character of the value. If that is the
		 * terminating NUL, string now points one past it, but both
		 * branches below leave or reset string before using it
		 */
		cptr = string++;

		if(*cptr == '"') {
			char *ptr, *kcopy;

			/*
			 * The field is in quotes, so look for the
			 * closing quotes
			 */
			kcopy = cli_strdup(key);

			if(kcopy == NULL)
				return;

			/*
			 * The separator was found in the text starting at key,
			 * so it is in the copy as well and ptr can't be NULL
			 */
			ptr = strchr(kcopy, '=');
			if(ptr == NULL)
				ptr = strchr(kcopy, ':');
			*ptr = '\0';

			string = strchr(++cptr, '"');

			if(string == NULL) {
				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
				string = "";
			} else
				string++;

			if(!usefulArg(kcopy)) {
				free(kcopy);
				continue;
			}

			data = cli_strdup(cptr);

			ptr = (data) ? strchr(data, '"') : NULL;
			if(ptr == NULL) {
				/*
				 * Weird e-mail header such as:
				 * Content-Type: application/octet-stream; name="
				 * "
				 * Content-Transfer-Encoding: base64
				 * Content-Disposition: attachment; filename="
				 * "
				 *
				 * TODO: the file should still be saved and
				 * virus checked
				 */
				cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
				if(data)
					free(data);
				free(kcopy);
				return;
			}

			*ptr = '\0';

			/* room for key, '=', value and the terminating NUL */
			field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2);
			if(field) {
				strcat(field, "=");
				strcat(field, data);
			} else
				free(kcopy);
			free(data);
		} else {
			size_t len;

			if(*cptr == '\0') {
				cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
				return;
			}

			/*
			 * The field is not in quotes, so look for the closing
			 * white space
			 */
			while((*string != '\0') && !isspace((unsigned char)*string))
				string++;

			/* copy everything from the key to the end of the value */
			len = (size_t)(string - key) + 1;
			field = cli_malloc(len);

			if(field) {
				memcpy(field, key, len - 1);
				field[len - 1] = '\0';
			}
		}
		if(field) {
			messageAddArgument(m, field);
			free(field);
		}
	}
}

/*
 * Return argument number arg of the message, or the empty string if that
 * slot is unused. arg must lie in the range 0 to numberOfArguments - 1.
 * The returned pointer is not a copy.
 */
static const char *
messageGetArgument(const message *m, int arg)
{
	const char *value;

	assert(m != NULL);
	assert(arg >= 0);
	assert(arg < m->numberOfArguments);

	value = m->mimeArguments[arg];
	if(value == NULL)
		return "";

	return value;
}

/*
 * Find a MIME variable from the header and return a COPY to the value of that
 * variable. The caller must free the copy
 *
 * The first argument whose name starts with variable (compared case
 * insensitively) decides the result: if it isn't followed by '=', possibly
 * after white space, NULL is returned without looking any further.
 * A value enclosed in quotes is returned without the quotes.
 * Returns NULL if the variable isn't found or memory runs out.
 */
char *
messageFindArgument(const message *m, const char *variable)
{
	int i;
	size_t len;

	assert(m != NULL);
	assert(variable != NULL);

	len = strlen(variable);

	for(i = 0; i < m->numberOfArguments; i++) {
		const char *ptr;

		ptr = messageGetArgument(m, i);
		if((ptr == NULL) || (*ptr == '\0'))
			continue;
#ifdef	CL_DEBUG
		cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
			(unsigned long)len, variable, ptr);
#endif
		if(strncasecmp(ptr, variable, len) == 0) {
			ptr = &ptr[len];
			/* cast: a negative char must not be handed to isspace() */
			while(isspace((unsigned char)*ptr))
				ptr++;
			if(*ptr != '=') {
				cli_dbgmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
				return NULL;
			}
			if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
				/* Remove any quote characters */
				char *ret = cli_strdup(++ptr);
				char *p;

				if(ret == NULL)
					return NULL;

				/*
				 * fix un-quoting of boundary strings from
				 * header, occurs if boundary was given as
				 *	'boundary="_Test_";'
				 *
				 * At least two quotes in string, assume
				 * quoted argument
				 * end string at next quote, which also
				 * drops anything that follows it
				 */
				if((p = strchr(ret, '"')) != NULL)
					*p = '\0';
				return ret;
			}
			return cli_strdup(ptr);
		}
	}
	return NULL;
}

/*
 * Return a copy of the value of the message's "filename" argument, falling
 * back to its "name" argument. Returns NULL if neither can be found.
 * The copy comes from messageFindArgument() and the caller must free it.
 */
char *
messageGetFilename(const message *m)
{
	char *found = (char *)messageFindArgument(m, "filename");

	if(found == NULL)
		found = (char *)messageFindArgument(m, "name");

	return found;
}

/*
 * Does the message hold the given MIME variable?
 *
 * The first argument whose name starts with variable (compared case
 * insensitively) decides the result: 1 if it is followed by '=', possibly
 * after white space, 0 if it isn't.
 * Returns 0 if no argument starts with variable.
 */
static int
messageHasArgument(const message *m, const char *variable)
{
	int i;
	size_t len;

	assert(m != NULL);
	assert(variable != NULL);

	len = strlen(variable);

	for(i = 0; i < m->numberOfArguments; i++) {
		const char *ptr;

		ptr = messageGetArgument(m, i);
		if((ptr == NULL) || (*ptr == '\0'))
			continue;
#ifdef	CL_DEBUG
		cli_dbgmsg("messageHasArgument: compare %lu bytes of %s with %s\n",
			(unsigned long)len, variable, ptr);
#endif
		if(strncasecmp(ptr, variable, len) == 0) {
			ptr = &ptr[len];
			/* cast: a negative char must not be handed to isspace() */
			while(isspace((unsigned char)*ptr))
				ptr++;
			if(*ptr != '=') {
				cli_dbgmsg("messageHasArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
				return 0;
			}
			return 1;
		}
	}
	return 0;
}

/*
 * Returns 1 if the message has a "filename" or a "file" argument, 0 if it
 * has neither.
 *
 * NOTE(review): messageGetFilename() looks up "filename" and "name", whereas
 * this tests "filename" and "file" - confirm whether "name" was intended.
 */
int
messageHasFilename(const message *m)
{
	if(messageHasArgument(m, "filename"))
		return 1;

	return messageHasArgument(m, "file") ? 1 : 0;
}

/*
 * Handle the Content-Transfer-Encoding header.
 *
 * enctype may name several mechanisms separated by spaces or tabs; each one
 * that matches an entry of encoding_map exactly (ignoring the "uuencode"
 * entry) is appended to the message's list of encoding types, unless it is
 * already in the list. A name that matches nothing exactly is replaced by
 * the closest entry if the similarity is at least 50%, otherwise both
 * base64 and quoted-printable are enabled.
 * m and enctype must not be NULL.
 */
void
messageSetEncoding(message *m, const char *enctype)
{
	const struct encoding_map *e;
	int i;
	char *type;

	assert(m != NULL);
	assert(enctype != NULL);

	/* cast: a negative char must not be handed to a <ctype.h> function */
	while(isblank((unsigned char)*enctype))
		enctype++;

	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);

	if(strcasecmp(enctype, "8 bit") == 0) {
		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
		enctype = "8bit";
	}

	/*
	 * Iterate through
	 *	Content-Transfer-Encoding: base64 binary
	 * cli_strtok's fieldno counts from 0
	 */
	i = 0;
	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
		int highestSimil = 0;
		const char *closest = NULL;
		/* the same for every table entry, so work it out once */
		const int lowertype = tolower((unsigned char)type[0]);

		for(e = encoding_map; e->string; e++) {
			int sim;

			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
				/*
				 * simil is expensive, I'm yet to encounter only
				 * one example of a missent encoding when the
				 * first character was wrong, so lets assume no
				 * match to save the call.
				 *
				 * That example was quoted-printable sent as
				 * X-quoted-printable.
				 */
				continue;

			if(strcmp(e->string, "uuencode") == 0)
				/*
				 * No need to test here - fast track visa will have
				 * handled uuencoded files
				 */
				continue;

			sim = simil(type, e->string);

			if(sim == 100) {
				int j;
				encoding_type *et;

				for(j = 0; j < m->numberOfEncTypes; j++)
					if(m->encodingTypes[j] == e->type)
						break;

				if(j < m->numberOfEncTypes) {
					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
						type);
					break;
				}

				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
				if(et == NULL)
					break;

				m->encodingTypes = et;
				m->encodingTypes[m->numberOfEncTypes++] = e->type;

				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
				break;
			} else if(sim > highestSimil) {
				closest = e->string;
				highestSimil = sim;
			}
		}

		/*
		 * The loop above only runs off the end of the table when no
		 * exact match was found
		 */
		if(e->string == NULL) {
			/*
			 * The stated encoding type is illegal, so we
			 * use a best guess of what it should be.
			 *
			 * 50% is arbitary. For example 7bi will match as
			 * 66% certain to be 7bit
			 */
			if(highestSimil >= 50) {
				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%d%% certainty)\n",
					type, closest, highestSimil);
				messageSetEncoding(m, closest);
			} else {
				cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
				/*
				 * Err on the side of safety, enable all
				 * decoding modules
				 */
				messageSetEncoding(m, "base64");
				messageSetEncoding(m, "quoted-printable");
			}
		}

		free(type);
	}
}

/*
 * Return the first encoding type recorded for the message, or NOENCODING
 * if none has been recorded. m must not be NULL.
 */
encoding_type
messageGetEncoding(const message *m)
{
	assert(m != NULL);

	return (m->numberOfEncTypes != 0) ? m->encodingTypes[0] : NOENCODING;
}

/*
 * Add the given line to the end of the body of the given message.
 *
 * The line is linked with lineLink(), so the caller keeps its own
 * reference. A NULL line, or a line without data, is stored as a blank
 * line.
 * Returns 1 for success, -1 if memory runs out, in which case the message
 * is left as it was.
 */
int
messageAddLine(message *m, line_t *line)
{
	text *node;

	assert(m != NULL);

	/*
	 * Allocate before touching the list, so that a failure can't lose
	 * track of the current last entry
	 */
	node = (text *)cli_malloc(sizeof(text));
	if(node == NULL)
		return -1;

	node->t_next = NULL;
	node->t_line = NULL;

	if(m->body_first == NULL)
		m->body_first = node;
	else
		m->body_last->t_next = node;
	m->body_last = node;

	if(line && lineGetData(line)) {
		node->t_line = lineLink(line);

		/* messageIsEncoding() looks at the line just added */
		messageIsEncoding(m);
	}

	return 1;
}

/*
 * Append the string data, as a new line, to the body of the given message.
 * The message stores its own line (or shares the previous one when the text
 * is the same), so data still belongs to the caller.
 * data must not be terminated by a \n
 *
 * A NULL or empty string is stored as a blank line, and a string made up
 * only of white space is stored as a single space.
 * Returns 1 for success, -1 if memory runs out.
 */
int
messageAddStr(message *m, const char *data)
{
	line_t *previous = NULL;	/* set when data repeats the last line */

	assert(m != NULL);

	if(data != NULL) {
		if(*data == '\0')
			data = NULL;
		else {
			/*
			 * If it's only white space, just store one space to
			 * save memory. You must store something since it may
			 * be a header line
			 */
			const char *scan = data;

			while(*scan && !((*scan) & 0x80) && isspace(*scan))
				scan++;

			if(*scan == '\0')
				/* every character was white space */
				data = " ";
		}
	}

	if(m->body_first == NULL)
		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
	else {
		assert(m->body_last != NULL);

		/*
		 * Don't save two blank lines in sucession, except in text
		 * where some phish signatures have been built which need
		 * the blank lines
		 */
		if((data == NULL) && (m->body_last->t_line == NULL) &&
		   (messageGetMimeType(m) != TEXT))
			return 1;

		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
		if(m->body_last->t_next == NULL) {
			/* try again once messageDedup() has had a go */
			messageDedup(m);
			m->body_last->t_next = (text *)cli_malloc(sizeof(text));
			if(m->body_last->t_next == NULL) {
				cli_errmsg("messageAddStr: out of memory\n");
				return -1;
			}
		}

		/* note a repeat before moving on from the old last entry */
		if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
			previous = m->body_last->t_line;
		m->body_last = m->body_last->t_next;
	}

	if(m->body_last == NULL) {
		cli_errmsg("messageAddStr: out of memory\n");
		return -1;
	}

	m->body_last->t_next = NULL;

	if(!(data && *data)) {
		/* blank line */
		m->body_last->t_line = NULL;
		return 1;
	}

	if(previous) {
		/* same text as the line before, share it */
		m->body_last->t_line = lineLink(previous);
		return 1;
	}

	m->body_last->t_line = lineCreate(data);

	if(m->body_last->t_line == NULL) {
		messageDedup(m);
		m->body_last->t_line = lineCreate(data);

		if(m->body_last->t_line == NULL) {
			cli_errmsg("messageAddStr: out of memory\n");
			return -1;
		}
	}
	messageIsEncoding(m);

	return 1;
}

/*
 * Add the given string, as a new line, to the start of the given message.
 * The message stores its own line, so data still belongs to the caller.
 * data must not be terminated by a \n; NULL is treated as the empty string.
 *
 * Returns 1 for success, -1 if memory runs out. If the message already has
 * a body it is left as it was on failure.
 */
int
messageAddStrAtTop(message *m, const char *data)
{
	text *node;

	assert(m != NULL);

	/* Both paths below treat a missing string as the empty string */
	if(data == NULL)
		data = "";

	if(m->body_first == NULL)
		/*
		 * NOTE(review): messageAddLine() links the line rather than
		 * taking it over, so the reference returned by lineCreate()
		 * looks as if it is never released - confirm against line.c
		 */
		return messageAddLine(m, lineCreate(data));

	/*
	 * Build the new entry completely before it goes on the list, so
	 * that a failure doesn't leave a half built entry at the top
	 */
	node = (text *)cli_malloc(sizeof(text));
	if(node == NULL)
		return -1;

	node->t_line = lineCreate(data);
	if(node->t_line == NULL) {
		cli_errmsg("messageAddStrAtTop: out of memory\n");
		free(node);
		return -1;
	}

	node->t_next = m->body_first;
	m->body_first = node;

	return 1;
}

/*
 * Put the contents of the given text at the end of the current object.
 * Can be used either to move a text object into a message, or to move a
 * message's text into another message only moving from a given offset.
 * The given text emptied; it can be used again if needed, though be warned that
 * it will have an empty line at the start.
 *
 * When m has no body and old_message is given, t must be an entry in the
 * body of old_message: the entries before t are freed and the rest of the
 * body is handed over to m.
 *
 * Returns 0 for success, -1 for failure
 */
int
messageMoveText(message *m, text *t, message *old_message)
{
	int rc;

	if(m->body_first == NULL) {
		if(old_message) {
			text *u;
			/*
			 * t is within old_message which is about to be
			 * destroyed
			 */
			assert(old_message->body_first != NULL);

			m->body_first = t;

			/* Throw away the part of the old body that's before t */
			for(u = old_message->body_first; u != t;) {
				text *next;

				if(u->t_line)
					lineUnlink(u->t_line);
				next = u->t_next;

				free(u);
				u = next;

				if(u == NULL) {
					cli_dbgmsg("messageMoveText sanity check: t not within old_message\n");
					return -1;
				}
			}
			assert(old_message->body_last->t_next == NULL);

			m->body_last = old_message->body_last;
			old_message->body_first = old_message->body_last = NULL;

			/* Do any pointers need to be reset? */
			if((old_message->bounce == NULL) &&
			   (old_message->encoding == NULL) &&
			   (old_message->binhex == NULL) &&
			   (old_message->yenc == NULL))
				return 0;

			/*
			 * Start again from the top so that the loop at the
			 * end looks at each of the lines moved
			 */
			m->body_last = m->body_first;
			rc = 0;
		} else {
			m->body_last = m->body_first = textMove(NULL, t);
			if(m->body_first == NULL)
				/*
				 * Nothing was moved and m has no body, so
				 * there are no lines to look at. Carrying on
				 * would dereference the NULL body_last
				 */
				return -1;
			rc = 0;
		}
	} else {
		m->body_last = textMove(m->body_last, t);
		if(m->body_last == NULL) {
			rc = -1;
			/* find the end of the body again, from the top */
			m->body_last = m->body_first;
		} else
			rc = 0;
	}

	/*
	 * Move on to the real end of the body, checking each line passed
	 * for the start of an inclusion
	 */
	while(m->body_last->t_next) {
		m->body_last = m->body_last->t_next;
		if(m->body_last->t_line)
			messageIsEncoding(m);
	}

	return rc;
}

/*
 * See if the last line of the body marks the start of a non MIME inclusion
 * that will need to be scanned, and if so remember where it is.
 *
 * At most one of the markers is set by a call, and a marker that is
 * already set is never changed:
 *	encoding	a Content-Transfer-Encoding line not mentioning 7bit
 *	bounce		a "Received: " line that cli_filetype() reports as
 *			mail (only looked for when the message has a ctx)
 *	binhex		a line that's a close match for the BinHex banner
 *	yenc		a line starting "=ybegin line="
 *
 * The last entry of the body must hold a line.
 */
static void
messageIsEncoding(message *m)
{
	static const char encoding[] = "Content-Transfer-Encoding";
	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
	text *last = m->body_last;
	const char *line = lineGetData(last->t_line);

	if((m->encoding == NULL) &&
	   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
	   (strstr(line, "7bit") == NULL)) {
		m->encoding = last;
		return;
	}

	if((m->bounce == NULL) && m->ctx &&
	   (strncasecmp(line, "Received: ", 10) == 0) &&
	   (cli_filetype((const unsigned char *)line, strlen(line), m->ctx->engine) == CL_TYPE_MAIL)) {
		m->bounce = last;
		return;
	}

	/*
	 * uuencode isn't looked for here, it's not needed with fast track
	 * visa technology
	 *
	 * Look for close matches for BinHex, but simil() is expensive so
	 * only do it if it's likely to be found
	 */
	if((m->binhex == NULL) &&
	   strstr(line, "BinHex") &&
	   (simil(line, binhex) > 90)) {
		m->binhex = last;
		return;
	}

	if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
		m->yenc = last;
}

/*
 * Returns a pointer to the first entry of the body of the message, which is
 * NULL if the message has no body. Note that it does NOT return a copy of
 * the data. m must not be NULL.
 */
text *
messageGetBody(message *m)
{
	text *body;

	assert(m != NULL);

	body = m->body_first;
	return body;
}

/*
 * Export a message using the given export routines
 *
 * TODO: It really should export into an array, one
 * for each encoding algorithm. However, what it does is it returns the
 * last item that was exported. That's sufficient for now.
 */
static void *
messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text)
{
	void *ret;
	text *t_line;
	char *filename;
	int i;

	assert(m != NULL);

	if(messageGetBody(m) == NULL)
		return NULL;

	ret = (*create)();

	if(ret == NULL)
		return NULL;

	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);

	if((t_line = binhexBegin(m)) != NULL) {
		unsigned char byte;
		unsigned long newlen = 0L, len, dataforklen, resourceforklen, l;
		unsigned char *data;
		char *ptr;
		int bytenumber;
		blob *tmp;

		/*
		 * HQX conversion table - illegal chars are 0xff
		 */
		const unsigned char hqxtbl[] = {
			     /*   00   01   02   03   04   05   06   07   08   09   0a   0b   0c   0d   0e   0f */
		/* 00-0f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
		/* 10-1f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
		/* 20-2f */	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
		/* 30-3f */	0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
		/* 40-4f */	0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
		/* 50-5f */	0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
		/* 60-6f */	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
		/* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
		};

		cli_dbgmsg("messageExport: decode binhex\n");
		/*
		 * Decode BinHex4. First create a temporary blob which contains
		 * the encoded message. Then decode that blob to the target
		 * blob, free the temporary blob and return the target one
		 *
		 * FIXME: EICAR isn't detected: should create 3 files in fork
		 *	format: .info, .data and .rsrc. This is needed for
		 *	position dependant detection such as EICAR
		 *
		 * See RFC1741
		 */
		while(((t_line = t_line->t_next) != NULL) &&
		      (t_line->t_line == NULL))
			;

		tmp = textToBlob(t_line, NULL,
			((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);

		if(tmp == NULL) {
			/*
			 * FIXME: We've probably run out of memory during the
			 * text to blob.
			 */
			cli_warnmsg("Couldn't start binhex parser\n");
			(*destroy)(ret);
			return NULL;
		}

		data = blobGetData(tmp);

		if(data == NULL) {
			cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
			blobDestroy(tmp);
			(*destroy)(ret);
			return NULL;
		}
		len = blobGetDataSize(tmp);

		if(data[0] == ':') {
			unsigned char *uptr;
			/* 7 bit (ala RFC1741) */

			/*
			 * FIXME: this is dirty code, modification of the
			 * contents of a member of the blob object should be
			 * done through blob.c
			 *
			 * Convert 7 bit data into 8 bit
			 */
			cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);

			uptr = cli_malloc(len);
			if(uptr == NULL) {
				blobDestroy(tmp);
				(*destroy)(ret);
				return NULL;
			}
			memcpy(uptr, data, len);
			bytenumber = 0;

			/*
			 * uptr now contains the encoded (7bit) data - len bytes long
			 * data will contain the unencoded (8bit) data
			 */
			for(l = 1; l < len; l++) {
				unsigned char c = uptr[l];

				if(c == ':')
					break;

				if((c == '\n') || (c == '\r'))
					continue;

				if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
					cli_dbgmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
					break;
				}
				c = hqxtbl[c];
				assert(c <= 63);

				/*
				 * These masks probably aren't needed, but
				 * they're here to verify the code is correct
				 */
				switch(bytenumber) {
					case 0:
						data[newlen] = (c << 2) & 0xFC;
						bytenumber = 1;
						break;
					case 1:
						data[newlen++] |= (c >> 4) & 0x3;
						data[newlen] = (c << 4) & 0xF0;
						bytenumber = 2;
						break;
					case 2:
						data[newlen++] |= (c >> 2) & 0xF;
						data[newlen] = (c << 6) & 0xC0;
						bytenumber = 3;
						break;
					case 3:
						data[newlen++] |= c & 0x3F;
						bytenumber = 0;
						break;
				}
			}

			cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);

			/*
			 * Throw away the old encoded (7bit) data
			 * data now points to the encoded (8bit) data - newlen bytes
			 *
			 * The data array may contain repetitive characters
			 */
			free(uptr);
		} else {
			cli_warnmsg("HQX8 messages not yet supported, extraction may fail - if you believe this file contains a virus, submit it to www.clamav.net\n");
			newlen = len;
		}

		/*
		 * Uncompress repetitive characters
		 */
		if(memchr(data, 0x90, newlen)) {
			blob *u = blobCreate();	/* uncompressed data */

			if(u == NULL) {
				(*destroy)(ret);
				blobDestroy(tmp);
				return NULL;
			}
			/*
			 * Includes compression
			 */
			for(l = 0L; l < newlen; l++) {
				unsigned char c = data[l];

				/*
				 * TODO: handle the case where the first byte
				 * is 0x90
				 */
				blobAddData(u, &c, 1);

				if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
					int count;

					l += 2;
					count = data[l];

					if(count == 0) {
						c = 0x90;
						blobAddData(u, &c, 1);
					} else {
#ifdef	CL_DEBUG
						cli_dbgmsg("uncompress HQX7 at 0x%06lu: %d repetitive bytes\n", l, count);
#endif
						blobGrow(u, count);
						while(--count > 0)
							blobAddData(u, &c, 1);
					}
				}
			}
			blobDestroy(tmp);
			tmp = u;
			data = blobGetData(tmp);
			len = blobGetDataSize(tmp);
			cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
		} else {
			len = newlen;
			cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
				len);
		}
		if(len == 0) {
			cli_dbgmsg("Discarding empty binHex attachment\n");
			(*destroy)(ret);
			blobDestroy(tmp);
			return NULL;
		}

		/*
		 * The blob tmp now contains the uncompressed data
		 * of len bytes, i.e. the repetitive bytes have been removed
		 */

		/*
		 * Parse the header
		 *
		 * TODO: set filename argument in message as well
		 */
		byte = data[0];
		if(byte >= len) {
			(*destroy)(ret);
			blobDestroy(tmp);
			return NULL;
		}
		filename = cli_malloc(byte + 1);
		if(filename == NULL) {
			(*destroy)(ret);
			blobDestroy(tmp);
			return NULL;
		}
		memcpy(filename, &data[1], byte);
		filename[byte] = '\0';
		(*setFilename)(ret, dir, filename);
		/*ptr = cli_malloc(strlen(filename) + 6);*/
		ptr = cli_malloc(byte + 6);
		if(ptr) {
			sprintf(ptr, "name=%s", filename);
			messageAddArgument(m, ptr);
			free(ptr);
		}

		/*
		 * skip over length, filename, version, type, creator and flags
		 */
		byte = 1 + byte + 1 + 4 + 4 + 2;

		/*
		 * Set len to be the data fork length
		 */
		dataforklen = ((data[byte] << 24) & 0xFF000000) |
			((data[byte + 1] << 16) & 0xFF0000) |
			((data[byte + 2] << 8) & 0xFF00) |
			(data[byte + 3] & 0xFF);

		resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) |
			((data[byte + 5] << 16) & 0xFF0000) |
			((data[byte + 6] << 8) & 0xFF00) |
			(data[byte + 7] & 0xFF);

		cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n",
			filename, dataforklen, resourceforklen);

		free((char *)filename);

		/*
		 * Skip over data fork length, resource fork length and CRC
		 */
		byte += 10;

		l = blobGetDataSize(tmp) - byte;

		if(l < dataforklen) {
			cli_dbgmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
				dataforklen, l);
			dataforklen = l;
		}
		if(setCTX && m->ctx)
			(*setCTX)(ret, m->ctx);

		(*addData)(ret, &data[byte], dataforklen);

		blobDestroy(tmp);

		if(destroy_text)
			m->binhex = NULL;

		if((m->numberOfEncTypes == 0) ||
		   ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) {
			cli_dbgmsg("Finished exporting binhex file\n");
			return ret;
		}
	}

	if(m->numberOfEncTypes == 0) {
		/*
		 * Fast copy
		 */
		cli_dbgmsg("messageExport: Entering fast copy mode\n");

#if	0
		filename = messageGetFilename(m);

		if(filename == NULL) {
			cli_dbgmsg("Unencoded attachment sent with no filename\n");
			messageAddArgument(m, "name=attachment");
		} else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0))
			/*
			 * Some virus attachments don't say how they've
			 * been encoded. We assume base64
			 */
			messageSetEncoding(m, "base64");
#else
		filename = (char *)messageFindArgument(m, "filename");
		if(filename == NULL) {
			filename = (char *)messageFindArgument(m, "name");

			if(filename == NULL) {
				cli_dbgmsg("Unencoded attachment sent with no filename\n");
				messageAddArgument(m, "name=attachment");
			} else
				/*
				 * Some virus attachments don't say how they've
				 * been encoded. We assume base64
				 */
				messageSetEncoding(m, "base64");
		}
#endif

		(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");

		if(filename)
			free((char *)filename);

		if(m->numberOfEncTypes == 0)
			return exportText(messageGetBody(m), ret, destroy_text);
	}

	if(setCTX && m->ctx)
		(*setCTX)(ret, m->ctx);

	for(i = 0; i < m->numberOfEncTypes; i++) {
		encoding_type enctype = m->encodingTypes[i];
		size_t size;

		if(i > 0) {
			void *newret;

			newret = (*create)();
			if(newret == NULL) {
				cli_dbgmsg("Not all decoding algorithms were run\n");
				return ret;
			}
			(*destroy)(ret);
			ret = newret;
		}
		cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype);
		/*
		 * Find the filename to decode
		 */
		if(((enctype == YENCODE) || (i == 0)) && yEncBegin(m)) {
			const char *f;

			/*
			 * TODO: handle multipart yEnc encoded files
			 */
			t_line = yEncBegin(m);
			f = lineGetData(t_line->t_line);

			if((filename = strstr(f, " name=")) != NULL) {
				filename = cli_strdup(&filename[6]);
				if(filename) {
					cli_chomp(filename);
					strstrip(filename);
					cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
				}
			}

			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
			if(filename) {
				free((char *)filename);
				filename = NULL;
			}
			t_line = t_line->t_next;
			enctype = YENCODE;
			m->yenc = NULL;
		} else {
			if(enctype == UUENCODE) {
				/*
				 * The body will have been stripped out by the
				 * fast track visa system. Treat as plain/text,
				 * which means we'll still scan for funnies
				 * outside of the uuencoded portion.
				 */
				cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
				enctype = m->encodingTypes[i] = NOENCODING;
			}
			filename = messageGetFilename(m);

			if(filename == NULL) {
				cli_dbgmsg("Attachment sent with no filename\n");
				messageAddArgument(m, "name=attachment");
			} else if(enctype == NOENCODING)
				/*
				 * Some virus attachments don't say how
				 * they've been encoded. We assume
				 * base64.
				 *
				 * FIXME: don't do this if it's a fall
				 * through from uuencode
				 */
				messageSetEncoding(m, "base64");

			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");

			t_line = messageGetBody(m);
		}

		if(filename)
			free((char *)filename);

		/*
		 * t_line should now point to the first (encoded) line of the
		 * message
		 */
		if(t_line == NULL) {
			cli_dbgmsg("Empty attachment not saved\n");
			(*destroy)(ret);
			return NULL;
		}

		if(enctype == NOENCODING) {
			/*
			 * Fast copy
			 */
			if(i == m->numberOfEncTypes - 1) {
				/* last one */
				(void)exportText(t_line, ret, destroy_text);
				break;
			}
			(void)exportText(t_line, ret, 0);
			continue;
		}

		size = 0;
		do {
			unsigned char smallbuf[1024];
			unsigned char *uptr, *data;
			const char *line = lineGetData(t_line->t_line);
			unsigned char *bigbuf;
			size_t datasize;

			if(enctype == YENCODE) {
				if(line == NULL)
					continue;
				if(strncmp(line, "=yend ", 6) == 0)
					break;
			}

			/*
			 * Add two bytes for '\n' and '\0'
			 */
			datasize = (line) ? strlen(line) + 2 : 0;

			if(datasize >= sizeof(smallbuf))
				data = bigbuf = (unsigned char *)cli_malloc(datasize);
			else {
				bigbuf = NULL;
				data = smallbuf;
				datasize = sizeof(smallbuf);
			}

			uptr = decodeLine(m, enctype, line, data, datasize);
			if(uptr == NULL) {
				if(data == bigbuf)
					free(data);
				break;
			}

			if(uptr != data) {
				assert((size_t)(uptr - data) < datasize);
				(*addData)(ret, data, (size_t)(uptr - data));
				size += (size_t)(uptr - data);
			}

			if(data == bigbuf)
				free(data);

			/*
			 * According to RFC2045, '=' is used to pad out
			 * the last byte and should be used as evidence
			 * of the end of the data. Some mail clients
			 * annoyingly then put plain text after the '='
			 * byte and viruses exploit this bug. Sigh
			 */
			/*if(enctype == BASE64)
				if(strchr(line, '='))
					break;*/
			if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
				lineUnlink(t_line->t_line);
				t_line->t_line = NULL;
			}
		} while((t_line = t_line->t_next) != NULL);

		cli_dbgmsg("Exported %lu bytes using enctype %d\n",
			(unsigned long)size, (int)enctype);

		/* Verify we have nothing left to flush out */
		if(m->base64chars) {
			unsigned char data[4];
			unsigned char *ptr;

			ptr = base64Flush(m, data);
			if(ptr)
				(*addData)(ret, data, (size_t)(ptr - data));
		}
	}

	return ret;
}

/*
 * Flush any base64 characters that were carried over from the last decoded
 * line into buf.
 *
 * Returns whatever decode() returns (one byte past the flushed data) when
 * there was something to flush, or NULL when nothing was pending.
 * The flush path of decode() writes at most three bytes into buf.
 */
unsigned char *
base64Flush(message *m, unsigned char *buf)
{
	unsigned char *end;

	cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);

	/* Nothing was left over from the last line: nothing to flush */
	if(m->base64chars == 0)
		return NULL;

	/* A NULL input asks decode() to drain the carried-over characters */
	end = decode(m, NULL, buf, base64, FALSE);

	m->base64chars = 0;

	return end;
}

/*
 * Export the message into a file whose name is built from dir, the current
 * time, md5id and the part number.
 *
 * Returns CL_SUCCESS when messageExport() produced a fileblob (which is
 * destroyed here), CL_EFORMAT otherwise.
 */
int messageSavePartial(message *m, const char *dir, const char *md5id, unsigned part)
{
	char path[1024];
	unsigned long now;
	fileblob *saved;

	cli_dbgmsg("messageSavePartial\n");

	now = time(NULL);
	snprintf(path, sizeof(path), "%s/clamav-partial-%lu_%s-%u", dir, now, md5id, part);

	saved = messageExport(m, path,
		(void *(*)(void))fileblobCreate,
		(void(*)(void *))fileblobDestroy,
		(void(*)(void *, const char *, const char *))fileblobPartialSet,
		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
		(void *(*)(text *, void *, int))textToFileblob,
		(void(*)(void *, cli_ctx *))fileblobSetCTX,
		0);

	if(saved == NULL)
		return CL_EFORMAT;

	fileblobDestroy(saved);

	return CL_SUCCESS;
}

/*
 * Run messageExport() with the fileblob callbacks, so that the decoded
 * contents of the message end up in a fileblob.
 * When destroy is non-zero the message's body text is released afterwards.
 * The caller must free the returned fileblob
 */
fileblob *
messageToFileblob(message *m, const char *dir, int destroy)
{
	fileblob *exported;

	cli_dbgmsg("messageToFileblob\n");

	exported = messageExport(m, dir,
		(void *(*)(void))fileblobCreate,
		(void(*)(void *))fileblobDestroy,
		(void(*)(void *, const char *, const char *))fileblobSetFilename,
		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
		(void *(*)(text *, void *, int))textToFileblob,
		(void(*)(void *, cli_ctx *))fileblobSetCTX,
		destroy);

	/* The caller no longer needs the source text once it's exported */
	if(destroy) {
		if(m->body_first != NULL) {
			textDestroy(m->body_first);
			m->body_first = m->body_last = NULL;
		}
	}

	return exported;
}

/*
 * Run messageExport() with the blob callbacks, so that the decoded contents
 * of the message end up in a blob. No scan context is passed on.
 * When destroy is non-zero the message's body text is released afterwards.
 * The caller must free the returned blob
 */
blob *
messageToBlob(message *m, int destroy)
{
	blob *exported;

	cli_dbgmsg("messageToBlob\n");

	exported = messageExport(m, NULL,
		(void *(*)(void))blobCreate,
		(void(*)(void *))blobDestroy,
		(void(*)(void *, const char *, const char *))blobSetFilename,
		(void(*)(void *, const unsigned char *, size_t))blobAddData,
		(void *(*)(text *, void *, int))textToBlob,
		(void(*)(void *, cli_ctx *))NULL,
		destroy);

	/* The caller no longer needs the source text once it's exported */
	if(destroy) {
		if(m->body_first != NULL) {
			textDestroy(m->body_first);
			m->body_first = m->body_last = NULL;
		}
	}

	return exported;
}

/*
 * Helper for messageToText(): append one new node to the list described by
 * *first and *last. The node's t_line and t_next are NOT initialised here,
 * the caller fills in t_line and terminates the list when it has finished.
 *
 * Returns the new tail, or NULL if the allocation failed (or an earlier
 * allocation had already failed, leaving *last NULL). After a failed
 * allocation the previous tail's t_next is NULL, so the list can still be
 * walked and destroyed.
 */
static text *
messageToTextAppend(text **first, text **last)
{
	if(*first == NULL)
		*first = *last = cli_malloc(sizeof(text));
	else if(*last) {
		(*last)->t_next = cli_malloc(sizeof(text));
		*last = (*last)->t_next;
	}
	return *last;
}

/*
 * Decode and transfer the contents of the message into a text area
 * The caller must free the returned text
 *
 * The body is scanned once for each claimed encoding type. Lines that need
 * no decoding are linked rather than copied.
 *
 * Returns NULL if nothing was produced, if memory ran out during a plain
 * copy, or if the message claims to be uuencoded or yEncoded without a
 * begin line. If memory runs out while decoding, whatever has been decoded
 * so far is returned.
 */
text *
messageToText(message *m)
{
	int i;
	text *first = NULL, *last = NULL;
	const text *t_line;

	assert(m != NULL);

	if(m->numberOfEncTypes == 0) {
		/*
		 * Fast copy
		 */
		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
			if(messageToTextAppend(&first, &last) == NULL) {
				if(first)
					textDestroy(first);
				return NULL;
			}
			if(t_line->t_line)
				last->t_line = lineLink(t_line->t_line);
			else
				last->t_line = NULL;	/* empty line */
		}
		if(last)
			last->t_next = NULL;

		return first;
	}
	/*
	 * Scan over the data a number of times once for each claimed encoding
	 * type
	 */
	for(i = 0; i < m->numberOfEncTypes; i++) {
		const encoding_type enctype = m->encodingTypes[i];

		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
			i, (int)enctype);

		switch(enctype) {
			case NOENCODING:
			case BINARY:
			case EIGHTBIT:
				/*
				 * Fast copy
				 */
				for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
					if(messageToTextAppend(&first, &last) == NULL) {
						if(first) {
							textDestroy(first);
						}
						return NULL;
					}
					if(t_line->t_line)
						last->t_line = lineLink(t_line->t_line);
					else
						last->t_line = NULL;	/* empty line */
				}
				continue;
			case UUENCODE:
				cli_warnmsg("messageToText: Unexpected attempt to handle uuencoded file\n");
				if(first) {
					if(last)
						last->t_next = NULL;
					textDestroy(first);
				}
				return NULL;
			case YENCODE:
				t_line = yEncBegin(m);

				if(t_line == NULL) {
					/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
					if(first) {
						if(last)
							last->t_next = NULL;
						textDestroy(first);
					}
					return NULL;
				}
				/*
				 * The encoded data starts on the line after
				 * the begin line. Don't fall into the default
				 * case: it would throw that position away and
				 * restart from the top of the body
				 */
				t_line = t_line->t_next;
				break;
			default:
				if((i == 0) && binhexBegin(m))
					cli_warnmsg("Binhex messages not supported yet.\n");
				t_line = messageGetBody(m);
		}

		for(; t_line; t_line = t_line->t_next) {
			unsigned char smallbuf[1024];
			unsigned char *data = smallbuf;
			unsigned char *bigbuf = NULL;
			unsigned char *uptr;
			size_t datasize = sizeof(smallbuf);
			const char *line = lineGetData(t_line->t_line);

			if(line == NULL) {
				/*
				 * ignore blanks - breaks RFC which is
				 * probably the point!
				 */
				if(enctype == BASE64)
					continue;
			} else {
				/*
				 * Decoded data is never longer than the
				 * encoded line plus '\n' and '\0'. Don't rely
				 * on an assert to keep an over long line out
				 * of the stack buffer, asserts may be
				 * compiled out
				 */
				const size_t needed = strlen(line) + 2;

				if(needed > sizeof(smallbuf)) {
					bigbuf = (unsigned char *)cli_malloc(needed);
					if(bigbuf == NULL)
						break;
					data = bigbuf;
					datasize = needed;
				}
			}

			uptr = decodeLine(m, enctype, line, data, datasize);

			if(uptr == NULL) {
				free(bigbuf);
				break;
			}

			assert(uptr <= &data[datasize]);

			if(messageToTextAppend(&first, &last) == NULL) {
				free(bigbuf);
				break;
			}

			/*
			 * If the decoded line is the same as the encoded
			 * there's no need to take a copy, just link it.
			 * Note that the comparison is done without the
			 * trailing newline that the decoding routine may have
			 * added - that's why there's a strncmp rather than a
			 * strcmp - that'd be bad for MIME decoders, but is OK
			 * for AV software
			 */
			if((data[0] == '\n') || (data[0] == '\0'))
				last->t_line = NULL;
			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
#ifdef	CL_DEBUG
				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
#endif
				last->t_line = lineLink(t_line->t_line);
			} else
				last->t_line = lineCreate((char *)data);

			/* free(NULL) is harmless when the stack buffer was used */
			free(bigbuf);

			/* '=' padding marks the end of the base64 data */
			if(line && (enctype == BASE64) && strchr(line, '='))
				break;
		}
		if(m->base64chars) {
			unsigned char data[4];

			/* Flush out anything carried over from the last line */
			memset(data, '\0', sizeof(data));
			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
				if(messageToTextAppend(&first, &last) != NULL)
					last->t_line = lineCreate((char *)data);
			}
			m->base64chars = 0;
		}
	}

	if(last)
		last->t_next = NULL;

	return first;
}

/*
 * Return the line stored in the message's yenc field. Callers in this file
 * treat it as the yEnc begin line and test it against NULL.
 */
text *
yEncBegin(message *m)
{
	text *begin = m->yenc;

	return begin;
}

/*
 * Find the start of the BINHEX message (if any).
 * The disabled version scanned the body for the BinHex banner; the live
 * version just returns the line stored in the message's binhex field.
 */
#if	0
const text *
binhexBegin(message *m)
{
	const text *t_line;

	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
		if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
			return t_line;

	return NULL;
}
#else
text *
binhexBegin(message *m)
{
	text *begin = m->binhex;

	return begin;
}
#endif

/*
 * Find the start of a bounce message. There is no standard for these, not
 * even a convention, so don't expect this to be foolproof.
 * The disabled version scanned the body; the live version just returns the
 * line stored in the message's bounce field.
 */
#if	0
text *
bounceBegin(message *m)
{
	const text *t_line;

	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
		if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
			return t_line;

	return NULL;
}
#else
text *
bounceBegin(message *m)
{
	text *begin = m->bounce;

	return begin;
}
#endif

/*
 * If a message does not contain another message which could be harmful,
 * it is deemed to be safe.
 *
 * TODO: ensure nothing can get through this
 *
 * TODO: check to see if we need to
 * find anything else, perhaps anything
 * from the RFC821 table?
 *
 * The disabled version scanned the body for a Content-Transfer-Encoding
 * header; the live version returns the line stored in the message's
 * encoding field.
 */
#if	0
int
messageIsAllText(const message *m)
{
	const text *t;

	for(t = messageGetBody(m); t; t = t->t_next)
		if(strncasecmp(t->t_text,
			"Content-Transfer-Encoding",
			strlen("Content-Transfer-Encoding")) == 0)
				return 0;

	return 1;
}
#else
text *
encodingLine(message *m)
{
	text *found = m->encoding;

	return found;
}
#endif

/*
 * Decode a line and add it to a buffer, return the end of the buffer
 * to help appending callers. There is no new line at the end of "line"
 *
 *	m	the message, used for the base64 carry-over state
 *	et	the encoding to undo
 *	line	the encoded line, NULL for an empty line
 *	buf	where to put the decoded data
 *	buflen	the size of buf in bytes
 *
 * The quoted-printable and yEnc decoders stop before running off the end of
 * buf, and the uuencode decoder refuses lines that can't fit. The other
 * encodings do not look at buflen: callers should supply at least
 * strlen(line) + 2 bytes, as messageExport does.
 *
 * Apart from the unencoded case, which returns what cli_strrcpy returns,
 * the result points at a terminating '\0' written after the decoded data
 */
unsigned char *
decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
{
	size_t len, reallen, room;
	bool softbreak;
	char *p2, *copy;
	char base64buf[RFC2045LENGTH + 1];

	/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/

	assert(m != NULL);
	assert(buf != NULL);

	switch(et) {
		case BINARY:
			/*
			 * TODO: find out what this is, encoded as binary??
			 */
			/* fall through */
		case NOENCODING:
		case EIGHTBIT:
		default:	/* unknown encoding type - try our best */
			if(line)	/* empty line? */
				buf = (unsigned char *)cli_strrcpy((char *)buf, line);
			/* Put the new line back in */
			return (unsigned char *)cli_strrcpy((char *)buf, "\n");

		case QUOTEDPRINTABLE:
			if(line == NULL) {	/* empty line */
				*buf++ = '\n';
				break;
			}

			/*
			 * Each pass round the loop stores at most one byte.
			 * Keep two bytes in hand for the '\n' and the '\0'
			 * that are added afterwards, otherwise a line of
			 * buflen characters would overrun buf by two
			 */
			room = (buflen > 2) ? (buflen - 2) : 0;

			softbreak = FALSE;
			while(room && *line) {
				if(*line == '=') {
					unsigned char byte;

					if((*++line == '\0') || (*line == '\n')) {
						softbreak = TRUE;
						/* soft line break */
						break;
					}

					byte = hex(*line);

					if((*++line == '\0') || (*line == '\n')) {
						/*
						 * broken e-mail, not
						 * adhering to RFC2045
						 */
						*buf++ = byte;
						break;
					}

					/*
					 * Handle messages that use a broken
					 * quoted-printable encoding of
					 * href=\"http://, instead of =3D
					 */
					if(byte != '=')
						byte = (byte << 4) | hex(*line);
					else
						line -= 2;

					*buf++ = byte;
				} else
					*buf++ = *line;
				++line;
				--room;
			}
			if(!softbreak)
				/* Put the new line back in */
				*buf++ = '\n';
			break;

		case BASE64:
			if(line == NULL)
				break;
			/*
			 * RFC2045 sets the maximum length to 76 bytes
			 * but many e-mail clients ignore that
			 */
			if(strlen(line) < sizeof(base64buf)) {
				strcpy(base64buf, line);
				copy = base64buf;
			} else {
				copy = cli_strdup(line);
				if(copy == NULL)
					break;
			}

			/* Anything from the first '=' onwards is padding */
			p2 = strchr(copy, '=');
			if(p2)
				*p2 = '\0';

			sanitiseBase64(copy);

			/*
			 * Klez doesn't always put "=" on the last line
			 */
			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));

			if(copy != base64buf)
				free(copy);
			break;

		case UUENCODE:
			assert(m->base64chars == 0);

			if((line == NULL) || (*line == '\0'))	/* empty line */
				break;
			if(strcasecmp(line, "end") == 0)
				break;
			if(isuuencodebegin(line))
				break;

			if((line[0] & 0x3F) == ' ')
				break;

			/*
			 * reallen contains the number of bytes that were
			 *	encoded
			 */
			reallen = (size_t)uudecode(*line++);
			if(reallen == 0)
				break;
			if(reallen > 62)
				break;
			len = strlen(line);

			if((len > buflen) || (reallen > len))
				/*
				 * In practice this should never occur since
				 * the maximum length of a uuencoded line is
				 * 62 characters
				 */
				cli_dbgmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
			else {
				(void)decode(m, line, buf, uudecode, (len & 3) == 0);
				buf = &buf[reallen];
			}
			m->base64chars = 0;	/* this happens with broken uuencoded files */
			break;
		case YENCODE:
			if((line == NULL) || (*line == '\0'))	/* empty line */
				break;
			if(strncmp(line, "=yend ", 6) == 0)
				break;

			/*
			 * One byte is stored per pass, keep one byte in hand
			 * for the terminating '\0'
			 */
			room = (buflen > 1) ? (buflen - 1) : 0;

			while(*line && room) {
				if(*line == '=') {
					/* escaped character */
					if(*++line == '\0')
						break;
					*buf++ = ((*line++ - 64) & 255);
				} else
					*buf++ = ((*line++ - 42) & 255);
				--room;
			}
			break;
	}

	*buf = '\0';
	return buf;
}

/*
 * Remove the non base64 characters such as spaces from a string. Spaces
 * shouldn't appear mid string in base64 files, but some broken mail clients
 * ignore such errors rather than discarding the mail, and virus writers
 * exploit this bug
 *
 * The string is compacted in place in a single pass: every character that
 * base64Table accepts is copied down over the ones that have been dropped,
 * so the cost is linear in the length of the line rather than quadratic.
 */
static void
sanitiseBase64(char *s)
{
	const char *src;
	char *dst;

	cli_dbgmsg("sanitiseBase64 '%s'\n", s);

	for(src = dst = s; *src; src++)
		if(base64Table[(unsigned int)(*src & 0xFF)] != 255)
			*dst++ = *src;

	*dst = '\0';
}

/*
 * Turn groups of four encoded characters into three bytes, using "decoder"
 * to map each character to its 6 bit value (base64 and uudecode are the
 * decoders used in this file).
 *
 * Returns one byte after the end of the decoded data in "out"
 *
 * Update m->base64chars with the last few bytes of data that we haven't
 * decoded. After the last line is found, decode will be called with in = NULL
 * to flush these out
 *
 * There are three modes:
 *	isFast set and nothing carried over: the input is consumed four
 *		characters at a time with no end of string checks in between,
 *		so the caller must only set isFast when the length of "in" is
 *		a multiple of four
 *	in == NULL: flush the carried over values (at most three bytes are
 *		written)
 *	otherwise: decode complete groups, and stash an incomplete final
 *		group in m->base64_1, m->base64_2 and m->base64_3
 *
 * Nothing here checks the size of "out"
 */
static unsigned char *
decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
{
	unsigned char b1, b2, b3, b4;
	unsigned char cb1, cb2, cb3;	/* carried over from last line */

	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
		in ? strlen(in) : 0,
		isFast, m->base64chars);*/

	cb1 = cb2 = cb3 = '\0';

	/*
	 * Pick up the values left over from the previous call. If there are
	 * any, the fast path can't be used since the groups no longer line
	 * up with the start of the input
	 */
	switch(m->base64chars) {
		case 3:
			cb3 = m->base64_3;
			/* FALLTHROUGH */
		case 2:
			cb2 = m->base64_2;
			/* FALLTHROUGH */
		case 1:
			cb1 = m->base64_1;
			isFast = FALSE;
			break;
		default:
			assert(m->base64chars <= 3);
	}

	if(isFast)
		/* Fast decoding if not last line */
		while(*in) {
			b1 = (*decoder)(*in++);
			b2 = (*decoder)(*in++);
			b3 = (*decoder)(*in++);
			/*
			 * Put this line here to help on some compilers which
			 * can make use of some architecure's ability to
			 * multiprocess when different variables can be
			 * updated at the same time - here b3 is used in
			 * one line, b1/b2 in the next and b4 in the next after
			 * that, b3 and b4 rely on in but b1/b2 don't
			 */
			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
			b4 = (*decoder)(*in++);
			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
			*out++ = (b3 << 6) | (b4 & 0x3F);
		}
	else if(in == NULL) {	/* flush */
		int nbytes;

		if(m->base64chars == 0)
			return out;

		cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
			isalnum(cb1) ? cb1 : '@',
			isalnum(cb2) ? cb2 : '@',
			isalnum(cb3) ? cb3 : '@');

		/*
		 * Work out how many of the carried over values to use.
		 * With two values carried, the second only counts if it
		 * is non-zero
		 */
		m->base64chars--;
		b1 = cb1;
		nbytes = 1;

		if(m->base64chars) {
			m->base64chars--;
			b2 = cb2;

			if(m->base64chars) {
				nbytes = 2;
				m->base64chars--;
				b3 = cb3;
				nbytes = 3;	/* supersedes the 2 stored just above */
			} else if(b2)
				nbytes = 2;
		}

		switch(nbytes) {
			case 3:
				b4 = '\0';
				/* fall through */
			case 4:	/* nbytes is never 4 here, only reached from case 3 */
				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
				/* only emit the third byte if b3 has bits left for it */
				if((nbytes == 4) || (b3&0x3))
					*out++ = (b3 << 6) | (b4 & 0x3F);
				break;
			case 2:
				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
				/* only emit the second byte if b2 has bits left for it */
				if((b2 << 4) & 0xFF)
					*out++ = b2 << 4;
				break;
			case 1:
				*out++ = b1 << 2;
				break;
			default:
				assert(0);
		}
	} else while(*in) {
		int nbytes;

		/*
		 * Build the next group of up to four values, taking carried
		 * over values first and then characters from the input.
		 * nbytes counts how many values the group ended up with
		 */
		if(m->base64chars) {
			m->base64chars--;
			b1 = cb1;
		} else
			b1 = (*decoder)(*in++);

		if(*in == '\0') {
			b2 = '\0';
			nbytes = 1;
		} else {
			if(m->base64chars) {
				m->base64chars--;
				b2 = cb2;
			} else
				b2 = (*decoder)(*in++);

			if(*in == '\0') {
				b3 = '\0';
				nbytes = 2;
			} else {
				if(m->base64chars) {
					m->base64chars--;
					b3 = cb3;
				} else
					b3 = (*decoder)(*in++);

				if(*in == '\0') {
					b4 = '\0';
					nbytes = 3;
				} else {
					b4 = (*decoder)(*in++);
					nbytes = 4;
				}
			}
		}

		switch(nbytes) {
			case 4:
				/* complete group: emit three bytes and carry on */
				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
				*out++ = (b3 << 6) | (b4 & 0x3F);
				continue;
			/*
			 * Incomplete group: save the values for the next call.
			 * The cases deliberately fall through so that every
			 * value collected so far is stored
			 */
			case 3:
				m->base64_3 = b3;
			case 2:
				m->base64_2 = b2;
			case 1:
				m->base64_1 = b1;
				m->base64chars = nbytes;
				break;
			default:
				assert(0);
		}
		break;	/* nbytes != 4 => EOL */
	}
	return out;
}

/*
 * Convert one hexadecimal digit (either case) to its value, 0 to 15.
 * Anything else is reported and '=' is returned, which is outside that
 * range so callers can tell the difference.
 */
static unsigned char
hex(char c)
{
	/*
	 * The <ctype.h> routines are only defined for EOF and values that
	 * fit in an unsigned char, so don't hand them a plain char which
	 * may be negative for 8 bit data
	 */
	if(isdigit((unsigned char)c))
		return c - '0';
	if((c >= 'A') && (c <= 'F'))
		return c - 'A' + 10;
	if((c >= 'a') && (c <= 'f'))
		return c - 'a' + 10;
	cli_dbgmsg("Illegal hex character '%c'\n", c);

	/*
	 * Some mails (notably some spam) break RFC2045 by failing to encode
	 * the '=' character
	 */
	return '=';
}

/*
 * Look up the 6 bit value of a base64 character in base64Table.
 * Characters that the table marks as illegal (255) decode as 63.
 */
static unsigned char
base64(char c)
{
	const unsigned int idx = (unsigned int)(c & 0xFF);
	unsigned char value = base64Table[idx];

	if(value == 255)
		/* illegal character in base64 encoding */
		value = 63;

	return value;
}

/*
 * Map a uuencoded character to its value by removing the ' ' offset.
 * Characters below ' ' wrap round, since the result is an unsigned char.
 */
static unsigned char
uudecode(char c)
{
	const int value = c - ' ';

	return (unsigned char)value;
}

/*
 * These are the only arguments we're interested in.
 * Do 'fgrep messageFindArgument *.c' if you don't believe me!
 * It's probably not good doing this since each time a new
 * messageFindArgument is added I need to remember to look here,
 * but it can save a lot of memory...
 *
 * Returns 1 if arg starts (ignoring case) with one of the wanted names,
 * 0 if it can be thrown away. Only the start of arg is compared.
 */
static int
usefulArg(const char *arg)
{
	static const struct {
		const char *prefix;
		size_t len;
	} wanted[] = {
		{ "name", 4 },
		{ "filename", 8 },
		{ "boundary", 8 },
		{ "protocol", 8 },
		{ "id", 2 },
		{ "number", 6 },
		{ "total", 5 },
		{ "type", 4 }
	};
	size_t i;

	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
		if(strncasecmp(arg, wanted[i].prefix, wanted[i].len) == 0)
			return 1;

	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
	return 0;
}

/*
 * Remember the scan context in the message. messageExport hands it on to
 * its setCTX callback when both are set.
 */
void
messageSetCTX(message *m, cli_ctx *ctx)
{
	(*m).ctx = ctx;
}

/*
 * Returns TRUE if the message's isInfected flag is set, FALSE otherwise
 */
int
messageContainsVirus(const message *m)
{
	if(m->isInfected)
		return TRUE;

	return FALSE;
}

/*
 * We've run out of memory. Try to recover some by
 * deduping the message
 *
 * Each body line of at least 8 characters is compared with every later
 * line. A later line with the same contents is unlinked and replaced by
 * a link to the earlier one. The scan gives up once about 100,000 bytes
 * have been counted as saved.
 *
 * FIXME: this can take a long time. The real solution is for system admins
 *	to refrain from setting ulimits too low, then this routine won't be
 *	called
 */
static void
messageDedup(message *m)
{
	const text *t1;
	size_t saved = 0;

	cli_dbgmsg("messageDedup\n");

	/*
	 * NOTE(review): this picks the place to resume from, but the for
	 * loop below immediately resets t1 to m->body_first, so every call
	 * rescans from the top. Before starting the loop from here instead,
	 * confirm that dedupedThisFar can never be left pointing into a body
	 * that has since been destroyed
	 */
	t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first;

	for(t1 = m->body_first; t1; t1 = t1->t_next) {
		const char *d1;
		text *t2;
		line_t *l1;
		unsigned int r1;

		if(saved >= 100*1000)
			break;	/* that's enough */
		l1 = t1->t_line;
		if(l1 == NULL)
			continue;
		d1 = lineGetData(l1);
		if(strlen(d1) < 8)
			continue;	/* wouldn't recover many bytes */

		/*
		 * r1 tracks how many references l1 has, 255 is treated as
		 * the most a line can take
		 */
		r1 = (unsigned int)lineGetRefCount(l1);
		if(r1 == 255)
			continue;
		/*
		 * We don't want to foul up any pointers
		 */
		if(t1 == m->encoding)
			continue;
		if(t1 == m->bounce)
			continue;
		if(t1 == m->binhex)
			continue;
		if(t1 == m->yenc)
			continue;

		for(t2 = t1->t_next; t2; t2 = t2->t_next) {
			const char *d2;
			line_t *l2 = t2->t_line;

			if(l2 == NULL)
				continue;
			d2 = lineGetData(l2);
			if(d1 == d2)
				/* already linked */
				continue;
			if(strcmp(d1, d2) == 0) {
				/*
				 * presumably lineUnlink returns NULL when the
				 * last reference has gone and the storage was
				 * released - only then is anything saved
				 */
				if(lineUnlink(l2) == NULL)
					saved += strlen(d1) + 1;
				t2->t_line = lineLink(l1);
				if(t2->t_line == NULL) {
					/* m->dedupedThisFar is not updated on this path */
					cli_errmsg("messageDedup: out of memory\n");
					return;
				}
				if(++r1 == 255)
					break;
			}
		}
	}

	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
	/* NULL if the whole body was scanned, else where the scan gave up */
	m->dedupedThisFar = t1;
}

/*
 * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
 * free, or NULL on error.
 *
 * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
 *	 protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
 *
 * Three kinds of input are recognised:
 *	name*0*=...	encoded continuation, not decoded: the name is
 *			returned followed by "=rfc2231failure"
 *	name*0=...	continuation without encoding, or
 *	name*=...	charset'language'contents: returned as name=contents
 *			with the %XX escapes decoded
 * Anything else is returned as a copy with the top bit of each byte
 * removed. A malformed name*= argument gives an empty string.
 */
static char *
rfc2231(const char *in)
{
	const char *ptr;
	char *ret, *out;
	enum { LANGUAGE, CHARSET, CONTENTS } field;

	if(strstr(in, "*0*=") != NULL) {
		static const char failure[] = "=rfc2231failure";
		char *p;

		/*
		 * Don't handle continuations, decode what we can.
		 * The 16 extra bytes are for the failure marker and its
		 * terminator
		 */
		p = ret = cli_malloc(strlen(in) + 16);
		if(ret == NULL)
			return NULL;

		while(*in) {
			if(*in == '=') {
				memcpy(p, failure, sizeof(failure) - 1);
				p += sizeof(failure) - 1;
				break;
			}
			if(*in == '*') {
				/* skip everything up to the matching '*' */
				do
					in++;
				while((*in != '*') && *in);
				if(*in == '\0')
					break;
				in++;
				continue;
			}
			*p++ = *in++;
		}
		/*
		 * Always terminate, the input can run out without either
		 * an '=' or an unmatched '*' having been seen
		 */
		*p = '\0';

		cli_dbgmsg("RFC2231 parameter continuations are not yet handled, returning \"%s\"\n",
			ret);
		return ret;
	}

	ptr = strstr(in, "*0=");
	if(ptr != NULL)
		/*
		 * Parameter continuation, with no continuation
		 * Thunderbird 1.5 (and possibly other versions) does this
		 */
		field = CONTENTS;
	else {
		ptr = strstr(in, "*=");
		field = LANGUAGE;
	}

	if(ptr == NULL) {	/* quick return */
		out = ret = cli_strdup(in);
		if(ret == NULL)
			return NULL;
		while(*out)
			*out++ &= 0x7F;
		return ret;
	}

	cli_dbgmsg("rfc2231 '%s'\n", in);

	ret = cli_malloc(strlen(in) + 1);

	if(ret == NULL)
		return NULL;

	/*
	 * Copy the name of the argument, i.e. everything before the '*'
	 */
	out = ret;
	while(in != ptr)
		*out++ = *in++;

	*out++ = '=';

	/* Move on to the first character of the value */
	while(*ptr++ != '=')
		;

	/*
	 * We don't do anything with the language and character set, just skip
	 * over them!
	 */
	while(*ptr) {
		switch(field) {
			case LANGUAGE:
				if(*ptr == '\'')
					field = CHARSET;
				break;
			case CHARSET:
				if(*ptr == '\'')
					field = CONTENTS;
				break;
			case CONTENTS:
				if(*ptr == '%') {
					unsigned char byte;

					if((*++ptr == '\0') || (*ptr == '\n'))
						break;

					byte = hex(*ptr);

					if((*++ptr == '\0') || (*ptr == '\n')) {
						*out++ = byte;
						break;
					}

					byte <<= 4;
					byte += hex(*ptr);
					*out++ = byte;
				} else
					*out++ = *ptr;
		}
		if(*ptr++ == '\0')
			/*
			 * Incorrect message that has just one character after
			 * a '%'.
			 * FIXME: stash something in out that would, for example
			 *	treat %2 as %02, assuming field == CONTENTS
			 */
			break;
	}

	if(field != CONTENTS) {
		/* never got past the charset and language: not valid */
		free(ret);
		cli_dbgmsg("Invalid RFC2231 header: '%s'\n", in);
		return cli_strdup("");
	}

	*out = '\0';

	cli_dbgmsg("rfc2231 returns '%s'\n", ret);

	return ret;
}

/*
 * common/simil:
 *	From Computing Magazine 20/8/92
 * Returns %ge number from 0 to 100 - how similar are 2 strings?
 * 100 for exact match, < 0 for error
 */
struct	pstr_list {	/* internal stack */
	char	*d1;	/* copy of the string held by this entry */
	struct	pstr_list	*next;	/* entry below this one, NULL at the bottom */
};

/*
 * Status codes used by simil, push and pop. All are negative so that they
 * can't be mistaken for a score
 */
#define	OUT_OF_MEMORY	(-2)
#define	FAILURE	(-3)	/* pop: the stack is empty */
#define	SUCCESS	(-4)
#define	ARRAY_OVERFLOW	(-5)	/* simil: a string is too long to compare */
typedef	struct	pstr_list	ELEMENT1;
typedef	ELEMENT1		*LINK1;

static	int	push(LINK1 *top, const char *string);
static	int	pop(LINK1 *top, char *buffer);
static	unsigned	int	compare(char *ls1, char **rs1, char *ls2, char **rs2);

/* simil's work buffers are this big, so strings can be one less */
#define	MAX_PATTERN_SIZ	50	/* maximum string lengths */

/*
 * How similar are str1 and str2?
 *
 * The longest common run of characters is found (ignoring case), then the
 * pieces to the left and to the right of it are compared in the same way,
 * using a stack of string pairs in place of recursion. The score is twice
 * the number of matched characters as a percentage of the combined length
 * of the stripped strings.
 *
 * Returns 100 for strings that are the same ignoring case, otherwise the
 * score, or OUT_OF_MEMORY, or ARRAY_OVERFLOW if either stripped string is
 * longer than MAX_PATTERN_SIZ - 1.
 */
static int
simil(const char *str1, const char *str2)
{
	LINK1 top = NULL;
	unsigned int score = 0;
	size_t common, total;
	size_t len1, len2;
	char *rs1 = NULL, *rs2 = NULL;
	char *s1, *s2;
	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];

	if(strcasecmp(str1, str2) == 0)
		return 100;

	if((s1 = cli_strdup(str1)) == NULL)
		return OUT_OF_MEMORY;
	if((s2 = cli_strdup(str2)) == NULL) {
		free(s1);
		return OUT_OF_MEMORY;
	}

	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
		free(s1);
		free(s2);
		return ARRAY_OVERFLOW;
	}

	total += len2;

	if((push(&top, s1) == OUT_OF_MEMORY) ||
	   (push(&top, s2) == OUT_OF_MEMORY))
		goto nomem;

	while(pop(&top, ls2) == SUCCESS) {
		/* Strings are always pushed in pairs */
		if(pop(&top, ls1) != SUCCESS)
			break;
		common = compare(ls1, &rs1, ls2, &rs2);
		if(common > 0) {
			score += (unsigned int)common;

			/* what's left of the match, compare cut it off */
			len1 = strlen(ls1);
			len2 = strlen(ls2);

			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY))
					goto nomem;

			/* what's right of the match */
			len1 = strlen(rs1);
			len2 = strlen(rs2);

			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY))
					goto nomem;
		}
	}
	free(s1);
	free(s2);
	return (total > 0) ? ((score * 200) / total) : 0;

nomem:
	/* Don't leak whatever is still waiting on the stack */
	while(pop(&top, ls1) == SUCCESS)
		;
	free(s1);
	free(s2);
	return OUT_OF_MEMORY;
}

/*
 * Find the longest run of characters that ls1 and ls2 have in common,
 * ignoring case.
 *
 * If there is one, both strings are cut off where the run starts, leaving
 * the text to the left of it in ls1 and ls2, and *rs1 and *rs2 are set to
 * the text to the right of it. If there is nothing in common the strings
 * and *rs1 and *rs2 are left alone.
 *
 * Returns the length of the run, 0 if nothing matched
 */
static unsigned int
compare(char *ls1, char **rs1, char *ls2, char **rs2)
{
	unsigned int common, maxchars = 0;
	char *s1, *s2;
	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
	char *cs1, *cs2, *start1, *end1, *end2;

	end1 = ls1 + strlen(ls1);
	end2 = ls2 + strlen(ls2);

	/*
	 * Try each starting point in ls1 in turn. end1 and end2 are pulled
	 * in as longer runs are found, there's no point looking where a
	 * longer run could no longer fit
	 */
	for(start1 = ls1; start1 < end1; start1++) {
		s1 = start1;
		s2 = ls2;

		while(s1 < end1 && s2 < end2) {
			/*
			 * tolower is only defined for values that fit in an
			 * unsigned char, a plain char may be negative
			 */
			if(tolower((unsigned char)*s1) == tolower((unsigned char)*s2)) {
				cs1 = s1;
				cs2 = s2;
				common = 0;
				do {
					if(s1 == end1 || s2 == end2)
						break;
					s1++;
					s2++;
					common++;
				} while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));

				if(common > maxchars) {
					unsigned int diff = common - maxchars;
					maxchars = common;
					maxs1 = cs1;
					maxs2 = cs2;
					maxe1 = s1;
					maxe2 = s2;
					end1 -= diff;
					end2 -= diff;
				} else
					s1 -= common;
			} else
				s2++;
		}
	}
	/*
	 * Any match is at least one character long and so beats the initial
	 * maxchars of 0, which means the max pointers are set whenever
	 * maxchars is non-zero
	 */
	if(maxchars > 0) {
		*maxs1 = '\0';
		*maxs2 = '\0';
		*rs1 = maxe1;
		*rs2 = maxe2;
	}
	return maxchars;
}

/*
 * Put a copy of string on the top of the stack.
 * Returns SUCCESS, or OUT_OF_MEMORY in which case the stack is unchanged
 */
static int
push(LINK1 *top, const char *string)
{
	LINK1 element;

	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
		return OUT_OF_MEMORY;
	if((element->d1 = cli_strdup(string)) == NULL) {
		/* don't leak the element that never made it on to the stack */
		free(element);
		return OUT_OF_MEMORY;
	}
	element->next = *top;
	*top = element;

	return SUCCESS;
}

/*
 * Take the string off the top of the stack and copy it into buffer, which
 * must be big enough to hold it. The stack entry is freed.
 * Returns SUCCESS, or FAILURE if the stack is empty
 */
static int
pop(LINK1 *top, char *buffer)
{
	LINK1 head = *top;

	if(head == NULL)
		return FAILURE;

	(void)strcpy(buffer, head->d1);
	*top = head->next;
	free(head->d1);
	free((char *)head);

	return SUCCESS;
}

/*
 * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
 *
 * The line must start with a lower case 'b', be at least 10 characters
 * long, and look like "begin NNN " where NNN are three digits. Apart from
 * the first letter the word "begin" is matched ignoring case.
 *
 * Returns 1 if it does, 0 if it doesn't
 */
int
isuuencodebegin(const char *line)
{
	if(line[0] != 'b')	/* quick check */
		return 0;

	if(strlen(line) < 10)
		return 0;

	/*
	 * isdigit is only defined for values that fit in an unsigned char,
	 * a plain char may be negative for 8 bit data
	 */
	return (strncasecmp(line, "begin ", 6) == 0) &&
		isdigit((unsigned char)line[6]) &&
		isdigit((unsigned char)line[7]) &&
		isdigit((unsigned char)line[8]) &&
		(line[9] == ' ');
}