libclamav/message.c
b151ef55
 /*
  *  Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
cec5297a
  *
  * Change History:
  * $Log: message.c,v $
1f8eb426
  * Revision 1.137  2005/01/05 21:54:05  nigelhorne
  * Fuzzy logic lookup of content-type
  *
15bfc2e4
  * Revision 1.136  2005/01/05 21:07:15  nigelhorne
  * Fix crash when looking for uuencoded attachment fails
  *
a4c3d0a3
  * Revision 1.135  2005/01/01 12:52:58  nigelhorne
  * Some uuencoded viruses were getting through
  *
23e1c37c
  * Revision 1.134  2004/12/19 23:19:54  nigelhorne
  * Tidy
  *
53ee0b60
  * Revision 1.133  2004/12/19 13:50:08  nigelhorne
  * Tidy
  *
0d252351
  * Revision 1.132  2004/12/16 15:29:51  nigelhorne
  * Tidy
  *
da850706
  * Revision 1.131  2004/12/14 16:45:43  nigelhorne
  * Backtrack quoted-printable broken fix
  *
d16754aa
  * Revision 1.130  2004/12/14 10:27:57  nigelhorne
  * Better reclaiming when running short of memory
  *
4a46b8a2
  * Revision 1.129  2004/12/10 15:20:23  nigelhorne
  * Handle empty content-type fields
  *
f0146bc6
  * Revision 1.128  2004/12/01 12:12:27  nigelhorne
  * Part of rule 3 of paragraph 5.1 of RFC1521 was not being implemented
  *
1ecd46be
  * Revision 1.127  2004/11/30 12:03:57  nigelhorne
  * Handle unbalanced quote characters in headers better
  *
321d5c00
  * Revision 1.126  2004/11/28 22:06:39  nigelhorne
  * Tidy space only headers code
  *
a78256af
  * Revision 1.125  2004/11/28 21:05:49  nigelhorne
  * Handle headers with only spaces
  *
e66e8982
  * Revision 1.124  2004/11/28 16:24:12  nigelhorne
  * Allow lowercase hex characters in quoted-printable
  *
aedb0336
  * Revision 1.123  2004/11/27 21:54:27  nigelhorne
  * Tidy
  *
ef3cf57d
  * Revision 1.122  2004/11/27 13:16:54  nigelhorne
  * uuencode failures no longer fatal
  *
2d3d847c
  * Revision 1.121  2004/11/26 16:58:52  nigelhorne
  * Tidy
  *
ce0883f6
  * Revision 1.120  2004/11/26 12:04:22  nigelhorne
  * Fix small typo
  *
0856891e
  * Revision 1.119  2004/11/22 15:18:51  nigelhorne
  * Performance work
  *
8386482b
  * Revision 1.118  2004/11/18 18:09:08  nigelhorne
  * First draft of binhex.c
  *
fef5ad63
  * Revision 1.117  2004/11/18 10:39:56  nigelhorne
  * Added binhex filetype decoding
  *
cf25aed7
  * Revision 1.116  2004/11/17 17:32:15  nigelhorne
  * Find more bounce messages
  *
1a220adb
  * Revision 1.115  2004/11/12 22:21:57  nigelhorne
  * Binhex detection sped up
  *
4bdd7a93
  * Revision 1.114  2004/11/12 09:03:26  nigelhorne
  * Parse some malformed binhex files
  *
ad642304
  * Revision 1.113  2004/11/11 22:15:46  nigelhorne
  * Rewrite handling of folded headers
  *
74ca33e9
  * Revision 1.112  2004/11/09 19:40:06  nigelhorne
  * Find uuencoded files in preambles to multipart messages
  *
28ea5910
  * Revision 1.111  2004/11/08 16:27:09  nigelhorne
  * Fix crash with correctly encoded uuencode files
  *
802c37fc
  * Revision 1.110  2004/11/08 10:26:22  nigelhorne
  * Fix crash if x-yencode is mistakenly guessed
  *
5e5a162c
  * Revision 1.109  2004/11/07 16:39:00  nigelhorne
  * Handle para 4 of RFC2231
  *
63f87938
  * Revision 1.108  2004/10/31 09:28:27  nigelhorne
  * Improve the handling of blank filenames
  *
90639c82
  * Revision 1.107  2004/10/24 03:51:48  nigelhorne
  * Change encoding guess from warn to debug
  *
7ea0c270
  * Revision 1.106  2004/10/22 17:18:13  nigelhorne
  * Handle encoding type us-ascii - should be none
  *
b329234a
  * Revision 1.105  2004/10/22 15:53:45  nigelhorne
  * Fuzzy logic match for unknown encoding types
  *
d17de037
  * Revision 1.104  2004/10/19 13:53:55  nigelhorne
  * Don't add trailing NUL bytes
  *
e2a46f19
  * Revision 1.103  2004/10/17 09:29:21  nigelhorne
  * Advise to report broken emails
  *
9fc8173e
  * Revision 1.102  2004/10/16 20:53:28  nigelhorne
  * Tidy up
  *
bb2432d7
  * Revision 1.101  2004/10/16 13:53:52  nigelhorne
  * Handle '8 bit' and plain/text
  *
e24738dc
  * Revision 1.100  2004/10/14 17:45:55  nigelhorne
  * Try to reclaim some memory if it becomes low when decoding
  *
c6c33c1d
  * Revision 1.99  2004/10/12 10:40:48  nigelhorne
  * Remove shadow declaration of isblank
  *
fb405afc
  * Revision 1.98  2004/10/11 10:56:17  nigelhorne
  * Reimplement squeeze as sanitiseBase64
  *
963e073f
  * Revision 1.97  2004/10/06 17:21:46  nigelhorne
  * Code tidy
  *
9a7398ee
  * Revision 1.96  2004/10/05 15:46:18  nigelhorne
  * First draft of code to handle RFC1341
  *
5ae253d2
  * Revision 1.95  2004/10/05 10:58:00  nigelhorne
  * Table driven base64 decoding
  *
b62a19da
  * Revision 1.94  2004/10/04 12:18:08  nigelhorne
  * Better warning message about PGP attachments not being scanned
  *
5eeffbb9
  * Revision 1.93  2004/10/01 13:49:22  nigelhorne
  * Minor code tidy
  *
c1e96196
  * Revision 1.92  2004/09/30 08:58:56  nigelhorne
  * Remove empty lines
  *
290ba18f
  * Revision 1.91  2004/09/28 18:39:48  nigelhorne
  * Don't copy if the decoded == the encoded
  *
1e192f4d
  * Revision 1.90  2004/09/22 16:24:22  nigelhorne
  * Fix error return
  *
6fd27194
  * Revision 1.89  2004/09/22 16:19:13  nigelhorne
  * Fix error return
  *
0b08b624
  * Revision 1.88  2004/09/21 14:55:26  nigelhorne
  * Handle blank lines in text/plain messages
  *
137740e1
  * Revision 1.87  2004/09/20 12:44:03  nigelhorne
  * Fix parsing error on mime arguments
  *
4d9c0ca8
  * Revision 1.86  2004/09/18 14:59:26  nigelhorne
  * Code tidy
  *
00f95393
  * Revision 1.85  2004/09/17 13:47:19  nigelhorne
  * Handle yEnc attachments
  *
2bcec72b
  * Revision 1.84  2004/09/17 09:48:53  nigelhorne
  * Handle attempts to hide mime type
  *
82189c76
  * Revision 1.83  2004/09/16 15:56:45  nigelhorne
  * Handle double colons
  *
de509b8e
  * Revision 1.82  2004/09/16 14:23:57  nigelhorne
  * Handle quotes around mime type
  *
31b05bcb
  * Revision 1.81  2004/09/16 12:59:36  nigelhorne
  * Handle = and space as header separators
  *
73175a15
  * Revision 1.80  2004/09/16 11:35:08  nigelhorne
  * Minor code tidy
  *
8e3062cd
  * Revision 1.79  2004/09/16 10:05:59  nigelhorne
  * Use default decoders
  *
0e3b08fc
  * Revision 1.78  2004/09/15 18:08:23  nigelhorne
  * Handle multiple encoding types
  *
06d4e856
  * Revision 1.77  2004/09/13 16:44:01  kojm
  * minor cleanup
  *
b4cb4486
  * Revision 1.76  2004/09/03 15:59:00  nigelhorne
  * Handle boundary= "foo"
  *
a446de17
  * Revision 1.75  2004/08/23 13:15:16  nigelhorne
  * messageClearMarkers
  *
e6b25cd3
  * Revision 1.74  2004/08/22 15:08:59  nigelhorne
  * messageExport
  *
1e06e1ab
  * Revision 1.73  2004/08/22 10:34:24  nigelhorne
  * Use fileblob
  *
de617e3e
  * Revision 1.72  2004/08/21 11:57:57  nigelhorne
  * Use line.[ch]
  *
1d117881
  * Revision 1.71  2004/08/13 09:28:16  nigelhorne
  * Remove incorrect comment style
  *
bac2c10a
  * Revision 1.70  2004/08/08 19:13:15  nigelhorne
  * Better handling of bounces
  *
d32343c3
  * Revision 1.69  2004/08/04 18:59:19  nigelhorne
  * Tidy up multipart handling
  *
4b0e970e
  * Revision 1.68  2004/07/30 11:50:39  nigelhorne
  * Code tidy
  *
82348395
  * Revision 1.67  2004/07/26 08:31:04  nigelhorne
  * Fix embedded multi parts
  *
e982ca83
  * Revision 1.66  2004/07/20 15:17:44  nigelhorne
  * Remove overlapping strcpy
  *
285a69b4
  * Revision 1.65  2004/07/20 14:35:29  nigelhorne
  * Some MYDOOM.I were getting through
  *
2fe19b26
  * Revision 1.64  2004/07/02 23:00:57  kojm
  * new method of file type detection; HTML normalisation
  *
8dc9ee9e
  * Revision 1.63  2004/06/26 13:16:25  nigelhorne
  * Added newline to end of warning message
  *
ce73653f
  * Revision 1.62  2004/06/24 21:37:26  nigelhorne
  * Handle uuencoded files created with buggy software
  *
98685ac1
  * Revision 1.61  2004/06/22 04:08:02  nigelhorne
  * Optimise empty lines
  *
8a88fb93
  * Revision 1.60  2004/06/16 08:07:39  nigelhorne
  * Added thread safety
  *
6afdc3ab
  * Revision 1.59  2004/06/02 10:11:09  nigelhorne
  * Corrupted binHex could crash on non Linux systems
  *
dad64ecb
  * Revision 1.58  2004/06/01 09:07:19  nigelhorne
  * Corrupted binHex could crash on non Linux systems
  *
db42f46e
  * Revision 1.57  2004/05/27 16:52:47  nigelhorne
  * Short binhex data could confuse things
  *
bbf43447
  * Revision 1.56  2004/05/19 10:02:25  nigelhorne
  * Default encoding for attachments set to base64
  *
0b244177
  * Revision 1.55  2004/05/10 11:24:18  nigelhorne
  * Handle bounce message false positives
  *
edb35c0a
  * Revision 1.54  2004/05/06 18:01:25  nigelhorne
  * Force attachments marked as RFC822 messages to be scanned
  *
3db105a2
  * Revision 1.53  2004/04/29 08:59:24  nigelhorne
  * Tidied up SetDispositionType
  *
3b6eace4
  * Revision 1.52  2004/04/05 12:04:56  nigelhorne
  * Scan attachments with no filename
  *
4c927f11
  * Revision 1.51  2004/04/01 15:32:34  nigelhorne
  * Graceful exit if messageAddLine fails in strdup
  *
6638be41
  * Revision 1.50  2004/03/31 17:00:20  nigelhorne
  * Code tidy up free memory earlier
  *
ffd59a3e
  * Revision 1.49  2004/03/29 09:22:03  nigelhorne
  * Tidy up code and reduce shuffling of data
  *
02c9dc2a
  * Revision 1.48  2004/03/25 22:40:46  nigelhorne
  * Removed even more calls to realloc and some duplicated code
  *
627465e7
  * Revision 1.47  2004/03/21 17:19:49  nigelhorne
  * Handle bounce messages with no headers
  *
f5a4d7e8
  * Revision 1.46  2004/03/21 09:41:27  nigelhorne
  * Faster scanning for non MIME messages
  *
20fa2f53
  * Revision 1.45  2004/03/20 19:26:48  nigelhorne
  * Second attempt to handle all bounces
  *
3e556ea8
  * Revision 1.44  2004/03/20 17:39:23  nigelhorne
  * First attempt to handle all bounces
  *
e2c0ce9c
  * Revision 1.43  2004/03/20 13:23:44  nigelhorne
  * More bounces handled
  *
ee576466
  * Revision 1.42  2004/03/19 17:38:11  nigelhorne
  * Handle binary encoding as though it had no encoding
  *
ba17c9c3
  * Revision 1.41  2004/03/19 08:08:38  nigelhorne
  * Handle '8 bit' encoding as well as the RFC '8bit'
  *
b759d5eb
  * Revision 1.40  2004/03/18 21:51:41  nigelhorne
  * If a message only contains a single RFC822 message that has no encoding don't save for scanning
  *
b8b25118
  * Revision 1.39  2004/03/18 14:05:25  nigelhorne
  * Added bounce and handle text/plain encoding messages
  *
59bef287
  * Revision 1.38  2004/03/17 19:47:32  nigelhorne
  * Handle spaces in disposition type
  *
cea95096
  * Revision 1.37  2004/03/10 05:35:03  nigelhorne
  * Implemented a couple of small speed improvements
  *
ef1c883f
  * Revision 1.36  2004/03/07 15:11:48  nigelhorne
  * Fixed minor typo in bounce message
  *
6ba88eb8
  * Revision 1.35  2004/03/07 12:32:01  nigelhorne
  * Added new bounce message
  *
72a1b31d
  * Revision 1.34  2004/02/20 17:04:43  nigelhorne
  * Added new bounce delimiter
  *
26564cf5
  * Revision 1.33  2004/02/18 10:07:40  nigelhorne
  * Find some Yaha
  *
919fc8af
  * Revision 1.32  2004/02/17 20:43:50  nigelhorne
  * Added bounce message
  *
438edd40
  * Revision 1.31  2004/02/17 09:53:56  nigelhorne
  * Added bounce message
  *
d5539026
  * Revision 1.30  2004/02/13 14:23:56  nigelhorne
  * Add a new bounce delimiter
  *
a4b10000
  * Revision 1.29  2004/02/10 17:01:30  nigelhorne
  * Recognise a new type of bounce message
  *
6b9ba2a4
  * Revision 1.28  2004/02/07 23:13:55  nigelhorne
  * Handle content-type: text/
  *
8b242bb9
  * Revision 1.27  2004/02/06 13:46:08  kojm
  * Support for clamav-config.h
  *
3e74af5d
  * Revision 1.26  2004/02/06 13:10:34  nigelhorne
  * Now integrates with winzip
  *
bb5d6279
  * Revision 1.25  2004/02/05 11:23:07  nigelhorne
  * Bounce messages are now table driven
  *
bc75e1d1
  * Revision 1.24  2004/02/04 13:29:16  nigelhorne
  * Handle blobAddData of more than 128K
  *
40ab42d9
  * Revision 1.23  2004/02/03 23:04:09  nigelhorne
  * Disabled binhex code
  *
0bf1353d
  * Revision 1.22  2004/02/03 22:54:59  nigelhorne
  * Catch another example of Worm.Dumaru.Y
  *
83ec020f
  * Revision 1.21  2004/02/03 14:35:37  nigelhorne
  * Fixed an infinite loop on binhex
  *
0bac8a3c
  * Revision 1.20  2004/02/02 17:10:04  nigelhorne
  * Scan a rare form of bounce message
  *
1058c39e
  * Revision 1.19  2004/02/02 15:52:09  nigelhorne
  * Remove handling of 8bit binhex files for now
  *
3fbd1711
  * Revision 1.18  2004/02/02 15:30:54  nigelhorne
  * Remove handling of 8bit binhex files for now
  *
1068321e
  * Revision 1.17  2004/02/02 14:01:58  nigelhorne
  * Carefully crafted binhex messages could have caused a crash
  *
5a01973c
  * Revision 1.16  2004/01/28 10:15:24  nigelhorne
  * Added support to scan some bounce messages
  *
6a91c55b
  * Revision 1.15  2004/01/14 10:08:45  nigelhorne
  * blobGetData now allows contents to be changed - tuttut
  *
f14906ba
  * Revision 1.14  2004/01/10 13:01:19  nigelhorne
  * Added BinHex compression support
  *
a42dba7d
  * Revision 1.13  2004/01/09 18:01:03  nigelhorne
  * Started BinHex work
  *
dd8a7e90
  * Revision 1.12  2003/12/05 09:34:00  nigelhorne
  * Use cli_tok instead of strtok - replaced now by cli_strtok
  *
eaacc2de
  * Revision 1.11  2003/11/17 07:57:12  nigelhorne
  * Prevent buffer overflow in broken uuencoded files
  *
8b04b4f6
  * Revision 1.10  2003/11/05 07:03:51  nigelhorne
  * Handle broken content-disposition
  *
4674dc9a
  * Revision 1.9  2003/10/01 09:28:23  nigelhorne
  * Handle content-type header going over to a new line
  *
cec5297a
  * Revision 1.8  2003/09/28 10:07:08  nigelhorne
  * uuencodebegin() no longer static
  *
b151ef55
  */
1f8eb426
 /* Revision identification string built from the RCS/CVS $Id$ keyword */
 static	char	const	rcsid[] = "$Id: message.c,v 1.137 2005/01/05 21:54:05 nigelhorne Exp $";
8b242bb9
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
b151ef55
 
 #ifndef	CL_DEBUG
0b08b624
 #define	NDEBUG	/* map CLAMAV debug onto standard */
b151ef55
 #endif
 
 #ifdef CL_THREAD_SAFE
dd8a7e90
 #ifndef	_REENTRANT
b151ef55
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
dd8a7e90
 #endif
b151ef55
 
f0146bc6
 #ifdef	C_DARWIN
b151ef55
 #include <sys/types.h>
 #endif
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
de617e3e
 #include "line.h"
b151ef55
 #include "mbox.h"
dad64ecb
 #include "table.h"
b151ef55
 #include "blob.h"
 #include "text.h"
 #include "strrcpy.h"
 #include "others.h"
dd8a7e90
 #include "str.h"
2fe19b26
 #include "filetypes.h"
b151ef55
 
 /* required for AIX and Tru64 */
 #ifdef TRUE
 #undef TRUE
 #endif
 #ifdef FALSE
 #undef FALSE
 #endif
 
53ee0b60
 #define	RFC2045LENGTH	76	/* maximum number of characters on a line */
 
bf8ea488
 /*
  * File-local boolean type. Any TRUE/FALSE macros supplied by system headers
  * are #undef'd just above so that they can be used as enumerators here
  */
 typedef enum { FALSE = 0, TRUE = 1 } bool;
b151ef55
 
de617e3e
 /*
  * Forward declarations of the helpers that are private to this file
  */
 static	void	messageIsEncoding(message *m);
0e3b08fc
 static	unsigned char	*decodeLine(message *m, encoding_type enctype, const char *line, unsigned char *buf, size_t buflen);
285a69b4
 static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
fb405afc
 static	void	sanitiseBase64(char *s);
b151ef55
 /* Per-character decoders, passed to decode() as its "decoder" callback */
 static	unsigned	char	hex(char c);
 static	unsigned	char	base64(char c);
 static	unsigned	char	uudecode(char c);
 static	const	char	*messageGetArgument(const message *m, int arg);
a446de17
 static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(const text *, void *));
b4cb4486
 static	int	usefulArg(const char *arg);
e24738dc
 static	void	messageDedup(message *m);
5e5a162c
 static	char	*rfc2231(const char *in);
b329234a
 /* Similarity score of two strings; callers in this file treat 100 as an exact match */
 static	int	simil(const char *str1, const char *str2);
b151ef55
 
 /*
  * These maps are ordered in decreasing likelihood of their appearance
0e3b08fc
  * in an e-mail. Probably these should be in a table...
b151ef55
  */
 /*
  * encoding_map: Content-Transfer-Encoding names and the encoding_type each
  * one selects. The list is terminated by the entry whose string is NULL.
  * Entries marked "incorrect" are non-standard spellings that are accepted
  * anyway. Searched by messageSetEncoding()
  */
 static	const	struct	encoding_map {
 	const	char	*string;
 	encoding_type	type;
da850706
 } encoding_map[] = {	/* rfc2045 */
b151ef55
 	{	"7bit",			NOENCODING	},
b759d5eb
 	{	"text/plain",		NOENCODING	},
da850706
 	{	"quoted-printable",	QUOTEDPRINTABLE	},	/* rfc2045 */
98685ac1
 	{	"base64",		BASE64		},	/* rfc2045 */
b151ef55
 	{	"8bit",			EIGHTBIT	},
bb2432d7
 	{	"binary",		BINARY		},
b151ef55
 	{	"x-uuencode",		UUENCODE	},
00f95393
 	{	"x-yencode",		YENCODE		},
fef5ad63
 	{	"x-binhex",		BINHEX		},
7ea0c270
 	{	"us-ascii",		NOENCODING	},	/* incorrect */
ef3cf57d
 	{	"x-uue",		UUENCODE	},	/* incorrect */
a4c3d0a3
 	{	"uuencode",		UUENCODE	},	/* incorrect */
6ba88eb8
 	{	NULL,			NOENCODING	}
b151ef55
 };
 
 /*
  * mime_map: primary Content-Type names and the mime_type each one selects.
  * The list is terminated by the entry whose string is NULL.
  * messageSetMimeType() loads these into its lookup table and also walks the
  * list when it has to guess the closest match for an unknown type
  */
 static	struct	mime_map {
 	const	char	*string;
 	mime_type	type;
 } mime_map[] = {
 	{	"text",			TEXT		},
 	{	"multipart",		MULTIPART	},
 	{	"application",		APPLICATION	},
 	{	"audio",		AUDIO		},
 	{	"image",		IMAGE		},
 	{	"message",		MESSAGE		},
 	{	"video",		VIDEO		},
6ba88eb8
 	{	NULL,			TEXT		}
b151ef55
 };
 
5ae253d2
 #define	USE_TABLE	/* table driven base64 decoder */
 
 #ifdef	USE_TABLE
d17de037
 /*
  * Lookup table indexed by character code (ASCII layout):
  *	'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61,
  *	'+' gives 62, '/' gives 63 and the padding character '=' gives 0.
  * Every other entry holds 255, presumably as the "not a base64 character"
  * marker - confirm against the decoder that reads this table
  */
 static const unsigned char base64Table[256] = {
5ae253d2
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
 	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
 	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
 	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
 	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
 	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
 };
 #endif
 
b151ef55
 message *
 messageCreate(void)
 {
 	message *m = (message *)cli_calloc(1, sizeof(message));
 
0e3b08fc
 	if(m)
bbf43447
 		m->mimeType = NOMIME;
b151ef55
 
 	return m;
 }
 
 /*
  * Release everything the message owns (by way of messageReset()) and then
  * the message structure itself. m must not be NULL.
  */
 void
 messageDestroy(message *m)
 {
 	assert(m != NULL);

 	messageReset(m);
 	free(m);
 }
 
 /*
  * Free all the memory hanging off the message and put the structure back
  * into the state messageCreate() leaves it in: all zero bytes, with the MIME
  * type set to NOMIME. The message structure itself is not freed.
  */
 void
 messageReset(message *m)
 {
 	assert(m != NULL);

 	/* free(NULL) does nothing, so these two need no guard */
 	free(m->mimeSubtype);
 	free(m->mimeDispositionType);

 	if(m->mimeArguments != NULL) {
 		int idx = 0;

 		while(idx < m->numberOfArguments)
 			free(m->mimeArguments[idx++]);
 		free(m->mimeArguments);
 	}

 	if(m->body_first != NULL)
 		textDestroy(m->body_first);

 	assert(m->base64chars == 0);

 	if(m->encodingTypes != NULL) {
 		assert(m->numberOfEncTypes > 0);
 		free(m->encodingTypes);
 	}

 	memset(m, 0, sizeof(*m));
 	m->mimeType = NOMIME;
 }
 
de509b8e
 /*
bb2432d7
  * Handle the Content-Type header. The syntax is in RFC1341.
2bcec72b
  * Return success (1) or failure (0). Failure only happens when it's an
  * unknown type and we've already received a known type, or we've received an
  * empty type. If we receive an unknown type by itself we default to application
de509b8e
  */
2bcec72b
 int
b151ef55
 messageSetMimeType(message *mess, const char *type)
 {
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
1f8eb426
 	const struct mime_map *m;
dad64ecb
 	int typeval;
1f8eb426
 	static table_t *mime_table;
b151ef55
 
 	assert(mess != NULL);
4a46b8a2
 	if(type == NULL) {
 		cli_warnmsg("Empty content-type field\n");
 		return 0;
 	}
b151ef55
 
 	cli_dbgmsg("messageSetMimeType: '%s'\n", type);
 
 	/* Ignore leading spaces */
82189c76
 	while(!isalpha(*type))
b151ef55
 		if(*type++ == '\0')
2bcec72b
 			return 0;
b151ef55
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&mime_mutex);
 #endif
dad64ecb
 	if(mime_table == NULL) {
 		mime_table = tableCreate();
8a88fb93
 		if(mime_table == NULL) {
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&mime_mutex);
 #endif
2bcec72b
 			return 0;
8a88fb93
 		}
dad64ecb
 
 		for(m = mime_map; m->string; m++)
 			if(!tableInsert(mime_table, m->string, m->type)) {
 				tableDestroy(mime_table);
8a88fb93
 				mime_table = NULL;
 #ifdef	CL_THREAD_SAFE
 				pthread_mutex_unlock(&mime_mutex);
 #endif
2bcec72b
 				return 0;
dad64ecb
 			}
 	}
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&mime_mutex);
 #endif
dad64ecb
 
 	typeval = tableFind(mime_table, type);
 
2bcec72b
 	if(typeval != -1) {
f0146bc6
 		mess->mimeType = (mime_type)typeval;
2bcec72b
 		return 1;
1f8eb426
 	}
 	if(mess->mimeType == NOMIME) {
b151ef55
 		if(strncasecmp(type, "x-", 2) == 0)
 			mess->mimeType = MEXTENSION;
9fc8173e
 		else {
27a375f2
 			/*
 			 * Based on a suggestion by James Stevens
 			 *	<James@kyzo.com>
 			 * Force scanning of strange messages
 			 */
9fc8173e
 			if(strcasecmp(type, "plain") == 0) {
 				cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n", type);
 				mess->mimeType = TEXT;
 			} else {
 				/*
 				 * Don't handle broken e-mail probably sending
 				 *	Content-Type: plain/text
 				 * instead of
 				 *	Content-Type: text/plain
 				 * as an attachment
 				 */
1f8eb426
 				int highestSimil = 0, t = -1;
 				const char *closest = NULL;
 
 				for(m = mime_map; m->string; m++) {
 					const int s = simil(m->string, type);
 
 					if(s > highestSimil) {
 						highestSimil = s;
 						closest = m->string;
 						t = m->type;
 					}
 				}
 				if(highestSimil >= 50) {
 					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n",
 						type, closest, highestSimil);
 					mess->mimeType = t;
 				} else {
 					cli_warnmsg("Unknown MIME type: `%s', set to Application - report to bugs@clamav.net\n", type);
 					mess->mimeType = APPLICATION;
 				}
9fc8173e
 			}
27a375f2
 		}
2bcec72b
 		return 1;
b151ef55
 	}
2bcec72b
 	return 0;
b151ef55
 }
 
 /*
  * Return the MIME type currently recorded for the message.
  * m must not be NULL.
  */
 mime_type
 messageGetMimeType(const message *m)
 {
 	assert(m != NULL);
 	return m->mimeType;
 }
 
 /*
  * Replace the message's MIME subtype with a private copy of subtype.
  * A NULL subtype is stored as the empty string. If strdup() fails the
  * subtype is left as NULL.
  */
 void
 messageSetMimeSubtype(message *m, const char *subtype)
 {
 	const char *replacement = subtype;

 	assert(m != NULL);

 	if(replacement == NULL) {
 		/*
 		 * Handle broken content-type lines, e.g.
 		 *	Content-Type: text/
 		 */
 		cli_dbgmsg("Empty content subtype\n");
 		replacement = "";
 	}

 	free(m->mimeSubtype);	/* harmless when nothing was set before */
 	m->mimeSubtype = strdup(replacement);
 }
 
 const char *
 messageGetMimeSubtype(const message *m)
 {
b329234a
 	return (m->mimeSubtype) ? m->mimeSubtype : "";
b151ef55
 }
 
 /*
  * Replace the message's Content-Disposition type with a private copy of
  * disptype. Leading white space is skipped before copying and the copy is
  * passed through strstrip().
  *
  * The disposition type ends up as NULL when disptype is NULL, is empty, holds
  * only white space, or when strdup() fails.
  */
 void
 messageSetDispositionType(message *m, const char *disptype)
 {
 	assert(m != NULL);

 	/*
 	 * Drop the old value first, and clear the pointer straight away so
 	 * that none of the early returns below can leave the message
 	 * pointing at freed memory
 	 */
 	free(m->mimeDispositionType);
 	m->mimeDispositionType = NULL;

 	if(disptype == NULL)
 		return;

 	/*
 	 * It's broken for there to be an entry such as "Content-Disposition:"
 	 * However some spam and viruses are rather broken, it's a sign
 	 * that something is wrong if we get that - maybe we should force a
 	 * scan of this part
 	 */
 	/* isspace() needs an unsigned char value; it is false for '\0' */
 	while(isspace((unsigned char)*disptype))
 		disptype++;

 	if(*disptype == '\0')
 		return;

 	m->mimeDispositionType = strdup(disptype);
 	if(m->mimeDispositionType)
 		strstrip(m->mimeDispositionType);
 }
 
 const char *
 messageGetDispositionType(const message *m)
 {
b329234a
 	return (m->mimeDispositionType) ? m->mimeDispositionType : "";
b151ef55
 }
 
 /*
  * Add one "name=value" argument to the message.
  *
  * Leading white space is skipped. Nothing is added when arg is NULL or
  * empty, when usefulArg() rejects it, or when an identical argument
  * (ignoring case) is already stored. The text that is stored is whatever
  * rfc2231() returns for arg; the message owns that string.
  *
  * As a side effect, an argument starting with "filename=" or "name=" sets
  * the MIME type to application if the message has no MIME type yet.
  *
  * TODO:
  *	Arguments are held on a per message basis, they should be held on
  * a per section basis. Otherwise what happens if two sections have two
  * different values for charset? Probably doesn't matter for the use this
  * code will be given, but will need fixing if this code is used elsewhere
  */
 void
 messageAddArgument(message *m, const char *arg)
 {
 	int offset;
 
 	assert(m != NULL);
 
 	if(arg == NULL)
 		return;	/* Note: this is not an error condition */
 
 	/* isspace() needs an unsigned char value */
 	while(isspace((unsigned char)*arg))
 		arg++;
 
 	if(*arg == '\0')
 		/* Empty argument? Probably a broken mail client... */
 		return;
 
 	if(!usefulArg(arg))
 		return;
 
 	/* Look for a free (NULL) slot, giving up if arg is already stored */
 	for(offset = 0; offset < m->numberOfArguments; offset++)
 		if(m->mimeArguments[offset] == NULL)
 			break;
 		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
 			return;	/* already in there */
 
 	if(offset == m->numberOfArguments) {
 		/* No free slot, grow the array by one entry */
 		char **ptr;
 
 		ptr = (char **)cli_realloc(m->mimeArguments, (m->numberOfArguments + 1) * sizeof(char *));
 		if(ptr == NULL)
 			return;
 		m->mimeArguments = ptr;
 		m->numberOfArguments++;
 	}
 
 	arg = m->mimeArguments[offset] = rfc2231(arg);
 
 	/*
 	 * NOTE(review): rfc2231() is not visible here; it hands back an
 	 * allocated string so presumably it can return NULL when memory is
 	 * short. The slot then simply stays empty, which the loop above and
 	 * messageGetArgument() both cope with
 	 */
 	if(arg == NULL)
 		return;
 
 	/*
 	 * This is terribly broken from an RFC point of view but is useful
 	 * for catching viruses which have a filename but no type of
 	 * mime. By pretending defaulting to an application rather than
 	 * to nomime we can ensure they're saved and scanned
 	 */
 	if((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0))
 		if(messageGetMimeType(m) == NOMIME) {
 			cli_dbgmsg("Force mime encoding to application\n");
 			messageSetMimeType(m, "application");
 		}
 }
 
 /*
  * Add in all the arguments.
  * Cope with:
  *	name="foo bar.doc"
  *	charset=foo name=bar
  *
  * s is split into "name=value" fields, each of which is handed to
  * messageAddArgument(). White space and ';' between fields are skipped.
  * A quoted value has its quotes removed before it is stored; an unquoted
  * value runs up to the next white space. Parsing stops at the first field
  * that can't be understood, fields already added are kept.
  *
  * s must not be NULL.
  */
 void
 messageAddArguments(message *m, const char *s)
 {
 	const char *string = s;
 
 	assert(string != NULL);
 
 	cli_dbgmsg("Add arguments '%s'\n", string);
 
 	while(*string) {
 		const char *key, *cptr;
 		char *data, *field;
 
 		/*
 		 * The header comes from the e-mail being scanned so it can
 		 * hold any byte value; the <ctype.h> functions must be given
 		 * an unsigned char value
 		 */
 		if(isspace((unsigned char)*string) || (*string == ';')) {
 			string++;
 			continue;
 		}
 
 		/* key marks the start of the field name */
 		key = string;
 
 		data = strchr(string, '=');
 
 		/*
 		 * Some spam breaks RFC2045 by using ':' instead of '='
 		 * e.g.:
 		 *	Content-Type: text/html; charset:ISO-8859-1
 		 * should be:
 		 *	Content-type: text/html; charset=ISO-8859-1
 		 *
 		 * We give up with lines that are completely broken because
 		 * we don't have ESP and don't know what was meant to be there.
 		 * It's unlikely to really be a problem.
 		 */
 		if(data == NULL)
 			data = strchr(string, ':');
 
 		if(data == NULL) {
 			/*
 			 * Completely broken, give up
 			 */
 			cli_dbgmsg("Can't parse header \"%s\"\n", s);
 			return;
 		}
 
 		string = &data[1];
 
 		/*
 		 * Handle white space to the right of the equals sign
 		 * This breaks RFC2045 which has:
 		 *	parameter := attribute "=" value
 		 *	attribute := token   ; case-insensitive
 		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
 		 *		or tspecials>
 		 * But too many MUAs ignore this
 		 */
 		while(isspace((unsigned char)*string))
 			string++;
 
 		/* cptr is the first character of the value */
 		cptr = string++;
 
 		if(*cptr == '"') {
 			char *name, *ptr;
 
 			/*
 			 * The field is in quotes, so look for the
 			 * closing quotes
 			 */
 			name = strdup(key);
 
 			if(name == NULL)
 				return;
 
 			/*
 			 * Cut the copy down to just the field name. One of
 			 * the two separators must be there since it was
 			 * found in the original above
 			 */
 			ptr = strchr(name, '=');
 			if(ptr == NULL)
 				ptr = strchr(name, ':');
 			*ptr = '\0';
 
 			/* Carry on after the closing quote next time round */
 			string = strchr(++cptr, '"');
 
 			if(string == NULL) {
 				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
 				string = "";
 			} else
 				string++;
 
 			if(!usefulArg(name)) {
 				free(name);
 				continue;
 			}
 
 			data = strdup(cptr);
 
 			ptr = (data) ? strchr(data, '"') : NULL;
 			if(ptr == NULL) {
 				/*
 				 * Weird e-mail header such as:
 				 * Content-Type: application/octet-stream; name="
 				 * "
 				 * Content-Transfer-Encoding: base64
 				 * Content-Disposition: attachment; filename="
 				 * "
 				 *
 				 * TODO: the file should still be saved and
 				 * virus checked
 				 */
 				cli_dbgmsg("Can't parse header\"%s\" - report to bugs@clamav.net\n", s);
 				if(data)
 					free(data);
 				free(name);
 				return;
 			}
 
 			*ptr = '\0';
 
 			/* Rebuild the field as name=value, without the quotes */
 			field = cli_realloc(name, strlen(name) + strlen(data) + 2);
 			if(field) {
 				strcat(field, "=");
 				strcat(field, data);
 			} else
 				free(name);
 			free(data);
 		} else {
 			size_t len;
 
 			if(*cptr == '\0') {
 				cli_warnmsg("Ignoring empty field in \"%s\"\n", s);
 				return;
 			}
 
 			/*
 			 * The field is not in quotes, so look for the closing
 			 * white space
 			 */
 			while((*string != '\0') && !isspace((unsigned char)*string))
 				string++;
 
 			/* Copy everything from the name to the end of the value */
 			len = (size_t)(string - key) + 1;
 			field = cli_malloc(len);
 
 			if(field) {
 				memcpy(field, key, len - 1);
 				field[len - 1] = '\0';
 			}
 		}
 		if(field) {
 			/* messageAddArgument() stores its own copy */
 			messageAddArgument(m, field);
 			free(field);
 		}
 	}
 }
 
 static const char *
 messageGetArgument(const message *m, int arg)
 {
 	assert(m != NULL);
 	assert(arg >= 0);
c6259ac5
 	assert(arg < m->numberOfArguments);
b151ef55
 
b329234a
 	return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
b151ef55
 }
 
 /*
  * Find a MIME variable from the header and return a COPY to the value of that
  * variable. The caller must free the copy
  */
 const char *
 messageFindArgument(const message *m, const char *variable)
 {
 	int i;
dad64ecb
 	size_t len;
b151ef55
 
 	assert(m != NULL);
 	assert(variable != NULL);
 
dad64ecb
 	len = strlen(variable);
 
c6259ac5
 	for(i = 0; i < m->numberOfArguments; i++) {
b151ef55
 		const char *ptr;
 
 		ptr = messageGetArgument(m, i);
 		if((ptr == NULL) || (*ptr == '\0'))
dad64ecb
 			continue;
b151ef55
 #ifdef	CL_DEBUG
 		cli_dbgmsg("messageFindArgument: compare %d bytes of %s with %s\n",
 			len, variable, ptr);
 #endif
 		if(strncasecmp(ptr, variable, len) == 0) {
 			ptr = &ptr[len];
 			while(isspace(*ptr))
 				ptr++;
752c34b9
 			if(*ptr != '=') {
 				cli_warnmsg("messageFindArgument: no '=' sign found in MIME header\n");
 				return NULL;
 			}
b151ef55
 			if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
bf8ea488
 				/* Remove any quote characters */
b151ef55
 				char *ret = strdup(++ptr);
bf8ea488
 				char *p;
 
bbf43447
 				if(ret == NULL)
 					return NULL;
 
bf8ea488
 				/*
 				 * Thomas Lamy <Thomas.Lamy@in-online.net>:
 				 * fix un-quoting of boundary strings from
 				 * header, occurs if boundary was given as
 				 *	'boundary="_Test_";'
 				 *
 				 * At least two quotes in string, assume
 				 * quoted argument
 				 * end string at next quote
 				 */
53ee0b60
 				if((p = strchr(ret, '"')) != NULL) {
 					ret[strlen(ret) - 1] = '\0';
bf8ea488
 					*p = '\0';
53ee0b60
 				}
bbf43447
 				return ret;
b151ef55
 			}
bbf43447
 			return strdup(ptr);
b151ef55
 		}
 	}
bbf43447
 	return NULL;
b151ef55
 }
 
 /*
  * Record the transfer encoding(s) named in a Content-Transfer-Encoding value.
  *
  * enctype may hold several mechanisms separated by spaces or tabs, e.g.
  *	Content-Transfer-Encoding: base64 binary
  * Each token is compared with the entries of encoding_map:
  *  - an exact match (simil() returns 100) is appended to m->encodingTypes,
  *    unless that type has already been recorded;
  *  - with no exact match, the closest entry is used when it is at least 50%
  *    similar;
  *  - otherwise both base64 and quoted-printable are enabled so that no
  *    decoder is skipped.
  */
 void
 messageSetEncoding(message *m, const char *enctype)
 {
 	const struct encoding_map *e;
 	int i;
 	char *type;

 	assert(m != NULL);
 	assert(enctype != NULL);

 	/* Skip leading white space */
 	while((*enctype == '\t') || (*enctype == ' '))
 		enctype++;

 	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);

 	if(strcasecmp(enctype, "8 bit") == 0) {
 		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
 		enctype = "8bit";
 	}

 	/*
 	 * Iterate through
 	 *	Content-Transfer-Encoding: base64 binary
 	 * cli_strtok's fieldno counts from 0
 	 */
 	i = 0;
 	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
 		int highestSimil = 0;
 		const char *closest = NULL;
 		/*
 		 * The header comes from the e-mail, so it may hold bytes
 		 * with the top bit set. The <ctype.h> functions are only
 		 * defined for values representable as unsigned char.
 		 */
 		const int lowertype = tolower((unsigned char)type[0]);

 		for(e = encoding_map; e->string; e++) {
 			int sim;

 			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
 				/*
 				 * simil is expensive, I'm yet to encounter only
 				 * one example of a missent encoding when the
 				 * first character was wrong, so lets assume no
 				 * match to save the call.
 				 *
 				 * That example was quoted-printable sent as
 				 * X-quoted-printable.
 				 */
 				continue;

 			sim = simil(type, e->string);

 			if(sim == 100) {
 				int j;
 				encoding_type *et;

 				for(j = 0; j < m->numberOfEncTypes; j++)
 					if(m->encodingTypes[j] == e->type)
 						break;

 				if(j < m->numberOfEncTypes) {
 					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
 						type);
 					break;
 				}

 				/*
 				 * Grow through a temporary so that the existing
 				 * array survives a failed realloc
 				 */
 				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
 				if(et == NULL)
 					break;

 				m->encodingTypes = et;
 				m->encodingTypes[m->numberOfEncTypes++] = e->type;

 				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
 				break;
 			} else if(sim > highestSimil) {
 				closest = e->string;
 				highestSimil = sim;
 			}
 		}

 		/*
 		 * The loop only runs off the end of the table when no entry
 		 * matched exactly; every other exit is through a break
 		 */
 		if(e->string == NULL) {
 			/*
 			 * The stated encoding type is illegal, so we
 			 * use a best guess of what it should be.
 			 *
 			 * 50% is arbitrary. For example 7bi will match as
 			 * 66% certain to be 7bit
 			 */
 			if(highestSimil >= 50) {
 				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%d%% certainty)\n",
 					type, closest, highestSimil);
 				messageSetEncoding(m, closest);
 			} else {
 				cli_warnmsg("Unknown encoding type \"%s\" - report to bugs@clamav.net\n", type);
 				/*
 				 * Err on the side of safety, enable all
 				 * decoding modules
 				 */
 				messageSetEncoding(m, "base64");
 				messageSetEncoding(m, "quoted-printable");
 			}
 		}

 		free(type);
 	}
 }
 
 encoding_type
 messageGetEncoding(const message *m)
 {
 	assert(m != NULL);
0e3b08fc
 
 	if(m->numberOfEncTypes == 0)
 		return NOENCODING;
 	return m->encodingTypes[0];
b151ef55
 }
 
de617e3e
 /*
  * Add the given line to the end of the given message.
  *
  * The message takes its own reference to the line with lineLink(); a NULL
  * line, or a line with no data, is stored as an empty line.
  *
  * Returns 1 on success, -1 if memory could not be allocated, in which case
  * the message is left untouched.
  */
 int
 messageAddLine(message *m, line_t *line)
 {
 	text *node;

 	assert(m != NULL);

 	/*
 	 * Build the node before touching the list: assigning a failed
 	 * allocation straight to m->body_last would lose the tail of a
 	 * non-empty body
 	 */
 	node = (text *)cli_malloc(sizeof(text));
 	if(node == NULL)
 		return -1;

 	node->t_next = NULL;
 	node->t_line = NULL;

 	if(m->body_first == NULL)
 		m->body_first = node;
 	else
 		m->body_last->t_next = node;
 	m->body_last = node;

 	if(line && lineGetData(line)) {
 		node->t_line = lineLink(line);

 		/* messageIsEncoding looks at the line in m->body_last */
 		messageIsEncoding(m);
 	}

 	return 1;
 }
 
b151ef55
 /*
edb35c0a
  * Add the given line to the end of the given message
ffd59a3e
  * If needed a copy of the given line is taken which the caller must free
edb35c0a
  * Line must not be terminated by a \n
b151ef55
  */
4c927f11
 int
321d5c00
 messageAddStr(message *m, const char *data)
b151ef55
 {
c1e96196
 	line_t *repeat = NULL;
 
b151ef55
 	assert(m != NULL);
 
c1e96196
 	if(data) {
321d5c00
 		if(*data == '\0')
 			data = NULL;
 		else {
 			/*
 			 * If it's only white space, just store one space to
 			 * save memory. You must store something since it may
 			 * be a header line
 			 */
 			int iswhite = 1;
 			const char *p;
c1e96196
 
321d5c00
 			for(p = data; *p; p++)
 				if(!isspace(*p)) {
 					iswhite = 0;
 					break;
 				}
 			if(iswhite) {
 				/*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
 				data = " ";
a78256af
 			}
c1e96196
 		}
 	}
 
b151ef55
 	if(m->body_first == NULL)
 		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
 	else {
e24738dc
 		assert(m->body_last != NULL);
b151ef55
 		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
e24738dc
 		if(m->body_last->t_next == NULL) {
 			messageDedup(m);
 			m->body_last->t_next = (text *)cli_malloc(sizeof(text));
 			if(m->body_last->t_next == NULL) {
 				cli_errmsg("messageAddStr: out of memory\n");
 				return -1;
 			}
 		}
 
c1e96196
 		if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
 			repeat = m->body_last->t_line;
b151ef55
 		m->body_last = m->body_last->t_next;
 	}
 
e24738dc
 	if(m->body_last == NULL) {
 		cli_errmsg("messageAddStr: out of memory\n");
4c927f11
 		return -1;
e24738dc
 	}
f5a4d7e8
 
b151ef55
 	m->body_last->t_next = NULL;
 
de617e3e
 	if(data && *data) {
c1e96196
 		if(repeat)
 			m->body_last->t_line = lineLink(repeat);
 		else
 			m->body_last->t_line = lineCreate(data);
98685ac1
 
e24738dc
 		if((m->body_last->t_line == NULL) && (repeat == NULL)) {
 			messageDedup(m);
 			m->body_last->t_line = lineCreate(data);
 
 			if(m->body_last->t_line == NULL) {
 				cli_errmsg("messageAddStr: out of memory\n");
 				return -1;
 			}
de617e3e
 		}
 		/* cli_chomp(m->body_last->t_text); */
 
5ae253d2
 		if(repeat == NULL)
 			messageIsEncoding(m);
98685ac1
 	} else
de617e3e
 		m->body_last->t_line = NULL;
98685ac1
 
4c927f11
 	return 1;
b151ef55
 }
 
ffd59a3e
 /*
edb35c0a
  * Add the given line to the start of the given message
  * A copy of the given line is taken which the caller must free
  * Line must not be terminated by a \n
  */
 int
de617e3e
 messageAddStrAtTop(message *m, const char *data)
edb35c0a
 {
 	text *oldfirst;
 
 	assert(m != NULL);
 
 	if(m->body_first == NULL)
de617e3e
 		return messageAddLine(m, lineCreate(data));
bbf43447
 
edb35c0a
 	oldfirst = m->body_first;
 	m->body_first = (text *)cli_malloc(sizeof(text));
 	if(m->body_first == NULL) {
 		m->body_first = oldfirst;
 		return -1;
 	}
 
 	m->body_first->t_next = oldfirst;
de617e3e
 	m->body_first->t_line = lineCreate((data) ? data : "");
edb35c0a
 
de617e3e
 	if(m->body_first->t_line == NULL) {
 		cli_errmsg("messageAddStrAtTop: out of memory\n");
edb35c0a
 		return -1;
 	}
 	return 1;
 }
 
 /*
  * See if the last line marks the start of a non MIME inclusion that
  * will need to be scanned.
  *
  * The line held in m->body_last is tested against each kind of marker in
  * turn. The first kind that matches and has not yet been seen in this
  * message is remembered by pointing the corresponding marker (encoding,
  * bounce, uuencode, binhex or yenc) at m->body_last.
  *
  * m->body_last must hold a line with data
  */
 static void
 messageIsEncoding(message *m)
 {
 	static const char encoding[] = "Content-Transfer-Encoding";
 	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
 	const char *line = lineGetData(m->body_last->t_line);

 	if((m->encoding == NULL) &&
 	   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
 	   (strstr(line, "7bit") == NULL))
 		m->encoding = m->body_last;
 	else if((m->bounce == NULL) &&
 		(strncasecmp(line, "Received: ", 10) == 0) &&
 		(cli_filetype(line, strlen(line)) == CL_TYPE_MAIL))
 			m->bounce = m->body_last;
 	else if((m->uuencode == NULL) &&
 		/*
 		 * "begin " followed by a three digit mode and a space.
 		 * The digits are tested left to right, so nothing is read
 		 * beyond the end of a short line. The casts are needed
 		 * because isdigit() must be given an unsigned char value.
 		 */
 		((strncasecmp(line, "begin ", 6) == 0) &&
 		isdigit((unsigned char)line[6]) &&
 		isdigit((unsigned char)line[7]) &&
 		isdigit((unsigned char)line[8]) &&
 		(line[9] == ' ')))
 			m->uuencode = m->body_last;
 	else if((m->binhex == NULL) &&
 		strstr(line, "BinHex") &&
 		(simil(line, binhex) > 90))
 			/*
 			 * Look for close matches for BinHex, but
 			 * simil() is expensive so only do it if it's
 			 * likely to be found
 			 */
 			m->binhex = m->body_last;
 	else if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
 		m->yenc = m->body_last;
 }
 
 /*
ffd59a3e
  * Returns a pointer to the body of the message. Note that it does NOT return
  * a copy of the data
  */
b151ef55
 const text *
 messageGetBody(const message *m)
 {
 	assert(m != NULL);
ffd59a3e
 	return m->body_first;
b151ef55
 }
 
 /*
  * Tidy the body with textClean() (removing trailing spaces and blank lines)
  * and, if textClean() reports a new end of the text, make that the last line
  * of the message
  */
 void
 messageClean(message *m)
 {
 	text *tail;

 	tail = textClean(m->body_first);
 	if(tail != NULL)
 		m->body_last = tail;
 }
 
 /*
e6b25cd3
  * Export a message using the given export routines
0d252351
  *
  * TODO: It really should export into an array, one
  * for each encoding algorithm. However, what it does is it returns the
  * last item that was exported. That's sufficient for now.
b151ef55
  */
a446de17
 static void *
e6b25cd3
 messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(const text *, void *))
b151ef55
 {
e6b25cd3
 	void *ret;
a446de17
 	const text *t_line;
dd8a7e90
 	char *filename;
0e3b08fc
 	int i;
b151ef55
 
 	assert(m != NULL);
 
0e3b08fc
 	if(messageGetBody(m) == NULL)
 		return NULL;
 
e6b25cd3
 	ret = (*create)();
b151ef55
 
e6b25cd3
 	if(ret == NULL)
02c9dc2a
 		return NULL;
b151ef55
 
802c37fc
 	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
 
0e3b08fc
 	if((t_line = binhexBegin(m)) != NULL) {
a42dba7d
 		unsigned char byte;
a4c3d0a3
 		unsigned long newlen = 0L, len, l;
 		unsigned char *data;
bbf43447
 		char *ptr;
bb5d6279
 		int bytenumber;
285a69b4
 		blob *tmp;
bc75e1d1
 
 		/*
 		 * Table look up by Thomas Lamy <Thomas.Lamy@in-online.net>
 		 * HQX conversion table - illegal chars are 0xff
 		 */
 		const unsigned char hqxtbl[] = {
 			     /*   00   01   02   03   04   05   06   07   08   09   0a   0b   0c   0d   0e   0f */
 		/* 00-0f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 10-1f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 20-2f */	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
 		/* 30-3f */	0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
 		/* 40-4f */	0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
 		/* 50-5f */	0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
 		/* 60-6f */	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
 		/* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
 		};
a42dba7d
 
 		/*
 		 * Decode BinHex4. First create a temporary blob which contains
 		 * the encoded message. Then decode that blob to the target
0bf1353d
 		 * blob, free the temporary blob and return the target one
bc75e1d1
 		 *
 		 * See RFC1741
a42dba7d
 		 */
a4c3d0a3
 		while(((t_line = t_line->t_next) != NULL) &&
 		      (t_line->t_line == NULL))
 			;
a42dba7d
 
a4c3d0a3
 		tmp = textToBlob(t_line, NULL);
 		if(tmp == NULL) {
e6b25cd3
 			(*destroy)(ret);
bc75e1d1
 			return NULL;
 		}
bb5d6279
 
a4c3d0a3
 		data = blobGetData(tmp);
a42dba7d
 
a4c3d0a3
 		if(data == NULL) {
 			cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
bbf43447
 			blobDestroy(tmp);
e6b25cd3
 			(*destroy)(ret);
bbf43447
 			return NULL;
 		}
a4c3d0a3
 		len = blobGetDataSize(tmp);
a42dba7d
 
a4c3d0a3
 		if(data[0] == ':') {
 			unsigned char *uptr;
 			/* 7 bit (ala RFC1741) */
a42dba7d
 
a4c3d0a3
 			/*
 			 * FIXME: this is dirty code, modification of the
 			 * contents of a member of the blob object should be
 			 * done through blob.c
 			 *
 			 * Convert 7 bit data into 8 bit
 			 */
 			cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);
bb5d6279
 
a4c3d0a3
 			uptr = cli_malloc(len);
 			if(uptr == NULL) {
 				blobDestroy(tmp);
 				(*destroy)(ret);
 				return NULL;
bc75e1d1
 			}
a4c3d0a3
 			memcpy(uptr, data, len);
 			bytenumber = 0;
83ec020f
 
bc75e1d1
 			/*
a4c3d0a3
 			 * uptr now contains the encoded (7bit) data - len bytes long
 			 * data will contain the unencoded (8bit) data
bc75e1d1
 			 */
a4c3d0a3
 			for(l = 1; l < len; l++) {
 				unsigned char c = uptr[l];
 
 				if(c == ':')
bc75e1d1
 					break;
a4c3d0a3
 
 				if((c == '\n') || (c == '\r'))
 					continue;
 
 				if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
 					cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
bc75e1d1
 					break;
a4c3d0a3
 				}
 				c = hqxtbl[c];
 				assert(c <= 63);
 
 				/*
 				 * These masks probably aren't needed, but
 				 * they're here to verify the code is correct
 				 */
 				switch(bytenumber) {
 					case 0:
 						data[newlen] = (c << 2) & 0xFC;
 						bytenumber = 1;
 						break;
 					case 1:
 						data[newlen++] |= (c >> 4) & 0x3;
 						data[newlen] = (c << 4) & 0xF0;
 						bytenumber = 2;
 						break;
 					case 2:
 						data[newlen++] |= (c >> 2) & 0xF;
 						data[newlen] = (c << 6) & 0xC0;
 						bytenumber = 3;
 						break;
 					case 3:
 						data[newlen++] |= c & 0x3F;
 						bytenumber = 0;
 						break;
 				}
bc75e1d1
 			}
bb5d6279
 
a4c3d0a3
 			cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
bc75e1d1
 
a4c3d0a3
 			/*
 			 * Throw away the old encoded (7bit) data
 			 * data now points to the encoded (8bit) data - newlen bytes
 			 *
 			 * The data array may contain repetitive characters
 			 */
 			free(uptr);
 		} else {
 			cli_warnmsg("HQX8 messages not yet supported - send to bugs@clamav.net\n", len);
 			newlen = len;
 		}
bc75e1d1
 
 		/*
 		 * Uncompress repetitive characters
 		 */
 		if(memchr(data, 0x90, newlen)) {
 			blob *u = blobCreate();	/* uncompressed data */
 
285a69b4
 			if(u == NULL) {
e6b25cd3
 				(*destroy)(ret);
285a69b4
 				blobDestroy(tmp);
 				return NULL;
 			}
bc75e1d1
 			/*
 			 * Includes compression
 			 */
 			for(l = 0L; l < newlen; l++) {
 				unsigned char c = data[l];
a42dba7d
 
 				/*
bc75e1d1
 				 * TODO: handle the case where the first byte
 				 * is 0x90
a42dba7d
 				 */
bc75e1d1
 				blobAddData(u, &c, 1);
 
 				if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
 					int count;
 
 					l += 2;
 					count = data[l];
 
 					if(count == 0) {
 						c = 0x90;
 						blobAddData(u, &c, 1);
ffd59a3e
 					} else {
0d252351
 #ifdef	CL_DEBUG
 						cli_dbgmsg("uncompress HQX7 at 0x%06x: %d repetitive bytes\n", l, count);
 #endif
ffd59a3e
 						blobGrow(u, count);
bc75e1d1
 						while(--count > 0)
 							blobAddData(u, &c, 1);
ffd59a3e
 					}
a42dba7d
 				}
 			}
3fbd1711
 			blobDestroy(tmp);
bc75e1d1
 			tmp = u;
 			data = blobGetData(tmp);
 			len = blobGetDataSize(tmp);
 			cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
 		} else {
 			len = newlen;
 			cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
 				len);
a42dba7d
 		}
dad64ecb
 		if(len == 0) {
 			cli_warnmsg("Discarding empty binHex attachment\n");
e6b25cd3
 			(*destroy)(ret);
dad64ecb
 			blobDestroy(tmp);
 			return NULL;
 		}
a42dba7d
 
 		/*
bc75e1d1
 		 * The blob tmp now contains the uncompressed data
 		 * of len bytes, i.e. the repetitive bytes have been removed
 		 */
 
 		/*
 		 * Parse the header
 		 *
a42dba7d
 		 * TODO: set filename argument in message as well
 		 */
 		byte = data[0];
6afdc3ab
 		if(byte >= len) {
e6b25cd3
 			(*destroy)(ret);
6afdc3ab
 			blobDestroy(tmp);
 			return NULL;
 		}
a42dba7d
 		filename = cli_malloc(byte + 1);
bbf43447
 		if(filename == NULL) {
e6b25cd3
 			(*destroy)(ret);
bbf43447
 			blobDestroy(tmp);
 			return NULL;
 		}
bc75e1d1
 		memcpy(filename, &data[1], byte);
a42dba7d
 		filename[byte] = '\0';
e6b25cd3
 		(*setFilename)(ret, dir, filename);
dad64ecb
 		/*ptr = cli_malloc(strlen(filename) + 6);*/
 		ptr = cli_malloc(byte + 6);
bbf43447
 		if(ptr) {
 			sprintf(ptr, "name=%s", filename);
 			messageAddArgument(m, ptr);
 			free(ptr);
 		}
a42dba7d
 
 		/*
 		 * skip over length, filename, version, type, creator and flags
 		 */
 		byte = 1 + byte + 1 + 4 + 4 + 2;
83ec020f
 
 		/*
 		 * Set len to be the data fork length
 		 */
bc75e1d1
 		len = ((data[byte] << 24) & 0xFF000000) |
 		      ((data[byte + 1] << 16) & 0xFF0000) |
 		      ((data[byte + 2] << 8) & 0xFF00) |
 		      (data[byte + 3] & 0xFF);
 
 		cli_dbgmsg("Filename = '%s', data fork length = %lu bytes\n",
 			filename, len);
 
 		free((char *)filename);
a42dba7d
 
 		/*
 		 * Skip over data fork length, resource fork length and CRC
 		 */
 		byte += 10;
 
dad64ecb
 		l = blobGetDataSize(tmp) - byte;
db42f46e
 
 		if(l < len) {
 			cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
 				len, l);
 			len = l;
 		}
e6b25cd3
 		(*addData)(ret, &data[byte], len);
a42dba7d
 
 		blobDestroy(tmp);
 
0e3b08fc
 		m->binhex = NULL;
fef5ad63
 
 		if((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) {
 			cli_dbgmsg("Finished exporting binhex file\n");
 			return ret;
 		}
0e3b08fc
 	}
 
 	if(m->numberOfEncTypes == 0) {
 		/*
 		 * Fast copy
 		 */
6a91c55b
 		filename = (char *)messageFindArgument(m, "filename");
b151ef55
 		if(filename == NULL) {
6a91c55b
 			filename = (char *)messageFindArgument(m, "name");
b151ef55
 
 			if(filename == NULL) {
3b6eace4
 				cli_dbgmsg("Attachment sent with no filename\n");
 				messageAddArgument(m, "name=attachment");
0e3b08fc
 			} else
bbf43447
 				/*
 				 * Some virus attachments don't say how they've
 				 * been encoded. We assume base64
 				 */
 				messageSetEncoding(m, "base64");
b151ef55
 		}
 
63f87938
 		(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
b151ef55
 
5eeffbb9
 		if(filename)
 			free((char *)filename);
b151ef55
 
0e3b08fc
 		if(m->numberOfEncTypes == 0) {
 			if(uuencodeBegin(m))
 				messageSetEncoding(m, "x-uuencode");
 			else
 				return exportText(messageGetBody(m), ret);
 		}
b151ef55
 	}
 
0e3b08fc
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		encoding_type enctype = m->encodingTypes[i];
d17de037
 		size_t size;
0e3b08fc
 
0d252351
 		if(i > 0) {
 			void *newret;
 
 			newret = (*create)();
 			if(newret == NULL) {
 				cli_errmsg("Not all decoding algorithms were run\n");
 				return ret;
 			}
 			(*destroy)(ret);
 			ret = newret;
 		}
ad642304
 		cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype);
b151ef55
 		/*
0e3b08fc
 		 * Find the filename to decode
b151ef55
 		 */
a4c3d0a3
 		if((enctype == UUENCODE) || uuencodeBegin(m)) {
0e3b08fc
 			t_line = uuencodeBegin(m);
 
 			if(t_line == NULL) {
 				/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/
ef3cf57d
 				m->uuencode = NULL;
1a220adb
 				m->base64chars = 0;
ef3cf57d
 				if(i == m->numberOfEncTypes - 1) {
 					(*destroy)(ret);
 					return NULL;
 				}
 				continue;
0e3b08fc
 			}
 
 			filename = cli_strtok(lineGetData(t_line->t_line), 2, " ");
752c34b9
 
0e3b08fc
 			if(filename == NULL) {
 				cli_dbgmsg("UUencoded attachment sent with no filename\n");
 				(*destroy)(ret);
 				return NULL;
 			}
 			cli_chomp(filename);
b151ef55
 
0e3b08fc
 			cli_dbgmsg("Set uuencode filename to \"%s\"\n", filename);
 
 			(*setFilename)(ret, dir, filename);
 			t_line = t_line->t_next;
 			enctype = UUENCODE;
74ca33e9
 			m->uuencode = NULL;
802c37fc
 		} else if(((enctype == YENCODE) && yEncBegin(m)) || ((i == 0) && yEncBegin(m))) {
00f95393
 			/*
 			 * TODO: handle multipart yEnc encoded files
 			 */
 			t_line = yEncBegin(m);
bb2432d7
 			filename = (char *)lineGetData(t_line->t_line);
00f95393
 
 			if((filename = strstr(filename, " name=")) != NULL) {
 				filename = strdup(&filename[6]);
 				if(filename) {
 					cli_chomp(filename);
 					strstrip(filename);
 					cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
 				}
5eeffbb9
 			}
00f95393
 
63f87938
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
5eeffbb9
 			if(filename) {
 				free((char *)filename);
 				filename = NULL;
 			}
00f95393
 			t_line = t_line->t_next;
 			enctype = YENCODE;
74ca33e9
 			m->yenc = NULL;
0e3b08fc
 		} else {
 			filename = (char *)messageFindArgument(m, "filename");
 			if(filename == NULL) {
 				filename = (char *)messageFindArgument(m, "name");
 
 				if(filename == NULL) {
 					cli_dbgmsg("Attachment sent with no filename\n");
 					messageAddArgument(m, "name=attachment");
 				} else if(enctype == NOENCODING)
 					/*
 					 * Some virus attachments don't say how they've
 					 * been encoded. We assume base64
 					 */
 					messageSetEncoding(m, "base64");
 			}
 
63f87938
 			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
0e3b08fc
 
 			t_line = messageGetBody(m);
 		}
ad642304
 
00f95393
 		if(filename)
 			free((char *)filename);
0e3b08fc
 
 		/*
 		 * t_line should now point to the first (encoded) line of the message
 		 */
 		if(t_line == NULL) {
 			cli_warnmsg("Empty attachment not saved\n");
 			(*destroy)(ret);
 			return NULL;
 		}
 
 		if(enctype == NOENCODING) {
ce73653f
 			/*
0e3b08fc
 			 * Fast copy
ce73653f
 			 */
0e3b08fc
 			(void)exportText(t_line, ret);
 			continue;
ce73653f
 		}
b151ef55
 
d17de037
 		size = 0;
0e3b08fc
 		do {
0d252351
 			unsigned char smallbuf[1024];
 			unsigned char *uptr, *data;
0e3b08fc
 			const char *line = lineGetData(t_line->t_line);
0d252351
 			unsigned char *bigbuf;
 			size_t datasize;
b151ef55
 
0e3b08fc
 			if(enctype == UUENCODE) {
 				/*
0d252351
 				 * There should be no blank lines in uuencoded
 				 * files...
0e3b08fc
 				 */
 				if(line == NULL)
 					continue;
 				if(strcasecmp(line, "end") == 0)
 					break;
00f95393
 			} else if(enctype == YENCODE) {
 				if(line == NULL)
 					continue;
5eeffbb9
 				if(strncmp(line, "=yend ", 6) == 0)
00f95393
 					break;
0e3b08fc
 			}
 
0d252351
 			/*
 			 * Add two bytes for '\n' and '\0'
 			 */
 			datasize = (line) ? strlen(line) + 2 : 0;
02c9dc2a
 
23e1c37c
 			if(datasize >= sizeof(smallbuf))
 				data = bigbuf = (unsigned char *)cli_malloc(datasize);
0d252351
 			else {
 				bigbuf = NULL;
 				data = smallbuf;
 				datasize = sizeof(smallbuf);
 			}
02c9dc2a
 
23e1c37c
 			uptr = decodeLine(m, enctype, line, data, datasize);
0d252351
 			if(uptr == NULL) {
 				if(data == bigbuf)
 					free(data);
 				break;
 			}
1e06e1ab
 
d17de037
 			if(uptr != data) {
0e3b08fc
 				(*addData)(ret, data, (size_t)(uptr - data));
d17de037
 				size += (size_t)(uptr - data);
 			}
02c9dc2a
 
0d252351
 			if(data == bigbuf)
 				free(data);
 
0e3b08fc
 			/*
da850706
 			 * According to RFC2045, '=' is used to pad out
0e3b08fc
 			 * the last byte and should be used as evidence
 			 * of the end of the data. Some mail clients
 			 * annoyingly then put plain text after the '='
 			 * byte and viruses exploit this bug. Sigh
 			 */
 			/*if(enctype == BASE64)
 				if(strchr(line, '='))
 					break;*/
 
 		} while((t_line = t_line->t_next) != NULL);
d17de037
 
 		cli_dbgmsg("Exported %u bytes\n", size);
0e3b08fc
 	}
752c34b9
 
285a69b4
 	/* Verify we have nothing left to flush out */
 	if(m->base64chars) {
 		unsigned char data[4];
 		unsigned char *ptr;
 
 		ptr = decode(m, NULL, data, base64, FALSE);
 		if(ptr)
e6b25cd3
 			(*addData)(ret, data, (size_t)(ptr - data));
285a69b4
 		m->base64chars = 0;
 	}
 
e6b25cd3
 	return ret;
 }
 
 /*
  * Run messageExport() with the fileblob routines, so that the decoded
  * contents of the message end up in a fileblob. dir is handed on to
  * fileblobSetFilename.
  * The caller must free the returned fileblob
  */
 fileblob *
 messageToFileblob(message *m, const char *dir)
 {
 	fileblob *fb;

 	cli_dbgmsg("messageToFileblob\n");

 	fb = messageExport(m, dir, (void *)fileblobCreate, (void *)fileblobDestroy, (void *)fileblobSetFilename, (void *)fileblobAddData, (void *)textToFileblob);

 	return fb;
 }
 
 /*
  * Run messageExport() with the blob routines, so that the decoded contents
  * of the message end up in a blob. No directory is given.
  * The caller must free the returned blob
  */
 blob *
 messageToBlob(message *m)
 {
 	blob *b;

 	b = messageExport(m, NULL, (void *)blobCreate, (void *)blobDestroy, (void *)blobSetFilename, (void *)blobAddData, (void *)textToBlob);

 	return b;
 }
 
 /*
  * Decode and transfer the contents of the message into a text area
ffd59a3e
  * The caller must free the returned text
b151ef55
  */
 text *
285a69b4
 messageToText(message *m)
b151ef55
 {
0e3b08fc
 	int i;
b151ef55
 	text *first = NULL, *last = NULL;
 	const text *t_line;
 
 	assert(m != NULL);
 
0e3b08fc
 	if(m->numberOfEncTypes == 0) {
b151ef55
 		/*
 		 * Fast copy
 		 */
 		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
 			else {
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
 
de617e3e
 			if(last == NULL) {
285a69b4
 				if(first)
 					textDestroy(first);
cea95096
 				return NULL;
 			}
0b08b624
 			if(t_line->t_line)
 				last->t_line = lineLink(t_line->t_line);
 			else
 				last->t_line = NULL;	/* empty line */
b151ef55
 		}
0e3b08fc
 		if(last)
 			last->t_next = NULL;
 
 		return first;
 	}
 	/*
 	 * Scan over the data a number of times once for each claimed encoding
 	 * type
 	 */
 	for(i = 0; i < m->numberOfEncTypes; i++) {
 		const encoding_type enctype = m->encodingTypes[i];
 
 		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
 			i, enctype);
 		if(enctype == NOENCODING) {
 			/*
 			 * Fast copy
 			 */
 			for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
 				if(first == NULL)
 					first = last = cli_malloc(sizeof(text));
 				else {
 					last->t_next = cli_malloc(sizeof(text));
 					last = last->t_next;
 				}
 
 				if(last == NULL) {
15bfc2e4
 					if(first) {
 						last->t_next = NULL;
0e3b08fc
 						textDestroy(first);
15bfc2e4
 					}
0e3b08fc
 					return NULL;
 				}
0b08b624
 				if(t_line->t_line)
 					last->t_line = lineLink(t_line->t_line);
 				else
 					last->t_line = NULL;	/* empty line */
0e3b08fc
 			}
 			continue;
 		}
 		if(enctype == UUENCODE) {
27a375f2
 			t_line = uuencodeBegin(m);
b151ef55
 
27a375f2
 			if(t_line == NULL) {
 				/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/
15bfc2e4
 				if(first) {
 					last->t_next = NULL;
0e3b08fc
 					textDestroy(first);
15bfc2e4
 				}
27a375f2
 				return NULL;
 			}
 			t_line = t_line->t_next;
00f95393
 		} else if(enctype == YENCODE) {
 			t_line = yEncBegin(m);
 
 			if(t_line == NULL) {
 				/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
15bfc2e4
 				if(first) {
 					last->t_next = NULL;
00f95393
 					textDestroy(first);
15bfc2e4
 				}
00f95393
 				return NULL;
 			}
 			t_line = t_line->t_next;
a42dba7d
 		} else {
0e3b08fc
 			if((i == 0) && binhexBegin(m))
a446de17
 				cli_warnmsg("Binhex messages not supported yet.\n");
27a375f2
 			t_line = messageGetBody(m);
a42dba7d
 		}
b151ef55
 
27a375f2
 		for(; t_line; t_line = t_line->t_next) {
 			unsigned char data[1024];
 			unsigned char *uptr;
de617e3e
 			const char *line = lineGetData(t_line->t_line);
27a375f2
 
0e3b08fc
 			if(enctype == BASE64) {
285a69b4
 				/*
 				 * ignore blanks - breaks RFC which is
 				 * probably the point!
 				 */
 				if(line == NULL)
 					continue;
0e3b08fc
 			} else if(enctype == UUENCODE)
27a375f2
 				if(strcasecmp(line, "end") == 0)
 					break;
752c34b9
 
0d252351
 			assert((line == NULL) || (strlen(line) <= sizeof(data)));
 
0e3b08fc
 			uptr = decodeLine(m, enctype, line, data, sizeof(data));
b151ef55
 
27a375f2
 			if(uptr == NULL)
 				break;
b151ef55
 
27a375f2
 			assert(uptr <= &data[sizeof(data)]);
 
 			if(first == NULL)
 				first = last = cli_malloc(sizeof(text));
 			else {
 				last->t_next = cli_malloc(sizeof(text));
 				last = last->t_next;
 			}
b151ef55
 
98685ac1
 			if(last == NULL)
bbf43447
 				break;
752c34b9
 
290ba18f
 			/*
 			 * If the decoded line is the same as the encoded
 			 * there's no need to take a copy, just link it.
 			 * Note that the comparison is done without the
 			 * trailing newline that the decoding routine may have
 			 * added - that's why there's a strncmp rather than a
 			 * strcmp - that'd be bad for MIME decoders, but is OK
 			 * for AV software
 			 */
5eeffbb9
 			if((data[0] == '\n') || (data[0] == '\0'))
 				last->t_line = NULL;
d16754aa
 			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
74ca33e9
 #ifdef	CL_DEBUG
290ba18f
 				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
74ca33e9
 #endif
290ba18f
 				last->t_line = lineLink(t_line->t_line);
 			} else
5eeffbb9
 				last->t_line = lineCreate((char *)data);
98685ac1
 
0e3b08fc
 			if(line && enctype == BASE64)
752c34b9
 				if(strchr(line, '='))
 					break;
27a375f2
 		}
82348395
 		if(m->base64chars) {
 			unsigned char data[4];
 
4b0e970e
 			memset(data, '\0', sizeof(data));
5eeffbb9
 			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
82348395
 				if(first == NULL)
 					first = last = cli_malloc(sizeof(text));
 				else {
 					last->t_next = cli_malloc(sizeof(text));
 					last = last->t_next;
 				}
 
 				if(last != NULL)
5eeffbb9
 					last->t_line = lineCreate((char *)data);
82348395
 			}
 			m->base64chars = 0;
 		}
b151ef55
 	}
 
 	if(last)
 		last->t_next = NULL;
 
 	return first;
 }
 
a42dba7d
 /*
  * Return the line that starts the uuencoded section of the message, or NULL
  * if none has been seen.
  *
  * The body is not scanned here: the marker is set by messageIsEncoding()
  * when a line of the form "begin NNN " is added to the message
  */
 const text *
 uuencodeBegin(const message *m)
 {
 	return m->uuencode;
 }
27a375f2
 
00f95393
 /*
  * Return the message's yenc marker
  */
 const text *
 yEncBegin(const message *m)
 {
 	const text *start = m->yenc;

 	return start;
 }
 
b151ef55
 /*
a42dba7d
  * Scan to find the BINHEX message (if any)
  */
f5a4d7e8
 #if	0
a446de17
 static const text *
a42dba7d
 binhexBegin(const message *m)
 {
 	const text *t_line;
 
 	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
 		if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
 			return t_line;
 
 	return NULL;
 }
f5a4d7e8
 #else
0856891e
 const text *
f5a4d7e8
 binhexBegin(const message *m)
 {
 	return m->binhex;
 }
 #endif
a42dba7d
 
 /*
  * Return the line that starts a bounce message held in the body, or NULL
  * if none has been seen. There is no standard for these, not even a
  * convention, so don't expect this to be foolproof
  *
  * The body is not scanned here: the marker is set by messageIsEncoding()
  * when a line starting with "Received: ", which cli_filetype() takes to be
  * mail, is added to the message
  */
 const text *
 bounceBegin(const message *m)
 {
 	return m->bounce;
 }
 
 /*
  * If a message doesn't not contain another message which could be harmful
  * it is deemed to be safe.
  *
  * TODO: ensure nothing can get through this
  *
  * TODO: check to see if we need to
  * find anything else, perhaps anything
  * from the RFC821 table?
  */
 #if	0
 int
 messageIsAllText(const message *m)
 {
 	const text *t;
 
 	for(t = messageGetBody(m); t; t = t->t_next)
 		if(strncasecmp(t->t_text,
 			"Content-Transfer-Encoding",
 			strlen("Content-Transfer-Encoding")) == 0)
 				return 0;
 
 	return 1;
 }
 #else
627465e7
 const text *
 encodingLine(const message *m)
f5a4d7e8
 {
627465e7
 	return m->encoding;
f5a4d7e8
 }
 #endif
5a01973c
 
a446de17
 /*
  * Forget the encoding, bounce, uuencode and binhex markers of the message
  *
  * NOTE(review): the yenc marker is not reset here - confirm whether that
  * is intended
  */
 void
 messageClearMarkers(message *m)
 {
 	m->binhex = NULL;
 	m->uuencode = NULL;
 	m->bounce = NULL;
 	m->encoding = NULL;
 }
 
5a01973c
 /*
  * Decode one line of body text and append the result at "buf".
  *
  * m		the message being decoded; handed on to decode() for the
  *		base64 and uuencode cases
  * et		how "line" is encoded
  * line	the text to decode, there is no new line at the end of it;
  *		NULL stands for an empty line
  * buf		where the decoded bytes are stored
  * buflen	the room available at "buf"; only the quoted-printable loop
  *		and the uuencode length check look at it
  *
  * Returns the end of the decoded data to help appending callers. The
  * unencoded cases return whatever strrcpy() returns, all other cases store
  * a NUL at the address that is returned.
  */
 static unsigned char *
0e3b08fc
 decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
b151ef55
 {
dd8a7e90
 	size_t len;
bf8ea488
 	bool softbreak;
23e1c37c
 	char *p2, *copy;
53ee0b60
 	char base64buf[RFC2045LENGTH + 1];
b151ef55
 
15bfc2e4
 	/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
0d252351
 
b151ef55
 	assert(m != NULL);
eaacc2de
 	assert(buf != NULL);
b151ef55
 
0e3b08fc
 	switch(et) {
ee576466
 		case BINARY:
 			/*
 			 * TODO: find out what this is, encoded as binary??
 			 */
 			/* fall through */
b151ef55
 		case NOENCODING:
 		case EIGHTBIT:
c6259ac5
 		default:	/* unknown encoding type - try our best */
 			/*
 			 * Copied as is. NOTE(review): buflen is not consulted
 			 * on this path - confirm that strrcpy() cannot overrun
 			 */
963e073f
 			if(line)	/* empty line? */
 				buf = (unsigned char *)strrcpy((char *)buf, line);
b151ef55
 			/* Put the new line back in */
eaacc2de
 			return (unsigned char *)strrcpy((char *)buf, "\n");
b151ef55
 
 		case QUOTEDPRINTABLE:
98685ac1
 			if(line == NULL) {	/* empty line */
 				*buf++ = '\n';
 				break;
 			}
285a69b4
 
da850706
 			softbreak = FALSE;
 			/*
 			 * "=XX" is a byte given as two hex digits, a lone "="
 			 * at the end of the line is a soft line break which
 			 * means that no new line is to be output.
 			 *
 			 * NOTE(review): buflen is decremented once per pass
 			 * and the new line and NUL stored after the loop are
 			 * not counted - confirm that callers leave room
 			 */
23e1c37c
 			while(buflen && *line) {
da850706
 				if(*line == '=') {
 					unsigned char byte;
 
 					if((*++line == '\0') || (*line == '\n')) {
 						softbreak = TRUE;
 						/* soft line break */
 						break;
 					}
 
 					byte = hex(*line);
 
 					if((*++line == '\0') || (*line == '\n')) {
 						/*
 						 * broken e-mail, not
 						 * adhering to RFC2045
 						 */
 						*buf++ = byte;
 						break;
 					}
 
 					byte <<= 4;
 					byte += hex(*line);
 					*buf++ = byte;
 				} else
 					*buf++ = *line;
23e1c37c
 				++line;
 				--buflen;
da850706
 			}
bf8ea488
 			if(!softbreak)
 				/* Put the new line back in */
eaacc2de
 				*buf++ = '\n';
b151ef55
 			break;
 
 		case BASE64:
98685ac1
 			if(line == NULL)
 				break;
752c34b9
 			/*
 			 * RFC2045 sets the maximum length to 76 bytes
 			 * but many e-mail clients ignore that.
 			 *
 			 * The line is modified below so work on a copy of it:
 			 * lines that fit use the array on the stack, longer
 			 * ones are duplicated on the heap
 			 */
53ee0b60
 			if(strlen(line) < sizeof(base64buf)) {
 				strcpy(base64buf, line);
 				copy = base64buf;
 			} else {
 				copy = strdup(line);
 				if(copy == NULL)
 					break;
 			}
bbf43447
 
 			/* Drop the padding and anything that follows it */
752c34b9
 			p2 = strchr(copy, '=');
b151ef55
 			if(p2)
 				*p2 = '\0';
285a69b4
 
d17de037
 			sanitiseBase64(copy);
 
b151ef55
 			/*
 			 * Klez doesn't always put "=" on the last line
 			 *
 			 * The fast decoder is only asked for when no padding
 			 * was seen and what is left is a whole number of
 			 * 4 character groups
 			 */
285a69b4
 			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
752c34b9
 
53ee0b60
 			if(copy != base64buf)
 				free(copy);
b151ef55
 			break;
 
 		case UUENCODE:
98685ac1
 			if((line == NULL) || (*line == '\0'))	/* empty line */
3c52fb18
 				break;
 			/* The "begin" and "end" lines carry no data */
b151ef55
 			if(strncasecmp(line, "begin ", 6) == 0)
 				break;
 			if(strcasecmp(line, "end") == 0)
 				break;
 
 			/*
 			 * The first character holds the length of the line,
 			 * one whose low six bits equal those of ' ' (e.g. a
 			 * space or a back quote) presumably means no data
 			 */
 			if((line[0] & 0x3F) == ' ')
 				break;
 
28ea5910
 			/* Don't trust the encoded length */
 			/*len = *line++ - ' ';*/
 			len = strlen(++line);
b151ef55
 
eaacc2de
 			if(len > buflen)
 				/*
 				 * In practice this should never occur since
 				 * the maximum length of a uuencoded line is
 				 * 62 characters
 				 */
8dc9ee9e
 				cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
eaacc2de
 			else
285a69b4
 				buf = decode(m, line, buf, uudecode, (len & 3) == 0);
b151ef55
 			break;
00f95393
 		case YENCODE:
 			if((line == NULL) || (*line == '\0'))	/* empty line */
 				break;
 			if(strncmp(line, "=yend ", 6) == 0)
 				break;
 
 			/*
 			 * A character following "=" is escaped and has 64
 			 * taken from it, any other character has 42 taken
 			 * from it.
 			 *
 			 * NOTE(review): buflen is not consulted on this path
 			 */
 			while(*line)
 				if(*line == '=') {
 					if(*++line == '\0')
 						break;
 					*buf++ = ((*line++ - 64) & 255);
 				} else
 					*buf++ = ((*line++ - 42) & 255);
 			break;
b151ef55
 	}
 
 	/* Terminate the data, the NUL is not included in what is returned */
eaacc2de
 	*buf = '\0';
 	return buf;
b151ef55
 }
 
6ba88eb8
 /*
  * Remove the non base64 characters such as spaces from a string. Spaces
  * shouldn't appear mid string in base64 files, but some broken mail clients
  * ignore such errors rather than discarding the mail, and virus writers
  * exploit this bug.
  *
  * The string is compacted in place in a single pass: "s" reads every
  * character and "dst" trails behind it writing only the legal ones, so that
  * runs of adjacent illegal characters are removed completely. The padding
  * character '=' is treated as illegal by the ctype variant.
  */
 static void
 sanitiseBase64(char *s)
 {
 	char *dst = s;
 
 	/*cli_dbgmsg("sanitiseBase64 '%s'\n", s);*/
 	for(; *s; s++) {
 #ifdef	USE_TABLE
 		const int legal = (base64Table[(unsigned int)(*s & 0xFF)] != 255);
 #else
 		/* the ctype functions must not be given a negative value */
 		const unsigned char c = (unsigned char)*s;
 		const int legal = isupper(c) || isdigit(c) || (c == '+') ||
 			(c == '/') || islower(c);
 #endif
 
 		if(legal)
 			*dst++ = *s;
 	}
 	*dst = '\0';
 }
 
 /*
  * Convert encoded characters to bytes, 4 characters making 3 bytes.
  *
  * m		holds what is carried over between calls: base64chars is the
  *		number of characters (3 at most) that the previous call could
  *		not use and base64_1 to base64_3 are their decoded values
  * in		the NUL terminated characters to decode. After the last line
  *		is found, decode will be called with in = NULL to flush out
  *		what has been carried over
  * out	where the bytes are stored
  * decoder	gives the value of one encoded character
  * isFast	set by the caller when "in" holds a whole number of groups of
  *		4 characters; it is overridden when characters were carried
  *		over
  *
  * Returns one byte after the end of the decoded data in "out"
  */
b151ef55
 static unsigned char *
285a69b4
 decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
 {
 	unsigned char b1, b2, b3, b4;
 	unsigned char cb1, cb2, cb3;	/* carried over from last line */
 
15bfc2e4
 	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
285a69b4
 		in ? strlen(in) : 0,
f0146bc6
 		isFast, m->base64chars);*/
285a69b4
 
 	cb1 = cb2 = cb3 = '\0';
 
 	/*
 	 * Fetch what the last call left behind. Having anything left over
 	 * rules out the fast decoder, since the groups no longer start at
 	 * the beginning of "in"
 	 */
 	switch(m->base64chars) {
 		case 3:
 			cb3 = m->base64_3;
 			/* FALLTHROUGH */
 		case 2:
 			cb2 = m->base64_2;
 			/* FALLTHROUGH */
 		case 1:
 			cb1 = m->base64_1;
 			isFast = FALSE;
 			break;
 		default:
 			assert(m->base64chars <= 3);
 	}
 
 	/*
 	 * NOTE(review): this branch is tested before in == NULL, so a flush
 	 * must never be requested with isFast set and nothing carried over
 	 */
 	if(isFast)
 		/*
 		 * Fast decoding if not last line. Four characters are taken
 		 * on each pass without looking for the end of the string
 		 * in between them
 		 */
 		while(*in) {
 			b1 = (*decoder)(*in++);
 			b2 = (*decoder)(*in++);
 			b3 = (*decoder)(*in++);
 			/*
 			 * Put this line here to help on some compilers which
 			 * can make use of some architecture's ability to
 			 * multiprocess when different variables can be
 			 * updated at the same time - here b3 is used in
 			 * one line, b1/b2 in the next and b4 in the next after
 			 * that, b3 and b4 rely on in but b1/b2 don't
 			 */
 			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 			b4 = (*decoder)(*in++);
 			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 			*out++ = (b3 << 6) | (b4 & 0x3F);
 		}
0d252351
 	else if(in == NULL) {	/* flush */
 		int nbytes;
 
 		if(m->base64chars == 0)
 			return out;
285a69b4
 
0d252351
 		cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
 			cb1 ? cb1 : '@',
 			cb2 ? cb2 : '@',
 			cb3 ? cb3 : '@');
285a69b4
 
 		/*
 		 * Use up the carried over values, working out in nbytes
 		 * which of the output forms below to use. A last value
 		 * of zero does not add to nbytes
 		 */
0d252351
 		m->base64chars--;
 		b1 = cb1;
 		nbytes = 1;
d17de037
 
0d252351
 		if(m->base64chars) {
285a69b4
 			m->base64chars--;
0d252351
 			b2 = cb2;
285a69b4
 
 			if(m->base64chars) {
0d252351
 				nbytes++;
285a69b4
 				m->base64chars--;
0d252351
 				b3 = cb3;
 				if(b3)
d17de037
 					nbytes++;
0d252351
 			} else if(b2)
 				nbytes++;
 		}
285a69b4
 
0d252351
 		switch(nbytes) {
 			case 3:
 				b4 = '\0';
 				/* fall through */
 			case 4:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 				*out++ = (b3 << 6) | (b4 & 0x3F);
 				break;
 			case 2:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = b2 << 4;
 				break;
 			case 1:
 				*out++ = b1 << 2;
 				break;
 			default:
 				assert(0);
 		}
 	} else while(*in) {
 		/*
 		 * Slow decoding: each of the first three values of a group
 		 * comes from the carry over while there is any, otherwise from
 		 * "in"; the fourth always comes from "in". The end of the
 		 * string is looked for after each one, nbytes ends up as the
 		 * number of values that were found
 		 */
 		int nbytes;
285a69b4
 
0d252351
 		if(m->base64chars) {
 			m->base64chars--;
 			b1 = cb1;
 		} else
 			b1 = (*decoder)(*in++);
285a69b4
 
0d252351
 		if(*in == '\0') {
 			b2 = '\0';
 			nbytes = 1;
 		} else {
285a69b4
 			if(m->base64chars) {
 				m->base64chars--;
0d252351
 				b2 = cb2;
285a69b4
 			} else
0d252351
 				b2 = (*decoder)(*in++);
285a69b4
 
 			if(*in == '\0') {
0d252351
 				b3 = '\0';
 				nbytes = 2;
285a69b4
 			} else {
 				if(m->base64chars) {
 					m->base64chars--;
0d252351
 					b3 = cb3;
285a69b4
 				} else
0d252351
 					b3 = (*decoder)(*in++);
285a69b4
 
 				if(*in == '\0') {
0d252351
 					b4 = '\0';
 					nbytes = 3;
285a69b4
 				} else {
0d252351
 					b4 = (*decoder)(*in++);
 					nbytes = 4;
285a69b4
 				}
 			}
0d252351
 		}
285a69b4
 
 		/*
 		 * A whole group is output, anything less is saved in the
 		 * message for the next call
 		 */
0d252351
 		switch(nbytes) {
 			case 3:
 				m->base64_3 = b3;
 				/* fall through */
 			case 2:
 				m->base64_2 = b2;
 				/* fall through */
 			case 1:
 				m->base64_1 = b1;
285a69b4
 				break;
0d252351
 			case 4:
 				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
 				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
 				*out++ = (b3 << 6) | (b4 & 0x3F);
 				break;
 			default:
 				assert(0);
 		}
 		if(nbytes != 4) {
 			/* the end of "in" was reached part way through a group */
 			m->base64chars = nbytes;
 			break;
285a69b4
 		}
 	}
 	return out;
 }
b151ef55
 
 /*
  * Give the value (0 to 15) of one hexadecimal digit, either case of the
  * letters is accepted.
  *
  * Anything else is reported with cli_dbgmsg() and '=' is returned.
  *
  * The digits are found by range comparison rather than isdigit(), since
  * "c" comes from the mail and may be negative where char is signed, which
  * the ctype functions do not allow.
  */
 static unsigned char
 hex(char c)
 {
 	if((c >= '0') && (c <= '9'))
 		return c - '0';
 	if((c >= 'A') && (c <= 'F'))
 		return c - 'A' + 10;
 	if((c >= 'a') && (c <= 'f'))
 		return c - 'a' + 10;
 	cli_dbgmsg("Illegal hex character '%c'\n", c);
 
 	/*
 	 * Some mails (notably some spam) break RFC2045 by failing to encode
 	 * the '=' character
 	 */
 	return '=';
 }
 
5ae253d2
 /*
  * Give the 6 bit value of one base64 character. Characters that are not
  * part of the base64 alphabet are given the value 63, the same as '/'.
  *
  * Two variants exist: a table lookup when USE_TABLE is defined, otherwise
  * a chain of ctype tests.
  */
 #ifdef	USE_TABLE
 static unsigned char
 base64(char c)
 {
 	const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)];
 
 	if(ret == 255) {
 		/*cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);*/
 		return 63;
 	}
 	return ret;
 }
 #else
 static unsigned char
 base64(char c)
 {
 	/*
 	 * "c" comes from the mail and may be negative where char is signed,
 	 * the ctype functions must be given it as an unsigned char
 	 */
 	const unsigned char uc = (unsigned char)c;
 
 	if(isupper(uc))
 		return c - 'A';
 	if(isdigit(uc))
 		return c - '0' + 52;
 	if(c == '+')
 		return 62;
 	if(islower(uc))	/* call last, most base64 is upper case */
 		return c - 'a' + 26;
 
 	if(c != '/')
 		cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);
 
 	return 63;
 }
 #endif
b151ef55
 
 /*
  * Give the value of one uuencoded character, which is its distance from
  * the space character. The result is not masked to 6 bits.
  */
 static unsigned char
 uudecode(char c)
 {
 	const int base = ' ';
 
 	return c - base;
 }
b4cb4486
 
 /*
  * Decide if an argument is worth keeping: returns 1 when "arg" starts,
  * in either case, with one of the names listed below and 0 when it does
  * not, in which case the argument is also reported with cli_dbgmsg().
  *
  * These are the only arguments that messageFindArgument is asked for
  * ('fgrep messageFindArgument *.c' shows them), dropping the rest can save
  * a lot of memory. The list must be kept up to date when a new use of
  * messageFindArgument is added.
  */
 static int
 usefulArg(const char *arg)
 {
 	static const char *const wanted[] = {
 		"name", "filename", "boundary", "protocol",
 		"id", "number", "total", "type"
 	};
 	size_t i;
 
 	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
 		if(strncasecmp(arg, wanted[i], strlen(wanted[i])) == 0)
 			return 1;
 
 	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
 	return 0;
 }
e24738dc
 
 /*
  * We've run out of memory. Try to recover some by
  * deduping the message: body lines holding the same text are made to
  * share one line_t by means of lineUnlink()/lineLink().
  *
  * The lines that the encoding, bounce, uuencode, binhex and yenc markers
  * point to are never used as the line to share, and lines of fewer than 8
  * characters are not worth sharing. The search stops once 100000 bytes or
  * more have been reclaimed.
  *
  * On return m->dedupedThisFar is where the search stopped, NULL when the
  * whole body was looked at.
  *
  * NOTE(review): the search always starts at m->body_first. The original
  * code first set the start from m->dedupedThisFar but overwrote that at
  * once, so resuming looks to have been intended - confirm that the saved
  * pointer is always valid before making use of it.
  */
 static void
 messageDedup(message *m)
 {
 	const text *t1;
 	size_t saved = 0;
 
 	cli_dbgmsg("messageDedup\n");
 
 	for(t1 = m->body_first; t1; t1 = t1->t_next) {
 		const char *d1;
 		text *t2;
 		line_t *l1;
 		unsigned int r1;
 
 		if(saved >= 100*1000)
 			break;	/* that's enough */
 		l1 = t1->t_line;
 		if(l1 == NULL)
 			continue;
 		d1 = lineGetData(l1);
 		if(strlen(d1) < 8)
 			continue;	/* wouldn't recover many bytes */
 
 		/* a line with 255 references is not linked to again */
 		r1 = (unsigned int)lineGetRefCount(l1);
 		if(r1 == 255)
 			continue;
 		/*
 		 * We don't want to foul up any pointers
 		 */
 		if((t1 == m->encoding) || (t1 == m->bounce) ||
 		   (t1 == m->uuencode) || (t1 == m->binhex) ||
 		   (t1 == m->yenc))
 			continue;
 
 		for(t2 = t1->t_next; t2; t2 = t2->t_next) {
 			const char *d2;
 			line_t *l2 = t2->t_line;
 
 			if(l2 == NULL)
 				continue;
 			d2 = lineGetData(l2);
 			if(d1 == d2)
 				/* already linked */
 				continue;
 			if(strcmp(d1, d2) == 0) {
 				/*
 				 * d2 must not be used after the unlink, d1
 				 * holds the same text
 				 */
 				if(lineUnlink(l2) == NULL)
 					saved += strlen(d1) + 1;
 				t2->t_line = lineLink(l1);
 				if(t2->t_line == NULL) {
 					cli_errmsg("messageDedup: out of memory\n");
 					return;
 				}
 				if(++r1 == 255)
 					break;
 			}
 		}
 	}
 
 	/* size_t is not an unsigned int everywhere, so convert to print it */
 	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
 	m->dedupedThisFar = t1;
 }
b329234a
 
 /*
  * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error.
  *
  * An argument without "*=" is returned as a plain copy. Otherwise
  * "name*=<field>'<field>'value" becomes "name=value" with each %XX of the
  * value replaced by the byte that it stands for. If the two quote
  * characters are not both found a warning is given and an empty string
  * is returned.
  *
  * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
  *	 protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
  */
 static char *
 rfc2231(const char *in)
 {
 	const char *const header = in;	/* "in" is moved on while parsing */
 	const char *ptr;
 	char *ret, *out;
 	enum { LANGUAGE, CHARSET, CONTENTS } field = LANGUAGE;
 
 	ptr = strstr(in, "*=");
 
 	if(ptr == NULL)	/* quick return */
 		return strdup(in);
 
 	cli_dbgmsg("rfc2231 '%s'\n", in);
 
 	/* the result is never longer than the input */
 	ret = cli_malloc(strlen(in) + 1);
 
 	if(ret == NULL)
 		return NULL;
 
 	/* copy the name, putting '=' in place of "*=" */
 	for(out = ret; in != ptr; in++)
 		*out++ = *in;
 
 	*out++ = '=';
 
 	/*
 	 * We don't do anything with the language and character set, just skip
 	 * over them!
 	 */
 	while(*in) {
 		switch(field) {
 			case LANGUAGE:
 				if(*in == '\'')
 					field = CHARSET;
 				break;
 			case CHARSET:
 				if(*in == '\'')
 					field = CONTENTS;
 				break;
 			case CONTENTS:
 				if(*in == '%') {
 					unsigned char byte;
 
 					if((*++in == '\0') || (*in == '\n'))
 						break;
 
 					byte = hex(*in);
 
 					if((*++in == '\0') || (*in == '\n')) {
 						*out++ = byte;
 						break;
 					}
 
 					byte <<= 4;
 					byte += hex(*in);
 					*out++ = byte;
 				} else
 					*out++ = *in;
 		}
 		/*
 		 * A %XX cut short by the end of the string leaves "in" at
 		 * the NUL, moving on from there would run off the end of
 		 * the input and could overflow "ret"
 		 */
 		if(*in == '\0')
 			break;
 		in++;
 	}
 
 	if(field != CONTENTS) {
 		free(ret);
 		/* "in" is at the end of the string by now, show all of it */
 		cli_warnmsg("Invalid RFC2231 header: '%s'\n", header);
 		return strdup("");
 	}
 
 	*out = '\0';
 
 	cli_dbgmsg("rfc2231 returns '%s'\n", ret);
 
 	return ret;
 }
 
 /*
b329234a
  * common/simil:
  *	From Computing Magazine 20/8/92
  * Returns a percentage from 0 to 100 - how similar are 2 strings?
  * 100 for an exact match, a negative value for an error
  */
 /*
  * One entry of the stack of strings that simil() works through,
  * entries are added by push() and removed by pop()
  */
 struct	pstr_list {	/* internal stack */
 	char	*d1;	/* copy of the string made by push(), freed by pop() */
 	struct	pstr_list	*next;	/* the entry below this one, NULL at the bottom */
 };
 
 /*
  * Status values of simil(), push() and pop(). They are all negative so
  * that they can't be mistaken for a score
  */
 #define	OUT_OF_MEMORY	(-2)
 #define	FAILURE	(-3)
 #define	SUCCESS	(-4)
 #define	ARRAY_OVERFLOW	(-5)
 typedef	struct	pstr_list	ELEMENT1;
 typedef	ELEMENT1		*LINK1;
 
 static	int	push(LINK1 *top, const char *string);
 static	int	pop(LINK1 *top, char *buffer);
 static	unsigned	int	compare(char *ls1, char **rs1, char *ls2, char **rs2);
 
 /*
  * Size of the buffers that simil() pops strings into, so the longest
  * string that it accepts is one less than this
  */
4bdd7a93
 #define	MAX_PATTERN_SIZ	50	/* maximum string lengths */
b329234a
 
 static int
 simil(const char *str1, const char *str2)
 {
 	LINK1 top = NULL;
 	unsigned int score = 0;
 	unsigned int common, total, len1;
 	unsigned int len2;
 	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
 	char *rs1 = NULL, *rs2 = NULL;
 	char *s1, *s2;
 
 	if(strcasecmp(str1, str2) == 0)
 		return 100;
 
 	if((s1 = strdup(str1)) == NULL)
 		return OUT_OF_MEMORY;
 	if((s2 = strdup(str2)) == NULL) {
 		free(s1);
 		return OUT_OF_MEMORY;
 	}
 
 	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
 		free(s1);
 		free(s2);
 		return ARRAY_OVERFLOW;
 	}
 
 	total += len2;
 
63f87938
 	if((push(&top, s1) == OUT_OF_MEMORY) ||
 	   (push(&top, s2) == OUT_OF_MEMORY)) {
 		free(s1);
 		free(s2);
b329234a
 		return OUT_OF_MEMORY;
63f87938
 	}
b329234a
 
 	while(pop(&top, ls2) == SUCCESS) {
 		pop(&top, ls1);
 		common = compare(ls1, &rs1, ls2, &rs2);
 		if(common > 0) {
 			score += common;
 			len1 = strlen(ls1);
 			len2 = strlen(ls2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 			len1 = strlen(rs1);
 			len2 = strlen(rs2);
 
 			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
 				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
 					free(s1);
 					free(s2);
 					return OUT_OF_MEMORY;
 				}
 		}
 	}
 	free(s1);
 	free(s2);
 	return (total > 0) ? ((score * 200) / total) : 0;
 }
 
 /*
  * Look for a run of characters that two strings have in common, ignoring
  * case.
  *
  * ls1, ls2	the strings to compare. When a run is found both are cut
  *		short at the start of the run, leaving what was to the left
  * rs1, rs2	when a run is found these are set to what follows the run in
  *		each string, otherwise they are left alone
  *
  * Returns the length of the run that was kept, 0 when the strings have
  * no character in common.
  *
  * Each time a longer run is found the end of the area searched in both
  * strings is pulled in by the number of characters gained.
  *
  * The characters are given to tolower() as unsigned char since the ctype
  * functions do not allow the negative values that a signed char can hold.
  */
 static unsigned int
 compare(char *ls1, char **rs1, char *ls2, char **rs2)
 {
 	unsigned int maxchars = 0;
 	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
 	char *end1 = ls1 + strlen(ls1);
 	char *end2 = ls2 + strlen(ls2);
 	char *start1;
 
 	/* end1 can move while searching, it is looked at afresh each time */
 	for(start1 = ls1; start1 < end1; start1++) {
 		char *s1 = start1;
 		char *s2 = ls2;
 
 		while((s1 < end1) && (s2 < end2)) {
 			char *cs1, *cs2;
 			unsigned int common;
 
 			if(tolower((unsigned char)*s1) != tolower((unsigned char)*s2)) {
 				s2++;
 				continue;
 			}
 
 			/* a match, see how far it goes */
 			cs1 = s1;
 			cs2 = s2;
 			common = 0;
 			do {
 				if((s1 == end1) || (s2 == end2))
 					break;
 				s1++;
 				s2++;
 				common++;
 			} while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));
 
 			if(common > maxchars) {
 				const unsigned int diff = common - maxchars;
 
 				maxchars = common;
 				maxs1 = cs1;
 				maxs2 = cs2;
 				maxe1 = s1;
 				maxe2 = s2;
 				end1 -= diff;
 				end2 -= diff;
 			} else
 				/* no better, only s1 goes back to where it was */
 				s1 -= common;
 		}
 	}
 
 	/* the first match of all always sets maxchars, so this means "found" */
 	if(maxchars > 0) {
 		*maxs1 = '\0';
 		*maxs2 = '\0';
 		*rs1 = maxe1;
 		*rs2 = maxe2;
 	}
 	return maxchars;
 }
 
 /*
  * Put a copy of "string" on the top of the stack.
  *
  * Returns SUCCESS, or OUT_OF_MEMORY in which case the stack has not been
  * changed and nothing is left allocated.
  */
 static int
 push(LINK1 *top, const char *string)
 {
 	LINK1 element;
 
 	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
 		return OUT_OF_MEMORY;
 	if((element->d1 = strdup(string)) == NULL) {
 		/* don't lose the element that can't be used */
 		free(element);
 		return OUT_OF_MEMORY;
 	}
 	element->next = *top;
 	*top = element;
 
 	return SUCCESS;
 }
 
 /*
  * Take the string off the top of the stack, copying it to "buffer" and
  * freeing the entry that held it.
  *
  * Returns SUCCESS, or FAILURE when the stack is empty in which case
  * "buffer" is not touched. The size of "buffer" is not known here, the
  * caller has to see to it that the string fits.
  */
 static int
 pop(LINK1 *top, char *buffer)
 {
 	LINK1 head = *top;
 
 	if(head == NULL)
 		return FAILURE;
 
 	(void)strcpy(buffer, head->d1);
 	*top = head->next;
 	free(head->d1);
 	free((char *)head);
 
 	return SUCCESS;
 }