GitList

libclamav/message.c

/*
 *  Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
4db74788	static char const rcsid[] = "$Id: message.c,v 1.193 2007/01/07 21:30:49 njh Exp $";
8b242bb9	#if HAVE_CONFIG_H #include "clamav-config.h" #endif
b151ef55	#ifndef CL_DEBUG
0b08b624	#define NDEBUG /* map CLAMAV debug onto standard */
b151ef55	#endif #ifdef CL_THREAD_SAFE
dd8a7e90	#ifndef _REENTRANT
b151ef55	#define _REENTRANT /* for Solaris 2.8 */ #endif
dd8a7e90	#endif
b151ef55
f0146bc6	#ifdef C_DARWIN
b151ef55	#include <sys/types.h> #endif #include <stdlib.h> #include <string.h>
40d54f7f	#ifdef HAVE_STRINGS_H
b151ef55	#include <strings.h>
40d54f7f	#endif
b151ef55	#include <assert.h> #include <ctype.h> #include <stdio.h>
8a88fb93	#ifdef CL_THREAD_SAFE #include <pthread.h> #endif
5c86c162	#include "others.h" #include "str.h" #include "filetypes.h"
b151ef55	#include "mbox.h"
06bce849	#ifndef isblank #define isblank(c) (((c) == ' ') \|\| ((c) == '\t')) #endif
53ee0b60	#define RFC2045LENGTH 76 /* maximum number of characters on a line */
4db74788	#ifdef HAVE_STDBOOL_H
a4f8f199	#include <stdbool.h> #else #ifdef FALSE typedef unsigned char bool; #else typedef enum { FALSE = 0, TRUE = 1 } bool; #endif #endif
b151ef55
de617e3e	static void messageIsEncoding(message *m);
285a69b4	static unsigned char decode(message m, const char in, unsigned char out, unsigned char (*decoder)(char), bool isFast);
fb405afc	static void sanitiseBase64(char *s);
f5d6e670	#ifdef __GNUC__ static unsigned char hex(char c) __attribute__((const)); static unsigned char base64(char c) __attribute__((const)); static unsigned char uudecode(char c) __attribute__((const)); #else
b151ef55	static unsigned char hex(char c); static unsigned char base64(char c); static unsigned char uudecode(char c);
f5d6e670	#endif
b151ef55	static const char messageGetArgument(const message m, int arg);
985cc85e	static void messageExport(message m, const char dir, void (create)(void), void (destroy)(void ), void (setFilename)(void , const char , const char ), void (addData)(void , const unsigned char , size_t), void (exportText)(text , void , int), void (setCTX)(void , cli_ctx *), int destroy_text);
b4cb4486	static int usefulArg(const char *arg);
e24738dc	static void messageDedup(message *m);
5e5a162c	static char rfc2231(const char in);
b329234a	static int simil(const char str1, const char str2);
b151ef55	/* * These maps are ordered in decreasing likelyhood of their appearance
0e3b08fc	* in an e-mail. Probably these should be in a table...
b151ef55	/ static const struct encoding_map { const char string; encoding_type type;
da850706	} encoding_map[] = { /* rfc2045 */
b151ef55	{ "7bit", NOENCODING },
b759d5eb	{ "text/plain", NOENCODING },
da850706	{ "quoted-printable", QUOTEDPRINTABLE }, /* rfc2045 */
98685ac1	{ "base64", BASE64 }, /* rfc2045 */
b151ef55	{ "8bit", EIGHTBIT },
bb2432d7	{ "binary", BINARY },
af66c329	{ "x-uuencode", UUENCODE }, /* uuencode(5) */
00f95393	{ "x-yencode", YENCODE },
fef5ad63	{ "x-binhex", BINHEX },
7ea0c270	{ "us-ascii", NOENCODING }, /* incorrect */
ef3cf57d	{ "x-uue", UUENCODE }, /* incorrect */
a4c3d0a3	{ "uuencode", UUENCODE }, /* incorrect */
6ba88eb8	{ NULL, NOENCODING }
b151ef55	}; static struct mime_map { const char *string; mime_type type; } mime_map[] = { { "text", TEXT }, { "multipart", MULTIPART }, { "application", APPLICATION }, { "audio", AUDIO }, { "image", IMAGE }, { "message", MESSAGE }, { "video", VIDEO },
6ba88eb8	{ NULL, TEXT }
b151ef55	};
/*
 * See RFC2045, section 6.8, table 1
 *
 * Indexed by character code; gives the 6-bit value of that character in
 * the base64 alphabet. 255 marks a character outside the alphabet. The
 * pad character '=' maps to 0.
 */
static	const	unsigned	char	base64Table[256] = {
	/* 0x00 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x10 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x20 */ 255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
	/* 0x30 */ 52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
	/* 0x40 */ 255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
	/* 0x50 */ 15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
	/* 0x60 */ 255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
	/* 0x70 */ 41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
	/* 0x80 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x90 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xa0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xb0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xc0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xd0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xe0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xf0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
};
b151ef55	message * messageCreate(void) { message m = (message )cli_calloc(1, sizeof(message));
0e3b08fc	if(m)
bbf43447	m->mimeType = NOMIME;
b151ef55	return m; } void messageDestroy(message *m) {
4d9c0ca8	assert(m != NULL);
b151ef55	messageReset(m); free(m); } void messageReset(message *m) { int i; assert(m != NULL); if(m->mimeSubtype) free(m->mimeSubtype); if(m->mimeDispositionType) free(m->mimeDispositionType);
27a375f2	if(m->mimeArguments) { for(i = 0; i < m->numberOfArguments; i++) free(m->mimeArguments[i]); free(m->mimeArguments); }
b151ef55	if(m->body_first) textDestroy(m->body_first);
285a69b4	assert(m->base64chars == 0);
0e3b08fc	if(m->encodingTypes) { assert(m->numberOfEncTypes > 0); free(m->encodingTypes); }
73175a15	memset(m, '\0', sizeof(message)); m->mimeType = NOMIME;
b151ef55	}
de509b8e	/*
bb2432d7	* Handle the Content-Type header. The syntax is in RFC1341.
2bcec72b	* Return success (1) or failure (0). Failure only happens when it's an * unknown type and we've already received a known type, or we've received an * empty type. If we receive an unknown type by itself we default to application
de509b8e	*/
2bcec72b	int
b151ef55	messageSetMimeType(message mess, const char type) {
8a88fb93	#ifdef CL_THREAD_SAFE static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
1f8eb426	const struct mime_map *m;
dad64ecb	int typeval;
1f8eb426	static table_t *mime_table;
b151ef55	assert(mess != NULL);
4a46b8a2	if(type == NULL) { cli_warnmsg("Empty content-type field\n"); return 0; }
b151ef55	cli_dbgmsg("messageSetMimeType: '%s'\n", type); /* Ignore leading spaces */
82189c76	while(!isalpha(*type))
b151ef55	if(*type++ == '\0')
2bcec72b	return 0;
b151ef55
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&mime_mutex); #endif
dad64ecb	if(mime_table == NULL) { mime_table = tableCreate();
8a88fb93	if(mime_table == NULL) { #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex); #endif
2bcec72b	return 0;
8a88fb93	}
dad64ecb	for(m = mime_map; m->string; m++) if(!tableInsert(mime_table, m->string, m->type)) { tableDestroy(mime_table);
8a88fb93	mime_table = NULL; #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex); #endif
2bcec72b	return 0;
dad64ecb	} }
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex); #endif
dad64ecb	typeval = tableFind(mime_table, type);
2bcec72b	if(typeval != -1) {
f0146bc6	mess->mimeType = (mime_type)typeval;
2bcec72b	return 1;
1f8eb426	} if(mess->mimeType == NOMIME) {
b151ef55	if(strncasecmp(type, "x-", 2) == 0) mess->mimeType = MEXTENSION;
9fc8173e	else {
27a375f2	/* * Based on a suggestion by James Stevens * <James@kyzo.com> * Force scanning of strange messages */
9fc8173e	if(strcasecmp(type, "plain") == 0) {
79879cfb	cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
9fc8173e	mess->mimeType = TEXT; } else { /* * Don't handle broken e-mail probably sending * Content-Type: plain/text * instead of * Content-Type: text/plain * as an attachment */
1f8eb426	int highestSimil = 0, t = -1; const char *closest = NULL; for(m = mime_map; m->string; m++) { const int s = simil(m->string, type); if(s > highestSimil) { highestSimil = s; closest = m->string; t = m->type; } } if(highestSimil >= 50) { cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n", type, closest, highestSimil);
388072d8	mess->mimeType = (mime_type)t;
1f8eb426	} else {
a363da65	cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
1f8eb426	mess->mimeType = APPLICATION; }
9fc8173e	}
27a375f2	}
2bcec72b	return 1;
b151ef55	}
2bcec72b	return 0;
b151ef55	} mime_type messageGetMimeType(const message *m) {
4d9c0ca8	assert(m != NULL); return m->mimeType;
b151ef55	} void messageSetMimeSubtype(message m, const char subtype) { assert(m != NULL);
6b9ba2a4	if(subtype == NULL) { /* * Handle broken content-type lines, e.g. * Content-Type: text/ */ cli_dbgmsg("Empty content subtype\n"); subtype = ""; }
b151ef55	if(m->mimeSubtype) free(m->mimeSubtype);
4db74788	m->mimeSubtype = cli_strdup(subtype);
b151ef55	} const char * messageGetMimeSubtype(const message *m) {
b329234a	return (m->mimeSubtype) ? m->mimeSubtype : "";
b151ef55	} void messageSetDispositionType(message m, const char disptype) { assert(m != NULL);
3db105a2	if(m->mimeDispositionType) free(m->mimeDispositionType); if(disptype == NULL) { m->mimeDispositionType = NULL; return; }
8b04b4f6	/* * It's broken for there to be an entry such as "Content-Disposition:" * However some spam and viruses are rather broken, it's a sign * that something is wrong if we get that - maybe we should force a * scan of this part */
3db105a2	while(disptype && isspace((int)disptype)) disptype++; if(*disptype) {
4db74788	m->mimeDispositionType = cli_strdup(disptype);
3db105a2	if(m->mimeDispositionType) strstrip(m->mimeDispositionType);
552878b2	} else m->mimeDispositionType = NULL;
b151ef55	} const char * messageGetDispositionType(const message *m) {
b329234a	return (m->mimeDispositionType) ? m->mimeDispositionType : "";
b151ef55	} /* * TODO: * Arguments are held on a per message basis, they should be held on * a per section basis. Otherwise what happens if two sections have two * different values for charset? Probably doesn't matter for the use this * code will be given, but will need fixing if this code is used elsewhere / void messageAddArgument(message m, const char arg) { int offset; assert(m != NULL); if(arg == NULL) return; / Note: this is not an error condition / while(isspace(arg)) arg++; if(arg == '\0') / Empty argument? Probably a broken mail client... */ return;
49dff330	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);
b4cb4486	if(!usefulArg(arg))
cea95096	return;
c6259ac5	for(offset = 0; offset < m->numberOfArguments; offset++)
b151ef55	if(m->mimeArguments[offset] == NULL) break; else if(strcasecmp(arg, m->mimeArguments[offset]) == 0) return; /* already in there */
c6259ac5	if(offset == m->numberOfArguments) {
bbf43447	char **ptr;
c6259ac5	m->numberOfArguments++;
bbf43447	ptr = (char *)cli_realloc(m->mimeArguments, m->numberOfArguments sizeof(char *)); if(ptr == NULL) { m->numberOfArguments--; return; } m->mimeArguments = ptr;
c6259ac5	}
b151ef55
802c37fc	arg = m->mimeArguments[offset] = rfc2231(arg);
0bf1353d	/* * This is terribly broken from an RFC point of view but is useful * for catching viruses which have a filename but no type of * mime. By pretending defaulting to an application rather than * to nomime we can ensure they're saved and scanned */ if((strncasecmp(arg, "filename=", 9) == 0) \|\| (strncasecmp(arg, "name=", 5) == 0)) if(messageGetMimeType(m) == NOMIME) { cli_dbgmsg("Force mime encoding to application\n"); messageSetMimeType(m, "application"); }
b151ef55	} /* * Add in all the arguments. * Cope with: * name="foo bar.doc" * charset=foo name=bar / void messageAddArguments(message m, const char s) { const char string = s; cli_dbgmsg("Add arguments '%s'\n", string); assert(string != NULL); while(string) { const char key, cptr; char data, field; if(isspace(string) \|\| (*string == ';')) { string++; continue; } key = string;
b4cb4486
b151ef55	data = strchr(string, '='); /*
da850706	* Some spam breaks RFC2045 by using ':' instead of '='
b151ef55	* e.g.: * Content-Type: text/html; charset:ISO-8859-1 * should be: * Content-type: text/html; charset=ISO-8859-1 * * We give up with lines that are completely broken because * we don't have ESP and don't know what was meant to be there. * It's unlikely to really be a problem. / if(data == NULL) data = strchr(string, ':'); if(data == NULL) { / * Completely broken, give up */
b4cb4486	cli_dbgmsg("Can't parse header \"%s\"\n", s);
b151ef55	return; }
53ee0b60	string = &data[1];
b151ef55
752c34b9	/* * Handle white space to the right of the equals sign
da850706	* This breaks RFC2045 which has:
b4cb4486	* parameter := attribute "=" value * attribute := token ; case-insensitive * token := 1<any (ASCII) CHAR except SPACE, CTLs, or tspecials> * But too many MUAs ignore this
752c34b9	/ while(isspace(string) && (*string != '\0')) string++;
b151ef55	cptr = string++;
1ecd46be	if(strlen(key) == 0) continue;
b151ef55	if(cptr == '"') { char ptr; /* * The field is in quotes, so look for the * closing quotes */
4db74788	key = cli_strdup(key);
4d9c0ca8	if(key == NULL) return;
b151ef55	ptr = strchr(key, '='); if(ptr == NULL) ptr = strchr(key, ':'); *ptr = '\0';
53ee0b60	string = strchr(++cptr, '"');
b4cb4486
1ecd46be	if(string == NULL) { cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s); string = ""; } else string++;
b151ef55
b4cb4486	if(!usefulArg(key)) { free((char *)key); continue; }
4db74788	data = cli_strdup(cptr);
b151ef55
bbf43447	ptr = (data) ? strchr(data, '"') : NULL;
b151ef55	if(ptr == NULL) { /* * Weird e-mail header such as: * Content-Type: application/octet-stream; name=" * " * Content-Transfer-Encoding: base64 * Content-Disposition: attachment; filename=" * " * * TODO: the file should still be saved and * virus checked */
a363da65	cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
bbf43447	if(data) free(data);
b151ef55	free((char )key); return; } ptr = '\0';
d32343c3	field = cli_realloc((char )key, strlen(key) + strlen(data) + 2); if(field) { strcat(field, "="); strcat(field, data); } else free((char )key); free(data);
b151ef55	} else { size_t len;
bf8ea488	if(*cptr == '\0') {
06bce849	cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
bf8ea488	return; }
b151ef55	/* * The field is not in quotes, so look for the closing * white space / while((string != '\0') && !isspace(*string)) string++; len = (size_t)string - (size_t)key + 1; field = cli_malloc(len);
bbf43447	if(field) { memcpy(field, key, len - 1); field[len - 1] = '\0'; } } if(field) { messageAddArgument(m, field); free(field);
b151ef55	} } } static const char * messageGetArgument(const message *m, int arg) { assert(m != NULL); assert(arg >= 0);
c6259ac5	assert(arg < m->numberOfArguments);
b151ef55
b329234a	return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
b151ef55	} /* * Find a MIME variable from the header and return a COPY to the value of that * variable. The caller must free the copy / const char messageFindArgument(const message m, const char variable) { int i;
dad64ecb	size_t len;
b151ef55	assert(m != NULL); assert(variable != NULL);
dad64ecb	len = strlen(variable);
c6259ac5	for(i = 0; i < m->numberOfArguments; i++) {
b151ef55	const char ptr; ptr = messageGetArgument(m, i); if((ptr == NULL) \|\| (ptr == '\0'))
dad64ecb	continue;
b151ef55	#ifdef CL_DEBUG cli_dbgmsg("messageFindArgument: compare %d bytes of %s with %s\n", len, variable, ptr); #endif if(strncasecmp(ptr, variable, len) == 0) { ptr = &ptr[len]; while(isspace(*ptr)) ptr++;
752c34b9	if(*ptr != '=') {
133dcdcd	cli_warnmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
752c34b9	return NULL; }
b151ef55	if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
bf8ea488	/* Remove any quote characters */
4db74788	char *ret = cli_strdup(++ptr);
bf8ea488	char *p;
bbf43447	if(ret == NULL) return NULL;
bf8ea488	/* * Thomas Lamy <Thomas.Lamy@in-online.net>: * fix un-quoting of boundary strings from * header, occurs if boundary was given as * 'boundary="_Test_";' * * At least two quotes in string, assume * quoted argument * end string at next quote */
53ee0b60	if((p = strchr(ret, '"')) != NULL) { ret[strlen(ret) - 1] = '\0';
bf8ea488	*p = '\0';
53ee0b60	}
bbf43447	return ret;
b151ef55	}
4db74788	return cli_strdup(ptr);
b151ef55	} }
bbf43447	return NULL;
b151ef55	} void messageSetEncoding(message m, const char enctype) { const struct encoding_map *e;
4db74788	int i;
0e3b08fc	char *type;
4db74788
b151ef55	assert(m != NULL); assert(enctype != NULL);
0e3b08fc	/m->encodingType = EEXTENSION;/
b151ef55
06bce849	while(isblank(*enctype))
4674dc9a	enctype++;
a4c3d0a3	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
9fc8173e	if(strcasecmp(enctype, "8 bit") == 0) { cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
bb2432d7	enctype = "8bit";
9fc8173e	}
bb2432d7	/*
0e3b08fc	* Iterate through * Content-Transfer-Encoding: base64 binary * cli_strtok's fieldno counts from 0 */ i = 0; while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
b329234a	int highestSimil = 0; const char *closest = NULL;
53ee0b60	for(e = encoding_map; e->string; e++) { int sim; const char lowertype = tolower(type[0]);
23e1c37c
53ee0b60	if((lowertype != tolower(e->string[0])) && (lowertype != 'x')) /* * simil is expensive, I'm yet to encounter only * one example of a missent encoding when the * first character was wrong, so lets assume no * match to save the call. * * That example was quoted-printable sent as * X-quoted-printable. */ continue;
2add0ed7	if(strcmp(e->string, "uuencode") == 0) /* * No need to test here - fast track visa will have * handled uuencoded files */ continue;
53ee0b60	sim = simil(type, e->string); if(sim == 100) {
0e3b08fc	int j; encoding_type *et;
aedb0336	for(j = 0; j < m->numberOfEncTypes; j++)
53ee0b60	if(m->encodingTypes[j] == e->type)
0e3b08fc	break;
53ee0b60	if(j < m->numberOfEncTypes) { cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n", type); break; }
b329234a
0e3b08fc	et = (encoding_type )cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) sizeof(encoding_type));
b329234a	if(et == NULL) break;
0e3b08fc	m->encodingTypes = et; m->encodingTypes[m->numberOfEncTypes++] = e->type; cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type); break;
53ee0b60	} else if(sim > highestSimil) { closest = e->string; highestSimil = sim;
0e3b08fc	}
53ee0b60	}
0e3b08fc	if(e->string == NULL) { /*
cf25aed7	* The stated encoding type is illegal, so we * use a best guess of what it should be. *
b329234a	* 50% is arbitary. For example 7bi will match as * 66% certain to be 7bit
0e3b08fc	*/
1f8eb426	if(highestSimil >= 50) {
90639c82	cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
b329234a	type, closest, highestSimil); messageSetEncoding(m, closest); } else {
a363da65	cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
b329234a	/* * Err on the side of safety, enable all * decoding modules */ messageSetEncoding(m, "base64"); messageSetEncoding(m, "quoted-printable"); }
b151ef55	}
0e3b08fc	free(type); }
b151ef55	} encoding_type messageGetEncoding(const message *m) { assert(m != NULL);
0e3b08fc	if(m->numberOfEncTypes == 0) return NOENCODING; return m->encodingTypes[0];
b151ef55	}
/*
 * Add the given line to the end of the given message. The message takes
 * its own reference to the line with lineLink(); a NULL line, or one with
 * no data, is stored as a blank line.
 *
 * Returns 1 on success, -1 if the new list node can't be allocated, in
 * which case the message is left exactly as it was.
 */
int
messageAddLine(message *m, line_t *line)
{
	text *node;

	assert(m != NULL);

	/*
	 * Allocate before touching the list: assigning a failed allocation
	 * straight to body_last would lose the tail of a non-empty list and
	 * make the next append dereference NULL
	 */
	node = (text *)cli_malloc(sizeof(text));
	if(node == NULL)
		return -1;

	node->t_next = NULL;

	if(m->body_first == NULL)
		m->body_first = node;
	else
		m->body_last->t_next = node;
	m->body_last = node;

	if(line && lineGetData(line)) {
		node->t_line = lineLink(line);

		/* Does this line start an embedded non MIME section? */
		messageIsEncoding(m);
	} else
		node->t_line = NULL;

	return 1;
}
b151ef55	/*
edb35c0a	* Add the given line to the end of the given message
ffd59a3e	* If needed a copy of the given line is taken which the caller must free
edb35c0a	* Line must not be terminated by a \n
b151ef55	*/
4c927f11	int
321d5c00	messageAddStr(message m, const char data)
b151ef55	{
c1e96196	line_t *repeat = NULL;
b151ef55	assert(m != NULL);
c1e96196	if(data) {
321d5c00	if(data == '\0') data = NULL; else { / * If it's only white space, just store one space to * save memory. You must store something since it may * be a header line / int iswhite = 1; const char p;
c1e96196
321d5c00	for(p = data; *p; p++)
5e28cd2b	if(((p) & 0x80) \|\| !isspace(p)) {
321d5c00	iswhite = 0; break; } if(iswhite) { /cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);/ data = " ";
a78256af	}
c1e96196	} }
b151ef55	if(m->body_first == NULL) m->body_last = m->body_first = (text *)cli_malloc(sizeof(text)); else {
e24738dc	assert(m->body_last != NULL);
af66c329	if((data == NULL) && (m->body_last->t_line == NULL))
24c897dc	/* * Although this would save time and RAM, some * phish signatures have been built which need the * blank lines / if(messageGetMimeType(m) != TEXT) / don't save two blank lines in sucession */ return 1;
af66c329
b151ef55	m->body_last->t_next = (text *)cli_malloc(sizeof(text));
e24738dc	if(m->body_last->t_next == NULL) { messageDedup(m); m->body_last->t_next = (text *)cli_malloc(sizeof(text)); if(m->body_last->t_next == NULL) { cli_errmsg("messageAddStr: out of memory\n"); return -1; } }
c1e96196	if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0)) repeat = m->body_last->t_line;
b151ef55	m->body_last = m->body_last->t_next; }
e24738dc	if(m->body_last == NULL) { cli_errmsg("messageAddStr: out of memory\n");
4c927f11	return -1;
e24738dc	}
f5a4d7e8
b151ef55	m->body_last->t_next = NULL;
de617e3e	if(data && *data) {
c1e96196	if(repeat) m->body_last->t_line = lineLink(repeat);
399e1865	else {
e24738dc	m->body_last->t_line = lineCreate(data); if(m->body_last->t_line == NULL) {
399e1865	messageDedup(m); m->body_last->t_line = lineCreate(data);
de617e3e
399e1865	if(m->body_last->t_line == NULL) { cli_errmsg("messageAddStr: out of memory\n"); return -1; } } /* cli_chomp(m->body_last->t_text); */
5ae253d2	messageIsEncoding(m);
399e1865	}
98685ac1	} else
de617e3e	m->body_last->t_line = NULL;
98685ac1
4c927f11	return 1;
b151ef55	}
ffd59a3e	/*
edb35c0a	* Add the given line to the start of the given message * A copy of the given line is taken which the caller must free * Line must not be terminated by a \n */ int
de617e3e	messageAddStrAtTop(message m, const char data)
edb35c0a	{ text *oldfirst; assert(m != NULL); if(m->body_first == NULL)
de617e3e	return messageAddLine(m, lineCreate(data));
bbf43447
edb35c0a	oldfirst = m->body_first; m->body_first = (text *)cli_malloc(sizeof(text)); if(m->body_first == NULL) { m->body_first = oldfirst; return -1; } m->body_first->t_next = oldfirst;
de617e3e	m->body_first->t_line = lineCreate((data) ? data : "");
edb35c0a
de617e3e	if(m->body_first->t_line == NULL) { cli_errmsg("messageAddStrAtTop: out of memory\n");
edb35c0a	return -1; } return 1; } /*
de617e3e	* See if the last line marks the start of a non MIME inclusion that * will need to be scanned / static void messageIsEncoding(message m) { static const char encoding[] = "Content-Transfer-Encoding"; static const char binhex[] = "(This file must be converted with BinHex 4.0)"; const char *line = lineGetData(m->body_last->t_line);
55274fda	/* not enough matches to warrant this test / /if(lineGetRefCount(m->body_last->t_line) > 1) { return; }*/
de617e3e	if((m->encoding == NULL) && (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) && (strstr(line, "7bit") == NULL)) m->encoding = m->body_last;
cf25aed7	else if((m->bounce == NULL) &&
ce0883f6	(strncasecmp(line, "Received: ", 10) == 0) &&
a4f8f199	(cli_filetype((const unsigned char *)line, strlen(line)) == CL_TYPE_MAIL))
de617e3e	m->bounce = m->body_last;
2add0ed7	/* Not needed with fast track visa technology / /else if((m->uuencode == NULL) && isuuencodebegin(line)) m->uuencode = m->body_last;*/
5ae253d2	else if((m->binhex == NULL) &&
1a220adb	strstr(line, "BinHex") &&
4bdd7a93	(simil(line, binhex) > 90))
1a220adb	/* * Look for close matches for BinHex, but * simil() is expensive so only do it if it's * likely to be found */
5ae253d2	m->binhex = m->body_last;
00f95393	else if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0)) m->yenc = m->body_last;
de617e3e	} /*
ffd59a3e	* Returns a pointer to the body of the message. Note that it does NOT return * a copy of the data */
985cc85e	text * messageGetBody(message *m)
b151ef55	{ assert(m != NULL);
ffd59a3e	return m->body_first;
b151ef55	} /* * Clean up the message by removing trailing spaces and blank lines / void messageClean(message m) { text newEnd = textClean(m->body_first); if(newEnd) m->body_last = newEnd; } /
e6b25cd3	* Export a message using the given export routines
0d252351	* * TODO: It really should export into an array, one * for each encoding algorithm. However, what it does is it returns the * last item that was exported. That's sufficient for now.
b151ef55	*/
a446de17	static void *
985cc85e	messageExport(message m, const char dir, void (create)(void), void (destroy)(void ), void (setFilename)(void , const char , const char ), void (addData)(void , const unsigned char , size_t), void (exportText)(text , void , int), void(setCTX)(void , cli_ctx ), int destroy_text)
b151ef55	{
e6b25cd3	void *ret;
985cc85e	text *t_line;
dd8a7e90	char *filename;
0e3b08fc	int i;
b151ef55	assert(m != NULL);
0e3b08fc	if(messageGetBody(m) == NULL) return NULL;
e6b25cd3	ret = (*create)();
b151ef55
e6b25cd3	if(ret == NULL)
02c9dc2a	return NULL;
b151ef55
802c37fc	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
0e3b08fc	if((t_line = binhexBegin(m)) != NULL) {
a42dba7d	unsigned char byte;
40d54f7f	size_t newlen = 0L, len, dataforklen, resourceforklen, l;
a4c3d0a3	unsigned char *data;
bbf43447	char *ptr;
bb5d6279	int bytenumber;
285a69b4	blob *tmp;
bc75e1d1	/* * Table look up by Thomas Lamy <Thomas.Lamy@in-online.net> * HQX conversion table - illegal chars are 0xff / const unsigned char hqxtbl[] = { / 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f / / 00-0f / 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, / 10-1f / 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, / 20-2f / 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff, / 30-3f / 0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff, / 40-4f / 0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff, / 50-5f / 0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff, / 60-6f / 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff, / 70-7f */ 0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff };
a42dba7d
9b2be218	cli_dbgmsg("messageExport: decode binhex\n");
a42dba7d	/* * Decode BinHex4. First create a temporary blob which contains * the encoded message. Then decode that blob to the target
0bf1353d	* blob, free the temporary blob and return the target one
bc75e1d1	*
fdb974a5	* FIXME: EICAR isn't detected: should create 3 files in fork * format: .info, .data and .rsrc. This is needed for * position dependant detection such as EICAR *
bc75e1d1	* See RFC1741
a42dba7d	*/
a4c3d0a3	while(((t_line = t_line->t_next) != NULL) && (t_line->t_line == NULL)) ;
a42dba7d
985cc85e	tmp = textToBlob(t_line, NULL, ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);
a4c3d0a3	if(tmp == NULL) {
985cc85e	/* * FIXME: We've probably run out of memory during the * text to blob. * TODO: if m->numberOfEncTypes == 1 we could delete * the text object as we decode it */ cli_warnmsg("Couldn't start binhex parser\n");
e6b25cd3	(*destroy)(ret);
bc75e1d1	return NULL; }
bb5d6279
a4c3d0a3	data = blobGetData(tmp);
a42dba7d
a4c3d0a3	if(data == NULL) { cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
bbf43447	blobDestroy(tmp);
e6b25cd3	(*destroy)(ret);
bbf43447	return NULL; }
a4c3d0a3	len = blobGetDataSize(tmp);
a42dba7d
a4c3d0a3	if(data[0] == ':') { unsigned char uptr; / 7 bit (ala RFC1741) */
a42dba7d
a4c3d0a3	/* * FIXME: this is dirty code, modification of the * contents of a member of the blob object should be * done through blob.c * * Convert 7 bit data into 8 bit */ cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);
bb5d6279
a4c3d0a3	uptr = cli_malloc(len); if(uptr == NULL) { blobDestroy(tmp); (*destroy)(ret); return NULL;
bc75e1d1	}
a4c3d0a3	memcpy(uptr, data, len); bytenumber = 0;
83ec020f
bc75e1d1	/*
a4c3d0a3	* uptr now contains the encoded (7bit) data - len bytes long * data will contain the unencoded (8bit) data
bc75e1d1	*/
a4c3d0a3	for(l = 1; l < len; l++) { unsigned char c = uptr[l]; if(c == ':')
bc75e1d1	break;
a4c3d0a3	if((c == '\n') \|\| (c == '\r')) continue; if((c < 0x20) \|\| (c > 0x7f) \|\| (hqxtbl[c] == 0xff)) { cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
bc75e1d1	break;
a4c3d0a3	} c = hqxtbl[c]; assert(c <= 63); /* * These masks probably aren't needed, but * they're here to verify the code is correct */ switch(bytenumber) { case 0: data[newlen] = (c << 2) & 0xFC; bytenumber = 1; break; case 1: data[newlen++] \|= (c >> 4) & 0x3; data[newlen] = (c << 4) & 0xF0; bytenumber = 2; break; case 2: data[newlen++] \|= (c >> 2) & 0xF; data[newlen] = (c << 6) & 0xC0; bytenumber = 3; break; case 3: data[newlen++] \|= c & 0x3F; bytenumber = 0; break; }
bc75e1d1	}
bb5d6279
a4c3d0a3	cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
bc75e1d1
a4c3d0a3	/* * Throw away the old encoded (7bit) data * data now points to the encoded (8bit) data - newlen bytes * * The data array may contain repetitive characters */ free(uptr); } else {
a363da65	cli_warnmsg("HQX8 messages not yet supported - if you believe this file contains a virus, submit it to www.clamav.net\n");
a4c3d0a3	newlen = len; }
bc75e1d1	/* * Uncompress repetitive characters / if(memchr(data, 0x90, newlen)) { blob u = blobCreate(); /* uncompressed data */
285a69b4	if(u == NULL) {
e6b25cd3	(*destroy)(ret);
285a69b4	blobDestroy(tmp); return NULL; }
bc75e1d1	/* * Includes compression */ for(l = 0L; l < newlen; l++) { unsigned char c = data[l];
a42dba7d	/*
bc75e1d1	* TODO: handle the case where the first byte * is 0x90
a42dba7d	*/
bc75e1d1	blobAddData(u, &c, 1); if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) { int count; l += 2; count = data[l]; if(count == 0) { c = 0x90; blobAddData(u, &c, 1);
ffd59a3e	} else {
0d252351	#ifdef CL_DEBUG cli_dbgmsg("uncompress HQX7 at 0x%06x: %d repetitive bytes\n", l, count); #endif
ffd59a3e	blobGrow(u, count);
bc75e1d1	while(--count > 0) blobAddData(u, &c, 1);
ffd59a3e	}
a42dba7d	} }
3fbd1711	blobDestroy(tmp);
bc75e1d1	tmp = u; data = blobGetData(tmp); len = blobGetDataSize(tmp); cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len); } else { len = newlen; cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n", len);
a42dba7d	}
dad64ecb	if(len == 0) { cli_warnmsg("Discarding empty binHex attachment\n");
e6b25cd3	(*destroy)(ret);
dad64ecb	blobDestroy(tmp); return NULL; }
a42dba7d	/*
bc75e1d1	* The blob tmp now contains the uncompressed data * of len bytes, i.e. the repetitive bytes have been removed / / * Parse the header *
a42dba7d	* TODO: set filename argument in message as well */ byte = data[0];
6afdc3ab	if(byte >= len) {
e6b25cd3	(*destroy)(ret);
6afdc3ab	blobDestroy(tmp); return NULL; }
a42dba7d	filename = cli_malloc(byte + 1);
bbf43447	if(filename == NULL) {
e6b25cd3	(*destroy)(ret);
bbf43447	blobDestroy(tmp); return NULL; }
bc75e1d1	memcpy(filename, &data[1], byte);
a42dba7d	filename[byte] = '\0';
e6b25cd3	(*setFilename)(ret, dir, filename);
dad64ecb	/ptr = cli_malloc(strlen(filename) + 6);/ ptr = cli_malloc(byte + 6);
bbf43447	if(ptr) { sprintf(ptr, "name=%s", filename); messageAddArgument(m, ptr); free(ptr); }
a42dba7d	/* * skip over length, filename, version, type, creator and flags */ byte = 1 + byte + 1 + 4 + 4 + 2;
83ec020f	/* * Set len to be the data fork length */
64b0fff6	dataforklen = ((data[byte] << 24) & 0xFF000000) \|
582808c3	((data[byte + 1] << 16) & 0xFF0000) \| ((data[byte + 2] << 8) & 0xFF00) \| (data[byte + 3] & 0xFF);
bc75e1d1
64b0fff6	resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) \|
582808c3	((data[byte + 5] << 16) & 0xFF0000) \| ((data[byte + 6] << 8) & 0xFF00) \| (data[byte + 7] & 0xFF);
64b0fff6	cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n", filename, dataforklen, resourceforklen);
bc75e1d1	free((char *)filename);
a42dba7d	/* * Skip over data fork length, resource fork length and CRC */ byte += 10;
dad64ecb	l = blobGetDataSize(tmp) - byte;
db42f46e
64b0fff6	if(l < dataforklen) {
db42f46e	cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
64b0fff6	dataforklen, l); dataforklen = l;
db42f46e	}
55274fda	if(setCTX && m->ctx) (*setCTX)(ret, m->ctx);
64b0fff6	(*addData)(ret, &data[byte], dataforklen);
a42dba7d	blobDestroy(tmp);
fd969c26	if(destroy_text) m->binhex = NULL;
fef5ad63
fd969c26	if((m->numberOfEncTypes == 0) \|\| ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) {
fef5ad63	cli_dbgmsg("Finished exporting binhex file\n"); return ret; }
0e3b08fc	} if(m->numberOfEncTypes == 0) { /* * Fast copy */
fd969c26	cli_dbgmsg("messageExport: Entering fast copy mode\n");
6a91c55b	filename = (char *)messageFindArgument(m, "filename");
b151ef55	if(filename == NULL) {
6a91c55b	filename = (char *)messageFindArgument(m, "name");
b151ef55	if(filename == NULL) {
c93e52c1	cli_dbgmsg("Unencoded attachment sent with no filename\n");
3b6eace4	messageAddArgument(m, "name=attachment");
0e3b08fc	} else
bbf43447	/* * Some virus attachments don't say how they've * been encoded. We assume base64 */ messageSetEncoding(m, "base64");
b151ef55	}
63f87938	(setFilename)(ret, dir, (filename && filename) ? filename : "attachment");
b151ef55
5eeffbb9	if(filename) free((char *)filename);
b151ef55
2add0ed7	if(m->numberOfEncTypes == 0)
985cc85e	return exportText(messageGetBody(m), ret, destroy_text);
b151ef55	}
55274fda	if(setCTX && m->ctx) (*setCTX)(ret, m->ctx);
0e3b08fc	for(i = 0; i < m->numberOfEncTypes; i++) { encoding_type enctype = m->encodingTypes[i];
d17de037	size_t size;
0e3b08fc
0d252351	if(i > 0) { void newret; newret = (create)(); if(newret == NULL) { cli_errmsg("Not all decoding algorithms were run\n"); return ret; } (*destroy)(ret); ret = newret; }
ad642304	cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype);
b151ef55	/*
0e3b08fc	* Find the filename to decode
b151ef55	*/
2add0ed7	if(((enctype == YENCODE) && yEncBegin(m)) \|\| ((i == 0) && yEncBegin(m))) {
00f95393	/* * TODO: handle multipart yEnc encoded files */ t_line = yEncBegin(m);
bb2432d7	filename = (char *)lineGetData(t_line->t_line);
00f95393	if((filename = strstr(filename, " name=")) != NULL) {
4db74788	filename = cli_strdup(&filename[6]);
00f95393	if(filename) { cli_chomp(filename); strstrip(filename); cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename); }
5eeffbb9	}
00f95393
63f87938	(setFilename)(ret, dir, (filename && filename) ? filename : "attachment");
5eeffbb9	if(filename) { free((char *)filename); filename = NULL; }
00f95393	t_line = t_line->t_next; enctype = YENCODE;
74ca33e9	m->yenc = NULL;
0e3b08fc	} else {
2add0ed7	if(enctype == UUENCODE) { /* * The body will have been stripped out by the fast track visa * system. Treat as plain/text, which means we'll still scan * for funnies outside of the uuencoded portion. */
56896211	cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
2add0ed7	enctype = m->encodingTypes[i] = NOENCODING; }
0e3b08fc	filename = (char )messageFindArgument(m, "filename"); if(filename == NULL) { filename = (char )messageFindArgument(m, "name"); if(filename == NULL) { cli_dbgmsg("Attachment sent with no filename\n"); messageAddArgument(m, "name=attachment"); } else if(enctype == NOENCODING) /*
24c897dc	* Some virus attachments don't say how * they've been encoded. We assume * base64.
2add0ed7	*
24c897dc	* FIXME: don't do this if it's a fall * through from uuencode
0e3b08fc	*/ messageSetEncoding(m, "base64"); }
63f87938	(setFilename)(ret, dir, (filename && filename) ? filename : "attachment");
0e3b08fc	t_line = messageGetBody(m); }
ad642304
00f95393	if(filename) free((char *)filename);
0e3b08fc	/*
c93e52c1	* t_line should now point to the first (encoded) line of the * message
0e3b08fc	/ if(t_line == NULL) { cli_warnmsg("Empty attachment not saved\n"); (destroy)(ret); return NULL; } if(enctype == NOENCODING) {
ce73653f	/*
0e3b08fc	* Fast copy
ce73653f	*/
985cc85e	if(i == m->numberOfEncTypes - 1) { /* last one */ (void)exportText(t_line, ret, destroy_text); break; } (void)exportText(t_line, ret, 0);
0e3b08fc	continue;
ce73653f	}
b151ef55
d17de037	size = 0;
0e3b08fc	do {
0d252351	unsigned char smallbuf[1024]; unsigned char uptr, data;
0e3b08fc	const char *line = lineGetData(t_line->t_line);
0d252351	unsigned char *bigbuf; size_t datasize;
b151ef55
2add0ed7	if(enctype == YENCODE) {
00f95393	if(line == NULL) continue;
5eeffbb9	if(strncmp(line, "=yend ", 6) == 0)
00f95393	break;
0e3b08fc	}
0d252351	/* * Add two bytes for '\n' and '\0' */ datasize = (line) ? strlen(line) + 2 : 0;
02c9dc2a
23e1c37c	if(datasize >= sizeof(smallbuf)) data = bigbuf = (unsigned char *)cli_malloc(datasize);
0d252351	else { bigbuf = NULL; data = smallbuf; datasize = sizeof(smallbuf); }
02c9dc2a
23e1c37c	uptr = decodeLine(m, enctype, line, data, datasize);
0d252351	if(uptr == NULL) { if(data == bigbuf) free(data); break; }
1e06e1ab
d17de037	if(uptr != data) {
af66c329	assert((size_t)(uptr - data) < datasize);
0e3b08fc	(*addData)(ret, data, (size_t)(uptr - data));
d17de037	size += (size_t)(uptr - data); }
02c9dc2a
0d252351	if(data == bigbuf) free(data);
0e3b08fc	/*
da850706	* According to RFC2045, '=' is used to pad out
0e3b08fc	* the last byte and should be used as evidence * of the end of the data. Some mail clients * annoyingly then put plain text after the '=' * byte and viruses exploit this bug. Sigh / /if(enctype == BASE64) if(strchr(line, '=')) break;*/
0e01c158	if(line && destroy_text && (i == m->numberOfEncTypes - 1)) { lineUnlink(t_line->t_line); t_line->t_line = NULL; }
0e3b08fc	} while((t_line = t_line->t_next) != NULL);
d17de037
8f465848	cli_dbgmsg("Exported %u bytes using enctype %d\n", size, enctype);
752c34b9
f98d4ab4	/* Verify we have nothing left to flush out / if(m->base64chars) { unsigned char data[4]; unsigned char ptr;
285a69b4
c8bc44d6	ptr = base64Flush(m, data);
f98d4ab4	if(ptr) (*addData)(ret, data, (size_t)(ptr - data)); }
285a69b4	}
e6b25cd3	return ret; }
/*
 * Flush any base64 characters carried over from the last decoded line
 * into buf.
 *
 * Returns the value handed back by decode() (one byte past the data
 * written to buf) when characters were pending, otherwise NULL.
 * m->base64chars is zero on return.
 */
unsigned char *
base64Flush(message *m, unsigned char *buf)
{
	unsigned char *end;

	cli_dbgmsg("%u trailing bytes to export\n", m->base64chars);

	if(m->base64chars == 0)
		return NULL;	/* nothing pending */

	end = decode(m, NULL, buf, base64, FALSE);
	m->base64chars = 0;

	return end;
}
e6b25cd3	/* * Decode and transfer the contents of the message into a fileblob * The caller must free the returned fileblob / fileblob
985cc85e	messageToFileblob(message m, const char dir, int destroy)
e6b25cd3	{
985cc85e	fileblob *fb;
a446de17	cli_dbgmsg("messageToFileblob\n");
6fe0da47	fb = messageExport(m, dir, (void ()(void))fileblobCreate, (void()(void ))fileblobDestroy, (void()(void , const char , const char ))fileblobSetFilename, (void()(void , const unsigned char , size_t))fileblobAddData, (void ()(text , void , int))textToFileblob, (void()(void , cli_ctx ))fileblobSetCTX, destroy);
985cc85e	if(destroy && m->body_first) { textDestroy(m->body_first); m->body_first = m->body_last = NULL; } return fb;
e6b25cd3	} /*
aea1b159	* Decode and transfer the contents of the message into a closed blob
e6b25cd3	* The caller must free the returned blob / blob
985cc85e	messageToBlob(message *m, int destroy)
e6b25cd3	{
fd969c26	blob *b; cli_dbgmsg("messageToBlob\n"); b = messageExport(m, NULL,
6fe0da47	(void ()(void))blobCreate, (void()(void ))blobDestroy, (void()(void , const char , const char ))blobSetFilename, (void()(void , const unsigned char , size_t))blobAddData, (void ()(text , void , int))textToBlob, (void()(void , cli_ctx ))NULL, destroy);
985cc85e	if(destroy && m->body_first) { textDestroy(m->body_first); m->body_first = m->body_last = NULL; } return b;
b151ef55	} /* * Decode and transfer the contents of the message into a text area
ffd59a3e	* The caller must free the returned text
b151ef55	/ text
285a69b4	messageToText(message *m)
b151ef55	{
0e3b08fc	int i;
b151ef55	text first = NULL, last = NULL; const text *t_line; assert(m != NULL);
0e3b08fc	if(m->numberOfEncTypes == 0) {
b151ef55	/* * Fast copy */ for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) { if(first == NULL) first = last = cli_malloc(sizeof(text)); else { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; }
de617e3e	if(last == NULL) {
285a69b4	if(first) textDestroy(first);
cea95096	return NULL; }
0b08b624	if(t_line->t_line) last->t_line = lineLink(t_line->t_line); else last->t_line = NULL; /* empty line */
b151ef55	}
0e3b08fc	if(last) last->t_next = NULL; return first; } /* * Scan over the data a number of times once for each claimed encoding * type */ for(i = 0; i < m->numberOfEncTypes; i++) { const encoding_type enctype = m->encodingTypes[i]; cli_dbgmsg("messageToText: export transfer method %d = %d\n", i, enctype);
eac601be	switch(enctype) { case NOENCODING: case BINARY: case EIGHTBIT: /* * Fast copy / for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) { if(first == NULL) first = last = cli_malloc(sizeof(text)); else { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; } if(last == NULL) { if(first) { last->t_next = NULL; textDestroy(first); } return NULL; } if(t_line->t_line) last->t_line = lineLink(t_line->t_line); else last->t_line = NULL; / empty line */
0e3b08fc	}
eac601be	continue; case UUENCODE:
b01f527d	cli_errmsg("messageToText: Unexpected attempt to handle uuencoded file - report to http://bugs.clamav.net\n");
2add0ed7	if(first) { last->t_next = NULL; textDestroy(first);
0e3b08fc	}
2add0ed7	return NULL;
eac601be	case YENCODE: t_line = yEncBegin(m);
00f95393
eac601be	if(t_line == NULL) { /cli_warnmsg("YENCODED attachment is missing begin statement\n");/ if(first) { last->t_next = NULL; textDestroy(first); } return NULL;
15bfc2e4	}
eac601be	t_line = t_line->t_next; default: if((i == 0) && binhexBegin(m)) cli_warnmsg("Binhex messages not supported yet.\n"); t_line = messageGetBody(m);
a42dba7d	}
b151ef55
27a375f2	for(; t_line; t_line = t_line->t_next) { unsigned char data[1024]; unsigned char *uptr;
de617e3e	const char *line = lineGetData(t_line->t_line);
27a375f2
2add0ed7	if(enctype == BASE64)
285a69b4	/* * ignore blanks - breaks RFC which is * probably the point! */ if(line == NULL) continue;
752c34b9
0d252351	assert((line == NULL) \|\| (strlen(line) <= sizeof(data)));
0e3b08fc	uptr = decodeLine(m, enctype, line, data, sizeof(data));
b151ef55
27a375f2	if(uptr == NULL) break;
b151ef55
27a375f2	assert(uptr <= &data[sizeof(data)]); if(first == NULL) first = last = cli_malloc(sizeof(text)); else { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; }
b151ef55
98685ac1	if(last == NULL)
bbf43447	break;
752c34b9
290ba18f	/* * If the decoded line is the same as the encoded * there's no need to take a copy, just link it. * Note that the comparison is done without the * trailing newline that the decoding routine may have * added - that's why there's a strncmp rather than a * strcmp - that'd be bad for MIME decoders, but is OK * for AV software */
5eeffbb9	if((data[0] == '\n') \|\| (data[0] == '\0')) last->t_line = NULL;
d16754aa	else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
74ca33e9	#ifdef CL_DEBUG
290ba18f	cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
74ca33e9	#endif
290ba18f	last->t_line = lineLink(t_line->t_line); } else
5eeffbb9	last->t_line = lineCreate((char *)data);
98685ac1
0e3b08fc	if(line && enctype == BASE64)
752c34b9	if(strchr(line, '=')) break;
27a375f2	}
82348395	if(m->base64chars) { unsigned char data[4];
4b0e970e	memset(data, '\0', sizeof(data));
5eeffbb9	if(decode(m, NULL, data, base64, FALSE) && data[0]) {
82348395	if(first == NULL) first = last = cli_malloc(sizeof(text)); else { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; } if(last != NULL)
5eeffbb9	last->t_line = lineCreate((char *)data);
82348395	} m->base64chars = 0; }
b151ef55	} if(last) last->t_next = NULL; return first; }
985cc85e	text * yEncBegin(message *m)
00f95393	{ return m->yenc; }
b151ef55	/*
a42dba7d	* Scan to find the BINHEX message (if any) */
f5a4d7e8	#if 0
985cc85e	const text * binhexBegin(message *m)
a42dba7d	{ const text *t_line; for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0) return t_line; return NULL; }
f5a4d7e8	#else
985cc85e	text * binhexBegin(message *m)
f5a4d7e8	{ return m->binhex; } #endif
a42dba7d	/*
5a01973c	* Scan to find a bounce message. There is no standard for these, not * even a convention, so don't expect this to be foolproof */
f5a4d7e8	#if 0
985cc85e	text * bounceBegin(message *m)
5a01973c	{ const text *t_line;
bb5d6279
20fa2f53	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
06d4e856	if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
20fa2f53	return t_line;
5a01973c	return NULL; }
f5a4d7e8	#else
985cc85e	text * bounceBegin(message *m)
f5a4d7e8	{ return m->bounce; } #endif /* * If a message doesn't not contain another message which could be harmful * it is deemed to be safe. * * TODO: ensure nothing can get through this * * TODO: check to see if we need to * find anything else, perhaps anything * from the RFC821 table? / #if 0 int messageIsAllText(const message m) { const text *t; for(t = messageGetBody(m); t; t = t->t_next) if(strncasecmp(t->t_text, "Content-Transfer-Encoding", strlen("Content-Transfer-Encoding")) == 0) return 0; return 1; } #else
985cc85e	text * encodingLine(message *m)
f5a4d7e8	{
627465e7	return m->encoding;
f5a4d7e8	} #endif
5a01973c
/*
 * Forget the encoding, bounce and binhex markers of the message.
 * The yEnc marker is not touched.
 */
void
messageClearMarkers(message *m)
{
	m->binhex = NULL;
	m->bounce = NULL;
	m->encoding = NULL;
}
5a01973c	/*
b151ef55	* Decode a line and add it to a buffer, return the end of the buffer
27a375f2	* to help appending callers. There is no new line at the end of "line"
eaacc2de	* * len is sizeof(ptr)
b151ef55	*/
d1a6ea81	unsigned char *
0e3b08fc	decodeLine(message m, encoding_type et, const char line, unsigned char *buf, size_t buflen)
b151ef55	{
2a3ceff4	size_t len, reallen;
bf8ea488	bool softbreak;
23e1c37c	char p2, copy;
53ee0b60	char base64buf[RFC2045LENGTH + 1];
b151ef55
15bfc2e4	/cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);/
0d252351
b151ef55	assert(m != NULL);
eaacc2de	assert(buf != NULL);
b151ef55
0e3b08fc	switch(et) {
ee576466	case BINARY: /* * TODO: find out what this is, encoded as binary?? / / fall through */
b151ef55	case NOENCODING: case EIGHTBIT:
c6259ac5	default: /* unknown encoding type - try our best */
963e073f	if(line) /* empty line? */
abac42dd	buf = (unsigned char )cli_strrcpy((char )buf, line);
b151ef55	/* Put the new line back in */
abac42dd	return (unsigned char )cli_strrcpy((char )buf, "\n");
b151ef55	case QUOTEDPRINTABLE:
98685ac1	if(line == NULL) { /* empty line / buf++ = '\n'; break; }
285a69b4
da850706	softbreak = FALSE;
23e1c37c	while(buflen && *line) {
da850706	if(line == '=') { unsigned char byte; if((++line == '\0') \|\| (line == '\n')) { softbreak = TRUE; / soft line break / break; } byte = hex(line); if((++line == '\0') \|\| (line == '\n')) { /* * broken e-mail, not * adhering to RFC2045 / buf++ = byte; break; }
582808c3	/* * Fix by Torok Edvin * <edwintorok@gmail.com> * Handle messages that use a broken * quoted-printable encoding of * href=\"http://, instead of =3D / if(byte != '=') { byte <<= 4; byte += hex(line); } else line -= 2;
da850706	buf++ = byte; } else buf++ = *line;
23e1c37c	++line; --buflen;
da850706	}
bf8ea488	if(!softbreak) /* Put the new line back in */
eaacc2de	*buf++ = '\n';
b151ef55	break; case BASE64:
98685ac1	if(line == NULL) break;
752c34b9	/*
da850706	* RFC2045 sets the maximum length to 76 bytes
752c34b9	* but many e-mail clients ignore that */
53ee0b60	if(strlen(line) < sizeof(base64buf)) { strcpy(base64buf, line); copy = base64buf; } else {
4db74788	copy = cli_strdup(line);
53ee0b60	if(copy == NULL) break; }
bbf43447
752c34b9	p2 = strchr(copy, '=');
b151ef55	if(p2) *p2 = '\0';
285a69b4
d17de037	sanitiseBase64(copy);
b151ef55	/* * Klez doesn't always put "=" on the last line */
285a69b4	buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
752c34b9
53ee0b60	if(copy != base64buf) free(copy);
b151ef55	break; case UUENCODE:
b3a5cdd8	assert(m->base64chars == 0);
98685ac1	if((line == NULL) \|\| (line == '\0')) / empty line */
3c52fb18	break;
b151ef55	if(strcasecmp(line, "end") == 0) break;
64ff0d49	if(isuuencodebegin(line)) break;
b151ef55	if((line[0] & 0x3F) == ' ') break;
af66c329	/* * reallen contains the number of bytes that were * encoded */
2a3ceff4	reallen = (size_t)uudecode(*line++);
af66c329	if(reallen <= 0) break; if(reallen > 62)
2a3ceff4	break; len = strlen(line);
b151ef55
af66c329	if((len > buflen) \|\| (reallen > len))
eaacc2de	/* * In practice this should never occur since * the maximum length of a uuencoded line is * 62 characters */
8dc9ee9e	cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
2a3ceff4	else { (void)decode(m, line, buf, uudecode, (len & 3) == 0); buf = &buf[reallen]; }
b3a5cdd8	m->base64chars = 0; /* this happens with broken uuencoded files */
b151ef55	break;
00f95393	case YENCODE: if((line == NULL) \|\| (line == '\0')) / empty line / break; if(strncmp(line, "=yend ", 6) == 0) break; while(line) if(line == '=') { if(++line == '\0') break; buf++ = ((line++ - 64) & 255); } else buf++ = ((line++ - 42) & 255); break;
b151ef55	}
eaacc2de	*buf = '\0'; return buf;
b151ef55	}
6ba88eb8	/*
fb405afc	* Remove the non base64 characters such as spaces from a string. Spaces * shouldn't appear mid string in base64 files, but some broken mail clients * ignore such errors rather than discarding the mail, and virus writers * exploit this bug
285a69b4	*/ static void
fb405afc	sanitiseBase64(char *s)
285a69b4	{
15bfc2e4	/cli_dbgmsg("sanitiseBase64 '%s'\n", s);/
fb405afc	for(; *s; s++)
15bfc2e4	if(base64Table[(unsigned int)(*s & 0xFF)] == 255) {
fb405afc	char *p1;
e982ca83
fb405afc	for(p1 = s; p1[0] != '\0'; p1++) p1[0] = p1[1];
811e3356	--s;
fb405afc	}
285a69b4	} /*
6ba88eb8	* Returns one byte after the end of the decoded data in "out"
285a69b4	* * Update m->base64chars with the last few bytes of data that we haven't * decoded. After the last line is found, decode will be called with in = NULL * to flush these out
6ba88eb8	*/
b151ef55	static unsigned char *
285a69b4	decode(message m, const char in, unsigned char out, unsigned char (decoder)(char), bool isFast) { unsigned char b1, b2, b3, b4; unsigned char cb1, cb2, cb3; /* carried over from last line */
15bfc2e4	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
285a69b4	in ? strlen(in) : 0,
f0146bc6	isFast, m->base64chars);*/
285a69b4	cb1 = cb2 = cb3 = '\0'; switch(m->base64chars) { case 3: cb3 = m->base64_3; /* FALLTHROUGH / case 2: cb2 = m->base64_2; / FALLTHROUGH / case 1: cb1 = m->base64_1; isFast = FALSE; break; default: assert(m->base64chars <= 3); } if(isFast) / Fast decoding if not last line / while(in) { b1 = (decoder)(in++); b2 = (decoder)(in++); b3 = (decoder)(in++); /* * Put this line here to help on some compilers which * can make use of some architecure's ability to * multiprocess when different variables can be * updated at the same time - here b3 is used in * one line, b1/b2 in the next and b4 in the next after * that, b3 and b4 rely on in but b1/b2 don't / out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); b4 = (decoder)(in++); out++ = (b2 << 4) \| ((b3 >> 2) & 0xF); out++ = (b3 << 6) \| (b4 & 0x3F); }
0d252351	else if(in == NULL) { /* flush */ int nbytes; if(m->base64chars == 0) return out;
285a69b4
0d252351	cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
87901cab	isalnum(cb1) ? cb1 : '@', isalnum(cb2) ? cb2 : '@', isalnum(cb3) ? cb3 : '@');
285a69b4
0d252351	m->base64chars--; b1 = cb1; nbytes = 1;
d17de037
0d252351	if(m->base64chars) {
285a69b4	m->base64chars--;
0d252351	b2 = cb2;
285a69b4	if(m->base64chars) {
c8e1ad63	nbytes = 2;
285a69b4	m->base64chars--;
0d252351	b3 = cb3;
362fe28f	nbytes = 3;
0d252351	} else if(b2)
c8e1ad63	nbytes = 2;
0d252351	}
285a69b4
0d252351	switch(nbytes) { case 3: b4 = '\0'; /* fall through / case 4: out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); *out++ = (b2 << 4) \| ((b3 >> 2) & 0xF);
70b54406	if((nbytes == 4) \|\| b3) *out++ = (b3 << 6) \| (b4 & 0x3F);
0d252351	break; case 2: *out++ = (b1 << 2) \| ((b2 >> 4) & 0x3);
c8e1ad63	if((b2 << 4) & 0xFF) *out++ = b2 << 4;
0d252351	break; case 1: out++ = b1 << 2; break; default: assert(0); } } else while(in) { int nbytes;
285a69b4
0d252351	if(m->base64chars) { m->base64chars--; b1 = cb1; } else b1 = (decoder)(in++);
285a69b4
0d252351	if(*in == '\0') { b2 = '\0'; nbytes = 1; } else {
285a69b4	if(m->base64chars) { m->base64chars--;
0d252351	b2 = cb2;
285a69b4	} else
0d252351	b2 = (decoder)(in++);
285a69b4	if(*in == '\0') {
0d252351	b3 = '\0'; nbytes = 2;
285a69b4	} else { if(m->base64chars) { m->base64chars--;
0d252351	b3 = cb3;
285a69b4	} else
0d252351	b3 = (decoder)(in++);
285a69b4	if(*in == '\0') {
0d252351	b4 = '\0'; nbytes = 3;
285a69b4	} else {
0d252351	b4 = (decoder)(in++); nbytes = 4;
285a69b4	} }
0d252351	}
285a69b4
0d252351	switch(nbytes) { case 3: m->base64_3 = b3; case 2: m->base64_2 = b2; case 1: m->base64_1 = b1;
285a69b4	break;
0d252351	case 4: out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); out++ = (b2 << 4) \| ((b3 >> 2) & 0xF); *out++ = (b3 << 6) \| (b4 & 0x3F); break; default: assert(0); } if(nbytes != 4) { m->base64chars = nbytes; break;
285a69b4	} } return out; }
/*
 * Convert one hexadecimal digit (either case) to its value, 0 to 15.
 *
 * Any other character is reported through cli_dbgmsg and '=' is
 * returned; callers use that to recognise a badly encoded '='.
 */
static unsigned char
hex(char c)
{
	/* ctype functions are only defined for unsigned char values and EOF */
	if(isdigit((unsigned char)c))
		return c - '0';
	if((c >= 'A') && (c <= 'F'))
		return c - 'A' + 10;
	if((c >= 'a') && (c <= 'f'))
		return c - 'a' + 10;
	cli_dbgmsg("Illegal hex character '%c'\n", c);

	/*
	 * Some mails (notably some spam) break RFC2045 by failing to encode
	 * the '=' character
	 */
	return '=';
}
5ae253d2	static unsigned char base64(char c) {
15bfc2e4	const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)];
5ae253d2	if(ret == 255) {
0d252351	/cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);/
5ae253d2	return 63; } return ret; }
/*
 * Map one uuencoded character to its value: the offset of the
 * character from ' ', truncated to an unsigned char.
 */
static unsigned char
uudecode(char c)
{
	const int offset = c - ' ';

	return (unsigned char)offset;
}
/*
 * These are the only arguments we're interested in; every caller of
 * messageFindArgument asks for one of them.
 * It's probably not good doing this since each time a new
 * messageFindArgument is added I need to remember to look here,
 * but it can save a lot of memory...
 *
 * Returns 1 when arg starts (ignoring case) with one of the wanted
 * names, otherwise logs the argument and returns 0.
 */
static int
usefulArg(const char *arg)
{
	static const char *const wanted[] = {
		"name",
		"filename",
		"boundary",
		"protocol",
		"id",
		"number",
		"total",
		"type"
	};
	size_t i;

	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
		if(strncasecmp(arg, wanted[i], strlen(wanted[i])) == 0)
			return 1;

	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
	return 0;
}
e24738dc
/*
 * Attach a scan context to the message.
 */
void
messageSetCTX(message *m, cli_ctx *ctx)
{
	m->ctx = ctx;
}

/*
 * Returns TRUE when the message has been flagged as infected,
 * otherwise FALSE.
 */
int
messageContainsVirus(const message *m)
{
	if(m->isInfected)
		return TRUE;

	return FALSE;
}
e24738dc	/* * We've run out of memory. Try to recover some by * deduping the message
9a69a785	* * FIXME: this can take a long time. The real solution is for system admins * to refrain from setting ulimits too low, then this routine won't be * called
e24738dc	/ static void messageDedup(message m) { const text *t1; size_t saved = 0;
d16754aa	cli_dbgmsg("messageDedup\n");
e24738dc	t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first; for(t1 = m->body_first; t1; t1 = t1->t_next) { const char d1; text t2; line_t l1; unsigned int r1; if(saved >= 1001000) break; /* that's enough / l1 = t1->t_line; if(l1 == NULL) continue; d1 = lineGetData(l1); if(strlen(d1) < 8) continue; / wouldn't recover many bytes */
d16754aa
e24738dc	r1 = (unsigned int)lineGetRefCount(l1); if(r1 == 255) continue; /* * We don't want to foul up any pointers / if(t1 == m->encoding) continue; if(t1 == m->bounce) continue; if(t1 == m->binhex) continue; if(t1 == m->yenc) continue; for(t2 = t1->t_next; t2; t2 = t2->t_next) { const char d2; line_t l2 = t2->t_line; if(l2 == NULL) continue; d2 = lineGetData(l2); if(d1 == d2) / already linked */ continue; if(strcmp(d1, d2) == 0) { if(lineUnlink(l2) == NULL)
d16754aa	saved += strlen(d1) + 1;
e24738dc	t2->t_line = lineLink(l1); if(t2->t_line == NULL) { cli_errmsg("messageDedup: out of memory\n"); return; }
d16754aa	if(++r1 == 255) break;
e24738dc	} } }
d16754aa	cli_dbgmsg("messageDedup reclaimed %u bytes\n", saved);
e24738dc	m->dedupedThisFar = t1; }
b329234a	/*
5e5a162c	* Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must * free, or NULL on error. * * TODO: Currently only handles paragraph 4 of RFC2231 e.g. * protocol=ansi-x3.4-1968''application%2Fpgp-signature; / static char * rfc2231(const char *in) {
802c37fc	const char ptr; char ret, *out;
49dff330	enum { LANGUAGE, CHARSET, CONTENTS } field;
5e5a162c
49dff330	if(strstr(in, "0=") != NULL) { cli_warnmsg("RFC2231 parameter continuations are not yet handled\n");
4db74788	return cli_strdup(in);
49dff330	} ptr = strstr(in, "0="); if(ptr != NULL) / * Parameter continuation, with no continuation * Thunderbird 1.5 (and possibly other versions) does this / field = CONTENTS; else { ptr = strstr(in, "="); field = LANGUAGE; }
5e5a162c	if(ptr == NULL) /* quick return */
4db74788	return cli_strdup(in);
5e5a162c	cli_dbgmsg("rfc2231 '%s'\n", in); ret = cli_malloc(strlen(in) + 1); if(ret == NULL) return NULL;
49dff330	/* * memcpy(out, in, (ptr - in)); * out = &out[ptr - in]; * in = ptr; */
51f308f2	out = ret; while(in != ptr) out++ = in++;
5e5a162c	*out++ = '=';
49dff330	while(*ptr++ != '=') ;
5e5a162c	/* * We don't do anything with the language and character set, just skip * over them! */
49dff330	while(*ptr) {
5e5a162c	switch(field) { case LANGUAGE:
49dff330	if(*ptr == '\'')
5e5a162c	field = CHARSET; break; case CHARSET:
49dff330	if(*ptr == '\'')
5e5a162c	field = CONTENTS; break; case CONTENTS:
49dff330	if(*ptr == '%') {
5e5a162c	unsigned char byte;
49dff330	if((++ptr == '\0') \|\| (ptr == '\n'))
5e5a162c	break;
49dff330	byte = hex(*ptr);
5e5a162c
49dff330	if((++ptr == '\0') \|\| (ptr == '\n')) {
5e5a162c	*out++ = byte; break; } byte <<= 4;
49dff330	byte += hex(*ptr);
5e5a162c	*out++ = byte; } else
49dff330	out++ = ptr;
5e5a162c	}
49dff330	if(*ptr++ == '\0')
fe6ce0ba	/* * Incorrect message that has just one character after * a '%'. * FIXME: stash something in out that would, for example * treat %2 as %02, assuming field == CONTENTS */
abaac091	break;
5e5a162c	} if(field != CONTENTS) { free(ret);
802c37fc	cli_warnmsg("Invalid RFC2231 header: '%s'\n", in);
4db74788	return cli_strdup("");
5e5a162c	}
802c37fc
5e5a162c	out = '\0'; cli_dbgmsg("rfc2231 returns '%s'\n", ret); return ret; } /
b329234a	* common/simil: * From Computing Magazine 20/8/92 * Returns %ge number from 0 to 100 - how similar are 2 strings? * 100 for exact match, < for error / struct pstr_list { / internal stack / char d1; struct pstr_list next; }; #define OUT_OF_MEMORY (-2) #define FAILURE (-3) #define SUCCESS (-4) #define ARRAY_OVERFLOW (-5) typedef struct pstr_list ELEMENT1; typedef ELEMENT1 LINK1; static int push(LINK1 top, const char string); static int pop(LINK1 top, char buffer); static unsigned int compare(char ls1, char rs1, char ls2, char **rs2);
4bdd7a93	#define MAX_PATTERN_SIZ 50 /* maximum string lengths */
b329234a	static int simil(const char str1, const char str2) { LINK1 top = NULL; unsigned int score = 0;
40d54f7f	size_t common, total; size_t len1, len2;
b329234a	char rs1 = NULL, rs2 = NULL; char s1, s2;
4db74788	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
b329234a	if(strcasecmp(str1, str2) == 0) return 100;
4db74788	if((s1 = cli_strdup(str1)) == NULL)
b329234a	return OUT_OF_MEMORY;
4db74788	if((s2 = cli_strdup(str2)) == NULL) {
b329234a	free(s1); return OUT_OF_MEMORY; } if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) \|\| ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) { free(s1); free(s2); return ARRAY_OVERFLOW; } total += len2;
63f87938	if((push(&top, s1) == OUT_OF_MEMORY) \|\| (push(&top, s2) == OUT_OF_MEMORY)) { free(s1); free(s2);
b329234a	return OUT_OF_MEMORY;
63f87938	}
b329234a	while(pop(&top, ls2) == SUCCESS) { pop(&top, ls1); common = compare(ls1, &rs1, ls2, &rs2); if(common > 0) {
40d54f7f	score += (unsigned int)common;
b329234a	len1 = strlen(ls1); len2 = strlen(ls2); if((len1 > 1 && len2 >= 1) \|\| (len2 > 1 && len1 >= 1)) if((push(&top, ls1) == OUT_OF_MEMORY) \|\| (push(&top, ls2) == OUT_OF_MEMORY)) { free(s1); free(s2); return OUT_OF_MEMORY; } len1 = strlen(rs1); len2 = strlen(rs2); if((len1 > 1 && len2 >= 1) \|\| (len2 > 1 && len1 >= 1)) if((push(&top, rs1) == OUT_OF_MEMORY) \|\| (push(&top, rs2) == OUT_OF_MEMORY)) { free(s1); free(s2); return OUT_OF_MEMORY; } } } free(s1); free(s2); return (total > 0) ? ((score * 200) / total) : 0; } static unsigned int compare(char ls1, char rs1, char ls2, char **rs2) {
4db74788	unsigned int common, maxchars = 0;
b329234a	bool some_similarity = FALSE; char s1, s2; char maxs1 = NULL, maxs2 = NULL, maxe1 = NULL, maxe2 = NULL; char cs1, cs2, start1, end1, end2; end1 = ls1 + strlen(ls1); end2 = ls2 + strlen(ls2); start1 = ls1; for(;;) { s1 = start1; s2 = ls2; if(s1 < end1) { while(s1 < end1 && s2 < end2) { if(tolower(s1) == tolower(s2)) { some_similarity = TRUE; cs1 = s1; cs2 = s2; common = 0; do if(s1 == end1 \|\| s2 == end2) break; else { s1++; s2++; common++; } while(tolower(s1) == tolower(*s2)); if(common > maxchars) {
4db74788	unsigned int diff = common - maxchars;
b329234a	maxchars = common; maxs1 = cs1; maxs2 = cs2; maxe1 = s1; maxe2 = s2; end1 -= diff; end2 -= diff; } else s1 -= common; } else s2++; } start1++; } else break; } if(some_similarity) { maxs1 = '\0'; maxs2 = '\0'; rs1 = maxe1; rs2 = maxe2; } return maxchars; } static int push(LINK1 top, const char string) { LINK1 element; if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL) return OUT_OF_MEMORY;
4db74788	if((element->d1 = cli_strdup(string)) == NULL)
b329234a	return OUT_OF_MEMORY; element->next = top; top = element; return SUCCESS; } static int pop(LINK1 top, char buffer) { LINK1 t1; if((t1 = top) != NULL) { (void)strcpy(buffer, t1->d1); top = t1->next; free(t1->d1); free((char *)t1); return SUCCESS; } return FAILURE; }
/*
 * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
 *
 * Returns 1 for a line of the form "begin NNN ..." (three digits and a
 * space after the keyword), otherwise 0. The keyword is compared without
 * regard to case, except that the first character must be a lower case
 * 'b'.
 */
int
isuuencodebegin(const char *line)
{
	if(line[0] != 'b')	/* quick check */
		return 0;

	if(strncasecmp(line, "begin ", 6) != 0)
		return 0;

	/*
	 * The tests run left to right and stop at the first failure, so a
	 * short line is rejected at its terminating NUL and nothing beyond
	 * it is read. ctype functions need unsigned char values.
	 */
	if(!isdigit((unsigned char)line[6]) ||
	   !isdigit((unsigned char)line[7]) ||
	   !isdigit((unsigned char)line[8]))
		return 0;

	return line[9] == ' ';
}