GitList

libclamav/message.c

/*
 *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Nigel Horne
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 * TODO: Optimise messageExport, decodeLine, messageIsEncoding
 */
6d6e8271	#if HAVE_CONFIG_H #include "clamav-config.h" #endif
e3aaff8e	#ifdef CL_THREAD_SAFE
288057e9	#ifndef _REENTRANT #define _REENTRANT /* for Solaris 2.8 */
e3aaff8e	#endif
406b1800	#endif
e3aaff8e
288057e9	#ifdef C_DARWIN
e3aaff8e	#include <sys/types.h> #endif #include <stdlib.h> #include <string.h>
288057e9	#ifdef HAVE_STRINGS_H
e3aaff8e	#include <strings.h>
bc6bbeff	#endif
e3aaff8e	#include <assert.h> #include <ctype.h> #include <stdio.h>
288057e9	#ifdef CL_THREAD_SAFE
e2875303	#include <pthread.h> #endif
0f7f7682	#include "others.h" #include "str.h" #include "filetypes.h"
e3aaff8e	#include "mbox.h"
60d8d2c3	#include "clamav.h"
e83019ae	#include "json_api.h"
e3aaff8e
/* Fallback for C libraries that do not provide isblank(): true for space or tab */
#ifndef isblank
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
#endif

#define RFC2045LENGTH 76 /* maximum number of characters on a line */
f003b79e
/*
 * Boolean type: use <stdbool.h> when the build says it exists. Otherwise
 * fall back to a byte-sized typedef if FALSE is already defined as a macro,
 * or to an enum that also supplies FALSE and TRUE.
 */
#ifdef HAVE_STDBOOL_H
#include <stdbool.h>
#else
#ifdef FALSE
typedef unsigned char bool;
#else
typedef enum { FALSE = 0,
               TRUE  = 1 } bool;
#endif
#endif
e3aaff8e
288057e9	static int messageHasArgument(const message m, const char variable); static void messageIsEncoding(message *m);
0c0894b8	static unsigned char decode(message m, const char in, unsigned char out, unsigned char (*decoder)(char), bool isFast);
288057e9	static void sanitiseBase64(char *s); #ifdef __GNUC__ static unsigned char hex(char c) __attribute__((const)); static unsigned char base64(char c) __attribute__((const)); static unsigned char uudecode(char c) __attribute__((const));
a765e132	#else
288057e9	static unsigned char hex(char c); static unsigned char base64(char c); static unsigned char uudecode(char c);
a765e132	#endif
288057e9	static const char messageGetArgument(const message m, int arg); static void messageExport(message m, const char dir, void (create)(void), void (destroy)(void ), void (setFilename)(void , const char , const char ), void (addData)(void , const unsigned char , size_t), void (exportText)(text , void , int), void (setCTX)(void , cli_ctx ), int destroy_text); static int usefulArg(const char arg); static void messageDedup(message m); static char rfc2231(const char in); static int simil(const char str1, const char *str2);
e3aaff8e	/*
7cd9337a	* These maps are ordered in decreasing likelihood of their appearance
c7b69776	* in an e-mail. Probably these should be in a table...
e3aaff8e	*/
288057e9	static const struct encoding_map { const char string; encoding_type type; } encoding_map[] = {/ rfc2045 / {"7bit", NOENCODING}, {"text/plain", NOENCODING}, {"quoted-printable", QUOTEDPRINTABLE}, / rfc2045 / {"base64", BASE64}, / rfc2045 / {"8bit", EIGHTBIT}, {"binary", BINARY}, {"x-uuencode", UUENCODE}, / uuencode(5) / {"x-yencode", YENCODE}, {"x-binhex", BINHEX}, {"us-ascii", NOENCODING}, / incorrect / {"x-uue", UUENCODE}, / incorrect / {"uuencode", UUENCODE}, / incorrect / {NULL, NOENCODING}}; static const struct mime_map { const char string; mime_type type;
e3aaff8e	} mime_map[] = {
288057e9	{"text", TEXT}, {"multipart", MULTIPART}, {"application", APPLICATION}, {"audio", AUDIO}, {"image", IMAGE}, {"message", MESSAGE}, {"video", VIDEO}, {NULL, TEXT}};
e3aaff8e
/*
 * Maps a byte to its base64 value; see RFC2045, section 6.8, table 1.
 * Bytes outside the base64 alphabet map to 255. The pad character '='
 * maps to 0.
 */
static const unsigned char base64Table[256] = {
    /* 0x00 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0x10 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0x20 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 0, 255, 255,
    /* 0x40 */ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
    /* 0x60 */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
    /* 0x80 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0x90 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xA0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xB0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xC0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xD0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xE0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    /* 0xF0 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
621a667a
e3aaff8e	message * messageCreate(void) {
288057e9	message m = (message )cli_calloc(1, sizeof(message));
e3aaff8e
288057e9	if (m) m->mimeType = NOMIME;
e3aaff8e
288057e9	return m;
e3aaff8e	}
288057e9	void messageDestroy(message *m)
e3aaff8e	{
288057e9	assert(m != NULL);
767f16ab
288057e9	messageReset(m);
e3aaff8e
288057e9	free(m);
e3aaff8e	}
288057e9	void messageReset(message *m)
e3aaff8e	{
288057e9	int i;
e3aaff8e
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (m->mimeSubtype) free(m->mimeSubtype);
e3aaff8e
288057e9	if (m->mimeDispositionType) free(m->mimeDispositionType);
e3aaff8e
288057e9	if (m->mimeArguments) { for (i = 0; i < m->numberOfArguments; i++) free(m->mimeArguments[i]); free(m->mimeArguments); }
e3aaff8e
288057e9	if (m->body_first) textDestroy(m->body_first);
e3aaff8e
288057e9	assert(m->base64chars == 0);
0c0894b8
288057e9	if (m->encodingTypes) { assert(m->numberOfEncTypes > 0); free(m->encodingTypes); }
1f4d8d3e
e83019ae	#if HAVE_JSON
288057e9	if (m->jobj) cli_json_delobj(m->jobj);
e83019ae	#endif
288057e9	memset(m, '\0', sizeof(message)); m->mimeType = NOMIME;
e3aaff8e	}
0960ff5e	/*
83e42783	* Handle the Content-Type header. The syntax is in RFC1341.
53bfac08	* Return success (1) or failure (0). Failure only happens when it's an * unknown type and we've already received a known type, or we've received an * empty type. If we receive an unknown type by itself we default to application
0960ff5e	*/
288057e9	int messageSetMimeType(message mess, const char type)
e3aaff8e	{
288057e9	#ifdef CL_THREAD_SAFE static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
e2875303	#endif
288057e9	const struct mime_map m; int typeval; static table_t mime_table; assert(mess != NULL); if (type == NULL) { cli_dbgmsg("Empty content-type field\n"); return 0; }
e3aaff8e
288057e9	cli_dbgmsg("messageSetMimeType: '%s'\n", type);
e3aaff8e
288057e9	/* Ignore leading spaces / while (!isalpha(type)) if (*type++ == '\0') return 0;
e3aaff8e
288057e9	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&mime_mutex);
e2875303	#endif
288057e9	if (mime_table == NULL) { mime_table = tableCreate(); if (mime_table == NULL) { #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex);
e2875303	#endif
288057e9	return 0; } for (m = mime_map; m->string; m++) if (!tableInsert(mime_table, m->string, m->type)) { tableDestroy(mime_table); mime_table = NULL; #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex);
e2875303	#endif
288057e9	return 0; } } #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&mime_mutex);
e2875303	#endif
9425e7ce
288057e9	typeval = tableFind(mime_table, type);
9425e7ce
288057e9	if (typeval != -1) { mess->mimeType = (mime_type)typeval; return 1; } if (mess->mimeType == NOMIME) { if (strncasecmp(type, "x-", 2) == 0) mess->mimeType = MEXTENSION; else { /*
0ae75a8d	* Force scanning of strange messages */
288057e9	if (strcasecmp(type, "plain") == 0) { cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n"); mess->mimeType = TEXT; } else { /*
0356cdc0	* Don't handle broken e-mail probably sending * Content-Type: plain/text * instead of * Content-Type: text/plain * as an attachment */
288057e9	int highestSimil = 0, t = -1; const char *closest = NULL; for (m = mime_map; m->string; m++) { const int s = simil(m->string, type); if (s > highestSimil) { highestSimil = s; closest = m->string; t = m->type; } } if (highestSimil >= 50) { cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%d%% certainty)\n", type, closest, highestSimil); mess->mimeType = (mime_type)t; } else { cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type); mess->mimeType = APPLICATION; } } } return 1; } return 0;
e3aaff8e	} mime_type messageGetMimeType(const message *m) {
288057e9	assert(m != NULL);
767f16ab
288057e9	return m->mimeType;
e3aaff8e	}
288057e9	void messageSetMimeSubtype(message m, const char subtype)
e3aaff8e	{
288057e9	assert(m != NULL);
68361484
288057e9	if (subtype == NULL) { /*
68361484	* Handle broken content-type lines, e.g. * Content-Type: text/ */
288057e9	cli_dbgmsg("Empty content subtype\n"); subtype = ""; }
e3aaff8e
288057e9	if (m->mimeSubtype) free(m->mimeSubtype);
e3aaff8e
288057e9	m->mimeSubtype = cli_strdup(subtype);
e3aaff8e	} const char * messageGetMimeSubtype(const message *m) {
288057e9	return (m->mimeSubtype) ? m->mimeSubtype : "";
e3aaff8e	}
288057e9	void messageSetDispositionType(message m, const char disptype)
e3aaff8e	{
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (m->mimeDispositionType) free(m->mimeDispositionType); if (disptype == NULL) { m->mimeDispositionType = NULL; return; }
f0627588
288057e9	/*
86c4e9d5	* It's broken for there to be an entry such as "Content-Disposition:" * However some spam and viruses are rather broken, it's a sign * that something is wrong if we get that - maybe we should force a * scan of this part */
288057e9	while (disptype && isspace((int)disptype)) disptype++; if (*disptype) { m->mimeDispositionType = cli_strdup(disptype); if (m->mimeDispositionType) strstrip(m->mimeDispositionType); } else m->mimeDispositionType = NULL;
e3aaff8e	} const char * messageGetDispositionType(const message *m) {
288057e9	return (m->mimeDispositionType) ? m->mimeDispositionType : "";
e3aaff8e	} /* * TODO: * Arguments are held on a per message basis, they should be held on * a per section basis. Otherwise what happens if two sections have two * different values for charset? Probably doesn't matter for the use this * code will be given, but will need fixing if this code is used elsewhere */
288057e9	void messageAddArgument(message m, const char arg)
e3aaff8e	{
288057e9	int offset; char *p;
e3aaff8e
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (arg == NULL) return; /* Note: this is not an error condition */
e3aaff8e
288057e9	while (isspace(*arg)) arg++;
e3aaff8e
288057e9	if (arg == '\0') / Empty argument? Probably a broken mail client... */ return;
e3aaff8e
288057e9	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);
9f5f1b1a
288057e9	if (!usefulArg(arg)) return;
963c6ae7
288057e9	for (offset = 0; offset < m->numberOfArguments; offset++) if (m->mimeArguments[offset] == NULL) break; else if (strcasecmp(arg, m->mimeArguments[offset]) == 0) return; /* already in there */
e3aaff8e
288057e9	if (offset == m->numberOfArguments) { char **q;
843e1da6
288057e9	m->numberOfArguments++; q = (char *)cli_realloc(m->mimeArguments, m->numberOfArguments sizeof(char *)); if (q == NULL) { m->numberOfArguments--; return; } m->mimeArguments = q; }
e3aaff8e
288057e9	p = m->mimeArguments[offset] = rfc2231(arg); if (!p) { /* problem inside rfc2231() */ cli_dbgmsg("messageAddArgument, error from rfc2231()\n"); return; }
faa0d267
288057e9	if (strchr(p, '=') == NULL) { if (strncmp(p, "filename", 8) == 0) { /*
faa0d267	* FIXME: Bounce message handling is corrupting the in * core copies of headers */
288057e9	if (strlen(p) > 8) { cli_dbgmsg("Possible data corruption fixed\n"); p[8] = '='; } else { cli_dbgmsg("Possible data corruption not fixed\n"); } } else { if (p) cli_dbgmsg("messageAddArgument, '%s' contains no '='\n", p); free(m->mimeArguments[offset]); m->mimeArguments[offset] = NULL; return; } } /
8ba634a9	* This is terribly broken from an RFC point of view but is useful * for catching viruses which have a filename but no type of * mime. By pretending defaulting to an application rather than * to nomime we can ensure they're saved and scanned */
288057e9	if ((strncasecmp(p, "filename=", 9) == 0) \|\| (strncasecmp(p, "name=", 5) == 0)) if (messageGetMimeType(m) == NOMIME) { cli_dbgmsg("Force mime encoding to application\n"); messageSetMimeType(m, "application"); }
e3aaff8e	} /* * Add in all the arguments. * Cope with: * name="foo bar.doc" * charset=foo name=bar */
288057e9	void messageAddArguments(message m, const char s)
e3aaff8e	{
288057e9	const char *string = s;
e3aaff8e
288057e9	cli_dbgmsg("Add arguments '%s'\n", string);
e3aaff8e
288057e9	assert(string != NULL);
e3aaff8e
288057e9	while (string) { const char key, cptr; char data, *field; size_t datasz = 0;
e3aaff8e
288057e9	if (isspace(string & 0xff) \|\| (string == ';')) { string++; continue; }
e3aaff8e
288057e9	key = string;
28010d29
288057e9	data = strchr(string, '=');
e3aaff8e
288057e9	/*
4685e392	* Some spam breaks RFC2045 by using ':' instead of '='
e3aaff8e	* e.g.: * Content-Type: text/html; charset:ISO-8859-1 * should be: * Content-type: text/html; charset=ISO-8859-1 * * We give up with lines that are completely broken because * we don't have ESP and don't know what was meant to be there. * It's unlikely to really be a problem. */
288057e9	if (data == NULL) data = strchr(string, ':');
e3aaff8e
288057e9	if (data == NULL) { /*
e3aaff8e	* Completely broken, give up */
288057e9	cli_dbgmsg("Can't parse header \"%s\"\n", s); return; }
e3aaff8e
288057e9	string = &data[1];
e3aaff8e
288057e9	/*
28c29d59	* Handle white space to the right of the equals sign
4685e392	* This breaks RFC2045 which has:
28010d29	* parameter := attribute "=" value * attribute := token ; case-insensitive * token := 1<any (ASCII) CHAR except SPACE, CTLs, or tspecials> * But too many MUAs ignore this
28c29d59	*/
288057e9	while (isspace(string) && (string != '\0')) string++;
28c29d59
288057e9	cptr = string;
e3aaff8e
288057e9	if (*string) string++;
121ec511
288057e9	if (cptr == '"') { char ptr, *kcopy;
e3aaff8e
288057e9	/*
e3aaff8e	* The field is in quotes, so look for the * closing quotes */
288057e9	kcopy = cli_strdup(key);
767f16ab
288057e9	if (kcopy == NULL) return;
767f16ab
288057e9	ptr = strchr(kcopy, '='); if (ptr == NULL) { ptr = strchr(kcopy, ':');
b2c04b6c	if (ptr == NULL) { cli_dbgmsg("Can't parse header \"%s\"\n", s);
3a72170f	free(kcopy);
b2c04b6c	return; } }
288057e9	*ptr = '\0';
e3aaff8e
288057e9	string = strchr(++cptr, '"');
28010d29
288057e9	if (string == NULL) { cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s); string = ""; } else string++;
e3aaff8e
288057e9	if (!usefulArg(kcopy)) { free(kcopy); continue; }
28010d29
288057e9	data = cli_strdup(cptr);
e3aaff8e
288057e9	if (!data) { cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a missed virus, report it to bugs@clamav.net\n", s);
50876732	free(kcopy);
288057e9	return; }
6c11e824
288057e9	ptr = strchr(data, '"');
6c11e824
288057e9	if (ptr == NULL) { /*
e3aaff8e	* Weird e-mail header such as: * Content-Type: application/octet-stream; name=" * " * Content-Transfer-Encoding: base64 * Content-Disposition: attachment; filename=" * " *
6c11e824	* Use the end of line as data.
e3aaff8e	*/
288057e9	} else *ptr = '\0';
e3aaff8e
1f271616	datasz = strlen(kcopy) + strlen(data) + 2;
288057e9	field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2); if (field) {
1f271616	cli_strlcat(field, "=", datasz); cli_strlcat(field, data, datasz);
288057e9	} else { free(kcopy);
1f271616	}
288057e9	free(data); } else { size_t len;
5a642650
288057e9	if (*cptr == '\0') { cli_dbgmsg("Ignoring empty field in \"%s\"\n", s); return; }
5a642650
288057e9	/*
e3aaff8e	* The field is not in quotes, so look for the closing * white space */
288057e9	while ((string != '\0') && !isspace(string)) string++; len = (size_t)string - (size_t)key + 1; field = cli_malloc(len); if (field) { memcpy(field, key, len - 1); field[len - 1] = '\0'; } } if (field) { messageAddArgument(m, field); free(field); } }
e3aaff8e	} static const char * messageGetArgument(const message *m, int arg) {
288057e9	assert(m != NULL); assert(arg >= 0); assert(arg < m->numberOfArguments);
e3aaff8e
288057e9	return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
e3aaff8e	} /* * Find a MIME variable from the header and return a COPY to the value of that * variable. The caller must free the copy */
95e11e5a	char *
e3aaff8e	messageFindArgument(const message m, const char variable) {
288057e9	int i; size_t len;
e3aaff8e
288057e9	assert(m != NULL); assert(variable != NULL);
e3aaff8e
288057e9	len = strlen(variable);
9425e7ce
288057e9	for (i = 0; i < m->numberOfArguments; i++) { const char *ptr;
e3aaff8e
288057e9	ptr = messageGetArgument(m, i); if ((ptr == NULL) \|\| (*ptr == '\0')) continue; #ifdef CL_DEBUG cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n", (unsigned long)len, variable, ptr);
e3aaff8e	#endif
288057e9	if (strncasecmp(ptr, variable, len) == 0) { ptr = &ptr[len]; while (isspace(ptr)) ptr++; if (ptr != '=') { cli_dbgmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i)); return NULL; } ptr++; if ((strlen(ptr) > 1) && (ptr == '"') && (strchr(&ptr[1], '"') != NULL)) { / Remove any quote characters / char ret = cli_strdup(++ptr); char p; if (ret == NULL) return NULL; /
5a642650	* fix un-quoting of boundary strings from * header, occurs if boundary was given as * 'boundary="_Test_";' * * At least two quotes in string, assume * quoted argument * end string at next quote */
288057e9	if ((p = strchr(ret, '"')) != NULL) { ret[strlen(ret) - 1] = '\0'; *p = '\0'; } return ret; } return cli_strdup(ptr); } } return NULL;
e3aaff8e	}
/*
 * Return a copy of the attachment's file name: the "filename" argument if
 * there is one, otherwise the "name" argument. Returns NULL if neither is
 * found. The caller frees the result.
 */
char *
messageGetFilename(const message *m)
{
    char *filename = (char *)messageFindArgument(m, "filename");

    if (filename)
        return filename;

    return (char *)messageFindArgument(m, "name");
}
be32043e	/* Returns true or false / static int messageHasArgument(const message m, const char *variable) {
288057e9	int i; size_t len;
be32043e
288057e9	assert(m != NULL); assert(variable != NULL);
be32043e
288057e9	len = strlen(variable);
be32043e
288057e9	for (i = 0; i < m->numberOfArguments; i++) { const char *ptr;
be32043e
288057e9	ptr = messageGetArgument(m, i); if ((ptr == NULL) \|\| (*ptr == '\0')) continue; #ifdef CL_DEBUG cli_dbgmsg("messageHasArgument: compare %lu bytes of %s with %s\n", (unsigned long)len, variable, ptr);
be32043e	#endif
288057e9	if (strncasecmp(ptr, variable, len) == 0) { ptr = &ptr[len]; while (isspace(ptr)) ptr++; if (ptr != '=') { cli_dbgmsg("messageHasArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i)); return 0; } return 1; } } return 0;
be32043e	}
288057e9	int messageHasFilename(const message *m)
be32043e	{
288057e9	return messageHasArgument(m, "filename") \|\| messageHasArgument(m, "file");
be32043e	}
288057e9	void messageSetEncoding(message m, const char enctype)
e3aaff8e	{
288057e9	const struct encoding_map e; int i; char type;
0cf4cea7
288057e9	assert(m != NULL); assert(enctype != NULL);
e3aaff8e
288057e9	/m->encodingType = EEXTENSION;/
e3aaff8e
288057e9	while (isblank(*enctype)) enctype++;
098d38f1
288057e9	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
5ee8f96d
288057e9	if (strcasecmp(enctype, "8 bit") == 0) { cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n"); enctype = "8bit"; }
83e42783
288057e9	/*
c7b69776	* Iterate through * Content-Transfer-Encoding: base64 binary * cli_strtok's fieldno counts from 0 */
288057e9	i = 0; while ((type = cli_strtok(enctype, i++, " \t")) != NULL) { int highestSimil = 0; const char *closest = NULL;
1602f612
288057e9	for (e = encoding_map; e->string; e++) { int sim; const char lowertype = tolower(type[0]);
4ab382c3
288057e9	if ((lowertype != tolower(e->string[0])) && (lowertype != 'x')) /*
f003b79e	* simil is expensive, I'm yet to encounter only * one example of a missent encoding when the * first character was wrong, so lets assume no * match to save the call. * * That example was quoted-printable sent as * X-quoted-printable. */
288057e9	continue;
f003b79e
288057e9	if (strcmp(e->string, "uuencode") == 0) /*
182bbcc8	* No need to test here - fast track visa will have * handled uuencoded files */
288057e9	continue;
182bbcc8
288057e9	sim = simil(type, e->string);
f003b79e
288057e9	if (sim == 100) { int j; encoding_type *et;
c7b69776
288057e9	for (j = 0; j < m->numberOfEncTypes; j++) if (m->encodingTypes[j] == e->type) break;
f003b79e
288057e9	if (j < m->numberOfEncTypes) { cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n", type); break; }
1602f612
288057e9	et = (encoding_type )cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) sizeof(encoding_type)); if (et == NULL) break;
c7b69776
288057e9	m->encodingTypes = et; m->encodingTypes[m->numberOfEncTypes++] = e->type;
c7b69776
288057e9	cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type); break; } else if (sim > highestSimil) { closest = e->string; highestSimil = sim; } }
c7b69776
288057e9	if (e->string == NULL) { /*
68bad3a8	* The stated encoding type is illegal, so we * use a best guess of what it should be. *
7cd9337a	* 50% is arbitrary. For example 7bi will match as
1602f612	* 66% certain to be 7bit
c7b69776	*/
288057e9	if (highestSimil >= 50) { cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n", type, closest, highestSimil); messageSetEncoding(m, closest); } else { cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type); /*
1602f612	* Err on the side of safety, enable all * decoding modules */
288057e9	messageSetEncoding(m, "base64"); messageSetEncoding(m, "quoted-printable"); } }
e3aaff8e
288057e9	free(type); }
e3aaff8e	} encoding_type messageGetEncoding(const message *m) {
288057e9	assert(m != NULL);
c7b69776
288057e9	if (m->numberOfEncTypes == 0) return NOENCODING; return m->encodingTypes[0];
e3aaff8e	}
288057e9	int messageAddLine(message m, line_t line)
b2223aad	{
288057e9	assert(m != NULL);
b2223aad
288057e9	if (m->body_first == NULL) m->body_last = m->body_first = (text )cli_malloc(sizeof(text)); else { m->body_last->t_next = (text )cli_malloc(sizeof(text)); m->body_last = m->body_last->t_next; }
b2223aad
288057e9	if (m->body_last == NULL) {
241e7eb1	cli_errmsg("messageAddLine: out of memory for m->body_last\n");
288057e9	return -1;
241e7eb1	}
b2223aad
288057e9	m->body_last->t_next = NULL;
b2223aad
288057e9	if (line && lineGetData(line)) { m->body_last->t_line = lineLink(line);
b2223aad
288057e9	messageIsEncoding(m); } else m->body_last->t_line = NULL;
b2223aad
288057e9	return 1;
b2223aad	}
e3aaff8e	/*
3e69b5be	* Add the given line to the end of the given message
d879a7b0	* If needed a copy of the given line is taken which the caller must free
3e69b5be	* Line must not be terminated by a \n
e3aaff8e	*/
288057e9	int messageAddStr(message m, const char data)
e3aaff8e	{
288057e9	line_t *repeat = NULL;
381b67a7
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (data) { if (data == '\0') data = NULL; else { /
564b3e07	* If it's only white space, just store one space to * save memory. You must store something since it may * be a header line */
288057e9	int iswhite = 1; const char p; for (p = data; p; p++) if (((p) & 0x80) \|\| !isspace(p)) { iswhite = 0; break; } if (iswhite) { /cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);/ data = " "; } } } if (m->body_first == NULL) m->body_last = m->body_first = (text )cli_malloc(sizeof(text)); else { assert(m->body_last != NULL); if ((data == NULL) && (m->body_last->t_line == NULL)) /
fca571cb	* Although this would save time and RAM, some * phish signatures have been built which need the * blank lines */
288057e9	if (messageGetMimeType(m) != TEXT) /* don't save two blank lines in succession / return 1; m->body_last->t_next = (text )cli_malloc(sizeof(text)); if (m->body_last->t_next == NULL) { messageDedup(m); m->body_last->t_next = (text *)cli_malloc(sizeof(text)); if (m->body_last->t_next == NULL) { cli_errmsg("messageAddStr: out of memory\n"); return -1; } }
02927896
288057e9	if (data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0)) repeat = m->body_last->t_line; m->body_last = m->body_last->t_next; } if (m->body_last == NULL) { cli_errmsg("messageAddStr: out of memory\n"); return -1; } m->body_last->t_next = NULL; if (data && data) { if (repeat) m->body_last->t_line = lineLink(repeat); else { m->body_last->t_line = lineCreate(data); if (m->body_last->t_line == NULL) { messageDedup(m); m->body_last->t_line = lineCreate(data); if (m->body_last->t_line == NULL) { cli_errmsg("messageAddStr: out of memory\n"); return -1; } } / cli_chomp(m->body_last->t_text); */ messageIsEncoding(m); } } else m->body_last->t_line = NULL; return 1;
e3aaff8e	}
d879a7b0	/*
3e69b5be	* Add the given line to the start of the given message * A copy of the given line is taken which the caller must free * Line must not be terminated by a \n */
288057e9	int messageAddStrAtTop(message m, const char data)
3e69b5be	{
288057e9	text *oldfirst;
3e69b5be
288057e9	assert(m != NULL);
3e69b5be
288057e9	if (m->body_first == NULL) return messageAddLine(m, lineCreate(data));
843e1da6
288057e9	oldfirst = m->body_first; m->body_first = (text *)cli_malloc(sizeof(text)); if (m->body_first == NULL) { m->body_first = oldfirst; return -1; }
3e69b5be
288057e9	m->body_first->t_next = oldfirst; m->body_first->t_line = lineCreate((data) ? data : "");
3e69b5be
288057e9	if (m->body_first->t_line == NULL) { cli_errmsg("messageAddStrAtTop: out of memory\n"); return -1; } return 1;
3e69b5be	} /*
94f051b0	* Put the contents of the given text at the end of the current object. * Can be used either to move a text object into a message, or to move a * message's text into another message only moving from a given offset. * The given text emptied; it can be used again if needed, though be warned that * it will have an empty line at the start. * Returns 0 for failure, 1 for success */
288057e9	int messageMoveText(message m, text t, message *old_message)
94f051b0	{
288057e9	int rc;
94f051b0
288057e9	if (m->body_first == NULL) { if (old_message) { text u; /
94f051b0	* t is within old_message which is about to be * destroyed */
288057e9	assert(old_message->body_first != NULL); m->body_first = t; for (u = old_message->body_first; u != t;) { text *next; if (u->t_line) { lineUnlink(u->t_line); u->t_line = NULL; } next = u->t_next;
94f051b0
288057e9	free(u); u = next; if (u == NULL) { cli_dbgmsg("messageMoveText sanity check: t not within old_message\n"); return -1; } } assert(old_message->body_last->t_next == NULL); m->body_last = old_message->body_last; old_message->body_first = old_message->body_last = NULL; /* Do any pointers need to be reset? */ if ((old_message->bounce == NULL) && (old_message->encoding == NULL) && (old_message->binhex == NULL) && (old_message->yenc == NULL)) return 0; m->body_last = m->body_first; rc = 0; } else { m->body_last = m->body_first = textMove(NULL, t); if (m->body_first == NULL) return -1; else rc = 0; } } else { m->body_last = textMove(m->body_last, t); if (m->body_last == NULL) { rc = -1; m->body_last = m->body_first; } else rc = 0; } while (m->body_last->t_next) { m->body_last = m->body_last->t_next; if (m->body_last->t_line) messageIsEncoding(m); } return rc;
94f051b0	} /*
b2223aad	* See if the last line marks the start of a non MIME inclusion that * will need to be scanned / static void messageIsEncoding(message m) {
288057e9	static const char encoding[] = "Content-Transfer-Encoding"; static const char binhex[] = "(This file must be converted with BinHex 4.0)"; const char *line = lineGetData(m->body_last->t_line);
b2223aad
288057e9	/*if(m->ctx == NULL)
faa0d267	cli_dbgmsg("messageIsEncoding, ctx == NULL\n");*/
288057e9	if ((m->encoding == NULL) && (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) && (strstr(line, "7bit") == NULL)) m->encoding = m->body_last; else if ((m->bounce == NULL) && m->ctx && (strncasecmp(line, "Received: ", 10) == 0) && (cli_filetype((const unsigned char )line, strlen(line), m->ctx->engine) == CL_TYPE_MAIL)) m->bounce = m->body_last; / Not needed with fast track visa technology / /else if((m->uuencode == NULL) && isuuencodebegin(line))
182bbcc8	m->uuencode = m->body_last;*/
288057e9	else if ((m->binhex == NULL) && strstr(line, "BinHex") && (simil(line, binhex) > 90)) /*
47193544	* Look for close matches for BinHex, but * simil() is expensive so only do it if it's * likely to be found */
288057e9	m->binhex = m->body_last; else if ((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0)) m->yenc = m->body_last;
b2223aad	} /*
d879a7b0	* Returns a pointer to the body of the message. Note that it does NOT return * a copy of the data */
2673dc74	text * messageGetBody(message *m)
e3aaff8e	{
288057e9	assert(m != NULL); return m->body_first;
e3aaff8e	} /*
78e302e1	* Export a message using the given export routines
e7aa5e3d	* * TODO: It really should export into an array, one * for each encoding algorithm. However, what it does is it returns the * last item that was exported. That's sufficient for now.
e3aaff8e	*/
09e05292	static void *
288057e9	messageExport(message m, const char dir, void (create)(void), void (destroy)(void ), void (setFilename)(void , const char , const char ), void (addData)(void , const unsigned char , size_t), void (exportText)(text , void , int), void (setCTX)(void , cli_ctx ), int destroy_text)
e3aaff8e	{
288057e9	void ret; text t_line; char *filename; int i;
e3aaff8e
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (messageGetBody(m) == NULL) return NULL;
c7b69776
288057e9	ret = (*create)();
e3aaff8e
288057e9	if (ret == NULL) return NULL;
e3aaff8e
288057e9	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
4b187745
288057e9	if (m->numberOfEncTypes == 0) { /*
c7b69776	* Fast copy */
288057e9	cli_dbgmsg("messageExport: Entering fast copy mode\n");
c691512c
288057e9	#if 0
ba74b333	filename = messageGetFilename(m); if(filename == NULL) { cli_dbgmsg("Unencoded attachment sent with no filename\n"); messageAddArgument(m, "name=attachment"); } else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0)) /* * Some virus attachments don't say how they've * been encoded. We assume base64 */ messageSetEncoding(m, "base64"); #else
288057e9	filename = (char )messageFindArgument(m, "filename"); if (filename == NULL) { filename = (char )messageFindArgument(m, "name"); if (filename == NULL) { cli_dbgmsg("Unencoded attachment sent with no filename\n"); messageAddArgument(m, "name=attachment"); } else /*
843e1da6	* Some virus attachments don't say how they've
75cc6fb0	* been encoded. We assume base64. * RFC says encoding should be 7-bit.
843e1da6	*/
288057e9	messageSetEncoding(m, "7-bit"); }
ba74b333	#endif
e3aaff8e
288057e9	(setFilename)(ret, dir, (filename && filename) ? filename : "attachment");
e3aaff8e
288057e9	if (filename) free((char *)filename);
e3aaff8e
288057e9	if (m->numberOfEncTypes == 0) return exportText(messageGetBody(m), ret, destroy_text); }
e3aaff8e
288057e9	if (setCTX && m->ctx) (*setCTX)(ret, m->ctx);
3e55fc76
288057e9	for (i = 0; i < m->numberOfEncTypes; i++) { encoding_type enctype = m->encodingTypes[i]; size_t size;
c7b69776
288057e9	if (i > 0) { void *newret;
e7aa5e3d
288057e9	newret = (create)(); if (newret == NULL) { cli_dbgmsg("Not all decoding algorithms were run\n"); return ret; } (destroy)(ret); ret = newret; } cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype); /*
c7b69776	* Find the filename to decode
e3aaff8e	*/
288057e9	if (((enctype == YENCODE) \|\| (i == 0)) && yEncBegin(m)) { const char *f;
95e11e5a
288057e9	/*
16394c6d	* TODO: handle multipart yEnc encoded files */
288057e9	t_line = yEncBegin(m); f = lineGetData(t_line->t_line); if ((filename = strstr(f, " name=")) != NULL) { filename = cli_strdup(&filename[6]); if (filename) { cli_chomp(filename); strstrip(filename); cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename); } } (setFilename)(ret, dir, (filename && filename) ? filename : "attachment"); if (filename) { free((char )filename); filename = NULL; } t_line = t_line->t_next; enctype = YENCODE; m->yenc = NULL; } else { if (enctype == UUENCODE) { /
73ddf91f	* The body will have been stripped out by the * fast track visa system. Treat as plain/text, * which means we'll still scan for funnies * outside of the uuencoded portion.
182bbcc8	*/
288057e9	cli_dbgmsg("messageExport: treat uuencode as text/plain\n"); enctype = m->encodingTypes[i] = NOENCODING; } filename = messageGetFilename(m); if (filename == NULL) { cli_dbgmsg("Attachment sent with no filename\n"); messageAddArgument(m, "name=attachment"); } else if (enctype == NOENCODING) /*
ba74b333	* Some virus attachments don't say how * they've been encoded. We assume * base64. * * FIXME: don't do this if it's a fall * through from uuencode */
288057e9	messageSetEncoding(m, "base64");
c7b69776
288057e9	(setFilename)(ret, dir, (filename && filename) ? filename : "attachment");
c7b69776
288057e9	t_line = messageGetBody(m); }
2ad0c86e
288057e9	if (filename) free((char *)filename);
c7b69776
288057e9	/*
af780d0c	* t_line should now point to the first (encoded) line of the * message
c7b69776	*/
288057e9	if (t_line == NULL) { cli_dbgmsg("Empty attachment not saved\n"); (*destroy)(ret); return NULL; }
c7b69776
288057e9	if (enctype == NOENCODING) { /*
c7b69776	* Fast copy
de101a82	*/
288057e9	if (i == m->numberOfEncTypes - 1) { /* last one */ (void)exportText(t_line, ret, destroy_text); break; } (void)exportText(t_line, ret, 0); continue; }
c7b69776
288057e9	size = 0; do { unsigned char smallbuf[1024]; unsigned char uptr, data; const char line = lineGetData(t_line->t_line); unsigned char bigbuf; size_t datasize; if (enctype == YENCODE) { if (line == NULL) continue; if (strncmp(line, "=yend ", 6) == 0) break; } /*
e7aa5e3d	* Add two bytes for '\n' and '\0' */
288057e9	datasize = (line) ? strlen(line) + 2 : 0; if (datasize >= sizeof(smallbuf)) data = bigbuf = (unsigned char *)cli_malloc(datasize); else { bigbuf = NULL; data = smallbuf; datasize = sizeof(smallbuf); }
e7aa5e3d
288057e9	uptr = decodeLine(m, enctype, line, data, datasize); if (uptr == NULL) { if (data == bigbuf) free(data); break; } if (uptr != data) { assert((size_t)(uptr - data) < datasize); (addData)(ret, data, (size_t)(uptr - data)); size += (size_t)(uptr - data); } if (data == bigbuf) free(data); /
4685e392	* According to RFC2045, '=' is used to pad out
c7b69776	* the last byte and should be used as evidence * of the end of the data. Some mail clients * annoyingly then put plain text after the '=' * byte and viruses exploit this bug. Sigh */
288057e9	/*if(enctype == BASE64)
c7b69776	if(strchr(line, '=')) break;*/
288057e9	if (line && destroy_text && (i == m->numberOfEncTypes - 1)) { lineUnlink(t_line->t_line); t_line->t_line = NULL; } } while ((t_line = t_line->t_next) != NULL); cli_dbgmsg("Exported %lu bytes using enctype %d\n", (unsigned long)size, (int)enctype); /* Verify we have nothing left to flush out / if (m->base64chars) { unsigned char data[4]; unsigned char ptr; ptr = base64Flush(m, data); if (ptr) (*addData)(ret, data, (size_t)(ptr - data)); } } return ret;
78e302e1	}
/*
 * Flush any base64 characters still buffered in m into buf.
 *
 * Returns the position one past the last byte written to buf, or NULL when
 * nothing was pending. m->base64chars is zero on return.
 */
unsigned char *
base64Flush(message *m, unsigned char *buf)
{
    unsigned char *end;

    cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);

    if (m->base64chars == 0)
        return NULL;

    end            = decode(m, NULL, buf, base64, FALSE);
    m->base64chars = 0;

    return end;
}
4270f93b	int messageSavePartial(message m, const char dir, const char *md5id, unsigned part) {
288057e9	char fullname[1024]; fileblob fb; unsigned long time_val; cli_dbgmsg("messageSavePartial\n"); time_val = time(NULL); snprintf(fullname, 1024, "%s" PATHSEP "clamav-partial-%lu_%s-%u", dir, time_val, md5id, part); fb = messageExport(m, fullname, (void ()(void))fileblobCreate, (void ()(void ))fileblobDestroy, (void ()(void , const char , const char ))fileblobPartialSet, (void ()(void , const unsigned char , size_t))fileblobAddData, (void ()(text , void , int))textToFileblob, (void ()(void , cli_ctx *))fileblobSetCTX, 0); if (!fb) return CL_EFORMAT; fileblobDestroy(fb); return CL_SUCCESS;
4270f93b	}
78e302e1	/* * Decode and transfer the contents of the message into a fileblob * The caller must free the returned fileblob / fileblob
2673dc74	messageToFileblob(message m, const char dir, int destroy)
78e302e1	{
288057e9	fileblob fb; cli_dbgmsg("messageToFileblob\n"); fb = messageExport(m, dir, (void ()(void))fileblobCreate, (void ()(void ))fileblobDestroy, (void ()(void , const char , const char ))fileblobSetFilename, (void ()(void , const unsigned char , size_t))fileblobAddData, (void ()(text , void , int))textToFileblob, (void ()(void , cli_ctx *))fileblobSetCTX, destroy); if (destroy && m->body_first) { textDestroy(m->body_first); m->body_first = m->body_last = NULL; } return fb;
78e302e1	} /*
8386c723	* Decode and transfer the contents of the message into a closed blob
78e302e1	* The caller must free the returned blob / blob
2673dc74	messageToBlob(message *m, int destroy)
78e302e1	{
288057e9	blob b; cli_dbgmsg("messageToBlob\n"); b = messageExport(m, NULL, (void ()(void))blobCreate, (void ()(void ))blobDestroy, (void ()(void , const char , const char ))blobSetFilename, (void ()(void , const unsigned char , size_t))blobAddData, (void ()(text , void , int))textToBlob, (void ()(void , cli_ctx *))NULL, destroy); if (destroy && m->body_first) { textDestroy(m->body_first); m->body_first = m->body_last = NULL; } return b;
e3aaff8e	} /* * Decode and transfer the contents of the message into a text area
d879a7b0	* The caller must free the returned text
e3aaff8e	/ text
0c0894b8	messageToText(message *m)
e3aaff8e	{
288057e9	int i; text first = NULL, last = NULL; const text *t_line;
e3aaff8e
288057e9	assert(m != NULL);
e3aaff8e
288057e9	if (m->numberOfEncTypes == 0) { /*
e3aaff8e	* Fast copy */
288057e9	for (t_line = messageGetBody(m); t_line; t_line = t_line->t_next) { if (first == NULL) first = last = cli_malloc(sizeof(text)); else { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; } if (last == NULL) { if (first) textDestroy(first); return NULL; } if (t_line->t_line) last->t_line = lineLink(t_line->t_line); else last->t_line = NULL; /* empty line / } if (last) last->t_next = NULL; return first; } /
c7b69776	* Scan over the data a number of times once for each claimed encoding * type */
288057e9	for (i = 0; i < m->numberOfEncTypes; i++) { const encoding_type enctype = m->encodingTypes[i];
c7b69776
288057e9	cli_dbgmsg("messageToText: export transfer method %d = %d\n", i, (int)enctype);
7ae8fbfb
288057e9	switch (enctype) { case NOENCODING: case BINARY: case EIGHTBIT: /*
7ae8fbfb	* Fast copy */
288057e9	for (t_line = messageGetBody(m); t_line; t_line = t_line->t_next) { if (first == NULL) first = last = cli_malloc(sizeof(text)); else if (last) { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; } if (last == NULL) { if (first) { textDestroy(first); } return NULL; } if (t_line->t_line) last->t_line = lineLink(t_line->t_line); else last->t_line = NULL; /* empty line / } continue; case UUENCODE: cli_warnmsg("messageToText: Unexpected attempt to handle uuencoded file\n"); if (first) { if (last) last->t_next = NULL; textDestroy(first); } return NULL; case YENCODE: t_line = yEncBegin(m); if (t_line == NULL) { /cli_warnmsg("YENCODED attachment is missing begin statement\n");/ if (first) { if (last) last->t_next = NULL; textDestroy(first); } return NULL; } t_line = t_line->t_next; default: if ((i == 0) && binhexBegin(m)) cli_warnmsg("Binhex messages not supported yet.\n"); t_line = messageGetBody(m); } for (; t_line; t_line = t_line->t_next) { unsigned char data[1024]; unsigned char uptr; const char line = lineGetData(t_line->t_line); if (enctype == BASE64) /
0c0894b8	* ignore blanks - breaks RFC which is * probably the point! */
288057e9	if (line == NULL) continue;
28c29d59
288057e9	assert((line == NULL) \|\| (strlen(line) <= sizeof(data)));
e7aa5e3d
288057e9	uptr = decodeLine(m, enctype, line, data, sizeof(data));
e3aaff8e
288057e9	if (uptr == NULL) break;
e3aaff8e
288057e9	assert(uptr <= &data[sizeof(data)]);
0ae75a8d
288057e9	if (first == NULL) first = last = cli_malloc(sizeof(text)); else if (last) { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; }
e3aaff8e
288057e9	if (last == NULL) break;
28c29d59
288057e9	/*
ab4038b4	* If the decoded line is the same as the encoded * there's no need to take a copy, just link it. * Note that the comparison is done without the * trailing newline that the decoding routine may have * added - that's why there's a strncmp rather than a * strcmp - that'd be bad for MIME decoders, but is OK * for AV software */
288057e9	if ((data[0] == '\n') \|\| (data[0] == '\0')) last->t_line = NULL; else if (line && (strncmp((const char *)data, line, strlen(line)) == 0)) { #ifdef CL_DEBUG cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
bae9c53f	#endif
288057e9	last->t_line = lineLink(t_line->t_line); } else last->t_line = lineCreate((char *)data);
02927896
288057e9	if (line && enctype == BASE64) if (strchr(line, '=')) break; } if (m->base64chars) { unsigned char data[4]; memset(data, '\0', sizeof(data)); if (decode(m, NULL, data, base64, FALSE) && data[0]) { if (first == NULL) first = last = cli_malloc(sizeof(text)); else if (last) { last->t_next = cli_malloc(sizeof(text)); last = last->t_next; } if (last != NULL) last->t_line = lineCreate((char *)data); } m->base64chars = 0; } } if (last) last->t_next = NULL; return first;
e3aaff8e	}
2673dc74	text * yEncBegin(message *m)
16394c6d	{
288057e9	return m->yenc;
16394c6d	}
e3aaff8e	/*
130bc08c	* Scan to find the BINHEX message (if any) */
288057e9	#if 0
2673dc74	const text * binhexBegin(message *m)
130bc08c	{ const text *t_line; for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0) return t_line; return NULL; }
ae3bda56	#else
2673dc74	text * binhexBegin(message *m)
ae3bda56	{
288057e9	return m->binhex;
ae3bda56	} #endif
130bc08c	/*
cca4efe4	* Scan to find a bounce message. There is no standard for these, not * even a convention, so don't expect this to be foolproof */
288057e9	#if 0
2673dc74	text * bounceBegin(message *m)
cca4efe4	{ const text *t_line;
1892da50
86cf20d6	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
3805ebcb	if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
86cf20d6	return t_line;
cca4efe4	return NULL; }
ae3bda56	#else
2673dc74	text * bounceBegin(message *m)
ae3bda56	{
288057e9	return m->bounce;
ae3bda56	} #endif /* * If a message doesn't not contain another message which could be harmful * it is deemed to be safe. * * TODO: ensure nothing can get through this * * TODO: check to see if we need to * find anything else, perhaps anything * from the RFC821 table? */
288057e9	#if 0
ae3bda56	int messageIsAllText(const message m) { const text t; for(t = messageGetBody(m); t; t = t->t_next) if(strncasecmp(t->t_text, "Content-Transfer-Encoding", strlen("Content-Transfer-Encoding")) == 0) return 0; return 1; } #else
2673dc74	text * encodingLine(message *m)
ae3bda56	{
288057e9	return m->encoding;
ae3bda56	} #endif
cca4efe4	/*
e3aaff8e	* Decode a line and add it to a buffer, return the end of the buffer
0ae75a8d	* to help appending callers. There is no new line at the end of "line"
9b9fcfc5	* * len is sizeof(ptr)
e3aaff8e	*/
4945127a	unsigned char *
c7b69776	decodeLine(message m, encoding_type et, const char line, unsigned char *buf, size_t buflen)
e3aaff8e	{
288057e9	size_t len, reallen; bool softbreak; char p2, copy; char base64buf[RFC2045LENGTH + 1];
e3aaff8e
288057e9	/cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);/
e7aa5e3d
288057e9	assert(m != NULL); assert(buf != NULL);
e3aaff8e
288057e9	switch (et) { case BINARY: /*
cc96e455	* TODO: find out what this is, encoded as binary?? */
288057e9	/* fall through / case NOENCODING: case EIGHTBIT: default: / unknown encoding type - try our best / if (line) / empty line? / buf = (unsigned char )cli_strrcpy((char )buf, line); / Put the new line back in / return (unsigned char )cli_strrcpy((char )buf, "\n"); case QUOTEDPRINTABLE: if (line == NULL) { / empty line / buf++ = '\n'; break; } softbreak = FALSE; while (buflen && line) { if (line == '=') { unsigned char byte; if ((++line == '\0') \|\| (line == '\n')) { softbreak = TRUE; /* soft line break / break; } byte = hex(line); if ((++line == '\0') \|\| (line == '\n')) { /*
4685e392	* broken e-mail, not * adhering to RFC2045 */
288057e9	*buf++ = byte; break; }
4685e392
288057e9	/*
ce30bbe0	* Handle messages that use a broken * quoted-printable encoding of * href=\"http://, instead of =3D */
288057e9	if (byte != '=') byte = (byte << 4) \| hex(line); else line -= 2; buf++ = byte; } else buf++ = line; ++line; --buflen; } if (!softbreak) /* Put the new line back in / buf++ = '\n'; break; case BASE64: if (line == NULL) break; /*
4685e392	* RFC2045 sets the maximum length to 76 bytes
28c29d59	* but many e-mail clients ignore that */
288057e9	if (strlen(line) < sizeof(base64buf)) { strcpy(base64buf, line); copy = base64buf; } else { copy = cli_strdup(line); if (copy == NULL) break; }
843e1da6
288057e9	p2 = strchr(copy, '='); if (p2) *p2 = '\0';
0c0894b8
288057e9	sanitiseBase64(copy);
32c9b306
288057e9	/*
e3aaff8e	* Klez doesn't always put "=" on the last line */
288057e9	buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
28c29d59
288057e9	if (copy != base64buf) free(copy); break;
e3aaff8e
288057e9	case UUENCODE: assert(m->base64chars == 0);
ae5c693a
288057e9	if ((line == NULL) \|\| (line == '\0')) / empty line */ break; if (strcasecmp(line, "end") == 0) break; if (isuuencodebegin(line)) break;
e3aaff8e
288057e9	if ((line[0] & 0x3F) == ' ') break;
e3aaff8e
288057e9	/*
5be8beb8	* reallen contains the number of bytes that were * encoded */
288057e9	reallen = (size_t)uudecode(line++); if (reallen <= 0) break; if (reallen > 62) break; len = strlen(line); if ((len > buflen) \|\| (reallen > len)) /
9b9fcfc5	* In practice this should never occur since * the maximum length of a uuencoded line is * 62 characters */
288057e9	cli_dbgmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n"); else { (void)decode(m, line, buf, uudecode, (len & 3) == 0); buf = &buf[reallen]; } m->base64chars = 0; /* this happens with broken uuencoded files / break; case YENCODE: if ((line == NULL) \|\| (line == '\0')) /* empty line / break; if (strncmp(line, "=yend ", 6) == 0) break; while (line) if (line == '=') { if (++line == '\0') break; buf++ = ((line++ - 64) & 255); } else buf++ = ((line++ - 42) & 255); break; } *buf = '\0'; return buf;
e3aaff8e	}
c43c9798	/*
9e1dc6e8	* Remove the non base64 characters such as spaces from a string. Spaces * shouldn't appear mid string in base64 files, but some broken mail clients * ignore such errors rather than discarding the mail, and virus writers * exploit this bug
0c0894b8	*/ static void
9e1dc6e8	sanitiseBase64(char *s)
0c0894b8	{
288057e9	cli_dbgmsg("sanitiseBase64 '%s'\n", s); while (s) if (base64Table[(unsigned int)(s & 0xFF)] == 255) { char *p1; for (p1 = s; p1[0] != '\0'; p1++) p1[0] = p1[1]; } else s++;
0c0894b8	} /*
c43c9798	* Returns one byte after the end of the decoded data in "out"
0c0894b8	* * Update m->base64chars with the last few bytes of data that we haven't * decoded. After the last line is found, decode will be called with in = NULL * to flush these out
c43c9798	*/
e3aaff8e	static unsigned char *
0c0894b8	decode(message m, const char in, unsigned char out, unsigned char (decoder)(char), bool isFast) {
288057e9	unsigned char b1, b2, b3, b4; unsigned char cb1, cb2, cb3; /* carried over from last line */
0c0894b8
288057e9	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
0c0894b8	in ? strlen(in) : 0,
cd11ef39	isFast, m->base64chars);*/
0c0894b8
288057e9	cb1 = cb2 = cb3 = '\0'; switch (m->base64chars) { case 3: cb3 = m->base64_3; /* FALLTHROUGH / case 2: cb2 = m->base64_2; / FALLTHROUGH / case 1: cb1 = m->base64_1; isFast = FALSE; break; default: assert(m->base64chars <= 3); } if (isFast) / Fast decoding if not last line / while (in) { b1 = (decoder)(in++); b2 = (decoder)(in++); b3 = (decoder)(in++); /*
0c0894b8	* Put this line here to help on some compilers which
7cd9337a	* can make use of some architecture's ability to
0c0894b8	* multiprocess when different variables can be * updated at the same time - here b3 is used in * one line, b1/b2 in the next and b4 in the next after * that, b3 and b4 rely on in but b1/b2 don't */
288057e9	out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); b4 = (decoder)(in++); out++ = (b2 << 4) \| ((b3 >> 2) & 0xF); out++ = (b3 << 6) \| (b4 & 0x3F); } else if (in == NULL) { / flush / int nbytes; if (m->base64chars == 0) return out; cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars, isalnum(cb1) ? cb1 : '@', isalnum(cb2) ? cb2 : '@', isalnum(cb3) ? cb3 : '@'); m->base64chars--; b1 = cb1; nbytes = 1; if (m->base64chars) { m->base64chars--; b2 = cb2; if (m->base64chars) { nbytes = 2; m->base64chars--; b3 = cb3; nbytes = 3; } else if (b2) nbytes = 2; } switch (nbytes) { case 3: b4 = '\0'; / fall through / case 4: out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); out++ = (b2 << 4) \| ((b3 >> 2) & 0xF); if ((nbytes == 4) \|\| (b3 & 0x3)) out++ = (b3 << 6) \| (b4 & 0x3F); break; case 2: out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); if ((b2 << 4) & 0xFF) out++ = b2 << 4; break; case 1: out++ = b1 << 2; break; default: assert(0); } } else while (in) { int nbytes; if (m->base64chars) { m->base64chars--; b1 = cb1; } else b1 = (decoder)(in++); if (in == '\0') { b2 = '\0'; nbytes = 1; } else { if (m->base64chars) { m->base64chars--; b2 = cb2; } else b2 = (decoder)(in++); if (in == '\0') { b3 = '\0'; nbytes = 2; } else { if (m->base64chars) { m->base64chars--; b3 = cb3; } else b3 = (decoder)(in++); if (in == '\0') { b4 = '\0'; nbytes = 3; } else { b4 = (decoder)(in++); nbytes = 4; } } } switch (nbytes) { case 4: out++ = (b1 << 2) \| ((b2 >> 4) & 0x3); out++ = (b2 << 4) \| ((b3 >> 2) & 0xF); out++ = (b3 << 6) \| (b4 & 0x3F); continue; case 3: m->base64_3 = b3; case 2: m->base64_2 = b2; case 1: m->base64_1 = b1; m->base64chars = nbytes; break; default: assert(0); } break; /* nbytes != 4 => EOL */ } return out;
0c0894b8	}
/*
 * Convert one hexadecimal digit to its value (0 - 15).
 *
 * Returns '=' for anything that is not a hex digit, see below.
 */
static unsigned char
hex(char c)
{
    /* <ctype.h> functions need an unsigned char value, not a plain char */
    if (isdigit((unsigned char)c))
        return c - '0';
    if ((c >= 'A') && (c <= 'F'))
        return c - 'A' + 10;
    if ((c >= 'a') && (c <= 'f'))
        return c - 'a' + 10;
    cli_dbgmsg("Illegal hex character '%c'\n", c);

    /*
     * Some mails (notably some spam) break RFC2045 by failing to encode
     * the '=' character
     */
    return '=';
}
621a667a	static unsigned char base64(char c) {
288057e9	const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)];
621a667a
288057e9	if (ret == 255) { /cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);/ return 63; } return ret;
621a667a	}
/* Value of one uuencoded character: its offset from the space character */
static unsigned char
uudecode(char c)
{
    return (unsigned char)(c - ' ');
}
/*
 * These are the only arguments we're interested in.
 * Do 'fgrep messageFindArgument *.c' if you don't believe me!
 * It's probably not good doing this since each time a new
 * messageFindArgument is added I need to remember to look here,
 * but it can save a lot of memory...
 *
 * Returns 1 if arg starts (case insensitively) with one of the wanted
 * names, 0 otherwise.
 */
static int
usefulArg(const char *arg)
{
    static const struct {
        const char *name;
        size_t len;
    } wanted[] = {
        {"name", 4},
        {"filename", 8},
        {"boundary", 8},
        {"protocol", 8},
        {"id", 2},
        {"number", 6},
        {"total", 5},
        {"type", 4}};
    size_t idx;

    for (idx = 0; idx < sizeof(wanted) / sizeof(wanted[0]); idx++)
        if (strncasecmp(arg, wanted[idx].name, wanted[idx].len) == 0)
            return 1;

    cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
    return 0;
}
86355dc2
288057e9	void messageSetCTX(message m, cli_ctx ctx)
a603478f	{
288057e9	m->ctx = ctx;
a603478f	}
288057e9	int messageContainsVirus(const message *m)
a603478f	{
288057e9	return m->isInfected ? TRUE : FALSE;
a603478f	}
86355dc2	/* * We've run out of memory. Try to recover some by * deduping the message
70a968be	* * FIXME: this can take a long time. The real solution is for system admins * to refrain from setting ulimits too low, then this routine won't be * called
86355dc2	/ static void messageDedup(message m) {
288057e9	const text t1; size_t saved = 0; cli_dbgmsg("messageDedup\n"); t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first; for (t1 = m->body_first; t1; t1 = t1->t_next) { const char d1; text t2; line_t l1; unsigned int r1; if (saved >= 100 * 1000) break; /* that's enough / l1 = t1->t_line; if (l1 == NULL) continue; d1 = lineGetData(l1); if (strlen(d1) < 8) continue; / wouldn't recover many bytes / r1 = (unsigned int)lineGetRefCount(l1); if (r1 == 255) continue; /
86355dc2	* We don't want to foul up any pointers */
288057e9	if (t1 == m->encoding) continue; if (t1 == m->bounce) continue; if (t1 == m->binhex) continue; if (t1 == m->yenc) continue; for (t2 = t1->t_next; t2; t2 = t2->t_next) { const char d2; line_t l2 = t2->t_line; if (l2 == NULL) continue; d2 = lineGetData(l2); if (d1 == d2) /* already linked */ continue; if (strcmp(d1, d2) == 0) { if (lineUnlink(l2) == NULL) saved += strlen(d1) + 1; t2->t_line = lineLink(l1); if (t2->t_line == NULL) { cli_errmsg("messageDedup: out of memory\n"); return; } if (++r1 == 255) break; } } } cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved); m->dedupedThisFar = t1;
86355dc2	}
/*
 * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
 * free, or NULL on error.
 *
 * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
 *	protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
 */
static char *
rfc2231(const char *in)
{
    const char *ptr;
    char *ret, *out;
    enum { LANGUAGE,
           CHARSET,
           CONTENTS } field;

    if (strstr(in, "*0*=") != NULL) {
        char *p;

        /* Don't handle continuations, decode what we can */
        p = ret = cli_malloc(strlen(in) + 16);
        if (ret == NULL) {
            cli_errmsg("rfc2331: out of memory, unable to proceed\n");
            return NULL;
        }

        do {
            switch (*in) {
                default:
                    *p++ = *in++;
                    continue;
                case '*':
                    /* skip everything up to the matching '*' */
                    do
                        in++;
                    while ((*in != '*') && *in);
                    if (*in) {
                        in++;
                        continue;
                    }
                    break;
                case '=':
                    /* the value itself is replaced by a marker */
                    strcpy(p, "=rfc2231failure");
                    p += strlen("=rfc2231failure");
                    break;
            }
            break;
        } while (*in);

        *p = '\0';

        cli_dbgmsg("RFC2231 parameter continuations are not yet handled, returning \"%s\"\n",
                   ret);
        return ret;
    }

    ptr = strstr(in, "*0=");
    if (ptr != NULL)
        /*
         * Parameter continuation, with no continuation
         * Thunderbird 1.5 (and possibly other versions) does this
         */
        field = CONTENTS;
    else {
        ptr   = strstr(in, "*=");
        field = LANGUAGE;
    }

    if (ptr == NULL) { /* quick return */
        out = ret = cli_strdup(in);
        if (ret == NULL)
            return NULL; /* out of memory: nothing to strip */
        while (*out)
            *out++ &= 0x7F;
        return ret;
    }

    cli_dbgmsg("rfc2231 '%s'\n", in);

    ret = cli_malloc(strlen(in) + 1);

    if (ret == NULL) {
        cli_errmsg("rfc2331: out of memory for ret\n");
        return NULL;
    }

    /* copy the parameter name, i.e. everything before the '*' */
    out = ret;
    while (in != ptr)
        *out++ = *in++;

    *out++ = '=';

    while (*ptr++ != '=')
        continue;

    /*
     * We don't do anything with the language and character set, just skip
     * over them!
     */
    while (*ptr) {
        switch (field) {
            case LANGUAGE:
                if (*ptr == '\'')
                    field = CHARSET;
                break;
            case CHARSET:
                if (*ptr == '\'')
                    field = CONTENTS;
                break;
            case CONTENTS:
                if (*ptr == '%') {
                    unsigned char byte;

                    if ((*++ptr == '\0') || (*ptr == '\n'))
                        break;

                    byte = hex(*ptr);

                    if ((*++ptr == '\0') || (*ptr == '\n')) {
                        *out++ = byte;
                        break;
                    }

                    byte <<= 4;
                    byte += hex(*ptr);
                    *out++ = byte;
                } else
                    *out++ = *ptr;
        }
        if (*ptr++ == '\0')
            /*
             * Incorrect message that has just one character after
             * a '%'.
             * FIXME: stash something in out that would, for example
             *	treat %2 as %02, assuming field == CONTENTS
             */
            break;
    }

    if (field != CONTENTS) {
        free(ret);
        cli_dbgmsg("Invalid RFC2231 header: '%s'\n", in);
        return cli_strdup("");
    }

    *out = '\0';

    cli_dbgmsg("rfc2231 returns '%s'\n", ret);

    return ret;
}

/*
 * common/simil:
 *	From Computing Magazine 20/8/92
 * Returns %ge number from 0 to 100 - how similar are 2 strings?
 * 100 for exact match, < 0 for error
 */
288057e9	struct pstr_list { /* internal stack / char d1; struct pstr_list *next;
1602f612	};
288057e9	#define OUT_OF_MEMORY (-2) #define FAILURE (-3) #define SUCCESS (-4) #define ARRAY_OVERFLOW (-5) typedef struct pstr_list ELEMENT1; typedef ELEMENT1 *LINK1;
1602f612
288057e9	static int push(LINK1 top, const char string); static int pop(LINK1 top, char buffer); static unsigned int compare(char ls1, char rs1, char ls2, char **rs2);
1602f612
288057e9	#define MAX_PATTERN_SIZ 50 /* maximum string lengths */
1602f612	static int simil(const char str1, const char str2) {
288057e9	LINK1 top = NULL; unsigned int score = 0; size_t common, total; size_t len1, len2; char rs1 = NULL, rs2 = NULL; char s1, s2; char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ]; if (strcasecmp(str1, str2) == 0) return 100; if ((s1 = cli_strdup(str1)) == NULL) return OUT_OF_MEMORY; if ((s2 = cli_strdup(str2)) == NULL) { free(s1); return OUT_OF_MEMORY; } if (((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) \|\| ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) { free(s1); free(s2); return ARRAY_OVERFLOW; } total += len2; if ((push(&top, s1) == OUT_OF_MEMORY) \|\| (push(&top, s2) == OUT_OF_MEMORY)) { free(s1); free(s2); return OUT_OF_MEMORY; } while (pop(&top, ls2) == SUCCESS) { pop(&top, ls1); common = compare(ls1, &rs1, ls2, &rs2); if (common > 0) { score += (unsigned int)common; len1 = strlen(ls1); len2 = strlen(ls2); if ((len1 > 1 && len2 >= 1) \|\| (len2 > 1 && len1 >= 1)) if ((push(&top, ls1) == OUT_OF_MEMORY) \|\| (push(&top, ls2) == OUT_OF_MEMORY)) { free(s1); free(s2); return OUT_OF_MEMORY; } len1 = strlen(rs1); len2 = strlen(rs2); if ((len1 > 1 && len2 >= 1) \|\| (len2 > 1 && len1 >= 1)) if ((push(&top, rs1) == OUT_OF_MEMORY) \|\| (push(&top, rs2) == OUT_OF_MEMORY)) { free(s1); free(s2); return OUT_OF_MEMORY; } } } free(s1); free(s2); return (total > 0) ? ((score * 200) / total) : 0;
1602f612	} static unsigned int compare(char ls1, char rs1, char ls2, char **rs2) {
288057e9	unsigned int common, maxchars = 0; bool some_similarity = FALSE; char s1, s2; char maxs1 = NULL, maxs2 = NULL, maxe1 = NULL, maxe2 = NULL; char cs1, cs2, start1, end1, end2; end1 = ls1 + strlen(ls1); end2 = ls2 + strlen(ls2); start1 = ls1; for (;;) { s1 = start1; s2 = ls2; if (s1 < end1) { while (s1 < end1 && s2 < end2) { if (tolower(s1) == tolower(s2)) { some_similarity = TRUE; cs1 = s1; cs2 = s2; common = 0; do if (s1 == end1 \|\| s2 == end2) break; else { s1++; s2++; common++; } while (tolower(s1) == tolower(s2)); if (common > maxchars) { unsigned int diff = common - maxchars; maxchars = common; maxs1 = cs1; maxs2 = cs2; maxe1 = s1; maxe2 = s2; end1 -= diff; end2 -= diff; } else s1 -= common; } else s2++; } start1++; } else break; } if (some_similarity) { maxs1 = '\0'; maxs2 = '\0'; rs1 = maxe1; *rs2 = maxe2; } return maxchars;
1602f612	} static int push(LINK1 top, const char string) {
288057e9	LINK1 element; if ((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL) return OUT_OF_MEMORY; if ((element->d1 = cli_strdup(string)) == NULL) { free(element); return OUT_OF_MEMORY; } element->next = top; top = element; return SUCCESS;
1602f612	} static int pop(LINK1 top, char buffer) {
288057e9	LINK1 t1; if ((t1 = top) != NULL) { (void)strcpy(buffer, t1->d1); top = t1->next; free(t1->d1); free((char *)t1); return SUCCESS; } return FAILURE;
1602f612	}
/*
 * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
 *
 * Returns 1 if line looks like "begin <3 digit mode> ...", else 0. Only a
 * lower case 'b' passes the quick check on the first character.
 */
int isuuencodebegin(const char *line)
{
    if (line[0] != 'b') /* quick check */
        return 0;

    if (strlen(line) < 10)
        return 0;

    /* <ctype.h> functions need an unsigned char value, not a plain char */
    return (strncasecmp(line, "begin ", 6) == 0) &&
           isdigit((unsigned char)line[6]) &&
           isdigit((unsigned char)line[7]) &&
           isdigit((unsigned char)line[8]) &&
           (line[9] == ' ');
}
#if HAVE_JSON
/*
 * Return the JSON object attached to the message. If m->jobj is NULL it is
 * first set to the result of cli_jsonobj(NULL, NULL).
 */
json_object *
messageGetJObj(message *m)
{
    assert(m != NULL);

    if (m->jobj != NULL)
        return m->jobj;

    m->jobj = cli_jsonobj(NULL, NULL);
    return m->jobj;
}
#endif