GitList

libclamav/mbox.c

e3aaff8e	/*
b025d30e	* Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk>
e3aaff8e	* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software
48b7b4a7	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA.
e3aaff8e	*/
69c62847	static char const rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $";
ea541184	#ifdef _MSC_VER #include <winsock.h> /* only needed in CL_EXPERIMENTAL */ #endif
6d6e8271	#if HAVE_CONFIG_H #include "clamav-config.h" #endif
e3aaff8e	#ifndef CL_DEBUG
548a5f96	#define NDEBUG /* map CLAMAV debug onto standard */
e3aaff8e	#endif #ifdef CL_THREAD_SAFE
98cb5cba	#ifndef _REENTRANT
e3aaff8e	#define _REENTRANT /* for Solaris 2.8 */ #endif
98cb5cba	#endif
e3aaff8e	#include <stdio.h> #include <stdlib.h> #include <errno.h> #include <assert.h> #include <string.h>
bc6bbeff	#ifdef HAVE_STRINGS_H
e3aaff8e	#include <strings.h>
bc6bbeff	#endif
e3aaff8e	#include <ctype.h> #include <time.h> #include <fcntl.h>
bc6bbeff	#ifdef HAVE_SYS_PARAM_H
d4d14218	#include <sys/param.h>
bc6bbeff	#endif #include "clamav.h" #ifndef C_WINDOWS
f10460ed	#include <dirent.h>
bc6bbeff	#endif
a0b21816	#include <limits.h>
093e013c	#include <signal.h>
e3aaff8e
cd153266	#ifdef HAVE_UNISTD_H #include <unistd.h> #endif
242bfde8	#if defined(HAVE_READDIR_R_3) \|\| defined(HAVE_READDIR_R_2) #include <stddef.h> #endif
e2875303	#ifdef CL_THREAD_SAFE #include <pthread.h> #endif
0f7f7682	#include "others.h" #include "str.h" #include "filetypes.h"
e3aaff8e	#include "mbox.h"
d77ac7de	#include "dconf.h" #define DCONF_PHISHING mctx->ctx->dconf->phishing
e3aaff8e
02927896	#ifdef CL_DEBUG
093e013c
92dbfae7	#if defined(C_LINUX) \|\| defined(C_CYGWIN)
093e013c	#include <features.h>
92dbfae7	#endif
093e013c
02927896	#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1 #define HAVE_BACKTRACE #endif
3f3f9085	#endif
02927896	#ifdef HAVE_BACKTRACE #include <execinfo.h> #include <syslog.h> static void sigsegv(int sig); static void print_trace(int use_syslog);
a9d251e0	/#define SAVE_TMP / Save the file being worked on in tmp */
02927896	#endif
c2b2d8af	#if defined(NO_STRTOK_R) \|\| !defined(CL_THREAD_SAFE)
e3aaff8e	#undef strtok_r #undef __strtok_r #define strtok_r(a,b,c) strtok(a,b) #endif
0cf4cea7	#ifdef HAVE_STDBOOL_H
89d4073d	#ifdef C_BEOS #include "SupportDefs.h" #else
edee0700	#include <stdbool.h>
89d4073d	#endif
edee0700	#else #ifdef FALSE typedef unsigned char bool; #else typedef enum { FALSE = 0, TRUE = 1 } bool;
e3aaff8e	#endif #endif
/*
 * Status codes returned by the body-parsing routines declared in this file
 * (parseEmailBody() returns one; do_multipart() and checkURLs() update one
 * through a pointer).
 */
typedef	enum {
	FAIL,
	OK,
	OK_ATTACHMENTS_NOT_SAVED,
	VIRUS,
	MAXREC
} mbox_status;
9f2024cc	#ifndef isblank #define isblank(c) (((c) == ' ') \|\| ((c) == '\t')) #endif
9b4bb8b7	#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
393a6d67
ad091acf	#define FOLLOWURLS 5 /* * Maximum number of URLs scanned in a message
59d4e1cf	* part. Helps to prevent Dialer.gen-45 and * Trojan.WinREG.Zapchast which are often * dispatched by emails which point to it. If
ad091acf	* not defined, don't check any URLs
f7cd5fbf	* It is also used to indicate the number of * 301/302 redirects we wish to follow
ad091acf	*/
9b4bb8b7
c52d991e	#include "htmlnorm.h" #include "phishcheck.h"
ea541184	#ifndef C_WINDOWS #include <netdb.h> #include <sys/socket.h> #include <netinet/in.h>
89d4073d	#ifndef C_BEOS
ea541184	#include <net/if.h> #include <arpa/inet.h> #endif
89d4073d	#endif
f4a02249	#ifndef C_WINDOWS #define closesocket(s) close(s) #endif
ea541184	#include <fcntl.h> #ifndef C_WINDOWS #include <sys/time.h> #endif #ifndef HAVE_IN_PORT_T typedef unsigned short in_port_t; #endif #ifndef HAVE_IN_ADDR_T typedef unsigned int in_addr_t; #endif #if (!defined(EALREADY)) && (defined(WSAEALREADY)) #define EALREADY WSAEALREADY #endif #if (!defined(EINPROGRESS)) && (defined(WSAEINPROGRESS)) #define EINPROGRESS WSAEINPROGRESS #endif #if (!defined(EISCONN)) && (defined(WSAEISCONN)) #define EISCONN WSAEISCONN #endif
f10460ed	/*
6e84cebb	* Define this to handle messages covered by section 7.3.2 of RFC1341.
f10460ed	* This is experimental code so it is up to YOU to (1) ensure it's secure
cf569541	* (2) periodically trim the directory of old files * * If you use the load balancing feature of clamav-milter to run clamd on
fb79b576	* more than one machine you must make sure that .../partial is on a shared
cf569541	* network filesystem
f10460ed	*/
bc6bbeff	#ifndef C_WINDOWS /* TODO: when opendir() is done */
fb79b576	#define PARTIAL_DIR
bc6bbeff	#endif
f10460ed
ac9b941b	/#define NEW_WORLD/
d72749e0
8c68fcc1	/#define SCAN_UNENCODED_BOUNCES //*
12bd9764	* Slows things down a lot and only catches unencoded copies
214621f2	* of EICAR within bounces, which don't matter
12bd9764	*/
/*
 * Values that stay constant while one mailbox is being parsed.
 * cli_parse_mbox() fills in every member once and hands a pointer to the
 * structure to the body-parsing routines.
 */
typedef	struct	mbox_ctx {
	const	char	*dir;		/* directory passed to cli_mbox() */
	const	table_t	*rfc821Table;	/* built by initialiseTables() */
	const	table_t	*subtypeTable;	/* built by initialiseTables() */
	cli_ctx	*ctx;			/* scan context passed to cli_mbox() */
} mbox_ctx;
0f7f7682	static int cli_parse_mbox(const char dir, int desc, cli_ctx ctx);
ae5c693a	static message parseEmailFile(FILE fin, const table_t rfc821Table, const char firstLine, const char *dir);
2673dc74	static message parseEmailHeaders(message m, const table_t *rfc821Table);
4c60b74f	static int parseEmailHeader(message m, const char line, const table_t *rfc821Table);
ecc3d638	static mbox_status parseEmailBody(message messageIn, text textIn, mbox_ctx *mctx, unsigned int recursion_level);
e3aaff8e	static int boundaryStart(const char line, const char boundary);
69c62847	static int boundaryEnd(const char line, const char boundary);
e3aaff8e	static int initialiseTables(table_t rfc821Table, table_t subtypeTable); static int getTextPart(message const messages[], size_t size); static size_t strip(char buf, int len); static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char arg);
3f46285b	static void saveTextPart(message m, const char dir, int destroy_text);
50df4118	static char rfc2047(const char in);
d72749e0	static char rfc822comments(const char in, char *out);
f10460ed	#ifdef PARTIAL_DIR static int rfc1341(message m, const char dir); #endif
ddea752e	static bool usefulHeader(int commandNumber, const char *cmd);
d8142abc	static char getline_from_mbox(char buffer, size_t len, FILE *fin);
a603478f	static bool isBounceStart(const char *line);
47d9cc65	static bool exportBinhexMessage(const char dir, message m); static int exportBounceMessage(text start, const mbox_ctx ctx);
ecc3d638	static message do_multipart(message mainMessage, message *messages, int i, mbox_status rc, mbox_ctx mctx, message messageIn, text **tptr, unsigned int recursion_level);
4f4a8f4a	static int count_quotes(const char *buf);
842c7d49	static bool next_is_folded_header(const text *t);
0cf4cea7	static bool newline_in_header(const char *line);
9b4bb8b7
ecc3d638	static blob getHrefs(message m, tag_arguments_t hrefs); static void hrefs_done(blob b, tag_arguments_t *hrefs);
ad422cc9	static void checkURLs(message m, mbox_ctx mctx, mbox_status rc, int is_html); static void do_checkURLs(const char dir, tag_arguments_t *hrefs);
c52d991e
#if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
/*
 * Argument bundle handed to getURL().  In the thread-safe build getURL()
 * takes a void pointer (presumably so that it can be used as a thread
 * start routine - confirm against its definition).
 */
struct arg {
	char	*url;
	const	char	*dir;
	char	*filename;
	int	depth;
};
#ifdef	CL_THREAD_SAFE
static	void	*getURL(void *a);
#else
static	void	*getURL(struct arg *arg);
#endif
#endif
/* Maximum line length according to RFC821 */
#define	RFC2821LENGTH	1000

/* Hashcodes for our hash tables */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Mime sub types */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6	/* RFC1521 */
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
#define	FAX		MIXED	/*
				 * RFC3458
				 * Drafts stated to treat it as mixed if it is
				 * not known. This disappeared in the final
				 * version (except when talking about
				 * voice-message), but it is good enough for us
				 * since we do no validation of coversheet
				 * presence etc. (which also has disappeared
				 * in the final version)
				 */
#define	ENCRYPTED	13	/*
				 * e.g. RFC2015
				 * Content-Type: multipart/encrypted;
				 * boundary="nextPart1383049.XCRrrar2yq";
				 * protocol="application/pgp-encrypted"
				 */
#define	X_BFILE		RELATED	/*
				 * BeOS, expects two parts: the file and its
				 * attributes. The attributes part comes as
				 *	Content-Type: application/x-be_attribute
				 *		name="foo"
				 * I can't find where it is defined, any
				 * pointers would be appreciated. For now
				 * we treat it as multipart/related
				 */
#define	KNOWBOT		14	/* Unknown and undocumented format? */
e3aaff8e	static const struct tableinit { const char *key; int value; } rfc821headers[] = {
303f9be9	/* TODO: make these regular expressions */
5c1150ac	{ "Content-Type", CONTENT_TYPE },
a9f386ed	{ "Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING }, { "Content-Disposition", CONTENT_DISPOSITION },
e3aaff8e	{ NULL, 0 }
15033cb6	}, mimeSubtypes[] = { /* see RFC2045 */
e3aaff8e	/* subtypes of Text / { "plain", PLAIN }, { "enriched", ENRICHED }, { "html", HTML }, { "richtext", RICHTEXT }, / subtypes of Multipart */ { "mixed", MIXED }, { "alternative", ALTERNATIVE }, { "digest", DIGEST }, { "signed", SIGNED }, { "parallel", PARALLEL }, { "related", RELATED }, { "report", REPORT },
c9b8f252	{ "appledouble", APPLEDOUBLE },
393a6d67	{ "fax-message", FAX },
9a729c80	{ "encrypted", ENCRYPTED },
6e5d95eb	{ "x-bfile", X_BFILE }, /* BeOS */
c79a2273	{ "knowbot", KNOWBOT }, /* ??? / { "knowbot-metadata", KNOWBOT }, / ??? / { "knowbot-code", KNOWBOT }, / ??? / { "knowbot-state", KNOWBOT }, / ??? */
e3aaff8e	{ NULL, 0 } };
e2875303	#ifdef CL_THREAD_SAFE static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
e3aaff8e
bac883ff	#ifndef O_BINARY #define O_BINARY 0 #endif
d72749e0	#ifdef NEW_WORLD
f24bf390
dd7f118f	#include "matcher.h"
c6023c3f	#undef PARTIAL_DIR
f24bf390	#if HAVE_MMAP #if HAVE_SYS_MMAN_H #include <sys/mman.h> #else /* HAVE_SYS_MMAN_H */ #undef HAVE_MMAP #endif
0c8e0638	#else /HAVE_MMAP/ #undef NEW_WORLD #endif
f24bf390	#endif
0c8e0638	#ifdef NEW_WORLD
ceabee13	/* * Files larger than this are scanned with the old method, should be * StreamMaxLength, I guess * If NW_MAX_FILE_SIZE is not defined, all files go through the * new method. This definition is for machines very tight on RAM, or * with large StreamMaxLength values / #define MAX_ALLOCATION 134217728 / see libclamav/others.c */ #define NW_MAX_FILE_SIZE MAX_ALLOCATION
d72749e0	struct scanlist {
ceabee13	const char start; size_t size; encoding_type decoder; / only BASE64 and QUOTEDPRINTABLE for now / struct scanlist next;
d72749e0	};
ceabee13	static struct map { const char offset; / sorted / const char word; struct map next; } map, *tail;
dd7f118f	static int save_text(cli_ctx ctx, const char dir, const char *start, size_t len);
ceabee13	static void create_map(const char begin, const char end); static void add_to_map(const char offset, const char word); static const char find_in_map(const char offset, const char *word); static void free_map(void);
f24bf390	/* * This could be the future. Instead of parsing and decoding it just decodes.
d72749e0	*
f24bf390	* USE IT AT YOUR PERIL, a large number of viruses are not detected with this * method, possibly because the decoded files must be exact and not have * extra data at the start or end, which this code will produce.
f003b79e	*
d72749e0	* Currently only supports base64 and quoted-printable * * You may also see a lot of warnings. For the moment it falls back to old * world mode if it doesn't know what to do - that'll be removed. * The code is untidy... * * FIXME: Some mailbox scans are slower with this method. I suspect that it's * because the scan can proceed to the end of the file rather than the end * of the attachment which can mean than later emails are scanned many times
0b28fbb8	*
bc0fc102	* FIXME: quoted printable doesn't know when to stop, so size related virus * matching breaks *
ceabee13	* TODO: Fall through to cli_parse_mbox() too often
c6023c3f	*
0c8e0638	* TODO: Add support for systems without mmap()
c6023c3f	* * TODO: partial_dir fall through
a05e6d45	* * FIXME: Some EICAR gets through
f24bf390	*/ int
0f7f7682	cli_mbox(const char dir, int desc, cli_ctx ctx)
f24bf390	{
0c8e0638	char start, ptr, line; const char last, p, q;
6b1cf491	size_t size;
f24bf390	struct stat statb; message m; fileblob fb;
dd7f118f	int ret = CL_CLEAN;
af7dfe53	int wasAlloced;
d72749e0	struct scanlist scanlist, scanelem;
f24bf390
7c56033f	if(dir == NULL) { cli_warnmsg("cli_mbox called with NULL dir\n"); return CL_ENULLARG; }
f24bf390	if(fstat(desc, &statb) < 0) return CL_EOPEN; size = statb.st_size; if(size == 0) return CL_CLEAN;
ceabee13	#ifdef NW_MAX_FILE_SIZE if(size > NW_MAX_FILE_SIZE)
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
ceabee13	#endif
f24bf390
c6023c3f	/cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");/
bc0fc102	#ifdef PARTIAL_DIR cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n"); #endif
6862efc7
d72749e0	start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0); if(start == MAP_FAILED)
f24bf390	return CL_EMEM; cli_dbgmsg("mmap'ed mbox\n");
d72749e0	ptr = cli_malloc(size); if(ptr) { memcpy(ptr, start, size);
af7dfe53	munmap(start, size);
d72749e0	start = ptr;
ceabee13	wasAlloced = 1;
af7dfe53	} else wasAlloced = 0;
ceabee13	/* last points to the last valid address in the array */ last = &start[size - 1]; create_map(start, last);
d72749e0	scanelem = scanlist = NULL; q = start;
ceabee13	/* * FIXME: mismatch of const char * and char * here and in later calls * to find_in_map() */ while((p = find_in_map(q, "base64")) != NULL) {
d72749e0	cli_dbgmsg("Found base64\n"); if(scanelem) { scanelem->next = cli_malloc(sizeof(struct scanlist)); scanelem = scanelem->next; } else scanlist = scanelem = cli_malloc(sizeof(struct scanlist)); scanelem->next = NULL; scanelem->decoder = BASE64; q = scanelem->start = &p[6];
ceabee13	if(((p = find_in_map(q, "\nFrom ")) != NULL) \|\| ((p = find_in_map(q, "base64")) != NULL) \|\| ((p = find_in_map(q, "quoted-printable")) != NULL)) {
5198de85	scanelem->size = (size_t)(p - q);
d72749e0	q = p;
0b28fbb8	} else { scanelem->size = (size_t)(last - scanelem->start) + 1; break; }
6b1cf491	cli_dbgmsg("base64: last %u q %u\n", (unsigned int)last, (unsigned int)q);
d72749e0	assert(scanelem->size <= size);
f24bf390	}
c6023c3f
d72749e0	q = start;
ceabee13	while((p = find_in_map(q, "quoted-printable")) != NULL) {
0b28fbb8	if(p != q) switch(p[-1]) { case ' ': case ':': case '=': /* wrong but allow it / break; default: q = &p[16]; cli_dbgmsg("Ignore quoted-printable false positive\n"); continue; / false positive */ }
72cf1461
d72749e0	cli_dbgmsg("Found quoted-printable\n");
c6023c3f	#ifdef notdef /* * The problem with quoted printable is recognising when to stop * parsing */
d72749e0	if(scanelem) { scanelem->next = cli_malloc(sizeof(struct scanlist)); scanelem = scanelem->next; } else scanlist = scanelem = cli_malloc(sizeof(struct scanlist)); scanelem->next = NULL; scanelem->decoder = QUOTEDPRINTABLE; q = scanelem->start = &p[16];
6b1cf491	cli_dbgmsg("qp: last %u q %u\n", (unsigned int)last, (unsigned int)q);
ceabee13	if(((p = find_in_map(q, "\nFrom ")) != NULL) \|\| ((p = find_in_map(q, "quoted-printable")) != NULL) \|\| ((p = find_in_map(q, "base64")) != NULL)) {
5198de85	scanelem->size = (size_t)(p - q);
d72749e0	q = p;
0b28fbb8	cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size); } else { scanelem->size = (size_t)(last - scanelem->start) + 1; break; }
d72749e0	assert(scanelem->size <= size);
c6023c3f	#else if(wasAlloced) free(start); else munmap(start, size);
ceabee13	free_map();
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
c6023c3f	#endif
f24bf390	}
d72749e0	if(scanlist == NULL) { const struct tableinit *tableinit; bool anyHeadersFound = FALSE;
0b28fbb8	bool hasuuencode = FALSE;
c6023c3f	cli_file_t type;
d72749e0	/* FIXME: message: There could of course be no decoder needed... */ for(tableinit = rfc821headers; tableinit->key; tableinit++)
ceabee13	if(find_in_map(start, tableinit->key)) {
d72749e0	anyHeadersFound = TRUE; break; }
dd7f118f	if((!anyHeadersFound) && ((p = find_in_map(start, "\nbegin ")) != NULL) && (isuuencodebegin(++p)))
0b28fbb8	/* uuencoded part */ hasuuencode = TRUE;
dd7f118f	else { cli_dbgmsg("Nothing encoded, looking for a text part to save\n"); ret = save_text(ctx, dir, start, size); if(wasAlloced) free(start); else munmap(start, size); free_map(); if(ret != CL_EFORMAT) return ret; ret = CL_CLEAN; }
0b28fbb8
ceabee13	free_map();
c6023c3f	type = cli_filetype(start, size); if((type == CL_TYPE_UNKNOWN_TEXT) && (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
ceabee13	type = CL_TYPE_MAIL;
c6023c3f
af7dfe53	if(wasAlloced) free(start); else munmap(start, size);
f24bf390
0b28fbb8	if(anyHeadersFound \|\| hasuuencode) { /* TODO: reduce the number of falls through here */
c6023c3f	if(hasuuencode)
dd7f118f	/* TODO: fast track visa */ cli_warnmsg("New world - fall back to old uudecoder\n");
c6023c3f	else
dd7f118f	cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type);
c6023c3f	if(type == CL_TYPE_MAIL)
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
c6023c3f	cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type); return CL_CLEAN;
f003b79e	}
0b28fbb8
dd7f118f	#if 0 /* I don't believe this is needed any more */
ff07f243	/* * The message could be a plain text phish * FIXME: Can't get to the option whether we are looking for * phishes or not, so assume we are, this slows things a * lot * Should be * if((type == CL_TYPE_MAIL) && (!(no-phishing)) */ if(type == CL_TYPE_MAIL)
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
dd7f118f	#endif
ff07f243	cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n", type);
d72749e0	return CL_CLEAN; }
ceabee13	#if 0 if(wasAlloced) { const char max = NULL; for(scanelem = scanlist; scanelem; scanelem = scanelem->next) { const char end = &scanelem->start[scanelem->size]; if(end > max) max = end; } if(max < last) printf("could free %d bytes\n", (int)(last - max)); } #endif
d72749e0	for(scanelem = scanlist; scanelem; scanelem = scanelem->next) { if(scanelem->decoder == BASE64) {
ceabee13	const char *b64start = scanelem->start; size_t b64size = scanelem->size;
d72749e0	cli_dbgmsg("b64size = %lu\n", b64size);
012682d0	while((b64start != '\n') && (b64start != '\r')) {
f003b79e	b64start++; b64size--;
d72749e0	} /* * Look for the end of the headers / while(b64start < last) { if(b64start == ';') {
f003b79e	b64start++; b64size--;
012682d0	} else if((memcmp(b64start, "\n\n", 2) == 0) \|\| (memcmp(b64start, "\r\r", 2) == 0)) { b64start += 2; b64size -= 2; break; } else if(memcmp(b64start, "\r\n\r\n", 4) == 0) { b64start += 4; b64size -= 4; break;
c6023c3f	} else if(memcmp(b64start, "\n \n", 3) == 0) { /* * Some viruses are broken and have * one space character at the end of * the headers / b64start += 3; b64size -= 3; break; } else if(memcmp(b64start, "\r\n \r\n", 5) == 0) { / * Some viruses are broken and have * one space character at the end of * the headers */ b64start += 5; b64size -= 5; break;
f003b79e	}
5b76248c	b64start++;
d72749e0	b64size--;
5b76248c	}
f003b79e
d72749e0	if(b64size > 0L)
0b28fbb8	while((!isalnum(b64start)) && (b64start != '/')) {
d72749e0	if(b64size-- == 0L) break; b64start++; } if(b64size > 0L) {
c04baf9e	int lastline;
a9ecf619	char tmpfilename; unsigned char uptr;
d72749e0	cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
c04baf9e	if((fb = fileblobCreate()) == NULL) {
dd7f118f	free_map();
a9ecf619	if(wasAlloced) free(start); else munmap(start, size); return CL_EMEM; }
c04baf9e	tmpfilename = cli_gentemp(dir);
dd7f118f	if(tmpfilename == NULL) { free_map();
a9ecf619	if(wasAlloced) free(start); else munmap(start, size);
c04baf9e	fileblobDestroy(fb);
a9ecf619
c04baf9e	return CL_EMEM;
a9ecf619	}
c04baf9e	fileblobSetFilename(fb, dir, tmpfilename); free(tmpfilename);
a9ecf619
d72749e0	line = NULL;
f003b79e
d72749e0	m = messageCreate();
c6023c3f	if(m == NULL) {
dd7f118f	free_map();
c6023c3f	if(wasAlloced) free(start); else munmap(start, size);
c04baf9e	fileblobDestroy(fb);
c6023c3f
d72749e0	return CL_EMEM;
c6023c3f	}
d72749e0	messageSetEncoding(m, "base64");
f003b79e
a603478f	messageSetCTX(m, ctx); fileblobSetCTX(fb, ctx);
012682d0	lastline = 0;
0b28fbb8	do {
a9ecf619	int length = 0, datalen;
012682d0	char newline, equal;
a9ecf619	unsigned char bigbuf, data; unsigned char smallbuf[1024];
ff07f243	const char *cptr;
f003b79e
d72749e0	/printf("%ld: ", b64size); fflush(stdout);/
f003b79e
ff07f243	for(cptr = b64start; b64size && (cptr != '\n') && (cptr != '\r'); cptr++) {
d72749e0	length++; --b64size; }
f003b79e
d72749e0	/printf("%d: ", length); fflush(stdout);/
f003b79e
468c0f21	newline = cli_realloc(line, length + 1); if(newline == NULL) break; line = newline;
f003b79e
d72749e0	memcpy(line, b64start, length); line[length] = '\0';
f003b79e
012682d0	equal = strchr(line, '='); if(equal) { lastline++; *equal = '\0'; }
d72749e0	/puts(line);/
f003b79e
a9ecf619	#if 0
d72749e0	if(messageAddStr(m, line) < 0) break;
a9ecf619	#endif
c04baf9e	if(length >= (int)sizeof(smallbuf)) {
a9ecf619	datalen = length + 2; data = bigbuf = cli_malloc(datalen); if(data == NULL) break; } else { bigbuf = NULL; data = smallbuf; datalen = sizeof(data) - 1; } uptr = decodeLine(m, BASE64, line, data, datalen); if(uptr == NULL) { if(bigbuf) free(bigbuf); break; } /cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));/
c04baf9e	datalen = fileblobAddData(fb, data, (size_t)(uptr - data));
a9ecf619	if(bigbuf) free(bigbuf);
f003b79e
c04baf9e	if(datalen < 0) break;
d4a7dd82	if(fileblobContainsVirus(fb))
01c99f53	break;
c04baf9e
ff07f243	if((b64size > 0) && (*cptr == '\r')) { b64start = ++cptr;
012682d0	--b64size; }
ff07f243	if((b64size > 0) && (*cptr == '\n')) { b64start = ++cptr;
d72749e0	--b64size; }
012682d0	if(lastline)
d72749e0	break;
0b28fbb8	} while(b64size > 0L);
a9ecf619	if(m->base64chars) { unsigned char data[4]; uptr = base64Flush(m, data); if(uptr) { /cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));/
c04baf9e	(void)fileblobAddData(fb, data, (size_t)(uptr - data));
a9ecf619	} }
c04baf9e	if(fb) fileblobDestroy(fb); else
d4a7dd82	ret = -1;
a9ecf619	messageDestroy(m); free(line);
f24bf390	}
d72749e0	} else if(scanelem->decoder == QUOTEDPRINTABLE) {
ceabee13	const char *quotedstart = scanelem->start; size_t quotedsize = scanelem->size;
f003b79e
d72749e0	cli_dbgmsg("quotedsize = %lu\n", quotedsize); while(*quotedstart != '\n') {
f003b79e	quotedstart++; quotedsize--;
d72749e0	} /* * Look for the end of the headers / while(quotedstart < last) { if(quotedstart == ';') {
f003b79e	quotedstart++; quotedsize--;
012682d0	} else if((quotedstart == '\n') \|\| (quotedstart == '\r')) {
d72749e0	quotedstart++; quotedsize--; if((quotedstart == '\n') \|\| (quotedstart == '\r')) { quotedstart++; quotedsize--; break; }
f003b79e	}
d72749e0	quotedstart++; quotedsize--;
f003b79e	}
f24bf390
d72749e0	while(!isalnum(*quotedstart)) { quotedstart++; quotedsize--; }
f24bf390
d72749e0	if(quotedsize > 0L) { cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
f24bf390
d72749e0	m = messageCreate();
c6023c3f	if(m == NULL) {
dd7f118f	free_map();
c6023c3f	if(wasAlloced) free(start); else munmap(start, size);
d72749e0	return CL_EMEM;
c6023c3f	}
d72749e0	messageSetEncoding(m, "quoted-printable");
a603478f	messageSetCTX(m, ctx);
f24bf390
d72749e0	line = NULL;
f24bf390
0b28fbb8	do {
d72749e0	int length = 0;
468c0f21	char *newline;
ff07f243	const char *cptr;
f24bf390
d72749e0	/printf("%ld: ", quotedsize); fflush(stdout);/
f24bf390
ff07f243	for(cptr = quotedstart; quotedsize && (cptr != '\n') && (cptr != '\r'); cptr++) {
d72749e0	length++; --quotedsize; }
f24bf390
d72749e0	/printf("%d: ", length); fflush(stdout);/
f24bf390
468c0f21	newline = cli_realloc(line, length + 1); if(newline == NULL) break; line = newline;
f24bf390
d72749e0	memcpy(line, quotedstart, length); line[length] = '\0';
f24bf390
d72749e0	/puts(line);/
f24bf390
d72749e0	if(messageAddStr(m, line) < 0) break;
f003b79e
ff07f243	if((quotedsize > 0) && (*cptr == '\r')) { quotedstart = ++cptr;
012682d0	--quotedsize; }
ff07f243	if((quotedsize > 0) && (*cptr == '\n')) { quotedstart = ++cptr;
d72749e0	--quotedsize; }
0b28fbb8	} while(quotedsize > 0L);
d72749e0	free(line);
2673dc74	fb = messageToFileblob(m, dir, 1);
d72749e0	messageDestroy(m);
f003b79e
d4a7dd82	if(fb)
d72749e0	fileblobDestroy(fb);
d4a7dd82	else ret = -1;
d72749e0	}
f24bf390	} }
d72749e0	scanelem = scanlist;
dd7f118f	/* * There could be a phish in the plain text part, so save that * FIXME: Can't get to the option whether we are looking for * phishes or not, so assume we are, this slows things a * lot * Should be * if((type == CL_TYPE_MAIL) && (!(no-phishing)) */ ret = save_text(ctx, dir, start, size); free_map();
d72749e0	while(scanelem) { struct scanlist *n = scanelem->next; free(scanelem); scanelem = n; }
f24bf390
af7dfe53	if(wasAlloced) free(start); else munmap(start, size);
f24bf390
af7dfe53	/* * FIXME: Need to run cl_scandir() here and return that value */
a9ecf619	cli_dbgmsg("cli_mbox: ret = %d\n", ret);
dd7f118f	if(ret != CL_EFORMAT) return ret;
f003b79e
dd7f118f	cli_warnmsg("New world - don't know what to do - fall back to old world\n");
d72749e0	/* Fall back for now */
0b28fbb8	lseek(desc, 0L, SEEK_SET);
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
f24bf390	}
ceabee13
dd7f118f	/* * Save a text part - it could contain phish or jscript / static int save_text(cli_ctx ctx, const char dir, const char start, size_t len) { const char p; if((p = find_in_map(start, "\n\n")) \|\| (p = find_in_map(start, "\r\n\r\n"))) { const char q; fileblob fb; char tmpfilename; if(((q = find_in_map(start, "base64")) == NULL) && ((q = find_in_map(start, "quoted_printable")) == NULL)) { cli_dbgmsg("It's all plain text!\n"); if(p == '\r') p += 4; else p += 2; len -= (p - start); } else if(((q = find_in_map(p, "\nFrom ")) == NULL) && ((q = find_in_map(p, "base64")) == NULL) && ((q = find_in_map(p, "quoted-printable")) == NULL)) cli_dbgmsg("Can't find end of plain text - assume it's all\n"); else len = (size_t)(q - p); if(len < 5) { cli_dbgmsg("save_text: Too small\n"); return CL_EFORMAT; } if(ctx->scanned) ctx->scanned += len / CL_COUNT_PRECISION; /* * This doesn't work, cli_scanbuff isn't designed to be used * in this way. It gets the "filetype" wrong and then * doesn't scan correctly */
c3045077	if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS) {
dd7f118f	cli_dbgmsg("save_text: found %s\n", ctx->virname); return CL_VIRUS; } fb = fileblobCreate(); if(fb == NULL) return CL_EMEM; tmpfilename = cli_gentemp(dir); if(tmpfilename == NULL) { fileblobDestroy(fb); return CL_ETMPFILE; } cli_dbgmsg("save plain bit to %s, %u bytes\n", tmpfilename, len); fileblobSetFilename(fb, dir, tmpfilename); free(tmpfilename); (void)fileblobAddData(fb, (const unsigned char )p, len); fileblobDestroy(fb); return CL_SUCCESS; } cli_dbgmsg("No text part found to save\n"); return CL_EFORMAT; }
ceabee13	static void create_map(const char begin, const char end) { const struct wordlist { const char *word; int len; } wordlist[] = { { "base64", 6 }, { "quoted-printable", 16 }, { "\nbegin ", 7 },
dd7f118f	{ "\nFrom ", 6 }, { "\n\n", 2 }, { "\r\n\r\n", 4 },
ceabee13	{ NULL, 0 } }; if(map) { cli_warnmsg("create_map called without free_map\n"); free_map(); } while(begin < end) { const struct wordlist word; for(word = wordlist; word->word; word++) { if((end - begin) < word->len) continue; if(strncasecmp(begin, word->word, word->len) == 0) { add_to_map(begin, word->word); break; } } begin++; } } / To sort map, assume 'offset' is presented in sorted order / static void add_to_map(const char offset, const char word) { if(map) { tail->next = cli_malloc(sizeof(struct map)); / FIXME: verify / tail = tail->next; } else map = tail = cli_malloc(sizeof(struct map)); / FIXME: verify / tail->offset = offset; tail->word = word; tail->next = NULL; } static const char find_in_map(const char offset, const char word) { const struct map item; for(item = map; item; item = item->next) if(item->offset >= offset) if(strcasecmp(word, item->word) == 0) return item->offset; return NULL; } static void free_map(void) { while(map) { struct map next = map->next; free(map); map = next; } map = NULL; } #else /!NEW_WORLD/
f24bf390	int
0f7f7682	cli_mbox(const char dir, int desc, cli_ctx ctx)
f24bf390	{
7c56033f	if(dir == NULL) { cli_warnmsg("cli_mbox called with NULL dir\n"); return CL_ENULLARG; }
0f7f7682	return cli_parse_mbox(dir, desc, ctx);
f24bf390	} #endif
e3aaff8e	/* * TODO: when signal handling is added, need to remove temp files when a
ef822cfc	* signal is received
e3aaff8e	* TODO: add option to scan in memory not via temp files, perhaps with a
1bfbedd4	* named pipe or memory mapped file, though this won't work on big e-mails * containing many levels of encapsulated messages - it'd just take too much * RAM
049a18b9	* TODO: parse .msg format files
c9b8f252	* TODO: fully handle AppleDouble format, see
ef822cfc	* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
89670d69	* TODO: ensure parseEmailHeaders is always called before parseEmailBody * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
9f2024cc	* TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s: * e.g. \0Content-Type: application/binary;
e3aaff8e	*/
f24bf390	static int
0f7f7682	cli_parse_mbox(const char dir, int desc, cli_ctx ctx)
e3aaff8e	{
049a18b9	int retcode, i;
ddea752e	message *body;
e3aaff8e	FILE *fd;
85bb253e	char buffer[RFC2821LENGTH + 1];
c1fce7f7	mbox_ctx mctx;
c7b69776	#ifdef HAVE_BACKTRACE
02927896	void (*segv)(int); #endif
393a6d67	static table_t rfc821, subtype;
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	char tmpfilename[16]; int tmpfd; #endif
e3aaff8e
4c586bc8	#ifdef NEW_WORLD cli_dbgmsg("fall back to old world\n"); #else
e3aaff8e	cli_dbgmsg("in mbox()\n");
4c586bc8	#endif
e3aaff8e
049a18b9	i = dup(desc); if((fd = fdopen(i, "rb")) == NULL) { cli_errmsg("Can't open descriptor %d\n", desc); close(i);
ef822cfc	return CL_EOPEN;
049a18b9	}
59d4e1cf	rewind(fd); /* bug 240 */
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	/* * Copy the incoming mail for debugging, so that if it falls over * we have a copy of the offending email. This is debugging code * that you shouldn't of course install in a live environment. I am * not interested in hearing about security issues with this section * of the parser. / strcpy(tmpfilename, "/tmp/mboxXXXXXX"); tmpfd = mkstemp(tmpfilename); if(tmpfd < 0) { perror(tmpfilename); cli_errmsg("Can't make debugging file\n"); } else { FILE tmpfp = fdopen(tmpfd, "w"); if(tmpfp) { while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL) fputs(buffer, tmpfp); fclose(tmpfp); rewind(fd); } else cli_errmsg("Can't fdopen debugging file\n"); } #endif
4b187745	if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
049a18b9	/* empty message */ fclose(fd);
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	unlink(tmpfilename); #endif
ef822cfc	return CL_CLEAN;
049a18b9	}
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&tables_mutex); #endif
393a6d67	if(rfc821 == NULL) { assert(subtype == NULL);
51fc2aa8
393a6d67	if(initialiseTables(&rfc821, &subtype) < 0) { rfc821 = NULL; subtype = NULL;
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
51fc2aa8	fclose(fd);
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	unlink(tmpfilename); #endif
ef822cfc	return CL_EMEM;
51fc2aa8	}
e3aaff8e	}
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
e3aaff8e
3f3f9085	#ifdef HAVE_BACKTRACE
02927896	segv = signal(SIGSEGV, sigsegv); #endif
a603478f	retcode = CL_SUCCESS;
e791b5ac	body = NULL;
c1fce7f7	mctx.dir = dir; mctx.rfc821Table = rfc821; mctx.subtypeTable = subtype; mctx.ctx = ctx;
89670d69	/*
45dc1456	* Is it a UNIX style mbox with more than one
89670d69	* mail message, or just a single mail message?
45dc1456	* * TODO: It would be better if we called cli_scandir here rather than * in cli_scanmail. Then we could improve the way mailboxes with more * than one message is handled, e.g. stopping parsing when an infected * message is stopped, and giving a better indication of which message * within the mailbox is infected
89670d69	*/
25071deb	/if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {/ if(strncmp(buffer, "From ", 5) == 0) {
e3aaff8e	/*
049a18b9	* Have been asked to check a UNIX style mbox file, which * may contain more than one e-mail message to decode
f003b79e	* * It would be far better for scanners.c to do this splitting * and do this * FOR EACH mail in the mailbox * DO * pass this mail to cli_mbox -- * scan this file * IF this file has a virus quit * THEN * return CL_VIRUS * FI * END * This would remove a problem with this code that it can * fill up the tmp directory before it starts scanning
e3aaff8e	*/
ddea752e	bool lastLineWasEmpty; int messagenumber; message *m = messageCreate(); if(m == NULL) { fclose(fd); #ifdef HAVE_BACKTRACE signal(SIGSEGV, segv); #endif
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	unlink(tmpfilename); #endif
ddea752e	return CL_EMEM; } lastLineWasEmpty = FALSE; messagenumber = 1;
a603478f	messageSetCTX(m, ctx);
e3aaff8e
049a18b9	do {
89670d69	cli_chomp(buffer);
25071deb	/if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {/ if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
f35bc674	cli_dbgmsg("Deal with email number %d\n", messagenumber++);
e3aaff8e	/*
89670d69	* End of a message in the mail box
e3aaff8e	*/
b2223aad	body = parseEmailHeaders(m, rfc821);
4f1d0bfc	if(body == NULL) { messageReset(m); continue; }
a603478f	messageSetCTX(body, ctx);
89670d69	messageDestroy(m);
a603478f	if(messageGetBody(body)) {
ecc3d638	mbox_status rc = parseEmailBody(body, NULL, &mctx, 0); if(rc == FAIL) {
e17491b2	messageReset(body); m = body; continue;
ecc3d638	} else if(rc == VIRUS) {
a603478f	cli_dbgmsg("Message number %d is infected\n", messagenumber); retcode = CL_VIRUS;
826c9892	m = NULL;
a603478f	break;
e17491b2	}
a603478f	}
e3aaff8e	/*
89670d69	* Starting a new message, throw away all the
f24bf390	* information about the old one. It would * be best to be able to scan this message * now, but cli_scanfile needs arguments * that haven't been passed here so it can't be * called
e3aaff8e	*/
89670d69	m = body; messageReset(body);
a603478f	messageSetCTX(body, ctx);
e3aaff8e
049a18b9	cli_dbgmsg("Finished processing message\n");
89670d69	} else
547b89de	lastLineWasEmpty = (bool)(buffer[0] == '\0');
4945127a
fa5661be	if(isuuencodebegin(buffer)) {
5198de85	/*
4945127a	* Fast track visa to uudecode. * TODO: binhex, yenc */
3953039b	if(uudecodeFile(m, buffer, dir, fd) < 0)
fa5661be	if(messageAddStr(m, buffer) < 0) break; } else
69c62847	/* at this point, the \n has been removed */
4945127a	if(messageAddStr(m, buffer) < 0) break;
4b187745	} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
f35bc674
ddea752e	fclose(fd);
a603478f	if(retcode == CL_SUCCESS) { cli_dbgmsg("Extract attachments from email %d\n", messagenumber); body = parseEmailHeaders(m, rfc821); } if(m) messageDestroy(m);
4f1d0bfc	} else {
7e577f26	/* * It's a single message, parse the headers then the body
4f1d0bfc	*/
69543a9d	if(strncmp(buffer, "P I ", 4) == 0) /* * CommuniGate Pro format: ignore headers until * blank line */
4b187745	while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
69543a9d	(strchr("\r\n", buffer[0]) == NULL)) ; /* * Ignore any blank lines at the top of the message */
4f1d0bfc	while(strchr("\r\n", buffer[0]) &&
d8142abc	(getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
87c9313e	;
9ed148a8	buffer[sizeof(buffer) - 1] = '\0';
4b187745
ae5c693a	body = parseEmailFile(fd, rfc821, buffer, dir);
ddea752e	fclose(fd);
4f1d0bfc	}
7e577f26
4f1d0bfc	if(body) { /* * Write out the last entry in the mailbox */
a603478f	if((retcode == CL_SUCCESS) && messageGetBody(body)) { messageSetCTX(body, ctx);
242ffd7a	switch(parseEmailBody(body, NULL, &mctx, 0)) {
ecc3d638	case FAIL:
69c62847	/* * beware: cli_magic_scandesc(), * changes this into CL_CLEAN, so only * use it to inform the higher levels * that we couldn't decode it because * it isn't an mbox, not to signal * decoding errors on what is a valid * mbox */
a603478f	retcode = CL_EFORMAT; break;
69c62847	case MAXREC: retcode = CL_EMAXREC; break;
ecc3d638	case VIRUS:
a603478f	retcode = CL_VIRUS; break; } }
e3aaff8e
4f1d0bfc	/* * Tidy up and quit */ messageDestroy(body); }
e3aaff8e	cli_dbgmsg("cli_mbox returning %d\n", retcode);
3f3f9085	#ifdef HAVE_BACKTRACE
02927896	signal(SIGSEGV, segv); #endif
a9d251e0	#ifdef SAVE_TMP
5cdb01fc	unlink(tmpfilename); #endif
e3aaff8e	return retcode; } /*
ddea752e	* Read in an email message from fin, parse it, and return the message
7e577f26	*
ddea752e	* FIXME: files full of new lines and nothing else are * handled ungracefully... / static message
ae5c693a	parseEmailFile(FILE fin, const table_t rfc821, const char firstLine, const char dir)
ddea752e	{ bool inHeader = TRUE;
6e3d492a	bool bodyIsEmpty = TRUE;
06466233	bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
ddea752e	message *ret; bool anyHeadersFound = FALSE; int commandNumber = -1;
41b7a56b	char fullline = NULL, boundary = NULL;
ddea752e	size_t fulllinelength = 0;
85bb253e	char buffer[RFC2821LENGTH + 1];
ddea752e	cli_dbgmsg("parseEmailFile\n"); ret = messageCreate(); if(ret == NULL) return NULL; strcpy(buffer, firstLine); do {
4f4a8f4a	const char *line;
ddea752e	(void)cli_chomp(buffer);
4f4a8f4a	if(buffer[0] == '\0')
72cf1461	line = NULL;
4f4a8f4a	else line = buffer;
ddea752e	/* * Don't blank lines which are only spaces from headers, * otherwise they'll be treated as the end of header marker */
41b7a56b	if(lastWasBlank) { lastWasBlank = FALSE; if(boundaryStart(buffer, boundary)) { cli_dbgmsg("Found a header line with space that should be blank\n"); inHeader = FALSE; } }
ddea752e	if(inHeader) {
0ed29506	cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n", buffer ? buffer : "", fullline);
2a0041b8	/* * Ensure wide characters are handled where * sizeof(char) > 1 */ if(line && isspace(line[0] & 0xFF)) {
41b7a56b	char copy[sizeof(buffer)]; strcpy(copy, buffer); strstrip(copy); if(copy[0] == '\0') { /*
4d4166a9	* The header line contains only white * space. This is not the end of the * headers according to RFC2822, but * some MUAs will handle it as though * it were, and virus writers exploit * this bug. We can't just break from * the loop here since that would allow * other exploits such as inserting a * white space line before the * content-type line. So we just have * to make a best guess. Sigh.
41b7a56b	*/ if(fullline) { if(parseEmailHeader(ret, fullline, rfc821) < 0) continue; free(fullline); fullline = NULL; }
300a8ae9	if(boundary \|\| ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
41b7a56b	lastWasBlank = TRUE; continue; } } }
72cf1461	if((line == NULL) && (fullline == NULL)) { /* empty line */
0ed29506	/* * A blank line signifies the end of * the header and the start of the text / if(!anyHeadersFound) / Ignore the junk at the top */ continue;
5860ae08
0ed29506	cli_dbgmsg("End of header information\n"); inHeader = FALSE; bodyIsEmpty = TRUE;
ddea752e	} else { char *ptr;
d6d5763c	int lookahead;
ddea752e	if(fullline == NULL) {
85bb253e	char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
ddea752e	/* * Continuation of line we're ignoring? */
0ed29506	if(isblank(line[0]))
ddea752e	continue; /* * Is this a header we're interested in? */
72cf1461	if((strchr(line, ':') == NULL) \|\| (cli_strtokbuf(line, 0, ":", cmd) == NULL)) { if(strncmp(line, "From ", 5) == 0)
ddea752e	anyHeadersFound = TRUE; continue; }
d72749e0	ptr = rfc822comments(cmd, out);
ddea752e	commandNumber = tableFind(rfc821, ptr ? ptr : cmd); switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE: anyHeadersFound = TRUE; break; default: if(!anyHeadersFound) anyHeadersFound = usefulHeader(commandNumber, cmd); continue; }
0cf4cea7	fullline = cli_strdup(line);
72cf1461	fulllinelength = strlen(line) + 1; } else if(line != NULL) { fulllinelength += strlen(line);
468c0f21	ptr = cli_realloc(fullline, fulllinelength); if(ptr == NULL) continue; fullline = ptr;
72cf1461	strcat(fullline, line);
ddea752e	} assert(fullline != NULL); lookahead = getc(fin); if(lookahead != EOF) { ungetc(lookahead, fin); /* * Section B.2 of RFC822 says TAB or * SPACE means a continuation of the * previous entry. * * Add all the arguments on the line */
9f2024cc	if(isblank(lookahead))
ddea752e	continue; }
11f253d6	/* * Handle broken headers, where the next * line isn't indented by whitespace / if(fullline[fulllinelength - 2] == ';') / Add arguments to this line */ continue;
4f4a8f4a	if(line && (count_quotes(fullline) & 1)) continue;
ddea752e
d72749e0	ptr = rfc822comments(fullline, NULL);
ddea752e	if(ptr) { free(fullline); fullline = ptr; } if(parseEmailHeader(ret, fullline, rfc821) < 0) continue; free(fullline); fullline = NULL; }
fa5661be	} else if(line && isuuencodebegin(line)) {
ae5c693a	/* * Fast track visa to uudecode. * TODO: binhex, yenc */
6e3d492a	bodyIsEmpty = FALSE;
3953039b	if(uudecodeFile(ret, line, dir, fin) < 0)
fa5661be	if(messageAddStr(ret, line) < 0) break;
06466233	} else { if(line == NULL) {
c52d991e	/* * Although this would save time and RAM, some * phish signatures have been built which need * the blank lines */ if(lastBodyLineWasBlank && (messageGetMimeType(ret) != TEXT)) {
06466233	cli_dbgmsg("Ignoring consecutive blank lines in the body\n"); continue; } lastBodyLineWasBlank = TRUE;
6e3d492a	} else { if(bodyIsEmpty) { /* * Broken message: new line in the * middle of the headers, so the first * line of the body is in fact * the last lines of the header */
0cf4cea7	if(newline_in_header(line))
6e3d492a	continue;
0cf4cea7	bodyIsEmpty = FALSE;
6e3d492a	}
06466233	lastBodyLineWasBlank = FALSE;
6e3d492a	}
06466233
72cf1461	if(messageAddStr(ret, line) < 0)
ddea752e	break;
06466233	}
d8142abc	} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
ddea752e
300a8ae9	if(boundary) free(boundary);
ddea752e	if(fullline) { if(*fullline) switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE:
0d35f10f	cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
ddea752e	} free(fullline); } if(!anyHeadersFound) { /* * False positive in believing we have an e-mail when we don't / messageDestroy(ret); cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n"); return NULL; } messageClean(ret); cli_dbgmsg("parseEmailFile: return\n"); return ret; } / * The given message contains a raw e-mail.
e06d34dc	* * Returns the message's body with the correct arguments set
f73920a4	* * The downside of this approach is that for a short time we have two copies * of the message in memory, the upside is that it makes for easier parsing * of encapsulated messages, and in the long run uses less memory in those * scenarios
ddea752e	* * TODO: remove the duplication with parseEmailFile
7e577f26	*/
e06d34dc	static message *
2673dc74	parseEmailHeaders(message m, const table_t rfc821)
7e577f26	{
e06d34dc	bool inHeader = TRUE;
0d35f10f	bool bodyIsEmpty = TRUE;
b2223aad	const text *t;
89670d69	message *ret;
4f1d0bfc	bool anyHeadersFound = FALSE;
15033cb6	int commandNumber = -1;
efb5f16c	char *fullline = NULL;
2ad0c86e	size_t fulllinelength = 0;
89670d69
02927896	cli_dbgmsg("parseEmailHeaders\n");
89670d69	if(m == NULL) return NULL; ret = messageCreate();
7e577f26
b2223aad	for(t = messageGetBody(m); t; t = t->t_next) {
0cf4cea7	const char *line;
7e577f26
b2223aad	if(t->t_line)
0cf4cea7	line = lineGetData(t->t_line);
b2223aad	else
0cf4cea7	line = NULL;
7e577f26
28010d29	if(inHeader) {
4e75d9b6	cli_dbgmsg("parseEmailHeaders: check '%s'\n",
0cf4cea7	line ? line : ""); if(line == NULL) {
4e75d9b6	/* * A blank line signifies the end of * the header and the start of the text */ cli_dbgmsg("End of header information\n");
ad091acf	if(!anyHeadersFound) { cli_dbgmsg("Nothing interesting in the header\n"); break; }
0d35f10f	inHeader = FALSE; bodyIsEmpty = TRUE;
2ad0c86e	} else {
b116962d	char *ptr;
2ad0c86e	if(fullline == NULL) {
85bb253e	char cmd[RFC2821LENGTH + 1];
45aba293	/* * Continuation of line we're ignoring? */
0cf4cea7	if(isblank(line[0]))
45aba293	continue; /* * Is this a header we're interested in? */
0cf4cea7	if((strchr(line, ':') == NULL) \|\| (cli_strtokbuf(line, 0, ":", cmd) == NULL)) { if(strncmp(line, "From ", 5) == 0)
b116962d	anyHeadersFound = TRUE;
45aba293	continue;
b116962d	}
45aba293
d72749e0	ptr = rfc822comments(cmd, NULL);
59921c02	commandNumber = tableFind(rfc821, ptr ? ptr : cmd); if(ptr) free(ptr);
45aba293	switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE:
b116962d	anyHeadersFound = TRUE;
45aba293	break; default:
ddea752e	if(!anyHeadersFound) anyHeadersFound = usefulHeader(commandNumber, cmd);
45aba293	continue; }
0cf4cea7	fullline = cli_strdup(line); fulllinelength = strlen(line) + 1; } else if(line) { fulllinelength += strlen(line);
468c0f21	ptr = cli_realloc(fullline, fulllinelength); if(ptr == NULL) continue; fullline = ptr;
0cf4cea7	strcat(fullline, line);
2ad0c86e	}
13462674
ddea752e	assert(fullline != NULL);
2ad0c86e
842c7d49	if(next_is_folded_header(t)) /* Add arguments to this line */ continue;
2ad0c86e
4f4a8f4a	if(count_quotes(fullline) & 1)
2ad0c86e	continue;
d72749e0	ptr = rfc822comments(fullline, NULL);
2ad0c86e	if(ptr) { free(fullline); fullline = ptr; }
c77c8809
ddea752e	if(parseEmailHeader(ret, fullline, rfc821) < 0) continue;
28010d29
ddea752e	free(fullline); fullline = NULL;
f12d2498	}
0d35f10f	} else { if(bodyIsEmpty) {
0cf4cea7	if(line == NULL)
0d35f10f	/* throw away leading blank lines / continue; / * Broken message: new line in the * middle of the headers, so the first * line of the body is in fact * the last lines of the header */
0cf4cea7	if(newline_in_header(line))
0d35f10f	continue; bodyIsEmpty = FALSE; } /if(t->t_line && isuuencodebegin(t->t_line)) puts("FIXME: add fast visa here");/
0cf4cea7	/cli_dbgmsg("Add line to body '%s'\n", line);/
b2223aad	if(messageAddLine(ret, t->t_line) < 0)
0e4e16d4	break;
0d35f10f	}
d879a7b0	}
e06d34dc
efb5f16c	if(fullline) {
2ad0c86e	if(*fullline) switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE:
aaaae842	cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
2ad0c86e	}
efb5f16c	free(fullline); }
4f1d0bfc	if(!anyHeadersFound) { /* * False positive in believing we have an e-mail when we don't */ messageDestroy(ret); cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n"); return NULL; }
93d41ee4	messageClean(ret);
e17491b2	cli_dbgmsg("parseEmailHeaders: return\n");
e06d34dc	return ret;
7e577f26	} /*
4c60b74f	* Handle a header line of an email message */ static int
393a6d67	parseEmailHeader(message m, const char line, const table_t *rfc821)
4c60b74f	{
0960ff5e	char *cmd;
4c60b74f	int ret = -1; #ifdef CL_THREAD_SAFE char *strptr; #endif
97867f21	const char *separater;
50df4118	char *copy, tokenseparater[2];
4c60b74f
20d3dde9	cli_dbgmsg("parseEmailHeader '%s'\n", line);
97867f21	/* * In RFC822 the separater between the key a value is a colon, * e.g. Content-Transfer-Encoding: base64 * However some MUA's are lapse about this and virus writers exploit * this hole, so we need to check all known possiblities / for(separater = ":= "; separater; separater++) if(strchr(line, separater) != NULL) break; if(separater == '\0')
1bfbedd4	return -1;
50df4118	copy = rfc2047(line); if(copy == NULL)
8ebe8dbc	/* an RFC checker would return -1 here */
0cf4cea7	copy = cli_strdup(line);
f2f25418
97867f21	tokenseparater[0] = *separater; tokenseparater[1] = '\0';
548a5f96	#ifdef CL_THREAD_SAFE
97867f21	cmd = strtok_r(copy, tokenseparater, &strptr);
548a5f96	#else cmd = strtok(copy, tokenseparater); #endif
4c60b74f
3499d81e	if(cmd && (strstrip(cmd) > 0)) {
548a5f96	#ifdef CL_THREAD_SAFE
4c60b74f	char *arg = strtok_r(NULL, "", &strptr);
548a5f96	#else char *arg = strtok(NULL, ""); #endif
4c60b74f	if(arg) /* * Found a header such as * Content-Type: multipart/mixed; * set arg to be * "multipart/mixed" and cmd to
a9f386ed	* be "Content-Type"
4c60b74f	*/
393a6d67	ret = parseMimeHeader(m, cmd, rfc821, arg);
4c60b74f	}
50df4118	free(copy);
4c60b74f	return ret; } /*
e3aaff8e	* This is a recursive routine.
45dc1456	* FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION
e3aaff8e	*
7e577f26	* This function parses the body of mainMessage and saves its attachments in dir *
e06d34dc	* mainMessage is the buffer to be parsed, it contains an e-mail's body, without
f12d2498	* any headers. First time of calling it'll be * the whole message. Later it'll be parts of a multipart message
e3aaff8e	* textIn is the plain text message being built up so far */
ecc3d638	static mbox_status
242ffd7a	parseEmailBody(message messageIn, text textIn, mbox_ctx *mctx, unsigned int recursion_level)
e3aaff8e	{
42eebd87	mbox_status rc;
2673dc74	text aText = textIn; message mainMessage = messageIn;
b0b860f1	fileblob *fb;
a603478f	bool infected = FALSE;
c3a79a7a	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
8386c723
b0b860f1	cli_dbgmsg("in parseEmailBody\n");
e3aaff8e
23841257	if(mctx->ctx->limits && mctx->ctx->limits->maxmailrec) {
72910996	const cli_ctx ctx = mctx->ctx; / needed for BLOCKMAX :-( */
5684fccf	/* * This is approximate */
a5faab66	if(recursion_level > ctx->limits->maxmailrec) { cli_warnmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level); if(BLOCKMAX) { if(ctx->virname) *ctx->virname = "MIME.RecursionLimit"; return VIRUS; } else
69c62847	return MAXREC;
242ffd7a	}
72910996	}
242ffd7a
f7bf6fd2	rc = OK;
e3aaff8e	/* Anything left to be parsed? */
d4d14218	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
e3aaff8e	mime_type mimeType;
2673dc74	int subtype, inhead, htmltextPart, inMimeHead, i;
95e11e5a	const char mimeSubtype; char protocol, *boundary;
e3aaff8e	const text *t_line;
98cb5cba	/bool isAlternative;/
e3aaff8e	message *aMessage;
2673dc74	int multiparts = 0; message *messages = NULL; / parts of a multipart message */
e3aaff8e
049a18b9	cli_dbgmsg("Parsing mail file\n");
e3aaff8e	mimeType = messageGetMimeType(mainMessage); mimeSubtype = messageGetMimeSubtype(mainMessage);
528c8a2d	/* pre-process */
c1fce7f7	subtype = tableFind(mctx->subtypeTable, mimeSubtype);
7c5a7a47	if((mimeType == TEXT) && (subtype == PLAIN)) {
e3aaff8e	/* * This is effectively no encoding, notice that we * don't check that charset is us-ascii */
c06e8a5c	cli_dbgmsg("text/plain: Assume no attachements\n");
e3aaff8e	mimeType = NOMIME;
ae5c693a	messageSetMimeSubtype(mainMessage, "");
528c8a2d	} else if((mimeType == MESSAGE) && (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) { /* * RFC1892/RFC3462: section 2 text/rfc822-headers * incorrectly sent as message/rfc822-headers
46d375fe	* * Parse as text/plain, i.e. no mime
528c8a2d	*/ cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
46d375fe	mimeType = NOMIME;
ae5c693a	messageSetMimeSubtype(mainMessage, "");
a05e6d45	} else cli_dbgmsg("mimeType = %d\n", mimeType);
049a18b9
e3aaff8e	switch(mimeType) { case NOMIME:
6fd711b2	cli_dbgmsg("Not a mime encoded message\n");
e3aaff8e	aText = textAddMessage(aText, mainMessage);
647a4f8d	if(!doPhishingScan) break;
94aea271	/* * Fall through: some phishing mails claim they are * text/plain, when they are in fact html */
e3aaff8e	case TEXT:
8a892c3b	/* text/plain has been preprocessed as no encoding */
d77ac7de	if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) \|\| doPhishingScan) {
47d9cc65	/* * It would be better to save and scan the * file and only checkURLs if it's found to be * clean */
8b899010	checkURLs(mainMessage, mctx, &rc, (subtype == HTML)); /* * There might be html sent without subtype * html too, so scan them for phishing */
ecc3d638	if(rc == VIRUS)
8b899010	infected = TRUE;
c52d991e	}
e3aaff8e	break; case MULTIPART:
c8a7cef0	cli_dbgmsg("Content-type 'multipart' handler\n");
e3aaff8e	boundary = messageFindArgument(mainMessage, "boundary"); if(boundary == NULL) {
842c7d49	cli_warnmsg("Multipart/%s MIME message contains no boundary header\n", mimeSubtype);
e2e7ebf5	/* Broken e-mail message / mimeType = NOMIME; / * The break means that we will still * check if the file contains a uuencoded file */ break;
e3aaff8e	}
c79a2273	/* Perhaps it should assume mixed? */
cb5a87e0	if(mimeSubtype[0] == '\0') { cli_warnmsg("Multipart has no subtype assuming alternative\n"); mimeSubtype = "alternative"; messageSetMimeSubtype(mainMessage, "alternative"); }
e3aaff8e	/* * Get to the start of the first message */
20d3dde9	t_line = messageGetBody(mainMessage); if(t_line == NULL) { cli_warnmsg("Multipart MIME message has no body\n"); free((char *)boundary); mimeType = NOMIME; break; } do
bae9c53f	if(t_line->t_line) { if(boundaryStart(lineGetData(t_line->t_line), boundary)) break; /*
182bbcc8	* Found a binhex file before
ebe57840	* the first multipart
b116962d	* TODO: check yEnc
bae9c53f	*/
182bbcc8	if(binhexBegin(mainMessage) == t_line) {
47d9cc65	if(exportBinhexMessage(mctx->dir, mainMessage)) {
a05e6d45	/* virus found */
ecc3d638	rc = VIRUS;
47d9cc65	infected = TRUE;
a05e6d45	break;
b116962d	}
42eebd87	} else if(t_line->t_next &&
a5faab66	(encodingLine(mainMessage) == t_line->t_next)) {
99c2299d	/* * We look for the next line * since later on we'll skip * over the important line when * we think it's a blank line * at the top of the message - * which it would have been in * an RFC compliant world */
42eebd87	cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n", lineGetData(t_line->t_next->t_line));
99c2299d	if(messageGetEncoding(mainMessage) == NOENCODING) break;
b116962d	}
bae9c53f	}
20d3dde9	while((t_line = t_line->t_next) != NULL);
e3aaff8e	if(t_line == NULL) {
47d9cc65	cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n", boundary);
5a642650	/* * Free added by Thomas Lamy * <Thomas.Lamy@in-online.net> / free((char )boundary);
e2e7ebf5	mimeType = NOMIME; /* * The break means that we will still
182bbcc8	* check if the file contains a yEnc/binhex file
e2e7ebf5	*/ break;
e3aaff8e	} /* * Build up a table of all of the parts of this * multipart message. Remember, each part may itself * be a multipart message. */ inhead = 1; inMimeHead = 0;
e06d34dc	/*
9bccc1e5	* Re-read this variable in case mimeSubtype has changed / subtype = tableFind(mctx->subtypeTable, mimeSubtype); /
9a729c80	* Parse the mainMessage object and create an array * of objects called messages, one for each of the
9bccc1e5	* multiparts that mainMessage contains.
3f46285b	*
e06d34dc	* This looks like parseEmailHeaders() - maybe there's * some duplication of code to be cleaned up
9bccc1e5	*
842c7d49	* We may need to create an array rather than just
9bccc1e5	* save each part as it is found because not all * elements will need scanning, and we don't yet know * which of those elements it will be, except in * the case of mixed, when all parts need to be scanned.
e06d34dc	*/
9bccc1e5	for(multiparts = 0; t_line && !infected; multiparts++) {
56ae62e2	int lines = 0;
1a74d4df	message **m;
69c62847	mbox_status old_rc;
56ae62e2
1a74d4df	m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
f12d2498	if(m == NULL)
1a74d4df	break; messages = m;
b726511f
e3aaff8e	aMessage = messages[multiparts] = messageCreate();
3f3f9085	if(aMessage == NULL) { multiparts--; continue; }
c1fce7f7	messageSetCTX(aMessage, mctx->ctx);
e3aaff8e	cli_dbgmsg("Now read in part %d\n", multiparts);
8ba634a9	/* * Ignore blank lines. There shouldn't be ANY * but some viruses insert them */
02927896	while((t_line = t_line->t_next) != NULL)
b2223aad	if(t_line->t_line && /(cli_chomp(t_line->t_text) > 0))/ (strlen(lineGetData(t_line->t_line)) > 0))
d79597e3	break;
8ba634a9	if(t_line == NULL) { cli_dbgmsg("Empty part\n");
b9ce9639	/* * Remove this part unless there's
182bbcc8	* a binhex portion somewhere in
b9ce9639	* the complete message that we may * throw away by mistake if the MIME * encoding information is incorrect */
9bccc1e5	if(mainMessage && (binhexBegin(mainMessage) == NULL)) {
b9ce9639	messageDestroy(aMessage); --multiparts; }
8ba634a9	continue; } do {
b2223aad	const char *line = lineGetData(t_line->t_line);
e3aaff8e
fa5661be	/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n", multiparts, inMimeHead, inhead, boundary, line,
391f7bb3	t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
e3aaff8e
f1c1300c	if(inMimeHead) { /* continuation line */
02927896	if(line == NULL) {
59b99810	/inhead =/ inMimeHead = 0;
02927896	continue; }
3a978f7d	/* * Handle continuation lines * because the previous line
1eec55a6	* ended with a ; or this line * starts with a white space
3a978f7d	*/
1eec55a6	cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n", multiparts, line);
3a978f7d	/* * Handle the case when it * isn't really a continuation * line: * Content-Type: application/octet-stream; * Content-Transfer-Encoding: base64 */
c1fce7f7	parseEmailHeader(aMessage, line, mctx->rfc821Table);
3a978f7d
e3aaff8e	while(isspace((int)line)) line++; if(line == '\0') { inhead = inMimeHead = 0; continue; }
0ed29506	inMimeHead = FALSE;
e3aaff8e	messageAddArgument(aMessage, line);
f1c1300c	} else if(inhead) { /* handling normal headers */
4f4a8f4a	/int quotes;/
c8a7cef0	char fullline, ptr;
2ad0c86e
02927896	if(line == NULL) {
7e67e382	/* * empty line, should the end of the headers, * but some base64 decoders, e.g. uudeview, are broken * and will handle this type of entry, decoding the * base64 content... * Content-Type: application/octet-stream; name=text.zip * Content-Transfer-Encoding: base64 * Content-Disposition: attachment; filename="text.zip"
5198de85	*
7e67e382	* Content-Disposition: attachment; * filename=text.zip * Content-Type: application/octet-stream; * name=text.zip * Content-Transfer-Encoding: base64
5198de85	*
7e67e382	* UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg */
842c7d49	const text *next = t_line->t_next;
7e67e382	if(next && next->t_line) { const char *data = lineGetData(next->t_line);
5b76248c	if((messageGetEncoding(aMessage) == NOENCODING) &&
842c7d49	(messageGetMimeType(aMessage) == APPLICATION) && strstr(data, "base64")) {
d72749e0	/* * Handle this nightmare (note the blank * line in the header and the incorrect * content-transfer-encoding header) * * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes * * r-Encoding: base64 * Content-Disposition: attachment; filename="zipped_files.EXE" */
842c7d49	messageSetEncoding(aMessage, "base64"); cli_dbgmsg("Ignoring fake end of headers\n"); continue; }
5cdb01fc	if((strncmp(data, "Content", 7) == 0) \|\| (strncmp(data, "filename=", 9) == 0)) {
7e67e382	cli_dbgmsg("Ignoring fake end of headers\n"); continue; } }
59b99810	cli_dbgmsg("Multipart %d: End of header information\n", multiparts);
e3aaff8e	inhead = 0; continue; }
c76810dc	if(isspace((int)line)) { / * The first line is * continuation line. * This is tricky * to handle, but * all we can do is our * best / cli_dbgmsg("Part %d starts with a continuation line\n", multiparts); messageAddArgument(aMessage, line); / * Give it a default * MIME type since * that may be the * missing line * * Choose application to * force a save */ if(messageGetMimeType(aMessage) == NOMIME) messageSetMimeType(aMessage, "application"); continue; }
c8a7cef0	inMimeHead = FALSE;
b2223aad
85bb253e	assert(strlen(line) <= RFC2821LENGTH);
f1c1300c
d72749e0	fullline = rfc822comments(line, NULL);
c8a7cef0	if(fullline == NULL)
0cf4cea7	fullline = cli_strdup(line);
2ad0c86e
4f4a8f4a	/quotes = count_quotes(fullline);/
f1c1300c
c8a7cef0	/* * Fold next lines to the end of this * if they start with a white space * or if this line has an odd number of quotes: * Content-Type: application/octet-stream; name="foo * " */
842c7d49	while(t_line && next_is_folded_header(t_line)) { const char *data; t_line = t_line->t_next; data = lineGetData(t_line->t_line);
f1c1300c
f1d57230	if(data[1] == '\0') { /* * Broken message: the * blank line at the end * of the headers isn't blank - * it contains a space */ cli_dbgmsg("Multipart %d: headers not terminated by blank line\n", multiparts); inhead = FALSE; break; }
c8a7cef0	ptr = cli_realloc(fullline, strlen(fullline) + strlen(data) + 1);
303f9be9
c8a7cef0	if(ptr == NULL) break;
1eec55a6
c8a7cef0	fullline = ptr; strcat(fullline, data);
2ad0c86e
4f4a8f4a	/quotes = count_quotes(data);/
1eec55a6	}
842c7d49
c8a7cef0	cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n", multiparts, fullline);
c1fce7f7	parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
c8a7cef0	free(fullline);
69c62847	} else if(boundaryEnd(line, boundary)) {
e3aaff8e	/* * Some viruses put information * after the end of message, * which presumably some broken * mail clients find, so we * can't assume that this * is the end of the message / / t_line = NULL;*/ break;
391f7bb3	} else if(boundaryStart(line, boundary)) { inhead = 1; break;
56ae62e2	} else {
b2223aad	if(messageAddLine(aMessage, t_line->t_line) < 0)
1a74d4df	break;
56ae62e2	lines++; }
8ba634a9	} while((t_line = t_line->t_next) != NULL);
69c62847	cli_dbgmsg("Part %d has %d lines, rc = %d\n", multiparts, lines, rc);
9bccc1e5	/* * Only save in the array of messages if some * decision will be taken on whether to scan. * If all parts will be scanned then save to * file straight away */ switch(subtype) { case MIXED: case ALTERNATIVE: case REPORT: case DIGEST: case APPLEDOUBLE: case KNOWBOT: case -1:
69c62847	old_rc = rc;
9bccc1e5	mainMessage = do_multipart(mainMessage, messages, multiparts, &rc, mctx, messageIn,
5684fccf	&aText, recursion_level);
69c62847	if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK)) rc = OK;
a7a2e2d4	if(messages[multiparts]) { messageDestroy(messages[multiparts]); messages[multiparts] = NULL; }
9bccc1e5	--multiparts;
ecc3d638	if(rc == VIRUS)
9bccc1e5	infected = TRUE; break; default: messageClean(aMessage); }
e3aaff8e	} free((char *)boundary);
7c1eb3bf	/*
c79a2273	* Preprocess. Anything special to be done before * we handle the multiparts?
9a729c80	*/
9bccc1e5	switch(subtype) {
c79a2273	case KNOWBOT: /* TODO */ cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n"); mimeSubtype = "mixed"; break;
5d8100cb	case -1: /* * According to section 7.2.6 of * RFC1521, unrecognised multiparts * should be treated as multipart/mixed. */
38d07186	cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
5d8100cb	mimeSubtype = "mixed"; break;
c79a2273	}
9a729c80	/*
7c1eb3bf	* We've finished message we're parsing */ if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
ad9c6836	}
e3aaff8e
9bccc1e5	cli_dbgmsg("The message has %d parts\n", multiparts); if(((multiparts == 0) \|\| infected) && (aText == NULL)) {
b912eaf2	if(messages) { for(i = 0; i < multiparts; i++) if(messages[i]) messageDestroy(messages[i]);
b726511f	free(messages);
b912eaf2	}
9bccc1e5	/*
ecc3d638	* Nothing to do
9bccc1e5	*/
69c62847	switch(rc) { case VIRUS: return VIRUS; case MAXREC: return MAXREC; default: return OK_ATTACHMENTS_NOT_SAVED; }
b726511f	}
7c1eb3bf
393a6d67	cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
e3aaff8e
9a729c80	/* * We now have all the parts of the multipart message * in the messages array: * message messages[multiparts] Let's decide what to do with them all */
c1fce7f7	switch(tableFind(mctx->subtypeTable, mimeSubtype)) {
e3aaff8e	case RELATED:
e06d34dc	cli_dbgmsg("Multipart related handler\n");
e3aaff8e	/*
294d0774	* Have a look to see if there's HTML code * which will need scanning
e3aaff8e	*/ aMessage = NULL; assert(multiparts > 0);
d4d14218	htmltextPart = getTextPart(messages, multiparts);
e3aaff8e
d4d14218	if(htmltextPart >= 0) aText = textAddMessage(aText, messages[htmltextPart]);
e3aaff8e	else /*
294d0774	* There isn't an HTML bit. If there's a * multipart bit, it'll may be in there * somewhere
e3aaff8e	*/ for(i = 0; i < multiparts; i++) if(messageGetMimeType(messages[i]) == MULTIPART) { aMessage = messages[i];
d4d14218	htmltextPart = i;
e3aaff8e	break; }
59da5a4f	if(htmltextPart == -1)
16037392	cli_dbgmsg("No HTML code found to be scanned\n");
59da5a4f	else {
242ffd7a	rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
99f7771b	if((rc == OK) && aMessage) {
59da5a4f	assert(aMessage == messages[htmltextPart]);
99f7771b	messageDestroy(aMessage);
59da5a4f	messages[htmltextPart] = NULL; } }
e3aaff8e	/* * Fixed based on an idea from Stephen White <stephen@earth.li> * The message is confused about the difference * between alternative and related. Badtrans.B * suffers from this problem. * * Fall through in this case: * Content-Type: multipart/related; * type="multipart/alternative" */
98cb5cba	/* * Changed to always fall through based on * an idea from Michael Dankov <misha@btrc.ru> * that some viruses are completely confused * about the difference between related * and mixed / /cptr = messageFindArgument(mainMessage, "type");
e3aaff8e	if(cptr == NULL) break; isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0); free((char *)cptr); if(!isAlternative)
98cb5cba	break;*/
ba867aed	case DIGEST: /* * According to section 5.1.5 RFC2046, the * default mime type of multipart/digest parts * is message/rfc822 * * We consider them as alternative, wrong in * the strictest sense since they aren't * alternatives - all parts a valid - but it's * OK for our needs since it means each part * will be scanned */
e3aaff8e	case ALTERNATIVE: cli_dbgmsg("Multipart alternative handler\n"); /* * Fall through - some clients are broken and * say alternative instead of mixed. The Klez
5d8100cb	* virus is broken that way, and anyway we * wish to scan all of the alternatives
e3aaff8e	/ case REPORT: / * According to section 1 of RFC1892, the * syntax of multipart/report is the same * as multipart/mixed. There are some required * parameters, but there's no need for us to * verify that they exist */ case MIXED:
c9b8f252	case APPLEDOUBLE: /* not really supported */
e3aaff8e	/* * Look for attachments * * Not all formats are supported. If an * unsupported format turns out to be * common enough to implement, it is a simple * matter to add it */
ad9c6836	if(aText) { if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
e3aaff8e	mainMessage = NULL;
ad9c6836	}
e3aaff8e	cli_dbgmsg("Mixed message with %d parts\n", multiparts); for(i = 0; i < multiparts; i++) {
c1fce7f7	mainMessage = do_multipart(mainMessage, messages, i, &rc, mctx,
242ffd7a	messageIn, &aText, recursion_level + 1);
ecc3d638	if(rc == VIRUS) {
a603478f	infected = TRUE; break; }
69c62847	if(rc == MAXREC) break;
e3aaff8e	}
242ffd7a	/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
e3aaff8e	break; case SIGNED: case PARALLEL: /* * If we're here it could be because we have a * multipart/mixed message, consisting of a * message followed by an attachment. That * message itself is a multipart/alternative * message and we need to dig out the plain * text part of that alternative */
d4d14218	htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0;
e3aaff8e
242ffd7a	rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
e3aaff8e	break;
9a729c80	case ENCRYPTED:
08e5a453	rc = FAIL; /* Not yet handled */
c79a2273	protocol = (char *)messageFindArgument(mainMessage, "protocol");
9a729c80	if(protocol) { if(strcasecmp(protocol, "application/pgp-encrypted") == 0) { /* RFC2015 */ cli_warnmsg("PGP encoded attachment not scanned\n");
ecc3d638	rc = OK_ATTACHMENTS_NOT_SAVED;
9a729c80	} else
6fcf5624	cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", protocol);
9a729c80	free(protocol); } else
4ab382c3	cli_dbgmsg("Encryption method missing protocol name\n");
9a729c80	break;
e3aaff8e	default:
5d8100cb	assert(0);
e3aaff8e	}
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
6e84cebb	if(aText && (textIn == NULL)) {
a603478f	if((!infected) && (fb = fileblobCreate()) != NULL) {
c8a7cef0	cli_dbgmsg("Save non mime and/or text/plain part\n");
c1fce7f7	fileblobSetFilename(fb, mctx->dir, "textpart");
e097c0dd	/fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);/
c1fce7f7	fileblobSetCTX(fb, mctx->ctx);
3f46285b	(void)textToFileblob(aText, fb, 1);
6e84cebb	fileblobDestroy(fb); }
049a18b9	textDestroy(aText);
6e84cebb	}
049a18b9
b116962d	for(i = 0; i < multiparts; i++) if(messages[i]) messageDestroy(messages[i]);
b726511f	if(messages) free(messages);
e3aaff8e	return rc; case MESSAGE: /* * Check for forbidden encodings */ switch(messageGetEncoding(mainMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: break; default:
049a18b9	cli_warnmsg("MIME type 'message' cannot be decoded\n");
e3aaff8e	break; }
ecc3d638	rc = FAIL;
049a18b9	if((strcasecmp(mimeSubtype, "rfc822") == 0) \|\| (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
c1fce7f7	message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
c693116d	if(m) {
af3c6acb	cli_dbgmsg("Decode rfc822\n");
c693116d
c1fce7f7	messageSetCTX(m, mctx->ctx);
a603478f
93d41ee4	if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
59da5a4f	} else messageReset(mainMessage);
c693116d	if(messageGetBody(m))
242ffd7a	rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
c693116d	messageDestroy(m); }
e3aaff8e	break;
5a15955b	} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
12f3689d	/* RFC 2298 - handle like a normal email */
ecc3d638	rc = OK;
12f3689d	break;
5a15955b	} else if(strcasecmp(mimeSubtype, "partial") == 0) {
f10460ed	#ifdef PARTIAL_DIR /* RFC1341 message split over many emails */
c1fce7f7	if(rfc1341(mainMessage, mctx->dir) >= 0)
ecc3d638	rc = OK;
f10460ed	#else
22080fa5	cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
f10460ed	#endif } else if(strcasecmp(mimeSubtype, "external-body") == 0)
22080fa5	/* TODO */
e3aaff8e	cli_warnmsg("Attempt to send Content-type message/external-body trapped");
5a642650	else
6fcf5624	cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
e3aaff8e
f10460ed
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b726511f	if(messages) free(messages);
f10460ed	return rc;
e3aaff8e	case APPLICATION:
4ab382c3	/*cptr = messageGetMimeSubtype(mainMessage);
d4d14218
4ab382c3	if((strcasecmp(cptr, "octet-stream") == 0) \|\|
aa0210b6	(strcasecmp(cptr, "x-msdownload") == 0)) {*/ {
2673dc74	fb = messageToFileblob(mainMessage, mctx->dir, 1);
e3aaff8e
0e5a0129	if(fb) { cli_dbgmsg("Saving main message as attachment\n"); fileblobDestroy(fb);
fbb7262e	if(mainMessage != messageIn) { messageDestroy(mainMessage); mainMessage = NULL; } else messageReset(mainMessage);
e3aaff8e	}
aa0210b6	} /else cli_warnmsg("Discarded application not sent as attachment\n");/
e3aaff8e	break; case AUDIO: case VIDEO: case IMAGE: break; default: cli_warnmsg("Message received with unknown mime encoding"); break; }
2673dc74
b912eaf2	if(messages) { /* "can't happen" */
2e5a3528	cli_warnmsg("messages != NULL, report to http://bugs.clamav.net\n");
2673dc74	free(messages);
b912eaf2	}
e3aaff8e	}
f12d2498	if(aText && (textIn == NULL)) {
6fd711b2	/* Look for a bounce in the text (non mime encoded) portion / const text t;
b116962d
6fd711b2	for(t = aText; t; t = t->t_next) { const line_t *l = t->t_line;
ebe57840	const text lookahead, topofbounce;
6fd711b2	const char *s;
ebe57840	bool inheader;
b116962d
6fd711b2	if(l == NULL) continue;
a603478f	if(!isBounceStart(lineGetData(l)))
6fd711b2	continue;
a7a2e2d4	lookahead = t->t_next; if(lookahead) { if(isBounceStart(lineGetData(lookahead->t_line))) /* don't save worthless header lines / continue; } else / don't save a single liner */ break;
6fd711b2	/* * We've found what looks like the start of a bounce * message. Only bother saving if it really is a bounce * message, this helps to speed up scanning of ping-pong * messages that have lots of bounces within bounces in * them */
a7a2e2d4	for(; lookahead; lookahead = lookahead->t_next) {
6fd711b2	l = lookahead->t_line; if(l == NULL) break; s = lineGetData(l); if(strncasecmp(s, "Content-Type:", 13) == 0) /* * Don't bother with plain/text or * plain/html */ if(strstr(s, "text/") == NULL)
6862efc7	/* * Don't bother to save the unuseful * part */
6fd711b2	break; } if(lookahead && (lookahead->t_line == NULL)) { cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n"); t = lookahead; /* look for next bounce message */ continue; }
6862efc7	/* * Prescan the bounce message to see if there's likely * to be anything nasty. * This algorithm is hand crafted and may be breakable * so all submissions are welcome. It's best NOT to * remove this however you may be tempted, because it * significantly speeds up the scanning of multiple * bounces (i.e. bounces within many bounces) */ for(; lookahead; lookahead = lookahead->t_next) { l = lookahead->t_line; if(l) { s = lineGetData(l); if((strncasecmp(s, "Content-Type:", 13) == 0) && (strstr(s, "multipart/") == NULL) && (strstr(s, "message/rfc822") == NULL) && (strstr(s, "text/plain") == NULL)) break; } } if(lookahead == NULL) {
7c56033f	cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
6862efc7	/* nothing here, move along please */ break; }
ebe57840	if((fb = fileblobCreate()) == NULL) break; cli_dbgmsg("Save non mime part bounce message\n");
c1fce7f7	fileblobSetFilename(fb, mctx->dir, "bounce");
95e11e5a	fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
c1fce7f7	fileblobSetCTX(fb, mctx->ctx);
ebe57840	inheader = TRUE; topofbounce = NULL;
01c99f53	do {
ebe57840	l = t->t_line; if(l == NULL) { if(inheader) { inheader = FALSE; topofbounce = t; } } else { s = lineGetData(l);
95e11e5a	fileblobAddData(fb, (const unsigned char *)s, strlen(s));
ebe57840	}
95e11e5a	fileblobAddData(fb, (const unsigned char *)"\n", 1);
ebe57840	lookahead = t->t_next; if(lookahead == NULL) break; t = lookahead; l = t->t_line; if((!inheader) && l) { s = lineGetData(l);
a603478f	if(isBounceStart(s)) {
b2ba24f5	cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
ebe57840	break; } }
01c99f53	} while(!fileblobContainsVirus(fb));
ebe57840	fileblobDestroy(fb); if(topofbounce) t = topofbounce; /* * Don't do this - it slows bugs.txt / /if(mainMessage) mainMessage->bounce = NULL;*/
6fd711b2	}
f12d2498	textDestroy(aText); aText = NULL; }
b0b860f1	/* * No attachments - scan the text portions, often files * are hidden in HTML code */
ecc3d638	if(mainMessage && (rc != VIRUS)) {
47d9cc65	text *t_line;
e3aaff8e	/*
b0b860f1	* Look for uu-encoded main file
e3aaff8e	*/
182bbcc8	if((encodingLine(mainMessage) != NULL) &&
47d9cc65	((t_line = bounceBegin(mainMessage)) != NULL)) { if(exportBounceMessage(t_line, mctx))
ecc3d638	rc = OK;
b0b860f1	} else { bool saveIt;
d4d14218
b0b860f1	if(messageGetMimeType(mainMessage) == MESSAGE)
f01bbfe8	/*
b0b860f1	* Quick peek, if the encapsulated * message has no * content encoding statement don't * bother saving to scan, it's safe
f01bbfe8	*/
47d9cc65	saveIt = (bool)(encodingLine(mainMessage) != NULL);
b0b860f1	else if((t_line = encodingLine(mainMessage)) != NULL) {
a7527b1f	/*
b0b860f1	* Some bounces include the message * body without the headers.
ebe57840	* FIXME: Unfortunately this generates a
b0b860f1	* lot of false positives that a bounce * has been found when it hasn't.
a7527b1f	*/
b0b860f1	if((fb = fileblobCreate()) != NULL) {
ae5c693a	cli_dbgmsg("Found a bounce message with no header at '%s'\n", lineGetData(t_line->t_line));
c1fce7f7	fileblobSetFilename(fb, mctx->dir, "bounce");
f24bf390	fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
cca4efe4
01c99f53	/fileblobSetCTX(fb, ctx);/
94aea271	fileblobDestroy(textToFileblob(t_line, fb, 1));
5c1150ac	}
b0b860f1	saveIt = FALSE;
2673dc74	} else
b0b860f1	/* * Save the entire text portion, * since it it may be an HTML file with
a05e6d45	* a JavaScript virus or a phish
b0b860f1	*/ saveIt = TRUE;
e3aaff8e
b0b860f1	if(saveIt) { cli_dbgmsg("Saving text part to scan\n");
3f46285b	saveTextPart(mainMessage, mctx->dir, 1);
59da5a4f	if(mainMessage != messageIn) { messageDestroy(mainMessage); mainMessage = NULL; } else messageReset(mainMessage);
ecc3d638	rc = OK;
049a18b9	}
e3aaff8e	}
69c62847	} /*else
ecc3d638	rc = OK_ATTACHMENTS_NOT_SAVED; /* nothing saved */
e3aaff8e
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
ecc3d638	if((rc != FAIL) && infected) rc = VIRUS;
a603478f
e06d34dc	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
e3aaff8e
e06d34dc	return rc;
e3aaff8e	} /* * Is the current line the start of a new section? * * New sections start with --boundary / static int boundaryStart(const char line, const char *boundary) {
95e11e5a	const char ptr; char out;
df8806fd	int rc;
85bb253e	char buf[RFC2821LENGTH + 1];
2ad0c86e
0e4e16d4	if(line == NULL) return 0; /* empty line */
300a8ae9	if(boundary == NULL) return 0;
0e4e16d4
6e84cebb	/cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);/
2ad0c86e
ebe57840	if((line != '-') && (line != '(')) return 0; if(strchr(line, '-') == NULL) return 0;
d72749e0	if(strlen(line) <= sizeof(buf)) { out = NULL; ptr = rfc822comments(line, buf); } else
95e11e5a	ptr = out = rfc822comments(line, NULL);
d72749e0
2ad0c86e	if(ptr == NULL)
95e11e5a	ptr = line;
2ad0c86e
a9d251e0	if((ptr++ != '-') \|\| (ptr == '\0')) {
d72749e0	if(out) free(out);
0e4e16d4	return 0;
2ad0c86e	}
0e4e16d4
e3aaff8e	/*
0e4e16d4	* Gibe.B3 is broken, it has:
e3aaff8e	* boundary="---- =_NextPart_000_01C31177.9DC7C000" * but it's boundaries look like * ------ =_NextPart_000_01C31177.9DC7C000
0e4e16d4	* notice the one too few '-'. * Presumably this is a deliberate exploitation of a bug in some mail * clients. * * The trouble is that this creates a lot of false positives for * boundary conditions, if we're too lax about matches. We do our level * best to avoid these false positives. For example if we have * boundary="1" we want to ensure that we don't break out of every line * that has -1 in it instead of starting --1. This needs some more work.
7c56033f	* * Look with and without RFC822 comments stripped, I've seen some * samples where () are taken as comments in boundaries and some where
69c62847	* they're not. Irrespective of whatever RFC2822 says, we need to find * viruses in both types of mails.
e3aaff8e	*/
69c62847	if((strstr(&ptr[1], boundary) != NULL) \|\| (strstr(line, boundary) != NULL)) { const char k = ptr; / * We need to ensure that we don't match --11=-=-=11 when * looking for --1=-=-=1 in well behaved headers, that's a * false positive problem mentioned above / rc = 0; do if(strcmp(++k, boundary) == 0) { rc = 1; break; } while(k == '-'); if(rc == 0) { k = &line[1]; do if(strcmp(++k, boundary) == 0) { rc = 1; break; } while(k == '-'); } } else if(ptr++ != '-') rc = 0;
df8806fd	else
79179da5	rc = (strcasecmp(ptr, boundary) == 0);
df8806fd
d72749e0	if(out) free(out);
df8806fd
69c62847	if(rc == 1)
df8806fd	cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line); return rc;
e3aaff8e	} /* * Is the current line the end? * * The message ends with with --boundary-- */ static int
69c62847	boundaryEnd(const char line, const char boundary)
e3aaff8e	{ size_t len;
02927896	if(line == NULL) return 0;
69c62847	/cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary);/
e3aaff8e	if(line++ != '-') return 0; if(line++ != '-') return 0; len = strlen(boundary);
049a18b9	if(strncasecmp(line, boundary, len) != 0) return 0;
0e523db2	/* * Use < rather than == because some broken mails have white * space after the boundary */
c29f01c5	if(strlen(line) < (len + 2))
e3aaff8e	return 0; line = &line[len]; if(*line++ != '-') return 0;
69c62847	if(*line == '-') { cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, line); return 1; } return 0;
e3aaff8e	} /* * Initialise the various lookup tables / static int initialiseTables(table_t rfc821Table, table_t subtypeTable) { const struct tableinit tableinit; /* * Initialise the various look up tables / rfc821Table = tableCreate(); assert(*rfc821Table != NULL); for(tableinit = rfc821headers; tableinit->key; tableinit++)
51fc2aa8	if(tableInsert(rfc821Table, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
767f16ab	*rfc821Table = NULL;
e3aaff8e	return -1;
51fc2aa8	}
e3aaff8e	subtypeTable = tableCreate(); assert(subtypeTable != NULL); for(tableinit = mimeSubtypes; tableinit->key; tableinit++) if(tableInsert(subtypeTable, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
51fc2aa8	tableDestroy(*subtypeTable);
767f16ab	rfc821Table = NULL; subtypeTable = NULL;
e3aaff8e	return -1; } return 0; } /*
d4d14218	* If there's a HTML text version use that, otherwise
e3aaff8e	* use the first text part, otherwise just use the
d4d14218	* first one around. HTML text is most likely to include * a scripting worm
e3aaff8e	* * If we can't find one, return -1 / static int getTextPart(message const messages[], size_t size) { size_t i;
28010d29	int textpart = -1;
e3aaff8e	for(i = 0; i < size; i++) { assert(messages[i] != NULL);
28010d29	if(messageGetMimeType(messages[i]) == TEXT) { if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0) return (int)i; textpart = (int)i; }
e3aaff8e	}
28010d29	return textpart;
e3aaff8e	} /* * strip -
767f16ab	* Remove the trailing spaces from a buffer. Don't call this directly, * always call strstrip() which is a wrapper to this routine to be used with * NUL terminated strings. This code looks a bit strange because of it's * heritage from code that worked on strings that weren't necessarily NUL * terminated. * TODO: rewrite for clamAV *
e3aaff8e	* Returns it's new length (a la strlen) * * len must be int not size_t because of the >= 0 test, it is sizeof(buf) * not strlen(buf) / static size_t strip(char buf, int len) { register char *ptr; register size_t i; if((buf == NULL) \|\| (len <= 0))
767f16ab	return 0;
e3aaff8e	i = strlen(buf); if(len > (int)(i + 1))
767f16ab	return i;
e3aaff8e	ptr = &buf[--len];
ecc3d638	#if defined(UNIX) \|\| defined(C_LINUX) \|\| defined(C_DARWIN) /* watch - it may be in shared text area */
e3aaff8e	do if(ptr) ptr = '\0';
87c9313e	while((--len >= 0) && (!isgraph(--ptr)) && (ptr != '\n') && (*ptr != '\r'));
e3aaff8e	#else /* more characters can be displayed on DOS / do #ifndef REAL_MODE_DOS if(ptr) /* C8.0 puts into a text area / #endif ptr = '\0';
ced371fe	while((--len >= 0) && ((--ptr == '\0') \|\| isspace((int)(ptr & 0xFF))));
e3aaff8e	#endif return((size_t)(len + 1)); } /* * strstrip: * Strip a given string */
f0627588	size_t
e3aaff8e	strstrip(char s) { if(s == (char )NULL) return(0);
02927896
bc6bbeff	return(strip(s, (int)strlen(s) + 1));
e3aaff8e	} static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char arg) {
38d07186	char copy, p; const char *ptr;
8b3563f2	int commandNumber;
4f1d0bfc
e3aaff8e	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
8b3563f2
d72749e0	copy = rfc822comments(cmd, NULL);
38d07186	if(copy) { commandNumber = tableFind(rfc821Table, copy); free(copy);
f017fbdd	} else commandNumber = tableFind(rfc821Table, cmd);
8b3563f2
d72749e0	copy = rfc822comments(arg, NULL);
e3aaff8e
38d07186	if(copy) ptr = copy; else ptr = arg;
e75e1ad1
8b3563f2	switch(commandNumber) {
e3aaff8e	case CONTENT_TYPE: /* * Fix for non RFC1521 compliant mailers * that send content-type: Text instead * of content-type: Text/Plain, or * just simply "Content-Type:" */
5e394e73	if(arg == NULL)
69543a9d	/* * According to section 4 of RFC1521: * "Note also that a subtype specification is * MANDATORY. There are no default subtypes" *
1eec55a6	* We have to break this and make an assumption
69543a9d	* for the subtype because virus writers and * email client writers don't get it right */ cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
38d07186	else if(strchr(ptr, '/') == NULL)
69543a9d	/* * Empty field, such as * Content-Type: * which I believe is illegal according to * RFC1521 */
38d07186	cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
e3aaff8e	else {
8037334b	int i;
6d312569	char mimeArgs; / RHS of the ; */
e3aaff8e	/* * Some clients are broken and * put white space after the ; */
2625d6a0	if(arg == '/') { cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n"); messageSetMimeType(m, "application"); messageSetMimeSubtype(m, "octet-stream"); } else { /
0960ff5e	* The content type could be in quotes: * Content-Type: "multipart/mixed" * FIXME: this is a hack in that ignores * the quotes, it doesn't handle * them properly
2625d6a0	*/
38d07186	while(isspace(*ptr)) ptr++; if(ptr[0] == '\"') ptr++;
0960ff5e
38d07186	if(ptr[0] != '/') {
f017fbdd	char s; char mimeType; /* LHS of the ; */
38d07186	#ifdef CL_THREAD_SAFE
1adb6fa8	char *strptr = NULL;
38d07186	#endif
f017fbdd
38d07186	s = mimeType = cli_strtok(ptr, 0, ";");
0960ff5e	/*
6d312569	* Handle * Content-Type: foo/bar multipart/mixed * and * Content-Type: multipart/mixed foo/bar
0960ff5e	*/
38e2ab86	if(s && *s) for(;;) {
548a5f96	#ifdef CL_THREAD_SAFE
6d312569	int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
548a5f96	#else int set = messageSetMimeType(m, strtok(s, "/")); #endif
6d312569	/* * Stephen White <stephen@earth.li> * Some clients put space after * the mime type but before * the ; */
548a5f96	#ifdef CL_THREAD_SAFE
6d312569	s = strtok_r(NULL, ";", &strptr);
548a5f96	#else s = strtok(NULL, ";"); #endif
6d312569	if(s == NULL) break; if(set) {
f017fbdd	size_t len = strstrip(s) - 1;
6d312569	if(s[len] == '\"') { s[len] = '\0'; len = strstrip(s); } if(len) {
f017fbdd	if(strchr(s, ' ')) { char *t = cli_strtok(s, 0, " ");
6d312569
f017fbdd	messageSetMimeSubtype(m, t); free(t); } else messageSetMimeSubtype(m, s);
6d312569	}
0960ff5e	}
6d312569	while(s && !isspace(s)) s++; if(s++ == '\0') break; if(s == '\0') break;
0960ff5e	}
38e2ab86	if(mimeType) free(mimeType);
f017fbdd	}
2625d6a0	}
e3aaff8e	/*
20d3dde9	* Add in all rest of the the arguments. * e.g. if the header is this: * Content-Type:', arg='multipart/mixed; boundary=foo * we find the boundary argument set it
e3aaff8e	*/
8037334b	i = 1;
38d07186	while((mimeArgs = cli_strtok(ptr, i++, ";")) != NULL) {
8037334b	cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);
6d312569	messageAddArguments(m, mimeArgs); free(mimeArgs); }
e3aaff8e	} break; case CONTENT_TRANSFER_ENCODING:
38d07186	messageSetEncoding(m, ptr);
e3aaff8e	break; case CONTENT_DISPOSITION:
38d07186	p = cli_strtok(ptr, 0, ";"); if(p) { if(*p) { messageSetDispositionType(m, p); free(p); p = cli_strtok(ptr, 1, ";"); messageAddArgument(m, p); } free(p);
548a5f96	}
ab84808e	if((p = (char )messageFindArgument(m, "filename")) == NULL) / * Handle this type of header, without * a filename (e.g. some Worm.Torvil.D) * Content-ID: <nRfkHdrKsAxRU> * Content-Transfer-Encoding: base64 * Content-Disposition: attachment */ messageAddArgument(m, "filename=unknown"); else free(p);
e3aaff8e	}
38d07186	if(copy) free(copy);
e3aaff8e
4f1d0bfc	return 0;
e3aaff8e	}
e06d34dc	/*
cca4efe4	* Save the text portion of the message */ static void
3f46285b	saveTextPart(message m, const char dir, int destroy_text)
cca4efe4	{
0e5a0129	fileblob *fb;
cca4efe4	messageAddArgument(m, "filename=textportion");
3f46285b	if((fb = messageToFileblob(m, dir, destroy_text)) != NULL) {
cca4efe4	/* * Save main part to scan that */
c77c8809	cli_dbgmsg("Saving main message\n");
cca4efe4
0e5a0129	fileblobDestroy(fb);
cca4efe4	} }
/*
 * Handle RFC822 comments in headers.
 * If out == NULL, return a buffer without the comments, the caller must free
 *	the returned buffer
 * Otherwise the result is written to out, which must not be the same buffer
 *	as in and must have room for strlen(in) + 1 bytes
 * Return NULL on error or if the input has no comments (no '(' in it), in
 *	which case nothing has been written to out
 * See secion 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
	const char *iptr;
	char *optr;
	int backslash, inquote, commentlevel;

	if(in == NULL)
		return NULL;

	if(strchr(in, '(') == NULL)
		return NULL;

	assert(out != in);

	if(out == NULL) {
		out = cli_malloc(strlen(in) + 1);
		if(out == NULL)
			return NULL;
	}

	backslash = commentlevel = inquote = 0;
	optr = out;

	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(iptr = in; *iptr; iptr++)
		if(backslash) {
			/* escaped character: copied as is, unless commented out */
			if(commentlevel == 0)
				*optr++ = *iptr;
			backslash = 0;
		} else switch(*iptr) {
			case '\\':
				backslash = 1;
				break;
			case '\"':
				*optr++ = '\"';
				inquote = !inquote;
				break;
			case '(':
				/* brackets inside quotes are not comments */
				if(inquote)
					*optr++ = '(';
				else
					commentlevel++;
				break;
			case ')':
				if(inquote)
					*optr++ = ')';
				else if(commentlevel > 0)
					commentlevel--;
				break;
			default:
				if(commentlevel == 0)
					*optr++ = *iptr;
		}

	if(backslash)	/* last character was a single backslash */
		*optr++ = '\\';
	*optr = '\0';

	/*strstrip(out);*/

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
50df4118	/* * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must * free, or NULL on error / static char rfc2047(const char in) { char out, *pout; size_t len;
4de5fffd	if((strstr(in, "=?") == NULL) \|\| (strstr(in, "?=") == NULL))
0cf4cea7	return cli_strdup(in);
50df4118	cli_dbgmsg("rfc2047 '%s'\n", in); out = cli_malloc(strlen(in) + 1); if(out == NULL) return NULL; pout = out; /* For each RFC2047 string / while(in) {
cf569541	char encoding, ptr, enctext;
50df4118	message m; blob b; /* Find next RFC2047 string / while(in) { if((in == '=') && (in[1] == '?')) { in += 2; break; } pout++ = in++; } / Skip over charset, find encoding / while((in != '?') && in) in++; if(in == '\0') break; encoding = *++in; encoding = tolower(encoding); if((encoding != 'q') && (encoding != 'b')) {
6fcf5624	cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
1b00d9a4	free(out); out = NULL;
50df4118	break; } /* Skip to encoded text / if(++in != '?') break; if(*++in == '\0') break;
0cf4cea7	enctext = cli_strdup(in);
cf569541	if(enctext == NULL) { free(out); out = NULL; break; }
50df4118	in = strstr(in, "?=");
cf569541	if(in == NULL) { free(enctext);
50df4118	break;
cf569541	}
50df4118	in += 2; ptr = strstr(enctext, "?="); assert(ptr != NULL); ptr = '\0'; /cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/ m = messageCreate();
c77c8809	if(m == NULL)
50df4118	break;
564b3e07	messageAddStr(m, enctext);
cf569541	free(enctext);
767f16ab	switch(encoding) {
50df4118	case 'q': messageSetEncoding(m, "quoted-printable"); break; case 'b': messageSetEncoding(m, "base64"); break; }
2673dc74	b = messageToBlob(m, 1);
50df4118	len = blobGetDataSize(b);
95e11e5a	cli_dbgmsg("Decoded as '%.s'\n", (int)len, (int)len, blobGetData(b));
50df4118	memcpy(pout, blobGetData(b), len); blobDestroy(b); messageDestroy(m); if(pout[len - 1] == '\n') pout += len - 1; else pout += len; }
ec8e31fa	if(out == NULL) return NULL; *pout = '\0';
50df4118
ec8e31fa	cli_dbgmsg("rfc2047 returns '%s'\n", out);
50df4118	return out; }
#ifdef	PARTIAL_DIR
/*
 * Handle partial messages
 *
 * m:	a message/partial part, its "id", "number" and "total" arguments
 *	are used
 * dir:	directory in which the reassembled message is written
 *
 * The part is saved in the directory clamav-partial under the temporary
 * directory. When the part's number is the same as the total, the parts
 * that were saved for this id are joined up, in order, into dir/<id>.
 *
 * Returns 0 on success, -1 on error
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *id, *number, *total, *oldfilename;
	const char *tmpdir;
	char pdir[NAME_MAX + 1];

	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		return -1;

#ifdef	C_CYGWIN
	if((tmpdir = getenv("TEMP")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
				tmpdir = "C:\\";
#else
	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TEMP")) == (char *)NULL)
#ifdef	P_tmpdir
				tmpdir = P_tmpdir;
#else
				tmpdir = "/tmp";
#endif
#endif

	snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);

	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", pdir);
		free(id);
		return -1;
	} else {
		struct stat statb;

		if(stat(pdir, &statb) < 0) {
			cli_errmsg("Can't stat the directory '%s'\n", pdir);
			free(id);
			return -1;
		}
		if(statb.st_mode & 077)
			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
				pdir, (int)(statb.st_mode & 0777));
	}

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL) {
		free(id);
		return -1;
	}

	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 is for "filename=" and the terminating NUL */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg) {
		sprintf(arg, "filename=%s%s", id, number);
		messageAddArgument(m, arg);
		free(arg);
	}

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
		free(id);
		free(number);
		return -1;
	}

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		int n = atoi(number);
		int t = atoi(total);
		DIR *dd = NULL;

		free(total);
		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
			FILE *fout;
			char outname[NAME_MAX + 1];
			time_t now;

			sanitiseName(id);

			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing", outname);
				free(id);
				free(number);
				closedir(dd);
				return -1;
			}

			time(&now);
			/* one pass over the directory for each part, in order */
			for(n = 1; n <= t; n++) {
				char filename[NAME_MAX + 1];
				const struct dirent *dent;
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
				union {
					struct dirent d;
					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
				} result;
#endif

				snprintf(filename, sizeof(filename), "%s%d", id, n);

#ifdef HAVE_READDIR_R_3
				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
#elif defined(HAVE_READDIR_R_2)
				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					FILE *fin;
					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
					int nblanks;
					struct stat statb;
					extern short cli_leavetemps_flag;

#ifndef	C_CYGWIN
					if(dent->d_ino == 0)
						continue;
#endif

					snprintf(fullname, sizeof(fullname) - 1,
						"%s/%s", pdir, dent->d_name);

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
						/* not the part we're after */
						if(!cli_leavetemps_flag)
							continue;
						if(stat(fullname, &statb) < 0)
							continue;
						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
							if(unlink(fullname) >= 0)
								cli_warnmsg("removed old RFC1341 file %s\n", fullname);
						continue;
					}

					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading", fullname);
						fclose(fout);
						unlink(outname);
						free(id);
						free(number);
						closedir(dd);
						return -1;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied
						 */
						if(buffer[0] == '\n')
							nblanks++;
						else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* don't unlink if leave temps */
					if(!cli_leavetemps_flag)
						unlink(fullname);
					break;
				}
				rewinddir(dd);
			}
			closedir(dd);
			fclose(fout);
		}
	}
	free(number);
	free(id);

	return 0;
}
#endif
c52d991e	static void hrefs_done(blob b, tag_arguments_t hrefs) { if(b) blobDestroy(b); html_tag_arg_free(hrefs); } /* * This used to be part of checkURLs, split out, because phishingScan needs it * too, and phishingScan might be used in situations where checkURLs is * disabled (see ifdef) / static blob getHrefs(message m, tag_arguments_t hrefs) {
8386c723	blob *b = messageToBlob(m, 0);
c52d991e	size_t len; if(b == NULL) return NULL; len = blobGetDataSize(b); if(len == 0) { blobDestroy(b); return NULL; } /* TODO: make this size customisable / if(len > 1001024) {
00a36e85	cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
c52d991e	blobDestroy(b); return NULL; } hrefs->count = 0; hrefs->tag = hrefs->value = NULL; hrefs->contents = NULL;
8386c723	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
d77ac7de	if(!html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs,m->ctx->dconf)) {
c52d991e	blobDestroy(b); return NULL; }
8386c723	cli_dbgmsg("getHrefs: html_normalise_mem returned\n");
c52d991e	/* TODO: Do we need to call remove_html_comments? */ return b; }
ad422cc9	/* * Experimental: validate URLs for phishes * followurls: see if URLs point to malware */
c52d991e	static void
ecc3d638	checkURLs(message mainMessage, mbox_ctx mctx, mbox_status *rc, int is_html)
c52d991e	{
94aea271	blob *b;
ad422cc9	tag_arguments_t hrefs;
c52d991e
d77ac7de	hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
c52d991e	#if (!defined(FOLLOWURLS)) \|\| (FOLLOWURLS <= 0)
94aea271	if(!hrefs.scanContents)
093e013c	/* * Don't waste time extracting hrefs (parsing html), nobody * will need it */
c52d991e	return; #endif
94aea271	hrefs.count = 0; hrefs.tag = hrefs.value = NULL; hrefs.contents = NULL; b = getHrefs(mainMessage, &hrefs); if(b) {
d77ac7de	if(hrefs.scanContents) {
94aea271	if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) { mainMessage->isInfected = TRUE;
ecc3d638	*rc = VIRUS;
94aea271	cli_dbgmsg("PH:Phishing found\n"); } }
ecc3d638	if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS))
ad422cc9	do_checkURLs(mctx->dir, &hrefs);
94aea271	} hrefs_done(b,&hrefs);
c52d991e	}
ad422cc9
c52d991e	#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) static void
ad422cc9	do_checkURLs(const char dir, tag_arguments_t hrefs)
c52d991e	{ table_t *t; int i, n;
93509e9a	#ifdef CL_THREAD_SAFE
c52d991e	pthread_t tid[FOLLOWURLS]; struct arg args[FOLLOWURLS]; #endif t = tableCreate(); if(t == NULL) return; n = 0; for(i = 0; i < hrefs->count; i++) { const char url = (const char )hrefs->value[i]; /* * TODO: If it's an image source, it'd be nice to note beacons * where width="0" height="0", which needs support from * the HTML normalise code / if(strncasecmp("http://", url, 7) == 0) { char ptr; #ifndef CL_THREAD_SAFE struct arg arg; #endif char name[NAME_MAX + 1]; if(tableFind(t, url) == 1) { cli_dbgmsg("URL %s already downloaded\n", url); continue; } /* * What about foreign character spoofing? */ if(strchr(url, '%') && strchr(url, '@')) cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url); if(n == FOLLOWURLS) {
ad422cc9	cli_warnmsg("URL %s will not be scanned (FOLLOWURLS limit %d was reached)\n", url, FOLLOWURLS);
c52d991e	break; } (void)tableInsert(t, url, 1); cli_dbgmsg("Downloading URL %s to be scanned\n", url); strncpy(name, url, sizeof(name) - 1); name[sizeof(name) - 1] = '\0'; for(ptr = name; ptr; ptr++) if(ptr == '/') *ptr = '_'; #ifdef CL_THREAD_SAFE args[n].dir = dir;
0cf4cea7	args[n].url = cli_strdup(url); args[n].filename = cli_strdup(name);
f7cd5fbf	args[n].depth = 0;
c52d991e	pthread_create(&tid[n], NULL, getURL, &args[n]); #else
0cf4cea7	arg.url = cli_strdup(url);
c52d991e	arg.dir = dir; arg.filename = name;
f7cd5fbf	arg.depth = 0;
c52d991e	getURL(&arg);
b362ea45	free(arg.url);
c52d991e	#endif ++n; } } tableDestroy(t);
93509e9a	#ifdef CL_THREAD_SAFE
c52d991e	assert(n <= FOLLOWURLS); cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); while(--n >= 0) { pthread_join(tid[n], NULL); free(args[n].filename);
b362ea45	free(args[n].url);
c52d991e	} #endif }
ad422cc9	#else /!FOLLOWURLS/
c52d991e
71ba1dcd	static void
ad422cc9	do_checkURLs(const char dir, tag_arguments_t hrefs)
71ba1dcd	{ }
c52d991e	#endif
00a36e85	#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
2c9c9f3b	/* * Includes some Win32 patches by Gianluigi Tiesi <sherpya@netfarm.it>
ec0cef20	* * FIXME: Often WMF exploits work by sending people an email directing them * to a page which displays a picture containing the exploit. This is not * currently found, since only the HTML on the referred page is downloaded. * It would be useful to scan the HTML for references to pictures and * download them for scanning. But that will hit performance so there is * an issue here.
2c9c9f3b	*/
093e013c
10bb79d0	/* * Removing the reliance on libcurl * Includes some of the freshclam hacks by Everton da Silva Marques * everton.marques@gmail.com> */ #ifndef timercmp
94aea271	# define timercmp(a, b, cmp) \ (((a)->tv_sec == (b)->tv_sec) ? \
10bb79d0	((a)->tv_usec cmp (b)->tv_usec) : \ ((a)->tv_sec cmp (b)->tv_sec)) #endif /* timercmp */ #ifndef timersub
94aea271	# define timersub(a, b, result) \ do { \ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ if ((result)->tv_usec < 0) { \ --(result)->tv_sec; \ (result)->tv_usec += 1000000; \ } \
10bb79d0	} while (0) #endif /* timersub */
fed0cf98	static long nonblock_fcntl(int sock); static void restore_fcntl(int sock, long fcntl_flags); static int nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs); static int connect_error(int sock);
43bebd43	static int my_r_gethostbyname(const char hostname, struct hostent hp, char *buf, size_t len);
10bb79d0
b362ea45	#define NONBLOCK_SELECT_MAX_FAILURES 3 #define NONBLOCK_MAX_BOGUS_LOOPS 10
fed0cf98
8f4b1f3e	/* * Simple implementation of a subset of RFC1945 (HTTP/1.0) * TODO: HTTP/1.1 (RFC2068) */
10bb79d0	static void * #ifdef CL_THREAD_SAFE getURL(void a) #else getURL(struct arg arg) #endif { FILE fp; #ifdef CL_THREAD_SAFE struct arg arg = (struct arg )a; #endif const char url = arg->url; const char dir = arg->dir; const char filename = arg->filename;
ea541184	#ifdef C_WINDOWS SOCKET sd; #else int sd; #endif
10bb79d0	struct sockaddr_in server;
ea541184	#ifdef HAVE_IN_ADDR_T
10bb79d0	in_addr_t ip;
ea541184	#else
5162562e	unsigned int ip;
ea541184	#endif
fed0cf98	in_port_t port; static in_port_t default_port; static int tcp;
f136b57d	int doingsite, firstpacket;
10bb79d0	char *ptr;
b362ea45	int flags, via_proxy;
10bb79d0	const char *proxy;
8f4b1f3e	char buf[BUFSIZ + 1], site[BUFSIZ], fout[NAME_MAX + 1];
10bb79d0
43bebd43	if(strlen(url) > (sizeof(site) - 1)) { cli_dbgmsg("Ignoring long URL \"%s\"\n", url); return NULL; }
10bb79d0	snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename); fp = fopen(fout, "wb"); if(fp == NULL) { cli_errmsg("Can't open '%s' for writing", fout); return NULL; }
43bebd43	cli_dbgmsg("Saving %s to %s\n", url, fout);
fed0cf98
89d4073d	#ifndef C_BEOS
fed0cf98	if(tcp == 0) { const struct protoent *proto = getprotobyname("tcp"); if(proto == NULL) { cli_warnmsg("Unknown prototol tcp, check /etc/protocols\n"); fclose(fp); return NULL; } tcp = proto->p_proto;
ea541184	#ifndef C_WINDOWS
f136b57d	endprotoent();
ea541184	#endif
fed0cf98	}
89d4073d	#endif
fed0cf98	if(default_port == 0) { const struct servent *servent = getservbyname("http", "tcp"); if(servent) default_port = (in_port_t)ntohs(servent->s_port); else default_port = 80;
89ee28d0	#if !defined(C_WINDOWS) && !defined(C_BEOS)
fed0cf98	endservent();
ea541184	#endif
fed0cf98	} port = default_port;
43bebd43	doingsite = 1; ptr = site;
10bb79d0	proxy = getenv("http_proxy"); /* FIXME: handle no_proxy */
b362ea45	via_proxy = (proxy && *proxy); if(via_proxy) {
10bb79d0	if(strncasecmp(proxy, "http://", 7) != 0) { cli_warnmsg("Unsupported proxy protocol\n"); fclose(fp); return NULL; }
43bebd43	cli_dbgmsg("Getting %s via %s\n", url, proxy);
10bb79d0	proxy += 7; while(proxy) { if(doingsite && (proxy == ':')) { port = 0; while(isdigit(++proxy)) { port = 10; port += proxy - '0'; } continue; } if(doingsite && (proxy == '/')) { proxy++; break; } ptr++ = proxy++; } } else { cli_dbgmsg("Getting %s\n", url); if(strncasecmp(url, "http://", 7) != 0) { cli_warnmsg("Unsupported protocol\n"); fclose(fp); return NULL; } url += 7; while(url) { if(doingsite && (url == ':')) { port = 0; while(isdigit(++url)) { port = 10; port += url - '0'; } continue; } if(doingsite && (url == '/')) { url++; break; } ptr++ = url++; }
43bebd43	} *ptr = '\0';
10bb79d0
43bebd43	memset((char *)&server, '\0', sizeof(struct sockaddr_in)); server.sin_family = AF_INET; server.sin_port = (in_port_t)htons(port);
10bb79d0
43bebd43	ip = inet_addr(site);
10bb79d0	#ifdef INADDR_NONE
43bebd43	if(ip == INADDR_NONE) {
10bb79d0	#else
43bebd43	if(ip == (in_addr_t)-1) {
10bb79d0	#endif
43bebd43	struct hostent h;
f136b57d	if((my_r_gethostbyname(site, &h, buf, sizeof(buf)) != 0) \|\| (h.h_addr_list == NULL) \|\| (h.h_addr == NULL)) {
43bebd43	cli_dbgmsg("Unknown host %s\n", site);
10bb79d0	fclose(fp); return NULL; }
43bebd43	memcpy((char *)&ip, h.h_addr, sizeof(ip));
10bb79d0	}
43bebd43	server.sin_addr.s_addr = ip; if((sd = socket(AF_INET, SOCK_STREAM, tcp)) < 0) { fclose(fp); return NULL; } flags = nonblock_fcntl(sd); if(nonblock_connect(sd, (struct sockaddr *)&server, sizeof(struct sockaddr_in), 5) < 0) {
ea541184	closesocket(sd);
43bebd43	fclose(fp); return NULL; } restore_fcntl(sd, flags); /* * TODO: consider HTTP/1.1 */
b362ea45	if(via_proxy)
43bebd43	snprintf(buf, sizeof(buf) - 1,
8a894fd8	"GET %s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n", url, VERSION);
43bebd43	else snprintf(buf, sizeof(buf) - 1,
8a894fd8	"GET /%s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n", url, VERSION);
10bb79d0
93509e9a	/cli_dbgmsg("%s", buf);/
b362ea45
ea541184	if(send(sd, buf, (int)strlen(buf), 0) < 0) { closesocket(sd);
10bb79d0	fclose(fp); return NULL; }
ea541184	#ifdef SHUT_WR
10bb79d0	shutdown(sd, SHUT_WR);
ea541184	#else shutdown(sd, 1); #endif
10bb79d0
f136b57d	firstpacket = 1;
10bb79d0	for(;;) { fd_set set; struct timeval tv;
2cc843da	int n;
10bb79d0	FD_ZERO(&set); FD_SET(sd, &set); tv.tv_sec = 30; /* FIXME: make this customisable */ tv.tv_usec = 0; if(select(sd + 1, &set, NULL, NULL, &tv) < 0) { if(errno == EINTR) continue;
ea541184	closesocket(sd);
10bb79d0	fclose(fp); return NULL; } if(!FD_ISSET(sd, &set)) { fclose(fp);
ea541184	closesocket(sd);
10bb79d0	return NULL; }
884f536d	n = recv(sd, buf, sizeof(buf) - 1, 0);
f136b57d
10bb79d0	if(n < 0) { fclose(fp);
ea541184	closesocket(sd);
10bb79d0	return NULL; } if(n == 0) break;
398d802b	/* * FIXME: Handle header in more than one packet */
f136b57d	if(firstpacket) { char *statusptr; buf[n] = '\0'; statusptr = cli_strtok(buf, 1, " "); if(statusptr) { int status = atoi(statusptr); cli_dbgmsg("HTTP status %d\n", status); free(statusptr);
b362ea45	if((status == 301) \|\| (status == 302)) { char location; location = strstr(buf, "\nLocation: "); if(location) { char end;
f7cd5fbf	unlink(fout); if(arg->depth >= FOLLOWURLS) { cli_warnmsg("URL %s will not be followed to %s (FOLLOWURLS limit %d was reached)\n", arg->url, location, FOLLOWURLS); break; }
b362ea45	fclose(fp);
ea541184	closesocket(sd);
b362ea45	location += 11; free(arg->url); end = location; while(end && (end != '\n')) end++; *end = '\0';
0cf4cea7	arg->url = cli_strdup(location);
f7cd5fbf	arg->depth++;
b362ea45	cli_dbgmsg("Redirecting to %s\n", arg->url); return getURL(arg); } }
f136b57d	}
398d802b	/* * Don't write the HTTP header */
8f4b1f3e	if((ptr = strstr(buf, "\r\n\r\n")) != NULL) { ptr += 4; n -= (int)(ptr - buf); } else if((ptr = strstr(buf, "\n\n")) != NULL) {
398d802b	ptr += 2; n -= (int)(ptr - buf); } else ptr = buf;
f136b57d	firstpacket = 0;
398d802b	} else ptr = buf;
f136b57d
2cc843da	if(n && (fwrite(ptr, n, 1, fp) != 1)) {
10bb79d0	cli_warnmsg("Error writing %d bytes to %s\n", n, fout); break; } } fclose(fp);
ea541184	closesocket(sd);
10bb79d0	return NULL; }
/*
 * Have a copy here because r_gethostbyname is in shared not libclamav :-(
 *
 * Look up hostname and store the answer in *hp, using whichever flavour of
 * gethostbyname_r() configure found. buf (of len bytes) is scratch space
 * for the reentrant flavours; the pointers left in *hp may refer to it.
 *
 * Returns 0 on success, -1 when hostname or hp is NULL, otherwise a
 * non-zero error value (the resolver's error code where one is available)
 */
static int
my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
{
#if	defined(HAVE_GETHOSTBYNAME_R_6)
	/* e.g. Linux */
	struct hostent *hp2 = NULL;
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	/*
	 * This flavour reports failure with a non-zero (positive) return
	 * value and a NULL result pointer, so testing for "< 0" would treat
	 * a failed lookup as success and leave *hp unset
	 */
	if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) || (hp2 == NULL))
		return (ret != 0) ? ret : -1;
#elif	defined(HAVE_GETHOSTBYNAME_R_5)
	/* e.g. BSD, Solaris, Cygwin */
	/*
	 * Configure doesn't work on BeOS. We need -lnet to link, but configure
	 * doesn't add it, so you need to do something like
	 *	LIBS=-lnet ./configure --enable-cache --disable-clamav
	 */
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
		return ret;
#elif	defined(HAVE_GETHOSTBYNAME_R_3)
	/* e.g. HP/UX, AIX */
	if((hostname == NULL) || (hp == NULL))
		return -1;
	if(gethostbyname_r(hostname, &hp, (struct hostent_data *)buf) < 0)
		return h_errno;
#else
	/* Single thread the code e.g. VS2005 */
	struct hostent *hp2;
#ifdef	CL_THREAD_SAFE
	static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

	if((hostname == NULL) || (hp == NULL))
		return -1;
#ifdef	CL_THREAD_SAFE
	pthread_mutex_lock(&hostent_mutex);
#endif
	if((hp2 = gethostbyname(hostname)) == NULL) {
#ifdef	CL_THREAD_SAFE
		pthread_mutex_unlock(&hostent_mutex);
#endif
		return h_errno;
	}
	/*
	 * NOTE(review): this copies the structure only, the pointers inside
	 * it still refer to gethostbyname()'s own storage, which is no
	 * longer protected once the mutex has been released
	 */
	memcpy(hp, hp2, sizeof(struct hostent));
#ifdef	CL_THREAD_SAFE
	pthread_mutex_unlock(&hostent_mutex);
#endif
#endif

	return 0;
}
10bb79d0	static long nonblock_fcntl(int sock) {
ea541184	#ifdef F_GETFL
10bb79d0	long fcntl_flags; /* Save fcntl() flags */ fcntl_flags = fcntl(sock, F_GETFL, 0); if(fcntl_flags < 0) cli_warnmsg("nonblock_fcntl: saving: fcntl(%d, F_GETFL): errno=%d: %s\n", sock, errno, strerror(errno)); else if(fcntl(sock, F_SETFL, fcntl_flags \| O_NONBLOCK)) cli_warnmsg("nonblock_fcntl: fcntl(%d, F_SETFL, O_NONBLOCK): errno=%d: %s\n", sock, errno, strerror(errno)); return fcntl_flags;
ea541184	#else return 0L; #endif
10bb79d0	} static void restore_fcntl(int sock, long fcntl_flags) {
ea541184	#ifdef F_SETFL
f136b57d	if(fcntl_flags != -1) if(fcntl(sock, F_SETFL, fcntl_flags)) {
10bb79d0	cli_warnmsg("restore_fcntl: restoring: fcntl(%d, F_SETFL): errno=%d: %s\n", sock, errno, strerror(errno)); }
ea541184	#endif
10bb79d0	} static int nonblock_connect(int sock, const struct sockaddr addr, socklen_t addrlen, int secs) { / Max. of unexpected select() failures / int select_failures = NONBLOCK_SELECT_MAX_FAILURES; / Max. of useless loops */ int bogus_loops = NONBLOCK_MAX_BOGUS_LOOPS;
f136b57d	struct timeval timeout; /* When we should time out */
10bb79d0	int numfd; /* Highest fdset fd plus 1 / / Calculate into 'timeout' when we should time out / gettimeofday(&timeout, 0); timeout.tv_sec += secs; / Launch (possibly) non-blocking connect() request */ if(connect(sock, addr, addrlen)) { int e = errno;
ad422cc9	cli_dbgmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n",
10bb79d0	sock, e, strerror(e)); switch (e) { case EALREADY: case EINPROGRESS: break; /* wait for connection / case EISCONN: return 0; / connected / default: cli_warnmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n", sock, e, strerror(e)); return -1; / failed / } } else return connect_error(sock); numfd = sock + 1; / Highest fdset fd plus 1 / for (;;) { fd_set fds; struct timeval now; struct timeval wait; int n; / Force timeout if we ran out of time / gettimeofday(&now, 0); if (timercmp(&now, &timeout, >)) { cli_warnmsg("connect timing out (%d secs)\n", secs); break; / failed / } / Calculate into 'wait' how long to wait / timersub(&timeout, &now, &wait); / wait = timeout - now / / Init fds with 'sock' as the only fd / FD_ZERO(&fds); FD_SET(sock, &fds); n = select(numfd, 0, &fds, 0, &wait); if (n < 0) { cli_warnmsg("nonblock_connect: select() failure %d: errno=%d: %s\n", select_failures, errno, strerror(errno)); if (--select_failures >= 0) continue; / keep waiting / break; / failed */ }
ad422cc9	cli_dbgmsg("nonblock_connect: select = %d\n", n);
10bb79d0
fed0cf98	if(n)
10bb79d0	return connect_error(sock); /* Select returned, but there is no work to do... / if (--bogus_loops < 0) { cli_warnmsg("nonblock_connect: giving up due to excessive bogus loops\n"); break; / failed / } } / for loop: keep waiting / return -1; / failed */ } static int connect_error(int sock) {
2325b432	#ifdef SO_ERROR
10bb79d0	int optval; socklen_t optlen; optlen = sizeof(optval); getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen); if(optval) cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d error=%d: %s\n", sock, optval, strerror(optval)); return optval ? -1 : 0;
2325b432	#else return 0; #endif
10bb79d0	}
9b4bb8b7	#endif
93509e9a
#ifdef HAVE_BACKTRACE
/*
 * SIGSEGV handler: put the default action back, so that a second fault
 * cannot re-enter the handler, log a backtrace to syslog and exit
 */
static void
sigsegv(int sig)
{
	signal(SIGSEGV, SIG_DFL);
	print_trace(1);
	exit(SIGSEGV);
}

/*
 * Log up to ten frames of the current call stack, to syslog when
 * use_syslog is non-zero, otherwise as debug messages
 */
static void
print_trace(int use_syslog)
{
	void *array[10];
	size_t size;
	char **strings;
	size_t i;
	pid_t pid = getpid();

	size = backtrace(array, 10);
	strings = backtrace_symbols(array, size);

	if(use_syslog == 0)
		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
	else
		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

	/* backtrace_symbols() allocates its result and so can fail */
	if(strings == NULL)
		return;

	for(i = 0; i < size; i++)
		if(use_syslog)
			/* the cast makes the argument agree with %u */
			syslog(LOG_ERR, "bt[%u]: %s", (unsigned int)i, strings[i]);
		else
			cli_dbgmsg("%s\n", strings[i]);

	/* TODO: dump the current email */

	free(strings);
}
#endif
f24bf390
6cad6a15	/* See also clamav-milter */
ddea752e	static bool usefulHeader(int commandNumber, const char *cmd) { switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE: return TRUE; default: if(strcasecmp(cmd, "From") == 0) return TRUE;
6cad6a15	if(strcasecmp(cmd, "Received") == 0)
ddea752e	return TRUE;
6cad6a15	if(strcasecmp(cmd, "De") == 0)
ddea752e	return TRUE; } return FALSE; }
/*
 * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
 *
 * Reads one line from fin into buffer, which holds len bytes. Whichever of
 * the four line endings is found is stored as a single '\n'. A line that
 * does not fit is returned in pieces, one piece per call.
 *
 * Returns buffer, or NULL at end of file, on a read error or when called
 * with a NULL buffer or with len below 2. At least 2 bytes are needed
 * because a line ending is stored as '\n' followed by the terminating NUL
 */
static char *
getline_from_mbox(char *buffer, size_t len, FILE *fin)
{
	char *ret;

	if(feof(fin))
		return NULL;

	if((len < 2) || (buffer == NULL)) {
		cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs\n");
		return NULL;
	}

	ret = buffer;

	do {
		int c = getc(fin);

		if(ferror(fin))
			return NULL;

		switch(c) {
			case '\n':
				*buffer++ = '\n';
				/* swallow the '\r' of a "\n\r" line ending */
				c = getc(fin);
				if((c != '\r') && !feof(fin))
					ungetc(c, fin);
				break;
			default:
				*buffer++ = (char)c;
				/* goes on to the loop test, i.e. the next character */
				continue;
			case EOF:
				break;
			case '\r':
				*buffer++ = '\n';
				/* swallow the '\n' of a "\r\n" line ending */
				c = getc(fin);
				if((c != '\n') && !feof(fin))
					ungetc(c, fin);
				break;
		}
		break;
	} while(--len > 1);

	/*
	 * len started at 2 or more and the loop stops counting down at 1, so
	 * there is always room left for the terminator
	 */
	*buffer = '\0';

	if(len == 1)
		/* overflows will have appeared on separate lines */
		cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");

	return ret;
}
b2ba24f5
a603478f	/* * Is this line a candidate for the start of a bounce message? */
b2ba24f5	static bool
a603478f	isBounceStart(const char *line)
b2ba24f5	{ if(line == NULL) return FALSE; if(*line == '\0') return FALSE;
25071deb	/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
b2ba24f5	return FALSE; if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
25071deb	return FALSE;*/
edee0700	if(cli_filetype((const unsigned char *)line, strlen(line)) != CL_TYPE_MAIL)
b2ba24f5	return FALSE; if((strncmp(line, "From ", 5) == 0) \|\| (strncmp(line, ">From ", 6) == 0)) { int numSpaces = 0, numDigits = 0; do if(line == ' ') numSpaces++; else if(isdigit(line)) numDigits++; while(*++line != '\0'); if(numSpaces < 6) return FALSE; if(numDigits < 11) return FALSE; } return TRUE; }
a05e6d45	/* * Extract a binhexEncoded message, return if it's found to be infected as we * extract it */ static bool
47d9cc65	exportBinhexMessage(const char dir, message m)
a05e6d45	{ bool infected = FALSE; fileblob *fb; if(messageGetEncoding(m) == NOENCODING) messageSetEncoding(m, "x-binhex");
2673dc74	fb = messageToFileblob(m, dir, 0);
a05e6d45	if(fb) { if(fileblobContainsVirus(fb)) infected = TRUE; cli_dbgmsg("Binhex file decoded to %s\n", fileblobGetFilename(fb)); fileblobDestroy(fb); } else cli_errmsg("Couldn't decode binhex file to %s\n", dir); return infected; }
c1fce7f7	/*
47d9cc65	* Locate any bounce message and extract it. Return 1 if anything found / static int exportBounceMessage(text start, const mbox_ctx mctx) { int rc = 0; text t; fileblob fb; / * Attempt to save the original (unbounced) * message - clamscan will find that in the * directory and call us again (with any luck) * having found an e-mail message to handle. * * This finds a lot of false positives, the * search that a content type is in the * bounce (i.e. it's after the bounce header) * helps a bit. * * messageAddLine * optimisation could help here, but needs * careful thought, do it with line numbers * would be best, since the current method in * messageAddLine of checking encoding first * must remain otherwise non bounce messages * won't be scanned / for(t = start; t; t = t->t_next) { const char txt = lineGetData(t->t_line);
ecc3d638	char cmd[RFC2821LENGTH + 1];
47d9cc65	if(txt == NULL) continue; if(cli_strtokbuf(txt, 0, ":", cmd) == NULL) continue; switch(tableFind(mctx->rfc821Table, cmd)) { case CONTENT_TRANSFER_ENCODING: if((strstr(txt, "7bit") == NULL) && (strstr(txt, "8bit") == NULL)) break; continue; case CONTENT_DISPOSITION: break; case CONTENT_TYPE: if(strstr(txt, "text/plain") != NULL) t = NULL; break; default: if(strcasecmp(cmd, "From") == 0) start = t; else if(strcasecmp(cmd, "Received") == 0) start = t; continue; } break; } if(t && ((fb = fileblobCreate()) != NULL)) { cli_dbgmsg("Found a bounce message\n"); fileblobSetFilename(fb, mctx->dir, "bounce"); /fileblobSetCTX(fb, mctx->ctx);/ if(textToFileblob(start, fb, 1) == NULL) cli_dbgmsg("Nothing new to save in the bounce message\n"); else rc = 1; fileblobDestroy(fb); } else cli_dbgmsg("Not found a bounce message\n"); return rc; } /*
c1fce7f7	* Handle the ith element of a number of multiparts, e.g. multipart/alternative / static message
ecc3d638	do_multipart(message mainMessage, message messages, int i, mbox_status rc, mbox_ctx mctx, message messageIn, text **tptr, unsigned int recursion_level)
c1fce7f7	{ bool addToText = FALSE; const char dtype; #ifndef SAVE_TO_DISC message body; #endif message *aMessage = messages[i];
d77ac7de	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
c1fce7f7	if(aMessage == NULL) return mainMessage;
69c62847	if(*rc != OK) return mainMessage;
c1fce7f7	cli_dbgmsg("Mixed message part %d is of type %d\n", i, messageGetMimeType(aMessage)); switch(messageGetMimeType(aMessage)) { case APPLICATION: case AUDIO: case IMAGE: case VIDEO: break; case NOMIME: cli_dbgmsg("No mime headers found in multipart part %d\n", i); if(mainMessage) { if(binhexBegin(aMessage)) { cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
47d9cc65	if(exportBinhexMessage(mctx->dir, mainMessage))
5684fccf	*rc = VIRUS;
c1fce7f7	} if(mainMessage != messageIn) messageDestroy(mainMessage); mainMessage = NULL; } else if(aMessage) { if(binhexBegin(aMessage)) { cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
47d9cc65	if(exportBinhexMessage(mctx->dir, aMessage))
5684fccf	*rc = VIRUS;
c1fce7f7	assert(aMessage == messages[i]); messageReset(messages[i]); } } addToText = TRUE; if(messageGetBody(aMessage) == NULL) /* * No plain text version / cli_dbgmsg("No plain text alternative\n"); break; case TEXT: dtype = messageGetDispositionType(aMessage); cli_dbgmsg("Mixed message text part disposition \"%s\"\n", dtype); if(strcasecmp(dtype, "attachment") == 0) break; if((dtype == '\0') \|\| (strcasecmp(dtype, "inline") == 0)) { const char cptr; if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage); mainMessage = NULL; cptr = messageGetMimeSubtype(aMessage); cli_dbgmsg("Mime subtype \"%s\"\n", cptr); if((tableFind(mctx->subtypeTable, cptr) == PLAIN) && (messageGetEncoding(aMessage) == NOENCODING)) { char filename; /* * Strictly speaking * a text/plain part is * not an attachment. We * pretend it is so that * we can decode and * scan it / filename = (char )messageFindArgument(aMessage, "filename"); if(filename == NULL) filename = (char *)messageFindArgument(aMessage, "name"); if(filename == NULL) { cli_dbgmsg("Adding part to main message\n"); addToText = TRUE; } else { cli_dbgmsg("Treating %s as attachment\n", filename); free(filename); } } else {
56aea026	const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
c3a79a7a	if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
56aea026	checkURLs(aMessage, mctx, rc, 1);
d77ac7de	else if(doPhishingScan)
56aea026	checkURLs(aMessage, mctx, rc, is_html);
c1fce7f7	messageAddArgument(aMessage, "filename=mixedtextportion"); } break; } cli_dbgmsg("Text type %s is not supported\n", dtype); return mainMessage; case MESSAGE: /* Content-Type: message/rfc822 / cli_dbgmsg("Found message inside multipart (encoding type %d)\n", messageGetEncoding(aMessage)); #ifndef SCAN_UNENCODED_BOUNCES switch(messageGetEncoding(aMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: if(encodingLine(aMessage) == NULL) { / * This means that the message * has no attachments * * The test for * messageGetEncoding is needed * since encodingLine won't have * been set if the message * itself has been encoded / cli_dbgmsg("Unencoded multipart/message will not be scanned\n"); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; return mainMessage; } / FALLTHROUGH / default: cli_dbgmsg("Encoded multipart/message will be scanned\n"); } #endif #if 0 messageAddStrAtTop(aMessage, "Received: by clamd (message/rfc822)"); #endif #ifdef SAVE_TO_DISC / * Save this embedded message * to a temporary file */
3f46285b	saveTextPart(aMessage, mctx->dir, 1);
c1fce7f7	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; #else /*
b912eaf2	* Scan in memory, faster but is open to DoS attacks * when many nested levels are involved.
c1fce7f7	*/
0072fa21	body = parseEmailHeaders(aMessage, mctx->rfc821Table);
c1fce7f7	/* * We've fininished with the * original copy of the message, * so throw that away and * deal with the encapsulated * message as a message. * This can save a lot of memory */ assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; if(body) {
0072fa21	messageSetCTX(body, mctx->ctx); *rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
69c62847	if((*rc == OK) && messageContainsVirus(body))
5684fccf	*rc = VIRUS;
c1fce7f7	messageDestroy(body); } #endif return mainMessage; case MULTIPART: /* * It's a multi part within a multi part * Run the message parser on this bit, it won't * be an attachment / cli_dbgmsg("Found multipart inside multipart\n"); if(aMessage) { / * The headers were parsed when reading in the * whole multipart section */
242ffd7a	rc = parseEmailBody(aMessage, tptr, mctx, recursion_level + 1);
69c62847	cli_dbgmsg("Finished recursion, rc = %d\n", *rc);
c1fce7f7	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; } else {
242ffd7a	*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
c1fce7f7	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage); mainMessage = NULL; } return mainMessage; default: cli_warnmsg("Only text and application attachments are supported, type = %d\n", messageGetMimeType(aMessage)); return mainMessage; } if(addToText) { cli_dbgmsg("Adding to non mime-part\n"); tptr = textAdd(tptr, messageGetBody(aMessage)); } else {
2673dc74	fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
c1fce7f7	if(fb) { if(fileblobContainsVirus(fb))
5684fccf	*rc = VIRUS;
c1fce7f7	fileblobDestroy(fb); } } if(messageContainsVirus(aMessage))
5684fccf	*rc = VIRUS;
c1fce7f7	messageDestroy(aMessage); messages[i] = NULL; return mainMessage; }
/*
 * Returns the number of quote characters in the given string
 */
static int
count_quotes(const char *buf)
{
	const char *hit;
	int total = 0;

	/* hop from one '"' to the next until none is left */
	for(hit = strchr(buf, '\"'); hit != NULL; hit = strchr(hit + 1, '\"'))
		total++;

	return total;
}
842c7d49	/* * Will the next line be a folded header? See RFC2822 section 2.2.3 / static bool next_is_folded_header(const text t) { const text next = t->t_next; const char data, ptr; if(next == NULL) return FALSE; if(next->t_line == NULL) return FALSE; data = lineGetData(next->t_line); /
b912eaf2	* Section B.2 of RFC822 says TAB or SPACE means a continuation of the
842c7d49	* previous entry. / if(isblank(data[0])) return TRUE; if(strchr(data, '=') == NULL) / * Avoid false positives with * Content-Type: text/html; * Content-Transfer-Encoding: quoted-printable */ return FALSE;
bc6bbeff
842c7d49	/* * Some are broken and don't fold headers lines * correctly as per section 2.2.3 of RFC2822. * Generally they miss the white space at * the start of the fold line: * Content-Type: multipart/related; * type="multipart/alternative"; * boundary="----=_NextPart_000_006A_01C6AC47.348CB550" * should read: * Content-Type: multipart/related; * type="multipart/alternative"; * boundary="----=_NextPart_000_006A_01C6AC47.348CB550" * Since we're a virus checker not an RFC * verifier we need to handle these / data = lineGetData(t->t_line); ptr = strchr(data, '\0'); while(--ptr > data) switch(ptr) { case ';': return TRUE; case '\n': case ' ': case '\r': case '\t': continue; /* white space at end of line */ default: return FALSE; } return FALSE; }
0cf4cea7	/* * This routine is called on the first line of the body of * an email to handle broken messages that have newlines * in the middle of its headers / static bool newline_in_header(const char line) { cli_dbgmsg("newline_in_header, check \"%s\"\n", line); if(strncmp(line, "Message-Id: ", 12) == 0) return TRUE; if(strncmp(line, "Date: ", 6) == 0) return TRUE; return FALSE; }