GitList

libclamav/mbox.c

b151ef55	/* * Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7cef72ea	* * Change History: * $Log: mbox.c,v $
6613d595	* Revision 1.77 2004/06/18 10:07:12 nigelhorne * Allow any number of alternatives in multipart messages *
8a88fb93	* Revision 1.76 2004/06/16 08:07:39 nigelhorne * Added thread safety *
93002b48	* Revision 1.75 2004/06/14 09:07:10 nigelhorne * Handle spam using broken e-mail generators for multipart/alternative *
7b8fb055	* Revision 1.74 2004/06/09 18:18:59 nigelhorne * Find uuencoded viruses in multipart/mixed that have no start of message boundaries *
4b0a2de6	* Revision 1.73 2004/05/14 08:15:55 nigelhorne * Use mkstemp on cygwin *
a750c93c	* Revision 1.72 2004/05/12 11:20:37 nigelhorne * More bounce message false positives handled *
92915cee	* Revision 1.71 2004/05/10 11:35:11 nigelhorne * No need to update mbox.c for cli_filetype problem
0b244177	*
2e0f78a6	* Revision 1.69 2004/05/06 11:26:49 nigelhorne * Force attachments marked as RFC822 messages to be scanned *
3db105a2	* Revision 1.68 2004/04/29 08:59:24 nigelhorne * Tidied up SetDispositionType *
7584963d	* Revision 1.67 2004/04/23 10:47:41 nigelhorne * If an inline text portion has a filename treat it as an attachment *
bf497d0a	* Revision 1.66 2004/04/14 08:32:21 nigelhorne * When debugging print the email number in mailboxes *
7baeb4a6	* Revision 1.65 2004/04/07 18:18:07 nigelhorne * Some occurrences of W97M.Lexar were let through *
4465fb04	* Revision 1.64 2004/04/05 09:32:20 nigelhorne * Added SCAN_TO_DISC define *
4c927f11	* Revision 1.63 2004/04/01 15:32:34 nigelhorne * Graceful exit if messageAddLine fails in strdup *
6638be41	* Revision 1.62 2004/03/31 17:00:20 nigelhorne * Code tidy up free memory earlier *
74b5c349	* Revision 1.61 2004/03/30 22:45:13 nigelhorne * Better handling of multipart/multipart messages *
ffd59a3e	* Revision 1.60 2004/03/29 09:22:03 nigelhorne * Tidy up code and reduce shuffling of data *
c95ae98b	* Revision 1.59 2004/03/26 11:08:36 nigelhorne * Use cli_writen *
02c9dc2a	* Revision 1.58 2004/03/25 22:40:46 nigelhorne * Removed even more calls to realloc and some duplicated code *
627465e7	* Revision 1.57 2004/03/21 17:19:49 nigelhorne * Handle bounce messages with no headers *
f5a4d7e8	* Revision 1.56 2004/03/21 09:41:26 nigelhorne * Faster scanning for non MIME messages *
3e556ea8	* Revision 1.55 2004/03/20 17:39:23 nigelhorne * First attempt to handle all bounces *
a980b067	* Revision 1.54 2004/03/19 15:40:45 nigelhorne * Handle empty content-disposition types *
af852ae0	* Revision 1.53 2004/03/19 08:08:02 nigelhorne * If a message part of a multipart contains an RFC822 message that has no encoding don't scan it *
b759d5eb	* Revision 1.52 2004/03/18 21:51:41 nigelhorne * If a message only contains a single RFC822 message that has no encoding don't save for scanning *
bad123c6	* Revision 1.51 2004/03/17 19:48:12 nigelhorne * Improved embedded RFC822 message handling *
09ccd6e0	* Revision 1.50 2004/03/10 22:05:39 nigelhorne * Fix seg fault when a message in a multimessage mailbox fails to scan *
b0d8b0db	* Revision 1.49 2004/03/04 13:01:58 nigelhorne * Ensure all bounces are rescanned by cl_mbox *
6e07998e	* Revision 1.48 2004/02/27 12:16:26 nigelhorne * Catch lines just containing ':' *
39ff42ee	* Revision 1.47 2004/02/23 10:13:08 nigelhorne * Handle spaces before : in headers *
1d53a315	* Revision 1.46 2004/02/18 13:29:19 nigelhorne * Stop buffer overflows for files with very long suffixes *
26564cf5	* Revision 1.45 2004/02/18 10:07:40 nigelhorne * Find some Yaha *
c7256385	* Revision 1.44 2004/02/15 08:45:54 nigelhorne * Avoid scanning the same file twice *
0704dad8	* Revision 1.43 2004/02/14 19:04:05 nigelhorne * Handle spaces in boundaries *
0dbec6b9	* Revision 1.42 2004/02/14 17:23:45 nigelhorne * Had deleted O_BINARY by mistake *
d32e668f	* Revision 1.41 2004/02/12 18:43:58 nigelhorne * Use mkstemp on Solaris *
a66ca28a	* Revision 1.40 2004/02/11 08:15:59 nigelhorne * Use O_BINARY for cygwin *
8b242bb9	* Revision 1.39 2004/02/06 13:46:08 kojm * Support for clamav-config.h *
b9ec1705	* Revision 1.38 2004/02/04 13:29:48 nigelhorne * Handle partial writes - and print when write fails *
0bf1353d	* Revision 1.37 2004/02/03 22:54:59 nigelhorne * Catch another example of Worm.Dumaru.Y *
a64bf87e	* Revision 1.36 2004/02/02 09:52:57 nigelhorne * Some instances of Worm.Dumaru.Y got through the net *
5a01973c	* Revision 1.35 2004/01/28 10:15:24 nigelhorne * Added support to scan some bounce messages *
5c7cf3f1	* Revision 1.34 2004/01/24 17:43:37 nigelhorne * Removed (incorrect) warning about uninitialised variable *
2250ea69	* Revision 1.33 2004/01/23 10:38:22 nigelhorne * Fixed memory leak in handling some multipart messages *
4e7ca2b1	* Revision 1.32 2004/01/23 08:51:19 nigelhorne * Add detection of uuencoded viruses in single part multipart/mixed files *
9a35912c	* Revision 1.31 2004/01/22 22:13:06 nigelhorne * Prevent infinite recursion on broken uuencoded files *
8c0250d5	* Revision 1.30 2004/01/13 10:12:05 nigelhorne * Remove duplicate code when handling multipart messages *
0ada8f3e	* Revision 1.29 2004/01/09 18:27:11 nigelhorne * ParseMimeHeader could corrupt arg *
7e572372	* Revision 1.28 2004/01/09 15:07:42 nigelhorne * Re-engineered update 1.11 lost in recent changes *
68badbc1	* Revision 1.27 2004/01/09 14:45:59 nigelhorne * Removed duplicated code in multipart handler *
852e3ce4	* Revision 1.26 2004/01/09 10:20:54 nigelhorne * Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages *
441992ed	* Revision 1.25 2004/01/06 14:41:18 nigelhorne * Handle headers which do not have a space after the ':' *
f54a8635	* Revision 1.24 2003/12/20 13:55:36 nigelhorne * Ensure multipart just save the bodies of attachments *
68be129f	* Revision 1.23 2003/12/14 18:07:01 nigelhorne * Some viruses in embedded messages were not being found *
062ba8b0	* Revision 1.22 2003/12/13 16:42:23 nigelhorne * call new cli_chomp *
7fca6080	* Revision 1.21 2003/12/11 14:35:48 nigelhorne * Better handling of encapsulated messages *
f5e9abc8	* Revision 1.20 2003/12/06 04:03:26 nigelhorne * Handle hand crafted emails that incorrectly set multipart headers *
2227f20e	* Revision 1.19 2003/11/21 07:26:31 nigelhorne * Scan multipart alternatives that have no boundaries, finds some uuencoded happy99 *
181c7548	* Revision 1.18 2003/11/17 08:13:21 nigelhorne * Handle spaces at the end of lines of MIME headers *
04421a14	* Revision 1.17 2003/11/06 05:06:42 nigelhorne * Some applications weren't being scanned *
295e425f	* Revision 1.16 2003/11/04 08:24:00 nigelhorne * Handle multipart messages that have no text portion *
07cbf822	* Revision 1.15 2003/10/12 20:13:49 nigelhorne * Use NO_STRTOK_R consistent with message.c *
fdc8a467	* Revision 1.14 2003/10/12 12:37:11 nigelhorne * Appledouble encoded EICAR now found *
4674dc9a	* Revision 1.13 2003/10/01 09:27:42 nigelhorne * Handle content-type header going over to a new line *
6ecba059	* Revision 1.12 2003/09/29 17:10:19 nigelhorne * Moved stub from heap to stack since its maximum size is known *
47ab99fa	* Revision 1.11 2003/09/29 12:58:32 nigelhorne * Handle Content-Type: /; name="eicar.com" *
7cef72ea	* Revision 1.10 2003/09/28 10:06:34 nigelhorne * Compilable under SCO; removed duplicate code with message.c *
b151ef55	*/
/* RCS identification string embedded in the object file */
static	char	const	rcsid[] = "$Id: mbox.c,v 1.77 2004/06/18 10:07:12 nigelhorne Exp $";

#if	HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#ifndef	CL_DEBUG
/*
 * Map CLAMAV debug onto standard: defining NDEBUG would disable assert().
 * The definition is deliberately left commented out.
 */
/*#define	NDEBUG*/
#endif

#ifdef	CL_THREAD_SAFE
#ifndef	_REENTRANT
#define	_REENTRANT	/* for Solaris 2.8 */
#endif
#endif
b151ef55	#include <stdio.h> #include <stdlib.h> #include <errno.h> #include <assert.h> #include <string.h> #include <strings.h> #include <ctype.h> #include <time.h> #include <unistd.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/types.h>
0bcad2b1	#include <sys/param.h>
b151ef55	#include <clamav.h>
8a88fb93	#ifdef CL_THREAD_SAFE #include <pthread.h> #endif
b151ef55	#include "table.h" #include "mbox.h" #include "blob.h" #include "text.h" #include "message.h" #include "others.h" #include "defaults.h"
7fca6080	#include "str.h"
b151ef55
/*
 * Fall back to strtok() when strtok_r() is unavailable or when the library
 * is built without thread safety. The third (save pointer) argument is
 * discarded by the macro, so callers only declare it under CL_THREAD_SAFE.
 */
#if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
#undef	strtok_r
#undef	__strtok_r
#define	strtok_r(a,b,c)	strtok(a,b)
#endif

/* required for AIX and Tru64 */
#ifdef	TRUE
#undef	TRUE
#endif
#ifdef	FALSE
#undef	FALSE
#endif

/* File-local boolean type */
typedef	enum	{ FALSE = 0, TRUE = 1 } bool;
68be129f	static message parseEmailHeaders(const message m, const table_t *rfc821Table);
8c0250d5	static int parseEmailHeader(message m, const char line, const table_t *rfc821Table);
2250ea69	static int parseEmailBody(message messageIn, blob blobsIn, int nBlobs, text textIn, const char dir, table_t rfc821Table, table_t *subtypeTable);
b151ef55	static int boundaryStart(const char line, const char boundary); static int endOfMessage(const char line, const char boundary); static int initialiseTables(table_t rfc821Table, table_t subtypeTable); static int getTextPart(message const messages[], size_t size); static size_t strip(char buf, int len); static bool continuationMarker(const char line); static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char *arg);
5a01973c	static void saveTextPart(message m, const char dir);
0bcad2b1	static bool saveFile(const blob b, const char dir);
b151ef55
/* Maximum number of attachments that we accept */
#define	MAX_ATTACHMENTS	10

/* Maximum line length according to RFC821 */
#define	LINE_LENGTH	1000

/* Hashcodes for our hash tables */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Mime sub types */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
b151ef55	static const struct tableinit { const char *key; int value; } rfc821headers[] = {
68badbc1	/* TODO: make these regular expressions */
b759d5eb	{ "Content-Type", CONTENT_TYPE },
39ff42ee	{ "Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING }, { "Content-Disposition", CONTENT_DISPOSITION },
b151ef55	{ NULL, 0 } }, mimeSubtypes[] = { /* subtypes of Text / { "plain", PLAIN }, { "enriched", ENRICHED }, { "html", HTML }, { "richtext", RICHTEXT }, / subtypes of Multipart */ { "mixed", MIXED }, { "alternative", ALTERNATIVE }, { "digest", DIGEST }, { "signed", SIGNED }, { "parallel", PARALLEL }, { "related", RELATED }, { "report", REPORT },
fdc8a467	{ "appledouble", APPLEDOUBLE },
b151ef55	{ NULL, 0 } };
8a88fb93	#ifdef CL_THREAD_SAFE static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
7b8fb055	static table_t rfc821Table, subtypeTable;
b151ef55
/* Maximum filenames under various systems */
#ifndef	NAME_MAX	/* e.g. Linux */
#ifdef	MAXNAMELEN	/* e.g. Solaris */
#define	NAME_MAX	MAXNAMELEN
#else
#ifdef	FILENAME_MAX	/* e.g. SCO */
#define	NAME_MAX	FILENAME_MAX
#endif
#endif
#endif

/* Platforms without O_BINARY get a flag value that changes nothing */
#ifndef	O_BINARY
#define	O_BINARY	0
#endif

#define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
b151ef55	/* * TODO: when signal handling is added, need to remove temp files when a * signal is received * TODO: add option to scan in memory not via temp files, perhaps with a
74b5c349	* named pipe or memory mapped file, though this won't work on big e-mails * containing many levels of encapsulated messages - it'd just take too much * RAM
15c8cace	* TODO: if debug is enabled, catch a segfault and dump the current e-mail * in it's entirety, then call abort()
c6259ac5	* TODO: parse .msg format files
fdc8a467	* TODO: fully handle AppleDouble format, see * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
f54a8635	* TODO: ensure parseEmailHeaders is always called before parseEmailBody * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
b151ef55	/ int cl_mbox(const char dir, int desc) {
c6259ac5	int retcode, i;
f54a8635	message m, body;
b151ef55	FILE *fd;
c6259ac5	char buffer[LINE_LENGTH];
b151ef55	cli_dbgmsg("in mbox()\n");
c6259ac5	i = dup(desc); if((fd = fdopen(i, "rb")) == NULL) { cli_errmsg("Can't open descriptor %d\n", desc); close(i);
b151ef55	return -1;
c6259ac5	} if(fgets(buffer, sizeof(buffer), fd) == NULL) { /* empty message */ fclose(fd); return 0; }
b151ef55	m = messageCreate();
7b8fb055	if(m == NULL) {
c6259ac5	fclose(fd);
7b8fb055	return 0; }
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&tables_mutex); #endif
7b8fb055	if(rfc821Table == NULL) { assert(subtypeTable == NULL); if(initialiseTables(&rfc821Table, &subtypeTable) < 0) {
8a88fb93	rfc821Table = NULL; subtypeTable = NULL; #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
7b8fb055	messageDestroy(m); fclose(fd); return -1; }
b151ef55	}
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
b151ef55
f54a8635	/* * is it a UNIX style mbox with more than one * mail message, or just a single mail message? */ if(strncmp(buffer, "From ", 5) == 0) {
b151ef55	/*
c6259ac5	* Have been asked to check a UNIX style mbox file, which * may contain more than one e-mail message to decode
b151ef55	*/
f54a8635	bool lastLineWasEmpty = FALSE;
bf497d0a	int messagenumber = 1;
b151ef55
c6259ac5	do { /cli_dbgmsg("read: %s", buffer);/
b151ef55
f54a8635	cli_chomp(buffer); if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
bf497d0a	cli_dbgmsg("Deal with email number %d\n", messagenumber++);
b151ef55	/*
f54a8635	* End of a message in the mail box
b151ef55	*/
f54a8635	body = parseEmailHeaders(m, rfc821Table); messageDestroy(m); if(messageGetBody(body))
09ccd6e0	if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) { messageReset(body); m = body; continue; }
b151ef55	/*
f54a8635	* Starting a new message, throw away all the * information about the old one
b151ef55	*/
f54a8635	m = body; messageReset(body);
b151ef55
c6259ac5	cli_dbgmsg("Finished processing message\n");
f54a8635	} else
a66ca28a	lastLineWasEmpty = (bool)(buffer[0] == '\0');
4c927f11	if(messageAddLine(m, buffer, 1) < 0) break;
c6259ac5	} while(fgets(buffer, sizeof(buffer), fd) != NULL);
bf497d0a	cli_dbgmsg("Deal with email number %d\n", messagenumber);
f54a8635	} else
7fca6080	/* * It's a single message, parse the headers then the body */
b759d5eb	do /* * No need to preprocess such as cli_chomp() since * that'll be done by parseEmailHeaders()
4465fb04	* * TODO: this needlessly creates a message object, * it'd be better if parseEmailHeaders could also * read in from a file. I do not want to lump the * parseEmailHeaders code here, that'd be a duplication * of code I want to avoid
b759d5eb	*/
4c927f11	if(messageAddLine(m, buffer, 1) < 0) break;
b759d5eb	while(fgets(buffer, sizeof(buffer), fd) != NULL);
7fca6080
b151ef55	fclose(fd);
c6259ac5	retcode = 0;
f54a8635	body = parseEmailHeaders(m, rfc821Table); messageDestroy(m);
b151ef55	/* * Write out the last entry in the mailbox */
f54a8635	if(messageGetBody(body)) if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable))
c6259ac5	retcode = -1;
b151ef55	/* * Tidy up and quit */
f54a8635	messageDestroy(body);
b151ef55	cli_dbgmsg("cli_mbox returning %d\n", retcode); return retcode; } /*
7fca6080	* The given message contains a raw e-mail. * * This function parses the headers of m and sets the message's arguments
68be129f	* * Returns the message's body with the correct arguments set
7fca6080	*/
68be129f	static message * parseEmailHeaders(const message m, const table_t rfc821Table)
7fca6080	{
4465fb04	bool inContinuationHeader = FALSE; /* state machine: ugh */
68be129f	bool inHeader = TRUE;
ffd59a3e	const text *t;
f54a8635	message *ret; if(m == NULL) return NULL; ret = messageCreate();
7fca6080
ffd59a3e	for(t = messageGetBody(m); t; t = t->t_next) {
7fca6080	char buffer = strdup(t->t_text); #ifdef CL_THREAD_SAFE char strptr; #endif
ffd59a3e	if(buffer == NULL) break;
062ba8b0	cli_chomp(buffer);
7fca6080	/* * Section B.2 of RFC822 says TAB or SPACE means
062ba8b0	* a continuation of the previous entry.
7fca6080	*/
68be129f	if(inHeader && ((buffer[0] == '\t') \|\| (buffer[0] == ' ')))
062ba8b0	inContinuationHeader = TRUE;
7fca6080
062ba8b0	if(inContinuationHeader) { const char *ptr;
7fca6080	if(!continuationMarker(buffer))
062ba8b0	inContinuationHeader = FALSE; /* no more args */
7fca6080	/* * Add all the arguments on the line */ for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
68be129f	messageAddArgument(ret, ptr);
ffd59a3e	free(buffer);
68be129f	} else if(inHeader) {
7fca6080	cli_dbgmsg("Deal with header %s\n", buffer); /* * A blank line signifies the end of the header and * the start of the text */
ffd59a3e	if(strlen(buffer) == 0) {
7fca6080	cli_dbgmsg("End of header information\n");
09ccd6e0	inContinuationHeader = inHeader = FALSE;
8c0250d5	} else if(parseEmailHeader(ret, buffer, rfc821Table) == CONTENT_TYPE) inContinuationHeader = continuationMarker(buffer);
ffd59a3e	free(buffer);
09ccd6e0	} else { /cli_dbgmsg("Add line to body '%s'\n", buffer);/
ffd59a3e	messageAddLine(ret, buffer, 0);
09ccd6e0	}
ffd59a3e	}
68be129f
4465fb04	messageClean(ret);
09ccd6e0	cli_dbgmsg("parseEmailHeaders: return\n");
68be129f	return ret;
7fca6080	} /*
8c0250d5	* Handle a header line of an email message / static int parseEmailHeader(message m, const char line, const table_t rfc821Table) {
74b5c349	char copy, cmd;
8c0250d5	int ret = -1; #ifdef CL_THREAD_SAFE char *strptr; #endif
0704dad8	cli_dbgmsg("parseEmailHeader '%s'\n", line);
74b5c349	if(strchr(line, ':') == NULL) return -1; copy = strdup(line);
39ff42ee	cmd = strtok_r(copy, ":", &strptr);
8c0250d5
6e07998e	if(cmd && *cmd) {
8c0250d5	char arg = strtok_r(NULL, "", &strptr); if(arg) / * Found a header such as * Content-Type: multipart/mixed; * set arg to be * "multipart/mixed" and cmd to
39ff42ee	* be "Content-Type"
8c0250d5	/ ret = parseMimeHeader(m, cmd, rfc821Table, arg); } free(copy); return ret; } /
b151ef55	* This is a recursive routine. *
7fca6080	* This function parses the body of mainMessage and saves its attachments in dir *
68be129f	* mainMessage is the buffer to be parsed, it contains an e-mail's body, without * any headers. First
062ba8b0	* time of calling it'll be
b151ef55	* the whole message. Later it'll be parts of a multipart message * textIn is the plain text message being built up so far * blobsIn contains the array of attachments found so far *
0bcad2b1	* Returns:
b151ef55	* 0 for fail
852e3ce4	* 1 for success, attachments saved * 2 for success, attachments not saved
b151ef55	/ static int / success or fail */
2250ea69	parseEmailBody(message messageIn, blob blobsIn, int nBlobs, text textIn, const char dir, table_t rfc821Table, table_t *subtypeTable)
b151ef55	{
6613d595	message *messages; / parts of a multipart message */
5c7cf3f1	int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
b151ef55	text aText; blob blobList[MAX_ATTACHMENTS], *blobs; const char cptr;
2250ea69	message *mainMessage;
b151ef55
7fca6080	cli_dbgmsg("in parseEmailBody(nBlobs = %d)\n", nBlobs);
b151ef55	/* Pre-assertions */ if(nBlobs >= MAX_ATTACHMENTS) { cli_warnmsg("Not all attachments will be scanned\n"); return 2; } aText = textIn; blobs = blobsIn;
6613d595	messages = NULL;
2250ea69	mainMessage = messageIn;
b151ef55	/* Anything left to be parsed? */
0bcad2b1	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
c7256385	int numberOfAttachments = 0, numberOfNewAttachments;
b151ef55	mime_type mimeType; const char mimeSubtype; const text t_line;
f5e9abc8	/bool isAlternative;/
b151ef55	const char boundary; message aMessage;
c6259ac5	cli_dbgmsg("Parsing mail file\n");
b151ef55	mimeType = messageGetMimeType(mainMessage); mimeSubtype = messageGetMimeSubtype(mainMessage); if((mimeType == TEXT) && (tableFind(subtypeTable, mimeSubtype) == PLAIN)) { /* * This is effectively no encoding, notice that we * don't check that charset is us-ascii */ cli_dbgmsg("assume no encoding\n"); mimeType = NOMIME; }
c6259ac5	cli_dbgmsg("mimeType = %d\n", mimeType);
b151ef55	switch(mimeType) { case NOMIME: aText = textAddMessage(aText, mainMessage); break; case TEXT: if(tableFind(subtypeTable, mimeSubtype) == PLAIN) aText = textCopy(messageGetBody(mainMessage)); break; case MULTIPART: boundary = messageFindArgument(mainMessage, "boundary"); if(boundary == NULL) { cli_warnmsg("Multipart MIME message contains no boundaries\n");
2227f20e	/* Broken e-mail message / mimeType = NOMIME; / * The break means that we will still * check if the file contains a uuencoded file */ break;
b151ef55	}
93002b48	if(mimeSubtype[0] == '\0') { cli_warnmsg("Multipart has no subtype assuming alternative\n"); mimeSubtype = "alternative"; messageSetMimeSubtype(mainMessage, "alternative"); }
b151ef55	/* * Get to the start of the first message */
0704dad8	t_line = messageGetBody(mainMessage); if(t_line == NULL) { cli_warnmsg("Multipart MIME message has no body\n"); free((char *)boundary); mimeType = NOMIME; break; } do
bf8ea488	if(boundaryStart(t_line->t_text, boundary))
b151ef55	break;
0704dad8	while((t_line = t_line->t_next) != NULL);
b151ef55	if(t_line == NULL) {
0704dad8	cli_warnmsg("Multipart MIME message contains no boundary lines\n");
bf8ea488	/* * Free added by Thomas Lamy * <Thomas.Lamy@in-online.net> / free((char )boundary);
2227f20e	mimeType = NOMIME; /* * The break means that we will still * check if the file contains a uuencoded file */ break;
b151ef55	} /* * Build up a table of all of the parts of this * multipart message. Remember, each part may itself * be a multipart message. */ inhead = 1; inMimeHead = 0;
68be129f	/* * This looks like parseEmailHeaders() - maybe there's * some duplication of code to be cleaned up */
6613d595	for(multiparts = 0; t_line; multiparts++) {
26564cf5	int lines = 0;
6613d595	messages = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
b151ef55	aMessage = messages[multiparts] = messageCreate(); cli_dbgmsg("Now read in part %d\n", multiparts);
0bf1353d	/* * Ignore blank lines. There shouldn't be ANY * but some viruses insert them */ while((t_line = t_line->t_next) != NULL) { cli_chomp(t_line->t_text); if(strlen(t_line->t_text) != 0) break; } if(t_line == NULL) { cli_dbgmsg("Empty part\n"); continue; } do {
b151ef55	const char *line = t_line->t_text;
68be129f	/cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n", inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");/
b151ef55	if(inMimeHead) {
7baeb4a6	/* * Handle continuation lines * because the previous line * ended with a ; */
68be129f	cli_dbgmsg("About to add mime Argument '%s'\n", line);
7baeb4a6	/* * Handle the case when it * isn't really a continuation * line: * Content-Type: application/octet-stream; * Content-Transfer-Encoding: base64 */ parseEmailHeader(aMessage, line, rfc821Table);
b151ef55	while(isspace((int)line)) line++; if(line == '\0') { inhead = inMimeHead = 0; continue; } /* * This may cause a trailing ';' * to be added if this test * fails - TODO: verify this */ inMimeHead = continuationMarker(line); messageAddArgument(aMessage, line); } else if(inhead) { if(strlen(line) == 0) { inhead = 0; continue; }
a64bf87e	if(isspace((int)line)) { / * The first line is * continuation line. * This is tricky * to handle, but * all we can do is our * best / cli_dbgmsg("Part %d starts with a continuation line\n", multiparts); messageAddArgument(aMessage, line); / * Give it a default * MIME type since * that may be the * missing line * * Choose application to * force a save */ if(messageGetMimeType(aMessage) == NOMIME) messageSetMimeType(aMessage, "application"); continue; }
b151ef55	/* * Some clients are broken and * put white space after the ; */ inMimeHead = continuationMarker(line);
4674dc9a	if(!inMimeHead) if(t_line->t_next && ((t_line->t_next->t_text[0] == '\t') \|\| (t_line->t_next->t_text[0] == ' '))) inMimeHead = TRUE;
68badbc1
8c0250d5	parseEmailHeader(aMessage, line, rfc821Table);
b151ef55	} else if(boundaryStart(line, boundary)) { inhead = 1; break; } else if(endOfMessage(line, boundary)) { /* * Some viruses put information * after the end of message, * which presumably some broken * mail clients find, so we * can't assume that this * is the end of the message / / t_line = NULL;*/ break;
26564cf5	} else {
ffd59a3e	messageAddLine(aMessage, line, 1);
26564cf5	lines++; }
0bf1353d	} while((t_line = t_line->t_next) != NULL);
b151ef55	messageClean(aMessage);
26564cf5	cli_dbgmsg("Part %d has %d lines\n", multiparts, lines);
b151ef55	} free((char *)boundary);
6638be41	/* * We've finished message we're parsing */ if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
2250ea69	}
b151ef55
6613d595	if(multiparts == 0) { if(messages) free(messages);
6638be41	return 2; /* Nothing to do */
6613d595	}
6638be41
b151ef55	cli_dbgmsg("The message has %d parts\n", multiparts); cli_dbgmsg("Find out the multipart type(%s)\n", mimeSubtype); switch(tableFind(subtypeTable, mimeSubtype)) { case RELATED:
68be129f	cli_dbgmsg("Multipart related handler\n");
b151ef55	/*
295e425f	* Have a look to see if there's HTML code * which will need scanning
b151ef55	*/ aMessage = NULL; assert(multiparts > 0);
0bcad2b1	htmltextPart = getTextPart(messages, multiparts);
b151ef55
0bcad2b1	if(htmltextPart >= 0) aText = textAddMessage(aText, messages[htmltextPart]);
b151ef55	else /*
295e425f	* There isn't an HTML bit. If there's a * multipart bit, it'll may be in there * somewhere
b151ef55	*/ for(i = 0; i < multiparts; i++) if(messageGetMimeType(messages[i]) == MULTIPART) { aMessage = messages[i];
0bcad2b1	htmltextPart = i;
b151ef55	break; }
295e425f	if(htmltextPart == -1) { cli_dbgmsg("No HTML code found to be scanned"); rc = 0; } else
7fca6080	rc = parseEmailBody(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
b151ef55	blobArrayDestroy(blobs, nBlobs);
c6259ac5	blobs = NULL; nBlobs = 0;
b151ef55	/* * Fixed based on an idea from Stephen White <stephen@earth.li> * The message is confused about the difference * between alternative and related. Badtrans.B * suffers from this problem. * * Fall through in this case: * Content-Type: multipart/related; * type="multipart/alternative" */
f5e9abc8	/* * Changed to always fall through based on * an idea from Michael Dankov <misha@btrc.ru> * that some viruses are completely confused * about the difference between related * and mixed / /cptr = messageFindArgument(mainMessage, "type");
b151ef55	if(cptr == NULL) break; isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0); free((char *)cptr); if(!isAlternative)
f5e9abc8	break;*/
b151ef55	case ALTERNATIVE: cli_dbgmsg("Multipart alternative handler\n");
0bcad2b1	htmltextPart = getTextPart(messages, multiparts);
b151ef55
0bcad2b1	if(htmltextPart == -1) htmltextPart = 0;
b151ef55
0bcad2b1	aMessage = messages[htmltextPart];
b151ef55	aText = textAddMessage(aText, aMessage);
7fca6080	rc = parseEmailBody(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
7b8fb055
b151ef55	if(rc == 1) { /* * Alternative message has saved its * attachments, ensure we don't do * the same thing */
d3d2fb1e	blobArrayDestroy(blobs, nBlobs); blobs = NULL;
b151ef55	nBlobs = 0; rc = 2; } /* * Fall through - some clients are broken and * say alternative instead of mixed. The Klez * virus is broken that way / case REPORT: / * According to section 1 of RFC1892, the * syntax of multipart/report is the same * as multipart/mixed. There are some required * parameters, but there's no need for us to * verify that they exist */ case MIXED:
fdc8a467	case APPLEDOUBLE: /* not really supported */
b151ef55	/* * Look for attachments * * Not all formats are supported. If an * unsupported format turns out to be * common enough to implement, it is a simple * matter to add it */
2250ea69	if(aText) { if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b151ef55	mainMessage = NULL;
2250ea69	}
b151ef55	cli_dbgmsg("Mixed message with %d parts\n", multiparts); for(i = 0; i < multiparts; i++) { bool addAttachment = FALSE; bool addToText = FALSE; const char *dtype;
f54a8635	message *body;
b151ef55	aMessage = messages[i]; assert(aMessage != NULL); dtype = messageGetDispositionType(aMessage);
0bcad2b1	cptr = messageGetMimeSubtype(aMessage);
b151ef55	cli_dbgmsg("Mixed message part %d is of type %d\n", i, messageGetMimeType(aMessage)); switch(messageGetMimeType(aMessage)) { case APPLICATION:
c6259ac5	#if 0 /* strict checking... */
b151ef55	if((strcasecmp(dtype, "attachment") == 0) \|\|
0bcad2b1	(strcasecmp(cptr, "x-msdownload") == 0) \|\|
c6259ac5	(strcasecmp(cptr, "octet-stream") == 0) \|\|
0bcad2b1	(strcasecmp(dtype, "octet-stream") == 0))
b151ef55	addAttachment = TRUE; else {
c6259ac5	cli_dbgmsg("Discarded mixed/application not sent as attachment\n");
b151ef55	continue; }
c6259ac5	#endif addAttachment = TRUE;
b151ef55	break; case NOMIME:
7b8fb055	if(mainMessage) { const text t_line = uuencodeBegin(mainMessage); if(t_line) { blob aBlob; cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n"); messageSetEncoding(mainMessage, "x-uuencode"); aBlob = messageToBlob(mainMessage); if(aBlob) { assert(blobGetFilename(aBlob) != NULL); blobClose(aBlob); blobList[numberOfAttachments++] = aBlob; } } if(mainMessage != messageIn) messageDestroy(mainMessage); mainMessage = NULL; }
b151ef55	addToText = TRUE; if(messageGetBody(aMessage) == NULL) /* * No plain text version */
ffd59a3e	messageAddLine(aMessage, "No plain text alternative", 1);
b151ef55	assert(messageGetBody(aMessage) != NULL); break; case TEXT:
852e3ce4	cli_dbgmsg("Mixed message text part disposition \"%s\"\n", dtype);
b151ef55	if(strcasecmp(dtype, "attachment") == 0) addAttachment = TRUE; else if((*dtype == '\0') \|\| (strcasecmp(dtype, "inline") == 0)) {
852e3ce4	const text *t_line = uuencodeBegin(aMessage);
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b151ef55	mainMessage = NULL;
852e3ce4	if(t_line) { cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n"); messageSetEncoding(aMessage, "x-uuencode"); addAttachment = TRUE; } else if(strcasecmp(messageGetMimeSubtype(aMessage), "plain") == 0) {
7584963d	char *filename;
852e3ce4	/* * Strictly speaking * a text/html part is * not an attachment. We * pretend it is so that * we can decode and * scan it */
7584963d	filename = (char )messageFindArgument(aMessage, "filename"); if(filename == NULL) filename = (char )messageFindArgument(aMessage, "name"); if(filename == NULL) { cli_dbgmsg("Adding part to main message\n"); addToText = TRUE; } else { cli_dbgmsg("Treating %s as attachment\n", filename); free(filename); addAttachment = TRUE; }
852e3ce4	} else {
b151ef55	messageAddArgument(aMessage, "filename=textportion"); addAttachment = TRUE; } } else {
bad123c6	cli_warnmsg("Text type %s is not supported\n", dtype);
b151ef55	continue; } break; case MESSAGE:
2e0f78a6	/* Content-Type: message/rfc822 */
b151ef55	cli_dbgmsg("Found message inside multipart\n");
ffd59a3e	if(encodingLine(aMessage) == NULL) { assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
af852ae0	continue;
ffd59a3e	}
2e0f78a6	messageAddLineAtTop(aMessage, "Received: by clamd");
4465fb04	#ifdef SAVE_TO_DISC /* * Save this embedded message * to a temporary file / saveTextPart(aMessage, dir); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; #else / * Scan in memory, faster but * is open to DoS attacks when * many nested levels are * involved. */
f54a8635	body = parseEmailHeaders(aMessage, rfc821Table);
bad123c6	/* * We've fininished with the * original copy of the message, * so throw that away and * deal with the encapsulated * message as a message. * This can save a lot of memory */ assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
f54a8635	if(body) { rc = parseEmailBody(body, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable); messageDestroy(body); }
4465fb04	#endif
b151ef55	continue; case MULTIPART: /* * It's a multi part within a multi part * Run the message parser on this bit, it won't * be an attachment */ cli_dbgmsg("Found multipart inside multipart\n");
f54a8635	if(aMessage) { body = parseEmailHeaders(aMessage, rfc821Table); if(body) {
74b5c349	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
f54a8635
6638be41	/t = messageToText(body); rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);/ rc = parseEmailBody(body, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); /textDestroy(t);/
74b5c349	cli_dbgmsg("Finished recursion\n");
f54a8635	mainMessage = body; } } else { rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
f54a8635	mainMessage = NULL; }
b151ef55	continue; case AUDIO: case IMAGE:
c7256385	case VIDEO:
b151ef55	/* * TODO: it may be nice to * have an option to throw * away all images and sound * files for ultra-secure sites */ addAttachment = TRUE; break; default:
c7256385	cli_warnmsg("Only text and application attachments are supported, type = %d\n",
b151ef55	messageGetMimeType(aMessage)); continue; } /* * It must be either text or * an attachment. It can't be both */ assert(addToText \|\| addAttachment); assert(!(addToText && addAttachment));
7b8fb055	if(addToText) {
b151ef55	aText = textAdd(aText, messageGetBody(aMessage));
7b8fb055	} else if(addAttachment) {
b151ef55	blob *aBlob = messageToBlob(aMessage); if(aBlob) { assert(blobGetFilename(aBlob) != NULL);
c7256385	blobClose(aBlob); blobList[numberOfAttachments++] = aBlob;
b151ef55	} }
6638be41	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
b151ef55	} if(numberOfAttachments == 0) { /* No usable attachment was found */
7fca6080	rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
b151ef55	break; }
c7256385
b151ef55	/* * Store any existing attachments at the end of * the list we've just built up */
c7256385	numberOfNewAttachments = 0;
b151ef55	for(i = 0; i < nBlobs; i++) {
c7256385	int j;
0bcad2b1	#ifdef CL_DEBUG
b151ef55	assert(blobs[i]->magic == BLOB);
0bcad2b1	#endif
c7256385	for(j = 0; j < numberOfAttachments; j++) if(blobcmp(blobs[i], blobList[j]) == 0) break; if(j >= numberOfAttachments) { assert(numberOfAttachments < MAX_ATTACHMENTS); cli_dbgmsg("Attaching %s to list of blobs\n", blobGetFilename(blobs[i])); blobClose(blobs[i]); blobList[numberOfAttachments++] = blobs[i]; numberOfNewAttachments++; } else { cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n", blobGetFilename(blobs[i]), blobGetFilename(blobList[j])); blobDestroy(blobs[i]); }
b151ef55	}
9a35912c	/*
c7256385	* If we've found nothing new save what we have * and quit - that's this part all done. / if(numberOfNewAttachments == 0) { rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable); break; } /
9a35912c	* If there's only one part of the MULTIPART * we already have the body to decode so * there's no more work to do. * * This is mostly for the situation where
bad123c6	* broken messages claim to be multipart * but aren't was causing us to go into
9a35912c	* infinite recursion */ if(multiparts > 1) rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable);
4e7ca2b1	else if(numberOfAttachments == 1) { (void)saveFile(blobList[0], dir); blobDestroy(blobList[0]); }
b151ef55	break; case DIGEST:
f54a8635	/* * TODO: * According to section 5.1.5 RFC2046, the * default mime type of multipart/digest parts * is message/rfc822 */
b151ef55	case SIGNED: case PARALLEL: /* * If we're here it could be because we have a * multipart/mixed message, consisting of a * message followed by an attachment. That * message itself is a multipart/alternative * message and we need to dig out the plain * text part of that alternative */
0bcad2b1	htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0;
b151ef55
7fca6080	rc = parseEmailBody(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
b151ef55	blobArrayDestroy(blobs, nBlobs);
c6259ac5	blobs = NULL; nBlobs = 0;
b151ef55	break; default: /* * According to section 7.2.6 of RFC1521, * unrecognised multiparts should be treated as * multipart/mixed. I don't do this yet so * that I can see what comes along... */ cli_warnmsg("Unsupported multipart format `%s'\n", mimeSubtype); rc = 0; } for(i = 0; i < multiparts; i++)
bad123c6	if(messages[i]) messageDestroy(messages[i]);
b151ef55	if(blobs && (blobsIn == NULL)) puts("arraydestroy");
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
c6259ac5	if(aText && (textIn == NULL)) textDestroy(aText);
6613d595	if(messages) free(messages);
b151ef55	return rc; case MESSAGE: /* * Check for forbidden encodings */ switch(messageGetEncoding(mainMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: break; default:
c6259ac5	cli_warnmsg("MIME type 'message' cannot be decoded\n");
b151ef55	break; }
c6259ac5	if((strcasecmp(mimeSubtype, "rfc822") == 0) \|\| (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
bad123c6	message *m = parseEmailHeaders(mainMessage, rfc821Table); if(m) { cli_dbgmsg("Decode rfc822");
4465fb04	if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL; }
bad123c6	if(messageGetBody(m)) rc = parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable); messageDestroy(m); }
b151ef55	break;
bf8ea488	} else if(strcasecmp(mimeSubtype, "partial") == 0)
b151ef55	/* TODO */
8a88fb93	cli_warnmsg("Content-type message/partial not yet supported\n");
bf8ea488	else if(strcasecmp(mimeSubtype, "external-body") == 0)
b151ef55	/* * I don't believe that we should be going * around the Internet looking for referenced * files... */ cli_warnmsg("Attempt to send Content-type message/external-body trapped");
bf8ea488	else
b151ef55	cli_warnmsg("Unsupported message format `%s'\n", mimeSubtype);
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
6613d595	if(messages) free(messages);
b151ef55	return 0; case APPLICATION:
0bcad2b1	cptr = messageGetMimeSubtype(mainMessage);
04421a14	/if((strcasecmp(cptr, "octet-stream") == 0) \|\| (strcasecmp(cptr, "x-msdownload") == 0)) {/ {
b151ef55	blob aBlob = messageToBlob(mainMessage); if(aBlob) { cli_dbgmsg("Saving main message as attachment %d\n", nBlobs); assert(blobGetFilename(aBlob) != NULL); / * It's likely that we won't have built * a set of attachments */ if(blobs == NULL) blobs = blobList;
c6259ac5	for(i = 0; i < nBlobs; i++) if(blobs[i] == NULL) break;
c7256385	blobClose(aBlob);
c6259ac5	blobs[i] = aBlob; if(i == nBlobs) { nBlobs++; assert(nBlobs < MAX_ATTACHMENTS); }
b151ef55	}
04421a14	} /else cli_warnmsg("Discarded application not sent as attachment\n");/
b151ef55	break; case AUDIO: case VIDEO: case IMAGE: break; default: cli_warnmsg("Message received with unknown mime encoding"); break; } } cli_dbgmsg("%d attachments found\n", nBlobs);
0bcad2b1	if(nBlobs == 0) { blob *b;
b151ef55	/*
15c8cace	* No attachments - scan the text portions, often files * are hidden in HTML code
b151ef55	*/
0bcad2b1	cli_dbgmsg("%d multiparts found\n", multiparts);
15c8cace	for(i = 0; i < multiparts; i++) { b = messageToBlob(messages[i]);
b151ef55
0bcad2b1	assert(b != NULL);
15c8cace	cli_dbgmsg("Saving multipart %d, encoded with scheme %d\n", i, messageGetEncoding(messages[i]));
0bcad2b1	(void)saveFile(b, dir);
b151ef55	blobDestroy(b); }
0bcad2b1	if(mainMessage) { /* * Look for uu-encoded main file */
5a01973c	const text *t_line;
0bcad2b1
5a01973c	if((t_line = uuencodeBegin(mainMessage)) != NULL) {
2227f20e	cli_dbgmsg("Found uuencoded file\n");
15c8cace	/* * Main part contains uuencoded section */
852e3ce4	messageSetEncoding(mainMessage, "x-uuencode");
0bcad2b1	if((b = messageToBlob(mainMessage)) != NULL) { if((cptr = blobGetFilename(b)) != NULL) { cli_dbgmsg("Found uuencoded message %s\n", cptr); (void)saveFile(b, dir); } blobDestroy(b); }
92915cee	} else if((encodingLine(mainMessage) != NULL) &&
a750c93c	((t_line = bounceBegin(mainMessage)) != NULL)) { const text *t; static const char encoding[] = "Content-Transfer-Encoding";
92915cee	/* * Attempt to save the original (unbounced) * message - clamscan will find that in the * directory and call us again (with any luck) * having found an e-mail message to handle
a750c93c	* * This finds a lot of false positives, the * search that an encoding line is in the * bounce (i.e. it's after the bounce header) * helps a bit, but at the expense of scanning * the entire message. messageAddLine * optimisation could help here, but needs * careful thought, do it with line numbers * would be best, since the current method in * messageAddLine of checking encoding first * must remain otherwise non bounce messages * won't be scanned
92915cee	*/
a750c93c	for(t = t_line; t; t = t->t_next) if((strncasecmp(t->t_text, encoding, sizeof(encoding) - 1) == 0) && (strstr(t->t_text, "7bit") == NULL))
7b8fb055	break;
a750c93c	if(t && ((b = textToBlob(t_line, NULL)) != NULL)) {
92915cee	cli_dbgmsg("Found a bounce message\n"); saveFile(b, dir); blobDestroy(b); }
5a01973c	} else {
b759d5eb	bool saveIt;
5a01973c	cli_dbgmsg("Not found uuencoded file\n");
af852ae0	if(messageGetMimeType(mainMessage) == MESSAGE)
b759d5eb	/* * Quick peek, if the encapsulated * message has no * content encoding statement don't * bother saving to scan, it's safe */
627465e7	saveIt = (encodingLine(mainMessage) != NULL); else if((t_line = encodingLine(mainMessage)) != NULL) { /* * Some bounces include the message
a750c93c	* body without the headers. * Unfortunately this generates a * lot of false positives that a bounce * has been found when it hasn't.
627465e7	*/
92915cee	if((b = blobCreate()) != NULL) {
627465e7	cli_dbgmsg("Found a bounce message with no header\n"); blobAddData(b, "Received: by clamd\n", 19);
02c9dc2a	b = textToBlob(t_line, b);
627465e7	saveFile(b, dir); blobDestroy(b); } saveIt = FALSE;
a750c93c	} else
627465e7	/* * Save the entire text portion,
a750c93c	* since it it may be an HTML file with * a JavaScript virus
627465e7	*/
b759d5eb	saveIt = TRUE; if(saveIt) { cli_dbgmsg("Saving text part to scan\n"); saveTextPart(mainMessage, dir); }
0bcad2b1	}
68be129f	} else rc = (multiparts) ? 1 : 2; /* anything saved? */
b151ef55	} else { short attachmentNumber; for(attachmentNumber = 0; attachmentNumber < nBlobs; attachmentNumber++) { blob *b = blobs[attachmentNumber];
c6259ac5	if(b) { if(!saveFile(b, dir)) break; blobDestroy(b); blobs[attachmentNumber] = NULL; }
b151ef55	} } if(aText && (textIn == NULL)) textDestroy(aText); /* Already done */
c6259ac5	if(blobs && (blobsIn == NULL)) blobArrayDestroy(blobs, nBlobs);
b151ef55
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
6613d595	if(messages) free(messages);
68be129f	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
b151ef55
68be129f	return rc;
b151ef55	} /* * Is the current line the start of a new section? * * New sections start with --boundary / static int boundaryStart(const char line, const char boundary) { / * Gibe.B3 is broken it has: * boundary="---- =_NextPart_000_01C31177.9DC7C000" * but it's boundaries look like * ------ =_NextPart_000_01C31177.9DC7C000 * notice the extra '-' */
0704dad8	/cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);/
b151ef55	if(strstr(line, boundary) != NULL) { cli_dbgmsg("found %s in %s\n", boundary, line); return 1; } if(line++ != '-') return 0; if(line++ != '-') return 0; return strcasecmp(line, boundary) == 0; } /* * Is the current line the end? * * The message ends with with --boundary-- / static int endOfMessage(const char line, const char boundary) { size_t len; if(line++ != '-') return 0; if(*line++ != '-') return 0; len = strlen(boundary);
c6259ac5	if(strncasecmp(line, boundary, len) != 0) return 0;
b151ef55	if(strlen(line) != (len + 2)) return 0; line = &line[len]; if(line++ != '-') return 0; return line == '-'; } /* * Initialise the various lookup tables / static int initialiseTables(table_t rfc821Table, table_t subtypeTable) { const struct tableinit tableinit; /* * Initialise the various look up tables / rfc821Table = tableCreate(); assert(*rfc821Table != NULL); for(tableinit = rfc821headers; tableinit->key; tableinit++)
7b8fb055	if(tableInsert(rfc821Table, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
b151ef55	return -1;
7b8fb055	}
b151ef55	subtypeTable = tableCreate(); assert(subtypeTable != NULL); for(tableinit = mimeSubtypes; tableinit->key; tableinit++) if(tableInsert(subtypeTable, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
7b8fb055	tableDestroy(*subtypeTable);
b151ef55	return -1; } return 0; } /*
0bcad2b1	* If there's a HTML text version use that, otherwise
b151ef55	* use the first text part, otherwise just use the
0bcad2b1	* first one around. HTML text is most likely to include * a scripting worm
b151ef55	* * If we can't find one, return -1 / static int getTextPart(message const messages[], size_t size) { size_t i; for(i = 0; i < size; i++) { assert(messages[i] != NULL); if((messageGetMimeType(messages[i]) == TEXT) &&
0bcad2b1	(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0))
b151ef55	return (int)i; } for(i = 0; i < size; i++) if(messageGetMimeType(messages[i]) == TEXT) return (int)i; return -1; } /* * strip - * Remove the trailing spaces from a buffer * Returns it's new length (a la strlen) * * len must be int not size_t because of the >= 0 test, it is sizeof(buf) * not strlen(buf) / static size_t strip(char buf, int len) { register char ptr; register size_t i; if((buf == NULL) \|\| (len <= 0)) return(0); i = strlen(buf); if(len > (int)(i + 1)) return(i); ptr = &buf[--len]; #if defined(UNIX) \|\| defined(C_LINUX) \|\| defined(C_DARWIN) / watch - it may be in shared text area / do if(ptr) ptr = '\0'; while((--len >= 0) && !isgraph(--ptr) && (ptr != '\n') && (ptr != '\r')); #else /* more characters can be displayed on DOS / do #ifndef REAL_MODE_DOS if(ptr) /* C8.0 puts into a text area / #endif ptr = '\0'; while((--len >= 0) && ((--ptr == '\0') \|\| (isspace((int)ptr)))); #endif return((size_t)(len + 1)); } /* * strstrip: * Strip a given string */
3db105a2	size_t
b151ef55	strstrip(char s) { if(s == (char )NULL) return(0); return(strip(s, strlen(s) + 1)); } /* * When parsing a MIME header see if this spans more than one line. A * semi-colon at the end of the line indicates that the MIME information * is continued on the next line. * * Some clients are broken and put white space after the ; / static bool continuationMarker(const char line) { const char *ptr; assert(line != NULL); #ifdef CL_DEBUG cli_dbgmsg("continuationMarker(%s)\n", line); #endif if(strlen(line) == 0) return FALSE; ptr = strchr(line, '\0'); assert(ptr != NULL);
752c34b9	while(ptr > line)
b151ef55	switch(--ptr) { case '\n': case '\r': case ' ': case '\t': continue; case ';': return TRUE; default: return FALSE; } return FALSE; } static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char arg) { int type = tableFind(rfc821Table, cmd); #ifdef CL_THREAD_SAFE char strptr; #endif char *copy = strdup(arg);
c6259ac5	char *ptr = copy;
b151ef55	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
181c7548	strstrip(copy);
b151ef55	switch(type) { case CONTENT_TYPE: /* * Fix for non RFC1521 compliant mailers * that send content-type: Text instead * of content-type: Text/Plain, or * just simply "Content-Type:" */
a8c7e017	if(arg == NULL)
b151ef55	cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n"); else if(strchr(copy, '/') == NULL) cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy); else { /* * Some clients are broken and * put white space after the ; */
09ccd6e0	/strstrip(copy);/
7e572372	if(*arg == '/') { cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n"); messageSetMimeType(m, "application"); messageSetMimeSubtype(m, "octet-stream");
0ada8f3e	strtok_r(copy, ";", &strptr);
7e572372	} else { char *s;
b151ef55
7e572372	messageSetMimeType(m, strtok_r(copy, "/", &strptr)); /* * Stephen White <stephen@earth.li> * Some clients put space after * the mime type but before * the ; */ s = strtok_r(NULL, ";", &strptr); strstrip(s); messageSetMimeSubtype(m, s); }
b151ef55	/*
0704dad8	* Add in all rest of the the arguments. * e.g. if the header is this: * Content-Type:', arg='multipart/mixed; boundary=foo * we find the boundary argument set it
b151ef55	*/
0704dad8	copy = strtok_r(NULL, "", &strptr); if(copy) messageAddArguments(m, copy);
b151ef55	} break; case CONTENT_TRANSFER_ENCODING: messageSetEncoding(m, copy); break; case CONTENT_DISPOSITION:
a980b067	arg = strtok_r(copy, ";", &strptr); if(arg && *arg) { messageSetDispositionType(m, arg); messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr)); }
b151ef55	}
c6259ac5	free(ptr);
b151ef55	return type; }
68be129f	/*
5a01973c	* Save the text portion of the message / static void saveTextPart(message m, const char dir) { blob b; messageAddArgument(m, "filename=textportion"); if((b = messageToBlob(m)) != NULL) { /* * Save main part to scan that / cli_dbgmsg("Saving main message, encoded with scheme %d\n", messageGetEncoding(m)); (void)saveFile(b, dir); blobDestroy(b); } } /
68be129f	* Save some data as a unique file in the given directory.
a750c93c	* * TODO: don't save archive files if archive scanning is disabled, or * OLE2 files if that is disabled or pattern match --exclude, but * we need access to the command line options/clamav.conf here to * be able to do that
68be129f	*/
0bcad2b1	static bool
b151ef55	saveFile(const blob b, const char dir) {
c95ae98b	const unsigned long nbytes = blobGetDataSize(b);
1d53a315	size_t suffixLen = 0;
b151ef55	int fd;
0bcad2b1	const char cptr, suffix;
701a425d	char filename[NAME_MAX + 1];
b151ef55	assert(dir != NULL); if(nbytes == 0)
0bcad2b1	return TRUE;
b151ef55	cptr = blobGetFilename(b); if(cptr == NULL) { cptr = "unknown"; suffix = ""; } else { /* * Some programs are broken and use an idea of a ".suffix" * to determine the file type rather than looking up the * magic number. CPM has a lot to answer for... * FIXME: the suffix now appears twice in the filename... */ suffix = strrchr(cptr, '.'); if(suffix == NULL) suffix = "";
1d53a315	else { suffixLen = strlen(suffix); if(suffixLen > 4) { /* Found a full stop which isn't a suffix */ suffix = ""; suffixLen = 0; } }
b151ef55	} cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);
0bcad2b1	/* * Allow for very long filenames. We have to truncate them to fit */
1d53a315	snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir, (int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr);
b151ef55	/*
181c7548	* TODO: add a HAVE_MKSTEMP property
b151ef55	*/
4b0a2de6	#if defined(C_LINUX) \|\| defined(C_BSD) \|\| defined(HAVE_MKSTEMP) \|\| defined(C_SOLARIS) \|\| defined(C_CYGWIN)
b151ef55	fd = mkstemp(filename); #else (void)mktemp(filename);
0dbec6b9	fd = open(filename, O_WRONLY\|O_CREAT\|O_EXCL\|O_TRUNC\|O_BINARY, 0600);
b151ef55	#endif if(fd < 0) {
181c7548	cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno));
39ff42ee	cli_dbgmsg("%lu %d %d\n", suffixLen, sizeof(filename), strlen(filename));
0bcad2b1	return FALSE;
b151ef55	} /*
c6259ac5	* Add the suffix back to the end of the filename. Tut-tut, filenames * should be independant of their usage on UNIX type systems.
b151ef55	*/
1d53a315	if(suffixLen > 1) {
6ecba059	char stub[NAME_MAX + 1];
c6259ac5
181c7548	snprintf(stub, sizeof(stub), "%s%s", filename, suffix);
a8c7e017	#ifdef C_LINUX rename(stub, filename); #else
b151ef55	link(stub, filename); unlink(stub);
a8c7e017	#endif
b151ef55	}
b9ec1705	cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n",
b151ef55	filename, nbytes);
c95ae98b	if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) { perror(filename); close(fd); return FALSE;
b9ec1705	}
c6259ac5	return (close(fd) >= 0);
b151ef55	}