b151ef55 |
/*
* Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
7cef72ea |
*
* Change History:
* $Log: mbox.c,v $ |
6613d595 |
* Revision 1.77 2004/06/18 10:07:12 nigelhorne
* Allow any number of alternatives in multipart messages
* |
8a88fb93 |
* Revision 1.76 2004/06/16 08:07:39 nigelhorne
* Added thread safety
* |
93002b48 |
* Revision 1.75 2004/06/14 09:07:10 nigelhorne
* Handle spam using broken e-mail generators for multipart/alternative
* |
7b8fb055 |
* Revision 1.74 2004/06/09 18:18:59 nigelhorne
* Find uuencoded viruses in multipart/mixed that have no start of message boundaries
* |
4b0a2de6 |
* Revision 1.73 2004/05/14 08:15:55 nigelhorne
* Use mkstemp on cygwin
* |
a750c93c |
* Revision 1.72 2004/05/12 11:20:37 nigelhorne
* More bounce message false positives handled
* |
92915cee |
* Revision 1.71 2004/05/10 11:35:11 nigelhorne
* No need to update mbox.c for cli_filetype problem |
0b244177 |
* |
2e0f78a6 |
* Revision 1.69 2004/05/06 11:26:49 nigelhorne
* Force attachments marked as RFC822 messages to be scanned
* |
3db105a2 |
* Revision 1.68 2004/04/29 08:59:24 nigelhorne
* Tidied up SetDispositionType
* |
7584963d |
* Revision 1.67 2004/04/23 10:47:41 nigelhorne
* If an inline text portion has a filename treat it as an attachment
* |
bf497d0a |
* Revision 1.66 2004/04/14 08:32:21 nigelhorne
* When debugging print the email number in mailboxes
* |
7baeb4a6 |
* Revision 1.65 2004/04/07 18:18:07 nigelhorne
* Some occurrences of W97M.Lexar were let through
* |
4465fb04 |
* Revision 1.64 2004/04/05 09:32:20 nigelhorne
* Added SCAN_TO_DISC define
* |
4c927f11 |
* Revision 1.63 2004/04/01 15:32:34 nigelhorne
* Graceful exit if messageAddLine fails in strdup
* |
6638be41 |
* Revision 1.62 2004/03/31 17:00:20 nigelhorne
* Code tidy up free memory earlier
* |
74b5c349 |
* Revision 1.61 2004/03/30 22:45:13 nigelhorne
* Better handling of multipart/multipart messages
* |
ffd59a3e |
* Revision 1.60 2004/03/29 09:22:03 nigelhorne
* Tidy up code and reduce shuffling of data
* |
c95ae98b |
* Revision 1.59 2004/03/26 11:08:36 nigelhorne
* Use cli_writen
* |
02c9dc2a |
* Revision 1.58 2004/03/25 22:40:46 nigelhorne
* Removed even more calls to realloc and some duplicated code
* |
627465e7 |
* Revision 1.57 2004/03/21 17:19:49 nigelhorne
* Handle bounce messages with no headers
* |
f5a4d7e8 |
* Revision 1.56 2004/03/21 09:41:26 nigelhorne
* Faster scanning for non MIME messages
* |
3e556ea8 |
* Revision 1.55 2004/03/20 17:39:23 nigelhorne
* First attempt to handle all bounces
* |
a980b067 |
* Revision 1.54 2004/03/19 15:40:45 nigelhorne
* Handle empty content-disposition types
* |
af852ae0 |
* Revision 1.53 2004/03/19 08:08:02 nigelhorne
* If a message part of a multipart contains an RFC822 message that has no encoding don't scan it
* |
b759d5eb |
* Revision 1.52 2004/03/18 21:51:41 nigelhorne
* If a message only contains a single RFC822 message that has no encoding don't save for scanning
* |
bad123c6 |
* Revision 1.51 2004/03/17 19:48:12 nigelhorne
* Improved embedded RFC822 message handling
* |
09ccd6e0 |
* Revision 1.50 2004/03/10 22:05:39 nigelhorne
* Fix seg fault when a message in a multimessage mailbox fails to scan
* |
b0d8b0db |
* Revision 1.49 2004/03/04 13:01:58 nigelhorne
* Ensure all bounces are rescanned by cl_mbox
* |
6e07998e |
* Revision 1.48 2004/02/27 12:16:26 nigelhorne
* Catch lines just containing ':'
* |
39ff42ee |
* Revision 1.47 2004/02/23 10:13:08 nigelhorne
* Handle spaces before : in headers
* |
1d53a315 |
* Revision 1.46 2004/02/18 13:29:19 nigelhorne
* Stop buffer overflows for files with very long suffixes
* |
26564cf5 |
* Revision 1.45 2004/02/18 10:07:40 nigelhorne
* Find some Yaha
* |
c7256385 |
* Revision 1.44 2004/02/15 08:45:54 nigelhorne
* Avoid scanning the same file twice
* |
0704dad8 |
* Revision 1.43 2004/02/14 19:04:05 nigelhorne
* Handle spaces in boundaries
* |
0dbec6b9 |
* Revision 1.42 2004/02/14 17:23:45 nigelhorne
* Had deleted O_BINARY by mistake
* |
d32e668f |
* Revision 1.41 2004/02/12 18:43:58 nigelhorne
* Use mkstemp on Solaris
* |
a66ca28a |
* Revision 1.40 2004/02/11 08:15:59 nigelhorne
* Use O_BINARY for cygwin
* |
8b242bb9 |
* Revision 1.39 2004/02/06 13:46:08 kojm
* Support for clamav-config.h
* |
b9ec1705 |
* Revision 1.38 2004/02/04 13:29:48 nigelhorne
* Handle partial writes - and print when write fails
* |
0bf1353d |
* Revision 1.37 2004/02/03 22:54:59 nigelhorne
* Catch another example of Worm.Dumaru.Y
* |
a64bf87e |
* Revision 1.36 2004/02/02 09:52:57 nigelhorne
* Some instances of Worm.Dumaru.Y got through the net
* |
5a01973c |
* Revision 1.35 2004/01/28 10:15:24 nigelhorne
* Added support to scan some bounce messages
* |
5c7cf3f1 |
* Revision 1.34 2004/01/24 17:43:37 nigelhorne
* Removed (incorrect) warning about uninitialised variable
* |
2250ea69 |
* Revision 1.33 2004/01/23 10:38:22 nigelhorne
* Fixed memory leak in handling some multipart messages
* |
4e7ca2b1 |
* Revision 1.32 2004/01/23 08:51:19 nigelhorne
* Add detection of uuencoded viruses in single part multipart/mixed files
* |
9a35912c |
* Revision 1.31 2004/01/22 22:13:06 nigelhorne
* Prevent infinite recursion on broken uuencoded files
* |
8c0250d5 |
* Revision 1.30 2004/01/13 10:12:05 nigelhorne
* Remove duplicate code when handling multipart messages
* |
0ada8f3e |
* Revision 1.29 2004/01/09 18:27:11 nigelhorne
* ParseMimeHeader could corrupt arg
* |
7e572372 |
* Revision 1.28 2004/01/09 15:07:42 nigelhorne
* Re-engineered update 1.11 lost in recent changes
* |
68badbc1 |
* Revision 1.27 2004/01/09 14:45:59 nigelhorne
* Removed duplicated code in multipart handler
* |
852e3ce4 |
* Revision 1.26 2004/01/09 10:20:54 nigelhorne
* Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages
* |
441992ed |
* Revision 1.25 2004/01/06 14:41:18 nigelhorne
* Handle headers which do not have a space after the ':'
* |
f54a8635 |
* Revision 1.24 2003/12/20 13:55:36 nigelhorne
* Ensure multipart just save the bodies of attachments
* |
68be129f |
* Revision 1.23 2003/12/14 18:07:01 nigelhorne
* Some viruses in embedded messages were not being found
* |
062ba8b0 |
* Revision 1.22 2003/12/13 16:42:23 nigelhorne
* call new cli_chomp
* |
7fca6080 |
* Revision 1.21 2003/12/11 14:35:48 nigelhorne
* Better handling of encapsulated messages
* |
f5e9abc8 |
* Revision 1.20 2003/12/06 04:03:26 nigelhorne
* Handle hand crafted emails that incorrectly set multipart headers
* |
2227f20e |
* Revision 1.19 2003/11/21 07:26:31 nigelhorne
* Scan multipart alternatives that have no boundaries, finds some uuencoded happy99
* |
181c7548 |
* Revision 1.18 2003/11/17 08:13:21 nigelhorne
* Handle spaces at the end of lines of MIME headers
* |
04421a14 |
* Revision 1.17 2003/11/06 05:06:42 nigelhorne
* Some applications weren't being scanned
* |
295e425f |
* Revision 1.16 2003/11/04 08:24:00 nigelhorne
* Handle multipart messages that have no text portion
* |
07cbf822 |
* Revision 1.15 2003/10/12 20:13:49 nigelhorne
* Use NO_STRTOK_R consistent with message.c
* |
fdc8a467 |
* Revision 1.14 2003/10/12 12:37:11 nigelhorne
* Appledouble encoded EICAR now found
* |
4674dc9a |
* Revision 1.13 2003/10/01 09:27:42 nigelhorne
* Handle content-type header going over to a new line
* |
6ecba059 |
* Revision 1.12 2003/09/29 17:10:19 nigelhorne
* Moved stub from heap to stack since its maximum size is known
* |
47ab99fa |
* Revision 1.11 2003/09/29 12:58:32 nigelhorne
* Handle Content-Type: /; name="eicar.com"
* |
7cef72ea |
* Revision 1.10 2003/09/28 10:06:34 nigelhorne
* Compilable under SCO; removed duplicate code with message.c
* |
b151ef55 |
*/ |
6613d595 |
/* RCS identification string for this revision of the file */
static	char	const	rcsid[] = "$Id: mbox.c,v 1.77 2004/06/18 10:07:12 nigelhorne Exp $";

/* Pull in the configure-generated settings when the build provides them */
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

/*
 * The NDEBUG mapping below is switched off, so assert() stays active
 * whether or not CL_DEBUG is defined.
 */
#ifndef	CL_DEBUG
/*#define	NDEBUG*/	/* map CLAMAV debug onto standard */
#endif

#ifdef CL_THREAD_SAFE
#ifndef	_REENTRANT
#define	_REENTRANT	/* for Solaris 2.8 */
#endif
#endif
b151ef55 |
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h> |
0bcad2b1 |
#include <sys/param.h> |
b151ef55 |
#include <clamav.h>
|
8a88fb93 |
#ifdef CL_THREAD_SAFE
#include <pthread.h>
#endif
|
b151ef55 |
#include "table.h"
#include "mbox.h"
#include "blob.h"
#include "text.h"
#include "message.h"
#include "others.h"
#include "defaults.h" |
7fca6080 |
#include "str.h" |
b151ef55 |
|
07cbf822 |
/*
 * Use plain strtok() when the platform has no usable strtok_r() or when the
 * build is not thread safe. The macro drops its third argument, which is why
 * callers in this file only declare the save pointer under CL_THREAD_SAFE.
 */
#if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
#undef strtok_r
#undef __strtok_r
#define strtok_r(a,b,c)	strtok(a,b)
#endif

/* required for AIX and Tru64 */
#ifdef	TRUE
#undef	TRUE
#endif
#ifdef	FALSE
#undef	FALSE
#endif

/* File-local boolean type; FALSE is 0 and TRUE is 1 */
typedef enum	{ FALSE = 0, TRUE = 1 } bool;
|
68be129f |
static message *parseEmailHeaders(const message *m, const table_t *rfc821Table); |
8c0250d5 |
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); |
2250ea69 |
static int parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable); |
b151ef55 |
static int boundaryStart(const char *line, const char *boundary);
static int endOfMessage(const char *line, const char *boundary);
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static int getTextPart(message *const messages[], size_t size);
static size_t strip(char *buf, int len);
static bool continuationMarker(const char *line);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg); |
5a01973c |
static void saveTextPart(message *m, const char *dir); |
0bcad2b1 |
static bool saveFile(const blob *b, const char *dir); |
b151ef55 |
|
852e3ce4 |
/* Maximum number of attachments that we accept */
#define	MAX_ATTACHMENTS	10

/* Maximum line length according to RFC821 */
#define	LINE_LENGTH	1000

/* Hashcodes for our hash tables */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Mime sub types */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */

/*
 * Static key/value lists mapping header names and MIME subtype names onto
 * the codes above. Each list ends with a { NULL, 0 } sentinel.
 * NOTE(review): presumably these are what initialiseTables() loads into the
 * run-time lookup tables - confirm against its definition.
 */
static	const	struct tableinit {
	const	char	*key;
	int	value;
} rfc821headers[] = {
	/* TODO: make these regular expressions */
	{	"Content-Type",			CONTENT_TYPE		},
	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
	{	"Content-Disposition",		CONTENT_DISPOSITION	},
	{	NULL,				0			}
}, mimeSubtypes[] = {
		/* subtypes of Text */
	{	"plain",	PLAIN		},
	{	"enriched",	ENRICHED	},
	{	"html",		HTML		},
	{	"richtext",	RICHTEXT	},
		/* subtypes of Multipart */
	{	"mixed",	MIXED		},
	{	"alternative",	ALTERNATIVE	},
	{	"digest",	DIGEST		},
	{	"signed",	SIGNED		},
	{	"parallel",	PARALLEL	},
	{	"related",	RELATED		},
	{	"report",	REPORT		},
	{	"appledouble",	APPLEDOUBLE	},
	{	NULL,		0		}
};
8a88fb93 |
#ifdef CL_THREAD_SAFE
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif |
7b8fb055 |
static table_t *rfc821Table, *subtypeTable; |
b151ef55 |
|
7cef72ea |
/*
 * Maximum filenames under various systems: when NAME_MAX is missing, borrow
 * MAXNAMELEN or, failing that, FILENAME_MAX.
 */
#ifndef	NAME_MAX	/* e.g. Linux */
#ifdef	MAXNAMELEN	/* e.g. Solaris */
#define	NAME_MAX	MAXNAMELEN
#else
#ifdef	FILENAME_MAX	/* e.g. SCO */
#define	NAME_MAX	FILENAME_MAX
#endif
#endif
#endif

/* Platforms without O_BINARY get a value that is a no-op when OR-ed into flags */
#ifndef	O_BINARY
#define	O_BINARY	0
#endif

#define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
|
b151ef55 |
/*
* TODO: when signal handling is added, need to remove temp files when a
* signal is received
* TODO: add option to scan in memory not via temp files, perhaps with a |
74b5c349 |
* named pipe or memory mapped file, though this won't work on big e-mails
* containing many levels of encapsulated messages - it'd just take too much
* RAM |
15c8cace |
* TODO: if debug is enabled, catch a segfault and dump the current e-mail
* in it's entirety, then call abort() |
c6259ac5 |
* TODO: parse .msg format files |
fdc8a467 |
* TODO: fully handle AppleDouble format, see
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf |
f54a8635 |
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody |
b151ef55 |
*/
int
cl_mbox(const char *dir, int desc)
{ |
c6259ac5 |
int retcode, i; |
f54a8635 |
message *m, *body; |
b151ef55 |
FILE *fd; |
c6259ac5 |
char buffer[LINE_LENGTH]; |
b151ef55 |
cli_dbgmsg("in mbox()\n");
|
c6259ac5 |
i = dup(desc);
if((fd = fdopen(i, "rb")) == NULL) {
cli_errmsg("Can't open descriptor %d\n", desc);
close(i); |
b151ef55 |
return -1; |
c6259ac5 |
}
if(fgets(buffer, sizeof(buffer), fd) == NULL) {
/* empty message */
fclose(fd);
return 0;
} |
b151ef55 |
m = messageCreate(); |
7b8fb055 |
if(m == NULL) { |
c6259ac5 |
fclose(fd); |
7b8fb055 |
return 0;
}
|
8a88fb93 |
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&tables_mutex);
#endif |
7b8fb055 |
if(rfc821Table == NULL) {
assert(subtypeTable == NULL);
if(initialiseTables(&rfc821Table, &subtypeTable) < 0) { |
8a88fb93 |
rfc821Table = NULL;
subtypeTable = NULL;
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
7b8fb055 |
messageDestroy(m);
fclose(fd);
return -1;
} |
b151ef55 |
} |
8a88fb93 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
b151ef55 |
|
f54a8635 |
/*
* is it a UNIX style mbox with more than one
* mail message, or just a single mail message?
*/
if(strncmp(buffer, "From ", 5) == 0) { |
b151ef55 |
/* |
c6259ac5 |
* Have been asked to check a UNIX style mbox file, which
* may contain more than one e-mail message to decode |
b151ef55 |
*/ |
f54a8635 |
bool lastLineWasEmpty = FALSE; |
bf497d0a |
int messagenumber = 1; |
b151ef55 |
|
c6259ac5 |
do {
/*cli_dbgmsg("read: %s", buffer);*/ |
b151ef55 |
|
f54a8635 |
cli_chomp(buffer);
if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) { |
bf497d0a |
cli_dbgmsg("Deal with email number %d\n", messagenumber++); |
b151ef55 |
/* |
f54a8635 |
* End of a message in the mail box |
b151ef55 |
*/ |
f54a8635 |
body = parseEmailHeaders(m, rfc821Table);
messageDestroy(m);
if(messageGetBody(body)) |
09ccd6e0 |
if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) {
messageReset(body);
m = body;
continue;
} |
b151ef55 |
/* |
f54a8635 |
* Starting a new message, throw away all the
* information about the old one |
b151ef55 |
*/ |
f54a8635 |
m = body;
messageReset(body); |
b151ef55 |
|
c6259ac5 |
cli_dbgmsg("Finished processing message\n"); |
f54a8635 |
} else |
a66ca28a |
lastLineWasEmpty = (bool)(buffer[0] == '\0'); |
4c927f11 |
if(messageAddLine(m, buffer, 1) < 0)
break; |
c6259ac5 |
} while(fgets(buffer, sizeof(buffer), fd) != NULL); |
bf497d0a |
cli_dbgmsg("Deal with email number %d\n", messagenumber); |
f54a8635 |
} else |
7fca6080 |
/*
* It's a single message, parse the headers then the body
*/ |
b759d5eb |
do
/*
* No need to preprocess such as cli_chomp() since
* that'll be done by parseEmailHeaders() |
4465fb04 |
*
* TODO: this needlessly creates a message object,
* it'd be better if parseEmailHeaders could also
* read in from a file. I do not want to lump the
* parseEmailHeaders code here, that'd be a duplication
* of code I want to avoid |
b759d5eb |
*/ |
4c927f11 |
if(messageAddLine(m, buffer, 1) < 0)
break; |
b759d5eb |
while(fgets(buffer, sizeof(buffer), fd) != NULL); |
7fca6080 |
|
b151ef55 |
fclose(fd);
|
c6259ac5 |
retcode = 0;
|
f54a8635 |
body = parseEmailHeaders(m, rfc821Table);
messageDestroy(m); |
b151ef55 |
/*
* Write out the last entry in the mailbox
*/ |
f54a8635 |
if(messageGetBody(body))
if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) |
c6259ac5 |
retcode = -1; |
b151ef55 |
/*
* Tidy up and quit
*/ |
f54a8635 |
messageDestroy(body); |
b151ef55 |
cli_dbgmsg("cli_mbox returning %d\n", retcode);
return retcode;
}
/* |
7fca6080 |
* The given message contains a raw e-mail.
*
* This function parses the headers of m and sets the message's arguments |
68be129f |
*
* Returns the message's body with the correct arguments set |
7fca6080 |
*/ |
68be129f |
static message *
parseEmailHeaders(const message *m, const table_t *rfc821Table) |
7fca6080 |
{ |
4465fb04 |
bool inContinuationHeader = FALSE; /* state machine: ugh */ |
68be129f |
bool inHeader = TRUE; |
ffd59a3e |
const text *t; |
f54a8635 |
message *ret;
if(m == NULL)
return NULL;
ret = messageCreate(); |
7fca6080 |
|
ffd59a3e |
for(t = messageGetBody(m); t; t = t->t_next) { |
7fca6080 |
char *buffer = strdup(t->t_text);
#ifdef CL_THREAD_SAFE
char *strptr;
#endif
|
ffd59a3e |
if(buffer == NULL)
break;
|
062ba8b0 |
cli_chomp(buffer); |
7fca6080 |
/*
* Section B.2 of RFC822 says TAB or SPACE means |
062ba8b0 |
* a continuation of the previous entry. |
7fca6080 |
*/ |
68be129f |
if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' '))) |
062ba8b0 |
inContinuationHeader = TRUE; |
7fca6080 |
|
062ba8b0 |
if(inContinuationHeader) {
const char *ptr; |
7fca6080 |
if(!continuationMarker(buffer)) |
062ba8b0 |
inContinuationHeader = FALSE; /* no more args */ |
7fca6080 |
/*
* Add all the arguments on the line
*/
for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) |
68be129f |
messageAddArgument(ret, ptr); |
ffd59a3e |
free(buffer); |
68be129f |
} else if(inHeader) { |
7fca6080 |
cli_dbgmsg("Deal with header %s\n", buffer);
/*
* A blank line signifies the end of the header and
* the start of the text
*/ |
ffd59a3e |
if(strlen(buffer) == 0) { |
7fca6080 |
cli_dbgmsg("End of header information\n"); |
09ccd6e0 |
inContinuationHeader = inHeader = FALSE; |
8c0250d5 |
} else if(parseEmailHeader(ret, buffer, rfc821Table) == CONTENT_TYPE)
inContinuationHeader = continuationMarker(buffer); |
ffd59a3e |
free(buffer); |
09ccd6e0 |
} else {
/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/ |
ffd59a3e |
messageAddLine(ret, buffer, 0); |
09ccd6e0 |
} |
ffd59a3e |
} |
68be129f |
|
4465fb04 |
messageClean(ret);
|
09ccd6e0 |
cli_dbgmsg("parseEmailHeaders: return\n");
|
68be129f |
return ret; |
7fca6080 |
}
/*
 * Handle a header line of an email message
 *
 * The line is split at the first ':' into a command and its argument, and
 * the pair is passed to parseMimeHeader() to be recorded in m. line itself
 * is not modified; the split is done on a private copy.
 *
 * Returns the result of parseMimeHeader(), or -1 when the line has no ':',
 * has no command before it, has nothing after it, or memory runs out.
 */
static int
parseEmailHeader(message *m, const char *line, const table_t *rfc821Table)
{
	char *copy, *cmd;
	int ret = -1;
#ifdef CL_THREAD_SAFE
	char *strptr;
#endif

	cli_dbgmsg("parseEmailHeader '%s'\n", line);

	if(strchr(line, ':') == NULL)
		return -1;

	copy = strdup(line);
	if(copy == NULL)
		/*
		 * Tokenising a NULL string would resume from stale
		 * strtok state, so stop here
		 */
		return -1;

	cmd = strtok_r(copy, ":", &strptr);

	if(cmd && *cmd) {
		/* an empty delimiter set yields the rest of the line */
		char *arg = strtok_r(NULL, "", &strptr);

		if(arg)
			/*
			 * Found a header such as
			 *	Content-Type: multipart/mixed;
			 * set arg to be
			 *	"multipart/mixed" and cmd to
			 * be "Content-Type"
			 */
			ret = parseMimeHeader(m, cmd, rfc821Table, arg);
	}
	free(copy);

	return ret;
}
/* |
b151ef55 |
* This is a recursive routine.
* |
7fca6080 |
* This function parses the body of mainMessage and saves its attachments in dir
* |
68be129f |
* mainMessage is the buffer to be parsed, it contains an e-mail's body, without
* any headers. First |
062ba8b0 |
* time of calling it'll be |
b151ef55 |
* the whole message. Later it'll be parts of a multipart message
* textIn is the plain text message being built up so far
* blobsIn contains the array of attachments found so far
* |
0bcad2b1 |
* Returns: |
b151ef55 |
* 0 for fail |
852e3ce4 |
* 1 for success, attachments saved
* 2 for success, attachments not saved |
b151ef55 |
*/
static int /* success or fail */ |
2250ea69 |
parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable) |
b151ef55 |
{ |
6613d595 |
message **messages; /* parts of a multipart message */ |
5c7cf3f1 |
int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0; |
b151ef55 |
text *aText;
blob *blobList[MAX_ATTACHMENTS], **blobs;
const char *cptr; |
2250ea69 |
message *mainMessage; |
b151ef55 |
|
7fca6080 |
cli_dbgmsg("in parseEmailBody(nBlobs = %d)\n", nBlobs); |
b151ef55 |
/* Pre-assertions */
if(nBlobs >= MAX_ATTACHMENTS) {
cli_warnmsg("Not all attachments will be scanned\n");
return 2;
}
aText = textIn;
blobs = blobsIn; |
6613d595 |
messages = NULL; |
2250ea69 |
mainMessage = messageIn; |
b151ef55 |
/* Anything left to be parsed? */ |
0bcad2b1 |
if(mainMessage && (messageGetBody(mainMessage) != NULL)) { |
c7256385 |
int numberOfAttachments = 0, numberOfNewAttachments; |
b151ef55 |
mime_type mimeType;
const char *mimeSubtype;
const text *t_line; |
f5e9abc8 |
/*bool isAlternative;*/ |
b151ef55 |
const char *boundary;
message *aMessage;
|
c6259ac5 |
cli_dbgmsg("Parsing mail file\n");
|
b151ef55 |
mimeType = messageGetMimeType(mainMessage);
mimeSubtype = messageGetMimeSubtype(mainMessage);
if((mimeType == TEXT) && (tableFind(subtypeTable, mimeSubtype) == PLAIN)) {
/*
* This is effectively no encoding, notice that we
* don't check that charset is us-ascii
*/
cli_dbgmsg("assume no encoding\n");
mimeType = NOMIME;
}
|
c6259ac5 |
cli_dbgmsg("mimeType = %d\n", mimeType);
|
b151ef55 |
switch(mimeType) {
case NOMIME:
aText = textAddMessage(aText, mainMessage);
break;
case TEXT:
if(tableFind(subtypeTable, mimeSubtype) == PLAIN)
aText = textCopy(messageGetBody(mainMessage));
break;
case MULTIPART:
boundary = messageFindArgument(mainMessage, "boundary");
if(boundary == NULL) {
cli_warnmsg("Multipart MIME message contains no boundaries\n"); |
2227f20e |
/* Broken e-mail message */
mimeType = NOMIME;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break; |
b151ef55 |
}
|
93002b48 |
if(mimeSubtype[0] == '\0') {
cli_warnmsg("Multipart has no subtype assuming alternative\n");
mimeSubtype = "alternative";
messageSetMimeSubtype(mainMessage, "alternative");
}
|
b151ef55 |
/*
* Get to the start of the first message
*/ |
0704dad8 |
t_line = messageGetBody(mainMessage);
if(t_line == NULL) {
cli_warnmsg("Multipart MIME message has no body\n");
free((char *)boundary);
mimeType = NOMIME;
break;
}
do |
bf8ea488 |
if(boundaryStart(t_line->t_text, boundary)) |
b151ef55 |
break; |
0704dad8 |
while((t_line = t_line->t_next) != NULL); |
b151ef55 |
if(t_line == NULL) { |
0704dad8 |
cli_warnmsg("Multipart MIME message contains no boundary lines\n"); |
bf8ea488 |
/*
* Free added by Thomas Lamy
* <Thomas.Lamy@in-online.net>
*/
free((char *)boundary); |
2227f20e |
mimeType = NOMIME;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break; |
b151ef55 |
}
/*
* Build up a table of all of the parts of this
* multipart message. Remember, each part may itself
* be a multipart message.
*/
inhead = 1;
inMimeHead = 0;
|
68be129f |
/*
* This looks like parseEmailHeaders() - maybe there's
* some duplication of code to be cleaned up
*/ |
6613d595 |
for(multiparts = 0; t_line; multiparts++) { |
26564cf5 |
int lines = 0;
|
6613d595 |
messages = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
|
b151ef55 |
aMessage = messages[multiparts] = messageCreate();
cli_dbgmsg("Now read in part %d\n", multiparts);
|
0bf1353d |
/*
* Ignore blank lines. There shouldn't be ANY
* but some viruses insert them
*/
while((t_line = t_line->t_next) != NULL) {
cli_chomp(t_line->t_text);
if(strlen(t_line->t_text) != 0)
break;
}
if(t_line == NULL) {
cli_dbgmsg("Empty part\n");
continue;
}
do { |
b151ef55 |
const char *line = t_line->t_text;
|
68be129f |
/*cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n",
inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");*/ |
b151ef55 |
if(inMimeHead) { |
7baeb4a6 |
/*
* Handle continuation lines
* because the previous line
* ended with a ;
*/ |
68be129f |
cli_dbgmsg("About to add mime Argument '%s'\n",
line); |
7baeb4a6 |
/*
* Handle the case when it
* isn't really a continuation
* line:
* Content-Type: application/octet-stream;
* Content-Transfer-Encoding: base64
*/
parseEmailHeader(aMessage, line, rfc821Table);
|
b151ef55 |
while(isspace((int)*line))
line++;
if(*line == '\0') {
inhead = inMimeHead = 0;
continue;
}
/*
* This may cause a trailing ';'
* to be added if this test
* fails - TODO: verify this
*/
inMimeHead = continuationMarker(line);
messageAddArgument(aMessage, line);
} else if(inhead) {
if(strlen(line) == 0) {
inhead = 0;
continue;
} |
a64bf87e |
if(isspace((int)*line)) {
/*
* The first line is
* continuation line.
* This is tricky
* to handle, but
* all we can do is our
* best
*/
cli_dbgmsg("Part %d starts with a continuation line\n",
multiparts);
messageAddArgument(aMessage, line);
/*
* Give it a default
* MIME type since
* that may be the
* missing line
*
* Choose application to
* force a save
*/
if(messageGetMimeType(aMessage) == NOMIME)
messageSetMimeType(aMessage, "application");
continue;
}
|
b151ef55 |
/*
* Some clients are broken and
* put white space after the ;
*/
inMimeHead = continuationMarker(line); |
4674dc9a |
if(!inMimeHead)
if(t_line->t_next && ((t_line->t_next->t_text[0] == '\t') || (t_line->t_next->t_text[0] == ' ')))
inMimeHead = TRUE; |
68badbc1 |
|
8c0250d5 |
parseEmailHeader(aMessage, line, rfc821Table); |
b151ef55 |
} else if(boundaryStart(line, boundary)) {
inhead = 1;
break;
} else if(endOfMessage(line, boundary)) {
/*
* Some viruses put information
* *after* the end of message,
* which presumably some broken
* mail clients find, so we
* can't assume that this
* is the end of the message
*/
/* t_line = NULL;*/
break; |
26564cf5 |
} else { |
ffd59a3e |
messageAddLine(aMessage, line, 1); |
26564cf5 |
lines++;
} |
0bf1353d |
} while((t_line = t_line->t_next) != NULL);
|
b151ef55 |
messageClean(aMessage); |
26564cf5 |
cli_dbgmsg("Part %d has %d lines\n",
multiparts, lines); |
b151ef55 |
}
free((char *)boundary);
|
6638be41 |
/*
* We've finished message we're parsing
*/
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL; |
2250ea69 |
} |
b151ef55 |
|
6613d595 |
if(multiparts == 0) {
if(messages)
free(messages); |
6638be41 |
return 2; /* Nothing to do */ |
6613d595 |
} |
6638be41 |
|
b151ef55 |
cli_dbgmsg("The message has %d parts\n", multiparts);
cli_dbgmsg("Find out the multipart type(%s)\n", mimeSubtype);
switch(tableFind(subtypeTable, mimeSubtype)) {
case RELATED: |
68be129f |
cli_dbgmsg("Multipart related handler\n"); |
b151ef55 |
/* |
295e425f |
* Have a look to see if there's HTML code
* which will need scanning |
b151ef55 |
*/
aMessage = NULL;
assert(multiparts > 0);
|
0bcad2b1 |
htmltextPart = getTextPart(messages, multiparts); |
b151ef55 |
|
0bcad2b1 |
if(htmltextPart >= 0)
aText = textAddMessage(aText, messages[htmltextPart]); |
b151ef55 |
else
/* |
295e425f |
* There isn't an HTML bit. If there's a
* multipart bit, it'll may be in there
* somewhere |
b151ef55 |
*/
for(i = 0; i < multiparts; i++)
if(messageGetMimeType(messages[i]) == MULTIPART) {
aMessage = messages[i]; |
0bcad2b1 |
htmltextPart = i; |
b151ef55 |
break;
}
|
295e425f |
if(htmltextPart == -1) {
cli_dbgmsg("No HTML code found to be scanned");
rc = 0;
} else |
7fca6080 |
rc = parseEmailBody(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); |
b151ef55 |
blobArrayDestroy(blobs, nBlobs); |
c6259ac5 |
blobs = NULL;
nBlobs = 0; |
b151ef55 |
/*
* Fixed based on an idea from Stephen White <stephen@earth.li>
* The message is confused about the difference
* between alternative and related. Badtrans.B
* suffers from this problem.
*
* Fall through in this case:
* Content-Type: multipart/related;
* type="multipart/alternative"
*/ |
f5e9abc8 |
/*
* Changed to always fall through based on
* an idea from Michael Dankov <misha@btrc.ru>
* that some viruses are completely confused
* about the difference between related
* and mixed
*/
/*cptr = messageFindArgument(mainMessage, "type"); |
b151ef55 |
if(cptr == NULL)
break;
isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
free((char *)cptr);
if(!isAlternative) |
f5e9abc8 |
break;*/ |
b151ef55 |
case ALTERNATIVE:
cli_dbgmsg("Multipart alternative handler\n");
|
0bcad2b1 |
htmltextPart = getTextPart(messages, multiparts); |
b151ef55 |
|
0bcad2b1 |
if(htmltextPart == -1)
htmltextPart = 0; |
b151ef55 |
|
0bcad2b1 |
aMessage = messages[htmltextPart]; |
b151ef55 |
aText = textAddMessage(aText, aMessage);
|
7fca6080 |
rc = parseEmailBody(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); |
7b8fb055 |
|
b151ef55 |
if(rc == 1) {
/*
* Alternative message has saved its
* attachments, ensure we don't do
* the same thing
*/ |
d3d2fb1e |
blobArrayDestroy(blobs, nBlobs);
blobs = NULL; |
b151ef55 |
nBlobs = 0;
rc = 2;
}
/*
* Fall through - some clients are broken and
* say alternative instead of mixed. The Klez
* virus is broken that way
*/
case REPORT:
/*
* According to section 1 of RFC1892, the
* syntax of multipart/report is the same
* as multipart/mixed. There are some required
* parameters, but there's no need for us to
* verify that they exist
*/
case MIXED: |
fdc8a467 |
case APPLEDOUBLE: /* not really supported */ |
b151ef55 |
/*
* Look for attachments
*
* Not all formats are supported. If an
* unsupported format turns out to be
* common enough to implement, it is a simple
* matter to add it
*/ |
2250ea69 |
if(aText) {
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
b151ef55 |
mainMessage = NULL; |
2250ea69 |
} |
b151ef55 |
cli_dbgmsg("Mixed message with %d parts\n", multiparts);
for(i = 0; i < multiparts; i++) {
bool addAttachment = FALSE;
bool addToText = FALSE;
const char *dtype; |
f54a8635 |
message *body; |
b151ef55 |
aMessage = messages[i];
assert(aMessage != NULL);
dtype = messageGetDispositionType(aMessage); |
0bcad2b1 |
cptr = messageGetMimeSubtype(aMessage); |
b151ef55 |
cli_dbgmsg("Mixed message part %d is of type %d\n",
i, messageGetMimeType(aMessage));
switch(messageGetMimeType(aMessage)) {
case APPLICATION: |
c6259ac5 |
#if 0
/* strict checking... */ |
b151ef55 |
if((strcasecmp(dtype, "attachment") == 0) || |
0bcad2b1 |
(strcasecmp(cptr, "x-msdownload") == 0) || |
c6259ac5 |
(strcasecmp(cptr, "octet-stream") == 0) || |
0bcad2b1 |
(strcasecmp(dtype, "octet-stream") == 0)) |
b151ef55 |
addAttachment = TRUE;
else { |
c6259ac5 |
cli_dbgmsg("Discarded mixed/application not sent as attachment\n"); |
b151ef55 |
continue;
} |
c6259ac5 |
#endif
addAttachment = TRUE; |
b151ef55 |
break;
case NOMIME: |
7b8fb055 |
if(mainMessage) {
const text *t_line = uuencodeBegin(mainMessage);
if(t_line) {
blob *aBlob;
cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n");
messageSetEncoding(mainMessage, "x-uuencode");
aBlob = messageToBlob(mainMessage);
if(aBlob) {
assert(blobGetFilename(aBlob) != NULL);
blobClose(aBlob);
blobList[numberOfAttachments++] = aBlob;
}
}
if(mainMessage != messageIn)
messageDestroy(mainMessage);
mainMessage = NULL;
} |
b151ef55 |
addToText = TRUE;
if(messageGetBody(aMessage) == NULL)
/*
* No plain text version
*/ |
ffd59a3e |
messageAddLine(aMessage, "No plain text alternative", 1); |
b151ef55 |
assert(messageGetBody(aMessage) != NULL);
break;
case TEXT: |
852e3ce4 |
cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
dtype); |
b151ef55 |
if(strcasecmp(dtype, "attachment") == 0)
addAttachment = TRUE;
else if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) { |
852e3ce4 |
const text *t_line = uuencodeBegin(aMessage);
|
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
b151ef55 |
mainMessage = NULL; |
852e3ce4 |
if(t_line) {
cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n");
messageSetEncoding(aMessage, "x-uuencode");
addAttachment = TRUE;
} else if(strcasecmp(messageGetMimeSubtype(aMessage), "plain") == 0) { |
7584963d |
char *filename; |
852e3ce4 |
/*
* Strictly speaking
* a text/html part is
* not an attachment. We
* pretend it is so that
* we can decode and
* scan it
*/ |
7584963d |
filename = (char *)messageFindArgument(aMessage, "filename");
if(filename == NULL)
filename = (char *)messageFindArgument(aMessage, "name");
if(filename == NULL) {
cli_dbgmsg("Adding part to main message\n");
addToText = TRUE;
} else {
cli_dbgmsg("Treating %s as attachment\n",
filename);
free(filename);
addAttachment = TRUE;
} |
852e3ce4 |
} else { |
b151ef55 |
messageAddArgument(aMessage, "filename=textportion");
addAttachment = TRUE;
}
} else { |
bad123c6 |
cli_warnmsg("Text type %s is not supported\n", dtype); |
b151ef55 |
continue;
}
break;
case MESSAGE: |
2e0f78a6 |
/* Content-Type: message/rfc822 */ |
b151ef55 |
cli_dbgmsg("Found message inside multipart\n"); |
ffd59a3e |
if(encodingLine(aMessage) == NULL) {
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
af852ae0 |
continue; |
ffd59a3e |
} |
2e0f78a6 |
messageAddLineAtTop(aMessage,
"Received: by clamd"); |
4465fb04 |
#ifdef SAVE_TO_DISC
/*
* Save this embedded message
* to a temporary file
*/
saveTextPart(aMessage, dir);
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
#else
/*
* Scan in memory, faster but
* is open to DoS attacks when
* many nested levels are
* involved.
*/ |
f54a8635 |
body = parseEmailHeaders(aMessage, rfc821Table); |
bad123c6 |
/*
* We've finished with the
* original copy of the message,
* so throw that away and
* deal with the encapsulated
* message as a message.
* This can save a lot of memory
*/
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
f54a8635 |
if(body) {
rc = parseEmailBody(body, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable);
messageDestroy(body);
} |
4465fb04 |
#endif |
b151ef55 |
continue;
case MULTIPART:
/*
* It's a multi part within a multi part
* Run the message parser on this bit, it won't
* be an attachment
*/
cli_dbgmsg("Found multipart inside multipart\n"); |
f54a8635 |
if(aMessage) {
body = parseEmailHeaders(aMessage, rfc821Table);
if(body) { |
74b5c349 |
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
f54a8635 |
|
6638be41 |
/*t = messageToText(body);
rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable);*/
rc = parseEmailBody(body, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable);
/*textDestroy(t);*/ |
74b5c349 |
cli_dbgmsg("Finished recursion\n");
|
f54a8635 |
mainMessage = body;
}
} else {
rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable); |
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
f54a8635 |
mainMessage = NULL;
} |
b151ef55 |
continue;
case AUDIO:
case IMAGE: |
c7256385 |
case VIDEO: |
b151ef55 |
/*
* TODO: it may be nice to
* have an option to throw
* away all images and sound
* files for ultra-secure sites
*/
addAttachment = TRUE;
break;
default: |
c7256385 |
cli_warnmsg("Only text and application attachments are supported, type = %d\n", |
b151ef55 |
messageGetMimeType(aMessage));
continue;
}
/*
* It must be either text or
* an attachment. It can't be both
*/
assert(addToText || addAttachment);
assert(!(addToText && addAttachment));
|
7b8fb055 |
if(addToText) { |
b151ef55 |
aText = textAdd(aText, messageGetBody(aMessage)); |
7b8fb055 |
} else if(addAttachment) { |
b151ef55 |
blob *aBlob = messageToBlob(aMessage);
if(aBlob) {
assert(blobGetFilename(aBlob) != NULL); |
c7256385 |
blobClose(aBlob);
blobList[numberOfAttachments++] = aBlob; |
b151ef55 |
}
} |
6638be41 |
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
b151ef55 |
}
if(numberOfAttachments == 0) {
/* No usable attachment was found */ |
7fca6080 |
rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable); |
b151ef55 |
break;
} |
c7256385 |
|
b151ef55 |
/*
* Store any existing attachments at the end of
* the list we've just built up
*/ |
c7256385 |
numberOfNewAttachments = 0; |
b151ef55 |
for(i = 0; i < nBlobs; i++) { |
c7256385 |
int j; |
0bcad2b1 |
#ifdef CL_DEBUG |
b151ef55 |
assert(blobs[i]->magic == BLOB); |
0bcad2b1 |
#endif |
c7256385 |
for(j = 0; j < numberOfAttachments; j++)
if(blobcmp(blobs[i], blobList[j]) == 0)
break;
if(j >= numberOfAttachments) {
assert(numberOfAttachments < MAX_ATTACHMENTS);
cli_dbgmsg("Attaching %s to list of blobs\n",
blobGetFilename(blobs[i]));
blobClose(blobs[i]);
blobList[numberOfAttachments++] = blobs[i];
numberOfNewAttachments++;
} else {
cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n",
blobGetFilename(blobs[i]),
blobGetFilename(blobList[j]));
blobDestroy(blobs[i]);
} |
b151ef55 |
}
|
9a35912c |
/* |
c7256385 |
* If we've found nothing new save what we have
* and quit - that's this part all done.
*/
if(numberOfNewAttachments == 0) {
rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable);
break;
}
/* |
9a35912c |
* If there's only one part of the MULTIPART
* we already have the body to decode so
* there's no more work to do.
*
* This is mostly for the situation where |
bad123c6 |
* broken messages claim to be multipart
* but aren't was causing us to go into |
9a35912c |
* infinite recursion
*/
if(multiparts > 1)
rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable); |
4e7ca2b1 |
else if(numberOfAttachments == 1) {
(void)saveFile(blobList[0], dir);
blobDestroy(blobList[0]);
} |
b151ef55 |
break;
case DIGEST: |
f54a8635 |
/*
* TODO:
* According to section 5.1.5 RFC2046, the
* default mime type of multipart/digest parts
* is message/rfc822
*/ |
b151ef55 |
case SIGNED:
case PARALLEL:
/*
* If we're here it could be because we have a
* multipart/mixed message, consisting of a
* message followed by an attachment. That
* message itself is a multipart/alternative
* message and we need to dig out the plain
* text part of that alternative
*/ |
0bcad2b1 |
htmltextPart = getTextPart(messages, multiparts);
if(htmltextPart == -1)
htmltextPart = 0; |
b151ef55 |
|
7fca6080 |
rc = parseEmailBody(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); |
b151ef55 |
blobArrayDestroy(blobs, nBlobs); |
c6259ac5 |
blobs = NULL;
nBlobs = 0; |
b151ef55 |
break;
default:
/*
* According to section 7.2.6 of RFC1521,
* unrecognised multiparts should be treated as
* multipart/mixed. I don't do this yet so
* that I can see what comes along...
*/
cli_warnmsg("Unsupported multipart format `%s'\n", mimeSubtype);
rc = 0;
}
for(i = 0; i < multiparts; i++) |
bad123c6 |
if(messages[i])
messageDestroy(messages[i]); |
b151ef55 |
if(blobs && (blobsIn == NULL))
puts("arraydestroy");
|
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
c6259ac5 |
if(aText && (textIn == NULL))
textDestroy(aText);
|
6613d595 |
if(messages)
free(messages);
|
b151ef55 |
return rc;
case MESSAGE:
/*
* Check for forbidden encodings
*/
switch(messageGetEncoding(mainMessage)) {
case NOENCODING:
case EIGHTBIT:
case BINARY:
break;
default: |
c6259ac5 |
cli_warnmsg("MIME type 'message' cannot be decoded\n"); |
b151ef55 |
break;
} |
c6259ac5 |
if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
(strcasecmp(mimeSubtype, "delivery-status") == 0)) { |
bad123c6 |
message *m = parseEmailHeaders(mainMessage, rfc821Table);
if(m) {
cli_dbgmsg("Decode rfc822");
|
4465fb04 |
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL;
} |
bad123c6 |
if(messageGetBody(m))
rc = parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable);
messageDestroy(m);
} |
b151ef55 |
break; |
bf8ea488 |
} else if(strcasecmp(mimeSubtype, "partial") == 0) |
b151ef55 |
/* TODO */ |
8a88fb93 |
cli_warnmsg("Content-type message/partial not yet supported\n"); |
bf8ea488 |
else if(strcasecmp(mimeSubtype, "external-body") == 0) |
b151ef55 |
/*
* I don't believe that we should be going
* around the Internet looking for referenced
* files...
*/
cli_warnmsg("Attempt to send Content-type message/external-body trapped"); |
bf8ea488 |
else |
b151ef55 |
cli_warnmsg("Unsupported message format `%s'\n", mimeSubtype);
|
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
6613d595 |
if(messages)
free(messages); |
b151ef55 |
return 0;
case APPLICATION: |
0bcad2b1 |
cptr = messageGetMimeSubtype(mainMessage);
|
04421a14 |
/*if((strcasecmp(cptr, "octet-stream") == 0) ||
(strcasecmp(cptr, "x-msdownload") == 0)) {*/
{ |
b151ef55 |
blob *aBlob = messageToBlob(mainMessage);
if(aBlob) {
cli_dbgmsg("Saving main message as attachment %d\n", nBlobs);
assert(blobGetFilename(aBlob) != NULL);
/*
* It's likely that we won't have built
* a set of attachments
*/
if(blobs == NULL)
blobs = blobList; |
c6259ac5 |
for(i = 0; i < nBlobs; i++)
if(blobs[i] == NULL)
break; |
c7256385 |
blobClose(aBlob); |
c6259ac5 |
blobs[i] = aBlob;
if(i == nBlobs) {
nBlobs++;
assert(nBlobs < MAX_ATTACHMENTS);
} |
b151ef55 |
} |
04421a14 |
} /*else
cli_warnmsg("Discarded application not sent as attachment\n");*/ |
b151ef55 |
break;
case AUDIO:
case VIDEO:
case IMAGE:
break;
default:
cli_warnmsg("Message received with unknown mime encoding");
break;
}
}
cli_dbgmsg("%d attachments found\n", nBlobs);
|
0bcad2b1 |
if(nBlobs == 0) {
blob *b;
|
b151ef55 |
/* |
15c8cace |
* No attachments - scan the text portions, often files
* are hidden in HTML code |
b151ef55 |
*/ |
0bcad2b1 |
cli_dbgmsg("%d multiparts found\n", multiparts); |
15c8cace |
for(i = 0; i < multiparts; i++) {
b = messageToBlob(messages[i]); |
b151ef55 |
|
0bcad2b1 |
assert(b != NULL);
|
15c8cace |
cli_dbgmsg("Saving multipart %d, encoded with scheme %d\n",
i, messageGetEncoding(messages[i])); |
0bcad2b1 |
(void)saveFile(b, dir); |
b151ef55 |
blobDestroy(b);
} |
0bcad2b1 |
if(mainMessage) {
/*
* Look for uu-encoded main file
*/ |
5a01973c |
const text *t_line; |
0bcad2b1 |
|
5a01973c |
if((t_line = uuencodeBegin(mainMessage)) != NULL) { |
2227f20e |
cli_dbgmsg("Found uuencoded file\n");
|
15c8cace |
/*
* Main part contains uuencoded section
*/ |
852e3ce4 |
messageSetEncoding(mainMessage, "x-uuencode"); |
0bcad2b1 |
if((b = messageToBlob(mainMessage)) != NULL) {
if((cptr = blobGetFilename(b)) != NULL) {
cli_dbgmsg("Found uuencoded message %s\n", cptr);
(void)saveFile(b, dir);
}
blobDestroy(b);
} |
92915cee |
} else if((encodingLine(mainMessage) != NULL) && |
a750c93c |
((t_line = bounceBegin(mainMessage)) != NULL)) {
const text *t;
static const char encoding[] = "Content-Transfer-Encoding"; |
92915cee |
/*
* Attempt to save the original (unbounced)
* message - clamscan will find that in the
* directory and call us again (with any luck)
* having found an e-mail message to handle |
a750c93c |
*
* This finds a lot of false positives, the
* search that an encoding line is in the
* bounce (i.e. it's after the bounce header)
* helps a bit, but at the expense of scanning
* the entire message. messageAddLine
* optimisation could help here, but needs
* careful thought, do it with line numbers
* would be best, since the current method in
* messageAddLine of checking encoding first
* must remain otherwise non bounce messages
* won't be scanned |
92915cee |
*/ |
a750c93c |
for(t = t_line; t; t = t->t_next)
if((strncasecmp(t->t_text, encoding, sizeof(encoding) - 1) == 0) &&
(strstr(t->t_text, "7bit") == NULL)) |
7b8fb055 |
break; |
a750c93c |
if(t && ((b = textToBlob(t_line, NULL)) != NULL)) { |
92915cee |
cli_dbgmsg("Found a bounce message\n");
saveFile(b, dir);
blobDestroy(b);
} |
5a01973c |
} else { |
b759d5eb |
bool saveIt;
|
5a01973c |
cli_dbgmsg("Not found uuencoded file\n");
|
af852ae0 |
if(messageGetMimeType(mainMessage) == MESSAGE) |
b759d5eb |
/*
* Quick peek, if the encapsulated
* message has no
* content encoding statement don't
* bother saving to scan, it's safe
*/ |
627465e7 |
saveIt = (encodingLine(mainMessage) != NULL);
else if((t_line = encodingLine(mainMessage)) != NULL) {
/*
* Some bounces include the message |
a750c93c |
* body without the headers.
* Unfortunately this generates a
* lot of false positives that a bounce
* has been found when it hasn't. |
627465e7 |
*/ |
92915cee |
if((b = blobCreate()) != NULL) { |
627465e7 |
cli_dbgmsg("Found a bounce message with no header\n");
blobAddData(b, "Received: by clamd\n", 19); |
02c9dc2a |
b = textToBlob(t_line, b); |
627465e7 |
saveFile(b, dir);
blobDestroy(b);
}
saveIt = FALSE; |
a750c93c |
} else |
627465e7 |
/*
* Save the entire text portion, |
a750c93c |
* since it may be an HTML file with
* a JavaScript virus |
627465e7 |
*/ |
b759d5eb |
saveIt = TRUE;
if(saveIt) {
cli_dbgmsg("Saving text part to scan\n");
saveTextPart(mainMessage, dir);
} |
0bcad2b1 |
} |
68be129f |
} else
rc = (multiparts) ? 1 : 2; /* anything saved? */ |
b151ef55 |
} else {
short attachmentNumber;
for(attachmentNumber = 0; attachmentNumber < nBlobs; attachmentNumber++) {
blob *b = blobs[attachmentNumber];
|
c6259ac5 |
if(b) {
if(!saveFile(b, dir))
break;
blobDestroy(b);
blobs[attachmentNumber] = NULL;
} |
b151ef55 |
}
}
if(aText && (textIn == NULL))
textDestroy(aText);
/* Already done */ |
c6259ac5 |
if(blobs && (blobsIn == NULL))
blobArrayDestroy(blobs, nBlobs); |
b151ef55 |
|
2250ea69 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
6613d595 |
if(messages)
free(messages);
|
68be129f |
cli_dbgmsg("parseEmailBody() returning %d\n", rc); |
b151ef55 |
|
68be129f |
return rc; |
b151ef55 |
}
/*
 * Does this line open a new section of a multipart message?
 *
 * Sections normally begin with "--" followed by the boundary string.
 * Returns non-zero if the line is taken to be a boundary, 0 otherwise.
 */
static int
boundaryStart(const char *line, const char *boundary)
{
	/*
	 * Be lenient first: Gibe.B3 is broken, it declares
	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
	 * but the boundaries it actually emits look like
	 *	------ =_NextPart_000_01C31177.9DC7C000
	 * (note the extra '-'), so a line that merely contains the
	 * boundary string is accepted
	 */
	if(strstr(line, boundary)) {
		cli_dbgmsg("found %s in %s\n", boundary, line);
		return 1;
	}

	/* Strict form: exactly "--" then the boundary, case insensitive */
	if((line[0] != '-') || (line[1] != '-'))
		return 0;

	return (strcasecmp(&line[2], boundary) == 0) ? 1 : 0;
}
/*
 * Is the current line the closing delimiter of the message?
 *
 * The message ends with --boundary--
 * The boundary is compared case insensitively and nothing may follow
 * the trailing "--". Returns 1 for a closing delimiter, 0 otherwise.
 */
static int
endOfMessage(const char *line, const char *boundary)
{
	const char *tail;
	size_t blen;

	/* Leading "--" */
	if((line[0] != '-') || (line[1] != '-'))
		return 0;
	tail = &line[2];

	blen = strlen(boundary);
	if(strncasecmp(tail, boundary, blen) != 0)
		return 0;

	/* Only the two closing dashes may come after the boundary */
	if(strlen(tail) != (blen + 2))
		return 0;

	return (tail[blen] == '-') && (tail[blen + 1] == '-');
}
/*
 * Initialise the various lookup tables
 *
 * *rfc821Table is filled from the rfc821headers list and *subtypeTable
 * from the mimeSubtypes list.
 *
 * Returns 0 on success. If an insert fails, every table created so far
 * is destroyed, the corresponding caller pointers are reset to NULL so
 * that no dangling pointer is handed back, and -1 is returned.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
	const struct tableinit *tableinit;

	*rfc821Table = tableCreate();
	assert(*rfc821Table != NULL);

	for(tableinit = rfc821headers; tableinit->key; tableinit++)
		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			*rfc821Table = NULL;
			return -1;
		}

	*subtypeTable = tableCreate();
	assert(*subtypeTable != NULL);

	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
			/* Undo both tables, the caller gets all or nothing */
			tableDestroy(*rfc821Table);
			tableDestroy(*subtypeTable);
			*rfc821Table = NULL;
			*subtypeTable = NULL;
			return -1;
		}

	return 0;
}
/*
 * Choose which part of a multipart message to treat as the text.
 *
 * Preference is given to a text/html part, since HTML text is the most
 * likely to include a scripting worm; failing that the first part of
 * type text is used.
 *
 * Returns the index of the chosen part, or -1 if there is no text part
 */
static int
getTextPart(message *const messages[], size_t size)
{
	size_t idx;

	/* First choice: text/html */
	for(idx = 0; idx < size; idx++) {
		message *const part = messages[idx];

		assert(part != NULL);

		if(messageGetMimeType(part) != TEXT)
			continue;
		if(strcasecmp(messageGetMimeSubtype(part), "html") == 0)
			return (int)idx;
	}

	/* Second choice: any text part at all */
	for(idx = 0; idx < size; idx++) {
		if(messageGetMimeType(messages[idx]) == TEXT)
			return (int)idx;
	}

	return -1;
}
/*
 * strip -
 *	Remove the trailing spaces from a buffer
 *	Returns its new length (a la strlen)
 *
 * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
 *	not strlen(buf)
 *
 * A NULL buffer or a non-positive len yields 0. If len is larger than
 * strlen(buf) + 1 the buffer is left untouched and its current length
 * is returned.
 *
 * Which trailing characters are removed depends on the platform: the
 * UNIX style build removes anything that is not isgraph(), but stops at
 * '\n' and '\r'; the other build removes NULs and anything isspace().
 */
static size_t
strip(char *buf, int len)
{
	char *ptr;
	size_t i;

	if((buf == NULL) || (len <= 0))
		return(0);

	i = strlen(buf);
	if(len > (int)(i + 1))
		return(i);

	ptr = &buf[--len];

	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char, so bytes with the top bit set
	 * must be converted before they are tested
	 */
#if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
	do
		if(*ptr)
			*ptr = '\0';
	while((--len >= 0) && !isgraph((unsigned char)*--ptr) && (*ptr != '\n') && (*ptr != '\r'));
#else	/* more characters can be displayed on DOS */
	do
#ifndef	REAL_MODE_DOS
		if(*ptr)	/* C8.0 puts into a text area */
#endif
			*ptr = '\0';
	while((--len >= 0) && ((*--ptr == '\0') || isspace((unsigned char)*ptr)));
#endif
	return((size_t)(len + 1));
}
/*
 * strstrip:
 *	Strip a NUL terminated string in place using strip().
 *	Returns the length of the stripped string, or 0 if s is NULL
 */
size_t
strstrip(char *s)
{
	return (s != (char *)NULL) ? strip(s, strlen(s) + 1) : 0;
}
/*
* When parsing a MIME header see if this spans more than one line. A
* semi-colon at the end of the line indicates that the MIME information
* is continued on the next line.
*
* Some clients are broken and put white space after the ;
*/
static bool
continuationMarker(const char *line)
{
const char *ptr;
assert(line != NULL);
#ifdef CL_DEBUG
cli_dbgmsg("continuationMarker(%s)\n", line);
#endif
if(strlen(line) == 0)
return FALSE;
ptr = strchr(line, '\0');
assert(ptr != NULL);
|
752c34b9 |
while(ptr > line) |
b151ef55 |
switch(*--ptr) {
case '\n':
case '\r':
case ' ':
case '\t':
continue;
case ';':
return TRUE;
default:
return FALSE;
}
return FALSE;
}
/*
 * Handle one MIME header, storing what it says in the message m.
 *
 * cmd is the header name, looked up in rfc821Table, and arg is the text
 * of the header, which may be NULL for an empty header. Content-Type,
 * Content-Transfer-Encoding and Content-Disposition are acted upon,
 * anything else is ignored.
 *
 * Returns whatever tableFind() gave for cmd, including when the header
 * could not be processed because memory ran out.
 */
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
{
	const int type = tableFind(rfc821Table, cmd);
#ifdef	CL_THREAD_SAFE
	/* NOTE(review): strtok_r is presumably a macro that ignores this
	 * argument in non thread-safe builds - confirm */
	char *strptr;
#endif
	char *copy;	/* private, writable copy of arg, NULL if arg is */
	char *ptr;	/* start of that copy, this is what gets freed */

	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg ? arg : "");

	/*
	 * arg must be tested before it is duplicated, otherwise an empty
	 * header never reaches the "Empty content-type" handling below
	 */
	if(arg == NULL)
		copy = NULL;
	else if((copy = strdup(arg)) == NULL) {
		cli_errmsg("parseMimeHeader: out of memory\n");
		return type;
	}
	ptr = copy;

	strstrip(copy);	/* copes with NULL */

	switch(type) {
		case CONTENT_TYPE:
			/*
			 * Fix for non RFC1521 compliant mailers
			 * that send content-type: Text instead
			 * of content-type: Text/Plain, or
			 * just simply "Content-Type:"
			 */
			if(copy == NULL)
				cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
			else if(strchr(copy, '/') == NULL)
				cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
			else {
				char *rest;

				if(*copy == '/') {
					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
					messageSetMimeType(m, "application");
					messageSetMimeSubtype(m, "octet-stream");
					/* Step over the bogus type */
					strtok_r(copy, ";", &strptr);
				} else {
					char *s;

					messageSetMimeType(m, strtok_r(copy, "/", &strptr));

					/*
					 * Stephen White <stephen@earth.li>
					 * Some clients put space after
					 * the mime type but before
					 * the ;
					 */
					s = strtok_r(NULL, ";", &strptr);
					strstrip(s);
					/*
					 * NOTE(review): s is NULL when nothing
					 * follows the '/', it is passed on
					 * unchanged as before - confirm that
					 * messageSetMimeSubtype copes
					 */
					messageSetMimeSubtype(m, s);
				}

				/*
				 * Add in all the rest of the arguments.
				 * e.g. if the header is this:
				 *	Content-Type: multipart/mixed; boundary=foo
				 * the boundary argument is what is left
				 */
				rest = strtok_r(NULL, "", &strptr);
				if(rest)
					messageAddArguments(m, rest);
			}
			break;
		case CONTENT_TRANSFER_ENCODING:
			if(copy)
				messageSetEncoding(m, copy);
			break;
		case CONTENT_DISPOSITION:
			if(copy) {
				const char *dtype = strtok_r(copy, ";", &strptr);

				if(dtype && *dtype) {
					messageSetDispositionType(m, dtype);
					/*
					 * NOTE(review): NULL is passed when
					 * no parameter follows the type, as
					 * before - confirm messageAddArgument
					 * accepts that
					 */
					messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr));
				}
			}
			break;
		default:
			/* Not a header that needs any action here */
			break;
	}
	free(ptr);

	return type;
}
|
68be129f |
/*
 * Save the text portion of the message m into the directory dir.
 *
 * The part is tagged with the argument "filename=textportion", turned
 * into a blob and handed to saveFile(). Nothing is saved if the
 * message can't be converted to a blob.
 */
static void
saveTextPart(message *m, const char *dir)
{
	blob *textBlob;

	messageAddArgument(m, "filename=textportion");

	textBlob = messageToBlob(m);
	if(textBlob == NULL)
		return;

	/*
	 * Save main part to scan that
	 */
	cli_dbgmsg("Saving main message, encoded with scheme %d\n",
		messageGetEncoding(m));

	(void)saveFile(textBlob, dir);

	blobDestroy(textBlob);
}
/* |
68be129f |
* Save some data as a unique file in the given directory. |
a750c93c |
*
* TODO: don't save archive files if archive scanning is disabled, or
* OLE2 files if that is disabled or pattern match --exclude, but
* we need access to the command line options/clamav.conf here to
* be able to do that |
68be129f |
*/ |
0bcad2b1 |
static bool |
b151ef55 |
saveFile(const blob *b, const char *dir)
{ |
c95ae98b |
const unsigned long nbytes = blobGetDataSize(b); |
1d53a315 |
size_t suffixLen = 0; |
b151ef55 |
int fd; |
0bcad2b1 |
const char *cptr, *suffix; |
701a425d |
char filename[NAME_MAX + 1]; |
b151ef55 |
assert(dir != NULL);
if(nbytes == 0) |
0bcad2b1 |
return TRUE; |
b151ef55 |
cptr = blobGetFilename(b);
if(cptr == NULL) {
cptr = "unknown";
suffix = "";
} else {
/*
* Some programs are broken and use an idea of a ".suffix"
* to determine the file type rather than looking up the
* magic number. CPM has a lot to answer for...
* FIXME: the suffix now appears twice in the filename...
*/
suffix = strrchr(cptr, '.');
if(suffix == NULL)
suffix = ""; |
1d53a315 |
else {
suffixLen = strlen(suffix);
if(suffixLen > 4) {
/* Found a full stop which isn't a suffix */
suffix = "";
suffixLen = 0;
}
} |
b151ef55 |
}
cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);
|
0bcad2b1 |
/*
* Allow for very long filenames. We have to truncate them to fit
*/ |
1d53a315 |
snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir,
(int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr); |
b151ef55 |
/* |
181c7548 |
* TODO: add a HAVE_MKSTEMP property |
b151ef55 |
*/ |
4b0a2de6 |
#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) |
b151ef55 |
fd = mkstemp(filename);
#else
(void)mktemp(filename); |
0dbec6b9 |
fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
b151ef55 |
#endif
if(fd < 0) { |
181c7548 |
cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno)); |
39ff42ee |
cli_dbgmsg("%lu %d %d\n", suffixLen, sizeof(filename), strlen(filename)); |
0bcad2b1 |
return FALSE; |
b151ef55 |
}
/* |
c6259ac5 |
* Add the suffix back to the end of the filename. Tut-tut, filenames
* should be independant of their usage on UNIX type systems. |
b151ef55 |
*/ |
1d53a315 |
if(suffixLen > 1) { |
6ecba059 |
char stub[NAME_MAX + 1]; |
c6259ac5 |
|
181c7548 |
snprintf(stub, sizeof(stub), "%s%s", filename, suffix); |
a8c7e017 |
#ifdef C_LINUX
rename(stub, filename);
#else |
b151ef55 |
link(stub, filename);
unlink(stub); |
a8c7e017 |
#endif |
b151ef55 |
}
|
b9ec1705 |
cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n", |
b151ef55 |
filename, nbytes);
|
c95ae98b |
if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) {
perror(filename);
close(fd);
return FALSE; |
b9ec1705 |
}
|
c6259ac5 |
return (close(fd) >= 0); |
b151ef55 |
} |