e3aaff8e |
/* |
b025d30e |
* Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk> |
e3aaff8e |
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software |
48b7b4a7 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA. |
e3aaff8e |
*/ |
69c62847 |
static char const rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $"; |
ea541184 |
#ifdef _MSC_VER
#include <winsock.h> /* only needed in CL_EXPERIMENTAL */
#endif |
6d6e8271 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif |
e3aaff8e |
#ifndef CL_DEBUG |
548a5f96 |
#define NDEBUG /* map CLAMAV debug onto standard */ |
e3aaff8e |
#endif
#ifdef CL_THREAD_SAFE |
98cb5cba |
#ifndef _REENTRANT |
e3aaff8e |
#define _REENTRANT /* for Solaris 2.8 */
#endif |
98cb5cba |
#endif |
e3aaff8e |
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <string.h> |
bc6bbeff |
#ifdef HAVE_STRINGS_H |
e3aaff8e |
#include <strings.h> |
bc6bbeff |
#endif |
e3aaff8e |
#include <ctype.h>
#include <time.h>
#include <fcntl.h> |
bc6bbeff |
#ifdef HAVE_SYS_PARAM_H |
d4d14218 |
#include <sys/param.h> |
bc6bbeff |
#endif
#include "clamav.h"
#ifndef C_WINDOWS |
f10460ed |
#include <dirent.h> |
bc6bbeff |
#endif |
a0b21816 |
#include <limits.h> |
093e013c |
#include <signal.h> |
e3aaff8e |
|
cd153266 |
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
|
242bfde8 |
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
#include <stddef.h>
#endif
|
e2875303 |
#ifdef CL_THREAD_SAFE
#include <pthread.h>
#endif
|
0f7f7682 |
#include "others.h"
#include "str.h"
#include "filetypes.h" |
e3aaff8e |
#include "mbox.h" |
d77ac7de |
#include "dconf.h"
#define DCONF_PHISHING mctx->ctx->dconf->phishing |
e3aaff8e |
|
02927896 |
#ifdef CL_DEBUG |
093e013c |
|
92dbfae7 |
#if defined(C_LINUX) || defined(C_CYGWIN) |
093e013c |
#include <features.h> |
92dbfae7 |
#endif |
093e013c |
|
02927896 |
#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
#define HAVE_BACKTRACE
#endif |
3f3f9085 |
#endif |
02927896 |
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#include <syslog.h>
static void sigsegv(int sig);
static void print_trace(int use_syslog); |
a9d251e0 |
/*#define SAVE_TMP /* Save the file being worked on in tmp */ |
02927896 |
#endif
|
c2b2d8af |
#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE) |
e3aaff8e |
#undef strtok_r
#undef __strtok_r
#define strtok_r(a,b,c) strtok(a,b)
#endif
|
0cf4cea7 |
#ifdef HAVE_STDBOOL_H |
89d4073d |
#ifdef C_BEOS
#include "SupportDefs.h"
#else |
edee0700 |
#include <stdbool.h> |
89d4073d |
#endif |
edee0700 |
#else
#ifdef FALSE
typedef unsigned char bool;
#else
typedef enum { FALSE = 0, TRUE = 1 } bool; |
e3aaff8e |
#endif
#endif
|
ecc3d638 |
/*
 * Status code type used by the body parser: it is the return type of
 * parseEmailBody() and is passed by pointer to do_multipart() and
 * checkURLs() (see the prototypes further down).
 * NOTE(review): the meaning of each value is only implied by its name here;
 * confirm against parseEmailBody() before relying on it.
 */
typedef enum {
FAIL,
OK,
OK_ATTACHMENTS_NOT_SAVED, |
69c62847 |
VIRUS,
MAXREC |
ecc3d638 |
} mbox_status;
|
9f2024cc |
#ifndef isblank
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
#endif
|
9b4bb8b7 |
#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */ |
393a6d67 |
|
ad091acf |
#define FOLLOWURLS 5 /*
* Maximum number of URLs scanned in a message |
59d4e1cf |
* part. Helps to prevent Dialer.gen-45 and
* Trojan.WinREG.Zapchast which are often
* dispatched by emails which point to it. If |
ad091acf |
* not defined, don't check any URLs |
f7cd5fbf |
* It is also used to indicate the number of
* 301/302 redirects we wish to follow |
ad091acf |
*/ |
9b4bb8b7 |
|
c52d991e |
#include "htmlnorm.h"
#include "phishcheck.h"
|
ea541184 |
#ifndef C_WINDOWS
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h> |
89d4073d |
#ifndef C_BEOS |
ea541184 |
#include <net/if.h>
#include <arpa/inet.h>
#endif |
89d4073d |
#endif |
f4a02249 |
#ifndef C_WINDOWS
#define closesocket(s) close(s)
#endif
|
ea541184 |
#include <fcntl.h>
#ifndef C_WINDOWS
#include <sys/time.h>
#endif
#ifndef HAVE_IN_PORT_T
typedef unsigned short in_port_t;
#endif
#ifndef HAVE_IN_ADDR_T
typedef unsigned int in_addr_t;
#endif
#if (!defined(EALREADY)) && (defined(WSAEALREADY))
#define EALREADY WSAEALREADY
#endif
#if (!defined(EINPROGRESS)) && (defined(WSAEINPROGRESS))
#define EINPROGRESS WSAEINPROGRESS
#endif
#if (!defined(EISCONN)) && (defined(WSAEISCONN))
#define EISCONN WSAEISCONN
#endif
|
f10460ed |
/* |
6e84cebb |
* Define this to handle messages covered by section 7.3.2 of RFC1341. |
f10460ed |
* This is experimental code so it is up to YOU to (1) ensure it's secure |
cf569541 |
* (2) periodically trim the directory of old files
*
* If you use the load balancing feature of clamav-milter to run clamd on |
fb79b576 |
* more than one machine you must make sure that .../partial is on a shared |
cf569541 |
* network filesystem |
f10460ed |
*/ |
bc6bbeff |
#ifndef C_WINDOWS /* TODO: when opendir() is done */ |
fb79b576 |
#define PARTIAL_DIR |
bc6bbeff |
#endif |
f10460ed |
|
ac9b941b |
/*#define NEW_WORLD*/ |
d72749e0 |
|
8c68fcc1 |
/*#define SCAN_UNENCODED_BOUNCES *//* |
12bd9764 |
* Slows things down a lot and only catches unencoded copies |
214621f2 |
* of EICAR within bounces, which don't matter |
12bd9764 |
*/
|
c1fce7f7 |
/*
 * Bundle of per-scan state handed to parseEmailBody(), do_multipart(),
 * checkURLs() and exportBounceMessage() (see their prototypes below).
 */
typedef struct mbox_ctx {
const char *dir;	/* presumably the directory for extracted parts - TODO confirm */
const table_t *rfc821Table;	/* presumably built by initialiseTables() - TODO confirm */
const table_t *subtypeTable;	/* presumably built by initialiseTables() - TODO confirm */
cli_ctx *ctx;	/* scan context; DCONF_PHISHING reads mctx->ctx->dconf->phishing */
} mbox_ctx;
|
0f7f7682 |
static int cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx); |
ae5c693a |
static message *parseEmailFile(FILE *fin, const table_t *rfc821Table, const char *firstLine, const char *dir); |
2673dc74 |
static message *parseEmailHeaders(message *m, const table_t *rfc821Table); |
4c60b74f |
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); |
ecc3d638 |
static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level); |
e3aaff8e |
static int boundaryStart(const char *line, const char *boundary); |
69c62847 |
static int boundaryEnd(const char *line, const char *boundary); |
e3aaff8e |
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static int getTextPart(message *const messages[], size_t size);
static size_t strip(char *buf, int len);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg); |
3f46285b |
static void saveTextPart(message *m, const char *dir, int destroy_text); |
50df4118 |
static char *rfc2047(const char *in); |
d72749e0 |
static char *rfc822comments(const char *in, char *out); |
f10460ed |
#ifdef PARTIAL_DIR
static int rfc1341(message *m, const char *dir);
#endif |
ddea752e |
static bool usefulHeader(int commandNumber, const char *cmd); |
d8142abc |
static char *getline_from_mbox(char *buffer, size_t len, FILE *fin); |
a603478f |
static bool isBounceStart(const char *line); |
47d9cc65 |
static bool exportBinhexMessage(const char *dir, message *m);
static int exportBounceMessage(text *start, const mbox_ctx *ctx); |
ecc3d638 |
static message *do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level); |
4f4a8f4a |
static int count_quotes(const char *buf); |
842c7d49 |
static bool next_is_folded_header(const text *t); |
0cf4cea7 |
static bool newline_in_header(const char *line); |
9b4bb8b7 |
|
ecc3d638 |
static blob *getHrefs(message *m, tag_arguments_t *hrefs);
static void hrefs_done(blob *b, tag_arguments_t *hrefs); |
ad422cc9 |
static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
static void do_checkURLs(const char *dir, tag_arguments_t *hrefs); |
c52d991e |
|
17a5e7ea |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
f121cb96 |
/*
 * Argument bundle for getURL(). The non-threaded build receives a
 * struct arg * directly; the threaded build receives a void *
 * (presumably pointing at one of these - TODO confirm in getURL()).
 * NOTE(review): depth presumably counts followed redirects, see the
 * FOLLOWURLS comment - confirm.
 */
struct arg { |
b362ea45 |
char *url; |
2c7d1edd |
const char *dir; |
f121cb96 |
char *filename; |
f7cd5fbf |
int depth; |
f121cb96 |
};
#ifdef CL_THREAD_SAFE
static void *getURL(void *a);
#else
static void *getURL(struct arg *arg);
#endif |
9b4bb8b7 |
#endif
|
e3aaff8e |
/* Maximum line length according to RFC821 */ |
85bb253e |
#define RFC2821LENGTH 1000 |
e3aaff8e |
/* Hashcodes for our hash tables */
#define CONTENT_TYPE 1
#define CONTENT_TRANSFER_ENCODING 2
#define CONTENT_DISPOSITION 3
/* Mime sub types */
#define PLAIN 1
#define ENRICHED 2
#define HTML 3
#define RICHTEXT 4
#define MIXED 5 |
946a0ad3 |
#define ALTERNATIVE 6 /* RFC1521*/ |
e3aaff8e |
#define DIGEST 7
#define SIGNED 8
#define PARALLEL 9
#define RELATED 10 /* RFC2387 */
#define REPORT 11 /* RFC1892 */ |
c9b8f252 |
#define APPLEDOUBLE 12 /* Handling of this in only noddy for now */ |
393a6d67 |
#define FAX MIXED /*
* RFC3458
* Drafts stated to treat it as mixed if it is
* not known. This disappeared in the final
* version (except when talking about
* voice-message), but it is good enough for us
* since we do no validation of coversheet
* presence etc. (which also has disappeared
* in the final version)
*/ |
9a729c80 |
#define ENCRYPTED 13 /*
* e.g. RFC2015
* Content-Type: multipart/encrypted;
* boundary="nextPart1383049.XCRrrar2yq";
* protocol="application/pgp-encrypted"
*/ |
6e5d95eb |
#define X_BFILE RELATED /*
* BeOS, expects two parts: the file and its
* attributes. The attributes part comes as
* Content-Type: application/x-be_attribute
* name="foo"
* I can't find where it is defined, any
* pointers would be appreciated. For now
* we treat it as multipart/related
*/ |
c79a2273 |
#define KNOWBOT 14 /* Unknown and undocumented format? */ |
e3aaff8e |
static const struct tableinit {
const char *key;
int value;
} rfc821headers[] = { |
303f9be9 |
/* TODO: make these regular expressions */ |
5c1150ac |
{ "Content-Type", CONTENT_TYPE }, |
a9f386ed |
{ "Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING },
{ "Content-Disposition", CONTENT_DISPOSITION }, |
e3aaff8e |
{ NULL, 0 } |
15033cb6 |
}, mimeSubtypes[] = { /* see RFC2045 */ |
e3aaff8e |
/* subtypes of Text */
{ "plain", PLAIN },
{ "enriched", ENRICHED },
{ "html", HTML },
{ "richtext", RICHTEXT },
/* subtypes of Multipart */
{ "mixed", MIXED },
{ "alternative", ALTERNATIVE },
{ "digest", DIGEST },
{ "signed", SIGNED },
{ "parallel", PARALLEL },
{ "related", RELATED },
{ "report", REPORT }, |
c9b8f252 |
{ "appledouble", APPLEDOUBLE }, |
393a6d67 |
{ "fax-message", FAX }, |
9a729c80 |
{ "encrypted", ENCRYPTED }, |
6e5d95eb |
{ "x-bfile", X_BFILE }, /* BeOS */ |
c79a2273 |
{ "knowbot", KNOWBOT }, /* ??? */
{ "knowbot-metadata", KNOWBOT }, /* ??? */
{ "knowbot-code", KNOWBOT }, /* ??? */
{ "knowbot-state", KNOWBOT }, /* ??? */ |
e3aaff8e |
{ NULL, 0 }
}; |
e2875303 |
#ifdef CL_THREAD_SAFE
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif |
e3aaff8e |
|
bac883ff |
#ifndef O_BINARY
#define O_BINARY 0
#endif
|
d72749e0 |
#ifdef NEW_WORLD |
f24bf390 |
|
dd7f118f |
#include "matcher.h"
|
c6023c3f |
#undef PARTIAL_DIR
|
f24bf390 |
#if HAVE_MMAP
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#else /* HAVE_SYS_MMAN_H */
#undef HAVE_MMAP
#endif |
0c8e0638 |
#else /*HAVE_MMAP*/
#undef NEW_WORLD
#endif |
f24bf390 |
#endif
|
0c8e0638 |
#ifdef NEW_WORLD |
ceabee13 |
/*
* Files larger than this are scanned with the old method, should be
* StreamMaxLength, I guess
* If NW_MAX_FILE_SIZE is not defined, all files go through the
* new method. This definition is for machines very tight on RAM, or
* with large StreamMaxLength values
*/
#define MAX_ALLOCATION 134217728 /* see libclamav/others.c */
#define NW_MAX_FILE_SIZE MAX_ALLOCATION
|
d72749e0 |
/*
 * One encoded region found by the NEW_WORLD cli_mbox(): a singly linked
 * list node giving where the region begins inside the message buffer,
 * how many bytes it covers and which decoder applies to it.
 */
struct scanlist { |
ceabee13 |
const char *start;
size_t size;
encoding_type decoder; /* only BASE64 and QUOTEDPRINTABLE for now */
struct scanlist *next; |
d72749e0 |
};
|
ceabee13 |
/*
 * Index of marker words found in the message buffer. create_map() fills
 * it via add_to_map(), find_in_map() searches it and free_map() empties
 * it. "map" is the head of the singly linked list and "tail" its last
 * node; both are file-scope, so only one map exists at a time.
 */
static struct map {
const char *offset; /* sorted */
const char *word;	/* the marker word that matched at offset */
struct map *next;
} *map, *tail;
|
dd7f118f |
static int save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len); |
ceabee13 |
static void create_map(const char *begin, const char *end);
static void add_to_map(const char *offset, const char *word);
static const char *find_in_map(const char *offset, const char *word);
static void free_map(void);
|
f24bf390 |
/*
* This could be the future. Instead of parsing and decoding it just decodes. |
d72749e0 |
* |
f24bf390 |
* USE IT AT YOUR PERIL, a large number of viruses are not detected with this
* method, possibly because the decoded files must be exact and not have
* extra data at the start or end, which this code will produce. |
f003b79e |
* |
d72749e0 |
* Currently only supports base64 and quoted-printable
*
* You may also see a lot of warnings. For the moment it falls back to old
* world mode if it doesn't know what to do - that'll be removed.
* The code is untidy...
*
* FIXME: Some mailbox scans are slower with this method. I suspect that it's
* because the scan can proceed to the end of the file rather than the end
* of the attachment which can mean than later emails are scanned many times |
0b28fbb8 |
* |
bc0fc102 |
* FIXME: quoted printable doesn't know when to stop, so size related virus
* matching breaks
* |
ceabee13 |
* TODO: Fall through to cli_parse_mbox() too often |
c6023c3f |
* |
0c8e0638 |
* TODO: Add support for systems without mmap() |
c6023c3f |
*
* TODO: partial_dir fall through |
a05e6d45 |
*
* FIXME: Some EICAR gets through |
f24bf390 |
*/
int |
0f7f7682 |
cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
f24bf390 |
{ |
0c8e0638 |
char *start, *ptr, *line;
const char *last, *p, *q; |
6b1cf491 |
size_t size; |
f24bf390 |
struct stat statb;
message *m;
fileblob *fb; |
dd7f118f |
int ret = CL_CLEAN; |
af7dfe53 |
int wasAlloced; |
d72749e0 |
struct scanlist *scanlist, *scanelem; |
f24bf390 |
|
7c56033f |
if(dir == NULL) {
cli_warnmsg("cli_mbox called with NULL dir\n");
return CL_ENULLARG;
} |
f24bf390 |
if(fstat(desc, &statb) < 0)
return CL_EOPEN;
size = statb.st_size;
if(size == 0)
return CL_CLEAN;
|
ceabee13 |
#ifdef NW_MAX_FILE_SIZE
if(size > NW_MAX_FILE_SIZE) |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
ceabee13 |
#endif |
f24bf390 |
|
c6023c3f |
/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/ |
bc0fc102 |
#ifdef PARTIAL_DIR
cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n");
#endif |
6862efc7 |
|
d72749e0 |
start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
if(start == MAP_FAILED) |
f24bf390 |
return CL_EMEM;
cli_dbgmsg("mmap'ed mbox\n");
|
d72749e0 |
ptr = cli_malloc(size);
if(ptr) {
memcpy(ptr, start, size); |
af7dfe53 |
munmap(start, size); |
d72749e0 |
start = ptr; |
ceabee13 |
wasAlloced = 1; |
af7dfe53 |
} else
wasAlloced = 0;
|
ceabee13 |
/* last points to the last *valid* address in the array */
last = &start[size - 1];
create_map(start, last);
|
d72749e0 |
scanelem = scanlist = NULL;
q = start; |
ceabee13 |
/*
* FIXME: mismatch of const char * and char * here and in later calls
* to find_in_map()
*/
while((p = find_in_map(q, "base64")) != NULL) { |
d72749e0 |
cli_dbgmsg("Found base64\n");
if(scanelem) {
scanelem->next = cli_malloc(sizeof(struct scanlist));
scanelem = scanelem->next;
} else
scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
scanelem->next = NULL;
scanelem->decoder = BASE64;
q = scanelem->start = &p[6]; |
ceabee13 |
if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
((p = find_in_map(q, "base64")) != NULL) ||
((p = find_in_map(q, "quoted-printable")) != NULL)) { |
5198de85 |
scanelem->size = (size_t)(p - q); |
d72749e0 |
q = p; |
0b28fbb8 |
} else {
scanelem->size = (size_t)(last - scanelem->start) + 1;
break;
} |
6b1cf491 |
cli_dbgmsg("base64: last %u q %u\n", (unsigned int)last, (unsigned int)q); |
d72749e0 |
assert(scanelem->size <= size); |
f24bf390 |
} |
c6023c3f |
|
d72749e0 |
q = start; |
ceabee13 |
while((p = find_in_map(q, "quoted-printable")) != NULL) { |
0b28fbb8 |
if(p != q)
switch(p[-1]) {
case ' ':
case ':':
case '=': /* wrong but allow it */
break;
default:
q = &p[16];
cli_dbgmsg("Ignore quoted-printable false positive\n");
continue; /* false positive */
} |
72cf1461 |
|
d72749e0 |
cli_dbgmsg("Found quoted-printable\n"); |
c6023c3f |
#ifdef notdef
/*
* The problem with quoted printable is recognising when to stop
* parsing
*/ |
d72749e0 |
if(scanelem) {
scanelem->next = cli_malloc(sizeof(struct scanlist));
scanelem = scanelem->next;
} else
scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
scanelem->next = NULL;
scanelem->decoder = QUOTEDPRINTABLE;
q = scanelem->start = &p[16]; |
6b1cf491 |
cli_dbgmsg("qp: last %u q %u\n", (unsigned int)last, (unsigned int)q); |
ceabee13 |
if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
((p = find_in_map(q, "quoted-printable")) != NULL) ||
((p = find_in_map(q, "base64")) != NULL)) { |
5198de85 |
scanelem->size = (size_t)(p - q); |
d72749e0 |
q = p; |
0b28fbb8 |
cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
} else {
scanelem->size = (size_t)(last - scanelem->start) + 1;
break;
} |
d72749e0 |
assert(scanelem->size <= size); |
c6023c3f |
#else
if(wasAlloced)
free(start);
else
munmap(start, size);
|
ceabee13 |
free_map(); |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
c6023c3f |
#endif |
f24bf390 |
}
|
d72749e0 |
if(scanlist == NULL) {
const struct tableinit *tableinit;
bool anyHeadersFound = FALSE; |
0b28fbb8 |
bool hasuuencode = FALSE; |
c6023c3f |
cli_file_t type; |
d72749e0 |
/* FIXME: message: There could of course be no decoder needed... */
for(tableinit = rfc821headers; tableinit->key; tableinit++) |
ceabee13 |
if(find_in_map(start, tableinit->key)) { |
d72749e0 |
anyHeadersFound = TRUE;
break;
}
|
dd7f118f |
if((!anyHeadersFound) &&
((p = find_in_map(start, "\nbegin ")) != NULL) &&
(isuuencodebegin(++p))) |
0b28fbb8 |
/* uuencoded part */
hasuuencode = TRUE; |
dd7f118f |
else {
cli_dbgmsg("Nothing encoded, looking for a text part to save\n");
ret = save_text(ctx, dir, start, size);
if(wasAlloced)
free(start);
else
munmap(start, size);
free_map();
if(ret != CL_EFORMAT)
return ret;
ret = CL_CLEAN;
} |
0b28fbb8 |
|
ceabee13 |
free_map();
|
c6023c3f |
type = cli_filetype(start, size);
if((type == CL_TYPE_UNKNOWN_TEXT) &&
(strncmp(start, "Microsoft Mail Internet Headers", 31) == 0)) |
ceabee13 |
type = CL_TYPE_MAIL; |
c6023c3f |
|
af7dfe53 |
if(wasAlloced)
free(start);
else
munmap(start, size); |
f24bf390 |
|
0b28fbb8 |
if(anyHeadersFound || hasuuencode) {
/* TODO: reduce the number of falls through here */ |
c6023c3f |
if(hasuuencode) |
dd7f118f |
/* TODO: fast track visa */
cli_warnmsg("New world - fall back to old uudecoder\n"); |
c6023c3f |
else |
dd7f118f |
cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type); |
c6023c3f |
if(type == CL_TYPE_MAIL) |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
c6023c3f |
cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
return CL_CLEAN; |
f003b79e |
} |
0b28fbb8 |
|
dd7f118f |
#if 0 /* I don't believe this is needed any more */ |
ff07f243 |
/*
* The message could be a plain text phish
* FIXME: Can't get to the option whether we are looking for
* phishes or not, so assume we are, this slows things a
* lot
* Should be
* if((type == CL_TYPE_MAIL) && (!(no-phishing))
*/
if(type == CL_TYPE_MAIL) |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
dd7f118f |
#endif |
ff07f243 |
cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n",
type); |
d72749e0 |
return CL_CLEAN;
} |
ceabee13 |
#if 0
if(wasAlloced) {
const char *max = NULL;
for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
const char *end = &scanelem->start[scanelem->size];
if(end > max)
max = end;
}
if(max < last)
printf("could free %d bytes\n", (int)(last - max));
}
#endif |
d72749e0 |
for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
if(scanelem->decoder == BASE64) { |
ceabee13 |
const char *b64start = scanelem->start;
size_t b64size = scanelem->size; |
d72749e0 |
cli_dbgmsg("b64size = %lu\n", b64size); |
012682d0 |
while((*b64start != '\n') && (*b64start != '\r')) { |
f003b79e |
b64start++;
b64size--; |
d72749e0 |
}
/*
* Look for the end of the headers
*/
while(b64start < last) {
if(*b64start == ';') { |
f003b79e |
b64start++;
b64size--; |
012682d0 |
} else if((memcmp(b64start, "\n\n", 2) == 0) ||
(memcmp(b64start, "\r\r", 2) == 0)) {
b64start += 2;
b64size -= 2;
break;
} else if(memcmp(b64start, "\r\n\r\n", 4) == 0) {
b64start += 4;
b64size -= 4;
break; |
c6023c3f |
} else if(memcmp(b64start, "\n \n", 3) == 0) {
/*
* Some viruses are broken and have
* one space character at the end of
* the headers
*/
b64start += 3;
b64size -= 3;
break;
} else if(memcmp(b64start, "\r\n \r\n", 5) == 0) {
/*
* Some viruses are broken and have
* one space character at the end of
* the headers
*/
b64start += 5;
b64size -= 5;
break; |
f003b79e |
} |
5b76248c |
b64start++; |
d72749e0 |
b64size--; |
5b76248c |
} |
f003b79e |
|
d72749e0 |
if(b64size > 0L) |
0b28fbb8 |
while((!isalnum(*b64start)) && (*b64start != '/')) { |
d72749e0 |
if(b64size-- == 0L)
break;
b64start++;
}
if(b64size > 0L) { |
c04baf9e |
int lastline; |
a9ecf619 |
char *tmpfilename;
unsigned char *uptr;
|
d72749e0 |
cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size); |
c04baf9e |
if((fb = fileblobCreate()) == NULL) { |
dd7f118f |
free_map(); |
a9ecf619 |
if(wasAlloced)
free(start);
else
munmap(start, size);
return CL_EMEM;
}
|
c04baf9e |
tmpfilename = cli_gentemp(dir); |
dd7f118f |
if(tmpfilename == NULL) {
free_map(); |
a9ecf619 |
if(wasAlloced)
free(start);
else
munmap(start, size); |
c04baf9e |
fileblobDestroy(fb); |
a9ecf619 |
|
c04baf9e |
return CL_EMEM; |
a9ecf619 |
} |
c04baf9e |
fileblobSetFilename(fb, dir, tmpfilename);
free(tmpfilename); |
a9ecf619 |
|
d72749e0 |
line = NULL; |
f003b79e |
|
d72749e0 |
m = messageCreate(); |
c6023c3f |
if(m == NULL) { |
dd7f118f |
free_map(); |
c6023c3f |
if(wasAlloced)
free(start);
else
munmap(start, size); |
c04baf9e |
fileblobDestroy(fb); |
c6023c3f |
|
d72749e0 |
return CL_EMEM; |
c6023c3f |
} |
d72749e0 |
messageSetEncoding(m, "base64"); |
f003b79e |
|
a603478f |
messageSetCTX(m, ctx);
fileblobSetCTX(fb, ctx);
|
012682d0 |
lastline = 0; |
0b28fbb8 |
do { |
a9ecf619 |
int length = 0, datalen; |
012682d0 |
char *newline, *equal; |
a9ecf619 |
unsigned char *bigbuf, *data;
unsigned char smallbuf[1024]; |
ff07f243 |
const char *cptr; |
f003b79e |
|
d72749e0 |
/*printf("%ld: ", b64size); fflush(stdout);*/ |
f003b79e |
|
ff07f243 |
for(cptr = b64start; b64size && (*cptr != '\n') && (*cptr != '\r'); cptr++) { |
d72749e0 |
length++;
--b64size;
} |
f003b79e |
|
d72749e0 |
/*printf("%d: ", length); fflush(stdout);*/ |
f003b79e |
|
468c0f21 |
newline = cli_realloc(line, length + 1);
if(newline == NULL)
break;
line = newline; |
f003b79e |
|
d72749e0 |
memcpy(line, b64start, length);
line[length] = '\0'; |
f003b79e |
|
012682d0 |
equal = strchr(line, '=');
if(equal) {
lastline++;
*equal = '\0';
} |
d72749e0 |
/*puts(line);*/ |
f003b79e |
|
a9ecf619 |
#if 0 |
d72749e0 |
if(messageAddStr(m, line) < 0)
break; |
a9ecf619 |
#endif |
c04baf9e |
if(length >= (int)sizeof(smallbuf)) { |
a9ecf619 |
datalen = length + 2;
data = bigbuf = cli_malloc(datalen);
if(data == NULL)
break;
} else {
bigbuf = NULL;
data = smallbuf;
datalen = sizeof(data) - 1;
}
uptr = decodeLine(m, BASE64, line, data, datalen);
if(uptr == NULL) {
if(bigbuf)
free(bigbuf);
break;
}
/*cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));*/ |
c04baf9e |
datalen = fileblobAddData(fb, data, (size_t)(uptr - data)); |
a9ecf619 |
if(bigbuf)
free(bigbuf); |
f003b79e |
|
c04baf9e |
if(datalen < 0)
break; |
d4a7dd82 |
if(fileblobContainsVirus(fb)) |
01c99f53 |
break; |
c04baf9e |
|
ff07f243 |
if((b64size > 0) && (*cptr == '\r')) {
b64start = ++cptr; |
012682d0 |
--b64size;
} |
ff07f243 |
if((b64size > 0) && (*cptr == '\n')) {
b64start = ++cptr; |
d72749e0 |
--b64size;
} |
012682d0 |
if(lastline) |
d72749e0 |
break; |
0b28fbb8 |
} while(b64size > 0L);
|
a9ecf619 |
if(m->base64chars) {
unsigned char data[4];
uptr = base64Flush(m, data);
if(uptr) {
/*cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));*/ |
c04baf9e |
(void)fileblobAddData(fb, data, (size_t)(uptr - data)); |
a9ecf619 |
}
} |
c04baf9e |
if(fb)
fileblobDestroy(fb);
else |
d4a7dd82 |
ret = -1; |
a9ecf619 |
messageDestroy(m);
free(line); |
f24bf390 |
} |
d72749e0 |
} else if(scanelem->decoder == QUOTEDPRINTABLE) { |
ceabee13 |
const char *quotedstart = scanelem->start;
size_t quotedsize = scanelem->size; |
f003b79e |
|
d72749e0 |
cli_dbgmsg("quotedsize = %lu\n", quotedsize);
while(*quotedstart != '\n') { |
f003b79e |
quotedstart++;
quotedsize--; |
d72749e0 |
}
/*
* Look for the end of the headers
*/
while(quotedstart < last) {
if(*quotedstart == ';') { |
f003b79e |
quotedstart++;
quotedsize--; |
012682d0 |
} else if((*quotedstart == '\n') || (*quotedstart == '\r')) { |
d72749e0 |
quotedstart++;
quotedsize--;
if((*quotedstart == '\n') || (*quotedstart == '\r')) {
quotedstart++;
quotedsize--;
break;
} |
f003b79e |
} |
d72749e0 |
quotedstart++;
quotedsize--; |
f003b79e |
} |
f24bf390 |
|
d72749e0 |
while(!isalnum(*quotedstart)) {
quotedstart++;
quotedsize--;
} |
f24bf390 |
|
d72749e0 |
if(quotedsize > 0L) {
cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize); |
f24bf390 |
|
d72749e0 |
m = messageCreate(); |
c6023c3f |
if(m == NULL) { |
dd7f118f |
free_map(); |
c6023c3f |
if(wasAlloced)
free(start);
else
munmap(start, size);
|
d72749e0 |
return CL_EMEM; |
c6023c3f |
} |
d72749e0 |
messageSetEncoding(m, "quoted-printable"); |
a603478f |
messageSetCTX(m, ctx); |
f24bf390 |
|
d72749e0 |
line = NULL; |
f24bf390 |
|
0b28fbb8 |
do { |
d72749e0 |
int length = 0; |
468c0f21 |
char *newline; |
ff07f243 |
const char *cptr; |
f24bf390 |
|
d72749e0 |
/*printf("%ld: ", quotedsize); fflush(stdout);*/ |
f24bf390 |
|
ff07f243 |
for(cptr = quotedstart; quotedsize && (*cptr != '\n') && (*cptr != '\r'); cptr++) { |
d72749e0 |
length++;
--quotedsize;
} |
f24bf390 |
|
d72749e0 |
/*printf("%d: ", length); fflush(stdout);*/ |
f24bf390 |
|
468c0f21 |
newline = cli_realloc(line, length + 1);
if(newline == NULL)
break;
line = newline; |
f24bf390 |
|
d72749e0 |
memcpy(line, quotedstart, length);
line[length] = '\0'; |
f24bf390 |
|
d72749e0 |
/*puts(line);*/ |
f24bf390 |
|
d72749e0 |
if(messageAddStr(m, line) < 0)
break; |
f003b79e |
|
ff07f243 |
if((quotedsize > 0) && (*cptr == '\r')) {
quotedstart = ++cptr; |
012682d0 |
--quotedsize;
} |
ff07f243 |
if((quotedsize > 0) && (*cptr == '\n')) {
quotedstart = ++cptr; |
d72749e0 |
--quotedsize;
} |
0b28fbb8 |
} while(quotedsize > 0L);
|
d72749e0 |
free(line); |
2673dc74 |
fb = messageToFileblob(m, dir, 1); |
d72749e0 |
messageDestroy(m); |
f003b79e |
|
d4a7dd82 |
if(fb) |
d72749e0 |
fileblobDestroy(fb); |
d4a7dd82 |
else
ret = -1; |
d72749e0 |
} |
f24bf390 |
}
} |
d72749e0 |
scanelem = scanlist;
|
dd7f118f |
/*
* There could be a phish in the plain text part, so save that
* FIXME: Can't get to the option whether we are looking for
* phishes or not, so assume we are, this slows things a
* lot
* Should be
* if((type == CL_TYPE_MAIL) && (!(no-phishing))
*/
ret = save_text(ctx, dir, start, size);
free_map();
|
d72749e0 |
while(scanelem) {
struct scanlist *n = scanelem->next;
free(scanelem);
scanelem = n;
} |
f24bf390 |
|
af7dfe53 |
if(wasAlloced)
free(start);
else
munmap(start, size); |
f24bf390 |
|
af7dfe53 |
/*
* FIXME: Need to run cl_scandir() here and return that value
*/ |
a9ecf619 |
cli_dbgmsg("cli_mbox: ret = %d\n", ret); |
dd7f118f |
if(ret != CL_EFORMAT)
return ret; |
f003b79e |
|
dd7f118f |
cli_warnmsg("New world - don't know what to do - fall back to old world\n"); |
d72749e0 |
/* Fall back for now */ |
0b28fbb8 |
lseek(desc, 0L, SEEK_SET); |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
f24bf390 |
} |
ceabee13 |
|
dd7f118f |
/*
 * Save a text part - it could contain phish or jscript
 *
 * Uses the marker map built by create_map() to find the first blank line
 * ("\n\n" or "\r\n\r\n") at or after start, scans the text from there with
 * cli_scanbuff() and then writes it to a temporary file created in dir.
 *
 * ctx:	  scan context; supplies virname and engine, and its scanned
 *	  counter (when set) is increased
 * dir:	  directory in which the temporary file is created
 * start: beginning of the buffer that was given to create_map()
 * len:	  number of bytes in that buffer
 *
 * Returns CL_VIRUS if cli_scanbuff() finds one, CL_SUCCESS once the text has
 * been written, CL_EFORMAT if there is no blank line or fewer than 5 bytes of
 * text, CL_EMEM or CL_ETMPFILE if the fileblob or its name can't be created.
 */
static int
save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
{
	const char *p, *q;
	fileblob *fb;
	char *tmpfilename;

	if(((p = find_in_map(start, "\n\n")) == NULL) &&
	   ((p = find_in_map(start, "\r\n\r\n")) == NULL)) {
		cli_dbgmsg("No text part found to save\n");
		return CL_EFORMAT;
	}

	/*
	 * The map is keyed on "quoted-printable" (with a hyphen), see
	 * create_map(); any other spelling can never be found
	 */
	if((find_in_map(start, "base64") == NULL) &&
	   (find_in_map(start, "quoted-printable") == NULL)) {
		cli_dbgmsg("It's all plain text!\n");
		if(*p == '\r')
			p += 4;
		else
			p += 2;
		len -= (size_t)(p - start);
	} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
		  ((q = find_in_map(p, "base64")) == NULL) &&
		  ((q = find_in_map(p, "quoted-printable")) == NULL)) {
		cli_dbgmsg("Can't find end of plain text - assume it's all\n");
		/*
		 * The text runs from p to the end of the buffer, so don't
		 * count the bytes before p or we would read past the end
		 */
		len -= (size_t)(p - start);
	} else
		len = (size_t)(q - p);

	if(len < 5) {
		cli_dbgmsg("save_text: Too small\n");
		return CL_EFORMAT;
	}
	if(ctx->scanned)
		*ctx->scanned += len / CL_COUNT_PRECISION;

	/*
	 * This doesn't work, cli_scanbuff isn't designed to be used
	 *	in this way. It gets the "filetype" wrong and then
	 *	doesn't scan correctly
	 */
	if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS) {
		cli_dbgmsg("save_text: found %s\n", *ctx->virname);
		return CL_VIRUS;
	}

	fb = fileblobCreate();
	if(fb == NULL)
		return CL_EMEM;

	tmpfilename = cli_gentemp(dir);
	if(tmpfilename == NULL) {
		fileblobDestroy(fb);
		return CL_ETMPFILE;
	}
	cli_dbgmsg("save plain bit to %s, %u bytes\n",
		tmpfilename, (unsigned int)len);

	fileblobSetFilename(fb, dir, tmpfilename);
	free(tmpfilename);

	(void)fileblobAddData(fb, (const unsigned char *)p, len);
	fileblobDestroy(fb);

	return CL_SUCCESS;
}
|
ceabee13 |
/*
 * Build the marker map for the bytes from begin up to, but not including,
 * end: every position is compared, ignoring case, with each marker word in
 * turn and the first word that matches there is recorded with add_to_map().
 *
 * A word is only tried when its length is no more than (end - cursor).
 * cli_mbox() passes the address of the last valid byte as end, so a marker
 * finishing exactly on that byte is not recorded.
 *
 * If a map already exists a warning is issued and it is freed first.
 */
static void
create_map(const char *begin, const char *end)
{
	static const struct marker {
		const char *text;
		int len;
	} markers[] = {
		{	"base64",		6	},
		{	"quoted-printable",	16	},
		{	"\nbegin ",		7	},
		{	"\nFrom ",		6	},
		{	"\n\n",			2	},
		{	"\r\n\r\n",		4	},
		{	NULL,			0	}
	};
	const char *cursor;

	if(map != NULL) {
		cli_warnmsg("create_map called without free_map\n");
		free_map();
	}

	for(cursor = begin; cursor < end; cursor++) {
		const struct marker *m;

		for(m = markers; m->text != NULL; m++)
			if(((end - cursor) >= m->len) &&
			   (strncasecmp(cursor, m->text, m->len) == 0)) {
				/* at most one entry per position */
				add_to_map(cursor, m->text);
				break;
			}
	}
}
/* To sort map, assume 'offset' is presented in sorted order */
static void
add_to_map(const char *offset, const char *word)
{
if(map) {
tail->next = cli_malloc(sizeof(struct map)); /* FIXME: verify */
tail = tail->next;
} else
map = tail = cli_malloc(sizeof(struct map)); /* FIXME: verify */
tail->offset = offset;
tail->word = word;
tail->next = NULL;
}
/*
 * Look up a marker word in the map.
 *
 * Walks the map from its head and returns the recorded position of the
 * first entry that lies at or after offset and whose word equals word,
 * ignoring case. Returns NULL if there is no such entry.
 */
static const char *
find_in_map(const char *offset, const char *word)
{
	const struct map *cur = map;

	while(cur != NULL) {
		if((cur->offset >= offset) && (strcasecmp(word, cur->word) == 0))
			return cur->offset;
		cur = cur->next;
	}

	return NULL;
}
static void
free_map(void)
{
while(map) {
struct map *next = map->next;
free(map);
map = next;
}
map = NULL;
}
#else /*!NEW_WORLD*/ |
f24bf390 |
int |
0f7f7682 |
cli_mbox(const char *dir, int desc, cli_ctx *ctx) |
f24bf390 |
{ |
7c56033f |
if(dir == NULL) {
cli_warnmsg("cli_mbox called with NULL dir\n");
return CL_ENULLARG;
} |
0f7f7682 |
return cli_parse_mbox(dir, desc, ctx); |
f24bf390 |
}
#endif
|
e3aaff8e |
/*
* TODO: when signal handling is added, need to remove temp files when a |
ef822cfc |
* signal is received |
e3aaff8e |
* TODO: add option to scan in memory not via temp files, perhaps with a |
1bfbedd4 |
* named pipe or memory mapped file, though this won't work on big e-mails
* containing many levels of encapsulated messages - it'd just take too much
* RAM |
049a18b9 |
* TODO: parse .msg format files |
c9b8f252 |
* TODO: fully handle AppleDouble format, see |
ef822cfc |
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf |
89670d69 |
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody |
9f2024cc |
* TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
* e.g. \0Content-Type: application/binary; |
e3aaff8e |
*/ |
f24bf390 |
/*
 * Parse the mail data readable from descriptor desc.
 *
 * dir	is stored in the mbox context handed to parseEmailBody() and is
 *	also passed to uudecodeFile() and parseEmailFile()
 * desc	descriptor to read from; it is dup()ed, so the stream that is
 *	opened and closed here is not the caller's descriptor
 * ctx	scan context, attached to every message that is created
 *
 * If the first line starts with "From " the input is handled as a UNIX
 * mbox which may hold several messages, otherwise as a single message.
 *
 * Returns
 *	CL_EOPEN	the duplicated descriptor could not be fdopen()ed
 *	CL_CLEAN	no line at all could be read
 *	CL_EMEM		initialiseTables() or messageCreate() failed
 *	CL_VIRUS, CL_EFORMAT, CL_EMAXREC
 *			mapped from the VIRUS, FAIL and MAXREC results of
 *			parseEmailBody()
 *	CL_SUCCESS	otherwise
 */
static int |
0f7f7682 |
cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx) |
e3aaff8e |
{ |
049a18b9 |
int retcode, i; |
ddea752e |
message *body; |
e3aaff8e |
FILE *fd; |
85bb253e |
char buffer[RFC2821LENGTH + 1]; |
c1fce7f7 |
mbox_ctx mctx; |
c7b69776 |
#ifdef HAVE_BACKTRACE |
02927896 |
void (*segv)(int);
#endif |
393a6d67 |
static table_t *rfc821, *subtype; |
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
char tmpfilename[16];
int tmpfd;
#endif |
e3aaff8e |
|
4c586bc8 |
#ifdef NEW_WORLD
cli_dbgmsg("fall back to old world\n");
#else |
e3aaff8e |
cli_dbgmsg("in mbox()\n"); |
4c586bc8 |
#endif |
e3aaff8e |
|
049a18b9 |
/*
 * Read through a duplicate of desc, so that the fclose() calls below
 * close the duplicate and not the descriptor we were given
 */
i = dup(desc);
if((fd = fdopen(i, "rb")) == NULL) {
cli_errmsg("Can't open descriptor %d\n", desc);
close(i); |
ef822cfc |
return CL_EOPEN; |
049a18b9 |
} |
59d4e1cf |
rewind(fd); /* bug 240 */ |
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
/*
* Copy the incoming mail for debugging, so that if it falls over
* we have a copy of the offending email. This is debugging code
* that you shouldn't of course install in a live environment. I am
* not interested in hearing about security issues with this section
* of the parser.
*/
strcpy(tmpfilename, "/tmp/mboxXXXXXX");
tmpfd = mkstemp(tmpfilename);
if(tmpfd < 0) {
perror(tmpfilename);
cli_errmsg("Can't make debugging file\n");
} else {
FILE *tmpfp = fdopen(tmpfd, "w");
if(tmpfp) {
while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
fputs(buffer, tmpfp);
fclose(tmpfp);
rewind(fd);
} else
cli_errmsg("Can't fdopen debugging file\n");
}
#endif |
4b187745 |
if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) { |
049a18b9 |
/* empty message */
fclose(fd); |
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
unlink(tmpfilename);
#endif |
ef822cfc |
return CL_CLEAN; |
049a18b9 |
} |
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&tables_mutex);
#endif |
393a6d67 |
/*
 * rfc821 and subtype are static, so the tables are only built by the
 * first call that gets here; later calls reuse them
 */
if(rfc821 == NULL) {
assert(subtype == NULL); |
51fc2aa8 |
|
393a6d67 |
if(initialiseTables(&rfc821, &subtype) < 0) {
rfc821 = NULL;
subtype = NULL; |
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
51fc2aa8 |
fclose(fd); |
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
unlink(tmpfilename);
#endif |
ef822cfc |
return CL_EMEM; |
51fc2aa8 |
} |
e3aaff8e |
} |
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
e3aaff8e |
|
3f3f9085 |
#ifdef HAVE_BACKTRACE |
02927896 |
/* Remember the previous SIGSEGV handler, it is put back before returning */
segv = signal(SIGSEGV, sigsegv);
#endif
|
a603478f |
retcode = CL_SUCCESS; |
e791b5ac |
body = NULL;
|
c1fce7f7 |
mctx.dir = dir;
mctx.rfc821Table = rfc821;
mctx.subtypeTable = subtype;
mctx.ctx = ctx;
|
89670d69 |
/* |
45dc1456 |
* Is it a UNIX style mbox with more than one |
89670d69 |
* mail message, or just a single mail message? |
45dc1456 |
*
* TODO: It would be better if we called cli_scandir here rather than
* in cli_scanmail. Then we could improve the way mailboxes with more
* than one message is handled, e.g. stopping parsing when an infected
* message is stopped, and giving a better indication of which message
* within the mailbox is infected |
89670d69 |
*/ |
25071deb |
/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
if(strncmp(buffer, "From ", 5) == 0) { |
e3aaff8e |
/* |
049a18b9 |
* Have been asked to check a UNIX style mbox file, which
* may contain more than one e-mail message to decode |
f003b79e |
*
* It would be far better for scanners.c to do this splitting
* and do this
* FOR EACH mail in the mailbox
* DO
* pass this mail to cli_mbox --
* scan this file
* IF this file has a virus quit
* THEN
* return CL_VIRUS
* FI
* END
* This would remove a problem with this code that it can
* fill up the tmp directory before it starts scanning |
e3aaff8e |
*/ |
ddea752e |
bool lastLineWasEmpty;
int messagenumber;
message *m = messageCreate();
if(m == NULL) {
fclose(fd);
#ifdef HAVE_BACKTRACE
signal(SIGSEGV, segv);
#endif |
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
unlink(tmpfilename);
#endif |
ddea752e |
return CL_EMEM;
}
lastLineWasEmpty = FALSE;
messagenumber = 1; |
a603478f |
messageSetCTX(m, ctx); |
e3aaff8e |
|
049a18b9 |
/*
 * Collect lines into m; a "From " line that follows an empty line
 * ends the message collected so far, which is then parsed
 */
do { |
89670d69 |
cli_chomp(buffer); |
25071deb |
/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) { |
f35bc674 |
cli_dbgmsg("Deal with email number %d\n", messagenumber++); |
e3aaff8e |
/* |
89670d69 |
* End of a message in the mail box |
e3aaff8e |
*/ |
b2223aad |
body = parseEmailHeaders(m, rfc821); |
4f1d0bfc |
if(body == NULL) {
messageReset(m);
continue;
} |
a603478f |
messageSetCTX(body, ctx); |
89670d69 |
messageDestroy(m); |
a603478f |
if(messageGetBody(body)) { |
ecc3d638 |
mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
if(rc == FAIL) { |
e17491b2 |
messageReset(body);
m = body;
continue; |
ecc3d638 |
} else if(rc == VIRUS) { |
a603478f |
cli_dbgmsg("Message number %d is infected\n",
messagenumber);
retcode = CL_VIRUS; |
/*
 * m has already been destroyed above; clear it so that
 * it is not destroyed a second time after the loop
 */
826c9892 |
m = NULL; |
a603478f |
break; |
e17491b2 |
} |
a603478f |
} |
e3aaff8e |
/* |
89670d69 |
* Starting a new message, throw away all the |
f24bf390 |
* information about the old one. It would
* be best to be able to scan this message
* now, but cli_scanfile needs arguments
* that haven't been passed here so it can't be
* called |
e3aaff8e |
*/ |
89670d69 |
m = body;
messageReset(body); |
a603478f |
messageSetCTX(body, ctx); |
e3aaff8e |
|
049a18b9 |
cli_dbgmsg("Finished processing message\n"); |
89670d69 |
} else |
547b89de |
lastLineWasEmpty = (bool)(buffer[0] == '\0'); |
4945127a |
|
fa5661be |
if(isuuencodebegin(buffer)) { |
5198de85 |
/* |
4945127a |
* Fast track visa to uudecode.
* TODO: binhex, yenc
*/ |
3953039b |
if(uudecodeFile(m, buffer, dir, fd) < 0) |
fa5661be |
if(messageAddStr(m, buffer) < 0)
break;
} else |
69c62847 |
/* at this point, the \n has been removed */ |
4945127a |
if(messageAddStr(m, buffer) < 0)
break; |
4b187745 |
} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL); |
f35bc674 |
|
ddea752e |
fclose(fd);
|
a603478f |
/*
 * The lines collected since the last "From " line have not been
 * parsed yet; their body is scanned further down
 */
if(retcode == CL_SUCCESS) {
cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
body = parseEmailHeaders(m, rfc821);
}
if(m)
messageDestroy(m); |
4f1d0bfc |
} else { |
7e577f26 |
/*
* It's a single message, parse the headers then the body |
4f1d0bfc |
*/ |
69543a9d |
if(strncmp(buffer, "P I ", 4) == 0)
/*
* CommuniGate Pro format: ignore headers until
* blank line
*/ |
4b187745 |
while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) && |
69543a9d |
(strchr("\r\n", buffer[0]) == NULL))
;
/*
* Ignore any blank lines at the top of the message
*/ |
4f1d0bfc |
while(strchr("\r\n", buffer[0]) && |
d8142abc |
(getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL)) |
87c9313e |
;
|
9ed148a8 |
buffer[sizeof(buffer) - 1] = '\0'; |
4b187745 |
|
ae5c693a |
body = parseEmailFile(fd, rfc821, buffer, dir); |
ddea752e |
fclose(fd); |
4f1d0bfc |
} |
7e577f26 |
|
4f1d0bfc |
if(body) {
/*
* Write out the last entry in the mailbox
*/ |
a603478f |
if((retcode == CL_SUCCESS) && messageGetBody(body)) {
messageSetCTX(body, ctx); |
242ffd7a |
switch(parseEmailBody(body, NULL, &mctx, 0)) { |
ecc3d638 |
case FAIL: |
69c62847 |
/*
* beware: cli_magic_scandesc(),
* changes this into CL_CLEAN, so only
* use it to inform the higher levels
* that we couldn't decode it because
* it isn't an mbox, not to signal
* decoding errors on what *is* a valid
* mbox
*/ |
a603478f |
retcode = CL_EFORMAT;
break; |
69c62847 |
case MAXREC:
retcode = CL_EMAXREC;
break; |
ecc3d638 |
case VIRUS: |
a603478f |
retcode = CL_VIRUS;
break;
}
} |
e3aaff8e |
|
4f1d0bfc |
/*
* Tidy up and quit
*/
messageDestroy(body);
} |
e3aaff8e |
cli_dbgmsg("cli_mbox returning %d\n", retcode);
|
3f3f9085 |
#ifdef HAVE_BACKTRACE |
02927896 |
signal(SIGSEGV, segv);
#endif
|
a9d251e0 |
#ifdef SAVE_TMP |
5cdb01fc |
unlink(tmpfilename);
#endif |
e3aaff8e |
return retcode;
}
/* |
ddea752e |
* Read in an email message from fin, parse it, and return the message |
7e577f26 |
* |
ddea752e |
* FIXME: files full of new lines and nothing else are
* handled ungracefully...
*/
static message * |
ae5c693a |
parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir) |
ddea752e |
{
bool inHeader = TRUE; |
6e3d492a |
bool bodyIsEmpty = TRUE; |
06466233 |
bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE; |
ddea752e |
message *ret;
bool anyHeadersFound = FALSE;
int commandNumber = -1; |
41b7a56b |
char *fullline = NULL, *boundary = NULL; |
ddea752e |
size_t fulllinelength = 0; |
85bb253e |
char buffer[RFC2821LENGTH + 1]; |
ddea752e |
cli_dbgmsg("parseEmailFile\n");
ret = messageCreate();
if(ret == NULL)
return NULL;
strcpy(buffer, firstLine);
do { |
4f4a8f4a |
const char *line; |
ddea752e |
(void)cli_chomp(buffer);
|
4f4a8f4a |
if(buffer[0] == '\0') |
72cf1461 |
line = NULL; |
4f4a8f4a |
else
line = buffer; |
ddea752e |
/*
* Don't blank lines which are only spaces from headers,
* otherwise they'll be treated as the end of header marker
*/ |
41b7a56b |
if(lastWasBlank) {
lastWasBlank = FALSE;
if(boundaryStart(buffer, boundary)) {
cli_dbgmsg("Found a header line with space that should be blank\n");
inHeader = FALSE;
}
} |
ddea752e |
if(inHeader) { |
0ed29506 |
cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
buffer ? buffer : "", fullline); |
2a0041b8 |
/*
* Ensure wide characters are handled where
* sizeof(char) > 1
*/
if(line && isspace(line[0] & 0xFF)) { |
41b7a56b |
char copy[sizeof(buffer)];
strcpy(copy, buffer);
strstrip(copy);
if(copy[0] == '\0') {
/* |
4d4166a9 |
* The header line contains only white
* space. This is not the end of the
* headers according to RFC2822, but
* some MUAs will handle it as though
* it were, and virus writers exploit
* this bug. We can't just break from
* the loop here since that would allow
* other exploits such as inserting a
* white space line before the
* content-type line. So we just have
* to make a best guess. Sigh. |
41b7a56b |
*/
if(fullline) {
if(parseEmailHeader(ret, fullline, rfc821) < 0)
continue;
free(fullline);
fullline = NULL;
} |
300a8ae9 |
if(boundary ||
((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) { |
41b7a56b |
lastWasBlank = TRUE;
continue;
}
}
} |
72cf1461 |
if((line == NULL) && (fullline == NULL)) { /* empty line */ |
0ed29506 |
/*
* A blank line signifies the end of
* the header and the start of the text
*/
if(!anyHeadersFound)
/* Ignore the junk at the top */
continue; |
5860ae08 |
|
0ed29506 |
cli_dbgmsg("End of header information\n");
inHeader = FALSE;
bodyIsEmpty = TRUE; |
ddea752e |
} else {
char *ptr; |
d6d5763c |
int lookahead; |
ddea752e |
if(fullline == NULL) { |
85bb253e |
char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1]; |
ddea752e |
/*
* Continuation of line we're ignoring?
*/ |
0ed29506 |
if(isblank(line[0])) |
ddea752e |
continue;
/*
* Is this a header we're interested in?
*/ |
72cf1461 |
if((strchr(line, ':') == NULL) ||
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
if(strncmp(line, "From ", 5) == 0) |
ddea752e |
anyHeadersFound = TRUE;
continue;
}
|
d72749e0 |
ptr = rfc822comments(cmd, out); |
ddea752e |
commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE:
anyHeadersFound = TRUE;
break;
default:
if(!anyHeadersFound)
anyHeadersFound = usefulHeader(commandNumber, cmd);
continue;
} |
0cf4cea7 |
fullline = cli_strdup(line); |
72cf1461 |
fulllinelength = strlen(line) + 1;
} else if(line != NULL) {
fulllinelength += strlen(line); |
468c0f21 |
ptr = cli_realloc(fullline, fulllinelength);
if(ptr == NULL)
continue;
fullline = ptr; |
72cf1461 |
strcat(fullline, line); |
ddea752e |
}
assert(fullline != NULL);
lookahead = getc(fin);
if(lookahead != EOF) {
ungetc(lookahead, fin);
/*
* Section B.2 of RFC822 says TAB or
* SPACE means a continuation of the
* previous entry.
*
* Add all the arguments on the line
*/ |
9f2024cc |
if(isblank(lookahead)) |
ddea752e |
continue;
}
|
11f253d6 |
/*
* Handle broken headers, where the next
* line isn't indented by whitespace
*/
if(fullline[fulllinelength - 2] == ';')
/* Add arguments to this line */
continue;
|
4f4a8f4a |
if(line && (count_quotes(fullline) & 1))
continue; |
ddea752e |
|
d72749e0 |
ptr = rfc822comments(fullline, NULL); |
ddea752e |
if(ptr) {
free(fullline);
fullline = ptr;
}
if(parseEmailHeader(ret, fullline, rfc821) < 0)
continue;
free(fullline);
fullline = NULL;
} |
fa5661be |
} else if(line && isuuencodebegin(line)) { |
ae5c693a |
/*
* Fast track visa to uudecode.
* TODO: binhex, yenc
*/ |
6e3d492a |
bodyIsEmpty = FALSE; |
3953039b |
if(uudecodeFile(ret, line, dir, fin) < 0) |
fa5661be |
if(messageAddStr(ret, line) < 0)
break; |
06466233 |
} else {
if(line == NULL) { |
c52d991e |
/*
* Although this would save time and RAM, some
* phish signatures have been built which need
* the blank lines
*/
if(lastBodyLineWasBlank &&
(messageGetMimeType(ret) != TEXT)) { |
06466233 |
cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
continue;
}
lastBodyLineWasBlank = TRUE; |
6e3d492a |
} else {
if(bodyIsEmpty) {
/*
* Broken message: new line in the
* middle of the headers, so the first
* line of the body is in fact
* the last lines of the header
*/ |
0cf4cea7 |
if(newline_in_header(line)) |
6e3d492a |
continue; |
0cf4cea7 |
bodyIsEmpty = FALSE; |
6e3d492a |
} |
06466233 |
lastBodyLineWasBlank = FALSE; |
6e3d492a |
} |
06466233 |
|
72cf1461 |
if(messageAddStr(ret, line) < 0) |
ddea752e |
break; |
06466233 |
} |
d8142abc |
} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL); |
ddea752e |
|
300a8ae9 |
if(boundary)
free(boundary);
|
ddea752e |
if(fullline) {
if(*fullline) switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE: |
0d35f10f |
cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline); |
ddea752e |
}
free(fullline);
}
if(!anyHeadersFound) {
/*
* False positive in believing we have an e-mail when we don't
*/
messageDestroy(ret);
cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
return NULL;
}
messageClean(ret);
cli_dbgmsg("parseEmailFile: return\n");
return ret;
}
/*
* The given message contains a raw e-mail. |
e06d34dc |
*
* Returns the message's body with the correct arguments set |
f73920a4 |
*
* The downside of this approach is that for a short time we have two copies
* of the message in memory, the upside is that it makes for easier parsing
* of encapsulated messages, and in the long run uses less memory in those
* scenarios |
ddea752e |
*
* TODO: remove the duplication with parseEmailFile |
7e577f26 |
*/ |
e06d34dc |
static message * |
2673dc74 |
parseEmailHeaders(message *m, const table_t *rfc821) |
7e577f26 |
{ |
e06d34dc |
bool inHeader = TRUE; |
0d35f10f |
bool bodyIsEmpty = TRUE; |
b2223aad |
const text *t; |
89670d69 |
message *ret; |
4f1d0bfc |
bool anyHeadersFound = FALSE; |
15033cb6 |
int commandNumber = -1; |
efb5f16c |
char *fullline = NULL; |
2ad0c86e |
size_t fulllinelength = 0; |
89670d69 |
|
02927896 |
cli_dbgmsg("parseEmailHeaders\n");
|
89670d69 |
if(m == NULL)
return NULL;
ret = messageCreate(); |
7e577f26 |
|
b2223aad |
for(t = messageGetBody(m); t; t = t->t_next) { |
0cf4cea7 |
const char *line; |
7e577f26 |
|
b2223aad |
if(t->t_line) |
0cf4cea7 |
line = lineGetData(t->t_line); |
b2223aad |
else |
0cf4cea7 |
line = NULL; |
7e577f26 |
|
28010d29 |
if(inHeader) { |
4e75d9b6 |
cli_dbgmsg("parseEmailHeaders: check '%s'\n", |
0cf4cea7 |
line ? line : "");
if(line == NULL) { |
4e75d9b6 |
/*
* A blank line signifies the end of
* the header and the start of the text
*/
cli_dbgmsg("End of header information\n"); |
ad091acf |
if(!anyHeadersFound) {
cli_dbgmsg("Nothing interesting in the header\n");
break;
} |
0d35f10f |
inHeader = FALSE;
bodyIsEmpty = TRUE; |
2ad0c86e |
} else { |
b116962d |
char *ptr;
|
2ad0c86e |
if(fullline == NULL) { |
85bb253e |
char cmd[RFC2821LENGTH + 1]; |
45aba293 |
/*
* Continuation of line we're ignoring?
*/ |
0cf4cea7 |
if(isblank(line[0])) |
45aba293 |
continue;
/*
* Is this a header we're interested in?
*/ |
0cf4cea7 |
if((strchr(line, ':') == NULL) ||
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
if(strncmp(line, "From ", 5) == 0) |
b116962d |
anyHeadersFound = TRUE; |
45aba293 |
continue; |
b116962d |
} |
45aba293 |
|
d72749e0 |
ptr = rfc822comments(cmd, NULL); |
59921c02 |
commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
if(ptr)
free(ptr); |
45aba293 |
switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE: |
b116962d |
anyHeadersFound = TRUE; |
45aba293 |
break;
default: |
ddea752e |
if(!anyHeadersFound)
anyHeadersFound = usefulHeader(commandNumber, cmd); |
45aba293 |
continue;
} |
0cf4cea7 |
fullline = cli_strdup(line);
fulllinelength = strlen(line) + 1;
} else if(line) {
fulllinelength += strlen(line); |
468c0f21 |
ptr = cli_realloc(fullline, fulllinelength);
if(ptr == NULL)
continue;
fullline = ptr; |
0cf4cea7 |
strcat(fullline, line); |
2ad0c86e |
} |
13462674 |
|
ddea752e |
assert(fullline != NULL); |
2ad0c86e |
|
842c7d49 |
if(next_is_folded_header(t))
/* Add arguments to this line */
continue; |
2ad0c86e |
|
4f4a8f4a |
if(count_quotes(fullline) & 1) |
2ad0c86e |
continue;
|
d72749e0 |
ptr = rfc822comments(fullline, NULL); |
2ad0c86e |
if(ptr) {
free(fullline);
fullline = ptr;
} |
c77c8809 |
|
ddea752e |
if(parseEmailHeader(ret, fullline, rfc821) < 0)
continue; |
28010d29 |
|
ddea752e |
free(fullline);
fullline = NULL; |
f12d2498 |
} |
0d35f10f |
} else {
if(bodyIsEmpty) { |
0cf4cea7 |
if(line == NULL) |
0d35f10f |
/* throw away leading blank lines */
continue;
/*
* Broken message: new line in the
* middle of the headers, so the first
* line of the body is in fact
* the last lines of the header
*/ |
0cf4cea7 |
if(newline_in_header(line)) |
0d35f10f |
continue;
bodyIsEmpty = FALSE;
}
/*if(t->t_line && isuuencodebegin(t->t_line))
puts("FIXME: add fast visa here");*/ |
0cf4cea7 |
/*cli_dbgmsg("Add line to body '%s'\n", line);*/ |
b2223aad |
if(messageAddLine(ret, t->t_line) < 0) |
0e4e16d4 |
break; |
0d35f10f |
} |
d879a7b0 |
} |
e06d34dc |
|
efb5f16c |
if(fullline) { |
2ad0c86e |
if(*fullline) switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE: |
aaaae842 |
cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline); |
2ad0c86e |
} |
efb5f16c |
free(fullline);
}
|
4f1d0bfc |
if(!anyHeadersFound) {
/*
* False positive in believing we have an e-mail when we don't
*/
messageDestroy(ret);
cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
return NULL;
}
|
93d41ee4 |
messageClean(ret);
|
e17491b2 |
cli_dbgmsg("parseEmailHeaders: return\n");
|
e06d34dc |
return ret; |
7e577f26 |
}
/* |
4c60b74f |
* Handle a header line of an email message
*/
static int |
393a6d67 |
parseEmailHeader(message *m, const char *line, const table_t *rfc821) |
4c60b74f |
{ |
0960ff5e |
char *cmd; |
4c60b74f |
int ret = -1;
#ifdef CL_THREAD_SAFE
char *strptr;
#endif |
97867f21 |
const char *separater; |
50df4118 |
char *copy, tokenseparater[2]; |
4c60b74f |
|
20d3dde9 |
cli_dbgmsg("parseEmailHeader '%s'\n", line);
|
97867f21 |
/*
* In RFC822 the separater between the key a value is a colon,
* e.g. Content-Transfer-Encoding: base64
* However some MUA's are lapse about this and virus writers exploit
* this hole, so we need to check all known possiblities
*/
for(separater = ":= "; *separater; separater++)
if(strchr(line, *separater) != NULL)
break;
if(*separater == '\0') |
1bfbedd4 |
return -1;
|
50df4118 |
copy = rfc2047(line);
if(copy == NULL) |
8ebe8dbc |
/* an RFC checker would return -1 here */ |
0cf4cea7 |
copy = cli_strdup(line); |
f2f25418 |
|
97867f21 |
tokenseparater[0] = *separater;
tokenseparater[1] = '\0';
|
548a5f96 |
#ifdef CL_THREAD_SAFE |
97867f21 |
cmd = strtok_r(copy, tokenseparater, &strptr); |
548a5f96 |
#else
cmd = strtok(copy, tokenseparater);
#endif |
4c60b74f |
|
3499d81e |
if(cmd && (strstrip(cmd) > 0)) { |
548a5f96 |
#ifdef CL_THREAD_SAFE |
4c60b74f |
char *arg = strtok_r(NULL, "", &strptr); |
548a5f96 |
#else
char *arg = strtok(NULL, "");
#endif |
4c60b74f |
if(arg)
/*
* Found a header such as
* Content-Type: multipart/mixed;
* set arg to be
* "multipart/mixed" and cmd to |
a9f386ed |
* be "Content-Type" |
4c60b74f |
*/ |
393a6d67 |
ret = parseMimeHeader(m, cmd, rfc821, arg); |
4c60b74f |
} |
50df4118 |
free(copy); |
4c60b74f |
return ret;
}
/* |
e3aaff8e |
* This is a recursive routine. |
45dc1456 |
* FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION |
e3aaff8e |
* |
7e577f26 |
* This function parses the body of mainMessage and saves its attachments in dir
* |
e06d34dc |
* mainMessage is the buffer to be parsed, it contains an e-mail's body, without |
f12d2498 |
* any headers. First time of calling it'll be
* the whole message. Later it'll be parts of a multipart message |
e3aaff8e |
* textIn is the plain text message being built up so far
*/ |
ecc3d638 |
static mbox_status |
242ffd7a |
parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level) |
e3aaff8e |
{ |
42eebd87 |
mbox_status rc; |
2673dc74 |
text *aText = textIn;
message *mainMessage = messageIn; |
b0b860f1 |
fileblob *fb; |
a603478f |
bool infected = FALSE; |
c3a79a7a |
const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE); |
8386c723 |
|
b0b860f1 |
cli_dbgmsg("in parseEmailBody\n"); |
e3aaff8e |
|
23841257 |
if(mctx->ctx->limits && mctx->ctx->limits->maxmailrec) { |
72910996 |
const cli_ctx *ctx = mctx->ctx; /* needed for BLOCKMAX :-( */
|
5684fccf |
/*
* This is approximate
*/ |
a5faab66 |
if(recursion_level > ctx->limits->maxmailrec) {
cli_warnmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
if(BLOCKMAX) {
if(ctx->virname)
*ctx->virname = "MIME.RecursionLimit";
return VIRUS;
} else |
69c62847 |
return MAXREC; |
242ffd7a |
} |
72910996 |
} |
242ffd7a |
|
f7bf6fd2 |
rc = OK;
|
e3aaff8e |
/* Anything left to be parsed? */ |
d4d14218 |
if(mainMessage && (messageGetBody(mainMessage) != NULL)) { |
e3aaff8e |
mime_type mimeType; |
2673dc74 |
int subtype, inhead, htmltextPart, inMimeHead, i; |
95e11e5a |
const char *mimeSubtype;
char *protocol, *boundary; |
e3aaff8e |
const text *t_line; |
98cb5cba |
/*bool isAlternative;*/ |
e3aaff8e |
message *aMessage; |
2673dc74 |
int multiparts = 0;
message **messages = NULL; /* parts of a multipart message */ |
e3aaff8e |
|
049a18b9 |
cli_dbgmsg("Parsing mail file\n");
|
e3aaff8e |
mimeType = messageGetMimeType(mainMessage);
mimeSubtype = messageGetMimeSubtype(mainMessage);
|
528c8a2d |
/* pre-process */ |
c1fce7f7 |
subtype = tableFind(mctx->subtypeTable, mimeSubtype); |
7c5a7a47 |
if((mimeType == TEXT) && (subtype == PLAIN)) { |
e3aaff8e |
/*
* This is effectively no encoding, notice that we
* don't check that charset is us-ascii
*/ |
c06e8a5c |
cli_dbgmsg("text/plain: Assume no attachements\n"); |
e3aaff8e |
mimeType = NOMIME; |
ae5c693a |
messageSetMimeSubtype(mainMessage, ""); |
528c8a2d |
} else if((mimeType == MESSAGE) &&
(strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
/*
* RFC1892/RFC3462: section 2 text/rfc822-headers
* incorrectly sent as message/rfc822-headers |
46d375fe |
*
* Parse as text/plain, i.e. no mime |
528c8a2d |
*/
cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n"); |
46d375fe |
mimeType = NOMIME; |
ae5c693a |
messageSetMimeSubtype(mainMessage, ""); |
a05e6d45 |
} else
cli_dbgmsg("mimeType = %d\n", mimeType); |
049a18b9 |
|
e3aaff8e |
switch(mimeType) {
case NOMIME: |
6fd711b2 |
cli_dbgmsg("Not a mime encoded message\n"); |
e3aaff8e |
aText = textAddMessage(aText, mainMessage); |
647a4f8d |
if(!doPhishingScan)
break; |
94aea271 |
/*
* Fall through: some phishing mails claim they are
* text/plain, when they are in fact html
*/ |
e3aaff8e |
case TEXT: |
8a892c3b |
/* text/plain has been preprocessed as no encoding */ |
d77ac7de |
if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) || doPhishingScan) { |
47d9cc65 |
/*
* It would be better to save and scan the
* file and only checkURLs if it's found to be
* clean
*/ |
8b899010 |
checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
/*
* There might be html sent without subtype
* html too, so scan them for phishing
*/ |
ecc3d638 |
if(rc == VIRUS) |
8b899010 |
infected = TRUE; |
c52d991e |
} |
e3aaff8e |
break;
case MULTIPART: |
c8a7cef0 |
cli_dbgmsg("Content-type 'multipart' handler\n"); |
e3aaff8e |
boundary = messageFindArgument(mainMessage, "boundary");
if(boundary == NULL) { |
842c7d49 |
cli_warnmsg("Multipart/%s MIME message contains no boundary header\n",
mimeSubtype); |
e2e7ebf5 |
/* Broken e-mail message */
mimeType = NOMIME;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break; |
e3aaff8e |
}
|
c79a2273 |
/* Perhaps it should assume mixed? */ |
cb5a87e0 |
if(mimeSubtype[0] == '\0') {
cli_warnmsg("Multipart has no subtype assuming alternative\n");
mimeSubtype = "alternative";
messageSetMimeSubtype(mainMessage, "alternative");
}
|
e3aaff8e |
/*
* Get to the start of the first message
*/ |
20d3dde9 |
t_line = messageGetBody(mainMessage);
if(t_line == NULL) {
cli_warnmsg("Multipart MIME message has no body\n");
free((char *)boundary);
mimeType = NOMIME;
break;
}
do |
bae9c53f |
if(t_line->t_line) {
if(boundaryStart(lineGetData(t_line->t_line), boundary))
break;
/* |
182bbcc8 |
* Found a binhex file before |
ebe57840 |
* the first multipart |
b116962d |
* TODO: check yEnc |
bae9c53f |
*/ |
182bbcc8 |
if(binhexBegin(mainMessage) == t_line) { |
47d9cc65 |
if(exportBinhexMessage(mctx->dir, mainMessage)) { |
a05e6d45 |
/* virus found */ |
ecc3d638 |
rc = VIRUS; |
47d9cc65 |
infected = TRUE; |
a05e6d45 |
break; |
b116962d |
} |
42eebd87 |
} else if(t_line->t_next && |
a5faab66 |
(encodingLine(mainMessage) == t_line->t_next)) { |
99c2299d |
/*
* We look for the next line
* since later on we'll skip
* over the important line when
* we think it's a blank line
* at the top of the message -
* which it would have been in
* an RFC compliant world
*/ |
42eebd87 |
cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
lineGetData(t_line->t_next->t_line)); |
99c2299d |
if(messageGetEncoding(mainMessage) == NOENCODING)
break; |
b116962d |
} |
bae9c53f |
} |
20d3dde9 |
while((t_line = t_line->t_next) != NULL); |
e3aaff8e |
if(t_line == NULL) { |
47d9cc65 |
cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
boundary); |
5a642650 |
/*
* Free added by Thomas Lamy
* <Thomas.Lamy@in-online.net>
*/
free((char *)boundary); |
e2e7ebf5 |
mimeType = NOMIME;
/*
* The break means that we will still |
182bbcc8 |
* check if the file contains a yEnc/binhex file |
e2e7ebf5 |
*/
break; |
e3aaff8e |
}
/*
* Build up a table of all of the parts of this
* multipart message. Remember, each part may itself
* be a multipart message.
*/
inhead = 1;
inMimeHead = 0;
|
e06d34dc |
/* |
9bccc1e5 |
* Re-read this variable in case mimeSubtype has changed
*/
subtype = tableFind(mctx->subtypeTable, mimeSubtype);
/* |
9a729c80 |
* Parse the mainMessage object and create an array
* of objects called messages, one for each of the |
9bccc1e5 |
* multiparts that mainMessage contains. |
3f46285b |
* |
e06d34dc |
* This looks like parseEmailHeaders() - maybe there's
* some duplication of code to be cleaned up |
9bccc1e5 |
* |
842c7d49 |
* We may need to create an array rather than just |
9bccc1e5 |
* save each part as it is found because not all
* elements will need scanning, and we don't yet know
* which of those elements it will be, except in
* the case of mixed, when all parts need to be scanned. |
e06d34dc |
*/ |
9bccc1e5 |
for(multiparts = 0; t_line && !infected; multiparts++) { |
56ae62e2 |
int lines = 0; |
1a74d4df |
message **m; |
69c62847 |
mbox_status old_rc; |
56ae62e2 |
|
1a74d4df |
m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *))); |
f12d2498 |
if(m == NULL) |
1a74d4df |
break;
messages = m; |
b726511f |
|
e3aaff8e |
aMessage = messages[multiparts] = messageCreate(); |
3f3f9085 |
if(aMessage == NULL) {
multiparts--;
continue;
} |
c1fce7f7 |
messageSetCTX(aMessage, mctx->ctx); |
e3aaff8e |
cli_dbgmsg("Now read in part %d\n", multiparts);
|
8ba634a9 |
/*
* Ignore blank lines. There shouldn't be ANY
* but some viruses insert them
*/ |
02927896 |
while((t_line = t_line->t_next) != NULL) |
b2223aad |
if(t_line->t_line &&
/*(cli_chomp(t_line->t_text) > 0))*/
(strlen(lineGetData(t_line->t_line)) > 0)) |
d79597e3 |
break; |
8ba634a9 |
if(t_line == NULL) {
cli_dbgmsg("Empty part\n"); |
b9ce9639 |
/*
* Remove this part unless there's |
182bbcc8 |
* a binhex portion somewhere in |
b9ce9639 |
* the complete message that we may
* throw away by mistake if the MIME
* encoding information is incorrect
*/ |
9bccc1e5 |
if(mainMessage &&
(binhexBegin(mainMessage) == NULL)) { |
b9ce9639 |
messageDestroy(aMessage);
--multiparts;
} |
8ba634a9 |
continue;
}
do { |
b2223aad |
const char *line = lineGetData(t_line->t_line); |
e3aaff8e |
|
fa5661be |
/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
multiparts, inMimeHead, inhead, boundary, line, |
391f7bb3 |
t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/ |
e3aaff8e |
|
f1c1300c |
if(inMimeHead) { /* continuation line */ |
02927896 |
if(line == NULL) { |
59b99810 |
/*inhead =*/ inMimeHead = 0; |
02927896 |
continue;
} |
3a978f7d |
/*
* Handle continuation lines
* because the previous line |
1eec55a6 |
* ended with a ; or this line
* starts with a white space |
3a978f7d |
*/ |
1eec55a6 |
cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
multiparts, line); |
3a978f7d |
/*
* Handle the case when it
* isn't really a continuation
* line:
* Content-Type: application/octet-stream;
* Content-Transfer-Encoding: base64
*/ |
c1fce7f7 |
parseEmailHeader(aMessage, line, mctx->rfc821Table); |
3a978f7d |
|
e3aaff8e |
while(isspace((int)*line))
line++;
if(*line == '\0') {
inhead = inMimeHead = 0;
continue;
} |
0ed29506 |
inMimeHead = FALSE; |
e3aaff8e |
messageAddArgument(aMessage, line); |
f1c1300c |
} else if(inhead) { /* handling normal headers */ |
4f4a8f4a |
/*int quotes;*/ |
c8a7cef0 |
char *fullline, *ptr; |
2ad0c86e |
|
02927896 |
if(line == NULL) { |
7e67e382 |
/*
* empty line, should the end of the headers,
* but some base64 decoders, e.g. uudeview, are broken
* and will handle this type of entry, decoding the
* base64 content...
* Content-Type: application/octet-stream; name=text.zip
* Content-Transfer-Encoding: base64
* Content-Disposition: attachment; filename="text.zip" |
5198de85 |
* |
7e67e382 |
* Content-Disposition: attachment;
* filename=text.zip
* Content-Type: application/octet-stream;
* name=text.zip
* Content-Transfer-Encoding: base64 |
5198de85 |
* |
7e67e382 |
* UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
*/ |
842c7d49 |
const text *next = t_line->t_next;
|
7e67e382 |
if(next && next->t_line) {
const char *data = lineGetData(next->t_line); |
5b76248c |
if((messageGetEncoding(aMessage) == NOENCODING) && |
842c7d49 |
(messageGetMimeType(aMessage) == APPLICATION) &&
strstr(data, "base64")) { |
d72749e0 |
/*
* Handle this nightmare (note the blank
* line in the header and the incorrect
* content-transfer-encoding header)
*
* Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
*
* r-Encoding: base64
* Content-Disposition: attachment; filename="zipped_files.EXE"
*/ |
842c7d49 |
messageSetEncoding(aMessage, "base64");
cli_dbgmsg("Ignoring fake end of headers\n");
continue;
} |
5cdb01fc |
if((strncmp(data, "Content", 7) == 0) ||
(strncmp(data, "filename=", 9) == 0)) { |
7e67e382 |
cli_dbgmsg("Ignoring fake end of headers\n");
continue;
}
} |
59b99810 |
cli_dbgmsg("Multipart %d: End of header information\n",
multiparts); |
e3aaff8e |
inhead = 0;
continue;
} |
c76810dc |
if(isspace((int)*line)) {
/*
* The first line is
* continuation line.
* This is tricky
* to handle, but
* all we can do is our
* best
*/
cli_dbgmsg("Part %d starts with a continuation line\n",
multiparts);
messageAddArgument(aMessage, line);
/*
* Give it a default
* MIME type since
* that may be the
* missing line
*
* Choose application to
* force a save
*/
if(messageGetMimeType(aMessage) == NOMIME)
messageSetMimeType(aMessage, "application");
continue;
}
|
c8a7cef0 |
inMimeHead = FALSE; |
b2223aad |
|
85bb253e |
assert(strlen(line) <= RFC2821LENGTH); |
f1c1300c |
|
d72749e0 |
fullline = rfc822comments(line, NULL); |
c8a7cef0 |
if(fullline == NULL) |
0cf4cea7 |
fullline = cli_strdup(line); |
2ad0c86e |
|
4f4a8f4a |
/*quotes = count_quotes(fullline);*/ |
f1c1300c |
|
c8a7cef0 |
/*
* Fold next lines to the end of this
* if they start with a white space
* or if this line has an odd number of quotes:
* Content-Type: application/octet-stream; name="foo
* "
*/ |
842c7d49 |
while(t_line && next_is_folded_header(t_line)) {
const char *data;
t_line = t_line->t_next;
data = lineGetData(t_line->t_line); |
f1c1300c |
|
f1d57230 |
if(data[1] == '\0') {
/*
* Broken message: the
* blank line at the end
* of the headers isn't blank -
* it contains a space
*/
cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
multiparts);
inhead = FALSE;
break;
}
|
c8a7cef0 |
ptr = cli_realloc(fullline,
strlen(fullline) + strlen(data) + 1); |
303f9be9 |
|
c8a7cef0 |
if(ptr == NULL)
break; |
1eec55a6 |
|
c8a7cef0 |
fullline = ptr;
strcat(fullline, data); |
2ad0c86e |
|
4f4a8f4a |
/*quotes = count_quotes(data);*/ |
1eec55a6 |
} |
842c7d49 |
|
c8a7cef0 |
cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
multiparts, fullline);
|
c1fce7f7 |
parseEmailHeader(aMessage, fullline, mctx->rfc821Table); |
c8a7cef0 |
free(fullline); |
69c62847 |
} else if(boundaryEnd(line, boundary)) { |
e3aaff8e |
/*
* Some viruses put information
* *after* the end of message,
* which presumably some broken
* mail clients find, so we
* can't assume that this
* is the end of the message
*/
/* t_line = NULL;*/
break; |
391f7bb3 |
} else if(boundaryStart(line, boundary)) {
inhead = 1;
break; |
56ae62e2 |
} else { |
b2223aad |
if(messageAddLine(aMessage, t_line->t_line) < 0) |
1a74d4df |
break; |
56ae62e2 |
lines++;
} |
8ba634a9 |
} while((t_line = t_line->t_next) != NULL);
|
69c62847 |
cli_dbgmsg("Part %d has %d lines, rc = %d\n",
multiparts, lines, rc); |
9bccc1e5 |
/*
* Only save in the array of messages if some
* decision will be taken on whether to scan.
* If all parts will be scanned then save to
* file straight away
*/
switch(subtype) {
case MIXED:
case ALTERNATIVE:
case REPORT:
case DIGEST:
case APPLEDOUBLE:
case KNOWBOT:
case -1: |
69c62847 |
old_rc = rc; |
9bccc1e5 |
mainMessage = do_multipart(mainMessage,
messages, multiparts,
&rc, mctx, messageIn, |
5684fccf |
&aText, recursion_level); |
69c62847 |
if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
rc = OK; |
a7a2e2d4 |
if(messages[multiparts]) {
messageDestroy(messages[multiparts]);
messages[multiparts] = NULL;
} |
9bccc1e5 |
--multiparts; |
ecc3d638 |
if(rc == VIRUS) |
9bccc1e5 |
infected = TRUE;
break;
default:
messageClean(aMessage);
} |
e3aaff8e |
}
free((char *)boundary);
|
7c1eb3bf |
/* |
c79a2273 |
* Preprocess. Anything special to be done before
* we handle the multiparts? |
9a729c80 |
*/ |
9bccc1e5 |
switch(subtype) { |
c79a2273 |
case KNOWBOT:
/* TODO */
cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
mimeSubtype = "mixed";
break; |
5d8100cb |
case -1:
/*
* According to section 7.2.6 of
* RFC1521, unrecognised multiparts
* should be treated as multipart/mixed.
*/ |
38d07186 |
cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype); |
5d8100cb |
mimeSubtype = "mixed";
break; |
c79a2273 |
} |
9a729c80 |
/* |
7c1eb3bf |
* We've finished message we're parsing
*/
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL; |
ad9c6836 |
} |
e3aaff8e |
|
9bccc1e5 |
cli_dbgmsg("The message has %d parts\n", multiparts);
if(((multiparts == 0) || infected) && (aText == NULL)) { |
b912eaf2 |
if(messages) {
for(i = 0; i < multiparts; i++)
if(messages[i])
messageDestroy(messages[i]); |
b726511f |
free(messages); |
b912eaf2 |
}
|
9bccc1e5 |
/* |
ecc3d638 |
* Nothing to do |
9bccc1e5 |
*/ |
69c62847 |
switch(rc) {
case VIRUS: return VIRUS;
case MAXREC: return MAXREC;
default: return OK_ATTACHMENTS_NOT_SAVED;
} |
b726511f |
} |
7c1eb3bf |
|
393a6d67 |
cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype); |
e3aaff8e |
|
9a729c80 |
/*
* We now have all the parts of the multipart message
* in the messages array:
* message *messages[multiparts]
* Let's decide what to do with them all
*/ |
c1fce7f7 |
switch(tableFind(mctx->subtypeTable, mimeSubtype)) { |
e3aaff8e |
case RELATED: |
e06d34dc |
cli_dbgmsg("Multipart related handler\n"); |
e3aaff8e |
/* |
294d0774 |
* Have a look to see if there's HTML code
* which will need scanning |
e3aaff8e |
*/
aMessage = NULL;
assert(multiparts > 0);
|
d4d14218 |
htmltextPart = getTextPart(messages, multiparts); |
e3aaff8e |
|
d4d14218 |
if(htmltextPart >= 0)
aText = textAddMessage(aText, messages[htmltextPart]); |
e3aaff8e |
else
/* |
294d0774 |
* There isn't an HTML bit. If there's a
* multipart bit, it'll may be in there
* somewhere |
e3aaff8e |
*/
for(i = 0; i < multiparts; i++)
if(messageGetMimeType(messages[i]) == MULTIPART) {
aMessage = messages[i]; |
d4d14218 |
htmltextPart = i; |
e3aaff8e |
break;
}
|
59da5a4f |
if(htmltextPart == -1) |
16037392 |
cli_dbgmsg("No HTML code found to be scanned\n"); |
59da5a4f |
else { |
242ffd7a |
rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1); |
99f7771b |
if((rc == OK) && aMessage) { |
59da5a4f |
assert(aMessage == messages[htmltextPart]); |
99f7771b |
messageDestroy(aMessage); |
59da5a4f |
messages[htmltextPart] = NULL;
}
} |
e3aaff8e |
/*
* Fixed based on an idea from Stephen White <stephen@earth.li>
* The message is confused about the difference
* between alternative and related. Badtrans.B
* suffers from this problem.
*
* Fall through in this case:
* Content-Type: multipart/related;
* type="multipart/alternative"
*/ |
98cb5cba |
/*
* Changed to always fall through based on
* an idea from Michael Dankov <misha@btrc.ru>
* that some viruses are completely confused
* about the difference between related
* and mixed
*/
/*cptr = messageFindArgument(mainMessage, "type"); |
e3aaff8e |
if(cptr == NULL)
break;
isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
free((char *)cptr);
if(!isAlternative) |
98cb5cba |
break;*/ |
ba867aed |
case DIGEST:
/*
* According to section 5.1.5 RFC2046, the
* default mime type of multipart/digest parts
* is message/rfc822
*
* We consider them as alternative, wrong in
* the strictest sense since they aren't
* alternatives - all parts a valid - but it's
* OK for our needs since it means each part
* will be scanned
*/ |
e3aaff8e |
case ALTERNATIVE:
cli_dbgmsg("Multipart alternative handler\n");
/*
* Fall through - some clients are broken and
* say alternative instead of mixed. The Klez |
5d8100cb |
* virus is broken that way, and anyway we
* wish to scan all of the alternatives |
e3aaff8e |
*/
case REPORT:
/*
* According to section 1 of RFC1892, the
* syntax of multipart/report is the same
* as multipart/mixed. There are some required
* parameters, but there's no need for us to
* verify that they exist
*/
case MIXED: |
c9b8f252 |
case APPLEDOUBLE: /* not really supported */ |
e3aaff8e |
/*
* Look for attachments
*
* Not all formats are supported. If an
* unsupported format turns out to be
* common enough to implement, it is a simple
* matter to add it
*/ |
ad9c6836 |
if(aText) {
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
e3aaff8e |
mainMessage = NULL; |
ad9c6836 |
} |
e3aaff8e |
cli_dbgmsg("Mixed message with %d parts\n", multiparts);
for(i = 0; i < multiparts; i++) { |
c1fce7f7 |
mainMessage = do_multipart(mainMessage,
messages, i, &rc, mctx, |
242ffd7a |
messageIn, &aText, recursion_level + 1); |
ecc3d638 |
if(rc == VIRUS) { |
a603478f |
infected = TRUE;
break;
} |
69c62847 |
if(rc == MAXREC)
break; |
e3aaff8e |
}
|
242ffd7a |
/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */ |
e3aaff8e |
break;
case SIGNED:
case PARALLEL:
/*
* If we're here it could be because we have a
* multipart/mixed message, consisting of a
* message followed by an attachment. That
* message itself is a multipart/alternative
* message and we need to dig out the plain
* text part of that alternative
*/ |
d4d14218 |
htmltextPart = getTextPart(messages, multiparts);
if(htmltextPart == -1)
htmltextPart = 0; |
e3aaff8e |
|
242ffd7a |
rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1); |
e3aaff8e |
break; |
9a729c80 |
case ENCRYPTED: |
08e5a453 |
rc = FAIL; /* Not yet handled */ |
c79a2273 |
protocol = (char *)messageFindArgument(mainMessage, "protocol"); |
9a729c80 |
if(protocol) {
if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
/* RFC2015 */
cli_warnmsg("PGP encoded attachment not scanned\n"); |
ecc3d638 |
rc = OK_ATTACHMENTS_NOT_SAVED; |
9a729c80 |
} else |
6fcf5624 |
cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", protocol); |
9a729c80 |
free(protocol);
} else |
4ab382c3 |
cli_dbgmsg("Encryption method missing protocol name\n"); |
9a729c80 |
break; |
e3aaff8e |
default: |
5d8100cb |
assert(0); |
e3aaff8e |
}
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
6e84cebb |
if(aText && (textIn == NULL)) { |
a603478f |
if((!infected) && (fb = fileblobCreate()) != NULL) { |
c8a7cef0 |
cli_dbgmsg("Save non mime and/or text/plain part\n"); |
c1fce7f7 |
fileblobSetFilename(fb, mctx->dir, "textpart"); |
e097c0dd |
/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/ |
c1fce7f7 |
fileblobSetCTX(fb, mctx->ctx); |
3f46285b |
(void)textToFileblob(aText, fb, 1); |
6e84cebb |
fileblobDestroy(fb);
} |
049a18b9 |
textDestroy(aText); |
6e84cebb |
} |
049a18b9 |
|
b116962d |
for(i = 0; i < multiparts; i++)
if(messages[i])
messageDestroy(messages[i]);
|
b726511f |
if(messages)
free(messages);
|
e3aaff8e |
return rc;
case MESSAGE:
/*
* Check for forbidden encodings
*/
switch(messageGetEncoding(mainMessage)) {
case NOENCODING:
case EIGHTBIT:
case BINARY:
break;
default: |
049a18b9 |
cli_warnmsg("MIME type 'message' cannot be decoded\n"); |
e3aaff8e |
break;
} |
ecc3d638 |
rc = FAIL; |
049a18b9 |
if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
(strcasecmp(mimeSubtype, "delivery-status") == 0)) { |
c1fce7f7 |
message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table); |
c693116d |
if(m) { |
af3c6acb |
cli_dbgmsg("Decode rfc822\n"); |
c693116d |
|
c1fce7f7 |
messageSetCTX(m, mctx->ctx); |
a603478f |
|
93d41ee4 |
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL; |
59da5a4f |
} else
messageReset(mainMessage); |
c693116d |
if(messageGetBody(m)) |
242ffd7a |
rc = parseEmailBody(m, NULL, mctx, recursion_level + 1); |
c693116d |
messageDestroy(m);
} |
e3aaff8e |
break; |
5a15955b |
} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) { |
12f3689d |
/* RFC 2298 - handle like a normal email */ |
ecc3d638 |
rc = OK; |
12f3689d |
break; |
5a15955b |
} else if(strcasecmp(mimeSubtype, "partial") == 0) { |
f10460ed |
#ifdef PARTIAL_DIR
/* RFC1341 message split over many emails */ |
c1fce7f7 |
if(rfc1341(mainMessage, mctx->dir) >= 0) |
ecc3d638 |
rc = OK; |
f10460ed |
#else |
22080fa5 |
cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n"); |
f10460ed |
#endif
} else if(strcasecmp(mimeSubtype, "external-body") == 0) |
22080fa5 |
/* TODO */ |
e3aaff8e |
cli_warnmsg("Attempt to send Content-type message/external-body trapped"); |
5a642650 |
else |
6fcf5624 |
cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype); |
e3aaff8e |
|
f10460ed |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
b726511f |
if(messages)
free(messages); |
f10460ed |
return rc; |
e3aaff8e |
case APPLICATION: |
4ab382c3 |
/*cptr = messageGetMimeSubtype(mainMessage); |
d4d14218 |
|
4ab382c3 |
if((strcasecmp(cptr, "octet-stream") == 0) || |
aa0210b6 |
(strcasecmp(cptr, "x-msdownload") == 0)) {*/
{ |
2673dc74 |
fb = messageToFileblob(mainMessage, mctx->dir, 1); |
e3aaff8e |
|
0e5a0129 |
if(fb) {
cli_dbgmsg("Saving main message as attachment\n");
fileblobDestroy(fb); |
fbb7262e |
if(mainMessage != messageIn) {
messageDestroy(mainMessage);
mainMessage = NULL;
} else
messageReset(mainMessage); |
e3aaff8e |
} |
aa0210b6 |
} /*else
cli_warnmsg("Discarded application not sent as attachment\n");*/ |
e3aaff8e |
break;
case AUDIO:
case VIDEO:
case IMAGE:
break;
default:
cli_warnmsg("Message received with unknown mime encoding");
break;
} |
2673dc74 |
|
b912eaf2 |
if(messages) {
/* "can't happen" */ |
2e5a3528 |
cli_warnmsg("messages != NULL, report to http://bugs.clamav.net\n"); |
2673dc74 |
free(messages); |
b912eaf2 |
} |
e3aaff8e |
}
|
f12d2498 |
if(aText && (textIn == NULL)) { |
6fd711b2 |
/* Look for a bounce in the text (non mime encoded) portion */
const text *t; |
b116962d |
|
6fd711b2 |
for(t = aText; t; t = t->t_next) {
const line_t *l = t->t_line; |
ebe57840 |
const text *lookahead, *topofbounce; |
6fd711b2 |
const char *s; |
ebe57840 |
bool inheader; |
b116962d |
|
6fd711b2 |
if(l == NULL)
continue;
|
a603478f |
if(!isBounceStart(lineGetData(l))) |
6fd711b2 |
continue;
|
a7a2e2d4 |
lookahead = t->t_next;
if(lookahead) {
if(isBounceStart(lineGetData(lookahead->t_line)))
/* don't save worthless header lines */
continue;
} else /* don't save a single liner */
break;
|
6fd711b2 |
/*
* We've found what looks like the start of a bounce
* message. Only bother saving if it really is a bounce
* message, this helps to speed up scanning of ping-pong
* messages that have lots of bounces within bounces in
* them
*/ |
a7a2e2d4 |
for(; lookahead; lookahead = lookahead->t_next) { |
6fd711b2 |
l = lookahead->t_line;
if(l == NULL)
break;
s = lineGetData(l);
if(strncasecmp(s, "Content-Type:", 13) == 0)
/*
* Don't bother with plain/text or
* plain/html
*/
if(strstr(s, "text/") == NULL) |
6862efc7 |
/*
* Don't bother to save the unuseful
* part
*/ |
6fd711b2 |
break;
}
if(lookahead && (lookahead->t_line == NULL)) {
cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
t = lookahead;
/* look for next bounce message */
continue;
}
|
6862efc7 |
/*
* Prescan the bounce message to see if there's likely
* to be anything nasty.
* This algorithm is hand crafted and may be breakable
* so all submissions are welcome. It's best NOT to
* remove this however you may be tempted, because it
* significantly speeds up the scanning of multiple
* bounces (i.e. bounces within many bounces)
*/
for(; lookahead; lookahead = lookahead->t_next) {
l = lookahead->t_line;
if(l) {
s = lineGetData(l);
if((strncasecmp(s, "Content-Type:", 13) == 0) &&
(strstr(s, "multipart/") == NULL) &&
(strstr(s, "message/rfc822") == NULL) &&
(strstr(s, "text/plain") == NULL))
break;
}
}
if(lookahead == NULL) { |
7c56033f |
cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n"); |
6862efc7 |
/* nothing here, move along please */
break;
} |
ebe57840 |
if((fb = fileblobCreate()) == NULL)
break;
cli_dbgmsg("Save non mime part bounce message\n"); |
c1fce7f7 |
fileblobSetFilename(fb, mctx->dir, "bounce"); |
95e11e5a |
fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28); |
c1fce7f7 |
fileblobSetCTX(fb, mctx->ctx); |
ebe57840 |
inheader = TRUE;
topofbounce = NULL; |
01c99f53 |
do { |
ebe57840 |
l = t->t_line;
if(l == NULL) {
if(inheader) {
inheader = FALSE;
topofbounce = t;
}
} else {
s = lineGetData(l); |
95e11e5a |
fileblobAddData(fb, (const unsigned char *)s, strlen(s)); |
ebe57840 |
} |
95e11e5a |
fileblobAddData(fb, (const unsigned char *)"\n", 1); |
ebe57840 |
lookahead = t->t_next;
if(lookahead == NULL)
break;
t = lookahead;
l = t->t_line;
if((!inheader) && l) {
s = lineGetData(l); |
a603478f |
if(isBounceStart(s)) { |
b2ba24f5 |
cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s); |
ebe57840 |
break;
}
} |
01c99f53 |
} while(!fileblobContainsVirus(fb)); |
ebe57840 |
fileblobDestroy(fb);
if(topofbounce)
t = topofbounce;
/*
* Don't do this - it slows bugs.txt
*/
/*if(mainMessage)
mainMessage->bounce = NULL;*/ |
6fd711b2 |
} |
f12d2498 |
textDestroy(aText);
aText = NULL;
}
|
b0b860f1 |
/*
* No attachments - scan the text portions, often files
* are hidden in HTML code
*/ |
ecc3d638 |
if(mainMessage && (rc != VIRUS)) { |
47d9cc65 |
text *t_line;
|
e3aaff8e |
/* |
b0b860f1 |
* Look for uu-encoded main file |
e3aaff8e |
*/ |
182bbcc8 |
if((encodingLine(mainMessage) != NULL) && |
47d9cc65 |
((t_line = bounceBegin(mainMessage)) != NULL)) {
if(exportBounceMessage(t_line, mctx)) |
ecc3d638 |
rc = OK; |
b0b860f1 |
} else {
bool saveIt; |
d4d14218 |
|
b0b860f1 |
if(messageGetMimeType(mainMessage) == MESSAGE) |
f01bbfe8 |
/* |
b0b860f1 |
* Quick peek, if the encapsulated
* message has no
* content encoding statement don't
* bother saving to scan, it's safe |
f01bbfe8 |
*/ |
47d9cc65 |
saveIt = (bool)(encodingLine(mainMessage) != NULL); |
b0b860f1 |
else if((t_line = encodingLine(mainMessage)) != NULL) { |
a7527b1f |
/* |
b0b860f1 |
* Some bounces include the message
* body without the headers. |
ebe57840 |
* FIXME: Unfortunately this generates a |
b0b860f1 |
* lot of false positives that a bounce
* has been found when it hasn't. |
a7527b1f |
*/ |
b0b860f1 |
if((fb = fileblobCreate()) != NULL) { |
ae5c693a |
cli_dbgmsg("Found a bounce message with no header at '%s'\n",
lineGetData(t_line->t_line)); |
c1fce7f7 |
fileblobSetFilename(fb, mctx->dir, "bounce"); |
f24bf390 |
fileblobAddData(fb,
(const unsigned char *)"Received: by clamd (bounce)\n",
28); |
cca4efe4 |
|
01c99f53 |
/*fileblobSetCTX(fb, ctx);*/ |
94aea271 |
fileblobDestroy(textToFileblob(t_line, fb, 1)); |
5c1150ac |
} |
b0b860f1 |
saveIt = FALSE; |
2673dc74 |
} else |
b0b860f1 |
/*
* Save the entire text portion,
* since it it may be an HTML file with |
a05e6d45 |
* a JavaScript virus or a phish |
b0b860f1 |
*/
saveIt = TRUE; |
e3aaff8e |
|
b0b860f1 |
if(saveIt) {
cli_dbgmsg("Saving text part to scan\n"); |
3f46285b |
saveTextPart(mainMessage, mctx->dir, 1); |
59da5a4f |
if(mainMessage != messageIn) {
messageDestroy(mainMessage);
mainMessage = NULL;
} else
messageReset(mainMessage); |
ecc3d638 |
rc = OK; |
049a18b9 |
} |
e3aaff8e |
} |
69c62847 |
} /*else |
ecc3d638 |
rc = OK_ATTACHMENTS_NOT_SAVED; /* nothing saved */ |
e3aaff8e |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
ecc3d638 |
if((rc != FAIL) && infected)
rc = VIRUS; |
a603478f |
|
e06d34dc |
cli_dbgmsg("parseEmailBody() returning %d\n", rc); |
e3aaff8e |
|
e06d34dc |
return rc; |
e3aaff8e |
}
/*
* Is the current line the start of a new section?
*
* New sections start with --boundary
*/
static int
boundaryStart(const char *line, const char *boundary)
{ |
95e11e5a |
const char *ptr;
char *out; |
df8806fd |
int rc; |
85bb253e |
char buf[RFC2821LENGTH + 1]; |
2ad0c86e |
|
0e4e16d4 |
if(line == NULL)
return 0; /* empty line */ |
300a8ae9 |
if(boundary == NULL)
return 0; |
0e4e16d4 |
|
6e84cebb |
/*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/ |
2ad0c86e |
|
ebe57840 |
if((*line != '-') && (*line != '('))
return 0;
if(strchr(line, '-') == NULL)
return 0;
|
d72749e0 |
if(strlen(line) <= sizeof(buf)) {
out = NULL;
ptr = rfc822comments(line, buf);
} else |
95e11e5a |
ptr = out = rfc822comments(line, NULL); |
d72749e0 |
|
2ad0c86e |
if(ptr == NULL) |
95e11e5a |
ptr = line; |
2ad0c86e |
|
a9d251e0 |
if((*ptr++ != '-') || (*ptr == '\0')) { |
d72749e0 |
if(out)
free(out); |
0e4e16d4 |
return 0; |
2ad0c86e |
} |
0e4e16d4 |
|
e3aaff8e |
/* |
0e4e16d4 |
* Gibe.B3 is broken, it has: |
e3aaff8e |
* boundary="---- =_NextPart_000_01C31177.9DC7C000"
* but it's boundaries look like
* ------ =_NextPart_000_01C31177.9DC7C000 |
0e4e16d4 |
* notice the one too few '-'.
* Presumably this is a deliberate exploitation of a bug in some mail
* clients.
*
* The trouble is that this creates a lot of false positives for
* boundary conditions, if we're too lax about matches. We do our level
* best to avoid these false positives. For example if we have
* boundary="1" we want to ensure that we don't break out of every line
* that has -1 in it instead of starting --1. This needs some more work. |
7c56033f |
*
* Look with and without RFC822 comments stripped, I've seen some
* samples where () are taken as comments in boundaries and some where |
69c62847 |
* they're not. Irrespective of whatever RFC2822 says, we need to find
* viruses in both types of mails. |
e3aaff8e |
*/ |
69c62847 |
if((strstr(&ptr[1], boundary) != NULL) || (strstr(line, boundary) != NULL)) {
const char *k = ptr;
/*
* We need to ensure that we don't match --11=-=-=11 when
* looking for --1=-=-=1 in well behaved headers, that's a
* false positive problem mentioned above
*/
rc = 0;
do
if(strcmp(++k, boundary) == 0) {
rc = 1;
break;
}
while(*k == '-');
if(rc == 0) {
k = &line[1];
do
if(strcmp(++k, boundary) == 0) {
rc = 1;
break;
}
while(*k == '-');
}
} else if(*ptr++ != '-')
rc = 0; |
df8806fd |
else |
79179da5 |
rc = (strcasecmp(ptr, boundary) == 0); |
df8806fd |
|
d72749e0 |
if(out)
free(out); |
df8806fd |
|
69c62847 |
if(rc == 1) |
df8806fd |
cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
return rc; |
e3aaff8e |
}
/*
 * Is the current line the end?
 *
 * The message ends with --boundary--
 *
 * line		the text of the line to examine, may be NULL (empty line)
 * boundary	the multipart boundary string, may be NULL
 *
 * The boundary is compared without regard to case.
 *
 * Returns 1 if the line starts with --boundary--, 0 otherwise
 */
static int
boundaryEnd(const char *line, const char *boundary)
{
	const char *start = line;	/* kept for the debug message */
	size_t len;

	if((line == NULL) || (boundary == NULL))
		return 0;

	if(*line++ != '-')
		return 0;
	if(*line++ != '-')
		return 0;
	len = strlen(boundary);
	if(strncasecmp(line, boundary, len) != 0)
		return 0;
	/*
	 * Use < rather than == because some broken mails have white
	 * space after the boundary
	 */
	if(strlen(line) < (len + 2))
		return 0;
	line = &line[len];
	if(*line++ != '-')
		return 0;
	if(*line == '-') {
		cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, start);
		return 1;
	}
	return 0;
}
/*
 * Initialise the various lookup tables
 *
 * *rfc821Table is filled from the rfc821headers array and *subtypeTable
 * from the mimeSubtypes array; both arrays end with an entry whose key
 * is NULL.
 *
 * Returns 0 on success, -1 on failure. On failure every table created by
 * this call has been destroyed and its pointer set to NULL.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
	const struct tableinit *tableinit;

	/*
	 * Check the results explicitly rather than with assert(), since
	 * assert() is compiled out when NDEBUG is defined
	 */
	*rfc821Table = tableCreate();
	if(*rfc821Table == NULL)
		return -1;

	for(tableinit = rfc821headers; tableinit->key; tableinit++)
		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			*rfc821Table = NULL;
			return -1;
		}

	*subtypeTable = tableCreate();
	if(*subtypeTable == NULL) {
		tableDestroy(*rfc821Table);
		*rfc821Table = NULL;
		return -1;
	}

	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			tableDestroy(*subtypeTable);
			*rfc821Table = NULL;
			*subtypeTable = NULL;
			return -1;
		}

	return 0;
}
/*
 * Choose which of the parts holds the text to look at.
 *
 * If there's a HTML text version use that (the first one found),
 * otherwise use the last part whose MIME type is text. HTML text is
 * most likely to include a scripting worm
 *
 * Empty (NULL) slots in the array are ignored.
 *
 * Returns the index into messages[], or -1 if there is no text part
 */
static int
getTextPart(message *const messages[], size_t size)
{
	size_t i;
	int textpart = -1;

	for(i = 0; i < size; i++) {
		/*
		 * Skip empty slots rather than assert(), which does
		 * nothing when NDEBUG is defined
		 */
		if(messages[i] == NULL)
			continue;
		if(messageGetMimeType(messages[i]) != TEXT)
			continue;
		if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
			return (int)i;
		textpart = (int)i;
	}

	return textpart;
}
/*
 * strip -
 *	Remove the trailing spaces from a buffer. Don't call this directly,
 * always call strstrip() which is a wrapper to this routine to be used with
 * NUL terminated strings. This code looks a bit strange because of its
 * heritage from code that worked on strings that weren't necessarily NUL
 * terminated.
 * TODO: rewrite for clamAV
 *
 * On the UNIX/C_LINUX/C_DARWIN build trailing characters that are not
 * printable are removed, stopping at a '\n' or '\r'; on other builds
 * trailing NULs and white space are removed.
 *
 * Returns its new length (a la strlen). If buf is NULL or len <= 0 it
 * returns 0; if len is greater than strlen(buf) + 1 the buffer is left
 * alone and strlen(buf) is returned.
 *
 * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
 * not strlen(buf)
 */
static size_t
strip(char *buf, int len)
{
	register char *ptr;
	register size_t i;

	if((buf == NULL) || (len <= 0))
		return 0;

	i = strlen(buf);
	if(len > (int)(i + 1))
		return i;
	ptr = &buf[--len];

	/*
	 * The <ctype.h> functions need a value representable as unsigned
	 * char, so cast before calling them: plain char may be negative
	 */
#if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
	do
		if(*ptr)
			*ptr = '\0';
	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else	/* more characters can be displayed on DOS */
	do
#ifndef	REAL_MODE_DOS
		if(*ptr)	/* C8.0 puts into a text area */
#endif
			*ptr = '\0';
	while((--len >= 0) && ((*--ptr == '\0') || isspace((unsigned char)*ptr)));
#endif
	return((size_t)(len + 1));
}
/*
 * strstrip:
 *	Strip trailing space from a NUL terminated string, in place, by
 * handing it and its size (length plus the NUL) to strip().
 *
 * Returns what strip() returns, or 0 if s is NULL
 */
size_t
strstrip(char *s)
{
	int size;

	if(s == NULL)
		return 0;

	size = (int)strlen(s) + 1;	/* include the terminating NUL */

	return strip(s, size);
}
/*
 * Handle one MIME header of a message.
 *
 * m		the message the header belongs to
 * cmd		the header name, looked up in rfc821Table
 * rfc821Table	maps header names to CONTENT_TYPE etc.
 * arg		the header's value, may be NULL for Content-Type
 *
 * Both cmd and arg have any RFC822 comments removed before use.
 * Headers other than Content-Type, Content-Transfer-Encoding and
 * Content-Disposition are ignored.
 *
 * Always returns 0
 */
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
{
	char *copy, *p;
	const char *ptr;
	int commandNumber;

	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);

	copy = rfc822comments(cmd, NULL);
	if(copy) {
		commandNumber = tableFind(rfc821Table, copy);
		free(copy);
	} else
		commandNumber = tableFind(rfc821Table, cmd);

	/* ptr is the value to work on: the comment-free copy if there is one */
	copy = rfc822comments(arg, NULL);

	if(copy)
		ptr = copy;
	else
		ptr = arg;

	switch(commandNumber) {
		case CONTENT_TYPE:
			/*
			 * Fix for non RFC1521 compliant mailers
			 * that send content-type: Text instead
			 * of content-type: Text/Plain, or
			 * just simply "Content-Type:"
			 */
			if(arg == NULL)
				/*
				 * Empty field, such as
				 *	Content-Type:
				 * which I believe is illegal according to
				 * RFC1521
				 */
				cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
			else if(strchr(ptr, '/') == NULL)
				/*
				 * According to section 4 of RFC1521:
				 * "Note also that a subtype specification is
				 * MANDATORY. There are no default subtypes"
				 *
				 * We have to break this and make an assumption
				 * for the subtype because virus writers and
				 * email client writers don't get it right
				 */
				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
			else {
				int i;
				char *mimeArgs;	/* RHS of the ; */

				if(*arg == '/') {
					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
					messageSetMimeType(m, "application");
					messageSetMimeSubtype(m, "octet-stream");
				} else {
					/*
					 * The content type could be in quotes:
					 *	Content-Type: "multipart/mixed"
					 * FIXME: this is a hack in that ignores
					 *	the quotes, it doesn't handle
					 *	them properly
					 */
					while(isspace((unsigned char)*ptr))
						ptr++;
					if(ptr[0] == '\"')
						ptr++;

					if(ptr[0] != '/') {
						char *s;
						char *mimeType;	/* LHS of the ; */
#ifdef	CL_THREAD_SAFE
						char *strptr = NULL;
#endif

						s = mimeType = cli_strtok(ptr, 0, ";");
						/*
						 * Handle
						 *	Content-Type: foo/bar multipart/mixed
						 * and
						 *	Content-Type: multipart/mixed foo/bar
						 */
						if(s && *s) for(;;) {
#ifdef	CL_THREAD_SAFE
							int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
#else
							int set = messageSetMimeType(m, strtok(s, "/"));
#endif

							/*
							 * Stephen White <stephen@earth.li>
							 * Some clients put space after
							 * the mime type but before
							 * the ;
							 */
#ifdef	CL_THREAD_SAFE
							s = strtok_r(NULL, ";", &strptr);
#else
							s = strtok(NULL, ";");
#endif
							if(s == NULL)
								break;
							if(set) {
								size_t len = strstrip(s);

								/*
								 * Only look at the last
								 * character if there is
								 * one: a subtype that is
								 * all white space strips
								 * to nothing, and
								 * "strstrip(s) - 1" would
								 * then wrap around and
								 * index before the start
								 * of s
								 */
								if(len > 0) {
									len--;
									if(s[len] == '\"') {
										s[len] = '\0';
										len = strstrip(s);
									}
								}
								if(len) {
									if(strchr(s, ' ')) {
										char *t = cli_strtok(s, 0, " ");

										messageSetMimeSubtype(m, t);
										free(t);
									} else
										messageSetMimeSubtype(m, s);
								}
							}

							/*
							 * Move on to whatever follows
							 * the next white space, if
							 * anything, and treat that as
							 * another type/subtype
							 */
							while(*s && !isspace((unsigned char)*s))
								s++;
							if(*s++ == '\0')
								break;
							if(*s == '\0')
								break;
						}
						free(mimeType);
					}
				}

				/*
				 * Add in all rest of the arguments.
				 * e.g. if the header is this:
				 *	Content-Type: multipart/mixed; boundary=foo
				 * we find the boundary argument and set it.
				 *
				 * Some clients are broken and
				 * put white space after the ;
				 */
				i = 1;
				while((mimeArgs = cli_strtok(ptr, i++, ";")) != NULL) {
					cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);

					messageAddArguments(m, mimeArgs);
					free(mimeArgs);
				}
			}
			break;
		case CONTENT_TRANSFER_ENCODING:
			messageSetEncoding(m, ptr);
			break;
		case CONTENT_DISPOSITION:
			p = cli_strtok(ptr, 0, ";");
			if(p) {
				if(*p) {
					messageSetDispositionType(m, p);
					free(p);
					/*
					 * The second field, if any, is the
					 * first argument. Don't hand a
					 * missing field on as an argument
					 */
					p = cli_strtok(ptr, 1, ";");
					if(p)
						messageAddArgument(m, p);
				}
				free(p);
			}
			if((p = (char *)messageFindArgument(m, "filename")) == NULL)
				/*
				 * Handle this type of header, without
				 * a filename (e.g. some Worm.Torvil.D)
				 *	Content-ID: <nRfkHdrKsAxRU>
				 *	Content-Transfer-Encoding: base64
				 *	Content-Disposition: attachment
				 */
				messageAddArgument(m, "filename=unknown");
			else
				free(p);
	}
	free(copy);

	return 0;
}
|
e06d34dc |
/* |
cca4efe4 |
* Save the text portion of the message
*/
static void |
3f46285b |
saveTextPart(message *m, const char *dir, int destroy_text) |
cca4efe4 |
{ |
0e5a0129 |
fileblob *fb; |
cca4efe4 |
messageAddArgument(m, "filename=textportion"); |
3f46285b |
if((fb = messageToFileblob(m, dir, destroy_text)) != NULL) { |
cca4efe4 |
/*
* Save main part to scan that
*/ |
c77c8809 |
cli_dbgmsg("Saving main message\n"); |
cca4efe4 |
|
0e5a0129 |
fileblobDestroy(fb); |
cca4efe4 |
}
}
|
a9714c49 |
/*
 * Handle RFC822 comments in headers.
 *
 * in	the header text, may be NULL
 * out	where to put the result, which must not be the same buffer as in.
 *	If out is NULL a buffer of strlen(in) + 1 bytes is allocated and
 *	the caller must free the returned buffer. A buffer supplied by
 *	the caller is written to without any length check.
 *
 * Text inside (nested) parentheses is dropped unless the parentheses are
 * inside double quotes. A backslash is dropped and the character after it
 * is copied as is, unless that character is inside a comment.
 *
 * Returns the buffer holding the result, or NULL on error or if the
 * input has no '(' in it, in which case nothing is written to out.
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
	const char *src;
	char *dst;
	int escaped = 0;	/* previous character was a backslash */
	int quoted = 0;		/* inside double quotes */
	int depth = 0;		/* comment nesting level */

	if((in == NULL) || (strchr(in, '(') == NULL))
		return NULL;

	assert(out != in);
	if(out == NULL) {
		out = cli_malloc(strlen(in) + 1);
		if(out == NULL)
			return NULL;
	}

	dst = out;
	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(src = in; *src != '\0'; src++) {
		const char c = *src;

		if(escaped) {
			/* escaped character: taken as is, if not in a comment */
			if(depth == 0)
				*dst++ = c;
			escaped = 0;
			continue;
		}

		if(c == '\\')
			escaped = 1;
		else if(c == '\"') {
			*dst++ = '\"';
			quoted = !quoted;
		} else if(c == '(') {
			if(quoted)
				*dst++ = '(';
			else
				depth++;
		} else if(c == ')') {
			if(quoted)
				*dst++ = ')';
			else if(depth > 0)
				depth--;
		} else if(depth == 0)
			*dst++ = c;
	}

	if(escaped)	/* last character was a single backslash */
		*dst++ = '\\';
	*dst = '\0';

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
50df4118 |
/*
* Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
* free, or NULL on error
*/
static char *
rfc2047(const char *in)
{
char *out, *pout;
size_t len;
|
4de5fffd |
if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL)) |
0cf4cea7 |
return cli_strdup(in); |
50df4118 |
cli_dbgmsg("rfc2047 '%s'\n", in);
out = cli_malloc(strlen(in) + 1);
if(out == NULL)
return NULL;
pout = out;
/* For each RFC2047 string */
while(*in) { |
cf569541 |
char encoding, *ptr, *enctext; |
50df4118 |
message *m;
blob *b;
/* Find next RFC2047 string */
while(*in) {
if((*in == '=') && (in[1] == '?')) {
in += 2;
break;
}
*pout++ = *in++;
}
/* Skip over charset, find encoding */
while((*in != '?') && *in)
in++;
if(*in == '\0')
break;
encoding = *++in;
encoding = tolower(encoding);
if((encoding != 'q') && (encoding != 'b')) { |
6fcf5624 |
cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding); |
1b00d9a4 |
free(out);
out = NULL; |
50df4118 |
break;
}
/* Skip to encoded text */
if(*++in != '?')
break;
if(*++in == '\0')
break;
|
0cf4cea7 |
enctext = cli_strdup(in); |
cf569541 |
if(enctext == NULL) {
free(out);
out = NULL;
break;
} |
50df4118 |
in = strstr(in, "?="); |
cf569541 |
if(in == NULL) {
free(enctext); |
50df4118 |
break; |
cf569541 |
} |
50df4118 |
in += 2;
ptr = strstr(enctext, "?=");
assert(ptr != NULL);
*ptr = '\0';
/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
m = messageCreate(); |
c77c8809 |
if(m == NULL) |
50df4118 |
break; |
564b3e07 |
messageAddStr(m, enctext); |
cf569541 |
free(enctext); |
767f16ab |
switch(encoding) { |
50df4118 |
case 'q':
messageSetEncoding(m, "quoted-printable");
break;
case 'b':
messageSetEncoding(m, "base64");
break;
} |
2673dc74 |
b = messageToBlob(m, 1); |
50df4118 |
len = blobGetDataSize(b); |
95e11e5a |
cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
blobGetData(b)); |
50df4118 |
memcpy(pout, blobGetData(b), len);
blobDestroy(b);
messageDestroy(m);
if(pout[len - 1] == '\n')
pout += len - 1;
else
pout += len;
} |
ec8e31fa |
if(out == NULL)
return NULL;
*pout = '\0'; |
50df4118 |
|
ec8e31fa |
cli_dbgmsg("rfc2047 returns '%s'\n", out); |
50df4118 |
return out;
}
|
f10460ed |
#ifdef	PARTIAL_DIR
/*
 * Handle partial messages (message/partial, RFC1341).
 *
 * The part is given the filename argument "<id><number>" and saved with
 * messageToFileblob() into the directory "<tmpdir>/clamav-partial", where
 * tmpdir comes from the environment (TMPDIR/TMP/TEMP) or a built-in default.
 * When the part carries a "total" argument equal to its "number", parts
 * 1..total found in that directory are concatenated into "<dir>/<id>"
 * (blank lines at the end of each part are not copied) and, unless
 * cli_leavetemps_flag is set, the part files are removed.
 *
 * Returns 0 on success, -1 on error (no id/number argument, the partial
 * directory can't be created or examined, or a file can't be opened).
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *id, *number, *total, *oldfilename;
	const char *tmpdir;
	char pdir[NAME_MAX + 1];
	struct stat pstatb;

	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		return -1;

#ifdef  C_CYGWIN
	if((tmpdir = getenv("TEMP")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
				tmpdir = "C:\\";
#else
	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TEMP")) == (char *)NULL)
#ifdef	P_tmpdir
				tmpdir = P_tmpdir;
#else
				tmpdir = "/tmp";
#endif
#endif

	snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);

	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", pdir);
		free(id);	/* id is ours from here on, don't leak it */
		return -1;
	}
	if(stat(pdir, &pstatb) < 0) {
		cli_errmsg("Can't stat the directory '%s'\n", pdir);
		free(id);
		return -1;
	}
	if(pstatb.st_mode & 077)
		cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
			pdir, (int)(pstatb.st_mode & 0777));

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL) {
		free(id);
		return -1;
	}

	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 = strlen("filename=") + 1 for the terminator */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg) {
		sprintf(arg, "filename=%s%s", id, number);
		messageAddArgument(m, arg);
		free(arg);
	}

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
		free(id);
		free(number);
		return -1;
	}

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		int n = atoi(number);
		int t = atoi(total);
		DIR *dd = NULL;

		free(total);
		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
			FILE *fout;
			char outname[NAME_MAX + 1];
			time_t now;

			sanitiseName(id);

			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing\n", outname);
				free(id);
				free(number);
				closedir(dd);
				return -1;
			}

			time(&now);
			/*
			 * For each part in turn, search the directory for the
			 * file whose name starts "<id><n>" and append it
			 */
			for(n = 1; n <= t; n++) {
				char filename[NAME_MAX + 1];
				const struct dirent *dent;
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
				union {
					struct dirent d;
					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
				} result;
#endif

				snprintf(filename, sizeof(filename), "%s%d", id, n);

#ifdef HAVE_READDIR_R_3
				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
#elif defined(HAVE_READDIR_R_2)
				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					FILE *fin;
					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
					int nblanks;
					struct stat statb;
					extern short cli_leavetemps_flag;

#ifndef  C_CYGWIN
					if(dent->d_ino == 0)
						continue;
#endif

					snprintf(fullname, sizeof(fullname) - 1,
						"%s/%s", pdir, dent->d_name);

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
						/*
						 * Not the part we're after. When
						 * temporary files are being left,
						 * clear out any that are more than
						 * a week old
						 */
						if(!cli_leavetemps_flag)
							continue;
						if(stat(fullname, &statb) < 0)
							continue;
						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
							if(unlink(fullname) >= 0)
								cli_warnmsg("removed old RFC1341 file %s\n", fullname);
						continue;
					}

					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading\n", fullname);
						fclose(fout);
						unlink(outname);
						free(id);
						free(number);
						closedir(dd);
						return -1;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied
						 */
						if(buffer[0] == '\n')
							nblanks++;
						else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* don't unlink if leave temps */
					if(!cli_leavetemps_flag)
						unlink(fullname);
					break;
				}
				rewinddir(dd);
			}
			closedir(dd);
			fclose(fout);
		}
	}
	free(number);
	free(id);

	return 0;
}
#endif
|
c52d991e |
/*
 * Tidy up after working with a set of hrefs: destroy the blob b when there
 * is one, and free whatever hrefs holds
 */
static void
hrefs_done(blob *b, tag_arguments_t *hrefs)
{
	if(b != NULL) {
		blobDestroy(b);
	}

	html_tag_arg_free(hrefs);
}
/*
 * This used to be part of checkURLs, split out, because phishingScan needs it
 * too, and phishingScan might be used in situations where checkURLs is
 * disabled (see ifdef)
 *
 * Turns message m into a blob and runs html_normalise_mem() over it with
 * hrefs as the argument collector. hrefs is reset (count 0, NULL pointers)
 * just before that call.
 * Returns the blob, which the caller must destroy, or NULL if the message
 * is empty, larger than 100Kb, or could not be converted or normalised.
 */
static blob *
getHrefs(message *m, tag_arguments_t *hrefs)
{
	size_t len;
	blob *b = messageToBlob(m, 0);

	if(b == NULL)
		return NULL;

	len = blobGetDataSize(b);
	if(len == 0)
		goto discard;

	/* TODO: make this size customisable */
	if(len > 100*1024) {
		cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
		goto discard;
	}

	hrefs->count = 0;
	hrefs->value = NULL;
	hrefs->tag = NULL;
	hrefs->contents = NULL;

	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
	if(html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs,m->ctx->dconf)) {
		cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

		/* TODO: Do we need to call remove_html_comments? */
		return b;
	}

discard:
	blobDestroy(b);
	return NULL;
}
|
ad422cc9 |
/*
* Experimental: validate URLs for phishes
* followurls: see if URLs point to malware
*/ |
c52d991e |
static void |
ecc3d638 |
checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
c52d991e |
{ |
94aea271 |
blob *b; |
ad422cc9 |
tag_arguments_t hrefs; |
c52d991e |
|
d77ac7de |
hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE); |
c52d991e |
#if (!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0) |
94aea271 |
if(!hrefs.scanContents) |
093e013c |
/*
* Don't waste time extracting hrefs (parsing html), nobody
* will need it
*/ |
c52d991e |
return;
#endif
|
94aea271 |
hrefs.count = 0;
hrefs.tag = hrefs.value = NULL;
hrefs.contents = NULL;
b = getHrefs(mainMessage, &hrefs);
if(b) { |
d77ac7de |
if(hrefs.scanContents) { |
94aea271 |
if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
mainMessage->isInfected = TRUE; |
ecc3d638 |
*rc = VIRUS; |
94aea271 |
cli_dbgmsg("PH:Phishing found\n");
}
} |
ecc3d638 |
if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS)) |
ad422cc9 |
do_checkURLs(mctx->dir, &hrefs); |
94aea271 |
}
hrefs_done(b,&hrefs); |
c52d991e |
}
|
ad422cc9 |
|
c52d991e |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
static void |
ad422cc9 |
do_checkURLs(const char *dir, tag_arguments_t *hrefs) |
c52d991e |
{
table_t *t;
int i, n; |
93509e9a |
#ifdef CL_THREAD_SAFE |
c52d991e |
pthread_t tid[FOLLOWURLS];
struct arg args[FOLLOWURLS];
#endif
t = tableCreate();
if(t == NULL)
return;
n = 0;
for(i = 0; i < hrefs->count; i++) {
const char *url = (const char *)hrefs->value[i];
/*
* TODO: If it's an image source, it'd be nice to note beacons
* where width="0" height="0", which needs support from
* the HTML normalise code
*/
if(strncasecmp("http://", url, 7) == 0) {
char *ptr;
#ifndef CL_THREAD_SAFE
struct arg arg;
#endif
char name[NAME_MAX + 1];
if(tableFind(t, url) == 1) {
cli_dbgmsg("URL %s already downloaded\n", url);
continue;
}
/*
* What about foreign character spoofing?
*/
if(strchr(url, '%') && strchr(url, '@'))
cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);
if(n == FOLLOWURLS) { |
ad422cc9 |
cli_warnmsg("URL %s will not be scanned (FOLLOWURLS limit %d was reached)\n",
url, FOLLOWURLS); |
c52d991e |
break;
}
(void)tableInsert(t, url, 1);
cli_dbgmsg("Downloading URL %s to be scanned\n", url);
strncpy(name, url, sizeof(name) - 1);
name[sizeof(name) - 1] = '\0';
for(ptr = name; *ptr; ptr++)
if(*ptr == '/')
*ptr = '_';
#ifdef CL_THREAD_SAFE
args[n].dir = dir; |
0cf4cea7 |
args[n].url = cli_strdup(url);
args[n].filename = cli_strdup(name); |
f7cd5fbf |
args[n].depth = 0; |
c52d991e |
pthread_create(&tid[n], NULL, getURL, &args[n]);
#else |
0cf4cea7 |
arg.url = cli_strdup(url); |
c52d991e |
arg.dir = dir;
arg.filename = name; |
f7cd5fbf |
arg.depth = 0; |
c52d991e |
getURL(&arg); |
b362ea45 |
free(arg.url); |
c52d991e |
#endif
++n;
}
}
tableDestroy(t);
|
93509e9a |
#ifdef CL_THREAD_SAFE |
c52d991e |
assert(n <= FOLLOWURLS);
cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
while(--n >= 0) {
pthread_join(tid[n], NULL);
free(args[n].filename); |
b362ea45 |
free(args[n].url); |
c52d991e |
}
#endif
}
|
ad422cc9 |
#else /*!FOLLOWURLS*/ |
c52d991e |
|
71ba1dcd |
static void |
ad422cc9 |
do_checkURLs(const char *dir, tag_arguments_t *hrefs) |
71ba1dcd |
{
}
|
c52d991e |
#endif
|
00a36e85 |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
2c9c9f3b |
/*
* Includes some Win32 patches by Gianluigi Tiesi <sherpya@netfarm.it> |
ec0cef20 |
*
* FIXME: Often WMF exploits work by sending people an email directing them
* to a page which displays a picture containing the exploit. This is not
* currently found, since only the HTML on the referred page is downloaded.
* It would be useful to scan the HTML for references to pictures and
* download them for scanning. But that will hit performance so there is
* an issue here. |
2c9c9f3b |
*/ |
093e013c |
|
10bb79d0 |
/*
* Removing the reliance on libcurl
* Includes some of the freshclam hacks by Everton da Silva Marques
* <everton.marques@gmail.com>
*/
/*
 * Fallback definitions, used only when the system headers don't already
 * supply these macros.
 * timercmp(a, b, cmp) applies the comparison operator cmp to the times
 * pointed to by a and b (objects with tv_sec and tv_usec members, as in
 * struct timeval): the microseconds decide only when the seconds are equal
 */
#ifndef timercmp |
94aea271 |
# define timercmp(a, b, cmp) \
(((a)->tv_sec == (b)->tv_sec) ? \ |
10bb79d0 |
((a)->tv_usec cmp (b)->tv_usec) : \
((a)->tv_sec cmp (b)->tv_sec))
#endif /* timercmp */
/*
 * timersub(a, b, result) stores a - b in *result, borrowing one second
 * when the difference of the microseconds is negative
 */
#ifndef timersub |
94aea271 |
# define timersub(a, b, result) \
do { \
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
if ((result)->tv_usec < 0) { \
--(result)->tv_sec; \
(result)->tv_usec += 1000000; \
} \ |
10bb79d0 |
} while (0)
#endif /* timersub */
|
fed0cf98 |
/* Helpers for getURL, all defined further down this file */
static long nonblock_fcntl(int sock);
static void restore_fcntl(int sock, long fcntl_flags);
static int nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs);
static int connect_error(int sock); |
43bebd43 |
static int my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len); |
10bb79d0 |
|
b362ea45 |
/*
 * Limits for nonblock_connect: how many select() failures it tolerates and
 * how many times select() may return with nothing to do before it gives up
 */
#define NONBLOCK_SELECT_MAX_FAILURES 3
#define NONBLOCK_MAX_BOGUS_LOOPS 10 |
fed0cf98 |
|
8f4b1f3e |
/*
* Simple implementation of a subset of RFC1945 (HTTP/1.0)
* TODO: HTTP/1.1 (RFC2068)
*/ |
10bb79d0 |
static void *
#ifdef CL_THREAD_SAFE
getURL(void *a)
#else
getURL(struct arg *arg)
#endif
{
FILE *fp;
#ifdef CL_THREAD_SAFE
struct arg *arg = (struct arg *)a;
#endif
const char *url = arg->url;
const char *dir = arg->dir;
const char *filename = arg->filename; |
ea541184 |
#ifdef C_WINDOWS
SOCKET sd;
#else
int sd;
#endif |
10bb79d0 |
struct sockaddr_in server; |
ea541184 |
#ifdef HAVE_IN_ADDR_T |
10bb79d0 |
in_addr_t ip; |
ea541184 |
#else |
5162562e |
unsigned int ip; |
ea541184 |
#endif |
fed0cf98 |
in_port_t port;
static in_port_t default_port;
static int tcp; |
f136b57d |
int doingsite, firstpacket; |
10bb79d0 |
char *ptr; |
b362ea45 |
int flags, via_proxy; |
10bb79d0 |
const char *proxy; |
8f4b1f3e |
char buf[BUFSIZ + 1], site[BUFSIZ], fout[NAME_MAX + 1]; |
10bb79d0 |
|
43bebd43 |
if(strlen(url) > (sizeof(site) - 1)) {
cli_dbgmsg("Ignoring long URL \"%s\"\n", url);
return NULL;
}
|
10bb79d0 |
snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename);
fp = fopen(fout, "wb");
if(fp == NULL) {
cli_errmsg("Can't open '%s' for writing", fout);
return NULL;
} |
43bebd43 |
cli_dbgmsg("Saving %s to %s\n", url, fout); |
fed0cf98 |
|
89d4073d |
#ifndef C_BEOS |
fed0cf98 |
if(tcp == 0) {
const struct protoent *proto = getprotobyname("tcp");
if(proto == NULL) {
cli_warnmsg("Unknown prototol tcp, check /etc/protocols\n");
fclose(fp);
return NULL;
}
tcp = proto->p_proto; |
ea541184 |
#ifndef C_WINDOWS |
f136b57d |
endprotoent(); |
ea541184 |
#endif |
fed0cf98 |
} |
89d4073d |
#endif |
fed0cf98 |
if(default_port == 0) {
const struct servent *servent = getservbyname("http", "tcp");
if(servent)
default_port = (in_port_t)ntohs(servent->s_port);
else
default_port = 80; |
89ee28d0 |
#if !defined(C_WINDOWS) && !defined(C_BEOS) |
fed0cf98 |
endservent(); |
ea541184 |
#endif |
fed0cf98 |
}
port = default_port;
|
43bebd43 |
doingsite = 1;
ptr = site;
|
10bb79d0 |
proxy = getenv("http_proxy"); /* FIXME: handle no_proxy */ |
b362ea45 |
via_proxy = (proxy && *proxy);
if(via_proxy) { |
10bb79d0 |
if(strncasecmp(proxy, "http://", 7) != 0) {
cli_warnmsg("Unsupported proxy protocol\n");
fclose(fp);
return NULL;
}
|
43bebd43 |
cli_dbgmsg("Getting %s via %s\n", url, proxy);
|
10bb79d0 |
proxy += 7;
while(*proxy) {
if(doingsite && (*proxy == ':')) {
port = 0;
while(isdigit(*++proxy)) {
port *= 10;
port += *proxy - '0';
}
continue;
}
if(doingsite && (*proxy == '/')) {
proxy++;
break;
}
*ptr++ = *proxy++;
}
} else {
cli_dbgmsg("Getting %s\n", url);
if(strncasecmp(url, "http://", 7) != 0) {
cli_warnmsg("Unsupported protocol\n");
fclose(fp);
return NULL;
}
url += 7;
while(*url) {
if(doingsite && (*url == ':')) {
port = 0;
while(isdigit(*++url)) {
port *= 10;
port += *url - '0';
}
continue;
}
if(doingsite && (*url == '/')) {
url++;
break;
}
*ptr++ = *url++;
} |
43bebd43 |
}
*ptr = '\0'; |
10bb79d0 |
|
43bebd43 |
memset((char *)&server, '\0', sizeof(struct sockaddr_in));
server.sin_family = AF_INET;
server.sin_port = (in_port_t)htons(port); |
10bb79d0 |
|
43bebd43 |
ip = inet_addr(site); |
10bb79d0 |
#ifdef INADDR_NONE |
43bebd43 |
if(ip == INADDR_NONE) { |
10bb79d0 |
#else |
43bebd43 |
if(ip == (in_addr_t)-1) { |
10bb79d0 |
#endif |
43bebd43 |
struct hostent h; |
f136b57d |
if((my_r_gethostbyname(site, &h, buf, sizeof(buf)) != 0) ||
(h.h_addr_list == NULL) ||
(h.h_addr == NULL)) { |
43bebd43 |
cli_dbgmsg("Unknown host %s\n", site); |
10bb79d0 |
fclose(fp);
return NULL;
}
|
43bebd43 |
memcpy((char *)&ip, h.h_addr, sizeof(ip)); |
10bb79d0 |
} |
43bebd43 |
server.sin_addr.s_addr = ip;
if((sd = socket(AF_INET, SOCK_STREAM, tcp)) < 0) {
fclose(fp);
return NULL;
}
flags = nonblock_fcntl(sd);
if(nonblock_connect(sd, (struct sockaddr *)&server, sizeof(struct sockaddr_in), 5) < 0) { |
ea541184 |
closesocket(sd); |
43bebd43 |
fclose(fp);
return NULL;
}
restore_fcntl(sd, flags);
/*
* TODO: consider HTTP/1.1
*/ |
b362ea45 |
if(via_proxy) |
43bebd43 |
snprintf(buf, sizeof(buf) - 1, |
8a894fd8 |
"GET %s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
url, VERSION); |
43bebd43 |
else
snprintf(buf, sizeof(buf) - 1, |
8a894fd8 |
"GET /%s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
url, VERSION); |
10bb79d0 |
|
93509e9a |
/*cli_dbgmsg("%s", buf);*/ |
b362ea45 |
|
ea541184 |
if(send(sd, buf, (int)strlen(buf), 0) < 0) {
closesocket(sd); |
10bb79d0 |
fclose(fp);
return NULL;
}
|
ea541184 |
#ifdef SHUT_WR |
10bb79d0 |
shutdown(sd, SHUT_WR); |
ea541184 |
#else
shutdown(sd, 1);
#endif |
10bb79d0 |
|
f136b57d |
firstpacket = 1;
|
10bb79d0 |
for(;;) {
fd_set set;
struct timeval tv; |
2cc843da |
int n; |
10bb79d0 |
FD_ZERO(&set);
FD_SET(sd, &set);
tv.tv_sec = 30; /* FIXME: make this customisable */
tv.tv_usec = 0;
if(select(sd + 1, &set, NULL, NULL, &tv) < 0) {
if(errno == EINTR)
continue; |
ea541184 |
closesocket(sd); |
10bb79d0 |
fclose(fp);
return NULL;
}
if(!FD_ISSET(sd, &set)) {
fclose(fp); |
ea541184 |
closesocket(sd); |
10bb79d0 |
return NULL;
} |
884f536d |
n = recv(sd, buf, sizeof(buf) - 1, 0); |
f136b57d |
|
10bb79d0 |
if(n < 0) {
fclose(fp); |
ea541184 |
closesocket(sd); |
10bb79d0 |
return NULL;
}
if(n == 0)
break; |
398d802b |
/*
* FIXME: Handle header in more than one packet
*/ |
f136b57d |
if(firstpacket) {
char *statusptr;
buf[n] = '\0';
statusptr = cli_strtok(buf, 1, " ");
if(statusptr) {
int status = atoi(statusptr);
cli_dbgmsg("HTTP status %d\n", status);
free(statusptr); |
b362ea45 |
if((status == 301) || (status == 302)) {
char *location;
location = strstr(buf, "\nLocation: ");
if(location) {
char *end;
|
f7cd5fbf |
unlink(fout);
if(arg->depth >= FOLLOWURLS) {
cli_warnmsg("URL %s will not be followed to %s (FOLLOWURLS limit %d was reached)\n",
arg->url, location, FOLLOWURLS);
break;
}
|
b362ea45 |
fclose(fp); |
ea541184 |
closesocket(sd); |
b362ea45 |
location += 11;
free(arg->url);
end = location;
while(*end && (*end != '\n'))
end++;
*end = '\0'; |
0cf4cea7 |
arg->url = cli_strdup(location); |
f7cd5fbf |
arg->depth++; |
b362ea45 |
cli_dbgmsg("Redirecting to %s\n", arg->url);
return getURL(arg);
}
} |
f136b57d |
} |
398d802b |
/*
* Don't write the HTTP header
*/ |
8f4b1f3e |
if((ptr = strstr(buf, "\r\n\r\n")) != NULL) {
ptr += 4;
n -= (int)(ptr - buf);
} else if((ptr = strstr(buf, "\n\n")) != NULL) { |
398d802b |
ptr += 2;
n -= (int)(ptr - buf);
} else
ptr = buf;
|
f136b57d |
firstpacket = 0; |
398d802b |
} else
ptr = buf; |
f136b57d |
|
2cc843da |
if(n && (fwrite(ptr, n, 1, fp) != 1)) { |
10bb79d0 |
cli_warnmsg("Error writing %d bytes to %s\n",
n, fout);
break;
}
}
fclose(fp); |
ea541184 |
closesocket(sd); |
10bb79d0 |
return NULL;
}
|
43bebd43 |
/*
 * Have a copy here because r_gethostbyname is in shared not libclamav :-(
 *
 * Thread safe lookup of hostname. The result is stored in *hp; buf (of len
 * bytes) is work space handed to the reentrant resolver calls.
 * Returns 0 on success, non-zero if the lookup failed, in which case *hp
 * must not be used.
 */
static int
my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
{
#if	defined(HAVE_GETHOSTBYNAME_R_6)
	/* e.g. Linux */
	struct hostent *hp2 = NULL;
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	/*
	 * This form returns 0 on success and a non-zero error number on
	 * failure; a lookup that finds nothing can also return 0 and
	 * leave hp2 NULL, so both must be checked
	 */
	if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) ||
	   (hp2 == NULL))
		return (ret != 0) ? ret : -1;
#elif	defined(HAVE_GETHOSTBYNAME_R_5)
	/* e.g. BSD, Solaris, Cygwin */
	/*
	 * Configure doesn't work on BeOS. We need -lnet to link, but configure
	 * doesn't add it, so you need to do something like
	 *	LIBS=-lnet ./configure --enable-cache --disable-clamav
	 */
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
		return (ret != 0) ? ret : -1;
#elif	defined(HAVE_GETHOSTBYNAME_R_3)
	/* e.g. HP/UX, AIX */
	if((hostname == NULL) || (hp == NULL))
		return -1;
	/*
	 * NOTE(review): the second argument of this form is the
	 * struct hostent to fill in, i.e. hp itself, not its address -
	 * confirm against the platform's <netdb.h>
	 */
	if(gethostbyname_r(hostname, hp, (struct hostent_data *)buf) < 0)
		return h_errno;
#else
	/* Single thread the code e.g. VS2005 */
	struct hostent *hp2;
#ifdef  CL_THREAD_SAFE
	static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

	if((hostname == NULL) || (hp == NULL))
		return -1;
#ifdef  CL_THREAD_SAFE
	pthread_mutex_lock(&hostent_mutex);
#endif
	if((hp2 = gethostbyname(hostname)) == NULL) {
#ifdef  CL_THREAD_SAFE
		pthread_mutex_unlock(&hostent_mutex);
#endif
		return h_errno;
	}
	/*
	 * This copies the structure only, the pointers inside it still
	 * refer to storage owned by gethostbyname()
	 */
	memcpy(hp, hp2, sizeof(struct hostent));
#ifdef  CL_THREAD_SAFE
	pthread_mutex_unlock(&hostent_mutex);
#endif
#endif

	return 0;
}
|
10bb79d0 |
/*
 * Put sock into non-blocking mode.
 * Returns the file status flags as they were before the change, for
 * handing to restore_fcntl() later; the value is negative if the flags
 * could not be read, in which case the socket is left alone. Where F_GETFL
 * is not available nothing is done and 0 is returned.
 */
static long
nonblock_fcntl(int sock)
{
#ifdef	F_GETFL
	const long saved = fcntl(sock, F_GETFL, 0);

	if(saved < 0) {
		cli_warnmsg("nonblock_fcntl: saving: fcntl(%d, F_GETFL): errno=%d: %s\n",
			sock, errno, strerror(errno));
		return saved;
	}

	if(fcntl(sock, F_SETFL, saved | O_NONBLOCK) != 0)
		cli_warnmsg("nonblock_fcntl: fcntl(%d, F_SETFL, O_NONBLOCK): errno=%d: %s\n",
			sock, errno, strerror(errno));

	return saved;
#else
	return 0L;
#endif
}
/*
 * Put back the file status flags of sock that nonblock_fcntl() returned.
 * A value of -1 (the flags could not be read) means there is nothing to
 * restore.
 */
static void
restore_fcntl(int sock, long fcntl_flags)
{
#ifdef	F_SETFL
	if(fcntl_flags == -1)
		return;

	if(fcntl(sock, F_SETFL, fcntl_flags) != 0)
		cli_warnmsg("restore_fcntl: restoring: fcntl(%d, F_SETFL): errno=%d: %s\n",
			sock, errno, strerror(errno));
#endif
}
/*
 * Connect sock to addr, waiting no more than secs seconds for the
 * connection to be made. The socket is expected to have been put into
 * non-blocking mode by the caller.
 * Returns 0 if connected, -1 on failure: the connect was refused outright,
 * the time ran out, select() failed more than NONBLOCK_SELECT_MAX_FAILURES
 * times or returned with nothing to do more than NONBLOCK_MAX_BOGUS_LOOPS
 * times, or connect_error() reports a pending error on the socket.
 */
static int
nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs)
{
	int failures_left = NONBLOCK_SELECT_MAX_FAILURES;
	int idle_left = NONBLOCK_MAX_BOGUS_LOOPS;
	struct timeval deadline;	/* when to give up */
	int err;

	gettimeofday(&deadline, 0);
	deadline.tv_sec += secs;

	/* Launch (possibly) non-blocking connect() request */
	if(connect(sock, addr, addrlen) == 0)
		return connect_error(sock);

	err = errno;

	cli_dbgmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n",
		sock, err, strerror(err));

	if(err == EISCONN)
		return 0;	/* connected */

	if((err != EALREADY) && (err != EINPROGRESS)) {
		cli_warnmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n",
			sock, err, strerror(err));
		return -1;	/* failed */
	}

	/* The connection is under way, wait for it to complete */
	for(;;) {
		fd_set writable;
		struct timeval now, remaining;
		int ready;

		gettimeofday(&now, 0);
		if(timercmp(&now, &deadline, >)) {
			cli_warnmsg("connect timing out (%d secs)\n",
				secs);
			break;
		}

		/* remaining = deadline - now */
		timersub(&deadline, &now, &remaining);

		FD_ZERO(&writable);
		FD_SET(sock, &writable);

		ready = select(sock + 1, 0, &writable, 0, &remaining);
		if(ready < 0) {
			cli_warnmsg("nonblock_connect: select() failure %d: errno=%d: %s\n",
				failures_left, errno, strerror(errno));
			if(--failures_left >= 0)
				continue;	/* keep waiting */
			return -1;
		}

		cli_dbgmsg("nonblock_connect: select = %d\n", ready);

		if(ready)
			return connect_error(sock);

		/* Select returned, but there is no work to do... */
		if(--idle_left < 0) {
			cli_warnmsg("nonblock_connect: giving up due to excessive bogus loops\n");
			break;
		}
	}

	return -1;	/* failed */
}
/*
 * Find out whether the connection on sock has been made, by asking for the
 * socket's pending error (SO_ERROR).
 * Returns 0 if there is no error, -1 if there is one or if it can't be
 * determined. Where SO_ERROR is not available 0 is always returned.
 */
static int
connect_error(int sock)
{
#ifdef	SO_ERROR
	int optval = 0;
	socklen_t optlen = sizeof(optval);

	/*
	 * If the call itself fails optval tells us nothing, so that has to
	 * be treated as a failure to connect
	 */
	if(getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen) < 0) {
		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d errno=%d: %s\n",
			sock, errno, strerror(errno));
		return -1;
	}

	if(optval) {
		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d error=%d: %s\n",
			sock, optval, strerror(optval));
		return -1;
	}

	return 0;
#else
	return 0;
#endif
}
|
9b4bb8b7 |
#endif |
93509e9a |
|
f2f25418 |
#ifdef HAVE_BACKTRACE |
4f1d0bfc |
/*
 * SIGSEGV handler: put the default action back so that a second fault is
 * not caught here, send a backtrace to syslog, then exit with SIGSEGV as
 * the status. The signal number passed in is not used.
 */
static void
sigsegv(int sig)
{
	(void)sig;

	signal(SIGSEGV, SIG_DFL);

	print_trace(1);

	exit(SIGSEGV);
}
|
4f1d0bfc |
/*
 * Report a backtrace of up to 10 frames of the calling thread.
 * If use_syslog is non-zero the report goes to syslog at LOG_ERR,
 * otherwise it is written with cli_dbgmsg.
 */
static void
print_trace(int use_syslog)
{
	void *array[10];
	int size, i;
	char **strings;
	const int pid = (int)getpid();

	size = backtrace(array, 10);
	strings = backtrace_symbols(array, size);

	if(use_syslog == 0)
		cli_dbgmsg("Backtrace of pid %d:\n", pid);
	else
		syslog(LOG_ERR, "Backtrace of pid %d:", pid);

	if(strings == NULL)
		/* no memory for the symbol names, nothing more to report */
		return;

	for(i = 0; i < size; i++)
		if(use_syslog)
			syslog(LOG_ERR, "bt[%u]: %s", (unsigned int)i, strings[i]);
		else
			cli_dbgmsg("%s\n", strings[i]);

	/* TODO: dump the current email */

	free(strings);
}
#endif |
f24bf390 |
|
6cad6a15 |
/* See also clamav-milter */ |
ddea752e |
static bool
usefulHeader(int commandNumber, const char *cmd)
{
switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE:
return TRUE;
default:
if(strcasecmp(cmd, "From") == 0)
return TRUE; |
6cad6a15 |
if(strcasecmp(cmd, "Received") == 0) |
ddea752e |
return TRUE; |
6cad6a15 |
if(strcasecmp(cmd, "De") == 0) |
ddea752e |
return TRUE;
}
return FALSE;
}
|
ae5c693a |
/*
 * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
 *
 * Reads one line from fin into buffer (len bytes). Whichever line ending
 * was read, it is stored as a single '\n'. A line that is too long for the
 * buffer is returned in pieces by successive calls.
 * Returns buffer, or NULL at end of file, on a read error, or if called
 * with no buffer.
 * NOTE(review): with len == 1 and a line ending as the first character the
 * terminator is stored one byte beyond the buffer; presumably callers
 * always pass much larger buffers - confirm.
 */
static char *
getline_from_mbox(char *buffer, size_t len, FILE *fin)
{
	char *const start = buffer;

	if(feof(fin))
		return NULL;

	if((len == 0) || (buffer == NULL)) {
		cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs\n");
		return NULL;
	}

	for(;;) {
		int c = getc(fin);

		if(ferror(fin))
			return NULL;

		if(c == EOF)
			break;

		if((c == '\n') || (c == '\r')) {
			/* swallow the other half of a two character ending */
			const int partner = (c == '\n') ? '\r' : '\n';

			*buffer++ = '\n';
			c = getc(fin);
			if((c != partner) && !feof(fin))
				ungetc(c, fin);
			break;
		}

		*buffer++ = (char)c;

		/* only ordinary characters count against the space left */
		if(--len <= 1)
			break;
	}

	if(len == 0) {
		/* the email probably breaks RFC821 */
		cli_warnmsg("getline_from_mbox: buffer overflow stopped, line lost\n");
		return NULL;
	}
	*buffer = '\0';

	if(len == 1)
		/* overflows will have appeared on separate lines */
		cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");

	return start;
}
b2ba24f5 |
|
a603478f |
/*
* Is this line a candidate for the start of a bounce message?
*/ |
b2ba24f5 |
static bool |
a603478f |
isBounceStart(const char *line) |
b2ba24f5 |
{
if(line == NULL)
return FALSE;
if(*line == '\0')
return FALSE; |
25071deb |
/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5])) |
b2ba24f5 |
return FALSE;
if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6])) |
25071deb |
return FALSE;*/ |
edee0700 |
if(cli_filetype((const unsigned char *)line, strlen(line)) != CL_TYPE_MAIL) |
b2ba24f5 |
return FALSE;
if((strncmp(line, "From ", 5) == 0) ||
(strncmp(line, ">From ", 6) == 0)) {
int numSpaces = 0, numDigits = 0;
do
if(*line == ' ')
numSpaces++;
else if(isdigit(*line))
numDigits++;
while(*++line != '\0');
if(numSpaces < 6)
return FALSE;
if(numDigits < 11)
return FALSE;
}
return TRUE;
} |
a05e6d45 |
/*
* Extract a binhexEncoded message, return if it's found to be infected as we
* extract it
*/
static bool |
47d9cc65 |
exportBinhexMessage(const char *dir, message *m) |
a05e6d45 |
{
bool infected = FALSE;
fileblob *fb;
if(messageGetEncoding(m) == NOENCODING)
messageSetEncoding(m, "x-binhex");
|
2673dc74 |
fb = messageToFileblob(m, dir, 0); |
a05e6d45 |
if(fb) {
if(fileblobContainsVirus(fb))
infected = TRUE;
cli_dbgmsg("Binhex file decoded to %s\n",
fileblobGetFilename(fb));
fileblobDestroy(fb);
} else
cli_errmsg("Couldn't decode binhex file to %s\n", dir);
return infected;
} |
c1fce7f7 |
/*
 * Locate any bounce message and extract it. Return 1 if anything found
 *
 * Attempt to save the original (unbounced) message - clamscan will find
 * that in the directory and call us again (with any luck) having found an
 * e-mail message to handle.
 *
 * The lines from start onwards are searched for the first header that
 * suggests there is something to scan: a Content-Disposition, a
 * Content-Transfer-Encoding that is neither 7bit nor 8bit, or a
 * Content-Type that is not text/plain (a text/plain one ends the search
 * with nothing found). The text is saved, in the file "bounce" in
 * mctx->dir, from the last From or Received header seen before that point,
 * or from start if there was none.
 *
 * This finds a lot of false positives, the search that a content type is
 * in the bounce (i.e. it's after the bounce header) helps a bit.
 *
 * messageAddLine optimisation could help here, but needs careful thought,
 * do it with line numbers would be best, since the current method in
 * messageAddLine of checking encoding first must remain otherwise non
 * bounce messages won't be scanned
 */
static int
exportBounceMessage(text *start, const mbox_ctx *mctx)
{
	text *t, *found = NULL;
	fileblob *fb;
	int rc;

	for(t = start; t != NULL; t = t->t_next) {
		const char *txt = lineGetData(t->t_line);
		char cmd[RFC2821LENGTH + 1];
		int header;

		if(txt == NULL)
			continue;
		if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
			continue;

		header = tableFind(mctx->rfc821Table, cmd);

		if(header == CONTENT_TRANSFER_ENCODING) {
			if((strstr(txt, "7bit") != NULL) ||
			   (strstr(txt, "8bit") != NULL))
				/* nothing encoded here, keep looking */
				continue;
			found = t;
			break;
		}
		if(header == CONTENT_DISPOSITION) {
			found = t;
			break;
		}
		if(header == CONTENT_TYPE) {
			/* plain text ends the search empty handed */
			if(strstr(txt, "text/plain") == NULL)
				found = t;
			break;
		}

		/* remember where the bounced message could start */
		if((strcasecmp(cmd, "From") == 0) ||
		   (strcasecmp(cmd, "Received") == 0))
			start = t;
	}

	fb = (found != NULL) ? fileblobCreate() : NULL;
	if(fb == NULL) {
		cli_dbgmsg("Not found a bounce message\n");
		return 0;
	}

	cli_dbgmsg("Found a bounce message\n");
	fileblobSetFilename(fb, mctx->dir, "bounce");
	/*fileblobSetCTX(fb, mctx->ctx);*/
	if(textToFileblob(start, fb, 1) == NULL) {
		cli_dbgmsg("Nothing new to save in the bounce message\n");
		rc = 0;
	} else
		rc = 1;

	fileblobDestroy(fb);

	return rc;
}
/* |
c1fce7f7 |
* Handle the ith element of a number of multiparts, e.g. multipart/alternative
*/
static message * |
ecc3d638 |
do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level) |
c1fce7f7 |
{
bool addToText = FALSE;
const char *dtype;
#ifndef SAVE_TO_DISC
message *body;
#endif
message *aMessage = messages[i];
|
d77ac7de |
const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
|
c1fce7f7 |
if(aMessage == NULL)
return mainMessage;
|
69c62847 |
if(*rc != OK)
return mainMessage;
|
c1fce7f7 |
cli_dbgmsg("Mixed message part %d is of type %d\n",
i, messageGetMimeType(aMessage));
switch(messageGetMimeType(aMessage)) {
case APPLICATION:
case AUDIO:
case IMAGE:
case VIDEO:
break;
case NOMIME:
cli_dbgmsg("No mime headers found in multipart part %d\n", i);
if(mainMessage) {
if(binhexBegin(aMessage)) {
cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
|
47d9cc65 |
if(exportBinhexMessage(mctx->dir, mainMessage)) |
5684fccf |
*rc = VIRUS; |
c1fce7f7 |
}
if(mainMessage != messageIn)
messageDestroy(mainMessage);
mainMessage = NULL;
} else if(aMessage) {
if(binhexBegin(aMessage)) {
cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n"); |
47d9cc65 |
if(exportBinhexMessage(mctx->dir, aMessage)) |
5684fccf |
*rc = VIRUS; |
c1fce7f7 |
assert(aMessage == messages[i]);
messageReset(messages[i]);
}
}
addToText = TRUE;
if(messageGetBody(aMessage) == NULL)
/*
* No plain text version
*/
cli_dbgmsg("No plain text alternative\n");
break;
case TEXT:
dtype = messageGetDispositionType(aMessage);
cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
dtype);
if(strcasecmp(dtype, "attachment") == 0)
break;
if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
const char *cptr;
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
mainMessage = NULL;
cptr = messageGetMimeSubtype(aMessage);
cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
(messageGetEncoding(aMessage) == NOENCODING)) {
char *filename;
/*
* Strictly speaking
* a text/plain part is
* not an attachment. We
* pretend it is so that
* we can decode and
* scan it
*/
filename = (char *)messageFindArgument(aMessage, "filename");
if(filename == NULL)
filename = (char *)messageFindArgument(aMessage, "name");
if(filename == NULL) {
cli_dbgmsg("Adding part to main message\n");
addToText = TRUE;
} else {
cli_dbgmsg("Treating %s as attachment\n",
filename);
free(filename);
}
} else { |
56aea026 |
const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML); |
c3a79a7a |
if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html) |
56aea026 |
checkURLs(aMessage, mctx, rc, 1); |
d77ac7de |
else if(doPhishingScan) |
56aea026 |
checkURLs(aMessage, mctx, rc, is_html); |
c1fce7f7 |
messageAddArgument(aMessage,
"filename=mixedtextportion");
}
break;
}
cli_dbgmsg("Text type %s is not supported\n", dtype);
return mainMessage;
case MESSAGE:
/* Content-Type: message/rfc822 */
cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
messageGetEncoding(aMessage));
#ifndef SCAN_UNENCODED_BOUNCES
switch(messageGetEncoding(aMessage)) {
case NOENCODING:
case EIGHTBIT:
case BINARY:
if(encodingLine(aMessage) == NULL) {
/*
* This means that the message
* has no attachments
*
* The test for
* messageGetEncoding is needed
* since encodingLine won't have
* been set if the message
* itself has been encoded
*/
cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
return mainMessage;
}
/* FALLTHROUGH */
default:
cli_dbgmsg("Encoded multipart/message will be scanned\n");
}
#endif
#if 0
messageAddStrAtTop(aMessage,
"Received: by clamd (message/rfc822)");
#endif
#ifdef SAVE_TO_DISC
/*
* Save this embedded message
* to a temporary file
*/ |
3f46285b |
saveTextPart(aMessage, mctx->dir, 1); |
c1fce7f7 |
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
#else
/* |
b912eaf2 |
* Scan in memory, faster but is open to DoS attacks
* when many nested levels are involved. |
c1fce7f7 |
*/ |
0072fa21 |
body = parseEmailHeaders(aMessage, mctx->rfc821Table);
|
c1fce7f7 |
/*
* We've fininished with the
* original copy of the message,
* so throw that away and
* deal with the encapsulated
* message as a message.
* This can save a lot of memory
*/
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
if(body) { |
0072fa21 |
messageSetCTX(body, mctx->ctx);
*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1); |
69c62847 |
if((*rc == OK) && messageContainsVirus(body)) |
5684fccf |
*rc = VIRUS; |
c1fce7f7 |
messageDestroy(body);
}
#endif
return mainMessage;
case MULTIPART:
/*
* It's a multi part within a multi part
* Run the message parser on this bit, it won't
* be an attachment
*/
cli_dbgmsg("Found multipart inside multipart\n");
if(aMessage) {
/*
* The headers were parsed when reading in the
* whole multipart section
*/ |
242ffd7a |
*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1); |
69c62847 |
cli_dbgmsg("Finished recursion, rc = %d\n", *rc); |
c1fce7f7 |
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
} else { |
242ffd7a |
*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); |
c1fce7f7 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
mainMessage = NULL;
}
return mainMessage;
default:
cli_warnmsg("Only text and application attachments are supported, type = %d\n",
messageGetMimeType(aMessage));
return mainMessage;
}
if(addToText) {
cli_dbgmsg("Adding to non mime-part\n");
*tptr = textAdd(*tptr, messageGetBody(aMessage));
} else { |
2673dc74 |
fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1); |
c1fce7f7 |
if(fb) {
if(fileblobContainsVirus(fb)) |
5684fccf |
*rc = VIRUS; |
c1fce7f7 |
fileblobDestroy(fb);
}
}
if(messageContainsVirus(aMessage)) |
5684fccf |
*rc = VIRUS; |
c1fce7f7 |
messageDestroy(aMessage);
messages[i] = NULL;
return mainMessage;
} |
4f4a8f4a |
/*
 * Returns the number of quote characters in the given string
 *
 * buf must be a NUL terminated string. Only the double quote character (")
 * is counted.
 */
static int
count_quotes(const char *buf)
{
	int quotes = 0;

	for(; *buf != '\0'; buf++)
		if(*buf == '"')
			quotes++;

	return quotes;
}
842c7d49 |
/*
 * Will the next line be a folded header? See RFC2822 section 2.2.3
 *
 * t is the current header line. Returns TRUE if the line following t looks
 * like a continuation of t, FALSE otherwise (including when there is no
 * following line, or either line has no data).
 */
static bool
next_is_folded_header(const text *t)
{
	const text *next = t->t_next;
	const char *data;
	size_t i;

	if(next == NULL)
		return FALSE;

	if(next->t_line == NULL)
		return FALSE;

	data = lineGetData(next->t_line);

	/*
	 * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
	 * previous entry.
	 *
	 * The cast is needed since passing a negative char to the ctype
	 * functions is undefined
	 */
	if(isblank((unsigned char)data[0]))
		return TRUE;

	if(strchr(data, '=') == NULL)
		/*
		 * Avoid false positives with
		 *	Content-Type: text/html;
		 *	Content-Transfer-Encoding: quoted-printable
		 */
		return FALSE;

	/*
	 * Some are broken and don't fold headers lines
	 * correctly as per section 2.2.3 of RFC2822.
	 * Generally they miss the white space at
	 * the start of the fold line:
	 *	Content-Type: multipart/related;
	 *	type="multipart/alternative";
	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
	 * should read:
	 *	Content-Type: multipart/related;
	 *	 type="multipart/alternative";
	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
	 * Since we're a virus checker not an RFC
	 * verifier we need to handle these
	 */
	data = lineGetData(t->t_line);
	if(data == NULL)
		/* The current line is empty, so there's nothing to fold */
		return FALSE;

	/*
	 * Look backwards from the end of the current line, skipping trailing
	 * white space, to see if it ends with a ';'. The first character of
	 * the line is deliberately not looked at. An index is used rather
	 * than a pointer so that we never step before the start of the line.
	 */
	i = strlen(data);
	while(i > 1)
		switch(data[--i]) {
			case ';':
				return TRUE;
			case '\n':
			case ' ':
			case '\r':
			case '\t':
				continue;	/* white space at end of line */
			default:
				return FALSE;
		}

	return FALSE;
}
0cf4cea7 |
/*
 * This routine is called on the first line of the body of
 * an email to handle broken messages that have newlines
 * in the middle of its headers
 *
 * Returns TRUE if the line starts with "Message-Id: " or "Date: " (the
 * comparison is case sensitive), i.e. it looks like a header that has ended
 * up in the body, FALSE otherwise.
 */
static bool
newline_in_header(const char *line)
{
	static const char message_id[] = "Message-Id: ";
	static const char date[] = "Date: ";

	cli_dbgmsg("newline_in_header, check \"%s\"\n", line);

	/* sizeof - 1 is the length of the prefix without its trailing NUL */
	if(strncmp(line, message_id, sizeof(message_id) - 1) == 0)
		return TRUE;
	if(strncmp(line, date, sizeof(date) - 1) == 0)
		return TRUE;

	return FALSE;
}