b151ef55 |
/* |
70b54406 |
* Copyright (C) 2002-2006 Nigel Horne <njh@bandsman.co.uk> |
b151ef55 |
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software |
30738099 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA. |
b151ef55 |
*/ |
4db74788 |
static char const rcsid[] = "$Id: message.c,v 1.193 2007/01/07 21:30:49 njh Exp $"; |
8b242bb9 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif |
b151ef55 |
#ifndef CL_DEBUG |
0b08b624 |
#define NDEBUG /* map CLAMAV debug onto standard */ |
b151ef55 |
#endif
#ifdef CL_THREAD_SAFE |
dd8a7e90 |
#ifndef _REENTRANT |
b151ef55 |
#define _REENTRANT /* for Solaris 2.8 */
#endif |
dd8a7e90 |
#endif |
b151ef55 |
|
f0146bc6 |
#ifdef C_DARWIN |
b151ef55 |
#include <sys/types.h>
#endif
#include <stdlib.h>
#include <string.h> |
40d54f7f |
#ifdef HAVE_STRINGS_H |
b151ef55 |
#include <strings.h> |
40d54f7f |
#endif |
b151ef55 |
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
|
8a88fb93 |
#ifdef CL_THREAD_SAFE
#include <pthread.h>
#endif
|
5c86c162 |
#include "others.h"
#include "str.h"
#include "filetypes.h"
|
b151ef55 |
#include "mbox.h"
|
06bce849 |
/*
 * Fallback for platforms where isblank() is not available as a macro:
 * only the space and horizontal tab characters count as blank.
 */
#ifndef	isblank
#define	isblank(c)	(((c) == ' ') || ((c) == '\t'))
#endif

#define	RFC2045LENGTH	76	/* maximum number of characters on a line */

/*
 * Boolean type. Use the compiler's <stdbool.h> when the build says it
 * exists; otherwise provide a local bool. If FALSE has already been
 * defined elsewhere, only the type is introduced, if not the FALSE and
 * TRUE enumerators are created together with it.
 */
#if	defined(HAVE_STDBOOL_H)
#include <stdbool.h>
#elif	defined(FALSE)
typedef	unsigned char	bool;
#else
typedef	enum { FALSE = 0, TRUE = 1 } bool;
#endif
b151ef55 |
|
de617e3e |
static void messageIsEncoding(message *m); |
285a69b4 |
static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast); |
fb405afc |
static void sanitiseBase64(char *s); |
f5d6e670 |
#ifdef __GNUC__
static unsigned char hex(char c) __attribute__((const));
static unsigned char base64(char c) __attribute__((const));
static unsigned char uudecode(char c) __attribute__((const));
#else |
b151ef55 |
static unsigned char hex(char c);
static unsigned char base64(char c);
static unsigned char uudecode(char c); |
f5d6e670 |
#endif |
b151ef55 |
static const char *messageGetArgument(const message *m, int arg); |
985cc85e |
static void *messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text); |
b4cb4486 |
static int usefulArg(const char *arg); |
e24738dc |
static void messageDedup(message *m); |
5e5a162c |
static char *rfc2231(const char *in); |
b329234a |
static int simil(const char *str1, const char *str2); |
b151ef55 |
/*
* These maps are ordered in decreasing likelyhood of their appearance |
0e3b08fc |
* in an e-mail. Probably these should be in a table... |
b151ef55 |
*/
static const struct encoding_map {
const char *string;
encoding_type type; |
da850706 |
} encoding_map[] = { /* rfc2045 */ |
b151ef55 |
{ "7bit", NOENCODING }, |
b759d5eb |
{ "text/plain", NOENCODING }, |
da850706 |
{ "quoted-printable", QUOTEDPRINTABLE }, /* rfc2045 */ |
98685ac1 |
{ "base64", BASE64 }, /* rfc2045 */ |
b151ef55 |
{ "8bit", EIGHTBIT }, |
bb2432d7 |
{ "binary", BINARY }, |
af66c329 |
{ "x-uuencode", UUENCODE }, /* uuencode(5) */ |
00f95393 |
{ "x-yencode", YENCODE }, |
fef5ad63 |
{ "x-binhex", BINHEX }, |
7ea0c270 |
{ "us-ascii", NOENCODING }, /* incorrect */ |
ef3cf57d |
{ "x-uue", UUENCODE }, /* incorrect */ |
a4c3d0a3 |
{ "uuencode", UUENCODE }, /* incorrect */ |
6ba88eb8 |
{ NULL, NOENCODING } |
b151ef55 |
};
/*
 * Maps the major part of a Content-Type value (the text before the '/')
 * onto the mime_type used internally.
 *
 * The table is only ever read, so it is const like encoding_map; the
 * code that walks it already does so through a pointer to const.
 *
 * The table is terminated by an entry whose string is NULL.
 */
static	const	struct	mime_map {
	const	char	*string;
	mime_type	type;
} mime_map[] = {
	{	"text",		TEXT		},
	{	"multipart",	MULTIPART	},
	{	"application",	APPLICATION	},
	{	"audio",	AUDIO		},
	{	"image",	IMAGE		},
	{	"message",	MESSAGE		},
	{	"video",	VIDEO		},
	{	NULL,		TEXT		}
};
|
fe6ce0ba |
/*
 * Base64 decoding table, indexed by the value of the encoded character.
 * See RFC2045, section 6.8, table 1
 *
 * 'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives 62
 * and '/' gives 63. The padding character '=' gives 0. Every other
 * character gives 255.
 */
static	const	unsigned char	base64Table[256] = {
	/* 0x00 - 0x0f */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x10 - 0x1f */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x20 - 0x2f: '+' and '/' */
	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
	/* 0x30 - 0x3f: '0' to '9' and '=' */
	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
	/* 0x40 - 0x4f: 'A' to 'O' */
	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
	/* 0x50 - 0x5f: 'P' to 'Z' */
	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
	/* 0x60 - 0x6f: 'a' to 'o' */
	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
	/* 0x70 - 0x7f: 'p' to 'z' */
	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
	/* 0x80 - 0xff: never valid */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
};
|
b151ef55 |
message *
messageCreate(void)
{
message *m = (message *)cli_calloc(1, sizeof(message));
|
0e3b08fc |
if(m) |
bbf43447 |
m->mimeType = NOMIME; |
b151ef55 |
return m;
}
/*
 * Release everything held by the message, via messageReset(), and then
 * the message structure itself. m must not be NULL and must not be used
 * after this call.
 */
void
messageDestroy(message *m)
{
	assert(m != NULL);

	messageReset(m);

	free(m);
}
/*
 * Put the message back into the state of a newly created one.
 *
 * The MIME subtype, the disposition type, each stored argument, the
 * argument array, the body text and the array of encoding types are
 * released, then the whole structure is zeroed and its MIME type set
 * to NOMIME.
 */
void
messageReset(message *m)
{
	assert(m != NULL);

	/* free() accepts NULL, so these need no test */
	free(m->mimeSubtype);
	free(m->mimeDispositionType);

	if(m->mimeArguments != NULL) {
		int n;

		for(n = 0; n < m->numberOfArguments; n++)
			free(m->mimeArguments[n]);
		free(m->mimeArguments);
	}

	if(m->body_first != NULL)
		textDestroy(m->body_first);

	assert(m->base64chars == 0);

	if(m->encodingTypes != NULL) {
		assert(m->numberOfEncTypes > 0);
		free(m->encodingTypes);
	}

	memset(m, '\0', sizeof(message));
	m->mimeType = NOMIME;
}
|
de509b8e |
/* |
bb2432d7 |
* Handle the Content-Type header. The syntax is in RFC1341. |
2bcec72b |
* Return success (1) or failure (0). Failure only happens when it's an
* unknown type and we've already received a known type, or we've received an
* empty type. If we receive an unknown type by itself we default to application |
de509b8e |
*/ |
2bcec72b |
int |
b151ef55 |
messageSetMimeType(message *mess, const char *type)
{ |
8a88fb93 |
#ifdef CL_THREAD_SAFE
static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif |
1f8eb426 |
const struct mime_map *m; |
dad64ecb |
int typeval; |
1f8eb426 |
static table_t *mime_table; |
b151ef55 |
assert(mess != NULL); |
4a46b8a2 |
if(type == NULL) {
cli_warnmsg("Empty content-type field\n");
return 0;
} |
b151ef55 |
cli_dbgmsg("messageSetMimeType: '%s'\n", type);
/* Ignore leading spaces */ |
82189c76 |
while(!isalpha(*type)) |
b151ef55 |
if(*type++ == '\0') |
2bcec72b |
return 0; |
b151ef55 |
|
8a88fb93 |
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&mime_mutex);
#endif |
dad64ecb |
if(mime_table == NULL) {
mime_table = tableCreate(); |
8a88fb93 |
if(mime_table == NULL) {
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&mime_mutex);
#endif |
2bcec72b |
return 0; |
8a88fb93 |
} |
dad64ecb |
for(m = mime_map; m->string; m++)
if(!tableInsert(mime_table, m->string, m->type)) {
tableDestroy(mime_table); |
8a88fb93 |
mime_table = NULL;
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&mime_mutex);
#endif |
2bcec72b |
return 0; |
dad64ecb |
}
} |
8a88fb93 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&mime_mutex);
#endif |
dad64ecb |
typeval = tableFind(mime_table, type);
|
2bcec72b |
if(typeval != -1) { |
f0146bc6 |
mess->mimeType = (mime_type)typeval; |
2bcec72b |
return 1; |
1f8eb426 |
}
if(mess->mimeType == NOMIME) { |
b151ef55 |
if(strncasecmp(type, "x-", 2) == 0)
mess->mimeType = MEXTENSION; |
9fc8173e |
else { |
27a375f2 |
/*
* Based on a suggestion by James Stevens
* <James@kyzo.com>
* Force scanning of strange messages
*/ |
9fc8173e |
if(strcasecmp(type, "plain") == 0) { |
79879cfb |
cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n"); |
9fc8173e |
mess->mimeType = TEXT;
} else {
/*
* Don't handle broken e-mail probably sending
* Content-Type: plain/text
* instead of
* Content-Type: text/plain
* as an attachment
*/ |
1f8eb426 |
int highestSimil = 0, t = -1;
const char *closest = NULL;
for(m = mime_map; m->string; m++) {
const int s = simil(m->string, type);
if(s > highestSimil) {
highestSimil = s;
closest = m->string;
t = m->type;
}
}
if(highestSimil >= 50) {
cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n",
type, closest, highestSimil); |
388072d8 |
mess->mimeType = (mime_type)t; |
1f8eb426 |
} else { |
a363da65 |
cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type); |
1f8eb426 |
mess->mimeType = APPLICATION;
} |
9fc8173e |
} |
27a375f2 |
} |
2bcec72b |
return 1; |
b151ef55 |
} |
2bcec72b |
return 0; |
b151ef55 |
}
mime_type
messageGetMimeType(const message *m)
{ |
4d9c0ca8 |
assert(m != NULL);
return m->mimeType; |
b151ef55 |
}
/*
 * Store a copy of the MIME subtype in the message, replacing any
 * subtype that was there before. A NULL subtype is stored as the empty
 * string.
 */
void
messageSetMimeSubtype(message *m, const char *subtype)
{
	const char *value = subtype;

	assert(m != NULL);

	if(value == NULL) {
		/*
		 * Handle broken content-type lines, e.g.
		 *	Content-Type: text/
		 */
		cli_dbgmsg("Empty content subtype\n");
		value = "";
	}

	/* free() accepts NULL, so there's no need to test first */
	free(m->mimeSubtype);

	m->mimeSubtype = cli_strdup(value);
}
const char *
messageGetMimeSubtype(const message *m)
{ |
b329234a |
return (m->mimeSubtype) ? m->mimeSubtype : ""; |
b151ef55 |
}
/*
 * Store the Content-Disposition type in the message, replacing any
 * previous value.
 *
 * Leading white space is skipped and the stored copy is passed through
 * strstrip(). If disptype is NULL, or holds nothing but white space,
 * no disposition type is stored.
 */
void
messageSetDispositionType(message *m, const char *disptype)
{
	assert(m != NULL);

	/* Drop the old value, every path below starts from "none" */
	free(m->mimeDispositionType);
	m->mimeDispositionType = NULL;

	if(disptype == NULL)
		return;

	/*
	 * It's broken for there to be an entry such as "Content-Disposition:"
	 * However some spam and viruses are rather broken, it's a sign
	 * that something is wrong if we get that - maybe we should force a
	 * scan of this part
	 *
	 * The cast to unsigned char is needed since the header may hold
	 * 8 bit characters, which would be passed to isspace() as negative
	 * values where char is signed
	 */
	while(isspace((unsigned char)*disptype))
		disptype++;

	if(*disptype == '\0')
		return;

	m->mimeDispositionType = cli_strdup(disptype);
	if(m->mimeDispositionType)
		strstrip(m->mimeDispositionType);
}
const char *
messageGetDispositionType(const message *m)
{ |
b329234a |
return (m->mimeDispositionType) ? m->mimeDispositionType : ""; |
b151ef55 |
}
/*
 * Add one "name=value" argument to the message.
 *
 * Nothing is stored when arg is NULL, empty, rejected by usefulArg() or
 * already present (compared without regard to case). The text stored
 * is what rfc2231() returns for arg.
 *
 * TODO:
 *	Arguments are held on a per message basis, they should be held on
 * a per section basis. Otherwise what happens if two sections have two
 * different values for charset? Probably doesn't matter for the use this
 * code will be given, but will need fixing if this code is used elsewhere
 */
void
messageAddArgument(message *m, const char *arg)
{
	int offset;

	assert(m != NULL);

	if(arg == NULL)
		return;	/* Note: this is not an error condition */

	/* cast: the argument may hold 8 bit characters */
	while(isspace((unsigned char)*arg))
		arg++;

	if(*arg == '\0')
		/* Empty argument? Probably a broken mail client... */
		return;

	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);

	if(!usefulArg(arg))
		return;

	/* Look for a duplicate, or for an empty slot that can be reused */
	for(offset = 0; offset < m->numberOfArguments; offset++)
		if(m->mimeArguments[offset] == NULL)
			break;
		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
			return;	/* already in there */

	if(offset == m->numberOfArguments) {
		/* No free slot, make the array one entry longer */
		char **ptr = (char **)cli_realloc(m->mimeArguments, (m->numberOfArguments + 1) * sizeof(char *));

		if(ptr == NULL)
			return;
		m->mimeArguments = ptr;
		m->numberOfArguments++;
	}

	arg = m->mimeArguments[offset] = rfc2231(arg);

	if(arg == NULL)
		/*
		 * rfc2231() gave us nothing (it is defined outside this
		 * function, presumably it ran out of memory). The slot is
		 * left as NULL, which the loop above treats as free for
		 * reuse, and we must not inspect the string below
		 */
		return;

	/*
	 * This is terribly broken from an RFC point of view but is useful
	 * for catching viruses which have a filename but no type of
	 * mime. By pretending defaulting to an application rather than
	 * to nomime we can ensure they're saved and scanned
	 */
	if((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0))
		if(messageGetMimeType(m) == NOMIME) {
			cli_dbgmsg("Force mime encoding to application\n");
			messageSetMimeType(m, "application");
		}
}
/*
 * Add in all the arguments found in the header text s, handing each
 * one to messageAddArgument() as "key=value".
 * Cope with:
 *	name="foo bar.doc"
 *	charset=foo name=bar
 *
 * Parsing stops, silently apart from a debug message, at the first
 * argument that can't be understood or when memory for the key runs out.
 */
void
messageAddArguments(message *m, const char *s)
{
	const char *string = s;

	assert(string != NULL);

	cli_dbgmsg("Add arguments '%s'\n", string);

	while(*string) {
		const char *key, *cptr;
		char *data, *field;

		/*
		 * Skip the separators between arguments. The casts to
		 * unsigned char in this function are needed because the
		 * header may hold 8 bit characters
		 */
		if(isspace((unsigned char)*string) || (*string == ';')) {
			string++;
			continue;
		}

		/* key points at a character which is not NUL */
		key = string;

		data = strchr(string, '=');

		/*
		 * Some spam breaks RFC2045 by using ':' instead of '='
		 * e.g.:
		 *	Content-Type: text/html; charset:ISO-8859-1
		 * should be:
		 *	Content-type: text/html; charset=ISO-8859-1
		 *
		 * We give up with lines that are completely broken because
		 * we don't have ESP and don't know what was meant to be there.
		 * It's unlikely to really be a problem.
		 */
		if(data == NULL)
			data = strchr(string, ':');

		if(data == NULL) {
			/*
			 * Completely broken, give up
			 */
			cli_dbgmsg("Can't parse header \"%s\"\n", s);
			return;
		}

		string = &data[1];

		/*
		 * Handle white space to the right of the equals sign
		 * This breaks RFC2045 which has:
		 *	parameter := attribute "=" value
		 *	attribute := token ; case-insensitive
		 *	token := one or more of any (ASCII) CHAR except
		 *		SPACE, CTLs, or tspecials
		 * But too many MUAs ignore this
		 */
		while(isspace((unsigned char)*string))
			string++;

		/* cptr is the first character of the value */
		cptr = string++;

		if(*cptr == '"') {
			char *kcopy, *ptr;

			/*
			 * The field is in quotes, so look for the
			 * closing quotes
			 */
			kcopy = cli_strdup(key);

			if(kcopy == NULL)
				return;

			/*
			 * Cut the copy of the key at the separator. data
			 * still points to the separator within key, so the
			 * offset is the same within the copy
			 */
			kcopy[data - key] = '\0';

			string = strchr(++cptr, '"');

			if(string == NULL) {
				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
				string = "";
			} else
				string++;

			if(!usefulArg(kcopy)) {
				free(kcopy);
				continue;
			}

			data = cli_strdup(cptr);

			ptr = (data) ? strchr(data, '"') : NULL;
			if(ptr == NULL) {
				/*
				 * Weird e-mail header such as:
				 * Content-Type: application/octet-stream; name="
				 * "
				 * Content-Transfer-Encoding: base64
				 * Content-Disposition: attachment; filename="
				 * "
				 *
				 * TODO: the file should still be saved and
				 * virus checked
				 */
				cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
				free(data);
				free(kcopy);
				return;
			}

			*ptr = '\0';

			/* Room for the key, '=', the value and the NUL */
			field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2);
			if(field) {
				strcat(field, "=");
				strcat(field, data);
			} else
				free(kcopy);
			free(data);
		} else {
			size_t len;

			if(*cptr == '\0') {
				cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
				return;
			}

			/*
			 * The field is not in quotes, so look for the closing
			 * white space
			 */
			while((*string != '\0') && !isspace((unsigned char)*string))
				string++;

			/* Copy everything from the key up to the end of the value */
			len = (size_t)(string - key) + 1;
			field = cli_malloc(len);

			if(field) {
				memcpy(field, key, len - 1);
				field[len - 1] = '\0';
			}
		}

		if(field) {
			messageAddArgument(m, field);
			free(field);
		}
	}
}
static const char *
messageGetArgument(const message *m, int arg)
{
assert(m != NULL);
assert(arg >= 0); |
c6259ac5 |
assert(arg < m->numberOfArguments); |
b151ef55 |
|
b329234a |
return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : ""; |
b151ef55 |
}
/*
* Find a MIME variable from the header and return a COPY to the value of that
* variable. The caller must free the copy
*/
const char *
messageFindArgument(const message *m, const char *variable)
{
int i; |
dad64ecb |
size_t len; |
b151ef55 |
assert(m != NULL);
assert(variable != NULL);
|
dad64ecb |
len = strlen(variable);
|
c6259ac5 |
for(i = 0; i < m->numberOfArguments; i++) { |
b151ef55 |
const char *ptr;
ptr = messageGetArgument(m, i);
if((ptr == NULL) || (*ptr == '\0')) |
dad64ecb |
continue; |
b151ef55 |
#ifdef CL_DEBUG
cli_dbgmsg("messageFindArgument: compare %d bytes of %s with %s\n",
len, variable, ptr);
#endif
if(strncasecmp(ptr, variable, len) == 0) {
ptr = &ptr[len];
while(isspace(*ptr))
ptr++; |
752c34b9 |
if(*ptr != '=') { |
133dcdcd |
cli_warnmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i)); |
752c34b9 |
return NULL;
} |
b151ef55 |
if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) { |
bf8ea488 |
/* Remove any quote characters */ |
4db74788 |
char *ret = cli_strdup(++ptr); |
bf8ea488 |
char *p;
|
bbf43447 |
if(ret == NULL)
return NULL;
|
bf8ea488 |
/*
* Thomas Lamy <Thomas.Lamy@in-online.net>:
* fix un-quoting of boundary strings from
* header, occurs if boundary was given as
* 'boundary="_Test_";'
*
* At least two quotes in string, assume
* quoted argument
* end string at next quote
*/ |
53ee0b60 |
if((p = strchr(ret, '"')) != NULL) {
ret[strlen(ret) - 1] = '\0'; |
bf8ea488 |
*p = '\0'; |
53ee0b60 |
} |
bbf43447 |
return ret; |
b151ef55 |
} |
4db74788 |
return cli_strdup(ptr); |
b151ef55 |
}
} |
bbf43447 |
return NULL; |
b151ef55 |
}
/*
 * Handle the Content-Transfer-Encoding header.
 *
 * enctype may name more than one mechanism, separated by spaces or
 * tabs. Each name that matches an entry of encoding_map exactly
 * (simil() gives 100) has its type appended to the encoding types of
 * the message, unless that type is already there.
 *
 * A name with no exact match is replaced by the closest entry if
 * simil() gave at least 50 for it, otherwise both base64 and
 * quoted-printable are enabled.
 */
void
messageSetEncoding(message *m, const char *enctype)
{
	const struct encoding_map *e;
	int i;
	char *type;

	assert(m != NULL);
	assert(enctype != NULL);

	/*m->encodingType = EEXTENSION;*/

	/*
	 * The casts to unsigned char in this function are needed because
	 * the header may hold 8 bit characters, the <ctype.h> routines
	 * aren't defined for the negative values that a signed char
	 * would give
	 */
	while(isblank((unsigned char)*enctype))
		enctype++;

	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);

	if(strcasecmp(enctype, "8 bit") == 0) {
		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
		enctype = "8bit";
	}

	/*
	 * Iterate through
	 *	Content-Transfer-Encoding: base64 binary
	 * cli_strtok's fieldno counts from 0
	 */
	i = 0;
	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
		int highestSimil = 0;
		const char *closest = NULL;
		/* the same for every entry of the table, so work it out once */
		const int lowertype = tolower((unsigned char)type[0]);

		for(e = encoding_map; e->string; e++) {
			int sim;

			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
				/*
				 * simil is expensive, I'm yet to encounter only
				 * one example of a missent encoding when the
				 * first character was wrong, so lets assume no
				 * match to save the call.
				 *
				 * That example was quoted-printable sent as
				 * X-quoted-printable.
				 */
				continue;

			if(strcmp(e->string, "uuencode") == 0)
				/*
				 * No need to test here - fast track visa will have
				 * handled uuencoded files
				 */
				continue;

			sim = simil(type, e->string);

			if(sim == 100) {
				int j;
				encoding_type *et;

				for(j = 0; j < m->numberOfEncTypes; j++)
					if(m->encodingTypes[j] == e->type)
						break;

				if(j < m->numberOfEncTypes) {
					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
						type);
					break;
				}

				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
				if(et == NULL)
					break;

				m->encodingTypes = et;
				m->encodingTypes[m->numberOfEncTypes++] = e->type;

				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
				break;
			} else if(sim > highestSimil) {
				closest = e->string;
				highestSimil = sim;
			}
		}

		/* e is at the end of the table only if nothing matched */
		if(e->string == NULL) {
			/*
			 * The stated encoding type is illegal, so we
			 * use a best guess of what it should be.
			 *
			 * 50% is arbitrary. For example 7bi will match as
			 * 66% certain to be 7bit
			 */
			if(highestSimil >= 50) {
				/* the format uses %u, so pass an unsigned value */
				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
					type, closest, (unsigned int)highestSimil);
				messageSetEncoding(m, closest);
			} else {
				cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
				/*
				 * Err on the side of safety, enable all
				 * decoding modules
				 */
				messageSetEncoding(m, "base64");
				messageSetEncoding(m, "quoted-printable");
			}
		}

		free(type);
	}
}
encoding_type
messageGetEncoding(const message *m)
{
assert(m != NULL); |
0e3b08fc |
if(m->numberOfEncTypes == 0)
return NOENCODING;
return m->encodingTypes[0]; |
b151ef55 |
}
|
de617e3e |
/*
 * Add the given line object to the end of the body of the message.
 *
 * If line is NULL, or holds no data, a blank entry is added. Otherwise
 * the entry refers to the line by way of lineLink() and the line is
 * then checked by messageIsEncoding().
 *
 * Returns 1 on success. Returns -1 if there is no memory for the new
 * entry, in which case the body is left exactly as it was.
 */
int
messageAddLine(message *m, line_t *line)
{
	text *node;

	assert(m != NULL);

	/*
	 * Allocate before touching the list, so that a failure can't
	 * lose track of the current end of the body
	 */
	node = (text *)cli_malloc(sizeof(text));

	if(node == NULL)
		return -1;

	node->t_next = NULL;

	if(m->body_first == NULL)
		m->body_first = node;
	else
		m->body_last->t_next = node;

	m->body_last = node;

	if(line && lineGetData(line)) {
		node->t_line = lineLink(line);

		/* messageIsEncoding looks at the new last line */
		messageIsEncoding(m);
	} else
		node->t_line = NULL;

	return 1;
}
|
b151ef55 |
/* |
edb35c0a |
* Add the given line to the end of the given message |
ffd59a3e |
* If needed a copy of the given line is taken which the caller must free |
edb35c0a |
* Line must not be terminated by a \n |
b151ef55 |
*/ |
4c927f11 |
int |
321d5c00 |
messageAddStr(message *m, const char *data) |
b151ef55 |
{ |
c1e96196 |
line_t *repeat = NULL;
|
b151ef55 |
assert(m != NULL);
|
c1e96196 |
if(data) { |
321d5c00 |
if(*data == '\0')
data = NULL;
else {
/*
* If it's only white space, just store one space to
* save memory. You must store something since it may
* be a header line
*/
int iswhite = 1;
const char *p; |
c1e96196 |
|
321d5c00 |
for(p = data; *p; p++) |
5e28cd2b |
if(((*p) & 0x80) || !isspace(*p)) { |
321d5c00 |
iswhite = 0;
break;
}
if(iswhite) {
/*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
data = " "; |
a78256af |
} |
c1e96196 |
}
}
|
b151ef55 |
if(m->body_first == NULL)
m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
else { |
e24738dc |
assert(m->body_last != NULL); |
af66c329 |
if((data == NULL) && (m->body_last->t_line == NULL)) |
24c897dc |
/*
* Although this would save time and RAM, some
* phish signatures have been built which need the
* blank lines
*/
if(messageGetMimeType(m) != TEXT)
/* don't save two blank lines in sucession */
return 1; |
af66c329 |
|
b151ef55 |
m->body_last->t_next = (text *)cli_malloc(sizeof(text)); |
e24738dc |
if(m->body_last->t_next == NULL) {
messageDedup(m);
m->body_last->t_next = (text *)cli_malloc(sizeof(text));
if(m->body_last->t_next == NULL) {
cli_errmsg("messageAddStr: out of memory\n");
return -1;
}
}
|
c1e96196 |
if(data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
repeat = m->body_last->t_line; |
b151ef55 |
m->body_last = m->body_last->t_next;
}
|
e24738dc |
if(m->body_last == NULL) {
cli_errmsg("messageAddStr: out of memory\n"); |
4c927f11 |
return -1; |
e24738dc |
} |
f5a4d7e8 |
|
b151ef55 |
m->body_last->t_next = NULL;
|
de617e3e |
if(data && *data) { |
c1e96196 |
if(repeat)
m->body_last->t_line = lineLink(repeat); |
399e1865 |
else { |
e24738dc |
m->body_last->t_line = lineCreate(data);
if(m->body_last->t_line == NULL) { |
399e1865 |
messageDedup(m);
m->body_last->t_line = lineCreate(data); |
de617e3e |
|
399e1865 |
if(m->body_last->t_line == NULL) {
cli_errmsg("messageAddStr: out of memory\n");
return -1;
}
}
/* cli_chomp(m->body_last->t_text); */ |
5ae253d2 |
messageIsEncoding(m); |
399e1865 |
} |
98685ac1 |
} else |
de617e3e |
m->body_last->t_line = NULL; |
98685ac1 |
|
4c927f11 |
return 1; |
b151ef55 |
}
|
ffd59a3e |
/* |
edb35c0a |
* Add the given line to the start of the given message
* A copy of the given line is taken which the caller must free
* Line must not be terminated by a \n
*/
int |
de617e3e |
messageAddStrAtTop(message *m, const char *data) |
edb35c0a |
{
text *oldfirst;
assert(m != NULL);
if(m->body_first == NULL) |
de617e3e |
return messageAddLine(m, lineCreate(data)); |
bbf43447 |
|
edb35c0a |
oldfirst = m->body_first;
m->body_first = (text *)cli_malloc(sizeof(text));
if(m->body_first == NULL) {
m->body_first = oldfirst;
return -1;
}
m->body_first->t_next = oldfirst; |
de617e3e |
m->body_first->t_line = lineCreate((data) ? data : ""); |
edb35c0a |
|
de617e3e |
if(m->body_first->t_line == NULL) {
cli_errmsg("messageAddStrAtTop: out of memory\n"); |
edb35c0a |
return -1;
}
return 1;
}
/*
 * See if the last line marks the start of a non MIME inclusion that
 * will need to be scanned
 *
 * The last line of the body is tested against each kind of marker in
 * turn. The first test to succeed records the last body entry in the
 * matching member of the message (encoding, bounce, binhex or yenc) and
 * ends the search. A member which has already been set is not set again.
 */
static void
messageIsEncoding(message *m)
{
	static const char encoding[] = "Content-Transfer-Encoding";
	static const char binhex[] = "(This file must be converted with BinHex 4.0)";
	const char *line = lineGetData(m->body_last->t_line);

	/*
	 * Testing lineGetRefCount(m->body_last->t_line) > 1 here was tried:
	 * not enough matches to warrant the test
	 */

	/* A Content-Transfer-Encoding line, apart from a 7bit one */
	if((m->encoding == NULL) &&
	   (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
	   (strstr(line, "7bit") == NULL)) {
		m->encoding = m->body_last;
		return;
	}

	/* A Received line which cli_filetype() takes to be mail */
	if((m->bounce == NULL) &&
	   (strncasecmp(line, "Received: ", 10) == 0) &&
	   (cli_filetype((const unsigned char *)line, strlen(line)) == CL_TYPE_MAIL)) {
		m->bounce = m->body_last;
		return;
	}

	/*
	 * uuencode begin lines are not looked for:
	 * not needed with fast track visa technology
	 */

	/*
	 * Look for close matches for BinHex, but
	 * simil() is expensive so only do it if it's
	 * likely to be found
	 */
	if((m->binhex == NULL) &&
	   strstr(line, "BinHex") &&
	   (simil(line, binhex) > 90)) {
		m->binhex = m->body_last;
		return;
	}

	/* The start of a yEnc block */
	if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
		m->yenc = m->body_last;
}
/* |
ffd59a3e |
* Returns a pointer to the body of the message. Note that it does NOT return
* a copy of the data
*/ |
985cc85e |
text *
messageGetBody(message *m) |
b151ef55 |
{
assert(m != NULL); |
ffd59a3e |
return m->body_first; |
b151ef55 |
}
/*
 * Clean up the message by removing trailing spaces and blank lines
 *
 * The work is done by textClean(). If that returns a non-NULL value it
 * becomes the new last entry of the body.
 */
void
messageClean(message *m)
{
	text *tail = textClean(m->body_first);

	if(tail != NULL)
		m->body_last = tail;
}
/* |
e6b25cd3 |
* Export a message using the given export routines |
0d252351 |
*
* TODO: It really should export into an array, one
* for each encoding algorithm. However, what it does is it returns the
* last item that was exported. That's sufficient for now. |
b151ef55 |
*/ |
a446de17 |
static void * |
985cc85e |
messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text) |
b151ef55 |
{ |
e6b25cd3 |
void *ret; |
985cc85e |
text *t_line; |
dd8a7e90 |
char *filename; |
0e3b08fc |
int i; |
b151ef55 |
assert(m != NULL);
|
0e3b08fc |
if(messageGetBody(m) == NULL)
return NULL;
|
e6b25cd3 |
ret = (*create)(); |
b151ef55 |
|
e6b25cd3 |
if(ret == NULL) |
02c9dc2a |
return NULL; |
b151ef55 |
|
802c37fc |
cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
|
0e3b08fc |
if((t_line = binhexBegin(m)) != NULL) { |
a42dba7d |
unsigned char byte; |
40d54f7f |
size_t newlen = 0L, len, dataforklen, resourceforklen, l; |
a4c3d0a3 |
unsigned char *data; |
bbf43447 |
char *ptr; |
bb5d6279 |
int bytenumber; |
285a69b4 |
blob *tmp; |
bc75e1d1 |
/*
* Table look up by Thomas Lamy <Thomas.Lamy@in-online.net>
* HQX conversion table - illegal chars are 0xff
*/
const unsigned char hqxtbl[] = {
/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
/* 00-0f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
/* 10-1f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
/* 20-2f */ 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
/* 30-3f */ 0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
/* 40-4f */ 0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
/* 50-5f */ 0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
/* 60-6f */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
/* 70-7f */ 0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
}; |
a42dba7d |
|
9b2be218 |
cli_dbgmsg("messageExport: decode binhex\n"); |
a42dba7d |
/*
* Decode BinHex4. First create a temporary blob which contains
* the encoded message. Then decode that blob to the target |
0bf1353d |
* blob, free the temporary blob and return the target one |
bc75e1d1 |
* |
fdb974a5 |
* FIXME: EICAR isn't detected: should create 3 files in fork
* format: .info, .data and .rsrc. This is needed for
* position dependant detection such as EICAR
* |
bc75e1d1 |
* See RFC1741 |
a42dba7d |
*/ |
a4c3d0a3 |
while(((t_line = t_line->t_next) != NULL) &&
(t_line->t_line == NULL))
; |
a42dba7d |
|
985cc85e |
tmp = textToBlob(t_line, NULL,
((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);
|
a4c3d0a3 |
if(tmp == NULL) { |
985cc85e |
/*
* FIXME: We've probably run out of memory during the
* text to blob.
* TODO: if m->numberOfEncTypes == 1 we could delete
* the text object as we decode it
*/
cli_warnmsg("Couldn't start binhex parser\n"); |
e6b25cd3 |
(*destroy)(ret); |
bc75e1d1 |
return NULL;
} |
bb5d6279 |
|
a4c3d0a3 |
data = blobGetData(tmp); |
a42dba7d |
|
a4c3d0a3 |
if(data == NULL) {
cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n"); |
bbf43447 |
blobDestroy(tmp); |
e6b25cd3 |
(*destroy)(ret); |
bbf43447 |
return NULL;
} |
a4c3d0a3 |
len = blobGetDataSize(tmp); |
a42dba7d |
|
a4c3d0a3 |
if(data[0] == ':') {
unsigned char *uptr;
/* 7 bit (ala RFC1741) */ |
a42dba7d |
|
a4c3d0a3 |
/*
* FIXME: this is dirty code, modification of the
* contents of a member of the blob object should be
* done through blob.c
*
* Convert 7 bit data into 8 bit
*/
cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len); |
bb5d6279 |
|
a4c3d0a3 |
uptr = cli_malloc(len);
if(uptr == NULL) {
blobDestroy(tmp);
(*destroy)(ret);
return NULL; |
bc75e1d1 |
} |
a4c3d0a3 |
memcpy(uptr, data, len);
bytenumber = 0; |
83ec020f |
|
bc75e1d1 |
/* |
a4c3d0a3 |
* uptr now contains the encoded (7bit) data - len bytes long
* data will contain the unencoded (8bit) data |
bc75e1d1 |
*/ |
a4c3d0a3 |
for(l = 1; l < len; l++) {
unsigned char c = uptr[l];
if(c == ':') |
bc75e1d1 |
break; |
a4c3d0a3 |
if((c == '\n') || (c == '\r'))
continue;
if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c); |
bc75e1d1 |
break; |
a4c3d0a3 |
}
c = hqxtbl[c];
assert(c <= 63);
/*
* These masks probably aren't needed, but
* they're here to verify the code is correct
*/
switch(bytenumber) {
case 0:
data[newlen] = (c << 2) & 0xFC;
bytenumber = 1;
break;
case 1:
data[newlen++] |= (c >> 4) & 0x3;
data[newlen] = (c << 4) & 0xF0;
bytenumber = 2;
break;
case 2:
data[newlen++] |= (c >> 2) & 0xF;
data[newlen] = (c << 6) & 0xC0;
bytenumber = 3;
break;
case 3:
data[newlen++] |= c & 0x3F;
bytenumber = 0;
break;
} |
bc75e1d1 |
} |
bb5d6279 |
|
a4c3d0a3 |
cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen); |
bc75e1d1 |
|
a4c3d0a3 |
/*
* Throw away the old encoded (7bit) data
* data now points to the encoded (8bit) data - newlen bytes
*
* The data array may contain repetitive characters
*/
free(uptr);
} else { |
a363da65 |
cli_warnmsg("HQX8 messages not yet supported - if you believe this file contains a virus, submit it to www.clamav.net\n"); |
a4c3d0a3 |
newlen = len;
} |
bc75e1d1 |
/*
* Uncompress repetitive characters
*/
if(memchr(data, 0x90, newlen)) {
blob *u = blobCreate(); /* uncompressed data */
|
285a69b4 |
if(u == NULL) { |
e6b25cd3 |
(*destroy)(ret); |
285a69b4 |
blobDestroy(tmp);
return NULL;
} |
bc75e1d1 |
/*
* Includes compression
*/
for(l = 0L; l < newlen; l++) {
unsigned char c = data[l]; |
a42dba7d |
/* |
bc75e1d1 |
* TODO: handle the case where the first byte
* is 0x90 |
a42dba7d |
*/ |
bc75e1d1 |
blobAddData(u, &c, 1);
if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
int count;
l += 2;
count = data[l];
if(count == 0) {
c = 0x90;
blobAddData(u, &c, 1); |
ffd59a3e |
} else { |
0d252351 |
#ifdef CL_DEBUG
cli_dbgmsg("uncompress HQX7 at 0x%06x: %d repetitive bytes\n", l, count);
#endif |
ffd59a3e |
blobGrow(u, count); |
bc75e1d1 |
while(--count > 0)
blobAddData(u, &c, 1); |
ffd59a3e |
} |
a42dba7d |
}
} |
3fbd1711 |
blobDestroy(tmp); |
bc75e1d1 |
tmp = u;
data = blobGetData(tmp);
len = blobGetDataSize(tmp);
cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
} else {
len = newlen;
cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
len); |
a42dba7d |
} |
dad64ecb |
if(len == 0) {
cli_warnmsg("Discarding empty binHex attachment\n"); |
e6b25cd3 |
(*destroy)(ret); |
dad64ecb |
blobDestroy(tmp);
return NULL;
} |
a42dba7d |
/* |
bc75e1d1 |
* The blob tmp now contains the uncompressed data
* of len bytes, i.e. the repetitive bytes have been removed
*/
/*
* Parse the header
* |
a42dba7d |
* TODO: set filename argument in message as well
*/
byte = data[0]; |
6afdc3ab |
if(byte >= len) { |
e6b25cd3 |
(*destroy)(ret); |
6afdc3ab |
blobDestroy(tmp);
return NULL;
} |
a42dba7d |
filename = cli_malloc(byte + 1); |
bbf43447 |
if(filename == NULL) { |
e6b25cd3 |
(*destroy)(ret); |
bbf43447 |
blobDestroy(tmp);
return NULL;
} |
bc75e1d1 |
memcpy(filename, &data[1], byte); |
a42dba7d |
filename[byte] = '\0'; |
e6b25cd3 |
(*setFilename)(ret, dir, filename); |
dad64ecb |
/*ptr = cli_malloc(strlen(filename) + 6);*/
ptr = cli_malloc(byte + 6); |
bbf43447 |
if(ptr) {
sprintf(ptr, "name=%s", filename);
messageAddArgument(m, ptr);
free(ptr);
} |
a42dba7d |
/*
* skip over length, filename, version, type, creator and flags
*/
byte = 1 + byte + 1 + 4 + 4 + 2; |
83ec020f |
/*
* Set len to be the data fork length
*/ |
64b0fff6 |
dataforklen = ((data[byte] << 24) & 0xFF000000) | |
582808c3 |
((data[byte + 1] << 16) & 0xFF0000) |
((data[byte + 2] << 8) & 0xFF00) |
(data[byte + 3] & 0xFF); |
bc75e1d1 |
|
64b0fff6 |
resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) | |
582808c3 |
((data[byte + 5] << 16) & 0xFF0000) |
((data[byte + 6] << 8) & 0xFF00) |
(data[byte + 7] & 0xFF); |
64b0fff6 |
cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n",
filename, dataforklen, resourceforklen); |
bc75e1d1 |
free((char *)filename); |
a42dba7d |
/*
* Skip over data fork length, resource fork length and CRC
*/
byte += 10;
|
dad64ecb |
l = blobGetDataSize(tmp) - byte; |
db42f46e |
|
64b0fff6 |
if(l < dataforklen) { |
db42f46e |
cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n", |
64b0fff6 |
dataforklen, l);
dataforklen = l; |
db42f46e |
} |
55274fda |
if(setCTX && m->ctx)
(*setCTX)(ret, m->ctx);
|
64b0fff6 |
(*addData)(ret, &data[byte], dataforklen); |
a42dba7d |
blobDestroy(tmp);
|
fd969c26 |
if(destroy_text)
m->binhex = NULL; |
fef5ad63 |
|
fd969c26 |
if((m->numberOfEncTypes == 0) ||
((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) { |
fef5ad63 |
cli_dbgmsg("Finished exporting binhex file\n");
return ret;
} |
0e3b08fc |
}
if(m->numberOfEncTypes == 0) {
/*
* Fast copy
*/ |
fd969c26 |
cli_dbgmsg("messageExport: Entering fast copy mode\n");
|
6a91c55b |
filename = (char *)messageFindArgument(m, "filename"); |
b151ef55 |
if(filename == NULL) { |
6a91c55b |
filename = (char *)messageFindArgument(m, "name"); |
b151ef55 |
if(filename == NULL) { |
c93e52c1 |
cli_dbgmsg("Unencoded attachment sent with no filename\n"); |
3b6eace4 |
messageAddArgument(m, "name=attachment"); |
0e3b08fc |
} else |
bbf43447 |
/*
* Some virus attachments don't say how they've
* been encoded. We assume base64
*/
messageSetEncoding(m, "base64"); |
b151ef55 |
}
|
63f87938 |
(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment"); |
b151ef55 |
|
5eeffbb9 |
if(filename)
free((char *)filename); |
b151ef55 |
|
2add0ed7 |
if(m->numberOfEncTypes == 0) |
985cc85e |
return exportText(messageGetBody(m), ret, destroy_text); |
b151ef55 |
}
|
55274fda |
if(setCTX && m->ctx)
(*setCTX)(ret, m->ctx);
|
0e3b08fc |
for(i = 0; i < m->numberOfEncTypes; i++) {
encoding_type enctype = m->encodingTypes[i]; |
d17de037 |
size_t size; |
0e3b08fc |
|
0d252351 |
if(i > 0) {
void *newret;
newret = (*create)();
if(newret == NULL) {
cli_errmsg("Not all decoding algorithms were run\n");
return ret;
}
(*destroy)(ret);
ret = newret;
} |
ad642304 |
cli_dbgmsg("messageExport: enctype %d is %d\n", i, enctype); |
b151ef55 |
/* |
0e3b08fc |
* Find the filename to decode |
b151ef55 |
*/ |
2add0ed7 |
if(((enctype == YENCODE) && yEncBegin(m)) || ((i == 0) && yEncBegin(m))) { |
00f95393 |
/*
* TODO: handle multipart yEnc encoded files
*/
t_line = yEncBegin(m); |
bb2432d7 |
filename = (char *)lineGetData(t_line->t_line); |
00f95393 |
if((filename = strstr(filename, " name=")) != NULL) { |
4db74788 |
filename = cli_strdup(&filename[6]); |
00f95393 |
if(filename) {
cli_chomp(filename);
strstrip(filename);
cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
} |
5eeffbb9 |
} |
00f95393 |
|
63f87938 |
(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment"); |
5eeffbb9 |
if(filename) {
free((char *)filename);
filename = NULL;
} |
00f95393 |
t_line = t_line->t_next;
enctype = YENCODE; |
74ca33e9 |
m->yenc = NULL; |
0e3b08fc |
} else { |
2add0ed7 |
if(enctype == UUENCODE) {
/*
* The body will have been stripped out by the fast track visa
* system. Treat as plain/text, which means we'll still scan
* for funnies outside of the uuencoded portion.
*/ |
56896211 |
cli_dbgmsg("messageExport: treat uuencode as text/plain\n"); |
2add0ed7 |
enctype = m->encodingTypes[i] = NOENCODING;
} |
0e3b08fc |
filename = (char *)messageFindArgument(m, "filename");
if(filename == NULL) {
filename = (char *)messageFindArgument(m, "name");
if(filename == NULL) {
cli_dbgmsg("Attachment sent with no filename\n");
messageAddArgument(m, "name=attachment");
} else if(enctype == NOENCODING)
/* |
24c897dc |
* Some virus attachments don't say how
* they've been encoded. We assume
* base64. |
2add0ed7 |
* |
24c897dc |
* FIXME: don't do this if it's a fall
* through from uuencode |
0e3b08fc |
*/
messageSetEncoding(m, "base64");
}
|
63f87938 |
(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment"); |
0e3b08fc |
t_line = messageGetBody(m);
} |
ad642304 |
|
00f95393 |
if(filename)
free((char *)filename); |
0e3b08fc |
/* |
c93e52c1 |
* t_line should now point to the first (encoded) line of the
* message |
0e3b08fc |
*/
if(t_line == NULL) {
cli_warnmsg("Empty attachment not saved\n");
(*destroy)(ret);
return NULL;
}
if(enctype == NOENCODING) { |
ce73653f |
/* |
0e3b08fc |
* Fast copy |
ce73653f |
*/ |
985cc85e |
if(i == m->numberOfEncTypes - 1) {
/* last one */
(void)exportText(t_line, ret, destroy_text);
break;
}
(void)exportText(t_line, ret, 0); |
0e3b08fc |
continue; |
ce73653f |
} |
b151ef55 |
|
d17de037 |
size = 0; |
0e3b08fc |
do { |
0d252351 |
unsigned char smallbuf[1024];
unsigned char *uptr, *data; |
0e3b08fc |
const char *line = lineGetData(t_line->t_line); |
0d252351 |
unsigned char *bigbuf;
size_t datasize; |
b151ef55 |
|
2add0ed7 |
if(enctype == YENCODE) { |
00f95393 |
if(line == NULL)
continue; |
5eeffbb9 |
if(strncmp(line, "=yend ", 6) == 0) |
00f95393 |
break; |
0e3b08fc |
}
|
0d252351 |
/*
* Add two bytes for '\n' and '\0'
*/
datasize = (line) ? strlen(line) + 2 : 0; |
02c9dc2a |
|
23e1c37c |
if(datasize >= sizeof(smallbuf))
data = bigbuf = (unsigned char *)cli_malloc(datasize); |
0d252351 |
else {
bigbuf = NULL;
data = smallbuf;
datasize = sizeof(smallbuf);
} |
02c9dc2a |
|
23e1c37c |
uptr = decodeLine(m, enctype, line, data, datasize); |
0d252351 |
if(uptr == NULL) {
if(data == bigbuf)
free(data);
break;
} |
1e06e1ab |
|
d17de037 |
if(uptr != data) { |
af66c329 |
assert((size_t)(uptr - data) < datasize); |
0e3b08fc |
(*addData)(ret, data, (size_t)(uptr - data)); |
d17de037 |
size += (size_t)(uptr - data);
} |
02c9dc2a |
|
0d252351 |
if(data == bigbuf)
free(data);
|
0e3b08fc |
/* |
da850706 |
* According to RFC2045, '=' is used to pad out |
0e3b08fc |
* the last byte and should be used as evidence
* of the end of the data. Some mail clients
* annoyingly then put plain text after the '='
* byte and viruses exploit this bug. Sigh
*/
/*if(enctype == BASE64)
if(strchr(line, '='))
break;*/ |
0e01c158 |
if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
lineUnlink(t_line->t_line);
t_line->t_line = NULL;
} |
0e3b08fc |
} while((t_line = t_line->t_next) != NULL); |
d17de037 |
|
8f465848 |
cli_dbgmsg("Exported %u bytes using enctype %d\n", size, enctype); |
752c34b9 |
|
f98d4ab4 |
/* Verify we have nothing left to flush out */
if(m->base64chars) {
unsigned char data[4];
unsigned char *ptr; |
285a69b4 |
|
c8bc44d6 |
ptr = base64Flush(m, data); |
f98d4ab4 |
if(ptr)
(*addData)(ret, data, (size_t)(ptr - data));
} |
285a69b4 |
}
|
e6b25cd3 |
return ret;
}
|
c8bc44d6 |
/*
 * Emit whatever base64 characters are still held back in the message
 * (m->base64chars of them) into buf.
 *
 * Returns the value handed back by decode() for the flush, or NULL when
 * nothing was pending. The pending count is reset to zero after a flush.
 */
unsigned char *
base64Flush(message *m, unsigned char *buf)
{
	unsigned char *end;

	cli_dbgmsg("%u trailing bytes to export\n", m->base64chars);

	if(m->base64chars == 0)
		return NULL;	/* nothing carried over */

	/* in == NULL asks decode() to flush the carried characters */
	end = decode(m, NULL, buf, base64, FALSE);
	m->base64chars = 0;

	return end;
}
|
e6b25cd3 |
/*
* Decode and transfer the contents of the message into a fileblob
* The caller must free the returned fileblob
*/
fileblob * |
985cc85e |
messageToFileblob(message *m, const char *dir, int destroy) |
e6b25cd3 |
{ |
985cc85e |
fileblob *fb;
|
a446de17 |
cli_dbgmsg("messageToFileblob\n"); |
6fe0da47 |
fb = messageExport(m, dir,
(void *(*)(void))fileblobCreate,
(void(*)(void *))fileblobDestroy,
(void(*)(void *, const char *, const char *))fileblobSetFilename,
(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
(void *(*)(text *, void *, int))textToFileblob,
(void(*)(void *, cli_ctx *))fileblobSetCTX,
destroy); |
985cc85e |
if(destroy && m->body_first) {
textDestroy(m->body_first);
m->body_first = m->body_last = NULL;
}
return fb; |
e6b25cd3 |
}
/* |
aea1b159 |
* Decode and transfer the contents of the message into a closed blob |
e6b25cd3 |
* The caller must free the returned blob
*/
blob * |
985cc85e |
messageToBlob(message *m, int destroy) |
e6b25cd3 |
{ |
fd969c26 |
blob *b;
cli_dbgmsg("messageToBlob\n");
b = messageExport(m, NULL, |
6fe0da47 |
(void *(*)(void))blobCreate,
(void(*)(void *))blobDestroy,
(void(*)(void *, const char *, const char *))blobSetFilename,
(void(*)(void *, const unsigned char *, size_t))blobAddData,
(void *(*)(text *, void *, int))textToBlob,
(void(*)(void *, cli_ctx *))NULL,
destroy); |
985cc85e |
if(destroy && m->body_first) {
textDestroy(m->body_first);
m->body_first = m->body_last = NULL;
}
return b; |
b151ef55 |
}
/*
* Decode and transfer the contents of the message into a text area |
ffd59a3e |
* The caller must free the returned text |
b151ef55 |
*/
text * |
285a69b4 |
messageToText(message *m) |
b151ef55 |
{ |
0e3b08fc |
int i; |
b151ef55 |
text *first = NULL, *last = NULL;
const text *t_line;
assert(m != NULL);
|
0e3b08fc |
if(m->numberOfEncTypes == 0) { |
b151ef55 |
/*
* Fast copy
*/
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
}
|
de617e3e |
if(last == NULL) { |
285a69b4 |
if(first)
textDestroy(first); |
cea95096 |
return NULL;
} |
0b08b624 |
if(t_line->t_line)
last->t_line = lineLink(t_line->t_line);
else
last->t_line = NULL; /* empty line */ |
b151ef55 |
} |
0e3b08fc |
if(last)
last->t_next = NULL;
return first;
}
/*
* Scan over the data a number of times once for each claimed encoding
* type
*/
for(i = 0; i < m->numberOfEncTypes; i++) {
const encoding_type enctype = m->encodingTypes[i];
cli_dbgmsg("messageToText: export transfer method %d = %d\n",
i, enctype); |
eac601be |
switch(enctype) {
case NOENCODING:
case BINARY:
case EIGHTBIT:
/*
* Fast copy
*/
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
}
if(last == NULL) {
if(first) {
last->t_next = NULL;
textDestroy(first);
}
return NULL;
}
if(t_line->t_line)
last->t_line = lineLink(t_line->t_line);
else
last->t_line = NULL; /* empty line */ |
0e3b08fc |
} |
eac601be |
continue;
case UUENCODE: |
b01f527d |
cli_errmsg("messageToText: Unexpected attempt to handle uuencoded file - report to http://bugs.clamav.net\n"); |
2add0ed7 |
if(first) {
last->t_next = NULL;
textDestroy(first); |
0e3b08fc |
} |
2add0ed7 |
return NULL; |
eac601be |
case YENCODE:
t_line = yEncBegin(m); |
00f95393 |
|
eac601be |
if(t_line == NULL) {
/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
if(first) {
last->t_next = NULL;
textDestroy(first);
}
return NULL; |
15bfc2e4 |
} |
eac601be |
t_line = t_line->t_next;
default:
if((i == 0) && binhexBegin(m))
cli_warnmsg("Binhex messages not supported yet.\n");
t_line = messageGetBody(m); |
a42dba7d |
} |
b151ef55 |
|
27a375f2 |
for(; t_line; t_line = t_line->t_next) {
unsigned char data[1024];
unsigned char *uptr; |
de617e3e |
const char *line = lineGetData(t_line->t_line); |
27a375f2 |
|
2add0ed7 |
if(enctype == BASE64) |
285a69b4 |
/*
* ignore blanks - breaks RFC which is
* probably the point!
*/
if(line == NULL)
continue; |
752c34b9 |
|
0d252351 |
assert((line == NULL) || (strlen(line) <= sizeof(data)));
|
0e3b08fc |
uptr = decodeLine(m, enctype, line, data, sizeof(data)); |
b151ef55 |
|
27a375f2 |
if(uptr == NULL)
break; |
b151ef55 |
|
27a375f2 |
assert(uptr <= &data[sizeof(data)]);
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
} |
b151ef55 |
|
98685ac1 |
if(last == NULL) |
bbf43447 |
break; |
752c34b9 |
|
290ba18f |
/*
* If the decoded line is the same as the encoded
* there's no need to take a copy, just link it.
* Note that the comparison is done without the
* trailing newline that the decoding routine may have
* added - that's why there's a strncmp rather than a
* strcmp - that'd be bad for MIME decoders, but is OK
* for AV software
*/ |
5eeffbb9 |
if((data[0] == '\n') || (data[0] == '\0'))
last->t_line = NULL; |
d16754aa |
else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) { |
74ca33e9 |
#ifdef CL_DEBUG |
290ba18f |
cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data); |
74ca33e9 |
#endif |
290ba18f |
last->t_line = lineLink(t_line->t_line);
} else |
5eeffbb9 |
last->t_line = lineCreate((char *)data); |
98685ac1 |
|
0e3b08fc |
if(line && enctype == BASE64) |
752c34b9 |
if(strchr(line, '='))
break; |
27a375f2 |
} |
82348395 |
if(m->base64chars) {
unsigned char data[4];
|
4b0e970e |
memset(data, '\0', sizeof(data)); |
5eeffbb9 |
if(decode(m, NULL, data, base64, FALSE) && data[0]) { |
82348395 |
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
}
if(last != NULL) |
5eeffbb9 |
last->t_line = lineCreate((char *)data); |
82348395 |
}
m->base64chars = 0;
} |
b151ef55 |
}
if(last)
last->t_next = NULL;
return first;
}
|
985cc85e |
/*
 * Accessor: hand back the yEnc marker stored in the message (m->yenc).
 * The value is returned as stored, so it is NULL when none has been set.
 */
text *
yEncBegin(message *m)
{
	return m->yenc;
}
|
b151ef55 |
/* |
a42dba7d |
* Scan to find the BINHEX message (if any)
*/ |
f5a4d7e8 |
#if 0 |
985cc85e |
const text *
binhexBegin(message *m) |
a42dba7d |
{
const text *t_line;
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
return t_line;
return NULL;
} |
f5a4d7e8 |
#else |
985cc85e |
text *
binhexBegin(message *m) |
f5a4d7e8 |
{
return m->binhex;
}
#endif |
a42dba7d |
/* |
5a01973c |
* Scan to find a bounce message. There is no standard for these, not
* even a convention, so don't expect this to be foolproof
*/ |
f5a4d7e8 |
#if 0 |
985cc85e |
text *
bounceBegin(message *m) |
5a01973c |
{
const text *t_line; |
bb5d6279 |
|
20fa2f53 |
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) |
06d4e856 |
if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL) |
20fa2f53 |
return t_line; |
5a01973c |
return NULL;
} |
f5a4d7e8 |
#else |
985cc85e |
text *
bounceBegin(message *m) |
f5a4d7e8 |
{
return m->bounce;
}
#endif
/*
* If a message does not contain another message which could be harmful
* it is deemed to be safe.
*
* TODO: ensure nothing can get through this
*
* TODO: check to see if we need to
* find anything else, perhaps anything
* from the RFC821 table?
*/
#if 0
int
messageIsAllText(const message *m)
{
const text *t;
for(t = messageGetBody(m); t; t = t->t_next)
if(strncasecmp(t->t_text,
"Content-Transfer-Encoding",
strlen("Content-Transfer-Encoding")) == 0)
return 0;
return 1;
}
#else |
985cc85e |
text *
encodingLine(message *m) |
f5a4d7e8 |
{ |
627465e7 |
return m->encoding; |
f5a4d7e8 |
}
#endif |
5a01973c |
|
a446de17 |
/*
 * Forget the encoding, bounce and binhex markers held in the message
 */
void
messageClearMarkers(message *m)
{
	m->binhex = NULL;
	m->bounce = NULL;
	m->encoding = NULL;
}
|
5a01973c |
/* |
b151ef55 |
* Decode a line and add it to a buffer, return the end of the buffer |
27a375f2 |
* to help appending callers. There is no new line at the end of "line" |
eaacc2de |
*
* len is sizeof(ptr) |
b151ef55 |
*/ |
d1a6ea81 |
unsigned char * |
0e3b08fc |
decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen) |
b151ef55 |
{ |
2a3ceff4 |
size_t len, reallen; |
bf8ea488 |
bool softbreak; |
23e1c37c |
char *p2, *copy; |
53ee0b60 |
char base64buf[RFC2045LENGTH + 1]; |
b151ef55 |
|
15bfc2e4 |
/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/ |
0d252351 |
|
b151ef55 |
assert(m != NULL); |
eaacc2de |
assert(buf != NULL); |
b151ef55 |
|
0e3b08fc |
switch(et) { |
ee576466 |
case BINARY:
/*
* TODO: find out what this is, encoded as binary??
*/
/* fall through */ |
b151ef55 |
case NOENCODING:
case EIGHTBIT: |
c6259ac5 |
default: /* unknown encoding type - try our best */ |
963e073f |
if(line) /* empty line? */ |
abac42dd |
buf = (unsigned char *)cli_strrcpy((char *)buf, line); |
b151ef55 |
/* Put the new line back in */ |
abac42dd |
return (unsigned char *)cli_strrcpy((char *)buf, "\n"); |
b151ef55 |
case QUOTEDPRINTABLE: |
98685ac1 |
if(line == NULL) { /* empty line */
*buf++ = '\n';
break;
} |
285a69b4 |
|
da850706 |
softbreak = FALSE; |
23e1c37c |
while(buflen && *line) { |
da850706 |
if(*line == '=') {
unsigned char byte;
if((*++line == '\0') || (*line == '\n')) {
softbreak = TRUE;
/* soft line break */
break;
}
byte = hex(*line);
if((*++line == '\0') || (*line == '\n')) {
/*
* broken e-mail, not
* adhering to RFC2045
*/
*buf++ = byte;
break;
}
|
582808c3 |
/*
* Fix by Torok Edvin
* <edwintorok@gmail.com>
* Handle messages that use a broken
* quoted-printable encoding of
* href=\"http://, instead of =3D
*/
if(byte != '=') {
byte <<= 4;
byte += hex(*line);
} else
line -= 2;
|
da850706 |
*buf++ = byte;
} else
*buf++ = *line; |
23e1c37c |
++line;
--buflen; |
da850706 |
} |
bf8ea488 |
if(!softbreak)
/* Put the new line back in */ |
eaacc2de |
*buf++ = '\n'; |
b151ef55 |
break;
case BASE64: |
98685ac1 |
if(line == NULL)
break; |
752c34b9 |
/* |
da850706 |
* RFC2045 sets the maximum length to 76 bytes |
752c34b9 |
* but many e-mail clients ignore that
*/ |
53ee0b60 |
if(strlen(line) < sizeof(base64buf)) {
strcpy(base64buf, line);
copy = base64buf;
} else { |
4db74788 |
copy = cli_strdup(line); |
53ee0b60 |
if(copy == NULL)
break;
} |
bbf43447 |
|
752c34b9 |
p2 = strchr(copy, '='); |
b151ef55 |
if(p2)
*p2 = '\0'; |
285a69b4 |
|
d17de037 |
sanitiseBase64(copy);
|
b151ef55 |
/*
* Klez doesn't always put "=" on the last line
*/ |
285a69b4 |
buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0)); |
752c34b9 |
|
53ee0b60 |
if(copy != base64buf)
free(copy); |
b151ef55 |
break;
case UUENCODE: |
b3a5cdd8 |
assert(m->base64chars == 0);
|
98685ac1 |
if((line == NULL) || (*line == '\0')) /* empty line */ |
3c52fb18 |
break; |
b151ef55 |
if(strcasecmp(line, "end") == 0)
break; |
64ff0d49 |
if(isuuencodebegin(line))
break; |
b151ef55 |
if((line[0] & 0x3F) == ' ')
break;
|
af66c329 |
/*
* reallen contains the number of bytes that were
* encoded
*/ |
2a3ceff4 |
reallen = (size_t)uudecode(*line++); |
af66c329 |
if(reallen <= 0)
break;
if(reallen > 62) |
2a3ceff4 |
break;
len = strlen(line); |
b151ef55 |
|
af66c329 |
if((len > buflen) || (reallen > len)) |
eaacc2de |
/*
* In practice this should never occur since
* the maximum length of a uuencoded line is
* 62 characters
*/ |
8dc9ee9e |
cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n"); |
2a3ceff4 |
else {
(void)decode(m, line, buf, uudecode, (len & 3) == 0);
buf = &buf[reallen];
} |
b3a5cdd8 |
m->base64chars = 0; /* this happens with broken uuencoded files */ |
b151ef55 |
break; |
00f95393 |
case YENCODE:
if((line == NULL) || (*line == '\0')) /* empty line */
break;
if(strncmp(line, "=yend ", 6) == 0)
break;
while(*line)
if(*line == '=') {
if(*++line == '\0')
break;
*buf++ = ((*line++ - 64) & 255);
} else
*buf++ = ((*line++ - 42) & 255);
break; |
b151ef55 |
}
|
eaacc2de |
*buf = '\0';
return buf; |
b151ef55 |
}
|
6ba88eb8 |
/* |
fb405afc |
* Remove the non base64 characters such as spaces from a string. Spaces
* shouldn't appear mid string in base64 files, but some broken mail clients
* ignore such errors rather than discarding the mail, and virus writers
* exploit this bug |
285a69b4 |
*/
static void |
fb405afc |
sanitiseBase64(char *s) |
285a69b4 |
{ |
15bfc2e4 |
/*cli_dbgmsg("sanitiseBase64 '%s'\n", s);*/ |
fb405afc |
for(; *s; s++) |
15bfc2e4 |
if(base64Table[(unsigned int)(*s & 0xFF)] == 255) { |
fb405afc |
char *p1; |
e982ca83 |
|
fb405afc |
for(p1 = s; p1[0] != '\0'; p1++)
p1[0] = p1[1]; |
811e3356 |
--s; |
fb405afc |
} |
285a69b4 |
}
/* |
6ba88eb8 |
* Returns one byte after the end of the decoded data in "out" |
285a69b4 |
*
* Update m->base64chars with the last few bytes of data that we haven't
* decoded. After the last line is found, decode will be called with in = NULL
* to flush these out |
6ba88eb8 |
*/ |
b151ef55 |
static unsigned char * |
285a69b4 |
decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
{
unsigned char b1, b2, b3, b4;
unsigned char cb1, cb2, cb3; /* carried over from last line */
|
15bfc2e4 |
/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in, |
285a69b4 |
in ? strlen(in) : 0, |
f0146bc6 |
isFast, m->base64chars);*/ |
285a69b4 |
cb1 = cb2 = cb3 = '\0';
switch(m->base64chars) {
case 3:
cb3 = m->base64_3;
/* FALLTHROUGH */
case 2:
cb2 = m->base64_2;
/* FALLTHROUGH */
case 1:
cb1 = m->base64_1;
isFast = FALSE;
break;
default:
assert(m->base64chars <= 3);
}
if(isFast)
/* Fast decoding if not last line */
while(*in) {
b1 = (*decoder)(*in++);
b2 = (*decoder)(*in++);
b3 = (*decoder)(*in++);
/*
* Put this line here to help on some compilers which
* can make use of some architecure's ability to
* multiprocess when different variables can be
* updated at the same time - here b3 is used in
* one line, b1/b2 in the next and b4 in the next after
* that, b3 and b4 rely on in but b1/b2 don't
*/
*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
b4 = (*decoder)(*in++);
*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
*out++ = (b3 << 6) | (b4 & 0x3F);
} |
0d252351 |
else if(in == NULL) { /* flush */
int nbytes;
if(m->base64chars == 0)
return out; |
285a69b4 |
|
0d252351 |
cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars, |
87901cab |
isalnum(cb1) ? cb1 : '@',
isalnum(cb2) ? cb2 : '@',
isalnum(cb3) ? cb3 : '@'); |
285a69b4 |
|
0d252351 |
m->base64chars--;
b1 = cb1;
nbytes = 1; |
d17de037 |
|
0d252351 |
if(m->base64chars) { |
285a69b4 |
m->base64chars--; |
0d252351 |
b2 = cb2; |
285a69b4 |
if(m->base64chars) { |
c8e1ad63 |
nbytes = 2; |
285a69b4 |
m->base64chars--; |
0d252351 |
b3 = cb3; |
362fe28f |
nbytes = 3; |
0d252351 |
} else if(b2) |
c8e1ad63 |
nbytes = 2; |
0d252351 |
} |
285a69b4 |
|
0d252351 |
switch(nbytes) {
case 3:
b4 = '\0';
/* fall through */
case 4:
*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
*out++ = (b2 << 4) | ((b3 >> 2) & 0xF); |
70b54406 |
if((nbytes == 4) || b3)
*out++ = (b3 << 6) | (b4 & 0x3F); |
0d252351 |
break;
case 2:
*out++ = (b1 << 2) | ((b2 >> 4) & 0x3); |
c8e1ad63 |
if((b2 << 4) & 0xFF)
*out++ = b2 << 4; |
0d252351 |
break;
case 1:
*out++ = b1 << 2;
break;
default:
assert(0);
}
} else while(*in) {
int nbytes; |
285a69b4 |
|
0d252351 |
if(m->base64chars) {
m->base64chars--;
b1 = cb1;
} else
b1 = (*decoder)(*in++); |
285a69b4 |
|
0d252351 |
if(*in == '\0') {
b2 = '\0';
nbytes = 1;
} else { |
285a69b4 |
if(m->base64chars) {
m->base64chars--; |
0d252351 |
b2 = cb2; |
285a69b4 |
} else |
0d252351 |
b2 = (*decoder)(*in++); |
285a69b4 |
if(*in == '\0') { |
0d252351 |
b3 = '\0';
nbytes = 2; |
285a69b4 |
} else {
if(m->base64chars) {
m->base64chars--; |
0d252351 |
b3 = cb3; |
285a69b4 |
} else |
0d252351 |
b3 = (*decoder)(*in++); |
285a69b4 |
if(*in == '\0') { |
0d252351 |
b4 = '\0';
nbytes = 3; |
285a69b4 |
} else { |
0d252351 |
b4 = (*decoder)(*in++);
nbytes = 4; |
285a69b4 |
}
} |
0d252351 |
} |
285a69b4 |
|
0d252351 |
switch(nbytes) {
case 3:
m->base64_3 = b3;
case 2:
m->base64_2 = b2;
case 1:
m->base64_1 = b1; |
285a69b4 |
break; |
0d252351 |
case 4:
*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
*out++ = (b3 << 6) | (b4 & 0x3F);
break;
default:
assert(0);
}
if(nbytes != 4) {
m->base64chars = nbytes;
break; |
285a69b4 |
}
}
return out;
} |
b151ef55 |
/*
 * Convert one hexadecimal digit (either case) to its value, 0 to 15.
 *
 * Anything else is logged and reported by returning '=', which callers
 * use as the "not a hex digit" marker.
 */
static unsigned char
hex(char c)
{
	/*
	 * plain char may be signed: <ctype.h> functions must not be given
	 * a negative value, so go via unsigned char
	 */
	if(isdigit((unsigned char)c))
		return c - '0';
	if((c >= 'A') && (c <= 'F'))
		return c - 'A' + 10;
	if((c >= 'a') && (c <= 'f'))
		return c - 'a' + 10;
	cli_dbgmsg("Illegal hex character '%c'\n", c);

	/*
	 * Some mails (notably some spam) break RFC2045 by failing to encode
	 * the '=' character
	 */
	return '=';
}
|
5ae253d2 |
static unsigned char
base64(char c)
{ |
15bfc2e4 |
const unsigned char ret = base64Table[(unsigned int)(c & 0xFF)]; |
5ae253d2 |
if(ret == 255) { |
0d252351 |
/*cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);*/ |
5ae253d2 |
return 63;
}
return ret;
} |
b151ef55 |
/*
 * Value of one uuencoded character: its distance from the space character,
 * reduced to an unsigned char
 */
static unsigned char
uudecode(char c)
{
	return (unsigned char)(c - ' ');
}
b4cb4486 |
/*
* These are the only arguments we're interested in.
* Do 'fgrep messageFindArgument *.c' if you don't believe me!
* It's probably not good doing this since each time a new
* messageFindArgument is added I need to remember to look here,
* but it can save a lot of memory...
*/
/*
 * Returns 1 if arg starts (ignoring case) with one of the argument names
 * listed below, otherwise logs that it is being discarded and returns 0.
 */
static int
usefulArg(const char *arg)
{
	static const struct {
		const char *name;
		size_t len;	/* number of characters compared */
	} wanted[] = {
		{ "name", 4 },
		{ "filename", 8 },
		{ "boundary", 8 },
		{ "protocol", 8 },
		{ "id", 2 },
		{ "number", 6 },
		{ "total", 5 },
		{ "type", 4 }
	};
	size_t i;

	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
		if(strncasecmp(arg, wanted[i].name, wanted[i].len) == 0)
			return 1;

	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
	return 0;
}
e24738dc |
|
b65d2aad |
/*
 * Remember ctx in the message (m->ctx). Only the pointer is stored.
 */
void
messageSetCTX(message *m, cli_ctx *ctx)
{
	m->ctx = ctx;
}
/*
 * Returns TRUE if the message's isInfected flag is set, FALSE if not
 */
int
messageContainsVirus(const message *m)
{
	if(m->isInfected)
		return TRUE;

	return FALSE;
}
|
e24738dc |
/*
* We've run out of memory. Try to recover some by
* deduping the message |
9a69a785 |
*
* FIXME: this can take a long time. The real solution is for system admins
* to refrain from setting ulimits too low, then this routine won't be
* called |
e24738dc |
*/
/*
 * Walk the body looking for later lines whose content is identical to an
 * earlier one and make them share the earlier line's storage.
 *
 * Lines shorter than 8 characters, lines whose reference count has reached
 * 255 and the lines the message markers (encoding, bounce, binhex, yenc)
 * point at are left alone. The scan stops once about 100,000 bytes have
 * been counted as saved.
 */
static void
messageDedup(message *m)
{
	const text *t1;
	size_t saved = 0;

	cli_dbgmsg("messageDedup\n");

	/*
	 * NOTE(review): m->dedupedThisFar is recorded at the end of each
	 * call, but the scan has always restarted from the head of the body
	 * (the value read from it used to be overwritten straight away by
	 * the loop initialiser). That behaviour is kept: the body can be
	 * destroyed elsewhere in this file without dedupedThisFar being
	 * reset, so resuming from it is not obviously safe.
	 */
	for(t1 = m->body_first; t1; t1 = t1->t_next) {
		const char *d1;
		text *t2;
		line_t *l1;
		unsigned int r1;
		size_t d1len;

		if(saved >= 100*1000)
			break;	/* that's enough */

		l1 = t1->t_line;
		if(l1 == NULL)
			continue;

		d1 = lineGetData(l1);
		d1len = strlen(d1);
		if(d1len < 8)
			continue;	/* wouldn't recover many bytes */

		r1 = (unsigned int)lineGetRefCount(l1);
		if(r1 == 255)
			continue;

		/*
		 * We don't want to foul up any pointers
		 */
		if((t1 == m->encoding) || (t1 == m->bounce) ||
		   (t1 == m->binhex) || (t1 == m->yenc))
			continue;

		for(t2 = t1->t_next; t2; t2 = t2->t_next) {
			const char *d2;
			line_t *l2 = t2->t_line;

			if(l2 == NULL)
				continue;
			d2 = lineGetData(l2);
			if(d1 == d2)
				/* already linked */
				continue;
			if(strcmp(d1, d2) != 0)
				continue;

			/*
			 * Same content: drop the duplicate and share l1. It
			 * is counted as saved when lineUnlink() returns NULL
			 */
			if(lineUnlink(l2) == NULL)
				saved += d1len + 1;
			t2->t_line = lineLink(l1);
			if(t2->t_line == NULL) {
				cli_errmsg("messageDedup: out of memory\n");
				return;
			}
			if(++r1 == 255)
				break;
		}
	}

	/* saved is a size_t, so it must not be printed with %u */
	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
	m->dedupedThisFar = t1;
}
b329234a |
/* |
5e5a162c |
* Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
* free, or NULL on error.
*
* TODO: Currently only handles paragraph 4 of RFC2231 e.g.
* protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
*/
/*
 * in:	a parameter such as
 *		protocol*=ansi-x3.4-1968''application%2Fpgp-signature
 *
 * Returns a malloc'd string: "name=" followed by the value with its %XX
 * escapes decoded and, for the "*=" form, the two leading quote-terminated
 * fields (charset and language) dropped. The input is returned as a plain
 * copy if it contains neither "*0=" nor "*=", or if it uses "*0*="
 * continuations, which aren't handled. An empty string is returned if the
 * two quote-terminated fields are not both present. NULL means out of memory.
 */
static char *
rfc2231(const char *in)
{
	const char *ptr;
	char *ret, *out;
	size_t namelen;
	enum { LANGUAGE, CHARSET, CONTENTS } field;

	if(strstr(in, "*0*=") != NULL) {
		cli_warnmsg("RFC2231 parameter continuations are not yet handled\n");
		return cli_strdup(in);
	}

	ptr = strstr(in, "*0=");
	if(ptr != NULL)
		/*
		 * Parameter continuation, with no continuation
		 * Thunderbird 1.5 (and possibly other versions) does this
		 */
		field = CONTENTS;
	else {
		ptr = strstr(in, "*=");
		field = LANGUAGE;
	}

	if(ptr == NULL)	/* quick return */
		return cli_strdup(in);

	cli_dbgmsg("rfc2231 '%s'\n", in);

	/* the result is never longer than the input */
	ret = cli_malloc(strlen(in) + 1);
	if(ret == NULL)
		return NULL;

	/*
	 * Copy the parameter name, i.e. everything before the '*'. "in" is
	 * deliberately left pointing at the start of the header so that the
	 * warning below shows all of it; it used to be advanced by the copy,
	 * which truncated the message.
	 */
	namelen = (size_t)(ptr - in);
	memcpy(ret, in, namelen);
	out = &ret[namelen];
	*out++ = '=';

	/* step over the "*=" or "*0=" */
	while(*ptr++ != '=')
		;

	/*
	 * We don't do anything with the language and character set, just skip
	 * over them!
	 */
	while(*ptr) {
		switch(field) {
			case LANGUAGE:
				if(*ptr == '\'')
					field = CHARSET;
				break;
			case CHARSET:
				if(*ptr == '\'')
					field = CONTENTS;
				break;
			case CONTENTS:
				if(*ptr == '%') {
					unsigned char byte;

					if((*++ptr == '\0') || (*ptr == '\n'))
						break;

					byte = hex(*ptr);

					if((*++ptr == '\0') || (*ptr == '\n')) {
						*out++ = byte;
						break;
					}
					byte <<= 4;
					byte += hex(*ptr);
					*out++ = byte;
				} else
					*out++ = *ptr;
		}
		if(*ptr++ == '\0')
			/*
			 * Incorrect message that has just one character after
			 * a '%'.
			 * FIXME: stash something in out that would, for example
			 * treat %2 as %02, assuming field == CONTENTS
			 */
			break;
	}

	if(field != CONTENTS) {
		free(ret);
		cli_warnmsg("Invalid RFC2231 header: '%s'\n", in);
		return cli_strdup("");
	}

	*out = '\0';

	cli_dbgmsg("rfc2231 returns '%s'\n", ret);

	return ret;
}
/* |
b329234a |
* common/simil:
* From Computing Magazine 20/8/92
* Returns %ge number from 0 to 100 - how similar are 2 strings?
* 100 for exact match, < 0 for error
*/
/* One element of the singly linked stack of strings used by simil() */
struct pstr_list { /* internal stack */
	char *d1;			/* the string held by this element */
	struct pstr_list *next;		/* link to the next element */
};
/*
 * Status codes. They are all negative so that they can't be mistaken for
 * the non-negative score that simil() returns on success.
 */
#define OUT_OF_MEMORY (-2)
#define FAILURE (-3)
#define SUCCESS (-4)
#define ARRAY_OVERFLOW (-5)
typedef struct pstr_list ELEMENT1;
typedef ELEMENT1 *LINK1;	/* the stack is referred to by a pointer to its top */
/* stack primitives and the comparison routine used by simil() */
static int push(LINK1 *top, const char *string);
static int pop(LINK1 *top, char *buffer);
static unsigned int compare(char *ls1, char **rs1, char *ls2, char **rs2);
|
4bdd7a93 |
#define MAX_PATTERN_SIZ 50 /* maximum string lengths */ |
b329234a |
static int
simil(const char *str1, const char *str2)
{
LINK1 top = NULL;
unsigned int score = 0; |
40d54f7f |
size_t common, total;
size_t len1, len2; |
b329234a |
char *rs1 = NULL, *rs2 = NULL;
char *s1, *s2; |
4db74788 |
char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ]; |
b329234a |
if(strcasecmp(str1, str2) == 0)
return 100;
|
4db74788 |
if((s1 = cli_strdup(str1)) == NULL) |
b329234a |
return OUT_OF_MEMORY; |
4db74788 |
if((s2 = cli_strdup(str2)) == NULL) { |
b329234a |
free(s1);
return OUT_OF_MEMORY;
}
if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
free(s1);
free(s2);
return ARRAY_OVERFLOW;
}
total += len2;
|
63f87938 |
if((push(&top, s1) == OUT_OF_MEMORY) ||
(push(&top, s2) == OUT_OF_MEMORY)) {
free(s1);
free(s2); |
b329234a |
return OUT_OF_MEMORY; |
63f87938 |
} |
b329234a |
while(pop(&top, ls2) == SUCCESS) {
pop(&top, ls1);
common = compare(ls1, &rs1, ls2, &rs2);
if(common > 0) { |
40d54f7f |
score += (unsigned int)common; |
b329234a |
len1 = strlen(ls1);
len2 = strlen(ls2);
if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
free(s1);
free(s2);
return OUT_OF_MEMORY;
}
len1 = strlen(rs1);
len2 = strlen(rs2);
if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
free(s1);
free(s2);
return OUT_OF_MEMORY;
}
}
}
free(s1);
free(s2);
return (total > 0) ? ((score * 200) / total) : 0;
}
/*
 * compare:
 *	Find the longest run of characters, ignoring case, that ls1 and ls2
 *	have in common.
 *
 * When a run is found both input strings are modified: each is cut off
 * (NUL written) at the start of its copy of the run, so ls1 and ls2 become
 * the text to the left of the run, while *rs1 and *rs2 are set to point just
 * past the run, i.e. at the text to the right of it.
 * When there is no common character the strings, *rs1 and *rs2 are left
 * untouched.
 *
 * Returns the length of the run, 0 if there is none.
 */
static unsigned int
compare(char *ls1, char **rs1, char *ls2, char **rs2)
{
	unsigned int common, maxchars = 0;
	char *s1, *s2;
	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
	char *cs1, *cs2, *start1, *end1, *end2;

	end1 = ls1 + strlen(ls1);
	end2 = ls2 + strlen(ls2);

	/* try every starting position in ls1 against the whole of ls2 */
	for(start1 = ls1; start1 < end1; start1++) {
		s1 = start1;
		s2 = ls2;

		while(s1 < end1 && s2 < end2) {
			/*
			 * The <ctype.h> functions are only defined for values
			 * representable as unsigned char (or EOF), so bytes
			 * with the top bit set must not be passed as plain char
			 */
			if(tolower((unsigned char)*s1) != tolower((unsigned char)*s2)) {
				s2++;
				continue;
			}

			/* measure the run that starts here */
			cs1 = s1;
			cs2 = s2;
			common = 0;
			do {
				if(s1 == end1 || s2 == end2)
					break;
				s1++;
				s2++;
				common++;
			} while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));

			if(common > maxchars) {
				unsigned int diff = common - maxchars;

				maxchars = common;
				maxs1 = cs1;
				maxs2 = cs2;
				maxe1 = s1;
				maxe2 = s2;
				/*
				 * A longer run must fit before the end, so
				 * there is no point starting one in the last
				 * maxchars characters
				 */
				end1 -= diff;
				end2 -= diff;
			} else
				s1 -= common;	/* not a record: rewind s1 only */
		}
	}

	/* maxs1 is set exactly when at least one run has been recorded */
	if(maxs1 != NULL) {
		*maxs1 = '\0';
		*maxs2 = '\0';
		*rs1 = maxe1;
		*rs2 = maxe2;
	}
	return maxchars;
}
/*
 * push:
 *	Put a private copy of string on top of the stack headed by *top.
 *
 * Returns SUCCESS, or OUT_OF_MEMORY if either allocation fails, in which
 * case the stack is left exactly as it was and nothing is leaked.
 * The copy is released again by pop().
 */
static int
push(LINK1 *top, const char *string)
{
	LINK1 element;

	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
		return OUT_OF_MEMORY;

	if((element->d1 = cli_strdup(string)) == NULL) {
		/* the node is not on the stack yet, so it must be freed here */
		free(element);
		return OUT_OF_MEMORY;
	}

	element->next = *top;
	*top = element;

	return SUCCESS;
}
/*
 * pop:
 *	Take the top string off the stack headed by *top, copying it into
 *	buffer and releasing the node and its string.
 *
 * The copy is unbounded, so buffer must be big enough for any string that
 * has been pushed.
 * Returns SUCCESS, or FAILURE (buffer untouched) if the stack is empty.
 */
static int
pop(LINK1 *top, char *buffer)
{
	LINK1 node = *top;

	if(node == NULL)
		return FAILURE;

	(void)strcpy(buffer, node->d1);

	/* unlink before freeing */
	*top = node->next;
	free(node->d1);
	free(node);

	return SUCCESS;
}
64ff0d49 |
/*
 * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
 *
 * The line must look like "begin DDD " where each D is a decimal digit.
 * The first character has to be a lower case 'b'; the rest of the word
 * "begin" is matched without regard to case.
 *
 * Returns 1 if the line matches, 0 otherwise.
 */
int
isuuencodebegin(const char *line)
{
	const unsigned char *mode;

	if(line[0] != 'b')	/* quick check */
		return 0;

	if(strncasecmp(line, "begin ", 6) != 0)
		return 0;

	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF), so don't hand them a plain char that
	 * could be negative
	 */
	mode = (const unsigned char *)&line[6];

	/*
	 * No separate length check is needed: isdigit() is false for the
	 * terminating NUL, so evaluation stops before reading past the end
	 * of a line shorter than 10 characters
	 */
	return isdigit(mode[0]) && isdigit(mode[1]) &&
		isdigit(mode[2]) && (line[9] == ' ');
}