b151ef55 |
/*
* Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
cec5297a |
*
* Change History:
* $Log: message.c,v $ |
438edd40 |
* Revision 1.31 2004/02/17 09:53:56 nigelhorne
* Added bounce message
* |
d5539026 |
* Revision 1.30 2004/02/13 14:23:56 nigelhorne
* Add a new bounce delimeter
* |
a4b10000 |
* Revision 1.29 2004/02/10 17:01:30 nigelhorne
* Recognise a new type of bounce message
* |
6b9ba2a4 |
* Revision 1.28 2004/02/07 23:13:55 nigelhorne
* Handle content-type: text/
* |
8b242bb9 |
* Revision 1.27 2004/02/06 13:46:08 kojm
* Support for clamav-config.h
* |
3e74af5d |
* Revision 1.26 2004/02/06 13:10:34 nigelhorne
* Now integrates with winzip
* |
bb5d6279 |
* Revision 1.25 2004/02/05 11:23:07 nigelhorne
* Bounce messages are now table driven
* |
bc75e1d1 |
* Revision 1.24 2004/02/04 13:29:16 nigelhorne
* Handle blobAddData of more than 128K
* |
40ab42d9 |
* Revision 1.23 2004/02/03 23:04:09 nigelhorne
* Disabled binhex code
* |
0bf1353d |
* Revision 1.22 2004/02/03 22:54:59 nigelhorne
* Catch another example of Worm.Dumaru.Y
* |
83ec020f |
* Revision 1.21 2004/02/03 14:35:37 nigelhorne
* Fixed an infinite loop on binhex
* |
0bac8a3c |
* Revision 1.20 2004/02/02 17:10:04 nigelhorne
* Scan a rare form of bounce message
* |
1058c39e |
* Revision 1.19 2004/02/02 15:52:09 nigelhorne
* Remove handling of 8bit binhex files for now
* |
3fbd1711 |
* Revision 1.18 2004/02/02 15:30:54 nigelhorne
* Remove handling of 8bit binhex files for now
* |
1068321e |
* Revision 1.17 2004/02/02 14:01:58 nigelhorne
* Carefully crafted binhex messages could have caused a crash
* |
5a01973c |
* Revision 1.16 2004/01/28 10:15:24 nigelhorne
* Added support to scan some bounce messages
* |
6a91c55b |
* Revision 1.15 2004/01/14 10:08:45 nigelhorne
* blobGetData now allows contents to be changed - tuttut
* |
f14906ba |
* Revision 1.14 2004/01/10 13:01:19 nigelhorne
* Added BinHex compression support
* |
a42dba7d |
* Revision 1.13 2004/01/09 18:01:03 nigelhorne
* Started BinHex work
* |
dd8a7e90 |
* Revision 1.12 2003/12/05 09:34:00 nigelhorne
* Use cli_tok instead of strtok - replaced now by cli_strtok
* |
eaacc2de |
* Revision 1.11 2003/11/17 07:57:12 nigelhorne
* Prevent buffer overflow in broken uuencoded files
* |
8b04b4f6 |
* Revision 1.10 2003/11/05 07:03:51 nigelhorne
* Handle broken content-disposition
* |
4674dc9a |
* Revision 1.9 2003/10/01 09:28:23 nigelhorne
* Handle content-type header going over to a new line
* |
cec5297a |
* Revision 1.8 2003/09/28 10:07:08 nigelhorne
* uuencodebegin() no longer static
* |
b151ef55 |
*/ |
438edd40 |
static char const rcsid[] = "$Id: message.c,v 1.31 2004/02/17 09:53:56 nigelhorne Exp $"; |
8b242bb9 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif |
b151ef55 |
#ifndef CL_DEBUG |
c6259ac5 |
/*#define NDEBUG /* map CLAMAV debug onto standard */ |
b151ef55 |
#endif
#ifdef CL_THREAD_SAFE |
dd8a7e90 |
#ifndef _REENTRANT |
b151ef55 |
#define _REENTRANT /* for Solaris 2.8 */
#endif |
dd8a7e90 |
#endif |
b151ef55 |
#if C_DARWIN
#include <sys/types.h>
#include <sys/malloc.h>
#else
#ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */
#include <malloc.h>
#endif
#endif
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include "mbox.h"
#include "blob.h"
#include "text.h" |
bb5d6279 |
#include "table.h" |
b151ef55 |
#include "strrcpy.h"
#include "others.h" |
dd8a7e90 |
#include "str.h" |
b151ef55 |
/* required for AIX and Tru64 */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
|
bf8ea488 |
/*
 * Local boolean type. Any TRUE/FALSE macros supplied by the platform
 * headers have been #undef'd just above so that these enumerators can be
 * declared.
 */
typedef enum { FALSE = 0, TRUE = 1 } bool;

/*
 * Helpers private to this file
 */

/* Decode one body line according to the encoding of message m */
static unsigned char *decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen);

/* Shared 4-characters-to-3-bytes decoder used for base64 and uuencode */
static unsigned char *decode(const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);

/* Per-character converters */
static unsigned char hex(char c);
static unsigned char base64(char c);
static unsigned char uudecode(char c);

/* Fetch the arg'th stored MIME argument of m */
static const char *messageGetArgument(const message *m, int arg);
/*
 * Maps the text of a Content-Transfer-Encoding value onto the internal
 * encoding_type. The table is searched linearly (see messageSetEncoding)
 * and is terminated by an entry whose string is NULL.
 *
 * These maps are ordered in decreasing likelihood of their appearance
 * in an e-mail
 */
static const struct encoding_map {
const char *string;
encoding_type type;
} encoding_map[] = {
{ "7bit", NOENCODING },
{ "quoted-printable", QUOTEDPRINTABLE }, /* rfc1522 */
{ "base64", BASE64 },
{ "8bit", EIGHTBIT },
{ "x-uuencode", UUENCODE },
{ "binary", BINARY },
{ NULL, 0 }
};
/*
 * Maps the major part of a Content-Type value onto the internal mime_type.
 * The table is searched linearly (see messageSetMimeType) and is
 * terminated by an entry whose string is NULL.
 *
 * The table is never modified, so it is declared const to match
 * encoding_map and the const pointer that walks it.
 */
static const struct mime_map {
	const char *string;
	mime_type type;
} mime_map[] = {
	{ "text", TEXT },
	{ "multipart", MULTIPART },
	{ "application", APPLICATION },
	{ "audio", AUDIO },
	{ "image", IMAGE },
	{ "message", MESSAGE },
	{ "video", VIDEO },
	{ NULL, 0 }
};
|
d5539026 |
/*
 * Lines which are known to introduce the copy of the original e-mail in
 * a bounce message. bounceBegin() compares complete body lines against
 * these strings. The list is terminated by NULL.
 */
static const char *bounces[] = {
	"--- Below this line is a copy of the message.",
	"------ This is a copy of the message, including all the headers. ------",
	"=================================================================================",
	"------- The original message sent:",
	" ----- Original message follows -----",
	"------- Original mail message ----",
	"------ A continuacion adjuntamos copia del mensaje, incluyendo las cabeceras. ------",
	",------- Returned Message --------",
	"A copy of the original message below this line:",
	NULL
};
|
b151ef55 |
/*
 * Allocate a new message with cli_calloc() and mark it as having no MIME
 * type yet.
 *
 * Returns the new message, or NULL if the allocation failed. The caller
 * releases it with messageDestroy().
 */
message *
messageCreate(void)
{
	message *m = (message *)cli_calloc(1, sizeof(message));

	/* Don't touch the structure if the allocation failed */
	if(m != NULL)
		m->mimeType = NOMIME;

	return m;
}
/*
 * Release everything owned by a message and then the message itself.
 * Like free(), passing NULL is harmless.
 */
void
messageDestroy(message *m)
{
	if(m == NULL)
		return;

	messageReset(m);
	free(m);
}
/*
 * Return a message to its freshly created state: release the subtype,
 * the disposition type, every stored argument and the body text, then
 * clear the structure and mark it as having no MIME type.
 *
 * m must not be NULL.
 */
void
messageReset(message *m)
{
	assert(m != NULL);

	/* free(NULL) is a no-op, so no guards are needed for these two */
	free(m->mimeSubtype);
	free(m->mimeDispositionType);

	if(m->mimeArguments != NULL) {
		int n;

		for(n = 0; n < m->numberOfArguments; n++)
			free(m->mimeArguments[n]);

		free(m->mimeArguments);
	}

	if(m->body_first != NULL)
		textDestroy(m->body_first);

	memset(m, '\0', sizeof(message));
	m->mimeType = NOMIME;
}
/*
 * Set the major MIME type of a message from the text of a Content-Type
 * header, e.g. "text" or "multipart".
 *
 * Leading white space is ignored and the comparison is case insensitive.
 * Types starting with "x-" become MEXTENSION. Anything else that is not
 * recognised (including an empty string) is reported and treated as
 * APPLICATION so that the part is still scanned.
 *
 * Neither mess nor type may be NULL.
 */
void
messageSetMimeType(message *mess, const char *type)
{
	const struct mime_map *m;

	assert(mess != NULL);
	assert(type != NULL);

	mess->mimeType = NOMIME;

	cli_dbgmsg("messageSetMimeType: '%s'\n", type);

	/*
	 * Ignore leading spaces. The cast is needed because passing a
	 * negative char to isspace() is undefined. isspace('\0') is false
	 * so the loop cannot run off the end of the string.
	 */
	while(isspace((unsigned char)*type))
		type++;

	for(m = mime_map; m->string; m++)
		if(strcasecmp(type, m->string) == 0) {
			mess->mimeType = m->type;
			break;
		}

	if(mess->mimeType == NOMIME) {
		if(strncasecmp(type, "x-", 2) == 0)
			mess->mimeType = MEXTENSION;
		else {
			/*
			 * Based on a suggestion by James Stevens
			 * <James@kyzo.com>
			 * Force scanning of strange messages
			 */
			cli_warnmsg("Unknown MIME type: `%s' - set to Application\n", type);
			mess->mimeType = APPLICATION;
		}
	}
}
/*
 * Report the major MIME type currently recorded for the message.
 */
mime_type
messageGetMimeType(const message *m)
{
	const mime_type current = m->mimeType;

	return current;
}
/*
 * Record the MIME subtype (the part after the '/') of a message. The
 * string is copied; any previous subtype is released.
 *
 * A NULL subtype is stored as the empty string.
 */
void
messageSetMimeSubtype(message *m, const char *subtype)
{
	char *copy;

	assert(m != NULL);

	if(subtype == NULL) {
		/*
		 * Handle broken content-type lines, e.g.
		 *	Content-Type: text/
		 */
		cli_dbgmsg("Empty content subtype\n");
		subtype = "";
	}

	/*
	 * Duplicate before freeing so that passing in the currently
	 * stored string (or a pointer into it) remains safe.
	 */
	copy = strdup(subtype);

	free(m->mimeSubtype);
	m->mimeSubtype = copy;
}
/*
 * Report the MIME subtype of the message. Never returns NULL: a message
 * with no subtype recorded yields the empty string.
 */
const char *
messageGetMimeSubtype(const message *m)
{
	if(m->mimeSubtype == NULL)
		return "";

	return m->mimeSubtype;
}
/*
 * Record the Content-Disposition type of a message. The string is
 * copied and any disposition recorded earlier is released, so calling
 * this more than once on the same message does not leak.
 *
 * A NULL disptype leaves the message unchanged.
 */
void
messageSetDispositionType(message *m, const char *disptype)
{
	assert(m != NULL);

	/*
	 * It's broken for there to be an entry such as "Content-Disposition:"
	 * However some spam and viruses are rather broken, it's a sign
	 * that something is wrong if we get that - maybe we should force a
	 * scan of this part
	 */
	if(disptype) {
		/* Duplicate first in case disptype aliases the old value */
		char *copy = strdup(disptype);

		free(m->mimeDispositionType);
		m->mimeDispositionType = copy;
	}
}
/*
 * Report the Content-Disposition type of the message. Never returns
 * NULL: a message with no disposition recorded yields the empty string.
 */
const char *
messageGetDispositionType(const message *m)
{
	if(m->mimeDispositionType == NULL)
		return "";

	return m->mimeDispositionType;
}
/*
 * Store one MIME argument (e.g. "charset=us-ascii") in a message.
 *
 * Leading white space is skipped. NULL or empty arguments are ignored,
 * as are arguments which are already stored (compared without regard to
 * case). The argument is copied. If memory runs out the argument is not
 * stored and the message is left as it was.
 *
 * An argument naming a file forces the MIME type of a message that has
 * none to "application", see below.
 *
 * TODO:
 *	Arguments are held on a per message basis, they should be held on
 * a per section basis. Otherwise what happens if two sections have two
 * different values for charset? Probably doesn't matter for the use this
 * code will be given, but will need fixing if this code is used elsewhere
 */
void
messageAddArgument(message *m, const char *arg)
{
	int offset;
	char *copy;

	assert(m != NULL);

	if(arg == NULL)
		return;	/* Note: this is not an error condition */

	/* The cast avoids undefined behaviour for negative chars */
	while(isspace((unsigned char)*arg))
		arg++;

	if(*arg == '\0')
		/* Empty argument? Probably a broken mail client... */
		return;

	cli_dbgmsg("Add argument '%s'\n", arg);

	/* Look for a free slot, giving up if the argument is a duplicate */
	for(offset = 0; offset < m->numberOfArguments; offset++)
		if(m->mimeArguments[offset] == NULL)
			break;
		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
			return;	/* already in there */

	copy = strdup(arg);

	if((copy != NULL) && (offset == m->numberOfArguments)) {
		/*
		 * No free slot, grow the array by one. Go through a
		 * temporary and only bump the count on success so that the
		 * message stays consistent if the allocation fails.
		 * NOTE(review): assumes cli_realloc(), like realloc(),
		 * leaves the old block intact on failure - confirm
		 */
		char **grown = (char **)cli_realloc(m->mimeArguments, ((size_t)m->numberOfArguments + 1) * sizeof(char *));

		if(grown == NULL) {
			free(copy);
			copy = NULL;
		} else {
			m->mimeArguments = grown;
			m->numberOfArguments++;
		}
	}

	if(copy != NULL)
		m->mimeArguments[offset] = copy;

	/*
	 * This is terribly broken from an RFC point of view but is useful
	 * for catching viruses which have a filename but no type of
	 * mime. By pretending defaulting to an application rather than
	 * to nomime we can ensure they're saved and scanned
	 */
	if((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0))
		if(messageGetMimeType(m) == NOMIME) {
			cli_dbgmsg("Force mime encoding to application\n");
			messageSetMimeType(m, "application");
		}
}
/*
 * Split a header tail into its arguments and add each of them to the
 * message with messageAddArgument().
 * Cope with:
 *	name="foo bar.doc"
 *	charset=foo name=bar
 *
 * Arguments are separated by white space and/or ';'. A quoted value is
 * stored without its quotes as key=value. An unquoted argument is stored
 * exactly as it appears, up to the next white space.
 *
 * Parsing stops at the first argument which can't be understood, or if
 * memory runs out; arguments already added are kept.
 *
 * s must not be NULL.
 */
void
messageAddArguments(message *m, const char *s)
{
	const char *string = s;

	/* Check before the string is handed to the debug output */
	assert(string != NULL);

	cli_dbgmsg("Add arguments '%s'\n", string);

	while(*string) {
		const char *key, *sep, *value;
		char *field;
		size_t keylen;

		if(isspace((unsigned char)*string) || (*string == ';')) {
			string++;
			continue;
		}

		key = string;

		sep = strchr(string, '=');

		/*
		 * Some spam breaks RFC1521 by using ':' instead of '='
		 * e.g.:
		 *	Content-Type: text/html; charset:ISO-8859-1
		 * should be:
		 *	Content-type: text/html; charset=ISO-8859-1
		 *
		 * We give up with lines that are completely broken because
		 * we don't have ESP and don't know what was meant to be there.
		 * It's unlikely to really be a problem.
		 */
		if(sep == NULL)
			sep = strchr(string, ':');

		if(sep == NULL) {
			/*
			 * Completely broken, give up
			 */
			cli_warnmsg("Can't parse non RFC1521 header \"%s\"\n",
				s);
			return;
		}

		keylen = (size_t)(sep - key);

		/*
		 * Handle white space to the right of the equals sign
		 */
		value = sep + 1;
		while(isspace((unsigned char)*value))
			value++;

		if(*value == '"') {
			const char *closing;
			size_t valuelen;

			/*
			 * The field is in quotes, so look for the
			 * closing quotes
			 *
			 * A missing closing quote can come from a weird
			 * e-mail header such as:
			 *	Content-Type: application/octet-stream; name="
			 * "
			 *	Content-Disposition: attachment; filename="
			 * "
			 *
			 * TODO: the file should still be saved and
			 * virus checked
			 */
			value++;
			closing = strchr(value, '"');

			if((closing == NULL) || (keylen == 0)) {
				cli_warnmsg("Can't parse header \"%s\"\n", s);
				return;
			}

			valuelen = (size_t)(closing - value);

			/* key + '=' + value + terminating NUL */
			field = cli_malloc(keylen + valuelen + 2);
			if(field == NULL)
				return;

			memcpy(field, key, keylen);
			field[keylen] = '=';
			memcpy(&field[keylen + 1], value, valuelen);
			field[keylen + 1 + valuelen] = '\0';

			string = closing + 1;
		} else {
			size_t len;

			if(*value == '\0') {
				cli_warnmsg("Ignoring empty field in \"%s\"\n", s);
				return;
			}

			/*
			 * The field is not in quotes, so look for the closing
			 * white space
			 */
			string = value + 1;
			while((*string != '\0') && !isspace((unsigned char)*string))
				string++;

			len = (size_t)(string - key);

			field = cli_malloc(len + 1);
			if(field == NULL)
				return;

			memcpy(field, key, len);
			field[len] = '\0';
		}

		messageAddArgument(m, field);
		free(field);
	}
}
static const char *
messageGetArgument(const message *m, int arg)
{
assert(m != NULL);
assert(arg >= 0); |
c6259ac5 |
assert(arg < m->numberOfArguments); |
b151ef55 |
return((m->mimeArguments[arg]) ? m->mimeArguments[arg] : "");
}
/*
* Find a MIME variable from the header and return a COPY to the value of that
* variable. The caller must free the copy
*/
const char *
messageFindArgument(const message *m, const char *variable)
{
int i;
assert(m != NULL);
assert(variable != NULL);
|
c6259ac5 |
for(i = 0; i < m->numberOfArguments; i++) { |
b151ef55 |
const char *ptr;
size_t len;
ptr = messageGetArgument(m, i);
if((ptr == NULL) || (*ptr == '\0'))
return(NULL);
len = strlen(variable);
#ifdef CL_DEBUG
cli_dbgmsg("messageFindArgument: compare %d bytes of %s with %s\n",
len, variable, ptr);
#endif
if(strncasecmp(ptr, variable, len) == 0) {
ptr = &ptr[len];
while(isspace(*ptr))
ptr++; |
752c34b9 |
if(*ptr != '=') {
cli_warnmsg("messageFindArgument: no '=' sign found in MIME header\n");
return NULL;
} |
b151ef55 |
if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) { |
bf8ea488 |
/* Remove any quote characters */ |
b151ef55 |
char *ret = strdup(++ptr); |
bf8ea488 |
char *p;
ret[strlen(ret) - 1] = '\0';
/*
* Thomas Lamy <Thomas.Lamy@in-online.net>:
* fix un-quoting of boundary strings from
* header, occurs if boundary was given as
* 'boundary="_Test_";'
*
* At least two quotes in string, assume
* quoted argument
* end string at next quote
*/
if((p = strchr(ret, '"')) != NULL)
*p = '\0'; |
b151ef55 |
return(ret);
}
return(strdup(ptr));
}
}
return(NULL);
}
/*
 * Set the transfer encoding of a message from the text of a
 * Content-Transfer-Encoding header. Leading tabs and spaces are skipped
 * and the name is matched without regard to case. A name which isn't in
 * encoding_map is reported and leaves the encoding as EEXTENSION.
 *
 * Neither m nor enctype may be NULL.
 */
void
messageSetEncoding(message *m, const char *enctype)
{
	const struct encoding_map *entry = encoding_map;

	assert(m != NULL);
	assert(enctype != NULL);

	/* Assume the worst until the name is recognised */
	m->encodingType = EEXTENSION;

	for(; (*enctype == ' ') || (*enctype == '\t'); enctype++)
		;

	while(entry->string != NULL) {
		if(strcasecmp(enctype, entry->string) == 0) {
			m->encodingType = entry->type;
			cli_dbgmsg("Encoding type is \"%s\"\n", enctype);
			return;
		}
		entry++;
	}

	cli_warnmsg("Unknown encoding type \"%s\"\n", enctype);
}
/*
 * Report the transfer encoding recorded for the message, which must not
 * be NULL.
 */
encoding_type
messageGetEncoding(const message *m)
{
	assert(m != NULL);

	return m->encodingType;
}
/*
 * Append a copy of one line of text to the body of a message.
 * Line should not be terminated by a \n
 *
 * A NULL line is stored as an empty line. If memory runs out the line is
 * dropped with a warning and the body is left exactly as it was.
 */
void
messageAddLine(message *m, const char *line)
{
	text *node;

	assert(m != NULL);

	/*
	 * Build the new node completely before it is linked in, so that a
	 * failed allocation can never leave a half initialised entry on
	 * the list
	 */
	node = (text *)cli_malloc(sizeof(text));
	if(node == NULL) {
		cli_warnmsg("messageAddLine: out of memory, line not stored\n");
		return;
	}

	node->t_text = strdup((line) ? line : "");
	if(node->t_text == NULL) {
		cli_warnmsg("messageAddLine: out of memory, line not stored\n");
		free(node);
		return;
	}
	node->t_next = NULL;

	if(m->body_first == NULL)
		m->body_first = node;
	else
		m->body_last->t_next = node;

	m->body_last = node;
}
/*
 * Give read-only access to the first line of the body of a message.
 * The result is NULL when no lines have been added. m must not be NULL.
 */
const text *
messageGetBody(const message *m)
{
	assert(m != NULL);

	return m->body_first;
}
/*
 * Clean up the message by removing trailing spaces and blank lines.
 * The work is done by textClean(); when it hands back a non-NULL
 * pointer that becomes the new last line of the body.
 */
void
messageClean(message *m)
{
	text *tail = textClean(m->body_first);

	if(tail != NULL)
		m->body_last = tail;
}
/*
* Decode and transfer the contents of the message into a blob
*/
blob *
messageToBlob(const message *m)
{
blob *b;
const text *t_line = NULL; |
dd8a7e90 |
char *filename; |
b151ef55 |
assert(m != NULL);
b = blobCreate();
assert(b != NULL);
/*
* Find the filename to decode
*/
if(messageGetEncoding(m) == UUENCODE) { |
27a375f2 |
t_line = uuencodeBegin(m); |
b151ef55 |
if(t_line == NULL) {
/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/
blobDestroy(b);
return NULL;
}
|
dd8a7e90 |
filename = cli_strtok(t_line->t_text, 2, " "); |
b151ef55 |
if(filename == NULL) { |
c6259ac5 |
cli_dbgmsg("UUencoded attachment sent with no filename\n"); |
b151ef55 |
blobDestroy(b);
return NULL;
} |
dd8a7e90 |
cli_chomp(filename); |
b151ef55 |
cli_dbgmsg("Set uuencode filename to \"%s\"\n", filename);
blobSetFilename(b, filename);
t_line = t_line->t_next; |
a42dba7d |
} else if((t_line = binhexBegin(m)) != NULL) {
unsigned char byte; |
bc75e1d1 |
unsigned long len, l, newlen = 0L; |
a42dba7d |
char *filename; |
bc75e1d1 |
unsigned char *ptr, *data; |
bb5d6279 |
int bytenumber; |
bc75e1d1 |
blob *tmp = blobCreate();
/*
* Table look up by Thomas Lamy <Thomas.Lamy@in-online.net>
* HQX conversion table - illegal chars are 0xff
*/
const unsigned char hqxtbl[] = {
/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
/* 00-0f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
/* 10-1f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
/* 20-2f */ 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
/* 30-3f */ 0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
/* 40-4f */ 0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
/* 50-5f */ 0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
/* 60-6f */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
/* 70-7f */ 0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
}; |
a42dba7d |
/*
* Decode BinHex4. First create a temporary blob which contains
* the encoded message. Then decode that blob to the target |
0bf1353d |
* blob, free the temporary blob and return the target one |
bc75e1d1 |
*
* See RFC1741 |
a42dba7d |
*/ |
bb5d6279 |
while((t_line = t_line->t_next) != NULL) |
a42dba7d |
blobAddData(tmp, (unsigned char *)t_line->t_text, strlen(t_line->t_text));
data = blobGetData(tmp); |
1068321e |
if(data == NULL) {
cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
blobDestroy(tmp);
blobDestroy(b);
return NULL;
} |
bc75e1d1 |
if(data[0] != ':') {
/*
* TODO: Need an example of this before I can be
* sure it works
* Possibly data[0] = '#'
*/
cli_warnmsg("8 bit binhex code is not yet supported\n");
blobDestroy(tmp);
blobDestroy(b);
return NULL;
} |
bb5d6279 |
len = blobGetDataSize(tmp);
|
6a91c55b |
/*
* FIXME: this is dirty code, modification of the contents
* of a member of the blob object should be done through blob.c |
bc75e1d1 |
*
* Convert 7 bit data into 8 bit |
6a91c55b |
*/ |
bc75e1d1 |
cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len); |
a42dba7d |
|
bc75e1d1 |
ptr = cli_malloc(len);
memcpy(ptr, data, len); |
bb5d6279 |
bytenumber = 0; |
a42dba7d |
|
bc75e1d1 |
/*
* ptr now contains the encoded (7bit) data - len bytes long
* data will contain the unencoded (8bit) data
*/
for(l = 1; l < len; l++) {
unsigned char c = ptr[l]; |
a42dba7d |
|
bc75e1d1 |
if(c == ':')
break; |
a42dba7d |
|
bb5d6279 |
if((c == '\n') || (c == '\r'))
continue;
|
bc75e1d1 |
if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
break;
}
c = hqxtbl[c];
assert(c <= 63); |
83ec020f |
|
bc75e1d1 |
/*
* These masks probably aren't needed, but
* they're here to verify the code is correct
*/
switch(bytenumber) {
case 0:
data[newlen] = (c << 2) & 0xFC;
bytenumber = 1; |
a42dba7d |
break; |
bc75e1d1 |
case 1:
data[newlen++] |= (c >> 4) & 0x3;
data[newlen] = (c << 4) & 0xF0;
bytenumber = 2;
break;
case 2:
data[newlen++] |= (c >> 2) & 0xF;
data[newlen] = (c << 6) & 0xC0;
bytenumber = 3;
break;
case 3:
data[newlen++] |= c & 0x3F;
bytenumber = 0;
break;
}
} |
bb5d6279 |
|
bc75e1d1 |
cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
/*
* Throw away the old encoded (7bit) data
* data now points to the encoded (8bit) data - newlen bytes
*
* The data array may contain repetitive characters
*/
free(ptr);
/*
* Uncompress repetitive characters
*/
if(memchr(data, 0x90, newlen)) {
blob *u = blobCreate(); /* uncompressed data */
/*
* Includes compression
*/
for(l = 0L; l < newlen; l++) {
unsigned char c = data[l]; |
a42dba7d |
/* |
bc75e1d1 |
* TODO: handle the case where the first byte
* is 0x90 |
a42dba7d |
*/ |
bc75e1d1 |
blobAddData(u, &c, 1);
if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
int count;
l += 2;
count = data[l];
#ifdef CL_DEBUG
cli_dbgmsg("uncompress HQX7 at 0x%06x: %d repetitive bytes\n", l, count);
#endif
if(count == 0) {
c = 0x90;
blobAddData(u, &c, 1);
} else
while(--count > 0)
blobAddData(u, &c, 1); |
a42dba7d |
}
} |
3fbd1711 |
blobDestroy(tmp); |
bc75e1d1 |
tmp = u;
data = blobGetData(tmp);
len = blobGetDataSize(tmp);
cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
} else {
len = newlen;
cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
len); |
a42dba7d |
}
/* |
bc75e1d1 |
* The blob tmp now contains the uncompressed data
* of len bytes, i.e. the repetitive bytes have been removed
*/
/*
* Parse the header
* |
a42dba7d |
* TODO: set filename argument in message as well
*/
byte = data[0];
filename = cli_malloc(byte + 1); |
bc75e1d1 |
memcpy(filename, &data[1], byte); |
a42dba7d |
filename[byte] = '\0';
blobSetFilename(b, filename);
/*
* skip over length, filename, version, type, creator and flags
*/
byte = 1 + byte + 1 + 4 + 4 + 2; |
83ec020f |
/*
* Set len to be the data fork length
*/ |
bc75e1d1 |
len = ((data[byte] << 24) & 0xFF000000) |
((data[byte + 1] << 16) & 0xFF0000) |
((data[byte + 2] << 8) & 0xFF00) |
(data[byte + 3] & 0xFF);
cli_dbgmsg("Filename = '%s', data fork length = %lu bytes\n",
filename, len);
free((char *)filename); |
a42dba7d |
/*
* Skip over data fork length, resource fork length and CRC
*/
byte += 10;
|
bc75e1d1 |
blobAddData(b, &data[byte], len); |
a42dba7d |
blobDestroy(tmp);
return b; |
b151ef55 |
} else {
/*
* Discard attachments with no filename
*/ |
6a91c55b |
filename = (char *)messageFindArgument(m, "filename"); |
b151ef55 |
if(filename == NULL) { |
6a91c55b |
filename = (char *)messageFindArgument(m, "name"); |
b151ef55 |
if(filename == NULL) { |
c6259ac5 |
cli_dbgmsg("Attachment sent with no filename\n"); |
b151ef55 |
blobDestroy(b);
return NULL;
}
}
blobSetFilename(b, filename);
t_line = messageGetBody(m);
} |
dd8a7e90 |
free((char *)filename); |
b151ef55 |
/*
* t_line should now point to the first (encoded) line of the message
*/
if(t_line == NULL) {
cli_warnmsg("Empty attachment not saved\n");
blobDestroy(b);
return NULL;
}
if(messageGetEncoding(m) == NOENCODING)
/*
* Fast copy
*/
do {
blobAddData(b, (unsigned char *)t_line->t_text, strlen(t_line->t_text));
blobAddData(b, (unsigned char *)"\n", 1);
} while((t_line = t_line->t_next) != NULL);
else
do {
unsigned char data[1024];
unsigned char *uptr; |
27a375f2 |
const char *line = t_line->t_text; |
b151ef55 |
if(messageGetEncoding(m) == UUENCODE)
if(strcasecmp(line, "end") == 0)
break; |
752c34b9 |
|
eaacc2de |
uptr = decodeLine(m, line, data, sizeof(data)); |
b151ef55 |
|
c6259ac5 |
if(uptr == NULL)
break; |
b151ef55 |
assert(uptr <= &data[sizeof(data)]);
blobAddData(b, data, (size_t)(uptr - data)); |
752c34b9 |
/*
* According to RFC1521, '=' is used to pad out
* the last byte and should be used as evidence
* of the end of the data. Some mail clients
* annoyingly then put plain text after the '='
* bytes. Sigh
*/
if(messageGetEncoding(m) == BASE64)
if(strchr(line, '='))
break;
|
b151ef55 |
} while((t_line = t_line->t_next) != NULL);
return b;
}
/*
* Decode and transfer the contents of the message into a text area
*/
text *
messageToText(const message *m)
{
text *first = NULL, *last = NULL;
const text *t_line;
assert(m != NULL);
if(messageGetEncoding(m) == NOENCODING)
/*
* Fast copy
*/
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
const char *line;
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
}
assert(last != NULL);
line = t_line->t_text;
last->t_text = cli_malloc(strlen(line) + 2);
assert(last->t_text != NULL);
sprintf(last->t_text, "%s\n", line);
} |
27a375f2 |
else {
if(messageGetEncoding(m) == UUENCODE) {
t_line = uuencodeBegin(m); |
b151ef55 |
|
27a375f2 |
if(t_line == NULL) {
/*cli_warnmsg("UUENCODED attachment is missing begin statement\n");*/
return NULL;
}
t_line = t_line->t_next; |
a42dba7d |
} else { |
1068321e |
if(binhexBegin(m)) |
a42dba7d |
cli_warnmsg("Binhex messages not supported yet (2).\n"); |
27a375f2 |
t_line = messageGetBody(m); |
a42dba7d |
} |
b151ef55 |
|
27a375f2 |
for(; t_line; t_line = t_line->t_next) {
unsigned char data[1024];
unsigned char *uptr;
const char *line = t_line->t_text;
if(messageGetEncoding(m) == UUENCODE)
if(strcasecmp(line, "end") == 0)
break; |
752c34b9 |
|
eaacc2de |
uptr = decodeLine(m, line, data, sizeof(data)); |
b151ef55 |
|
27a375f2 |
if(uptr == NULL)
break; |
b151ef55 |
|
27a375f2 |
assert(uptr <= &data[sizeof(data)]);
if(first == NULL)
first = last = cli_malloc(sizeof(text));
else {
last->t_next = cli_malloc(sizeof(text));
last = last->t_next;
}
assert(last != NULL); |
b151ef55 |
|
27a375f2 |
last->t_text = strdup((char *)data);
assert(last->t_text != NULL); |
752c34b9 |
if(messageGetEncoding(m) == BASE64)
if(strchr(line, '='))
break; |
27a375f2 |
} |
b151ef55 |
}
if(last)
last->t_next = NULL;
return first;
}
|
a42dba7d |
/*
* Scan to find the UUENCODED message (if any)
*/ |
cec5297a |
const text * |
27a375f2 |
uuencodeBegin(const message *m)
{
const text *t_line;
/*
* Fix based on an idea by Magnus Jonsson
* <Magnus.Jonsson@umdac.umu.se>, to allow for blank
* lines before the begin. Should not happen, but some
* e-mail clients are rather broken...
*/
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
const char *line = t_line->t_text;
if((strncasecmp(line, "begin ", 6) == 0) &&
(isdigit(line[6])) &&
(isdigit(line[7])) &&
(isdigit(line[8])) &&
(line[9] == ' '))
return t_line;
}
return NULL;
}
|
b151ef55 |
/*
 * Scan to find the BINHEX message (if any)
 *
 * Returns the body line which consists of the BinHex 4.0 banner,
 * compared without regard to case, or NULL if no line matches.
 */
const text *
binhexBegin(const message *m)
{
	const text *candidate = messageGetBody(m);

	while(candidate != NULL) {
		if(strcasecmp(candidate->t_text, "(This file must be converted with BinHex 4.0)") == 0)
			break;

		candidate = candidate->t_next;
	}

	/* NULL here means the whole body was searched without a match */
	return candidate;
}
/* |
5a01973c |
* Scan to find a bounce message. There is no standard for these, not
* even a convention, so don't expect this to be foolproof
*/
const text *
bounceBegin(const message *m)
{
const text *t_line; |
bb5d6279 |
static table_t *bounceMessages;
if(bounceMessages == NULL) {
const char **bounce;
bounceMessages = tableCreate();
for(bounce = bounces; *bounce; bounce++)
if(tableInsert(bounceMessages, *bounce, 1) < 0)
cli_warnmsg("Bounce messages starting with\n\t%s\nwon't be detected\n",
*bounce);
} |
5a01973c |
for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) |
bb5d6279 |
if(tableFind(bounceMessages, t_line->t_text) == 1) |
5a01973c |
return t_line;
return NULL;
}
/* |
b151ef55 |
* Decode a line and add it to a buffer, return the end of the buffer |
27a375f2 |
* to help appending callers. There is no new line at the end of "line" |
eaacc2de |
*
* len is sizeof(ptr) |
b151ef55 |
*/
static unsigned char * |
eaacc2de |
decodeLine(const message *m, const char *line, unsigned char *buf, size_t buflen) |
b151ef55 |
{ |
dd8a7e90 |
size_t len; |
bf8ea488 |
bool softbreak; |
b151ef55 |
char *p2; |
752c34b9 |
char *copy; |
b151ef55 |
assert(m != NULL);
assert(line != NULL); |
eaacc2de |
assert(buf != NULL); |
b151ef55 |
switch(messageGetEncoding(m)) {
case NOENCODING:
case EIGHTBIT: |
c6259ac5 |
default: /* unknown encoding type - try our best */ |
eaacc2de |
buf = (unsigned char *)strrcpy((char *)buf, line); |
b151ef55 |
/* Put the new line back in */ |
eaacc2de |
return (unsigned char *)strrcpy((char *)buf, "\n"); |
b151ef55 |
case QUOTEDPRINTABLE: |
bf8ea488 |
softbreak = FALSE; |
b151ef55 |
while(*line) {
if(*line == '=') {
unsigned char byte;
|
bf8ea488 |
if((*++line == '\0') || (*line == '\n')) {
softbreak = TRUE; |
b151ef55 |
/* soft line break */
break; |
bf8ea488 |
} |
b151ef55 |
|
752c34b9 |
byte = hex(*line);
if((*++line == '\0') || (*line == '\n')) {
/*
* broken e-mail, not
* adhering to RFC1522
*/ |
eaacc2de |
*buf++ = byte; |
752c34b9 |
break;
}
|
b151ef55 |
byte <<= 4;
byte += hex(*line); |
eaacc2de |
*buf++ = byte; |
bf8ea488 |
} else |
eaacc2de |
*buf++ = *line; |
b151ef55 |
line++;
} |
bf8ea488 |
if(!softbreak)
/* Put the new line back in */ |
eaacc2de |
*buf++ = '\n'; |
b151ef55 |
break;
case BASE64: |
752c34b9 |
/*
* RFC1521 sets the maximum length to 76 bytes
* but many e-mail clients ignore that
*/
copy = strdup(line);
p2 = strchr(copy, '='); |
b151ef55 |
if(p2)
*p2 = '\0';
/*
* Klez doesn't always put "=" on the last line
*/ |
eaacc2de |
/*buf = decode(line, buf, base64, p2 == NULL);*/
buf = decode(copy, buf, base64, 0); |
752c34b9 |
free(copy); |
b151ef55 |
break;
case UUENCODE: |
cec5297a |
if(*line == '\0') /* empty line */ |
3c52fb18 |
break; |
b151ef55 |
if(strncasecmp(line, "begin ", 6) == 0)
break;
if(strcasecmp(line, "end") == 0)
break;
if((line[0] & 0x3F) == ' ')
break;
len = *line++ - ' ';
|
eaacc2de |
if(len > buflen)
/*
* In practice this should never occur since
* the maximum length of a uuencoded line is
* 62 characters
*/
cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail");
else
buf = decode(line, buf, uudecode, (len & 3) == 0); |
b151ef55 |
break;
case BINARY:
/*
* TODO: find out what this is, encoded as binary??
*/
break;
}
|
eaacc2de |
*buf = '\0';
return buf; |
b151ef55 |
}
/*
 * Turn groups of four encoded characters into three bytes each.
 *
 *	in	NUL terminated encoded text
 *	out	where to store the result, the caller makes sure it is big
 *		enough (3 bytes for every 4 characters or part thereof)
 *	decoder	converts one encoded character into its 6 bit value
 *	isFast	the caller believes the text is made up of complete
 *		groups so that no end of text tests are needed
 *
 * Returns a pointer to just after the last byte stored.
 *
 * The fast path takes four characters at a time without looking for the
 * end of the text, so it is only used when the length really is a
 * multiple of four. Otherwise a wrong isFast would cause a read beyond
 * the terminating NUL.
 */
static unsigned char *
decode(const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
{
	unsigned char b1, b2 = '\0', b3 = '\0', b4 = '\0';
	int nbytes;

	if(isFast && ((strlen(in) & 3) != 0))
		isFast = 0;

	if(isFast)
		/* Fast decoding if not last line */
		while(*in) {
			b1 = (*decoder)(*in++);
			b2 = (*decoder)(*in++);
			b3 = (*decoder)(*in++);
			b4 = (*decoder)(*in++);
			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
			*out++ = (b3 << 6) | (b4 & 0x3F);
		}
	else
		/* Slower decoding for last line */
		while(*in) {
			/* nbytes counts the characters found in this group */
			b1 = (*decoder)(*in++);
			if(*in == '\0') {
				b2 = '\0';
				nbytes = 1;
			} else {
				b2 = (*decoder)(*in++);
				if(*in == '\0') {
					b3 = '\0';
					nbytes = 2;
				} else {
					b3 = (*decoder)(*in++);

					if(*in == '\0') {
						b4 = '\0';
						nbytes = 3;
					} else {
						b4 = (*decoder)(*in++);
						nbytes = 4;
					}
				}
			}

			switch(nbytes) {
				case 3:
					b4 = '\0';
					/* fall through */
				case 4:
					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
					*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
					*out++ = (b3 << 6) | (b4 & 0x3F);
					break;
				case 2:
					*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
					*out++ = b2 << 4;
					break;
				case 1:
					*out++ = b1 << 2;
					break;
				default:
					assert(0);
			}
			/* A short group can only be the last one */
			if(nbytes != 4)
				break;
		}
	return out;
}
/*
 * Convert one hexadecimal digit of a quoted-printable escape into its
 * value, 0 to 15.
 *
 * RFC2045 asks for upper case letters only, but lower case is accepted
 * as well since there are mailers which produce it.
 *
 * Anything which isn't a hexadecimal digit yields '=', see below.
 */
static unsigned char
hex(char c)
{
	/* The cast avoids undefined behaviour for negative chars */
	if(isdigit((unsigned char)c))
		return c - '0';
	if((c >= 'A') && (c <= 'F'))
		return c - 'A' + 10;
	if((c >= 'a') && (c <= 'f'))
		return c - 'a' + 10;

	/*
	 * Some mails (notably some spam) break RFC1522 by failing to encode
	 * the '=' character
	 */
	return '=';
}
/*
 * Convert one character of base64 text into its 6 bit value:
 *	A-Z	 0 to 25
 *	a-z	26 to 51
 *	0-9	52 to 61
 *	+	62
 *	/	63
 *
 * Any other character is reported and also yields 63.
 */
static unsigned char
base64(char c)
{
	/*
	 * Passing a negative char to the <ctype.h> functions is
	 * undefined, which matters here because the text comes straight
	 * from an e-mail
	 */
	const unsigned char uc = (unsigned char)c;

	if(isupper(uc))
		return c - 'A';
	if(islower(uc))
		return c - 'a' + 26;
	if(isdigit(uc))
		return c - '0' + 52;
	if(c == '+')
		return 62;

	if(c != '/')
		cli_warnmsg("Illegal character <%c> in base64 encoding\n", c);

	return 63;
}
/*
 * Convert one character of uuencoded text into its 6 bit value.
 *
 * The result is reduced to 6 bits so that '`', which many encoders
 * write in place of a space, gives 0 as it should. Only the low 6 bits
 * of the result are used by decode(), so this makes no difference to
 * the data it produces.
 */
static unsigned char
uudecode(char c)
{
	return (unsigned char)((c - ' ') & 0x3F);
}