/* * Copyright (C) 2002 Nigel Horne * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Change History: * $Log: mbox.c,v $ * Revision 1.30 2004/01/13 10:12:05 nigelhorne * Remove duplicate code when handling multipart messages * * Revision 1.29 2004/01/09 18:27:11 nigelhorne * ParseMimeHeader could corrupt arg * * Revision 1.28 2004/01/09 15:07:42 nigelhorne * Re-engineered update 1.11 lost in recent changes * * Revision 1.27 2004/01/09 14:45:59 nigelhorne * Removed duplicated code in multipart handler * * Revision 1.26 2004/01/09 10:20:54 nigelhorne * Locate uuencoded viruses hidden in text poritions of multipart/mixed mime messages * * Revision 1.25 2004/01/06 14:41:18 nigelhorne * Handle headers which do not not have a space after the ':' * * Revision 1.24 2003/12/20 13:55:36 nigelhorne * Ensure multipart just save the bodies of attachments * * Revision 1.23 2003/12/14 18:07:01 nigelhorne * Some viruses in embedded messages were not being found * * Revision 1.22 2003/12/13 16:42:23 nigelhorne * call new cli_chomp * * Revision 1.21 2003/12/11 14:35:48 nigelhorne * Better handling of encapsulated messages * * Revision 1.20 2003/12/06 04:03:26 nigelhorne * Handle hand crafted emails that incorrectly set multipart headers * * Revision 1.19 2003/11/21 07:26:31 nigelhorne * Scan multipart alternatives that have no boundaries, finds some 
uuencoded happy99
 *
 * Revision 1.18  2003/11/17 08:13:21  nigelhorne
 * Handle spaces at the end of lines of MIME headers
 *
 * Revision 1.17  2003/11/06 05:06:42  nigelhorne
 * Some applications weren't being scanned
 *
 * Revision 1.16  2003/11/04 08:24:00  nigelhorne
 * Handle multipart messages that have no text portion
 *
 * Revision 1.15  2003/10/12 20:13:49  nigelhorne
 * Use NO_STRTOK_R consistent with message.c
 *
 * Revision 1.14  2003/10/12 12:37:11  nigelhorne
 * Appledouble encoded EICAR now found
 *
 * Revision 1.13  2003/10/01 09:27:42  nigelhorne
 * Handle content-type header going over to a new line
 *
 * Revision 1.12  2003/09/29 17:10:19  nigelhorne
 * Moved stub from heap to stack since its maximum size is known
 *
 * Revision 1.11  2003/09/29 12:58:32  nigelhorne
 * Handle Content-Type: /; name="eicar.com"
 *
 * Revision 1.10  2003/09/28 10:06:34  nigelhorne
 * Compilable under SCO; removed duplicate code with message.c
 *
 */
/* Revision identifier embedded in the object file */
static char const rcsid[] = "$Id: mbox.c,v 1.30 2004/01/13 10:12:05 nigelhorne Exp $";

#ifndef CL_DEBUG
/*#define NDEBUG /* map CLAMAV debug onto standard */
#endif

#ifdef CL_THREAD_SAFE
#ifndef _REENTRANT
#define _REENTRANT /* for Solaris 2.8 */
#endif
#endif

/*
 * NOTE(review): the header names of the following system includes are
 * missing from this copy of the file (they look like they were stripped
 * together with their angle brackets) - restore them from the upstream
 * source before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Project headers */
#include "table.h"
#include "mbox.h"
#include "blob.h"
#include "text.h"
#include "message.h"
#include "others.h"
#include "defaults.h"
#include "str.h"

/*
 * When strtok_r is unavailable (NO_STRTOK_R) or the build is not thread
 * safe, map strtok_r onto plain strtok. The third (save pointer) argument
 * is discarded by the macro, which is why the "strptr" locals in this file
 * are only declared under CL_THREAD_SAFE.
 */
#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
#undef strtok_r
#undef __strtok_r
#define strtok_r(a,b,c) strtok(a,b)
#endif

/* required for AIX and Tru64 */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif

/* Local boolean type; any previous TRUE/FALSE macros were removed above */
typedef enum { FALSE = 0, TRUE = 1 } bool;

/* Forward declarations of the file-local helpers */
static message *parseEmailHeaders(const message *m, const table_t *rfc821Table);
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
static int parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char
*dir, table_t *rfc821Table, table_t *subtypeTable);
static int boundaryStart(const char *line, const char *boundary);
static int endOfMessage(const char *line, const char *boundary);
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static int getTextPart(message *const messages[], size_t size);
static size_t strip(char *buf, int len);
static size_t strstrip(char *s);
static bool continuationMarker(const char *line);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
static bool saveFile(const blob *b, const char *dir);

/* Maximum number of attachments that we accept */
#define MAX_ATTACHMENTS 10

/* Maximum line length according to RFC821 */
#define LINE_LENGTH 1000

/* Hashcodes for our hash tables */
#define CONTENT_TYPE 1
#define CONTENT_TRANSFER_ENCODING 2
#define CONTENT_DISPOSITION 3

/* Mime sub types */
#define PLAIN 1
#define ENRICHED 2
#define HTML 3
#define RICHTEXT 4
#define MIXED 5
#define ALTERNATIVE 6
#define DIGEST 7
#define SIGNED 8
#define PARALLEL 9
#define RELATED 10 /* RFC2387 */
#define REPORT 11 /* RFC1892 */
#define APPLEDOUBLE 12 /* Handling of this is only rudimentary for now */

/*
 * Key/value pairs that initialiseTables() inserts into the two lookup
 * tables: rfc821headers maps the recognised header names (including the
 * trailing ':') to the hashcodes above, mimeSubtypes maps MIME subtype
 * names to the subtype codes above. Both arrays end with a NULL key.
 */
static const struct tableinit {
	const char *key;
	int value;
} rfc821headers[] = {
	/* TODO: make these regular expressions */
	{ "Content-Type:", CONTENT_TYPE },
	{ "Content-Transfer-Encoding:", CONTENT_TRANSFER_ENCODING },
	{ "Content-Disposition:", CONTENT_DISPOSITION },
	{ NULL, 0 }
}, mimeSubtypes[] = {
	/* subtypes of Text */
	{ "plain", PLAIN },
	{ "enriched", ENRICHED },
	{ "html", HTML },
	{ "richtext", RICHTEXT },
	/* subtypes of Multipart */
	{ "mixed", MIXED },
	{ "alternative", ALTERNATIVE },
	{ "digest", DIGEST },
	{ "signed", SIGNED },
	{ "parallel", PARALLEL },
	{ "related", RELATED },
	{ "report", REPORT },
	{ "appledouble", APPLEDOUBLE },
	{ NULL, 0 }
};

/* Maximum filenames under various systems */
#ifndef NAME_MAX /* e.g. Linux */
#ifdef MAXNAMELEN /* e.g.
Solaris */ #define NAME_MAX MAXNAMELEN #else #ifdef FILENAME_MAX /* e.g. SCO */ #define NAME_MAX FILENAME_MAX #endif #endif #endif /* * TODO: when signal handling is added, need to remove temp files when a * signal is received * TODO: add option to scan in memory not via temp files, perhaps with a * named pipe or memory mapped file? * TODO: if debug is enabled, catch a segfault and dump the current e-mail * in it's entirety, then call abort() * TODO: parse .msg format files * TODO: fully handle AppleDouble format, see * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf * TODO: ensure parseEmailHeaders is always called before parseEmailBody * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody */ int cl_mbox(const char *dir, int desc) { int retcode, i; message *m, *body; table_t *rfc821Table, *subtypeTable; FILE *fd; char buffer[LINE_LENGTH]; cli_dbgmsg("in mbox()\n"); i = dup(desc); if((fd = fdopen(i, "rb")) == NULL) { cli_errmsg("Can't open descriptor %d\n", desc); close(i); return -1; } if(fgets(buffer, sizeof(buffer), fd) == NULL) { /* empty message */ fclose(fd); return 0; } m = messageCreate(); assert(m != NULL); if(initialiseTables(&rfc821Table, &subtypeTable) < 0) { messageDestroy(m); fclose(fd); return -1; } /* * is it a UNIX style mbox with more than one * mail message, or just a single mail message? 
*/ if(strncmp(buffer, "From ", 5) == 0) { /* * Have been asked to check a UNIX style mbox file, which * may contain more than one e-mail message to decode */ bool lastLineWasEmpty = FALSE; do { /*cli_dbgmsg("read: %s", buffer);*/ cli_chomp(buffer); if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) { /* * End of a message in the mail box */ body = parseEmailHeaders(m, rfc821Table); messageDestroy(m); messageClean(body); if(messageGetBody(body)) if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) break; /* * Starting a new message, throw away all the * information about the old one */ m = body; messageReset(body); lastLineWasEmpty = TRUE; cli_dbgmsg("Finished processing message\n"); } else lastLineWasEmpty = (buffer[0] == '\0'); messageAddLine(m, buffer); } while(fgets(buffer, sizeof(buffer), fd) != NULL); } else /* * It's a single message, parse the headers then the body */ do { cli_chomp(buffer); messageAddLine(m, buffer); } while(fgets(buffer, sizeof(buffer), fd) != NULL); fclose(fd); retcode = 0; body = parseEmailHeaders(m, rfc821Table); messageDestroy(m); /* * Write out the last entry in the mailbox */ messageClean(body); if(messageGetBody(body)) if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) retcode = -1; /* * Tidy up and quit */ messageDestroy(body); tableDestroy(rfc821Table); tableDestroy(subtypeTable); cli_dbgmsg("cli_mbox returning %d\n", retcode); return retcode; } /* * The given message contains a raw e-mail. 
* * This function parses the headers of m and sets the message's arguments * * Returns the message's body with the correct arguments set */ static message * parseEmailHeaders(const message *m, const table_t *rfc821Table) { bool inContinuationHeader = FALSE; bool inHeader = TRUE; text *t, *msgText; message *ret; if(m == NULL) return NULL; msgText = messageToText(m); if(msgText == NULL) return NULL; t = msgText; ret = messageCreate(); do { char *buffer = strdup(t->t_text); #ifdef CL_THREAD_SAFE char *strptr; #endif cli_chomp(buffer); /* * Section B.2 of RFC822 says TAB or SPACE means * a continuation of the previous entry. */ if(inHeader && ((buffer[0] == '\t') || (buffer[0] == ' '))) inContinuationHeader = TRUE; if(inContinuationHeader) { const char *ptr; if(!continuationMarker(buffer)) inContinuationHeader = FALSE; /* no more args */ /* * Add all the arguments on the line */ for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) messageAddArgument(ret, ptr); } else if(inHeader) { cli_dbgmsg("Deal with header %s\n", buffer); /* * A blank line signifies the end of the header and * the start of the text */ if(strstrip(buffer) == 0) { cli_dbgmsg("End of header information\n"); inHeader = FALSE; } else if(parseEmailHeader(ret, buffer, rfc821Table) == CONTENT_TYPE) inContinuationHeader = continuationMarker(buffer); } else messageAddLine(ret, buffer); free(buffer); } while((t = t->t_next) != NULL); textDestroy(msgText); return ret; } /* * Handle a header line of an email message * TODO: handle spaces before the ':' */ static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table) { char *copy = strdup(line); char *cmd; int ret = -1; #ifdef CL_THREAD_SAFE char *strptr; #endif cmd = strtok_r(copy, " \t", &strptr); if(*cmd) { char *arg = strtok_r(NULL, "", &strptr); if(arg) /* * Found a header such as * Content-Type: multipart/mixed; * set arg to be * "multipart/mixed" and cmd to * be "Content-Type:" */ ret = 
parseMimeHeader(m, cmd, rfc821Table, arg); else { /* * Handle the case where the * header does not have a space * after the ':', e.g. * Content-Type:multipart/mixed; */ arg = strchr(cmd, ':'); if(arg && (*++arg != '\0')) { char *p; cmd = strdup(cmd); p = strchr(cmd, ':'); *++p = '\0'; ret = parseMimeHeader(m, cmd, rfc821Table, arg); free(cmd); } } } free(copy); return ret; } /* * This is a recursive routine. * * This function parses the body of mainMessage and saves its attachments in dir * * mainMessage is the buffer to be parsed, it contains an e-mail's body, without * any headers. First * time of calling it'll be * the whole message. Later it'll be parts of a multipart message * textIn is the plain text message being built up so far * blobsIn contains the array of attachments found so far * * Returns: * 0 for fail * 1 for success, attachments saved * 2 for success, attachments not saved */ static int /* success or fail */ parseEmailBody(message *mainMessage, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable) { message *messages[MAXALTERNATIVE]; int inhead, inMimeHead, i, rc, htmltextPart, multiparts = 0; text *aText; blob *blobList[MAX_ATTACHMENTS], **blobs; const char *cptr; cli_dbgmsg("in parseEmailBody(nBlobs = %d)\n", nBlobs); /* Pre-assertions */ if(nBlobs >= MAX_ATTACHMENTS) { cli_warnmsg("Not all attachments will be scanned\n"); return 2; } aText = textIn; blobs = blobsIn; /* Anything left to be parsed? 
*/ if(mainMessage && (messageGetBody(mainMessage) != NULL)) { int numberOfAttachments = 0; mime_type mimeType; const char *mimeSubtype; const text *t_line; /*bool isAlternative;*/ const char *boundary; message *aMessage; cli_dbgmsg("Parsing mail file\n"); mimeType = messageGetMimeType(mainMessage); mimeSubtype = messageGetMimeSubtype(mainMessage); if((mimeType == TEXT) && (tableFind(subtypeTable, mimeSubtype) == PLAIN)) { /* * This is effectively no encoding, notice that we * don't check that charset is us-ascii */ cli_dbgmsg("assume no encoding\n"); mimeType = NOMIME; } cli_dbgmsg("mimeType = %d\n", mimeType); switch(mimeType) { case NOMIME: aText = textAddMessage(aText, mainMessage); break; case TEXT: if(tableFind(subtypeTable, mimeSubtype) == PLAIN) aText = textCopy(messageGetBody(mainMessage)); break; case MULTIPART: assert(mimeSubtype[0] != '\0'); boundary = messageFindArgument(mainMessage, "boundary"); if(boundary == NULL) { cli_warnmsg("Multipart MIME message contains no boundaries\n"); /* Broken e-mail message */ mimeType = NOMIME; /* * The break means that we will still * check if the file contains a uuencoded file */ break; } /* * Get to the start of the first message */ for(t_line = messageGetBody(mainMessage); t_line; t_line = t_line->t_next) if(boundaryStart(t_line->t_text, boundary)) break; if(t_line == NULL) { cli_warnmsg("Multipart MIME message contains no parts\n"); /* * Free added by Thomas Lamy * */ free((char *)boundary); mimeType = NOMIME; /* * The break means that we will still * check if the file contains a uuencoded file */ break; } /* * Build up a table of all of the parts of this * multipart message. Remember, each part may itself * be a multipart message. 
*/ inhead = 1; inMimeHead = 0; /* * This looks like parseEmailHeaders() - maybe there's * some duplication of code to be cleaned up */ for(multiparts = 0; t_line && (multiparts < MAXALTERNATIVE); multiparts++) { aMessage = messages[multiparts] = messageCreate(); cli_dbgmsg("Now read in part %d\n", multiparts); /* tk: shut up parentheses warning */ while((t_line = t_line->t_next)) { const char *line = t_line->t_text; /*cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n", inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");*/ if(inMimeHead) { cli_dbgmsg("About to add mime Argument '%s'\n", line); while(isspace((int)*line)) line++; if(*line == '\0') { inhead = inMimeHead = 0; continue; } /* * This may cause a trailing ';' * to be added if this test * fails - TODO: verify this */ inMimeHead = continuationMarker(line); messageAddArgument(aMessage, line); } else if(inhead) { if(strlen(line) == 0) { inhead = 0; continue; } /* * Some clients are broken and * put white space after the ; */ inMimeHead = continuationMarker(line); if(!inMimeHead) if(t_line->t_next && ((t_line->t_next->t_text[0] == '\t') || (t_line->t_next->t_text[0] == ' '))) inMimeHead = TRUE; parseEmailHeader(aMessage, line, rfc821Table); } else if(boundaryStart(line, boundary)) { inhead = 1; break; } else if(endOfMessage(line, boundary)) { /* * Some viruses put information * *after* the end of message, * which presumably some broken * mail clients find, so we * can't assume that this * is the end of the message */ /* t_line = NULL;*/ break; } else messageAddLine(aMessage, line); } messageClean(aMessage); } free((char *)boundary); if(multiparts == 0) return 2; /* Nothing to do */ cli_dbgmsg("The message has %d parts\n", multiparts); cli_dbgmsg("Find out the multipart type(%s)\n", mimeSubtype); switch(tableFind(subtypeTable, mimeSubtype)) { case RELATED: cli_dbgmsg("Multipart related handler\n"); /* * Have a look to see if there's HTML code * which will 
need scanning */ aMessage = NULL; assert(multiparts > 0); htmltextPart = getTextPart(messages, multiparts); if(htmltextPart >= 0) aText = textAddMessage(aText, messages[htmltextPart]); else /* * There isn't an HTML bit. If there's a * multipart bit, it'll may be in there * somewhere */ for(i = 0; i < multiparts; i++) if(messageGetMimeType(messages[i]) == MULTIPART) { aMessage = messages[i]; htmltextPart = i; break; } if(htmltextPart == -1) { cli_dbgmsg("No HTML code found to be scanned"); rc = 0; } else rc = parseEmailBody(aMessage, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); blobArrayDestroy(blobs, nBlobs); blobs = NULL; nBlobs = 0; /* * Fixed based on an idea from Stephen White * The message is confused about the difference * between alternative and related. Badtrans.B * suffers from this problem. * * Fall through in this case: * Content-Type: multipart/related; * type="multipart/alternative" */ /* * Changed to always fall through based on * an idea from Michael Dankov * that some viruses are completely confused * about the difference between related * and mixed */ /*cptr = messageFindArgument(mainMessage, "type"); if(cptr == NULL) break; isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0); free((char *)cptr); if(!isAlternative) break;*/ case ALTERNATIVE: cli_dbgmsg("Multipart alternative handler\n"); htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0; aMessage = messages[htmltextPart]; aText = textAddMessage(aText, aMessage); rc = parseEmailBody(NULL, blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); if(rc == 1) { /* * Alternative message has saved its * attachments, ensure we don't do * the same thing */ blobArrayDestroy(blobs, nBlobs); blobs = NULL; nBlobs = 0; rc = 2; } /* * Fall through - some clients are broken and * say alternative instead of mixed. 
The Klez * virus is broken that way */ case REPORT: /* * According to section 1 of RFC1892, the * syntax of multipart/report is the same * as multipart/mixed. There are some required * parameters, but there's no need for us to * verify that they exist */ case MIXED: case APPLEDOUBLE: /* not really supported */ /* * Look for attachments * * Not all formats are supported. If an * unsupported format turns out to be * common enough to implement, it is a simple * matter to add it */ if(aText) mainMessage = NULL; cli_dbgmsg("Mixed message with %d parts\n", multiparts); for(i = 0; i < multiparts; i++) { bool addAttachment = FALSE; bool addToText = FALSE; const char *dtype; text *t; message *body; aMessage = messages[i]; assert(aMessage != NULL); dtype = messageGetDispositionType(aMessage); cptr = messageGetMimeSubtype(aMessage); cli_dbgmsg("Mixed message part %d is of type %d\n", i, messageGetMimeType(aMessage)); switch(messageGetMimeType(aMessage)) { case APPLICATION: #if 0 /* strict checking... 
*/ if((strcasecmp(dtype, "attachment") == 0) || (strcasecmp(cptr, "x-msdownload") == 0) || (strcasecmp(cptr, "octet-stream") == 0) || (strcasecmp(dtype, "octet-stream") == 0)) addAttachment = TRUE; else { cli_dbgmsg("Discarded mixed/application not sent as attachment\n"); continue; } #endif addAttachment = TRUE; break; case NOMIME: mainMessage = NULL; addToText = TRUE; if(messageGetBody(aMessage) == NULL) /* * No plain text version */ messageAddLine(aMessage, "No plain text alternative"); assert(messageGetBody(aMessage) != NULL); break; case TEXT: cli_dbgmsg("Mixed message text part disposition \"%s\"\n", dtype); if(strcasecmp(dtype, "attachment") == 0) addAttachment = TRUE; else if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) { const text *t_line = uuencodeBegin(aMessage); mainMessage = NULL; if(t_line) { cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n"); messageSetEncoding(aMessage, "x-uuencode"); addAttachment = TRUE; } else if(strcasecmp(messageGetMimeSubtype(aMessage), "plain") == 0) { /* * Strictly speaking * a text/html part is * not an attachment. 
We * pretend it is so that * we can decode and * scan it */ cli_dbgmsg("Adding part to main message\n"); addToText = TRUE; } else { messageAddArgument(aMessage, "filename=textportion"); addAttachment = TRUE; } } else { cli_dbgmsg("Text type %s is not supported", dtype); continue; } break; case MESSAGE: cli_dbgmsg("Found message inside multipart\n"); body = parseEmailHeaders(aMessage, rfc821Table); if(body) { rc = parseEmailBody(body, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable); messageDestroy(body); } continue; case MULTIPART: /* * It's a multi part within a multi part * Run the message parser on this bit, it won't * be an attachment * */ cli_dbgmsg("Found multipart inside multipart\n"); if(aMessage) { body = parseEmailHeaders(aMessage, rfc821Table); if(body) { t = messageToText(body); rc = parseEmailBody(body, blobs, nBlobs, t, dir, rfc821Table, subtypeTable); textDestroy(t); mainMessage = body; } } else { rc = parseEmailBody(NULL, blobs, nBlobs, NULL, dir, rfc821Table, subtypeTable); mainMessage = NULL; } continue; case AUDIO: case IMAGE: /* * TODO: it may be nice to * have an option to throw * away all images and sound * files for ultra-secure sites */ addAttachment = TRUE; break; default: cli_dbgmsg("Only text and application attachments are supported, type = %d\n", messageGetMimeType(aMessage)); continue; } /* * It must be either text or * an attachment. 
It can't be both */ assert(addToText || addAttachment); assert(!(addToText && addAttachment)); if(addToText) aText = textAdd(aText, messageGetBody(aMessage)); else if(addAttachment) { blob *aBlob = messageToBlob(aMessage); if(aBlob) { assert(blobGetFilename(aBlob) != NULL); /*if(blobGetDataSize(aBlob) > 0)*/ blobList[numberOfAttachments++] = aBlob; } } } if(numberOfAttachments == 0) { /* No usable attachment was found */ rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable); break; } /* * Store any existing attachments at the end of * the list we've just built up */ for(i = 0; i < nBlobs; i++) { #ifdef CL_DEBUG assert(blobs[i]->magic == BLOB); #endif blobList[numberOfAttachments++] = blobs[i]; } rc = parseEmailBody(mainMessage, blobList, numberOfAttachments, aText, dir, rfc821Table, subtypeTable); break; case DIGEST: /* * TODO: * According to section 5.1.5 RFC2046, the * default mime type of multipart/digest parts * is message/rfc822 */ case SIGNED: case PARALLEL: /* * If we're here it could be because we have a * multipart/mixed message, consisting of a * message followed by an attachment. That * message itself is a multipart/alternative * message and we need to dig out the plain * text part of that alternative */ htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0; rc = parseEmailBody(messages[htmltextPart], blobs, nBlobs, aText, dir, rfc821Table, subtypeTable); blobArrayDestroy(blobs, nBlobs); blobs = NULL; nBlobs = 0; break; default: /* * According to section 7.2.6 of RFC1521, * unrecognised multiparts should be treated as * multipart/mixed. I don't do this yet so * that I can see what comes along... 
*/ cli_warnmsg("Unsupported multipart format `%s'\n", mimeSubtype); rc = 0; } for(i = 0; i < multiparts; i++) messageDestroy(messages[i]); if(blobs && (blobsIn == NULL)) puts("arraydestroy"); if(aText && (textIn == NULL)) textDestroy(aText); return rc; case MESSAGE: /* * Check for forbidden encodings */ switch(messageGetEncoding(mainMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: break; default: cli_warnmsg("MIME type 'message' cannot be decoded\n"); break; } if((strcasecmp(mimeSubtype, "rfc822") == 0) || (strcasecmp(mimeSubtype, "delivery-status") == 0)) { /* * Found a message encapsulated within * another message * * Thomas Lamy : * ensure t is correctly freed */ text *t, *msgText = messageToText(mainMessage); message *m, *body; t = msgText; assert(t != NULL); m = messageCreate(); assert(m != NULL); cli_dbgmsg("Decode rfc822"); do { char *buffer = strdup(t->t_text); cli_chomp(buffer); messageAddLine(m, buffer); free(buffer); } while((t = t->t_next) != NULL); textDestroy(msgText); body = parseEmailHeaders(m, rfc821Table); messageDestroy(m); m = body; messageClean(m); if(messageGetBody(m)) rc = parseEmailBody(m, NULL, 0, NULL, dir, rfc821Table, subtypeTable); messageDestroy(m); break; } else if(strcasecmp(mimeSubtype, "partial") == 0) /* TODO */ cli_warnmsg("Content-type message/partial not yet supported"); else if(strcasecmp(mimeSubtype, "external-body") == 0) /* * I don't believe that we should be going * around the Internet looking for referenced * files... 
*/ cli_warnmsg("Attempt to send Content-type message/external-body trapped"); else cli_warnmsg("Unsupported message format `%s'\n", mimeSubtype); return 0; case APPLICATION: cptr = messageGetMimeSubtype(mainMessage); /*if((strcasecmp(cptr, "octet-stream") == 0) || (strcasecmp(cptr, "x-msdownload") == 0)) {*/ { blob *aBlob = messageToBlob(mainMessage); if(aBlob) { cli_dbgmsg("Saving main message as attachment %d\n", nBlobs); assert(blobGetFilename(aBlob) != NULL); /* * It's likely that we won't have built * a set of attachments */ if(blobs == NULL) blobs = blobList; for(i = 0; i < nBlobs; i++) if(blobs[i] == NULL) break; blobs[i] = aBlob; if(i == nBlobs) { nBlobs++; assert(nBlobs < MAX_ATTACHMENTS); } } } /*else cli_warnmsg("Discarded application not sent as attachment\n");*/ break; case AUDIO: case VIDEO: case IMAGE: break; default: cli_warnmsg("Message received with unknown mime encoding"); break; } } cli_dbgmsg("%d attachments found\n", nBlobs); rc = 1; if(nBlobs == 0) { blob *b; /* * No attachments - scan the text portions, often files * are hidden in HTML code */ cli_dbgmsg("%d multiparts found\n", multiparts); for(i = 0; i < multiparts; i++) { b = messageToBlob(messages[i]); assert(b != NULL); cli_dbgmsg("Saving multipart %d, encoded with scheme %d\n", i, messageGetEncoding(messages[i])); (void)saveFile(b, dir); blobDestroy(b); } if(mainMessage) { /* * Look for uu-encoded main file */ const text *t_line = uuencodeBegin(mainMessage); if(t_line != NULL) { cli_dbgmsg("Found uuencoded file\n"); /* * Main part contains uuencoded section */ messageSetEncoding(mainMessage, "x-uuencode"); if((b = messageToBlob(mainMessage)) != NULL) { if((cptr = blobGetFilename(b)) != NULL) { cli_dbgmsg("Found uuencoded message %s\n", cptr); (void)saveFile(b, dir); } blobDestroy(b); } } else { cli_dbgmsg("Not found uuencoded file\n"); messageAddArgument(mainMessage, "filename=textportion"); if((b = messageToBlob(mainMessage)) != NULL) { /* * Save main part to scan that */ 
cli_dbgmsg("Saving main message, encoded with scheme %d\n", messageGetEncoding(mainMessage)); (void)saveFile(b, dir); blobDestroy(b); } } } else rc = (multiparts) ? 1 : 2; /* anything saved? */ } else { short attachmentNumber; for(attachmentNumber = 0; attachmentNumber < nBlobs; attachmentNumber++) { blob *b = blobs[attachmentNumber]; if(b) { if(!saveFile(b, dir)) break; blobDestroy(b); blobs[attachmentNumber] = NULL; } } } if(aText && (textIn == NULL)) textDestroy(aText); /* Already done */ if(blobs && (blobsIn == NULL)) blobArrayDestroy(blobs, nBlobs); cli_dbgmsg("parseEmailBody() returning %d\n", rc); return rc; } /* * Is the current line the start of a new section? * * New sections start with --boundary */ static int boundaryStart(const char *line, const char *boundary) { /* * Gibe.B3 is broken it has: * boundary="---- =_NextPart_000_01C31177.9DC7C000" * but it's boundaries look like * ------ =_NextPart_000_01C31177.9DC7C000 * notice the extra '-' */ if(strstr(line, boundary) != NULL) { cli_dbgmsg("found %s in %s\n", boundary, line); return 1; } if(*line++ != '-') return 0; if(*line++ != '-') return 0; return strcasecmp(line, boundary) == 0; } /* * Is the current line the end? 
*
 * The message ends with --boundary--
 *
 * Returns non-zero only if line is exactly "--", then boundary (compared
 * without regard to case), then "--"
 */
static int
endOfMessage(const char *line, const char *boundary)
{
	size_t len;

	if(*line++ != '-')
		return 0;
	if(*line++ != '-')
		return 0;

	len = strlen(boundary);
	if(strncasecmp(line, boundary, len) != 0)
		return 0;

	/* nothing but the closing "--" may follow the boundary */
	if(strlen(line) != (len + 2))
		return 0;

	line = &line[len];
	if(*line++ != '-')
		return 0;
	return *line == '-';
}

/*
 * Initialise the various lookup tables
 *
 * Creates *rfc821Table from rfc821headers[] and *subtypeTable from
 * mimeSubtypes[].
 *
 * Returns 0 on success. On failure returns -1, releases whatever had been
 * created and leaves both pointers set to NULL, so the caller has nothing
 * to destroy.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
	const struct tableinit *tableinit;

	*rfc821Table = NULL;
	*subtypeTable = NULL;

	/*
	 * Initialise the various look up tables
	 */
	*rfc821Table = tableCreate();
	if(*rfc821Table == NULL)
		return -1;

	for(tableinit = rfc821headers; tableinit->key; tableinit++)
		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0)
			goto fail;

	*subtypeTable = tableCreate();
	if(*subtypeTable == NULL)
		goto fail;

	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0)
			goto fail;

	return 0;

fail:
	/* don't leak a half built table */
	if(*subtypeTable != NULL) {
		tableDestroy(*subtypeTable);
		*subtypeTable = NULL;
	}
	tableDestroy(*rfc821Table);
	*rfc821Table = NULL;

	return -1;
}

/*
 * If there's a HTML text version use that, otherwise
 * use the first text part, otherwise just use the
 * first one around.
HTML text is most likely to include * a scripting worm * * If we can't find one, return -1 */ static int getTextPart(message *const messages[], size_t size) { size_t i; for(i = 0; i < size; i++) { assert(messages[i] != NULL); if((messageGetMimeType(messages[i]) == TEXT) && (strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)) return (int)i; } for(i = 0; i < size; i++) if(messageGetMimeType(messages[i]) == TEXT) return (int)i; return -1; } /* * strip - * Remove the trailing spaces from a buffer * Returns it's new length (a la strlen) * * len must be int not size_t because of the >= 0 test, it is sizeof(buf) * not strlen(buf) */ static size_t strip(char *buf, int len) { register char *ptr; register size_t i; if((buf == NULL) || (len <= 0)) return(0); i = strlen(buf); if(len > (int)(i + 1)) return(i); ptr = &buf[--len]; #if defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) /* watch - it may be in shared text area */ do if(*ptr) *ptr = '\0'; while((--len >= 0) && !isgraph(*--ptr) && (*ptr != '\n') && (*ptr != '\r')); #else /* more characters can be displayed on DOS */ do #ifndef REAL_MODE_DOS if(*ptr) /* C8.0 puts into a text area */ #endif *ptr = '\0'; while((--len >= 0) && ((*--ptr == '\0') || (isspace((int)*ptr)))); #endif return((size_t)(len + 1)); } /* * strstrip: * Strip a given string */ static size_t strstrip(char *s) { if(s == (char *)NULL) return(0); return(strip(s, strlen(s) + 1)); } /* * When parsing a MIME header see if this spans more than one line. A * semi-colon at the end of the line indicates that the MIME information * is continued on the next line. 
* * Some clients are broken and put white space after the ; */ static bool continuationMarker(const char *line) { const char *ptr; assert(line != NULL); #ifdef CL_DEBUG cli_dbgmsg("continuationMarker(%s)\n", line); #endif if(strlen(line) == 0) return FALSE; ptr = strchr(line, '\0'); assert(ptr != NULL); while(ptr > line) switch(*--ptr) { case '\n': case '\r': case ' ': case '\t': continue; case ';': return TRUE; default: return FALSE; } return FALSE; } static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg) { int type = tableFind(rfc821Table, cmd); #ifdef CL_THREAD_SAFE char *strptr; #endif char *copy = strdup(arg); char *ptr = copy; cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg); strstrip(copy); switch(type) { case CONTENT_TYPE: /* * Fix for non RFC1521 compliant mailers * that send content-type: Text instead * of content-type: Text/Plain, or * just simply "Content-Type:" */ if(arg == NULL) cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n"); else if(strchr(copy, '/') == NULL) cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy); else { /* * Some clients are broken and * put white space after the ; */ strstrip(copy); if(*arg == '/') { cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n"); messageSetMimeType(m, "application"); messageSetMimeSubtype(m, "octet-stream"); strtok_r(copy, ";", &strptr); } else { char *s; messageSetMimeType(m, strtok_r(copy, "/", &strptr)); /* * Stephen White * Some clients put space after * the mime type but before * the ; */ s = strtok_r(NULL, ";", &strptr); strstrip(s); messageSetMimeSubtype(m, s); } /* * Add in all the arguments. 
*/ while((copy = strtok_r(NULL, "\r\n \t", &strptr))) messageAddArgument(m, copy); } break; case CONTENT_TRANSFER_ENCODING: messageSetEncoding(m, copy); break; case CONTENT_DISPOSITION: messageSetDispositionType(m, strtok_r(copy, ";", &strptr)); messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr)); } free(ptr); return type; } /* * Save some data as a unique file in the given directory. */ static bool saveFile(const blob *b, const char *dir) { unsigned long nbytes = blobGetDataSize(b); size_t len = 0; int fd; const char *cptr, *suffix; char filename[NAME_MAX + 1]; assert(dir != NULL); if(nbytes == 0) return TRUE; cptr = blobGetFilename(b); if(cptr == NULL) { cptr = "unknown"; suffix = ""; } else { /* * Some programs are broken and use an idea of a ".suffix" * to determine the file type rather than looking up the * magic number. CPM has a lot to answer for... * FIXME: the suffix now appears twice in the filename... */ suffix = strrchr(cptr, '.'); if(suffix == NULL) suffix = ""; else len = strlen(suffix); } cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr); /* * Allow for very long filenames. We have to truncate them to fit */ snprintf(filename, sizeof(filename) - 1 - len, "%s/%.*sXXXXXX", dir, (int)(sizeof(filename) - 9 - len - strlen(dir)), cptr); /* * TODO: add a HAVE_MKSTEMP property */ #if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) fd = mkstemp(filename); #else (void)mktemp(filename); fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600); #endif if(fd < 0) { cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno)); return FALSE; } /* * Add the suffix back to the end of the filename. Tut-tut, filenames * should be independant of their usage on UNIX type systems. 
*/ if(len > 1) { char stub[NAME_MAX + 1]; snprintf(stub, sizeof(stub), "%s%s", filename, suffix); #ifdef C_LINUX rename(stub, filename); #else link(stub, filename); unlink(stub); #endif } write(fd, blobGetData(b), (size_t)nbytes); cli_dbgmsg("Attachment saved as %s (%lu bytes long)\n", filename, nbytes); return (close(fd) >= 0); }