GitList

clamav-devel/libclamav/mbox.c

e3aaff8e	/* * Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3eaab452	* * Change History: * $Log: mbox.c,v $
f1c1300c	* Revision 1.163 2004/10/31 09:28:56 nigelhorne * Handle unbalanced quotes in multipart headers *
c79a2273	* Revision 1.162 2004/10/24 04:35:15 nigelhorne * Handle multipart/knowbot as multipart/mixed *
fdc6adbe	* Revision 1.161 2004/10/21 10:18:40 nigelhorne * PARTIAL: readdir_r even more options :-( *
a0b21816	* Revision 1.160 2004/10/21 09:41:07 nigelhorne * PARTIAL: add readdir_r fix to BeOS *
13462674	* Revision 1.159 2004/10/20 10:35:41 nigelhorne * Partial mode: fix possible stack corruption with Solaris *
a7398c9c	* Revision 1.158 2004/10/17 09:29:21 nigelhorne * Advise to report broken emails *
0356cdc0	* Revision 1.157 2004/10/16 20:53:28 nigelhorne * Tidy up *
6e5d95eb	* Revision 1.156 2004/10/16 19:09:39 nigelhorne * Handle BeMail (BeOS) files *
1815e490	* Revision 1.155 2004/10/16 17:23:04 nigelhorne * Handle colons in quotes in headers *
15033cb6	* Revision 1.154 2004/10/16 09:01:05 nigelhorne * Improved handling of wraparound headers *
4de5fffd	* Revision 1.153 2004/10/14 21:18:49 nigelhorne * Harden the test for RFC2047 encoded headers *
cf569541	* Revision 1.152 2004/10/14 17:45:13 nigelhorne * RFC2047 on long lines produced by continuation headers *
3a0f75c6	* Revision 1.151 2004/10/10 11:10:20 nigelhorne * Remove perror - replace with cli_errmsg *
1d3d7dd9	* Revision 1.150 2004/10/09 08:01:37 nigelhorne * Needs libcurl >= 7.11 *
5a15955b	* Revision 1.149 2004/10/06 17:21:30 nigelhorne * Fix RFC2298 handling broken by RFC1341 code *
f10460ed	* Revision 1.148 2004/10/05 15:41:53 nigelhorne * First draft of code to handle RFC1341 *
9a729c80	* Revision 1.147 2004/10/04 12:18:09 nigelhorne * Better warning message about PGP attachments not being scanned *
1b00d9a4	* Revision 1.146 2004/10/04 10:52:39 nigelhorne * Better error message on RFC2047 decode error *
7c5a7a47	* Revision 1.145 2004/10/01 13:49:22 nigelhorne * Minor code tidy *
22080fa5	* Revision 1.144 2004/10/01 07:55:36 nigelhorne * Better error message on message/partial *
2c7d1edd	* Revision 1.143 2004/09/30 21:47:35 nigelhorne * Removed unneeded strdups *
c77c8809	* Revision 1.142 2004/09/28 18:40:12 nigelhorne * Use stack rather than heap where possible *
ba867aed	* Revision 1.141 2004/09/23 08:43:25 nigelhorne * Scan multipart/digest messages *
00d46ae6	* Revision 1.140 2004/09/22 16:09:51 nigelhorne * Build if CURLOPT_DNS_USE_GLOBAL_CACHE isn't supported *
12f3689d	* Revision 1.139 2004/09/22 15:49:13 nigelhorne * Handle RFC2298 messages *
cd483c9b	* Revision 1.138 2004/09/22 15:21:50 nigelhorne * Fix typo *
15f4aa67	* Revision 1.137 2004/09/21 20:47:38 nigelhorne * FOLLOWURL: Set a default username and password for password protected pages *
7e492164	* Revision 1.136 2004/09/21 12:18:52 nigelhorne * Fallback to CURLOPT_FILE if CURLOPT_WRITEDATA isn't defined *
548a5f96	* Revision 1.135 2004/09/21 08:14:00 nigelhorne * Now compiles in machines with libcurl but without threads *
59da5a4f	* Revision 1.134 2004/09/20 17:08:43 nigelhorne * Some performance enhancements *
8037334b	* Revision 1.133 2004/09/20 12:44:03 nigelhorne * Fix parsing error on mime arguments *
f017fbdd	* Revision 1.132 2004/09/20 08:31:56 nigelhorne * FOLLOWURLS now compiled if libcurl is found *
767f16ab	* Revision 1.131 2004/09/18 14:59:25 nigelhorne * Code tidy *
6d312569	* Revision 1.130 2004/09/17 10:56:29 nigelhorne * Handle multiple content-type headers and use the most likely *
53bfac08	* Revision 1.129 2004/09/17 09:48:53 nigelhorne * Handle attempts to hide mime type *
a9714c49	* Revision 1.128 2004/09/17 09:09:44 nigelhorne * Better handling of RFC822 comments *
50df4118	* Revision 1.127 2004/09/16 18:00:43 nigelhorne * Handle RFC2047 *
0960ff5e	* Revision 1.126 2004/09/16 14:23:57 nigelhorne * Handle quotes around mime type *
97867f21	* Revision 1.125 2004/09/16 12:59:36 nigelhorne * Handle = and space as header separaters *
1eec55a6	* Revision 1.124 2004/09/16 11:20:33 nigelhorne * Better handling of folded headers in multipart messages *
8b3563f2	* Revision 1.123 2004/09/16 08:56:19 nigelhorne * Handle RFC822 Comments *
3499d81e	* Revision 1.122 2004/09/15 22:09:26 nigelhorne * Handle spaces before colons *
c7b69776	* Revision 1.121 2004/09/15 18:08:23 nigelhorne * Handle multiple encoding types *
d77c655d	* Revision 1.120 2004/09/15 08:47:07 nigelhorne * Cleaner way to initialise hrefs *
15021325	* Revision 1.119 2004/09/14 20:47:28 nigelhorne * Use new normalise code *
90bb9c3e	* Revision 1.118 2004/09/14 12:09:37 nigelhorne * Include old normalise code *
3805ebcb	* Revision 1.117 2004/09/13 16:44:01 kojm * minor cleanup *
b143af46	* Revision 1.116 2004/09/13 13:16:28 nigelhorne * Return CL_EFORMAT on bad format *
ef822cfc	* Revision 1.115 2004/09/06 11:02:08 nigelhorne * Normalise HTML before scanning for URLs to download *
28010d29	* Revision 1.114 2004/09/03 15:59:00 nigelhorne * Handle boundary= "foo" *
69543a9d	* Revision 1.113 2004/08/26 09:33:20 nigelhorne * Scan Communigate Pro files *
09e05292	* Revision 1.112 2004/08/23 13:15:16 nigelhorne * messageClearMarkers *
b0b860f1	* Revision 1.111 2004/08/22 20:20:14 nigelhorne * Tidy *
78e302e1	* Revision 1.110 2004/08/22 15:08:59 nigelhorne * messageExport *
0e5a0129	* Revision 1.109 2004/08/22 10:34:24 nigelhorne * Use fileblob *
b2223aad	* Revision 1.108 2004/08/21 11:57:57 nigelhorne * Use line.[ch] *
1df4cbc7	* Revision 1.107 2004/08/20 04:55:07 nigelhorne * FOLLOWURL *
ab3107bc	* Revision 1.106 2004/08/20 04:53:18 nigelhorne * Tidy up *
f121cb96	* Revision 1.105 2004/08/18 21:35:08 nigelhorne * Multithread the FollowURL calls *
55a3f03b	* Revision 1.104 2004/08/18 15:53:43 nigelhorne * Honour CL_MAILURL *
38cf81a6	* Revision 1.103 2004/08/18 10:49:45 nigelhorne * CHECKURLs was mistakenly turned on *
65684cec	* Revision 1.102 2004/08/18 07:45:20 nigelhorne * Use configure WITH_CURL value *
393a6d67	* Revision 1.101 2004/08/17 08:28:32 nigelhorne * Support multitype/fax-message *
87c9313e	* Revision 1.100 2004/08/12 10:36:09 nigelhorne * LIBCURL completed *
9558d802	* Revision 1.99 2004/08/11 15:28:39 nigelhorne * No longer needs curl.h *
4f1d0bfc	* Revision 1.98 2004/08/11 14:46:22 nigelhorne * Better handling of false positive emails *
928579ad	* Revision 1.97 2004/08/10 14:02:22 nigelhorne * * empty log message * *
9b4bb8b7	* Revision 1.96 2004/08/10 08:14:00 nigelhorne * Enable CHECKURL *
bbd2d959	* Revision 1.95 2004/08/09 21:37:21 kojm * libclamav: add new option CL_MAILURL *
6eedb434	* Revision 1.94 2004/08/09 08:26:36 nigelhorne * Thread safe checkURL *
71ba1dcd	* Revision 1.93 2004/08/08 21:30:47 nigelhorne * First draft of CheckURL *
ebfb4048	* Revision 1.92 2004/08/08 19:13:14 nigelhorne * Better handling of bounces *
f12d2498	* Revision 1.91 2004/08/04 18:59:19 nigelhorne * Tidy up multipart handling *
1a74d4df	* Revision 1.90 2004/07/26 17:02:56 nigelhorne * Fix crash when debugging on SPARC *
f7fa3820	* Revision 1.89 2004/07/26 09:12:12 nigelhorne * Fix crash when debugging on Solaris *
0c0894b8	* Revision 1.88 2004/07/20 14:35:29 nigelhorne * Some MYDOOM.I were getting through *
8000d078	* Revision 1.87 2004/07/19 17:54:40 kojm * Use new patter matching algorithm. Cleanup. *
0e4e16d4	* Revision 1.86 2004/07/06 09:32:45 nigelhorne * Better handling of Gibe.3 boundary exploit *
b9ce9639	* Revision 1.85 2004/06/30 19:48:58 nigelhorne * Some TR.Happy99.SKA were getting through *
3f3f9085	* Revision 1.84 2004/06/30 14:30:40 nigelhorne * Fix compilation error on Solaris *
f2f25418	* Revision 1.83 2004/06/28 11:44:45 nigelhorne * Remove empty parts *
f73920a4	* Revision 1.82 2004/06/25 13:56:38 nigelhorne * Optimise messages without other messages encapsulated within them *
21631591	* Revision 1.81 2004/06/24 21:36:38 nigelhorne * Plug memory leak with large number of attachments *
d79597e3	* Revision 1.80 2004/06/23 16:23:25 nigelhorne * Further empty line optimisation *
02927896	* Revision 1.79 2004/06/22 04:08:01 nigelhorne * Optimise empty lines *
640ed140	* Revision 1.78 2004/06/21 10:21:19 nigelhorne * Fix crash when a multipart/mixed message contains many parts that need to be scanned as attachments *
b726511f	* Revision 1.77 2004/06/18 10:07:12 nigelhorne * Allow any number of alternatives in multipart messages *
e2875303	* Revision 1.76 2004/06/16 08:07:39 nigelhorne * Added thread safety *
cb5a87e0	* Revision 1.75 2004/06/14 09:07:10 nigelhorne * Handle spam using broken e-mail generators for multipart/alternative *
51fc2aa8	* Revision 1.74 2004/06/09 18:18:59 nigelhorne * Find uuencoded viruses in multipart/mixed that have no start of message boundaries *
d46678ed	* Revision 1.73 2004/05/14 08:15:55 nigelhorne * Use mkstemp on cygwin *
187061d8	* Revision 1.72 2004/05/12 11:20:37 nigelhorne * More bounce message false positives handled *
a7527b1f	* Revision 1.71 2004/05/10 11:35:11 nigelhorne * No need to update mbox.c for cli_filetype problem
ba888390	*
2f4737ed	* Revision 1.69 2004/05/06 11:26:49 nigelhorne * Force attachments marked as RFC822 messages to be scanned *
f0627588	* Revision 1.68 2004/04/29 08:59:24 nigelhorne * Tidied up SetDispositionType *
aeca3893	* Revision 1.67 2004/04/23 10:47:41 nigelhorne * If an inline text portion has a filename treat is as an attachment *
f35bc674	* Revision 1.66 2004/04/14 08:32:21 nigelhorne * When debugging print the email number in mailboxes *
3a978f7d	* Revision 1.65 2004/04/07 18:18:07 nigelhorne * Some occurances of W97M.Lexar were let through *
93d41ee4	* Revision 1.64 2004/04/05 09:32:20 nigelhorne * Added SCAN_TO_DISC define *
12f5aef2	* Revision 1.63 2004/04/01 15:32:34 nigelhorne * Graceful exit if messageAddLine fails in strdup *
7c1eb3bf	* Revision 1.62 2004/03/31 17:00:20 nigelhorne * Code tidy up free memory earlier *
1bfbedd4	* Revision 1.61 2004/03/30 22:45:13 nigelhorne * Better handling of multipart/multipart messages *
d879a7b0	* Revision 1.60 2004/03/29 09:22:03 nigelhorne * Tidy up code and reduce shuffling of data *
4d825c09	* Revision 1.59 2004/03/26 11:08:36 nigelhorne * Use cli_writen *
c81143fc	* Revision 1.58 2004/03/25 22:40:46 nigelhorne * Removed even more calls to realloc and some duplicated code *
61485e09	* Revision 1.57 2004/03/21 17:19:49 nigelhorne * Handle bounce messages with no headers *
ae3bda56	* Revision 1.56 2004/03/21 09:41:26 nigelhorne * Faster scanning for non MIME messages *
891d6e39	* Revision 1.55 2004/03/20 17:39:23 nigelhorne * First attempt to handle all bounces *
ef704fb3	* Revision 1.54 2004/03/19 15:40:45 nigelhorne * Handle empty content-disposition types *
705e985c	* Revision 1.53 2004/03/19 08:08:02 nigelhorne * If a message part of a multipart contains an RFC822 message that has no encoding don't scan it *
5c1150ac	* Revision 1.52 2004/03/18 21:51:41 nigelhorne * If a message only contains a single RFC822 message that has no encoding don't save for scanning *
c693116d	* Revision 1.51 2004/03/17 19:48:12 nigelhorne * Improved embedded RFC822 message handling *
e17491b2	* Revision 1.50 2004/03/10 22:05:39 nigelhorne * Fix seg fault when a message in a multimessage mailbox fails to scan *
97e8ea68	* Revision 1.49 2004/03/04 13:01:58 nigelhorne * Ensure all bounces are rescanned by cl_mbox *
c19dc6cd	* Revision 1.48 2004/02/27 12:16:26 nigelhorne * Catch lines just containing ':' *
a9f386ed	* Revision 1.47 2004/02/23 10:13:08 nigelhorne * Handle spaces before : in headers *
d5f16694	* Revision 1.46 2004/02/18 13:29:19 nigelhorne * Stop buffer overflows for files with very long suffixes *
56ae62e2	* Revision 1.45 2004/02/18 10:07:40 nigelhorne * Find some Yaha *
8ef734d4	* Revision 1.44 2004/02/15 08:45:54 nigelhorne * Avoid scanning the same file twice *
20d3dde9	* Revision 1.43 2004/02/14 19:04:05 nigelhorne * Handle spaces in boundaries *
bac883ff	* Revision 1.42 2004/02/14 17:23:45 nigelhorne * Had deleted O_BINARY by mistake *
d8f615d7	* Revision 1.41 2004/02/12 18:43:58 nigelhorne * Use mkstemp on Solaris *
547b89de	* Revision 1.40 2004/02/11 08:15:59 nigelhorne * Use O_BINARY for cygwin *
6d6e8271	* Revision 1.39 2004/02/06 13:46:08 kojm * Support for clamav-config.h *
a7e8f192	* Revision 1.38 2004/02/04 13:29:48 nigelhorne * Handle partial writes - and print when write fails *
8ba634a9	* Revision 1.37 2004/02/03 22:54:59 nigelhorne * Catch another example of Worm.Dumaru.Y *
c76810dc	* Revision 1.36 2004/02/02 09:52:57 nigelhorne * Some instances of Worm.Dumaru.Y got through the net *
cca4efe4	* Revision 1.35 2004/01/28 10:15:24 nigelhorne * Added support to scan some bounce messages *
6b8999f0	* Revision 1.34 2004/01/24 17:43:37 nigelhorne * Removed (incorrect) warning about uninitialised variable *
ad9c6836	* Revision 1.33 2004/01/23 10:38:22 nigelhorne * Fixed memory leak in handling some multipart messages *
ab74690c	* Revision 1.32 2004/01/23 08:51:19 nigelhorne * Add detection of uuencoded viruses in single part multipart/mixed files *
9d2797b6	* Revision 1.31 2004/01/22 22:13:06 nigelhorne * Prevent infinite recursion on broken uuencoded files *
4c60b74f	* Revision 1.30 2004/01/13 10:12:05 nigelhorne * Remove duplicate code when handling multipart messages *
3f07cba4	* Revision 1.29 2004/01/09 18:27:11 nigelhorne * ParseMimeHeader could corrupt arg *
2625d6a0	* Revision 1.28 2004/01/09 15:07:42 nigelhorne * Re-engineered update 1.11 lost in recent changes *
303f9be9	* Revision 1.27 2004/01/09 14:45:59 nigelhorne * Removed duplicated code in multipart handler *
d110fe1c	* Revision 1.26 2004/01/09 10:20:54 nigelhorne * Locate uuencoded viruses hidden in text poritions of multipart/mixed mime messages *
d2a47316	* Revision 1.25 2004/01/06 14:41:18 nigelhorne * Handle headers which do not not have a space after the ':' *
89670d69	* Revision 1.24 2003/12/20 13:55:36 nigelhorne * Ensure multipart just save the bodies of attachments *
e06d34dc	* Revision 1.23 2003/12/14 18:07:01 nigelhorne * Some viruses in embedded messages were not being found *
5e8a92bb	* Revision 1.22 2003/12/13 16:42:23 nigelhorne * call new cli_chomp *
7e577f26	* Revision 1.21 2003/12/11 14:35:48 nigelhorne * Better handling of encapsulated messages *
98cb5cba	* Revision 1.20 2003/12/06 04:03:26 nigelhorne * Handle hand crafted emails that incorrectly set multipart headers *
e2e7ebf5	* Revision 1.19 2003/11/21 07:26:31 nigelhorne * Scan multipart alternatives that have no boundaries, finds some uuencoded happy99 *
7d34e010	* Revision 1.18 2003/11/17 08:13:21 nigelhorne * Handle spaces at the end of lines of MIME headers *
aa0210b6	* Revision 1.17 2003/11/06 05:06:42 nigelhorne * Some applications weren't being scanned *
294d0774	* Revision 1.16 2003/11/04 08:24:00 nigelhorne * Handle multipart messages that have no text portion *
c2b2d8af	* Revision 1.15 2003/10/12 20:13:49 nigelhorne * Use NO_STRTOK_R consistent with message.c *
c9b8f252	* Revision 1.14 2003/10/12 12:37:11 nigelhorne * Appledouble encoded EICAR now found *
098d38f1	* Revision 1.13 2003/10/01 09:27:42 nigelhorne * Handle content-type header going over to a new line *
5f0d267f	* Revision 1.12 2003/09/29 17:10:19 nigelhorne * Moved stub from heap to stack since its maximum size is known *
94b6160c	* Revision 1.11 2003/09/29 12:58:32 nigelhorne * Handle Content-Type: /; name="eicar.com" *
3eaab452	* Revision 1.10 2003/09/28 10:06:34 nigelhorne * Compilable under SCO; removed duplicate code with message.c *
e3aaff8e	*/
f1c1300c	static char const rcsid[] = "$Id: mbox.c,v 1.163 2004/10/31 09:28:56 nigelhorne Exp $";
6d6e8271	#if HAVE_CONFIG_H #include "clamav-config.h" #endif
e3aaff8e	#ifndef CL_DEBUG
548a5f96	#define NDEBUG /* map CLAMAV debug onto standard */
e3aaff8e	#endif #ifdef CL_THREAD_SAFE
98cb5cba	#ifndef _REENTRANT
e3aaff8e	#define _REENTRANT /* for Solaris 2.8 */ #endif
98cb5cba	#endif
e3aaff8e	#include <stdio.h> #include <stdlib.h> #include <errno.h> #include <assert.h> #include <string.h> #include <strings.h> #include <ctype.h> #include <time.h> #include <unistd.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/types.h>
d4d14218	#include <sys/param.h>
e3aaff8e	#include <clamav.h>
f10460ed	#include <dirent.h>
a0b21816	#include <limits.h>
e3aaff8e
e2875303	#ifdef CL_THREAD_SAFE #include <pthread.h> #endif
e3aaff8e	#include "table.h" #include "mbox.h" #include "blob.h"
b2223aad	#include "line.h"
e3aaff8e	#include "text.h" #include "message.h" #include "others.h" #include "defaults.h"
7e577f26	#include "str.h"
e3aaff8e
02927896	#ifdef CL_DEBUG #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1 #define HAVE_BACKTRACE #endif
3f3f9085	#endif
02927896	#ifdef HAVE_BACKTRACE #include <execinfo.h> #include <signal.h> #include <syslog.h> static void sigsegv(int sig); static void print_trace(int use_syslog); #endif
c2b2d8af	#if defined(NO_STRTOK_R) \|\| !defined(CL_THREAD_SAFE)
e3aaff8e	#undef strtok_r #undef __strtok_r #define strtok_r(a,b,c) strtok(a,b) #endif /* required for AIX and Tru64 */ #ifdef TRUE #undef TRUE #endif #ifdef FALSE #undef FALSE #endif
87c9313e	typedef enum { FALSE = 0, TRUE = 1 } bool;
9b4bb8b7	#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
393a6d67
f017fbdd	/* * Code does exist to run FOLLORURLS on systems without libcurl, however that * is not recommended so it is not compiled by default / #ifdef WITH_CURL #define FOLLOWURLS /
65684cec	* If an email contains URLs, check them - helps to * find Dialer.gen-45
87c9313e	*/
f017fbdd	#endif
9b4bb8b7
55a3f03b	#ifdef FOLLOWURLS
65684cec
15021325	#include "htmlnorm.h"
65684cec	#define MAX_URLS 5 /*
393a6d67	* Maximum number of URLs scanned in a message * part */
65684cec	#ifdef WITH_CURL /* Set in configure / / * To build with WITH_CURL: * LDFLAGS=`curl-config --libs` ./configure ... */
9558d802	#include <curl/curl.h>
1d3d7dd9	/* * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and 7.10 is * untested) / #if (LIBCURL_VERSION_MAJOR < 7) #undef WITH_CURL / also undef FOLLOWURLS? */
9558d802	#endif
55a3f03b
3a0f75c6	#if (LIBCURL_VERSION_MAJOR == 7) && (LIBCURL_VERSION_MINOR < 10)
1d3d7dd9	#undef WITH_CURL /* also undef FOLLOWURLS? / #endif #endif /WITH_CURL*/
55a3f03b	#else /!FOLLOWURLS/ #undef WITH_CURL
1d3d7dd9	#endif /FOLLOWURLS/
9558d802
f10460ed	/* * Define this to handle RFC1341 messages. * This is experimental code so it is up to YOU to (1) ensure it's secure
cf569541	* (2) periodically trim the directory of old files * * If you use the load balancing feature of clamav-milter to run clamd on * more than one machine you must make sure that /tmp/partial is on a shared * network filesystem
f10460ed	*/
1d3d7dd9	/#define PARTIAL_DIR "/tmp/partial" / FIXME: should be config based on TMPDIR */
f10460ed
/*
 * Forward declarations for the file-local helpers.
 *
 * Pointer declarators are spelled out on every parameter and return type so
 * that the prototypes agree with the way the functions are called below
 * (e.g. rfc2047()'s result is stored in a char * and passed to free(), and
 * initialiseTables() is handed the addresses of two table pointers).
 */
static	message	*parseEmailHeaders(const message *m, const table_t *rfc821Table);
static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
static	int	parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options);
static	int	boundaryStart(const char *line, const char *boundary);
static	int	endOfMessage(const char *line, const char *boundary);
static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static	int	getTextPart(message *const messages[], size_t size);
static	size_t	strip(char *buf, int len);
static	bool	continuationMarker(const char *line);
static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
static	void	saveTextPart(message *m, const char *dir);
static	char	*rfc2047(const char *in);
static	char	*rfc822comments(const char *in);
#ifdef	PARTIAL_DIR
static	int	rfc1341(message *m, const char *dir);
#endif

static	void	checkURLs(message *m, const char *dir);
#ifdef	WITH_CURL
/* Argument bundle handed to getURL() */
struct arg {
	const char *url;
	const char *dir;
	char *filename;
};
#ifdef	CL_THREAD_SAFE
/* Takes a void pointer in thread-safe builds; a struct arg pointer otherwise */
static	void	*getURL(void *a);
#else
static	void	*getURL(struct arg *arg);
#endif
#endif
/* Maximum line length according to RFC821 */
#define	LINE_LENGTH	1000

/* Hashcodes for our hash tables */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Mime sub types */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only rudimentary for now */
/*
 * FAX: RFC3458
 * Drafts stated to treat it as mixed if it is not known. This disappeared
 * in the final version (except when talking about voice-message), but it is
 * good enough for us since we do no validation of coversheet presence etc.
 * (which also has disappeared in the final version)
 */
#define	FAX		MIXED
/*
 * ENCRYPTED: e.g. RFC2015
 *	Content-Type: multipart/encrypted;
 *	boundary="nextPart1383049.XCRrrar2yq";
 *	protocol="application/pgp-encrypted"
 */
#define	ENCRYPTED	13
/*
 * X_BFILE: BeOS, expect two parts: the file and its attributes.
 * The attributes part comes as
 *	Content-Type: application/x-be_attribute
 *		name="foo"
 * I can't find where it is defined, any pointers would be appreciated.
 * For now we treat it as multipart/related
 */
#define	X_BFILE		RELATED
#define	KNOWBOT		14	/* Unknown and undocumented format? */

/*
 * Key/value pairs used to seed the lookup tables; each array is terminated
 * by an entry whose key is NULL.
 */
static	const	struct tableinit {
	const	char	*key;
	int	value;
} rfc821headers[] = {
	/* TODO: make these regular expressions */
	{	"Content-Type",			CONTENT_TYPE			},
	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
	{	"Content-Disposition",		CONTENT_DISPOSITION		},
	{	NULL,				0				}
}, mimeSubtypes[] = {	/* see RFC2045 */
	/* subtypes of Text */
	{	"plain",		PLAIN		},
	{	"enriched",		ENRICHED	},
	{	"html",			HTML		},
	{	"richtext",		RICHTEXT	},
	/* subtypes of Multipart */
	{	"mixed",		MIXED		},
	{	"alternative",		ALTERNATIVE	},
	{	"digest",		DIGEST		},
	{	"signed",		SIGNED		},
	{	"parallel",		PARALLEL	},
	{	"related",		RELATED		},
	{	"report",		REPORT		},
	{	"appledouble",		APPLEDOUBLE	},
	{	"fax-message",		FAX		},
	{	"encrypted",		ENCRYPTED	},
	{	"x-bfile",		X_BFILE		},	/* BeOS */
	{	"knowbot",		KNOWBOT		},	/* ??? */
	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
	{	"knowbot-code",		KNOWBOT		},	/* ??? */
	{	"knowbot-state",	KNOWBOT		},	/* ??? */
	{	NULL,			0		}
};

#ifdef	CL_THREAD_SAFE
/* Held by cli_mbox() while the static lookup tables are being initialised */
static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/* Maximum filenames under various systems */
#ifndef	NAME_MAX	/* e.g. Linux */
#ifdef	MAXNAMELEN	/* e.g. Solaris */
#define	NAME_MAX	MAXNAMELEN
#else
#ifdef	FILENAME_MAX	/* e.g. SCO */
#define	NAME_MAX	FILENAME_MAX
#endif
#endif
#endif

#ifndef	O_BINARY
#define	O_BINARY	0
#endif

e3aaff8e	/* * TODO: when signal handling is added, need to remove temp files when a
ef822cfc	* signal is received
e3aaff8e	* TODO: add option to scan in memory not via temp files, perhaps with a
1bfbedd4	* named pipe or memory mapped file, though this won't work on big e-mails * containing many levels of encapsulated messages - it'd just take too much * RAM
049a18b9	* TODO: parse .msg format files
c9b8f252	* TODO: fully handle AppleDouble format, see
ef822cfc	* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
89670d69	* TODO: ensure parseEmailHeaders is always called before parseEmailBody * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
69543a9d	* TODO: Look into TNEF. Is there anything that needs to be done here?
e3aaff8e	*/ int
bbd2d959	cli_mbox(const char *dir, int desc, unsigned int options)
e3aaff8e	{
049a18b9	int retcode, i;
89670d69	message m, body;
e3aaff8e	FILE *fd;
049a18b9	char buffer[LINE_LENGTH];
c7b69776	#ifdef HAVE_BACKTRACE
02927896	void (*segv)(int); #endif
393a6d67	static table_t rfc821, subtype;
e3aaff8e	cli_dbgmsg("in mbox()\n");
049a18b9	i = dup(desc); if((fd = fdopen(i, "rb")) == NULL) { cli_errmsg("Can't open descriptor %d\n", desc); close(i);
ef822cfc	return CL_EOPEN;
049a18b9	} if(fgets(buffer, sizeof(buffer), fd) == NULL) { /* empty message */ fclose(fd);
ef822cfc	return CL_CLEAN;
049a18b9	}
e3aaff8e	m = messageCreate();
51fc2aa8	if(m == NULL) {
049a18b9	fclose(fd);
ef822cfc	return CL_EMEM;
51fc2aa8	}
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&tables_mutex); #endif
393a6d67	if(rfc821 == NULL) { assert(subtype == NULL);
51fc2aa8
393a6d67	if(initialiseTables(&rfc821, &subtype) < 0) { rfc821 = NULL; subtype = NULL;
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
51fc2aa8	messageDestroy(m); fclose(fd);
ef822cfc	return CL_EMEM;
51fc2aa8	}
e3aaff8e	}
e2875303	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
e3aaff8e
3f3f9085	#ifdef HAVE_BACKTRACE
02927896	segv = signal(SIGSEGV, sigsegv); #endif
89670d69	/* * is it a UNIX style mbox with more than one * mail message, or just a single mail message? */ if(strncmp(buffer, "From ", 5) == 0) {
e3aaff8e	/*
049a18b9	* Have been asked to check a UNIX style mbox file, which * may contain more than one e-mail message to decode
e3aaff8e	*/
89670d69	bool lastLineWasEmpty = FALSE;
f35bc674	int messagenumber = 1;
e3aaff8e
049a18b9	do { /cli_dbgmsg("read: %s", buffer);/
e3aaff8e
89670d69	cli_chomp(buffer); if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
f35bc674	cli_dbgmsg("Deal with email number %d\n", messagenumber++);
e3aaff8e	/*
89670d69	* End of a message in the mail box
e3aaff8e	*/
b2223aad	body = parseEmailHeaders(m, rfc821);
4f1d0bfc	if(body == NULL) { messageReset(m); continue; }
89670d69	messageDestroy(m); if(messageGetBody(body))
b0b860f1	if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) {
e17491b2	messageReset(body); m = body; continue; }
e3aaff8e	/*
89670d69	* Starting a new message, throw away all the * information about the old one
e3aaff8e	*/
89670d69	m = body; messageReset(body);
e3aaff8e
049a18b9	cli_dbgmsg("Finished processing message\n");
89670d69	} else
547b89de	lastLineWasEmpty = (bool)(buffer[0] == '\0');
b2223aad	if(messageAddStr(m, buffer) < 0)
12f5aef2	break;
049a18b9	} while(fgets(buffer, sizeof(buffer), fd) != NULL);
f35bc674	cli_dbgmsg("Deal with email number %d\n", messagenumber);
4f1d0bfc	} else {
7e577f26	/* * It's a single message, parse the headers then the body
4f1d0bfc	* Ignore blank lines at the start of the message */
69543a9d	if(strncmp(buffer, "P I ", 4) == 0) /* * CommuniGate Pro format: ignore headers until * blank line / while((fgets(buffer, sizeof(buffer), fd) != NULL) && (strchr("\r\n", buffer[0]) == NULL)) ; / * Ignore any blank lines at the top of the message */
4f1d0bfc	while(strchr("\r\n", buffer[0]) && (fgets(buffer, sizeof(buffer), fd) != NULL))
87c9313e	;
4f1d0bfc	/* * FIXME: files full of new lines and nothing else are * handled ungracefully...
7e577f26	*/
b2223aad	do {
5c1150ac	/*
93d41ee4	* TODO: this needlessly creates a message object, * it'd be better if parseEmailHeaders could also * read in from a file. I do not want to lump the * parseEmailHeaders code here, that'd be a duplication * of code I want to avoid
5c1150ac	*/
b2223aad	(void)cli_chomp(buffer); if(messageAddStr(m, buffer) < 0)
12f5aef2	break;
b2223aad	} while(fgets(buffer, sizeof(buffer), fd) != NULL);
4f1d0bfc	}
7e577f26
e3aaff8e	fclose(fd);
ef822cfc	/* * This is not necessarily true, but since the only options are * CL_CLEAN and CL_VIRUS this is the better choice. It would be * nice to have CL_CONTINUESCANNING or something like that */ retcode = CL_CLEAN;
049a18b9
b2223aad	body = parseEmailHeaders(m, rfc821);
89670d69	messageDestroy(m);
4f1d0bfc	if(body) { /* * Write out the last entry in the mailbox */ if(messageGetBody(body))
b0b860f1	if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options))
b143af46	retcode = CL_EFORMAT;
e3aaff8e
4f1d0bfc	/* * Tidy up and quit */ messageDestroy(body); }
e3aaff8e	cli_dbgmsg("cli_mbox returning %d\n", retcode);
3f3f9085	#ifdef HAVE_BACKTRACE
02927896	signal(SIGSEGV, segv); #endif
e3aaff8e	return retcode; } /*
7e577f26	* The given message contains a raw e-mail. * * This function parses the headers of m and sets the message's arguments
e06d34dc	* * Returns the message's body with the correct arguments set
f73920a4	* * The downside of this approach is that for a short time we have two copies * of the message in memory, the upside is that it makes for easier parsing * of encapsulated messages, and in the long run uses less memory in those * scenarios
7e577f26	*/
e06d34dc	static message *
b2223aad	parseEmailHeaders(const message m, const table_t rfc821)
7e577f26	{
e06d34dc	bool inHeader = TRUE;
0356cdc0	bool contMarker = FALSE;
b2223aad	const text *t;
89670d69	message *ret;
4f1d0bfc	bool anyHeadersFound = FALSE;
28010d29	bool Xheader = FALSE;
15033cb6	int commandNumber = -1;
89670d69
02927896	cli_dbgmsg("parseEmailHeaders\n");
89670d69	if(m == NULL) return NULL; ret = messageCreate();
7e577f26
b2223aad	for(t = messageGetBody(m); t; t = t->t_next) { const char *buffer;
7e577f26
b2223aad	if(t->t_line) buffer = lineGetData(t->t_line); else
02927896	buffer = NULL;
7e577f26
28010d29	if(inHeader) {
d79597e3	if(buffer == NULL) {
28010d29	/* * A blank line signifies the end of the header * and the start of the text */
7e577f26	cli_dbgmsg("End of header information\n");
f12d2498	inHeader = FALSE;
0356cdc0	} else if(((buffer[0] == '\t') \|\| (buffer[0] == ' ') \|\| contMarker) &&
28010d29	(!Xheader)) { /* * Section B.2 of RFC822 says TAB or SPACE means * a continuation of the previous entry. * * Add all the arguments on the line */
1815e490	char *ptr;
c77c8809	char copy[LINE_LENGTH + 1];
1815e490	bool inquotes = FALSE;
13462674	bool arequotes = FALSE;
f10460ed	#ifdef CL_THREAD_SAFE char *strptr; #endif
13462674
0356cdc0	contMarker = continuationMarker(buffer);
15033cb6	switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE: break; default: continue; }
c77c8809	assert(strlen(buffer) < sizeof(copy)); strcpy(copy, buffer);
28010d29
1815e490	/* * Ensure that the colon in headers such as * this doesn't get mistaken for a token * separator * boundary="=.J:gysAG)N(3_zv" / for(ptr = copy; ptr; ptr++) if(*ptr == '\"') inquotes = !inquotes;
13462674	else if(inquotes) {
1815e490	*ptr \|= '\200';
13462674	arequotes = TRUE; }
1815e490
548a5f96	#ifdef CL_THREAD_SAFE
28010d29	for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
1815e490	if(strchr(ptr, '=')) {
13462674	if(arequotes) { char p2; for(p2 = ptr; p2; p2++) *p2 &= '\177'; }
28010d29	messageAddArguments(ret, ptr);
1815e490	}
548a5f96	#else for(ptr = strtok(copy, ";"); ptr; ptr = strtok(NULL, ":"))
1815e490	if(strchr(ptr, '=')) {
13462674	if(arequotes) { char p2; for(p2 = ptr; p2; p2++) *p2 &= '\177'; }
548a5f96	messageAddArguments(ret, ptr);
1815e490	}
548a5f96	#endif
f12d2498	} else {
28010d29	Xheader = (bool)(buffer[0] == 'X');
0356cdc0	contMarker = continuationMarker(buffer);
393a6d67	if((parseEmailHeader(ret, buffer, rfc821) >= 0) \|\|
15033cb6	(strncasecmp(buffer, "From ", 5) == 0)) { char cmd[LINE_LENGTH + 1]; if(cli_strtokbuf(buffer, 0, ":", cmd) != NULL) { anyHeadersFound = TRUE; commandNumber = tableFind(rfc821, cmd); } }
f12d2498	}
e17491b2	} else { /cli_dbgmsg("Add line to body '%s'\n", buffer);/
b2223aad	if(messageAddLine(ret, t->t_line) < 0)
0e4e16d4	break;
e17491b2	}
d879a7b0	}
e06d34dc
4f1d0bfc	if(!anyHeadersFound) { /* * False positive in believing we have an e-mail when we don't */ messageDestroy(ret); cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n"); return NULL; }
93d41ee4	messageClean(ret);
e17491b2	cli_dbgmsg("parseEmailHeaders: return\n");
e06d34dc	return ret;
7e577f26	} /*
4c60b74f	* Handle a header line of an email message */ static int
393a6d67	parseEmailHeader(message m, const char line, const table_t *rfc821)
4c60b74f	{
0960ff5e	char *cmd;
4c60b74f	int ret = -1; #ifdef CL_THREAD_SAFE char *strptr; #endif
97867f21	const char *separater;
50df4118	char *copy, tokenseparater[2];
4c60b74f
20d3dde9	cli_dbgmsg("parseEmailHeader '%s'\n", line);
97867f21	/* * In RFC822 the separater between the key a value is a colon, * e.g. Content-Transfer-Encoding: base64 * However some MUA's are lapse about this and virus writers exploit * this hole, so we need to check all known possiblities / for(separater = ":= "; separater; separater++) if(strchr(line, separater) != NULL) break; if(separater == '\0')
1bfbedd4	return -1;
50df4118	copy = rfc2047(line); if(copy == NULL) return -1;
f2f25418
97867f21	tokenseparater[0] = *separater; tokenseparater[1] = '\0';
548a5f96	#ifdef CL_THREAD_SAFE
97867f21	cmd = strtok_r(copy, tokenseparater, &strptr);
548a5f96	#else cmd = strtok(copy, tokenseparater); #endif
4c60b74f
3499d81e	if(cmd && (strstrip(cmd) > 0)) {
548a5f96	#ifdef CL_THREAD_SAFE
4c60b74f	char *arg = strtok_r(NULL, "", &strptr);
548a5f96	#else char *arg = strtok(NULL, ""); #endif
4c60b74f	if(arg) /* * Found a header such as * Content-Type: multipart/mixed; * set arg to be * "multipart/mixed" and cmd to
a9f386ed	* be "Content-Type"
4c60b74f	*/
393a6d67	ret = parseMimeHeader(m, cmd, rfc821, arg);
4c60b74f	}
50df4118	free(copy);
4c60b74f	return ret; } /*
e3aaff8e	* This is a recursive routine. *
7e577f26	* This function parses the body of mainMessage and saves its attachments in dir *
e06d34dc	* mainMessage is the buffer to be parsed, it contains an e-mail's body, without
f12d2498	* any headers. First time of calling it'll be * the whole message. Later it'll be parts of a multipart message
e3aaff8e	* textIn is the plain text message being built up so far *
d4d14218	* Returns:
e3aaff8e	* 0 for fail
d110fe1c	* 1 for success, attachments saved * 2 for success, attachments not saved
e3aaff8e	/ static int / success or fail */
b0b860f1	parseEmailBody(message messageIn, text textIn, const char dir, const table_t rfc821Table, const table_t *subtypeTable, unsigned int options)
e3aaff8e	{
b726511f	message *messages; / parts of a multipart message */
6b8999f0	int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
e3aaff8e	text aText; const char cptr;
ad9c6836	message *mainMessage;
b0b860f1	fileblob *fb;
e3aaff8e
b0b860f1	cli_dbgmsg("in parseEmailBody\n");
e3aaff8e	aText = textIn;
b726511f	messages = NULL;
ad9c6836	mainMessage = messageIn;
e3aaff8e	/* Anything left to be parsed? */
d4d14218	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
e3aaff8e	mime_type mimeType;
7c5a7a47	int subtype;
9a729c80	const char mimeSubtype, boundary; char *protocol;
e3aaff8e	const text *t_line;
98cb5cba	/bool isAlternative;/
e3aaff8e	message *aMessage;
049a18b9	cli_dbgmsg("Parsing mail file\n");
e3aaff8e	mimeType = messageGetMimeType(mainMessage); mimeSubtype = messageGetMimeSubtype(mainMessage);
7c5a7a47	subtype = tableFind(subtypeTable, mimeSubtype); if((mimeType == TEXT) && (subtype == PLAIN)) {
e3aaff8e	/* * This is effectively no encoding, notice that we * don't check that charset is us-ascii */ cli_dbgmsg("assume no encoding\n"); mimeType = NOMIME;
7c5a7a47	messageSetMimeSubtype(mainMessage, NULL);
e3aaff8e	}
049a18b9	cli_dbgmsg("mimeType = %d\n", mimeType);
e3aaff8e	switch(mimeType) { case NOMIME: aText = textAddMessage(aText, mainMessage); break; case TEXT:
7c5a7a47	if(subtype == PLAIN)
3f3f9085	/* * Consider what to do if this fails * (i.e. aText == NULL): * We mustn't just return since that could * cause a virus to be missed that we * could be capable of scanning. Ignoring * the error is probably the safest, we may be * able to scan anyway and we lose nothing */
e3aaff8e	aText = textCopy(messageGetBody(mainMessage));
7c5a7a47	else if((options&CL_SCAN_MAILURL) && (subtype == HTML)) checkURLs(mainMessage, dir);
e3aaff8e	break; case MULTIPART: boundary = messageFindArgument(mainMessage, "boundary"); if(boundary == NULL) { cli_warnmsg("Multipart MIME message contains no boundaries\n");
e2e7ebf5	/* Broken e-mail message / mimeType = NOMIME; / * The break means that we will still * check if the file contains a uuencoded file */ break;
e3aaff8e	}
c79a2273	/* Perhaps it should assume mixed? */
cb5a87e0	if(mimeSubtype[0] == '\0') { cli_warnmsg("Multipart has no subtype assuming alternative\n"); mimeSubtype = "alternative"; messageSetMimeSubtype(mainMessage, "alternative"); }
e3aaff8e	/* * Get to the start of the first message */
20d3dde9	t_line = messageGetBody(mainMessage); if(t_line == NULL) { cli_warnmsg("Multipart MIME message has no body\n"); free((char *)boundary); mimeType = NOMIME; break; } do
b2223aad	if(boundaryStart(lineGetData(t_line->t_line), boundary))
e3aaff8e	break;
20d3dde9	while((t_line = t_line->t_next) != NULL);
e3aaff8e	if(t_line == NULL) {
28010d29	cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
5a642650	/* * Free added by Thomas Lamy * <Thomas.Lamy@in-online.net> / free((char )boundary);
e2e7ebf5	mimeType = NOMIME; /* * The break means that we will still * check if the file contains a uuencoded file */ break;
e3aaff8e	} /* * Build up a table of all of the parts of this * multipart message. Remember, each part may itself * be a multipart message. */ inhead = 1; inMimeHead = 0;
e06d34dc	/*
9a729c80	* Parse the mainMessage object and create an array * of objects called messages, one for each of the * multiparts that mainMessage contains *
e06d34dc	* This looks like parseEmailHeaders() - maybe there's * some duplication of code to be cleaned up */
b726511f	for(multiparts = 0; t_line; multiparts++) {
56ae62e2	int lines = 0;
1a74d4df	message **m;
56ae62e2
1a74d4df	m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
f12d2498	if(m == NULL)
1a74d4df	break; messages = m;
b726511f
e3aaff8e	aMessage = messages[multiparts] = messageCreate();
3f3f9085	if(aMessage == NULL) { multiparts--; continue; }
e3aaff8e	cli_dbgmsg("Now read in part %d\n", multiparts);
8ba634a9	/* * Ignore blank lines. There shouldn't be ANY * but some viruses insert them */
02927896	while((t_line = t_line->t_next) != NULL)
b2223aad	if(t_line->t_line && /(cli_chomp(t_line->t_text) > 0))/ (strlen(lineGetData(t_line->t_line)) > 0))
d79597e3	break;
8ba634a9	if(t_line == NULL) { cli_dbgmsg("Empty part\n");
b9ce9639	/* * Remove this part unless there's * a uuencoded portion somewhere in * the complete message that we may * throw away by mistake if the MIME * encoding information is incorrect */ if(uuencodeBegin(mainMessage) == NULL) { messageDestroy(aMessage); --multiparts; }
8ba634a9	continue; } do {
b2223aad	const char *line = lineGetData(t_line->t_line);
e3aaff8e
e06d34dc	/cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n", inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");/
e3aaff8e
f1c1300c	if(inMimeHead) { /* continuation line */
02927896	if(line == NULL) { inhead = inMimeHead = 0; continue; }
3a978f7d	/* * Handle continuation lines * because the previous line
1eec55a6	* ended with a ; or this line * starts with a white space
3a978f7d	*/
1eec55a6	cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n", multiparts, line);
3a978f7d	/* * Handle the case when it * isn't really a continuation * line: * Content-Type: application/octet-stream; * Content-Transfer-Encoding: base64 */ parseEmailHeader(aMessage, line, rfc821Table);
e3aaff8e	while(isspace((int)line)) line++; if(line == '\0') { inhead = inMimeHead = 0; continue; } /* * This may cause a trailing ';' * to be added if this test * fails - TODO: verify this */ inMimeHead = continuationMarker(line); messageAddArgument(aMessage, line);
f1c1300c	} else if(inhead) { /* handling normal headers */
02927896	if(line == NULL) { /* empty line */
e3aaff8e	inhead = 0; continue; }
c76810dc	if(isspace((int)line)) { / * The first line is * continuation line. * This is tricky * to handle, but * all we can do is our * best / cli_dbgmsg("Part %d starts with a continuation line\n", multiparts); messageAddArgument(aMessage, line); / * Give it a default * MIME type since * that may be the * missing line * * Choose application to * force a save */ if(messageGetMimeType(aMessage) == NOMIME) messageSetMimeType(aMessage, "application"); continue; }
e3aaff8e	/* * Some clients are broken and * put white space after the ; */ inMimeHead = continuationMarker(line);
b2223aad	if(!inMimeHead) { const text *next = t_line->t_next;
1eec55a6	char *fullline = strdup(line);
f1c1300c	int quotes = 0; const char *qptr;
b2223aad
c77c8809	assert(strlen(line) <= LINE_LENGTH);
f1c1300c	for(qptr = line; qptr; qptr++) if(qptr == '\"') quotes++;
1eec55a6	/* * Fold next lines to the end of this * if they start with a white space
f1c1300c	* or if this line has an odd number of quotes: * Content-Type: application/octet-stream; name="foo * "
1eec55a6	*/ while(next && next->t_line) {
b2223aad	const char *data = lineGetData(next->t_line);
1eec55a6	char *ptr;
f1c1300c	if((!isspace(data[0])) && ((quotes & 1) == 0))
1eec55a6	break; ptr = cli_realloc(fullline, strlen(fullline) + strlen(data) + 1); if(ptr == NULL) break;
b2223aad
1eec55a6	fullline = ptr; strcat(fullline, data);
f1c1300c	for(qptr = data; qptr; qptr++) if(qptr == '\"') quotes++;
1eec55a6	t_line = next; next = next->t_next;
b2223aad	}
1eec55a6	cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n", multiparts, fullline);
303f9be9
1eec55a6	parseEmailHeader(aMessage, fullline, rfc821Table); free(fullline); } else { cli_dbgmsg("Multipart %d: About to parse header '%s'\n", multiparts, line); parseEmailHeader(aMessage, line, rfc821Table); }
e3aaff8e	} else if(boundaryStart(line, boundary)) { inhead = 1; break; } else if(endOfMessage(line, boundary)) { /* * Some viruses put information * after the end of message, * which presumably some broken * mail clients find, so we * can't assume that this * is the end of the message / / t_line = NULL;*/ break;
56ae62e2	} else {
b2223aad	if(messageAddLine(aMessage, t_line->t_line) < 0)
1a74d4df	break;
56ae62e2	lines++; }
8ba634a9	} while((t_line = t_line->t_next) != NULL);
e3aaff8e	messageClean(aMessage);
56ae62e2	cli_dbgmsg("Part %d has %d lines\n", multiparts, lines);
e3aaff8e	} free((char *)boundary);
7c1eb3bf	/*
c79a2273	* Preprocess. Anything special to be done before * we handle the multiparts?
9a729c80	*/
c79a2273	switch(tableFind(subtypeTable, mimeSubtype)) { case KNOWBOT: /* TODO */ cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n"); mimeSubtype = "mixed"; break; }
9a729c80	/*
7c1eb3bf	* We've finished message we're parsing */ if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
ad9c6836	}
e3aaff8e
b726511f	if(multiparts == 0) { if(messages) free(messages);
7c1eb3bf	return 2; /* Nothing to do */
b726511f	}
7c1eb3bf
e3aaff8e	cli_dbgmsg("The message has %d parts\n", multiparts);
393a6d67	cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
e3aaff8e
9a729c80	/* * We now have all the parts of the multipart message * in the messages array: * message messages[multiparts] Let's decide what to do with them all */
e3aaff8e	switch(tableFind(subtypeTable, mimeSubtype)) { case RELATED:
e06d34dc	cli_dbgmsg("Multipart related handler\n");
e3aaff8e	/*
294d0774	* Have a look to see if there's HTML code * which will need scanning
e3aaff8e	*/ aMessage = NULL; assert(multiparts > 0);
d4d14218	htmltextPart = getTextPart(messages, multiparts);
e3aaff8e
d4d14218	if(htmltextPart >= 0) aText = textAddMessage(aText, messages[htmltextPart]);
e3aaff8e	else /*
294d0774	* There isn't an HTML bit. If there's a * multipart bit, it'll may be in there * somewhere
e3aaff8e	*/ for(i = 0; i < multiparts; i++) if(messageGetMimeType(messages[i]) == MULTIPART) { aMessage = messages[i];
d4d14218	htmltextPart = i;
e3aaff8e	break; }
59da5a4f	if(htmltextPart == -1)
294d0774	cli_dbgmsg("No HTML code found to be scanned");
59da5a4f	else {
b0b860f1	rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
59da5a4f	if(rc == 1) { assert(aMessage == messages[htmltextPart]); messageDestroy(aMessage); messages[htmltextPart] = NULL; } }
e3aaff8e	/* * Fixed based on an idea from Stephen White <stephen@earth.li> * The message is confused about the difference * between alternative and related. Badtrans.B * suffers from this problem. * * Fall through in this case: * Content-Type: multipart/related; * type="multipart/alternative" */
98cb5cba	/* * Changed to always fall through based on * an idea from Michael Dankov <misha@btrc.ru> * that some viruses are completely confused * about the difference between related * and mixed / /cptr = messageFindArgument(mainMessage, "type");
e3aaff8e	if(cptr == NULL) break; isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0); free((char *)cptr); if(!isAlternative)
98cb5cba	break;*/
ba867aed	case DIGEST: /* * According to section 5.1.5 RFC2046, the * default mime type of multipart/digest parts * is message/rfc822 * * We consider them as alternative, wrong in * the strictest sense since they aren't * alternatives - all parts a valid - but it's * OK for our needs since it means each part * will be scanned */
e3aaff8e	case ALTERNATIVE: cli_dbgmsg("Multipart alternative handler\n");
59da5a4f	#if 0
d4d14218	htmltextPart = getTextPart(messages, multiparts);
e3aaff8e
d4d14218	if(htmltextPart == -1) htmltextPart = 0;
e3aaff8e
d4d14218	aMessage = messages[htmltextPart];
e3aaff8e	aText = textAddMessage(aText, aMessage);
b0b860f1	rc = parseEmailBody(NULL, aText, dir, rfc821Table, subtypeTable, options);
51fc2aa8
b0b860f1	if(rc == 1)
e3aaff8e	/* * Alternative message has saved its * attachments, ensure we don't do * the same thing */ rc = 2;
59da5a4f	#endif
e3aaff8e	/* * Fall through - some clients are broken and * say alternative instead of mixed. The Klez * virus is broken that way / case REPORT: / * According to section 1 of RFC1892, the * syntax of multipart/report is the same * as multipart/mixed. There are some required * parameters, but there's no need for us to * verify that they exist */ case MIXED:
c9b8f252	case APPLEDOUBLE: /* not really supported */
e3aaff8e	/* * Look for attachments * * Not all formats are supported. If an * unsupported format turns out to be * common enough to implement, it is a simple * matter to add it */
ad9c6836	if(aText) { if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
e3aaff8e	mainMessage = NULL;
ad9c6836	}
e3aaff8e	cli_dbgmsg("Mixed message with %d parts\n", multiparts); for(i = 0; i < multiparts; i++) { bool addAttachment = FALSE; bool addToText = FALSE; const char *dtype;
b0b860f1	#ifndef SAVE_TO_DISC
89670d69	message *body;
f12d2498	#endif
e3aaff8e	aMessage = messages[i];
59da5a4f	if(aMessage == NULL) continue;
e3aaff8e	dtype = messageGetDispositionType(aMessage);
d4d14218	cptr = messageGetMimeSubtype(aMessage);
e3aaff8e	cli_dbgmsg("Mixed message part %d is of type %d\n", i, messageGetMimeType(aMessage)); switch(messageGetMimeType(aMessage)) { case APPLICATION:
049a18b9	#if 0 /* strict checking... */
e3aaff8e	if((strcasecmp(dtype, "attachment") == 0) \|\|
d4d14218	(strcasecmp(cptr, "x-msdownload") == 0) \|\|
049a18b9	(strcasecmp(cptr, "octet-stream") == 0) \|\|
d4d14218	(strcasecmp(dtype, "octet-stream") == 0))
e3aaff8e	addAttachment = TRUE; else {
049a18b9	cli_dbgmsg("Discarded mixed/application not sent as attachment\n");
e3aaff8e	continue; }
049a18b9	#endif addAttachment = TRUE;
e3aaff8e	break; case NOMIME:
51fc2aa8	if(mainMessage) {
393a6d67	const text *u_line = uuencodeBegin(mainMessage); if(u_line) {
51fc2aa8	cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n"); messageSetEncoding(mainMessage, "x-uuencode");
0e5a0129	fb = messageToFileblob(mainMessage, dir);
51fc2aa8
0e5a0129	if(fb) fileblobDestroy(fb);
51fc2aa8	} if(mainMessage != messageIn) messageDestroy(mainMessage); mainMessage = NULL; }
e3aaff8e	addToText = TRUE; if(messageGetBody(aMessage) == NULL) /* * No plain text version */
b2223aad	messageAddStr(aMessage, "No plain text alternative");
e3aaff8e	assert(messageGetBody(aMessage) != NULL); break; case TEXT:
d110fe1c	cli_dbgmsg("Mixed message text part disposition \"%s\"\n", dtype);
e3aaff8e	if(strcasecmp(dtype, "attachment") == 0) addAttachment = TRUE; else if((*dtype == '\0') \|\| (strcasecmp(dtype, "inline") == 0)) {
393a6d67	const text *u_line = uuencodeBegin(aMessage);
d110fe1c
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
e3aaff8e	mainMessage = NULL;
78e302e1	cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
393a6d67	if(u_line) {
d110fe1c	cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n"); messageSetEncoding(aMessage, "x-uuencode"); addAttachment = TRUE;
ab3107bc	} else if(tableFind(subtypeTable, cptr) == PLAIN) {
aeca3893	char *filename;
d110fe1c	/* * Strictly speaking
ab3107bc	* a text/plain part is
d110fe1c	* not an attachment. We * pretend it is so that * we can decode and * scan it */
aeca3893	filename = (char )messageFindArgument(aMessage, "filename"); if(filename == NULL) filename = (char )messageFindArgument(aMessage, "name"); if(filename == NULL) { cli_dbgmsg("Adding part to main message\n"); addToText = TRUE; } else { cli_dbgmsg("Treating %s as attachment\n", filename); free(filename); addAttachment = TRUE; }
d110fe1c	} else {
3805ebcb	if(options&CL_SCAN_MAILURL)
ab3107bc	if(tableFind(subtypeTable, cptr) == HTML) checkURLs(aMessage, dir);
e3aaff8e	messageAddArgument(aMessage, "filename=textportion"); addAttachment = TRUE; } } else {
28010d29	cli_dbgmsg("Text type %s is not supported\n", dtype);
e3aaff8e	continue; } break; case MESSAGE:
2f4737ed	/* Content-Type: message/rfc822 */
e3aaff8e	cli_dbgmsg("Found message inside multipart\n");
d879a7b0	if(encodingLine(aMessage) == NULL) { assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
705e985c	continue;
d879a7b0	}
b2223aad	messageAddStrAtTop(aMessage,
2f4737ed	"Received: by clamd");
93d41ee4	#ifdef SAVE_TO_DISC /* * Save this embedded message * to a temporary file / saveTextPart(aMessage, dir); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; #else / * Scan in memory, faster but * is open to DoS attacks when * many nested levels are * involved. */
f73920a4	body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
c693116d	/* * We've fininished with the * original copy of the message, * so throw that away and * deal with the encapsulated * message as a message. * This can save a lot of memory */ assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
89670d69	if(body) {
b0b860f1	rc = parseEmailBody(body, NULL, dir, rfc821Table, subtypeTable, options);
89670d69	messageDestroy(body); }
93d41ee4	#endif
e3aaff8e	continue; case MULTIPART: /* * It's a multi part within a multi part * Run the message parser on this bit, it won't * be an attachment */ cli_dbgmsg("Found multipart inside multipart\n");
f12d2498	if(aMessage) { /* * The headers were parsed when reading in the * whole multipart section */
b0b860f1	rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
f12d2498	cli_dbgmsg("Finished recursion\n"); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
89670d69	} else {
b0b860f1	rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options);
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
89670d69	mainMessage = NULL; }
e3aaff8e	continue; case AUDIO: case IMAGE:
8ef734d4	case VIDEO:
e3aaff8e	addAttachment = TRUE; break; default:
8ef734d4	cli_warnmsg("Only text and application attachments are supported, type = %d\n",
e3aaff8e	messageGetMimeType(aMessage)); continue; } /* * It must be either text or * an attachment. It can't be both */ assert(addToText \|\| addAttachment); assert(!(addToText && addAttachment));
640ed140	if(addToText)
e3aaff8e	aText = textAdd(aText, messageGetBody(aMessage));
b0b860f1	else { fb = messageToFileblob(aMessage, dir);
e3aaff8e
0e5a0129	if(fb) fileblobDestroy(fb);
e3aaff8e	}
7c1eb3bf	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
e3aaff8e	}
b0b860f1	/* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */
e3aaff8e	break; case SIGNED: case PARALLEL: /* * If we're here it could be because we have a * multipart/mixed message, consisting of a * message followed by an attachment. That * message itself is a multipart/alternative * message and we need to dig out the plain * text part of that alternative */
d4d14218	htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0;
e3aaff8e
b0b860f1	rc = parseEmailBody(messages[htmltextPart], aText, dir, rfc821Table, subtypeTable, options);
e3aaff8e	break;
9a729c80	case ENCRYPTED: rc = 0;
c79a2273	protocol = (char *)messageFindArgument(mainMessage, "protocol");
9a729c80	if(protocol) { if(strcasecmp(protocol, "application/pgp-encrypted") == 0) { /* RFC2015 */ cli_warnmsg("PGP encoded attachment not scanned\n"); rc = 2; } else cli_warnmsg("Unknown encryption protocol '%s' - report to bugs@clamav.net\n"); free(protocol); } else cli_warnmsg("Encryption method missing protocol name - report to bugs@clamav.net\n"); break;
e3aaff8e	default: /* * According to section 7.2.6 of RFC1521, * unrecognised multiparts should be treated as * multipart/mixed. I don't do this yet so * that I can see what comes along... */
a7398c9c	cli_warnmsg("Unsupported multipart format `%s' - report to bugs@clamav.net\n", mimeSubtype);
e3aaff8e	rc = 0; } for(i = 0; i < multiparts; i++)
c693116d	if(messages[i]) messageDestroy(messages[i]);
e3aaff8e
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
049a18b9	if(aText && (textIn == NULL)) textDestroy(aText);
b726511f	if(messages) free(messages);
e3aaff8e	return rc; case MESSAGE: /* * Check for forbidden encodings */ switch(messageGetEncoding(mainMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: break; default:
049a18b9	cli_warnmsg("MIME type 'message' cannot be decoded\n");
e3aaff8e	break; }
f10460ed	rc = 0;
049a18b9	if((strcasecmp(mimeSubtype, "rfc822") == 0) \|\| (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
b2223aad	message *m = parseEmailHeaders(mainMessage, rfc821Table);
c693116d	if(m) { cli_dbgmsg("Decode rfc822");
93d41ee4	if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
59da5a4f	} else messageReset(mainMessage);
c693116d	if(messageGetBody(m))
b0b860f1	rc = parseEmailBody(m, NULL, dir, rfc821Table, subtypeTable, options);
c693116d	messageDestroy(m); }
e3aaff8e	break;
5a15955b	} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
12f3689d	/* RFC 2298 - handle like a normal email */
5a15955b	rc = 1;
12f3689d	break;
5a15955b	} else if(strcasecmp(mimeSubtype, "partial") == 0) {
f10460ed	#ifdef PARTIAL_DIR /* RFC1341 message split over many emails */ if(rfc1341(mainMessage, dir) >= 0) rc = 1; #else
22080fa5	cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
f10460ed	rc = 0; #endif } else if(strcasecmp(mimeSubtype, "external-body") == 0)
22080fa5	/* TODO */
e3aaff8e	cli_warnmsg("Attempt to send Content-type message/external-body trapped");
5a642650	else
22080fa5	cli_warnmsg("Unsupported message format `%s' - please report to bugs@clamav.net\n", mimeSubtype);
e3aaff8e
f10460ed
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b726511f	if(messages) free(messages);
f10460ed	return rc;
e3aaff8e	case APPLICATION:
d4d14218	cptr = messageGetMimeSubtype(mainMessage);
aa0210b6	/if((strcasecmp(cptr, "octet-stream") == 0) \|\| (strcasecmp(cptr, "x-msdownload") == 0)) {/ {
b0b860f1	fb = messageToFileblob(mainMessage, dir);
e3aaff8e
0e5a0129	if(fb) { cli_dbgmsg("Saving main message as attachment\n"); fileblobDestroy(fb);
09e05292	messageClearMarkers(mainMessage);
e3aaff8e	}
aa0210b6	} /else cli_warnmsg("Discarded application not sent as attachment\n");/
e3aaff8e	break; case AUDIO: case VIDEO: case IMAGE: break; default: cli_warnmsg("Message received with unknown mime encoding"); break; } }
f12d2498	if(aText && (textIn == NULL)) { textDestroy(aText); aText = NULL; }
b0b860f1	/* * No attachments - scan the text portions, often files * are hidden in HTML code */ cli_dbgmsg("%d multiparts found\n", multiparts); for(i = 0; i < multiparts; i++) { fb = messageToFileblob(messages[i], dir);
e3aaff8e
b0b860f1	if(fb) {
c7b69776	cli_dbgmsg("Saving multipart %d\n", i);
d4d14218
b0b860f1	fileblobDestroy(fb); } } if(mainMessage) {
e3aaff8e	/*
b0b860f1	* Look for uu-encoded main file
e3aaff8e	*/
b0b860f1	const text *t_line; if((t_line = uuencodeBegin(mainMessage)) != NULL) { cli_dbgmsg("Found uuencoded file\n");
d4d14218
b0b860f1	/* * Main part contains uuencoded section */ messageSetEncoding(mainMessage, "x-uuencode");
d4d14218
b0b860f1	if((fb = messageToFileblob(mainMessage, dir)) != NULL) { if((cptr = fileblobGetFilename(fb)) != NULL) cli_dbgmsg("Found uuencoded message %s\n", cptr);
0e5a0129	fileblobDestroy(fb); }
b0b860f1	} else if((encodingLine(mainMessage) != NULL) &&
69543a9d	((t_line = bounceBegin(mainMessage)) != NULL)) {
b0b860f1	const text *t; static const char encoding[] = "Content-Transfer-Encoding";
d4d14218	/*
b0b860f1	* Attempt to save the original (unbounced) * message - clamscan will find that in the * directory and call us again (with any luck) * having found an e-mail message to handle * * This finds a lot of false positives, the * search that an encoding line is in the * bounce (i.e. it's after the bounce header) * helps a bit, but at the expense of scanning * the entire message. messageAddLine * optimisation could help here, but needs * careful thought, do it with line numbers * would be best, since the current method in * messageAddLine of checking encoding first * must remain otherwise non bounce messages * won't be scanned
d4d14218	*/
b0b860f1	for(t = t_line; t; t = t->t_next) { const char *txt = lineGetData(t->t_line); if(txt && (strncasecmp(txt, encoding, sizeof(encoding) - 1) == 0) && (strstr(txt, "7bit") == NULL) && (strstr(txt, "8bit") == NULL)) break; } if(t && ((fb = fileblobCreate()) != NULL)) { cli_dbgmsg("Found a bounce message\n"); fileblobSetFilename(fb, dir, "bounce"); fb = textToFileblob(t_line, fb); fileblobDestroy(fb);
b143af46	} else cli_dbgmsg("Not found a bounce message\n");
b0b860f1	} else { bool saveIt;
d4d14218
b0b860f1	cli_dbgmsg("Not found uuencoded file\n");
e2e7ebf5
b0b860f1	if(messageGetMimeType(mainMessage) == MESSAGE)
f01bbfe8	/*
b0b860f1	* Quick peek, if the encapsulated * message has no * content encoding statement don't * bother saving to scan, it's safe
f01bbfe8	*/
b0b860f1	saveIt = (encodingLine(mainMessage) != NULL); else if((t_line = encodingLine(mainMessage)) != NULL) {
a7527b1f	/*
b0b860f1	* Some bounces include the message * body without the headers. * Unfortunately this generates a * lot of false positives that a bounce * has been found when it hasn't.
a7527b1f	*/
b0b860f1	if((fb = fileblobCreate()) != NULL) { cli_dbgmsg("Found a bounce message with no header\n");
0e5a0129	fileblobSetFilename(fb, dir, "bounce");
b0b860f1	fileblobAddData(fb, "Received: by clamd\n", 19);
cca4efe4
b0b860f1	fb = textToFileblob(t_line, fb);
5c1150ac
b0b860f1	fileblobDestroy(fb);
5c1150ac	}
b0b860f1	saveIt = FALSE;
59da5a4f	} else if(multiparts == 0)
b0b860f1	/* * Save the entire text portion, * since it it may be an HTML file with * a JavaScript virus */ saveIt = TRUE;
59da5a4f	else saveIt = FALSE;
e3aaff8e
b0b860f1	if(saveIt) { cli_dbgmsg("Saving text part to scan\n"); /* * TODO: May be better to save aText */ saveTextPart(mainMessage, dir);
59da5a4f	if(mainMessage != messageIn) { messageDestroy(mainMessage); mainMessage = NULL; } else messageReset(mainMessage); rc = 1;
049a18b9	}
e3aaff8e	}
b0b860f1	} else rc = (multiparts) ? 1 : 2; /* anything saved? */
e3aaff8e
ad9c6836	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b726511f	if(messages) free(messages);
e06d34dc	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
e3aaff8e
e06d34dc	return rc;
e3aaff8e	} /* * Is the current line the start of a new section? * * New sections start with --boundary / static int boundaryStart(const char line, const char *boundary) {
0e4e16d4	if(line == NULL) return 0; /* empty line */
f7fa3820	cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);
0e4e16d4	if(*line++ != '-') return 0;
e3aaff8e	/*
0e4e16d4	* Gibe.B3 is broken, it has:
e3aaff8e	* boundary="---- =_NextPart_000_01C31177.9DC7C000" * but it's boundaries look like * ------ =_NextPart_000_01C31177.9DC7C000
0e4e16d4	* notice the one too few '-'. * Presumably this is a deliberate exploitation of a bug in some mail * clients. * * The trouble is that this creates a lot of false positives for * boundary conditions, if we're too lax about matches. We do our level * best to avoid these false positives. For example if we have * boundary="1" we want to ensure that we don't break out of every line * that has -1 in it instead of starting --1. This needs some more work.
e3aaff8e	*/ if(strstr(line, boundary) != NULL) {
0c0894b8	cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
e3aaff8e	return 1; } if(line++ != '-') return 0; return strcasecmp(line, boundary) == 0; } / * Is the current line the end? * * The message ends with with --boundary-- / static int endOfMessage(const char line, const char *boundary) { size_t len;
02927896	if(line == NULL) return 0;
1a74d4df	cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);
e3aaff8e	if(line++ != '-') return 0; if(line++ != '-') return 0; len = strlen(boundary);
049a18b9	if(strncasecmp(line, boundary, len) != 0) return 0;
e3aaff8e	if(strlen(line) != (len + 2)) return 0; line = &line[len]; if(line++ != '-') return 0; return line == '-'; } /* * Initialise the various lookup tables / static int initialiseTables(table_t rfc821Table, table_t subtypeTable) { const struct tableinit tableinit; /* * Initialise the various look up tables / rfc821Table = tableCreate(); assert(*rfc821Table != NULL); for(tableinit = rfc821headers; tableinit->key; tableinit++)
51fc2aa8	if(tableInsert(rfc821Table, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
767f16ab	*rfc821Table = NULL;
e3aaff8e	return -1;
51fc2aa8	}
e3aaff8e	subtypeTable = tableCreate(); assert(subtypeTable != NULL); for(tableinit = mimeSubtypes; tableinit->key; tableinit++) if(tableInsert(subtypeTable, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
51fc2aa8	tableDestroy(*subtypeTable);
767f16ab	rfc821Table = NULL; subtypeTable = NULL;
e3aaff8e	return -1; } return 0; } /*
d4d14218	* If there's a HTML text version use that, otherwise
e3aaff8e	* use the first text part, otherwise just use the
d4d14218	* first one around. HTML text is most likely to include * a scripting worm
e3aaff8e	* * If we can't find one, return -1 / static int getTextPart(message const messages[], size_t size) { size_t i;
28010d29	int textpart = -1;
e3aaff8e	for(i = 0; i < size; i++) { assert(messages[i] != NULL);
28010d29	if(messageGetMimeType(messages[i]) == TEXT) { if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0) return (int)i; textpart = (int)i; }
e3aaff8e	}
28010d29	return textpart;
e3aaff8e	} /* * strip -
767f16ab	* Remove the trailing spaces from a buffer. Don't call this directly, * always call strstrip() which is a wrapper to this routine to be used with * NUL terminated strings. This code looks a bit strange because of it's * heritage from code that worked on strings that weren't necessarily NUL * terminated. * TODO: rewrite for clamAV *
e3aaff8e	* Returns it's new length (a la strlen) * * len must be int not size_t because of the >= 0 test, it is sizeof(buf) * not strlen(buf) / static size_t strip(char buf, int len) { register char *ptr; register size_t i; if((buf == NULL) \|\| (len <= 0))
767f16ab	return 0;
e3aaff8e	i = strlen(buf); if(len > (int)(i + 1))
767f16ab	return i;
e3aaff8e	ptr = &buf[--len]; #if defined(UNIX) \|\| defined(C_LINUX) \|\| defined(C_DARWIN) /* watch - it may be in shared text area / do if(ptr) *ptr = '\0';
87c9313e	while((--len >= 0) && (!isgraph(--ptr)) && (ptr != '\n') && (*ptr != '\r'));
e3aaff8e	#else /* more characters can be displayed on DOS / do #ifndef REAL_MODE_DOS if(ptr) /* C8.0 puts into a text area / #endif ptr = '\0'; while((--len >= 0) && ((--ptr == '\0') \|\| (isspace((int)ptr)))); #endif return((size_t)(len + 1)); } /* * strstrip: * Strip a given string */
f0627588	size_t
e3aaff8e	strstrip(char s) { if(s == (char )NULL) return(0);
02927896
e3aaff8e	return(strip(s, strlen(s) + 1)); } /* * When parsing a MIME header see if this spans more than one line. A * semi-colon at the end of the line indicates that the MIME information * is continued on the next line. * * Some clients are broken and put white space after the ; / static bool continuationMarker(const char line) { const char *ptr;
02927896	if(line == NULL) return FALSE;
e3aaff8e	#ifdef CL_DEBUG cli_dbgmsg("continuationMarker(%s)\n", line); #endif if(strlen(line) == 0) return FALSE; ptr = strchr(line, '\0'); assert(ptr != NULL);
28c29d59	while(ptr > line)
e3aaff8e	switch(--ptr) { case '\n': case '\r': case ' ': case '\t': continue; case ';': return TRUE; default: return FALSE; } return FALSE; } static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char arg) { #ifdef CL_THREAD_SAFE char strptr; #endif
8b3563f2	char copy, ptr; int commandNumber;
4f1d0bfc
e3aaff8e	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
8b3563f2
a9714c49	ptr = rfc822comments(cmd);
f017fbdd	if(ptr) { commandNumber = tableFind(rfc821Table, ptr); free(ptr); } else commandNumber = tableFind(rfc821Table, cmd);
8b3563f2
a9714c49	copy = rfc822comments(arg);
f017fbdd	if(copy == NULL) copy = strdup(arg);
8b3563f2	if(copy == NULL) return -1;
e3aaff8e
f017fbdd	ptr = copy;
8b3563f2	switch(commandNumber) {
e3aaff8e	case CONTENT_TYPE: /* * Fix for non RFC1521 compliant mailers * that send content-type: Text instead * of content-type: Text/Plain, or * just simply "Content-Type:" */
5e394e73	if(arg == NULL)
69543a9d	/* * According to section 4 of RFC1521: * "Note also that a subtype specification is * MANDATORY. There are no default subtypes" *
1eec55a6	* We have to break this and make an assumption
69543a9d	* for the subtype because virus writers and * email client writers don't get it right */ cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
e3aaff8e	else if(strchr(copy, '/') == NULL)
69543a9d	/* * Empty field, such as * Content-Type: * which I believe is illegal according to * RFC1521 */
28010d29	cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
e3aaff8e	else {
8037334b	int i;
6d312569	char mimeArgs; / RHS of the ; */
e3aaff8e	/* * Some clients are broken and * put white space after the ; */
2625d6a0	if(arg == '/') { cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n"); messageSetMimeType(m, "application"); messageSetMimeSubtype(m, "octet-stream"); } else { /
0960ff5e	* The content type could be in quotes: * Content-Type: "multipart/mixed" * FIXME: this is a hack in that ignores * the quotes, it doesn't handle * them properly
2625d6a0	*/
f017fbdd	while(isspace(*copy)) copy++; if(copy[0] == '\"') copy++;
0960ff5e
f017fbdd	if(copy[0] != '/') { char s; char mimeType; /* LHS of the ; */ s = mimeType = cli_strtok(copy, 0, ";");
0960ff5e	/*
6d312569	* Handle * Content-Type: foo/bar multipart/mixed * and * Content-Type: multipart/mixed foo/bar
0960ff5e	*/
6d312569	for(;;) {
548a5f96	#ifdef CL_THREAD_SAFE
6d312569	int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
548a5f96	#else int set = messageSetMimeType(m, strtok(s, "/")); #endif
6d312569	/* * Stephen White <stephen@earth.li> * Some clients put space after * the mime type but before * the ; */
548a5f96	#ifdef CL_THREAD_SAFE
6d312569	s = strtok_r(NULL, ";", &strptr);
548a5f96	#else s = strtok(NULL, ";"); #endif
6d312569	if(s == NULL) break; if(set) {
f017fbdd	size_t len = strstrip(s) - 1;
6d312569	if(s[len] == '\"') { s[len] = '\0'; len = strstrip(s); } if(len) {
f017fbdd	if(strchr(s, ' ')) { char *t = cli_strtok(s, 0, " ");
6d312569
f017fbdd	messageSetMimeSubtype(m, t); free(t); } else messageSetMimeSubtype(m, s);
6d312569	}
0960ff5e	}
6d312569	while(s && !isspace(s)) s++; if(s++ == '\0') break; if(s == '\0') break;
0960ff5e	}
f017fbdd	free(mimeType); }
2625d6a0	}
e3aaff8e	/*
20d3dde9	* Add in all rest of the the arguments. * e.g. if the header is this: * Content-Type:', arg='multipart/mixed; boundary=foo * we find the boundary argument set it
e3aaff8e	*/
8037334b	i = 1; while((mimeArgs = cli_strtok(copy, i++, ";")) != NULL) { cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);
6d312569	messageAddArguments(m, mimeArgs); free(mimeArgs); }
e3aaff8e	} break; case CONTENT_TRANSFER_ENCODING: messageSetEncoding(m, copy); break; case CONTENT_DISPOSITION:
548a5f96	#ifdef CL_THREAD_SAFE
ef704fb3	arg = strtok_r(copy, ";", &strptr); if(arg && *arg) { messageSetDispositionType(m, arg); messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr)); }
548a5f96	#else arg = strtok(copy, ";"); if(arg && *arg) { messageSetDispositionType(m, arg); messageAddArgument(m, strtok(NULL, "\r\n")); } #endif
e3aaff8e	}
049a18b9	free(ptr);
e3aaff8e
4f1d0bfc	return 0;
e3aaff8e	}
e06d34dc	/*
cca4efe4	* Save the text portion of the message / static void saveTextPart(message m, const char *dir) {
0e5a0129	fileblob *fb;
cca4efe4	messageAddArgument(m, "filename=textportion");
0e5a0129	if((fb = messageToFileblob(m, dir)) != NULL) {
cca4efe4	/* * Save main part to scan that */
c77c8809	cli_dbgmsg("Saving main message\n");
cca4efe4
0e5a0129	fileblobDestroy(fb);
cca4efe4	} }
/*
 * Handle RFC822 comments in headers.
 * Returns a buffer without the comments or NULL on error or if the input
 * has no comments. The caller must free the returned buffer
 * See section 3.4.3 of RFC822
 *
 * Parentheses inside a quoted string are ordinary text, not a comment, so
 * they and the quotes around them are copied through. A backslash is
 * dropped and the character after it is taken literally, but it is only
 * copied when it is not inside a comment.
 * Trailing white space is removed from the result.
 *
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in)
{
	const char *iptr;
	char *out, *optr;
	int backslash, inquote, commentlevel;

	if(in == NULL)
		return NULL;

	if(strchr(in, '(') == NULL)
		return NULL;

	/* The output is never longer than the input */
	out = cli_malloc(strlen(in) + 1);
	if(out == NULL)
		return NULL;

	backslash = commentlevel = inquote = 0;
	optr = out;

	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(iptr = in; *iptr; iptr++)
		if(backslash) {
			if(commentlevel == 0)
				*optr++ = *iptr;
			backslash = 0;
		} else switch(*iptr) {
			case '\\':
				backslash = 1;
				break;
			case '\"':
				if(commentlevel == 0) {
					*optr++ = '\"';
					inquote = !inquote;
				}
				break;
			case '(':
				if(inquote)
					*optr++ = '(';
				else
					commentlevel++;
				break;
			case ')':
				if(inquote)
					*optr++ = ')';
				else if(commentlevel > 0)
					commentlevel--;
				break;
			default:
				if(commentlevel == 0)
					*optr++ = *iptr;
		}

	if(backslash)	/* last character was a single backslash */
		*optr++ = '\\';
	*optr = '\0';

	strstrip(out);

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
50df4118	/* * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must * free, or NULL on error / static char rfc2047(const char in) { char out, *pout; size_t len;
4de5fffd	if((strstr(in, "=?") == NULL) \|\| (strstr(in, "?=") == NULL))
50df4118	return strdup(in); cli_dbgmsg("rfc2047 '%s'\n", in); out = cli_malloc(strlen(in) + 1); if(out == NULL) return NULL; pout = out; /* For each RFC2047 string / while(in) {
cf569541	char encoding, ptr, enctext;
50df4118	message m; blob b; /* Find next RFC2047 string / while(in) { if((in == '=') && (in[1] == '?')) { in += 2; break; } pout++ = in++; } / Skip over charset, find encoding / while((in != '?') && in) in++; if(in == '\0') break; encoding = *++in; encoding = tolower(encoding); if((encoding != 'q') && (encoding != 'b')) {
1b00d9a4	cli_warnmsg("Unsupported RFC2047 encoding type '%c' - report to bugs@clamav.net\n", encoding); free(out); out = NULL;
50df4118	break; } /* Skip to encoded text / if(++in != '?') break; if(*++in == '\0') break;
cf569541	enctext = strdup(in); if(enctext == NULL) { free(out); out = NULL; break; }
50df4118	in = strstr(in, "?=");
cf569541	if(in == NULL) { free(enctext);
50df4118	break;
cf569541	}
50df4118	in += 2; ptr = strstr(enctext, "?="); assert(ptr != NULL); ptr = '\0'; /cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/ m = messageCreate();
c77c8809	if(m == NULL)
50df4118	break; messageAddStr(m, enctext);
cf569541	free(enctext);
767f16ab	switch(encoding) {
50df4118	case 'q': messageSetEncoding(m, "quoted-printable"); break; case 'b': messageSetEncoding(m, "base64"); break; } b = messageToBlob(m); len = blobGetDataSize(b); cli_dbgmsg("Decoded as '%.s'\n", len, len, blobGetData(b)); memcpy(pout, blobGetData(b), len); blobDestroy(b); messageDestroy(m); if(pout[len - 1] == '\n') pout += len - 1; else pout += len; } *pout = '\0'; cli_dbgmsg("rfc2047 returns '%s'\n", out); return out; }
#ifdef	PARTIAL_DIR
/*
 * Handle partial messages
 *
 * The part held in m is saved into PARTIAL_DIR under the name made from its
 * "id" and "number" arguments. If m also has a "total" argument and this is
 * the part whose number equals it, the saved parts 1 to total are copied,
 * in that order, into the file dir/id and removed from PARTIAL_DIR.
 *
 * Returns 0 on success, including when there is nothing to reassemble
 * yet, or -1 on error
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *id, *number, *total = NULL, *oldfilename;
	DIR *dd = NULL;
	FILE *fout = NULL;
	int rc = -1;

	if((mkdir(PARTIAL_DIR, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", PARTIAL_DIR);
		return -1;
	} else {
		struct stat statb;

		if(stat(PARTIAL_DIR, &statb) < 0) {
			cli_errmsg("Can't stat the directory '%s'\n", PARTIAL_DIR);
			return -1;
		}
		if(statb.st_mode & 077)
			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
				PARTIAL_DIR, (unsigned int)(statb.st_mode & 0777));
	}

	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		return -1;

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL) {
		free(id);
		return -1;
	}

	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 is for "filename=" and the NUL */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg == NULL) {
		free(oldfilename);
		goto done;
	}
	sprintf(arg, "filename=%s%s", id, number);
	messageAddArgument(m, arg);
	free(arg);

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, PARTIAL_DIR)) == NULL)
		goto done;

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		const int n = atoi(number);
		const int t = atoi(total);

		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if(n == t) {
			char outname[NAME_MAX + 1];
			int part;

			/*
			 * id comes from the email and is used as a file
			 * name below dir, don't let it name a file
			 * anywhere else
			 */
			if(strchr(id, '/') != NULL) {
				cli_warnmsg("rfc1341: not reassembling, id '%s' contains a '/'\n", id);
				goto done;
			}

			dd = opendir(PARTIAL_DIR);
			if(dd == NULL) {
				/* Nothing can be reassembled, this isn't treated as an error */
				rc = 0;
				goto done;
			}

			snprintf(outname, sizeof(outname), "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing", outname);
				goto done;
			}

			for(part = 1; part <= t; part++) {
				char filename[NAME_MAX + 1];
				struct dirent *dent;
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
#if defined(C_SOLARIS) || defined(C_BEOS)
				char result[sizeof(struct dirent) + PATH_MAX + 1];
#else
				struct dirent result;
#endif
#endif

				/* The saved parts have names which start with the id and part number */
				snprintf(filename, sizeof(filename), "%s%d", id, part);

#ifdef HAVE_READDIR_R_3
#if defined(C_SOLARIS) || defined(C_BEOS)
				while((readdir_r(dd, (struct dirent *)result, &dent) == 0) && dent) {
#else
				while((readdir_r(dd, (struct dirent *)&result, &dent) == 0) && dent) {
#endif
#elif defined(HAVE_READDIR_R_2)
#if defined(C_SOLARIS) || defined(C_BEOS)
				/* here result is a char array */
				while((dent = (struct dirent *)readdir_r(dd, (struct dirent *)result))) {
#else
				/* here result is a struct dirent */
				while((dent = (struct dirent *)readdir_r(dd, (struct dirent *)&result))) {
#endif
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					char fullname[NAME_MAX + 1];
					FILE *fin;
					char buffer[BUFSIZ];
					int nblanks, fulllen;

					if(dent->d_ino == 0)
						continue;

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
						continue;

					/* A name that doesn't fit can't be opened, skip it */
					fulllen = snprintf(fullname, sizeof(fullname), "%s/%s", PARTIAL_DIR, dent->d_name);
					if((fulllen < 0) || ((size_t)fulllen >= sizeof(fullname)))
						continue;

					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading", fullname);
						fclose(fout);
						fout = NULL;
						unlink(outname);
						goto done;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer), fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied
						 */
						if(buffer[0] == '\n') {
							nblanks++;
						} else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* FIXME: don't unlink if leave temps */
					unlink(fullname);
					break;
				}
				rewinddir(dd);
			}
		}
	}
	rc = 0;

done:
	/* Everything that may still be open or allocated is released here */
	if(fout)
		fclose(fout);
	if(dd)
		closedir(dd);
	free(number);
	free(id);
	free(total);

	return rc;
}
#endif
55a3f03b	#ifdef FOLLOWURLS
71ba1dcd	static void checkURLs(message m, const char dir) { blob *b = messageToBlob(m); size_t len;
87c9313e	table_t *t;
15021325	int i, n;
f121cb96	#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) pthread_t tid[MAX_URLS]; struct arg args[MAX_URLS]; #endif
15021325	tag_arguments_t hrefs;
71ba1dcd	if(b == NULL) return; len = blobGetDataSize(b);
22080fa5	if(len == 0) { blobDestroy(b);
55a3f03b	return;
22080fa5	}
55a3f03b
87c9313e	/* TODO: make this size customisable / if(len > 1001024) { cli_warnmsg("Viruses pointed to by URL not scanned in large message\n"); blobDestroy(b);
22080fa5	return;
87c9313e	}
22080fa5	blobClose(b);
87c9313e	t = tableCreate();
3499d81e	if(t == NULL) { blobDestroy(b); return; }
55a3f03b
d77c655d	hrefs.count = 0; hrefs.tag = hrefs.value = NULL;
15021325	cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
3499d81e	if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
ef822cfc	blobDestroy(b); tableDestroy(t); return;
3499d81e	} cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
b143af46	/* TODO: Do we need to call remove_html_comments? */
87c9313e
15021325	n = 0; for(i = 0; i < hrefs.count; i++) { const char url = hrefs.value[i]; if(strncasecmp("http://", url, 7) == 0) { char ptr;
f121cb96	#ifdef WITH_CURL #ifndef CL_THREAD_SAFE struct arg arg; #endif #else /!WITH_CURL/
6eedb434	#ifdef CL_THREAD_SAFE static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
87c9313e	struct stat statb; char cmd[512];
f121cb96	#endif /WITH_CURL/
2c7d1edd	char name[NAME_MAX + 1];
15021325	if(tableFind(t, url) == 1) { cli_dbgmsg("URL %s already downloaded\n", url);
4f1d0bfc	continue; }
b2223aad	if(n == MAX_URLS) { cli_warnmsg("Not all URLs will be scanned\n"); break; }
15021325	(void)tableInsert(t, url, 1); cli_dbgmsg("Downloading URL %s to be scanned\n", url); strncpy(name, url, sizeof(name)); for(ptr = name; ptr; ptr++) if(ptr == '/') *ptr = '_';
71ba1dcd
65684cec	#ifdef WITH_CURL
f121cb96	#ifdef CL_THREAD_SAFE
2c7d1edd	args[n].dir = dir; args[n].url = url;
f121cb96	args[n].filename = strdup(name); pthread_create(&tid[n], NULL, getURL, &args[n]); #else
15021325	arg.url = url;
f121cb96	arg.dir = dir; arg.filename = name; getURL(&arg); #endif
9b4bb8b7	#else /* * TODO: maximum size and timeouts */
15021325	snprintf(cmd, sizeof(cmd), "GET -t10 %s > %s/%s 2>/dev/null", url, dir, name);
71ba1dcd	cli_dbgmsg("%s\n", cmd);
6eedb434	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&system_mutex); #endif
71ba1dcd	system(cmd);
6eedb434	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&system_mutex); #endif snprintf(cmd, sizeof(cmd), "%s/%s", dir, name); if(stat(cmd, &statb) >= 0) if(statb.st_size == 0) {
15021325	cli_warnmsg("URL %s failed to download\n", url);
6eedb434	/* * Don't bother scanning an empty file */ (void)unlink(cmd); }
9b4bb8b7	#endif
f121cb96	++n;
71ba1dcd	} } blobDestroy(b);
4f1d0bfc	tableDestroy(t);
f121cb96	#if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
ba867aed	assert(n <= MAX_URLS);
f121cb96	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); while(--n >= 0) { pthread_join(tid[n], NULL); free(args[n].filename); } #endif
2c7d1edd	html_tag_arg_free(&hrefs);
71ba1dcd	}
65684cec	#ifdef WITH_CURL
f121cb96	static void * #ifdef CL_THREAD_SAFE getURL(void a) #else getURL(struct arg arg) #endif
9b4bb8b7	{ char *fout;
87c9313e	CURL *curl;
9b4bb8b7	FILE *fp;
87c9313e	struct curl_slist *headers; static int initialised = 0;
f121cb96	#ifdef CL_THREAD_SAFE static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; struct arg arg = (struct arg )a; #endif const char url = arg->url; const char dir = arg->dir; const char *filename = arg->filename;
9b4bb8b7
f121cb96	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&init_mutex); #endif
87c9313e	if(!initialised) {
f121cb96	if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) { #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&init_mutex); #endif return NULL; }
87c9313e	initialised = 1;
9b4bb8b7	}
f121cb96	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&init_mutex); #endif
87c9313e	/* easy isn't the word I'd use... */ curl = curl_easy_init(); if(curl == NULL)
f121cb96	return NULL;
65684cec
87c9313e	(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net"); if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0)
f121cb96	return NULL;
87c9313e
9b4bb8b7	fout = cli_malloc(strlen(dir) + strlen(filename) + 2);
65684cec	if(fout == NULL) { curl_easy_cleanup(curl);
f121cb96	return NULL;
65684cec	}
9b4bb8b7
2c7d1edd	snprintf(fout, NAME_MAX, "%s/%s", dir, filename);
9b4bb8b7	fp = fopen(fout, "w"); if(fp == NULL) {
3a0f75c6	cli_errmsg("Can't open '%s' for writing", fout);
9b4bb8b7	free(fout);
65684cec	curl_easy_cleanup(curl);
f121cb96	return NULL;
9b4bb8b7	}
7e492164	#ifdef CURLOPT_WRITEDATA
f121cb96	if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) { fclose(fp); free(fout); curl_easy_cleanup(curl); return NULL; }
7e492164	#else if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) { fclose(fp); free(fout); curl_easy_cleanup(curl); return NULL; } #endif
f121cb96
87c9313e	/*
55a3f03b	* If an item is in squid's cache get it from there (TCP_HIT/200)
87c9313e	* by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200) */ headers = curl_slist_append(NULL, "Pragma:"); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
9b4bb8b7
87c9313e	/* These should be customisable */ curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
393a6d67	#ifdef CURLOPT_MAXFILESIZE curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024); #endif
9b4bb8b7
f121cb96	#ifdef CL_THREAD_SAFE
00d46ae6	#ifdef CURLOPT_DNS_USE_GLOBAL_CACHE
f121cb96	curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0); #endif
00d46ae6	#endif
15f4aa67	/* * Prevent password: prompting with older versions * FIXME: a better username? */
cd483c9b	curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
15f4aa67
f121cb96	/* * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked" * from gethostbyaddr_r within this. It may be a bug in libcurl * rather than this code, but I need to check, see Curl_resolv() * If pushed really hard it will sometimes say * Conditional jump or move depends on uninitialised value(s) and * quit. But the program seems to work OK without valgrind... * Perhaps Curl_resolv() isn't thread safe? */
87c9313e	if(curl_easy_perform(curl) != CURLE_OK) { cli_warnmsg("URL %s failed to download\n", url); unlink(fout); } fclose(fp);
f121cb96	curl_slist_free_all(headers);
87c9313e	curl_easy_cleanup(curl); free(fout);
f121cb96	return NULL;
9b4bb8b7	} #endif #else static void checkURLs(message m, const char dir) { } #endif
f2f25418	#ifdef HAVE_BACKTRACE
/*
 * Handler for SIGSEGV: put the default action back so that a second fault
 * isn't caught here, send a backtrace to syslog and leave with SIGSEGV as
 * the exit status. The signal number passed in is not looked at.
 */
static void sigsegv(int sig)
{
	signal(SIGSEGV, SIG_DFL);

	/* 1 selects syslog rather than the debug log */
	print_trace(1);

	exit(SIGSEGV);
}
4f1d0bfc	static void
02927896	print_trace(int use_syslog) { void array[10]; size_t size; char *strings; size_t i; pid_t pid = getpid(); size = backtrace(array, 10); strings = backtrace_symbols(array, size); if(use_syslog == 0) cli_dbgmsg("Backtrace of pid %d:\n", pid);
f73920a4	else
02927896	syslog(LOG_ERR, "Backtrace of pid %d:", pid); for(i = 0; i < size; i++) if(use_syslog)
767f16ab	syslog(LOG_ERR, "bt[%d]: %s", (int)i, strings[i]);
02927896	else cli_dbgmsg("%s\n", strings[i]);
ef822cfc	/* TODO: dump the current email */
02927896	free(strings); } #endif