GitList

libclamav/mbox.c

b151ef55	/* * Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7cef72ea	* * Change History: * $Log: mbox.c,v $
e279f3ea	* Revision 1.189 2004/11/27 14:39:01 nigelhorne * Honour section 7.2.6 of RFC1521 *
9f43cc75	* Revision 1.188 2004/11/27 14:17:35 nigelhorne * Handle attachments before the first mime section *
ef3cf57d	* Revision 1.187 2004/11/27 13:16:56 nigelhorne * uuencode failures no longer fatal *
a1c924f9	* Revision 1.186 2004/11/27 11:59:28 nigelhorne * Handle comments in the command part of headers *
39b5a552	* Revision 1.185 2004/11/26 23:00:29 nigelhorne * Handle spaces after the final MIME boundary and binHex attachments after that boundary *
30fb8a0b	* Revision 1.184 2004/11/26 21:51:48 nigelhorne * Scan uuencodes after the final MIME section *
699fafc3	* Revision 1.183 2004/11/26 17:32:42 nigelhorne * Add debug message for end of multipart headers *
aa479b7d	* Revision 1.182 2004/11/26 12:05:17 nigelhorne * Remove warning message *
0856891e	* Revision 1.181 2004/11/22 15:18:51 nigelhorne * Performance work *
c29ebe66	* Revision 1.180 2004/11/19 11:32:16 nigelhorne * Scan email footers (portions after the last MIME boundary) *
8386482b	* Revision 1.179 2004/11/18 18:09:07 nigelhorne * First draft of binhex.c *
2ed1bc5a	* Revision 1.178 2004/11/15 13:58:50 nigelhorne * Fix obscure chance of memory leak *
9180b8bb	* Revision 1.177 2004/11/12 22:22:21 nigelhorne * Performance speeded up *
d85c1fad	* Revision 1.176 2004/11/12 09:41:45 nigelhorne * Partial mode now on by default *
ad642304	* Revision 1.175 2004/11/11 22:15:46 nigelhorne * Rewrite handling of folded headers *
0a94ffaf	* Revision 1.174 2004/11/10 10:08:45 nigelhorne * Fix escaped parenthesis in rfc822 comments *
74ca33e9	* Revision 1.173 2004/11/09 19:40:06 nigelhorne * Find uuencoded files in preambles to multipart messages *
39d09964	* Revision 1.172 2004/11/09 13:33:38 nigelhorne * Tidy *
2176c0e5	* Revision 1.171 2004/11/09 12:24:32 nigelhorne * Better handling of mail-follow-urls when CURL is not installed *
d768ac5a	* Revision 1.170 2004/11/09 10:08:02 nigelhorne * Added basic handling of folded headers in the main message *
28ea5910	* Revision 1.169 2004/11/08 16:27:09 nigelhorne * Fix crash with correctly encoded uuencode files *
802c37fc	* Revision 1.168 2004/11/08 10:26:22 nigelhorne * Fix crash if x-yencode is mistakenly guessed *
ad3d1172	* Revision 1.167 2004/11/07 16:59:42 nigelhorne * Tidy *
5e5a162c	* Revision 1.166 2004/11/07 16:39:00 nigelhorne * Handle para 4 of RFC2231 *
0eb8bafc	* Revision 1.165 2004/11/06 21:43:23 nigelhorne * Fix possible segfault in handling broken RFC2047 headers *
a77dc192	* Revision 1.164 2004/11/04 10:13:41 nigelhorne * Rehashed readdir_r patch *
e0377124	* Revision 1.163 2004/10/31 09:28:56 nigelhorne * Handle unbalanced quotes in multipart headers *
cbc2eaa9	* Revision 1.162 2004/10/24 04:35:15 nigelhorne * Handle multipart/knowbot as multipart/mixed *
2c7b958d	* Revision 1.161 2004/10/21 10:18:40 nigelhorne * PARTIAL: readdir_r even more options :-( *
67a25177	* Revision 1.160 2004/10/21 09:41:07 nigelhorne * PARTIAL: add readdir_r fix to BeOS *
3a0946f5	* Revision 1.159 2004/10/20 10:35:41 nigelhorne * Partial mode: fix possible stack corruption with Solaris *
e2a46f19	* Revision 1.158 2004/10/17 09:29:21 nigelhorne * Advise to report broken emails *
9fc8173e	* Revision 1.157 2004/10/16 20:53:28 nigelhorne * Tidy up *
db09f781	* Revision 1.156 2004/10/16 19:09:39 nigelhorne * Handle BeMail (BeOS) files *
5f72fd3b	* Revision 1.155 2004/10/16 17:23:04 nigelhorne * Handle colons in quotes in headers *
4fc38d69	* Revision 1.154 2004/10/16 09:01:05 nigelhorne * Improved handling of wraparound headers *
95f98162	* Revision 1.153 2004/10/14 21:18:49 nigelhorne * Harden the test for RFC2047 encoded headers *
291ac47f	* Revision 1.152 2004/10/14 17:45:13 nigelhorne * RFC2047 on long lines produced by continuation headers *
138b73f6	* Revision 1.151 2004/10/10 11:10:20 nigelhorne * Remove perror - replace with cli_errmsg *
6736d46f	* Revision 1.150 2004/10/09 08:01:37 nigelhorne * Needs libcurl >= 7.11 *
f8c25c7a	* Revision 1.149 2004/10/06 17:21:30 nigelhorne * Fix RFC2298 handling broken by RFC1341 code *
9a7398ee	* Revision 1.148 2004/10/05 15:41:53 nigelhorne * First draft of code to handle RFC1341 *
b62a19da	* Revision 1.147 2004/10/04 12:18:09 nigelhorne * Better warning message about PGP attachments not being scanned *
c3400886	* Revision 1.146 2004/10/04 10:52:39 nigelhorne * Better error message on RFC2047 decode error *
5eeffbb9	* Revision 1.145 2004/10/01 13:49:22 nigelhorne * Minor code tidy *
e94471f4	* Revision 1.144 2004/10/01 07:55:36 nigelhorne * Better error message on message/partial *
a95c894a	* Revision 1.143 2004/09/30 21:47:35 nigelhorne * Removed unneeded strdups *
37819555	* Revision 1.142 2004/09/28 18:40:12 nigelhorne * Use stack rather than heap where possible *
d28e1902	* Revision 1.141 2004/09/23 08:43:25 nigelhorne * Scan multipart/digest messages *
c07de365	* Revision 1.140 2004/09/22 16:09:51 nigelhorne * Build if CURLOPT_DNS_USE_GLOBAL_CACHE isn't supported *
d6e30cce	* Revision 1.139 2004/09/22 15:49:13 nigelhorne * Handle RFC2298 messages *
66df01fa	* Revision 1.138 2004/09/22 15:21:50 nigelhorne * Fix typo *
02406150	* Revision 1.137 2004/09/21 20:47:38 nigelhorne * FOLLOWURL: Set a default username and password for password protected pages *
05ea2522	* Revision 1.136 2004/09/21 12:18:52 nigelhorne * Fallback to CURLOPT_FILE if CURLOPT_WRITEDATA isn't defined *
897fd9c7	* Revision 1.135 2004/09/21 08:14:00 nigelhorne * Now compiles in machines with libcurl but without threads *
74c6f514	* Revision 1.134 2004/09/20 17:08:43 nigelhorne * Some performance enhancements *
137740e1	* Revision 1.133 2004/09/20 12:44:03 nigelhorne * Fix parsing error on mime arguments *
e9bdeb72	* Revision 1.132 2004/09/20 08:31:56 nigelhorne * FOLLOWURLS now compiled if libcurl is found *
4d9c0ca8	* Revision 1.131 2004/09/18 14:59:25 nigelhorne * Code tidy *
6fd6d771	* Revision 1.130 2004/09/17 10:56:29 nigelhorne * Handle multiple content-type headers and use the most likely *
2bcec72b	* Revision 1.129 2004/09/17 09:48:53 nigelhorne * Handle attempts to hide mime type *
90905415	* Revision 1.128 2004/09/17 09:09:44 nigelhorne * Better handling of RFC822 comments *
0674e2af	* Revision 1.127 2004/09/16 18:00:43 nigelhorne * Handle RFC2047 *
de509b8e	* Revision 1.126 2004/09/16 14:23:57 nigelhorne * Handle quotes around mime type *
31b05bcb	* Revision 1.125 2004/09/16 12:59:36 nigelhorne * Handle = and space as header separators *
21cd233d	* Revision 1.124 2004/09/16 11:20:33 nigelhorne * Better handling of folded headers in multipart messages *
56d8328d	* Revision 1.123 2004/09/16 08:56:19 nigelhorne * Handle RFC822 Comments *
3a0ef2ee	* Revision 1.122 2004/09/15 22:09:26 nigelhorne * Handle spaces before colons *
0e3b08fc	* Revision 1.121 2004/09/15 18:08:23 nigelhorne * Handle multiple encoding types *
a2d786fc	* Revision 1.120 2004/09/15 08:47:07 nigelhorne * Cleaner way to initialise hrefs *
6da40aa1	* Revision 1.119 2004/09/14 20:47:28 nigelhorne * Use new normalise code *
73b2c34c	* Revision 1.118 2004/09/14 12:09:37 nigelhorne * Include old normalise code *
06d4e856	* Revision 1.117 2004/09/13 16:44:01 kojm * minor cleanup *
e745ac7e	* Revision 1.116 2004/09/13 13:16:28 nigelhorne * Return CL_EFORMAT on bad format *
7d3d11d0	* Revision 1.115 2004/09/06 11:02:08 nigelhorne * Normalise HTML before scanning for URLs to download *
b4cb4486	* Revision 1.114 2004/09/03 15:59:00 nigelhorne * Handle boundary= "foo" *
f1c33aa0	* Revision 1.113 2004/08/26 09:33:20 nigelhorne * Scan Communigate Pro files *
a446de17	* Revision 1.112 2004/08/23 13:15:16 nigelhorne * messageClearMarkers *
565c449d	* Revision 1.111 2004/08/22 20:20:14 nigelhorne * Tidy *
e6b25cd3	* Revision 1.110 2004/08/22 15:08:59 nigelhorne * messageExport *
1e06e1ab	* Revision 1.109 2004/08/22 10:34:24 nigelhorne * Use fileblob *
de617e3e	* Revision 1.108 2004/08/21 11:57:57 nigelhorne * Use line.[ch] *
c408cfa5	* Revision 1.107 2004/08/20 04:55:07 nigelhorne * FOLLOWURL *
c9ae17be	* Revision 1.106 2004/08/20 04:53:18 nigelhorne * Tidy up *
314ff77b	* Revision 1.105 2004/08/18 21:35:08 nigelhorne * Multithread the FollowURL calls *
3eb12bae	* Revision 1.104 2004/08/18 15:53:43 nigelhorne * Honour CL_MAILURL *
28498221	* Revision 1.103 2004/08/18 10:49:45 nigelhorne * CHECKURLs was mistakenly turned on *
da812a6a	* Revision 1.102 2004/08/18 07:45:20 nigelhorne * Use configure WITH_CURL value *
49674596	* Revision 1.101 2004/08/17 08:28:32 nigelhorne * Support multitype/fax-message *
6b93ea0c	* Revision 1.100 2004/08/12 10:36:09 nigelhorne * LIBCURL completed *
88771ffa	* Revision 1.99 2004/08/11 15:28:39 nigelhorne * No longer needs curl.h *
f2b068fb	* Revision 1.98 2004/08/11 14:46:22 nigelhorne * Better handling of false positive emails *
0fba2555	* Revision 1.97 2004/08/10 14:02:22 nigelhorne * * empty log message * *
3fa72383	* Revision 1.96 2004/08/10 08:14:00 nigelhorne * Enable CHECKURL *
5431ebba	* Revision 1.95 2004/08/09 21:37:21 kojm * libclamav: add new option CL_MAILURL *
bf6f653d	* Revision 1.94 2004/08/09 08:26:36 nigelhorne * Thread safe checkURL *
c5ed8336	* Revision 1.93 2004/08/08 21:30:47 nigelhorne * First draft of CheckURL *
bac2c10a	* Revision 1.92 2004/08/08 19:13:14 nigelhorne * Better handling of bounces *
d32343c3	* Revision 1.91 2004/08/04 18:59:19 nigelhorne * Tidy up multipart handling *
79e432d2	* Revision 1.90 2004/07/26 17:02:56 nigelhorne * Fix crash when debugging on SPARC *
9c70ef30	* Revision 1.89 2004/07/26 09:12:12 nigelhorne * Fix crash when debugging on Solaris *
285a69b4	* Revision 1.88 2004/07/20 14:35:29 nigelhorne * Some MYDOOM.I were getting through *
f91f55e0	* Revision 1.87 2004/07/19 17:54:40 kojm * Use new pattern matching algorithm. Cleanup. *
80a8c7d8	* Revision 1.86 2004/07/06 09:32:45 nigelhorne * Better handling of Gibe.3 boundary exploit *
61db35a1	* Revision 1.85 2004/06/30 19:48:58 nigelhorne * Some TR.Happy99.SKA were getting through *
89e9a596	* Revision 1.84 2004/06/30 14:30:40 nigelhorne * Fix compilation error on Solaris *
d1382234	* Revision 1.83 2004/06/28 11:44:45 nigelhorne * Remove empty parts *
735377bc	* Revision 1.82 2004/06/25 13:56:38 nigelhorne * Optimise messages without other messages encapsulated within them *
f84fe2e8	* Revision 1.81 2004/06/24 21:36:38 nigelhorne * Plug memory leak with large number of attachments *
784e2335	* Revision 1.80 2004/06/23 16:23:25 nigelhorne * Further empty line optimisation *
98685ac1	* Revision 1.79 2004/06/22 04:08:01 nigelhorne * Optimise empty lines *
006f738e	* Revision 1.78 2004/06/21 10:21:19 nigelhorne * Fix crash when a multipart/mixed message contains many parts that need to be scanned as attachments *
6613d595	* Revision 1.77 2004/06/18 10:07:12 nigelhorne * Allow any number of alternatives in multipart messages *
8a88fb93	* Revision 1.76 2004/06/16 08:07:39 nigelhorne * Added thread safety *
93002b48	* Revision 1.75 2004/06/14 09:07:10 nigelhorne * Handle spam using broken e-mail generators for multipart/alternative *
7b8fb055	* Revision 1.74 2004/06/09 18:18:59 nigelhorne * Find uuencoded viruses in multipart/mixed that have no start of message boundaries *
4b0a2de6	* Revision 1.73 2004/05/14 08:15:55 nigelhorne * Use mkstemp on cygwin *
a750c93c	* Revision 1.72 2004/05/12 11:20:37 nigelhorne * More bounce message false positives handled *
92915cee	* Revision 1.71 2004/05/10 11:35:11 nigelhorne * No need to update mbox.c for cli_filetype problem
0b244177	*
2e0f78a6	* Revision 1.69 2004/05/06 11:26:49 nigelhorne * Force attachments marked as RFC822 messages to be scanned *
3db105a2	* Revision 1.68 2004/04/29 08:59:24 nigelhorne * Tidied up SetDispositionType *
7584963d	* Revision 1.67 2004/04/23 10:47:41 nigelhorne * If an inline text portion has a filename treat is as an attachment *
bf497d0a	* Revision 1.66 2004/04/14 08:32:21 nigelhorne * When debugging print the email number in mailboxes *
7baeb4a6	* Revision 1.65 2004/04/07 18:18:07 nigelhorne * Some occurrences of W97M.Lexar were let through *
4465fb04	* Revision 1.64 2004/04/05 09:32:20 nigelhorne * Added SCAN_TO_DISC define *
4c927f11	* Revision 1.63 2004/04/01 15:32:34 nigelhorne * Graceful exit if messageAddLine fails in strdup *
6638be41	* Revision 1.62 2004/03/31 17:00:20 nigelhorne * Code tidy up free memory earlier *
74b5c349	* Revision 1.61 2004/03/30 22:45:13 nigelhorne * Better handling of multipart/multipart messages *
ffd59a3e	* Revision 1.60 2004/03/29 09:22:03 nigelhorne * Tidy up code and reduce shuffling of data *
c95ae98b	* Revision 1.59 2004/03/26 11:08:36 nigelhorne * Use cli_writen *
02c9dc2a	* Revision 1.58 2004/03/25 22:40:46 nigelhorne * Removed even more calls to realloc and some duplicated code *
627465e7	* Revision 1.57 2004/03/21 17:19:49 nigelhorne * Handle bounce messages with no headers *
f5a4d7e8	* Revision 1.56 2004/03/21 09:41:26 nigelhorne * Faster scanning for non MIME messages *
3e556ea8	* Revision 1.55 2004/03/20 17:39:23 nigelhorne * First attempt to handle all bounces *
a980b067	* Revision 1.54 2004/03/19 15:40:45 nigelhorne * Handle empty content-disposition types *
af852ae0	* Revision 1.53 2004/03/19 08:08:02 nigelhorne * If a message part of a multipart contains an RFC822 message that has no encoding don't scan it *
b759d5eb	* Revision 1.52 2004/03/18 21:51:41 nigelhorne * If a message only contains a single RFC822 message that has no encoding don't save for scanning *
bad123c6	* Revision 1.51 2004/03/17 19:48:12 nigelhorne * Improved embedded RFC822 message handling *
09ccd6e0	* Revision 1.50 2004/03/10 22:05:39 nigelhorne * Fix seg fault when a message in a multimessage mailbox fails to scan *
b0d8b0db	* Revision 1.49 2004/03/04 13:01:58 nigelhorne * Ensure all bounces are rescanned by cl_mbox *
6e07998e	* Revision 1.48 2004/02/27 12:16:26 nigelhorne * Catch lines just containing ':' *
39ff42ee	* Revision 1.47 2004/02/23 10:13:08 nigelhorne * Handle spaces before : in headers *
1d53a315	* Revision 1.46 2004/02/18 13:29:19 nigelhorne * Stop buffer overflows for files with very long suffixes *
26564cf5	* Revision 1.45 2004/02/18 10:07:40 nigelhorne * Find some Yaha *
c7256385	* Revision 1.44 2004/02/15 08:45:54 nigelhorne * Avoid scanning the same file twice *
0704dad8	* Revision 1.43 2004/02/14 19:04:05 nigelhorne * Handle spaces in boundaries *
0dbec6b9	* Revision 1.42 2004/02/14 17:23:45 nigelhorne * Had deleted O_BINARY by mistake *
d32e668f	* Revision 1.41 2004/02/12 18:43:58 nigelhorne * Use mkstemp on Solaris *
a66ca28a	* Revision 1.40 2004/02/11 08:15:59 nigelhorne * Use O_BINARY for cygwin *
8b242bb9	* Revision 1.39 2004/02/06 13:46:08 kojm * Support for clamav-config.h *
b9ec1705	* Revision 1.38 2004/02/04 13:29:48 nigelhorne * Handle partial writes - and print when write fails *
0bf1353d	* Revision 1.37 2004/02/03 22:54:59 nigelhorne * Catch another example of Worm.Dumaru.Y *
a64bf87e	* Revision 1.36 2004/02/02 09:52:57 nigelhorne * Some instances of Worm.Dumaru.Y got through the net *
5a01973c	* Revision 1.35 2004/01/28 10:15:24 nigelhorne * Added support to scan some bounce messages *
5c7cf3f1	* Revision 1.34 2004/01/24 17:43:37 nigelhorne * Removed (incorrect) warning about uninitialised variable *
2250ea69	* Revision 1.33 2004/01/23 10:38:22 nigelhorne * Fixed memory leak in handling some multipart messages *
4e7ca2b1	* Revision 1.32 2004/01/23 08:51:19 nigelhorne * Add detection of uuencoded viruses in single part multipart/mixed files *
9a35912c	* Revision 1.31 2004/01/22 22:13:06 nigelhorne * Prevent infinite recursion on broken uuencoded files *
8c0250d5	* Revision 1.30 2004/01/13 10:12:05 nigelhorne * Remove duplicate code when handling multipart messages *
0ada8f3e	* Revision 1.29 2004/01/09 18:27:11 nigelhorne * ParseMimeHeader could corrupt arg *
7e572372	* Revision 1.28 2004/01/09 15:07:42 nigelhorne * Re-engineered update 1.11 lost in recent changes *
68badbc1	* Revision 1.27 2004/01/09 14:45:59 nigelhorne * Removed duplicated code in multipart handler *
852e3ce4	* Revision 1.26 2004/01/09 10:20:54 nigelhorne * Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages *
441992ed	* Revision 1.25 2004/01/06 14:41:18 nigelhorne * Handle headers which do not have a space after the ':' *
f54a8635	* Revision 1.24 2003/12/20 13:55:36 nigelhorne * Ensure multipart just save the bodies of attachments *
68be129f	* Revision 1.23 2003/12/14 18:07:01 nigelhorne * Some viruses in embedded messages were not being found *
062ba8b0	* Revision 1.22 2003/12/13 16:42:23 nigelhorne * call new cli_chomp *
7fca6080	* Revision 1.21 2003/12/11 14:35:48 nigelhorne * Better handling of encapsulated messages *
f5e9abc8	* Revision 1.20 2003/12/06 04:03:26 nigelhorne * Handle hand crafted emails that incorrectly set multipart headers *
2227f20e	* Revision 1.19 2003/11/21 07:26:31 nigelhorne * Scan multipart alternatives that have no boundaries, finds some uuencoded happy99 *
181c7548	* Revision 1.18 2003/11/17 08:13:21 nigelhorne * Handle spaces at the end of lines of MIME headers *
04421a14	* Revision 1.17 2003/11/06 05:06:42 nigelhorne * Some applications weren't being scanned *
295e425f	* Revision 1.16 2003/11/04 08:24:00 nigelhorne * Handle multipart messages that have no text portion *
07cbf822	* Revision 1.15 2003/10/12 20:13:49 nigelhorne * Use NO_STRTOK_R consistent with message.c *
fdc8a467	* Revision 1.14 2003/10/12 12:37:11 nigelhorne * Appledouble encoded EICAR now found *
4674dc9a	* Revision 1.13 2003/10/01 09:27:42 nigelhorne * Handle content-type header going over to a new line *
6ecba059	* Revision 1.12 2003/09/29 17:10:19 nigelhorne * Moved stub from heap to stack since its maximum size is known *
47ab99fa	* Revision 1.11 2003/09/29 12:58:32 nigelhorne * Handle Content-Type: /; name="eicar.com" *
7cef72ea	* Revision 1.10 2003/09/28 10:06:34 nigelhorne * Compilable under SCO; removed duplicate code with message.c *
b151ef55	*/
/* RCS/CVS "$Id$" keyword string identifying the revision of this file */
static	char	const	rcsid[] = "$Id: mbox.c,v 1.189 2004/11/27 14:39:01 nigelhorne Exp $";

#if	HAVE_CONFIG_H
#include "clamav-config.h"
#endif

/*
 * assert() is only active in debug builds: without CL_DEBUG, NDEBUG is
 * defined before <assert.h> is included further down
 */
#ifndef	CL_DEBUG
#define	NDEBUG	/* map CLAMAV debug onto standard */
#endif

#ifdef	CL_THREAD_SAFE
#ifndef	_REENTRANT
#define	_REENTRANT	/* for Solaris 2.8 */
#endif
#endif
b151ef55	#include <stdio.h> #include <stdlib.h> #include <errno.h> #include <assert.h> #include <string.h> #include <strings.h> #include <ctype.h> #include <time.h> #include <unistd.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/types.h>
0bcad2b1	#include <sys/param.h>
b151ef55	#include <clamav.h>
9a7398ee	#include <dirent.h>
67a25177	#include <limits.h>
b151ef55
a77dc192	#if defined(HAVE_READDIR_R_3) \|\| defined(HAVE_READDIR_R_2) #include <stddef.h> #endif
8a88fb93	#ifdef CL_THREAD_SAFE #include <pthread.h> #endif
b151ef55	#include "table.h" #include "mbox.h" #include "blob.h"
de617e3e	#include "line.h"
b151ef55	#include "text.h" #include "message.h" #include "others.h" #include "defaults.h"
7fca6080	#include "str.h"
b151ef55
/*
 * HAVE_BACKTRACE is only enabled for debug builds against glibc 2.1 or
 * later; it pulls in the headers and helper prototypes used by the
 * SIGSEGV handler
 */
#ifdef	CL_DEBUG
#if	__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
#define	HAVE_BACKTRACE
#endif
#endif

#ifdef	HAVE_BACKTRACE
#include <execinfo.h>
#include <signal.h>
#include <syslog.h>

static	void	sigsegv(int sig);
static	void	print_trace(int use_syslog);
#endif

/*
 * Fall back to strtok() when strtok_r() is unavailable or when the build
 * is not thread safe; the third (save pointer) argument is then ignored
 */
#if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
#undef	strtok_r
#undef	__strtok_r
#define	strtok_r(a,b,c)	strtok(a,b)
#endif

/* required for AIX and Tru64 */
#ifdef	TRUE
#undef	TRUE
#endif
#ifdef	FALSE
#undef	FALSE
#endif

typedef	enum	{ FALSE = 0, TRUE = 1 } bool;

#define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
49674596
/*
 * Code does exist to run FOLLOWURLS on systems without libcurl, however that
 * is not recommended so it is not compiled by default
 */
#ifdef	WITH_CURL
#define	FOLLOWURLS	/*
			 * If an email contains URLs, check them - helps to
			 * find Dialer.gen-45
			 */
#endif

#ifdef	FOLLOWURLS

#include "htmlnorm.h"

#define	MAX_URLS	5	/*
				 * Maximum number of URLs scanned in a message
				 * part
				 */
#ifdef	WITH_CURL	/* Set in configure */
/*
 * To build with WITH_CURL:
 * LDFLAGS=`curl-config --libs` ./configure ...
 */
#include <curl/curl.h>

/*
 * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and 7.10 is
 * untested)
 *
 * NOTE(review): the tests below only reject releases older than 7.10, so
 * 7.10 itself is still accepted - confirm which limit is intended
 */
#if	(LIBCURL_VERSION_MAJOR < 7)
#undef	WITH_CURL	/* also undef FOLLOWURLS? */
#endif

#if	(LIBCURL_VERSION_MAJOR == 7) && (LIBCURL_VERSION_MINOR < 10)
#undef	WITH_CURL	/* also undef FOLLOWURLS? */
#endif

#endif	/*WITH_CURL*/

#else	/*!FOLLOWURLS*/
#undef	WITH_CURL
#endif	/*FOLLOWURLS*/

/*
 * Define this to handle messages covered by section 7.3.2 of RFC1341.
 *	This is experimental code so it is up to YOU to (1) ensure it's secure
 * (2) periodically trim the directory of old files
 *
 * If you use the load balancing feature of clamav-milter to run clamd on
 * more than one machine you must make sure that .../partial is on a shared
 * network filesystem
 */
#define	PARTIAL_DIR
9a7398ee
de617e3e	static message parseEmailHeaders(const message m, const table_t *rfc821Table);
8c0250d5	static int parseEmailHeader(message m, const char line, const table_t *rfc821Table);
565c449d	static int parseEmailBody(message messageIn, text textIn, const char dir, const table_t rfc821Table, const table_t *subtypeTable, unsigned int options);
b151ef55	static int boundaryStart(const char line, const char boundary); static int endOfMessage(const char line, const char boundary); static int initialiseTables(table_t rfc821Table, table_t subtypeTable); static int getTextPart(message const messages[], size_t size); static size_t strip(char buf, int len); static bool continuationMarker(const char line); static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char *arg);
5a01973c	static void saveTextPart(message m, const char dir);
0674e2af	static char rfc2047(const char in);
90905415	static char rfc822comments(const char in);
9a7398ee	#ifdef PARTIAL_DIR static int rfc1341(message m, const char dir); #endif
3fa72383
c5ed8336	static void checkURLs(message m, const char dir);
da812a6a	#ifdef WITH_CURL
314ff77b	struct arg {
a95c894a	const char url; const char dir;
314ff77b	char filename; }; #ifdef CL_THREAD_SAFE static void getURL(void a); #else static void getURL(struct arg *arg); #endif
3fa72383	#endif
/* Maximum line length according to RFC821 */
#define	LINE_LENGTH	1000

/* Hashcodes for our hash tables */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Mime sub types */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
#define	FAX		MIXED	/*
				 * RFC3458
				 * Drafts stated to treat it as mixed if it is
				 * not known.  This disappeared in the final
				 * version (except when talking about
				 * voice-message), but it is good enough for us
				 * since we do no validation of coversheet
				 * presence etc. (which also has disappeared
				 * in the final version)
				 */
#define	ENCRYPTED	13	/*
				 * e.g. RFC2015
				 * Content-Type: multipart/encrypted;
				 * boundary="nextPart1383049.XCRrrar2yq";
				 * protocol="application/pgp-encrypted"
				 */
#define	X_BFILE		RELATED	/*
				 * BeOS, expect two parts: the file and its
				 * attributes. The attributes part comes as
				 *	Content-Type: application/x-be_attribute
				 *		name="foo"
				 * I can't find where it is defined, any
				 * pointers would be appreciated. For now
				 * we treat it as multipart/related
				 */
#define	KNOWBOT		14	/* Unknown and undocumented format? */

/*
 * Initialisers for the two lookup tables: each entry pairs a key string
 * with one of the codes defined above, and each list is terminated by an
 * entry whose key is NULL.
 * rfc821headers lists the header names of interest, mimeSubtypes lists the
 * text and multipart subtypes (several keys may share one code, e.g.
 * "fax-message" maps to the same code as "mixed").
 */
static	const	struct tableinit {
	const	char	*key;
	int	value;
} rfc821headers[] = {
	/* TODO: make these regular expressions */
	{	"Content-Type",			CONTENT_TYPE		},
	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
	{	"Content-Disposition",		CONTENT_DISPOSITION	},
	{	NULL,				0			}
}, mimeSubtypes[] = {	/* see RFC2045 */
	/* subtypes of Text */
	{	"plain",	PLAIN		},
	{	"enriched",	ENRICHED	},
	{	"html",		HTML		},
	{	"richtext",	RICHTEXT	},
	/* subtypes of Multipart */
	{	"mixed",	MIXED		},
	{	"alternative",	ALTERNATIVE	},
	{	"digest",	DIGEST		},
	{	"signed",	SIGNED		},
	{	"parallel",	PARALLEL	},
	{	"related",	RELATED		},
	{	"report",	REPORT		},
	{	"appledouble",	APPLEDOUBLE	},
	{	"fax-message",	FAX		},
	{	"encrypted",	ENCRYPTED	},
	{	"x-bfile",	X_BFILE		},	/* BeOS */
	{	"knowbot",		KNOWBOT		},	/* ??? */
	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
	{	"knowbot-code",		KNOWBOT		},	/* ??? */
	{	"knowbot-state",	KNOWBOT		},	/* ??? */
	{	NULL,		0		}
};
8a88fb93	#ifdef CL_THREAD_SAFE static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
b151ef55
7cef72ea	/* Maximum filenames under various systems / #ifndef NAME_MAX / e.g. Linux / #ifdef MAXNAMELEN / e.g. Solaris / #define NAME_MAX MAXNAMELEN #else #ifdef FILENAME_MAX / e.g. SCO */ #define NAME_MAX FILENAME_MAX #endif #endif #endif
0dbec6b9	#ifndef O_BINARY #define O_BINARY 0 #endif
b151ef55	/* * TODO: when signal handling is added, need to remove temp files when a
7d3d11d0	* signal is received
b151ef55	* TODO: add option to scan in memory not via temp files, perhaps with a
74b5c349	* named pipe or memory mapped file, though this won't work on big e-mails * containing many levels of encapsulated messages - it'd just take too much * RAM
c6259ac5	* TODO: parse .msg format files
fdc8a467	* TODO: fully handle AppleDouble format, see
7d3d11d0	* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
f54a8635	* TODO: ensure parseEmailHeaders is always called before parseEmailBody * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
f1c33aa0	* TODO: Look into TNEF. Is there anything that needs to be done here?
b151ef55	*/ int
5431ebba	cli_mbox(const char *dir, int desc, unsigned int options)
b151ef55	{
c6259ac5	int retcode, i;
f54a8635	message m, body;
b151ef55	FILE *fd;
802c37fc	char buffer[LINE_LENGTH + 1];
0e3b08fc	#ifdef HAVE_BACKTRACE
98685ac1	void (*segv)(int); #endif
49674596	static table_t rfc821, subtype;
b151ef55	cli_dbgmsg("in mbox()\n");
c6259ac5	i = dup(desc); if((fd = fdopen(i, "rb")) == NULL) { cli_errmsg("Can't open descriptor %d\n", desc); close(i);
7d3d11d0	return CL_EOPEN;
c6259ac5	}
802c37fc	if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
c6259ac5	/* empty message */ fclose(fd);
7d3d11d0	return CL_CLEAN;
c6259ac5	}
b151ef55	m = messageCreate();
7b8fb055	if(m == NULL) {
c6259ac5	fclose(fd);
7d3d11d0	return CL_EMEM;
7b8fb055	}
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&tables_mutex); #endif
49674596	if(rfc821 == NULL) { assert(subtype == NULL);
7b8fb055
49674596	if(initialiseTables(&rfc821, &subtype) < 0) { rfc821 = NULL; subtype = NULL;
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
7b8fb055	messageDestroy(m); fclose(fd);
7d3d11d0	return CL_EMEM;
7b8fb055	}
b151ef55	}
8a88fb93	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&tables_mutex); #endif
b151ef55
89e9a596	#ifdef HAVE_BACKTRACE
98685ac1	segv = signal(SIGSEGV, sigsegv); #endif
f54a8635	/* * is it a UNIX style mbox with more than one * mail message, or just a single mail message? */ if(strncmp(buffer, "From ", 5) == 0) {
b151ef55	/*
c6259ac5	* Have been asked to check a UNIX style mbox file, which * may contain more than one e-mail message to decode
b151ef55	*/
f54a8635	bool lastLineWasEmpty = FALSE;
bf497d0a	int messagenumber = 1;
b151ef55
c6259ac5	do { /cli_dbgmsg("read: %s", buffer);/
b151ef55
f54a8635	cli_chomp(buffer); if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
bf497d0a	cli_dbgmsg("Deal with email number %d\n", messagenumber++);
b151ef55	/*
f54a8635	* End of a message in the mail box
b151ef55	*/
de617e3e	body = parseEmailHeaders(m, rfc821);
f2b068fb	if(body == NULL) { messageReset(m); continue; }
f54a8635	messageDestroy(m); if(messageGetBody(body))
565c449d	if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) {
09ccd6e0	messageReset(body); m = body; continue; }
b151ef55	/*
f54a8635	* Starting a new message, throw away all the * information about the old one
b151ef55	*/
f54a8635	m = body; messageReset(body);
b151ef55
c6259ac5	cli_dbgmsg("Finished processing message\n");
f54a8635	} else
a66ca28a	lastLineWasEmpty = (bool)(buffer[0] == '\0');
de617e3e	if(messageAddStr(m, buffer) < 0)
4c927f11	break;
802c37fc	} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
bf497d0a	cli_dbgmsg("Deal with email number %d\n", messagenumber);
f2b068fb	} else {
7fca6080	/* * It's a single message, parse the headers then the body
f2b068fb	* Ignore blank lines at the start of the message */
f1c33aa0	if(strncmp(buffer, "P I ", 4) == 0) /* * CommuniGate Pro format: ignore headers until * blank line */
802c37fc	while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
f1c33aa0	(strchr("\r\n", buffer[0]) == NULL)) ; /* * Ignore any blank lines at the top of the message */
f2b068fb	while(strchr("\r\n", buffer[0]) &&
802c37fc	(fgets(buffer, sizeof(buffer) - 1, fd) != NULL))
6b93ea0c	;
802c37fc	buffer[LINE_LENGTH] = '\0';
f2b068fb	/* * FIXME: files full of new lines and nothing else are * handled ungracefully...
7fca6080	*/
de617e3e	do {
b759d5eb	/*
4465fb04	* TODO: this needlessly creates a message object, * it'd be better if parseEmailHeaders could also * read in from a file. I do not want to lump the * parseEmailHeaders code here, that'd be a duplication * of code I want to avoid
b759d5eb	*/
de617e3e	(void)cli_chomp(buffer); if(messageAddStr(m, buffer) < 0)
4c927f11	break;
802c37fc	} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
f2b068fb	}
7fca6080
b151ef55	fclose(fd);
7d3d11d0	/* * This is not necessarily true, but since the only options are * CL_CLEAN and CL_VIRUS this is the better choice. It would be * nice to have CL_CONTINUESCANNING or something like that */ retcode = CL_CLEAN;
c6259ac5
de617e3e	body = parseEmailHeaders(m, rfc821);
f54a8635	messageDestroy(m);
f2b068fb	if(body) { /* * Write out the last entry in the mailbox */ if(messageGetBody(body))
565c449d	if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options))
e745ac7e	retcode = CL_EFORMAT;
b151ef55
f2b068fb	/* * Tidy up and quit */ messageDestroy(body); }
b151ef55	cli_dbgmsg("cli_mbox returning %d\n", retcode);
89e9a596	#ifdef HAVE_BACKTRACE
98685ac1	signal(SIGSEGV, segv); #endif
b151ef55	return retcode; } /*
7fca6080	* The given message contains a raw e-mail. * * This function parses the headers of m and sets the message's arguments
68be129f	* * Returns the message's body with the correct arguments set
735377bc	* * The downside of this approach is that for a short time we have two copies * of the message in memory, the upside is that it makes for easier parsing * of encapsulated messages, and in the long run uses less memory in those * scenarios
7fca6080	*/
68be129f	static message *
de617e3e	parseEmailHeaders(const message m, const table_t rfc821)
7fca6080	{
68be129f	bool inHeader = TRUE;
9fc8173e	bool contMarker = FALSE;
de617e3e	const text *t;
f54a8635	message *ret;
f2b068fb	bool anyHeadersFound = FALSE;
4fc38d69	int commandNumber = -1;
d768ac5a	char *fullline = NULL;
ad642304	size_t fulllinelength = 0;
f54a8635
98685ac1	cli_dbgmsg("parseEmailHeaders\n");
f54a8635	if(m == NULL) return NULL; ret = messageCreate();
7fca6080
de617e3e	for(t = messageGetBody(m); t; t = t->t_next) { const char *buffer;
7fca6080
de617e3e	if(t->t_line) buffer = lineGetData(t->t_line); else
98685ac1	buffer = NULL;
7fca6080
a1c924f9	cli_dbgmsg("parseEmailHeaders: check '%s'\n", buffer ? buffer : "");
b4cb4486	if(inHeader) {
9180b8bb	if((buffer == NULL) && !contMarker) {
b4cb4486	/* * A blank line signifies the end of the header * and the start of the text */
7fca6080	cli_dbgmsg("End of header information\n");
d32343c3	inHeader = FALSE;
ad642304	} else {
0856891e	char *ptr;
ad642304	const char *qptr; int quotes;
0856891e	if(buffer == NULL) { contMarker = FALSE; continue; }
ad642304	if(fullline == NULL) {
9180b8bb	char cmd[LINE_LENGTH + 1]; /* * Continuation of line we're ignoring? / if((buffer[0] == '\t') \|\| (buffer[0] == ' ') \|\| contMarker) { contMarker = continuationMarker(buffer); continue; } / * Is this a header we're interested in? */
0856891e	if((strchr(buffer, ':') == NULL) \|\| (cli_strtokbuf(buffer, 0, ":", cmd) == NULL)) { if(strncmp(buffer, "From ", 5) == 0) anyHeadersFound = TRUE;
9180b8bb	continue;
0856891e	}
9180b8bb
a1c924f9	ptr = rfc822comments(cmd); commandNumber = tableFind(rfc821, ptr ? ptr : cmd); if(ptr) free(ptr);
9180b8bb	switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE:
0856891e	anyHeadersFound = TRUE;
9180b8bb	break; default:
0856891e	if(strcasecmp(cmd, "From") == 0) anyHeadersFound = TRUE; else if(strcasecmp(cmd, "Received") == 0) anyHeadersFound = TRUE; else if(strcasecmp(cmd, "De") == 0) anyHeadersFound = TRUE;
9180b8bb	continue; } fullline = strdup(buffer); fulllinelength = strlen(buffer) + 1; } else if(buffer) { fulllinelength += strlen(buffer); fullline = cli_realloc(fullline, fulllinelength); strcat(fullline, buffer);
ad642304	}
3a0946f5
9fc8173e	contMarker = continuationMarker(buffer);
9180b8bb
ad642304	if(contMarker) continue; if(t->t_next && (t->t_next->t_line != NULL)) { const char next = lineGetData(t->t_next->t_line); /
0856891e	* Section B.2 of RFC822 says TAB or * SPACE means a continuation of the * previous entry.
ad642304	* * Add all the arguments on the line / if((next[0] == '\t') \|\| (next[0] == ' ')) continue; } quotes = 0; for(qptr = buffer; qptr; qptr++) if(*qptr == '\"') quotes++;
9180b8bb	if(quotes & 1)
ad642304	continue; ptr = rfc822comments(fullline); if(ptr) { free(fullline); fullline = ptr; }
37819555
ad642304	if(parseEmailHeader(ret, fullline, rfc821) < 0) continue;
b4cb4486
9180b8bb	free(fullline); fullline = NULL;
d32343c3	}
9180b8bb	} else
09ccd6e0	/cli_dbgmsg("Add line to body '%s'\n", buffer);/
de617e3e	if(messageAddLine(ret, t->t_line) < 0)
80a8c7d8	break;
ffd59a3e	}
68be129f
d768ac5a	if(fullline) {
ad642304	if(*fullline) switch(commandNumber) { case CONTENT_TRANSFER_ENCODING: case CONTENT_DISPOSITION: case CONTENT_TYPE: cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n", fullline); }
d768ac5a	free(fullline); }
f2b068fb	if(!anyHeadersFound) { /* * False positive in believing we have an e-mail when we don't */ messageDestroy(ret); cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n"); return NULL; }
4465fb04	messageClean(ret);
09ccd6e0	cli_dbgmsg("parseEmailHeaders: return\n");
68be129f	return ret;
7fca6080	} /*
8c0250d5	* Handle a header line of an email message */ static int
49674596	parseEmailHeader(message m, const char line, const table_t *rfc821)
8c0250d5	{
de509b8e	char *cmd;
8c0250d5	int ret = -1; #ifdef CL_THREAD_SAFE char *strptr; #endif
31b05bcb	const char *separater;
0674e2af	char *copy, tokenseparater[2];
8c0250d5
0704dad8	cli_dbgmsg("parseEmailHeader '%s'\n", line);
31b05bcb	/* * In RFC822 the separater between the key a value is a colon, * e.g. Content-Transfer-Encoding: base64 * However some MUA's are lapse about this and virus writers exploit * this hole, so we need to check all known possiblities / for(separater = ":= "; separater; separater++) if(strchr(line, separater) != NULL) break; if(separater == '\0')
74b5c349	return -1;
0674e2af	copy = rfc2047(line); if(copy == NULL) return -1;
d1382234
31b05bcb	tokenseparater[0] = *separater; tokenseparater[1] = '\0';
897fd9c7	#ifdef CL_THREAD_SAFE
31b05bcb	cmd = strtok_r(copy, tokenseparater, &strptr);
897fd9c7	#else cmd = strtok(copy, tokenseparater); #endif
8c0250d5
3a0ef2ee	if(cmd && (strstrip(cmd) > 0)) {
897fd9c7	#ifdef CL_THREAD_SAFE
8c0250d5	char *arg = strtok_r(NULL, "", &strptr);
897fd9c7	#else char *arg = strtok(NULL, ""); #endif
8c0250d5	if(arg) /* * Found a header such as * Content-Type: multipart/mixed; * set arg to be * "multipart/mixed" and cmd to
39ff42ee	* be "Content-Type"
8c0250d5	*/
49674596	ret = parseMimeHeader(m, cmd, rfc821, arg);
8c0250d5	}
0674e2af	free(copy);
8c0250d5	return ret; } /*
b151ef55	* This is a recursive routine. *
7fca6080	* This function parses the body of mainMessage and saves its attachments in dir *
68be129f	* mainMessage is the buffer to be parsed, it contains an e-mail's body, without
d32343c3	* any headers. First time of calling it'll be * the whole message. Later it'll be parts of a multipart message
b151ef55	* textIn is the plain text message being built up so far *
0bcad2b1	* Returns:
b151ef55	* 0 for fail
852e3ce4	* 1 for success, attachments saved * 2 for success, attachments not saved
b151ef55	/ static int / success or fail */
565c449d	parseEmailBody(message messageIn, text textIn, const char dir, const table_t rfc821Table, const table_t *subtypeTable, unsigned int options)
b151ef55	{
6613d595	message *messages; / parts of a multipart message */
30fb8a0b	int inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
b151ef55	text aText; const char cptr;
2250ea69	message *mainMessage;
565c449d	fileblob *fb;
b151ef55
565c449d	cli_dbgmsg("in parseEmailBody\n");
b151ef55	aText = textIn;
6613d595	messages = NULL;
2250ea69	mainMessage = messageIn;
b151ef55	/* Anything left to be parsed? */
0bcad2b1	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
b151ef55	mime_type mimeType;
30fb8a0b	int subtype, inhead;
b62a19da	const char mimeSubtype, boundary; char *protocol;
b151ef55	const text *t_line;
f5e9abc8	/bool isAlternative;/
b151ef55	message *aMessage;
c6259ac5	cli_dbgmsg("Parsing mail file\n");
b151ef55	mimeType = messageGetMimeType(mainMessage); mimeSubtype = messageGetMimeSubtype(mainMessage);
5eeffbb9	subtype = tableFind(subtypeTable, mimeSubtype); if((mimeType == TEXT) && (subtype == PLAIN)) {
b151ef55	/* * This is effectively no encoding, notice that we * don't check that charset is us-ascii */ cli_dbgmsg("assume no encoding\n"); mimeType = NOMIME;
5eeffbb9	messageSetMimeSubtype(mainMessage, NULL);
b151ef55	}
c6259ac5	cli_dbgmsg("mimeType = %d\n", mimeType);
b151ef55	switch(mimeType) { case NOMIME: aText = textAddMessage(aText, mainMessage); break; case TEXT:
5eeffbb9	if(subtype == PLAIN)
89e9a596	/* * Consider what to do if this fails * (i.e. aText == NULL): * We mustn't just return since that could * cause a virus to be missed that we * could be capable of scanning. Ignoring * the error is probably the safest, we may be * able to scan anyway and we lose nothing */
b151ef55	aText = textCopy(messageGetBody(mainMessage));
5eeffbb9	else if((options&CL_SCAN_MAILURL) && (subtype == HTML)) checkURLs(mainMessage, dir);
b151ef55	break; case MULTIPART: boundary = messageFindArgument(mainMessage, "boundary"); if(boundary == NULL) { cli_warnmsg("Multipart MIME message contains no boundaries\n");
2227f20e	/* Broken e-mail message / mimeType = NOMIME; / * The break means that we will still * check if the file contains a uuencoded file */ break;
b151ef55	}
cbc2eaa9	/* Perhaps it should assume mixed? */
93002b48	if(mimeSubtype[0] == '\0') { cli_warnmsg("Multipart has no subtype assuming alternative\n"); mimeSubtype = "alternative"; messageSetMimeSubtype(mainMessage, "alternative"); }
b151ef55	/* * Get to the start of the first message */
0704dad8	t_line = messageGetBody(mainMessage); if(t_line == NULL) { cli_warnmsg("Multipart MIME message has no body\n"); free((char *)boundary); mimeType = NOMIME; break; } do
74ca33e9	if(t_line->t_line) { if(boundaryStart(lineGetData(t_line->t_line), boundary)) break; /*
0856891e	* Found a uuencoded/binhex file before the first multipart * TODO: check yEnc
74ca33e9	*/
0856891e	if(uuencodeBegin(mainMessage) == t_line) {
74ca33e9	if(messageGetEncoding(mainMessage) == NOENCODING) { messageSetEncoding(mainMessage, "x-uuencode"); fb = messageToFileblob(mainMessage, dir); if(fb) fileblobDestroy(fb); }
0856891e	} else if(binhexBegin(mainMessage) == t_line) { if(messageGetEncoding(mainMessage) == NOENCODING) { messageSetEncoding(mainMessage, "x-binhex"); fb = messageToFileblob(mainMessage, dir); if(fb) fileblobDestroy(fb); }
9f43cc75	} else if(encodingLine(mainMessage) == t_line->t_next) { /* * We look for the next line * since later on we'll skip * over the important line when * we think it's a blank line * at the top of the message - * which it would have been in * an RFC compliant world */ cli_dbgmsg("Found MIME attachment before the first MIME section\n"); if(messageGetEncoding(mainMessage) == NOENCODING) break;
0856891e	}
74ca33e9	}
0704dad8	while((t_line = t_line->t_next) != NULL);
b151ef55	if(t_line == NULL) {
b4cb4486	cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
bf8ea488	/* * Free added by Thomas Lamy * <Thomas.Lamy@in-online.net> / free((char )boundary);
2227f20e	mimeType = NOMIME; /* * The break means that we will still * check if the file contains a uuencoded file */ break;
b151ef55	} /* * Build up a table of all of the parts of this * multipart message. Remember, each part may itself * be a multipart message. */ inhead = 1; inMimeHead = 0;
68be129f	/*
b62a19da	* Parse the mainMessage object and create an array * of objects called messages, one for each of the * multiparts that mainMessage contains *
68be129f	* This looks like parseEmailHeaders() - maybe there's * some duplication of code to be cleaned up */
6613d595	for(multiparts = 0; t_line; multiparts++) {
26564cf5	int lines = 0;
79e432d2	message **m;
26564cf5
79e432d2	m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
d32343c3	if(m == NULL)
79e432d2	break; messages = m;
6613d595
b151ef55	aMessage = messages[multiparts] = messageCreate();
89e9a596	if(aMessage == NULL) { multiparts--; continue; }
b151ef55	cli_dbgmsg("Now read in part %d\n", multiparts);
0bf1353d	/* * Ignore blank lines. There shouldn't be ANY * but some viruses insert them */
98685ac1	while((t_line = t_line->t_next) != NULL)
de617e3e	if(t_line->t_line && /(cli_chomp(t_line->t_text) > 0))/ (strlen(lineGetData(t_line->t_line)) > 0))
784e2335	break;
0bf1353d	if(t_line == NULL) { cli_dbgmsg("Empty part\n");
61db35a1	/* * Remove this part unless there's * a uuencoded portion somewhere in * the complete message that we may * throw away by mistake if the MIME * encoding information is incorrect */ if(uuencodeBegin(mainMessage) == NULL) { messageDestroy(aMessage); --multiparts; }
0bf1353d	continue; } do {
de617e3e	const char *line = lineGetData(t_line->t_line);
b151ef55
39b5a552	/*printf("inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
30fb8a0b	inMimeHead, inhead, boundary, line, t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
b151ef55
e0377124	if(inMimeHead) { /* continuation line */
98685ac1	if(line == NULL) {
699fafc3	/inhead =/ inMimeHead = 0;
98685ac1	continue; }
7baeb4a6	/* * Handle continuation lines * because the previous line
21cd233d	* ended with a ; or this line * starts with a white space
7baeb4a6	*/
21cd233d	cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n", multiparts, line);
7baeb4a6	/* * Handle the case when it * isn't really a continuation * line: * Content-Type: application/octet-stream; * Content-Transfer-Encoding: base64 */ parseEmailHeader(aMessage, line, rfc821Table);
b151ef55	while(isspace((int)line)) line++; if(line == '\0') { inhead = inMimeHead = 0; continue; } /* * This may cause a trailing ';' * to be added if this test * fails - TODO: verify this */ inMimeHead = continuationMarker(line); messageAddArgument(aMessage, line);
e0377124	} else if(inhead) { /* handling normal headers */
ad642304	char *ptr;
98685ac1	if(line == NULL) { /* empty line */
699fafc3	cli_dbgmsg("Multipart %d: End of header information\n", multiparts);
b151ef55	inhead = 0; continue; }
a64bf87e	if(isspace((int)line)) { / * The first line is * continuation line. * This is tricky * to handle, but * all we can do is our * best / cli_dbgmsg("Part %d starts with a continuation line\n", multiparts); messageAddArgument(aMessage, line); / * Give it a default * MIME type since * that may be the * missing line * * Choose application to * force a save */ if(messageGetMimeType(aMessage) == NOMIME) messageSetMimeType(aMessage, "application"); continue; }
b151ef55	/* * Some clients are broken and * put white space after the ; */ inMimeHead = continuationMarker(line);
de617e3e	if(!inMimeHead) { const text *next = t_line->t_next;
ad642304	char *fullline;
e0377124	int quotes = 0; const char *qptr;
de617e3e
37819555	assert(strlen(line) <= LINE_LENGTH);
e0377124
ad642304	fullline = rfc822comments(line); if(fullline == NULL) fullline = strdup(line); for(qptr = fullline; *qptr; qptr++)
e0377124	if(*qptr == '\"') quotes++;
21cd233d	/* * Fold next lines to the end of this * if they start with a white space
e0377124	* or if this line has an odd number of quotes: * Content-Type: application/octet-stream; name="foo * "
21cd233d	*/ while(next && next->t_line) {
de617e3e	const char *data = lineGetData(next->t_line);
21cd233d
e0377124	if((!isspace(data[0])) && ((quotes & 1) == 0))
21cd233d	break; ptr = cli_realloc(fullline, strlen(fullline) + strlen(data) + 1); if(ptr == NULL) break;
de617e3e
21cd233d	fullline = ptr; strcat(fullline, data);
e0377124	for(qptr = data; qptr; qptr++) if(qptr == '\"') quotes++;
21cd233d	t_line = next; next = next->t_next;
de617e3e	}
21cd233d	cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n", multiparts, fullline);
68badbc1
21cd233d	parseEmailHeader(aMessage, fullline, rfc821Table); free(fullline); } else { cli_dbgmsg("Multipart %d: About to parse header '%s'\n", multiparts, line);
ad642304	ptr = rfc822comments(line); parseEmailHeader(aMessage, (ptr) ? ptr : line, rfc821Table); if(ptr) free(ptr);
21cd233d	}
b151ef55	} else if(endOfMessage(line, boundary)) { /* * Some viruses put information * after the end of message, * which presumably some broken * mail clients find, so we * can't assume that this * is the end of the message / / t_line = NULL;*/ break;
30fb8a0b	} else if(boundaryStart(line, boundary)) { inhead = 1; break;
26564cf5	} else {
de617e3e	if(messageAddLine(aMessage, t_line->t_line) < 0)
79e432d2	break;
26564cf5	lines++; }
0bf1353d	} while((t_line = t_line->t_next) != NULL);
b151ef55	messageClean(aMessage);
26564cf5	cli_dbgmsg("Part %d has %d lines\n", multiparts, lines);
b151ef55	} free((char *)boundary);
6638be41	/*
cbc2eaa9	* Preprocess. Anything special to be done before * we handle the multiparts?
b62a19da	*/
cbc2eaa9	switch(tableFind(subtypeTable, mimeSubtype)) { case KNOWBOT: /* TODO */ cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n"); mimeSubtype = "mixed"; break;
e279f3ea	case -1: /* * According to section 7.2.6 of * RFC1521, unrecognised multiparts * should be treated as multipart/mixed. */ cli_warnmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype); mimeSubtype = "mixed"; break;
cbc2eaa9	}
b62a19da	/*
6638be41	* We've finished message we're parsing */ if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
2250ea69	}
b151ef55
6613d595	if(multiparts == 0) { if(messages) free(messages);
6638be41	return 2; /* Nothing to do */
6613d595	}
6638be41
b151ef55	cli_dbgmsg("The message has %d parts\n", multiparts);
49674596	cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
b151ef55
b62a19da	/* * We now have all the parts of the multipart message * in the messages array: * message messages[multiparts] Let's decide what to do with them all */
b151ef55	switch(tableFind(subtypeTable, mimeSubtype)) { case RELATED:
68be129f	cli_dbgmsg("Multipart related handler\n");
b151ef55	/*
295e425f	* Have a look to see if there's HTML code * which will need scanning
b151ef55	*/ aMessage = NULL; assert(multiparts > 0);
0bcad2b1	htmltextPart = getTextPart(messages, multiparts);
b151ef55
0bcad2b1	if(htmltextPart >= 0) aText = textAddMessage(aText, messages[htmltextPart]);
b151ef55	else /*
295e425f	* There isn't an HTML bit. If there's a * multipart bit, it'll may be in there * somewhere
b151ef55	*/ for(i = 0; i < multiparts; i++) if(messageGetMimeType(messages[i]) == MULTIPART) { aMessage = messages[i];
0bcad2b1	htmltextPart = i;
b151ef55	break; }
74c6f514	if(htmltextPart == -1)
295e425f	cli_dbgmsg("No HTML code found to be scanned");
74c6f514	else {
565c449d	rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
74c6f514	if(rc == 1) { assert(aMessage == messages[htmltextPart]); messageDestroy(aMessage); messages[htmltextPart] = NULL; } }
b151ef55	/* * Fixed based on an idea from Stephen White <stephen@earth.li> * The message is confused about the difference * between alternative and related. Badtrans.B * suffers from this problem. * * Fall through in this case: * Content-Type: multipart/related; * type="multipart/alternative" */
f5e9abc8	/* * Changed to always fall through based on * an idea from Michael Dankov <misha@btrc.ru> * that some viruses are completely confused * about the difference between related * and mixed / /cptr = messageFindArgument(mainMessage, "type");
b151ef55	if(cptr == NULL) break; isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0); free((char *)cptr); if(!isAlternative)
f5e9abc8	break;*/
d28e1902	case DIGEST: /* * According to section 5.1.5 RFC2046, the * default mime type of multipart/digest parts * is message/rfc822 * * We consider them as alternative, wrong in * the strictest sense since they aren't * alternatives - all parts a valid - but it's * OK for our needs since it means each part * will be scanned */
b151ef55	case ALTERNATIVE: cli_dbgmsg("Multipart alternative handler\n"); /* * Fall through - some clients are broken and * say alternative instead of mixed. The Klez
e279f3ea	* virus is broken that way, and anyway we * wish to scan all of the alternatives
b151ef55	/ case REPORT: / * According to section 1 of RFC1892, the * syntax of multipart/report is the same * as multipart/mixed. There are some required * parameters, but there's no need for us to * verify that they exist */ case MIXED:
fdc8a467	case APPLEDOUBLE: /* not really supported */
b151ef55	/* * Look for attachments * * Not all formats are supported. If an * unsupported format turns out to be * common enough to implement, it is a simple * matter to add it */
2250ea69	if(aText) { if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b151ef55	mainMessage = NULL;
2250ea69	}
b151ef55	cli_dbgmsg("Mixed message with %d parts\n", multiparts); for(i = 0; i < multiparts; i++) { bool addAttachment = FALSE; bool addToText = FALSE; const char *dtype;
565c449d	#ifndef SAVE_TO_DISC
f54a8635	message *body;
d32343c3	#endif
b151ef55	aMessage = messages[i];
74c6f514	if(aMessage == NULL) continue;
b151ef55	cli_dbgmsg("Mixed message part %d is of type %d\n", i, messageGetMimeType(aMessage)); switch(messageGetMimeType(aMessage)) { case APPLICATION:
c6259ac5	addAttachment = TRUE;
b151ef55	break; case NOMIME:
c29ebe66	cli_dbgmsg("No mime headers found in multipart part %d\n", i);
7b8fb055	if(mainMessage) {
39b5a552	if(uuencodeBegin(aMessage)) {
7b8fb055	cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n"); messageSetEncoding(mainMessage, "x-uuencode");
1e06e1ab	fb = messageToFileblob(mainMessage, dir);
7b8fb055
1e06e1ab	if(fb) fileblobDestroy(fb);
7b8fb055	} if(mainMessage != messageIn) messageDestroy(mainMessage); mainMessage = NULL;
30fb8a0b	} else if(aMessage) {
39b5a552	if(uuencodeBegin(aMessage)) {
30fb8a0b	cli_dbgmsg("Found uuencoded message in multipart/mixed non mime part\n"); messageSetEncoding(aMessage, "x-uuencode"); fb = messageToFileblob(aMessage, dir); if(fb) fileblobDestroy(fb); assert(aMessage == messages[i]); messageReset(messages[i]);
39b5a552	} else if(binhexBegin(aMessage)) { cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n"); messageSetEncoding(aMessage, "x-binhex"); fb = messageToFileblob(aMessage, dir); if(fb) fileblobDestroy(fb); assert(aMessage == messages[i]); messageReset(messages[i]);
30fb8a0b	}
7b8fb055	}
b151ef55	addToText = TRUE; if(messageGetBody(aMessage) == NULL) /* * No plain text version */
de617e3e	messageAddStr(aMessage, "No plain text alternative");
b151ef55	assert(messageGetBody(aMessage) != NULL); break; case TEXT:
ef3cf57d	dtype = messageGetDispositionType(aMessage);
852e3ce4	cli_dbgmsg("Mixed message text part disposition \"%s\"\n", dtype);
b151ef55	if(strcasecmp(dtype, "attachment") == 0) addAttachment = TRUE; else if((*dtype == '\0') \|\| (strcasecmp(dtype, "inline") == 0)) {
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
b151ef55	mainMessage = NULL;
ef3cf57d	cptr = messageGetMimeSubtype(aMessage);
e6b25cd3	cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
ef3cf57d	if(uuencodeBegin(aMessage)) {
852e3ce4	cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n"); messageSetEncoding(aMessage, "x-uuencode"); addAttachment = TRUE;
c9ae17be	} else if(tableFind(subtypeTable, cptr) == PLAIN) {
7584963d	char *filename;
852e3ce4	/* * Strictly speaking
c9ae17be	* a text/plain part is
852e3ce4	* not an attachment. We * pretend it is so that * we can decode and * scan it */
7584963d	filename = (char )messageFindArgument(aMessage, "filename"); if(filename == NULL) filename = (char )messageFindArgument(aMessage, "name"); if(filename == NULL) { cli_dbgmsg("Adding part to main message\n"); addToText = TRUE; } else { cli_dbgmsg("Treating %s as attachment\n", filename); free(filename); addAttachment = TRUE; }
852e3ce4	} else {
06d4e856	if(options&CL_SCAN_MAILURL)
c9ae17be	if(tableFind(subtypeTable, cptr) == HTML) checkURLs(aMessage, dir);
b151ef55	messageAddArgument(aMessage, "filename=textportion"); addAttachment = TRUE; } } else {
b4cb4486	cli_dbgmsg("Text type %s is not supported\n", dtype);
b151ef55	continue; } break; case MESSAGE:
2e0f78a6	/* Content-Type: message/rfc822 */
b151ef55	cli_dbgmsg("Found message inside multipart\n");
ffd59a3e	if(encodingLine(aMessage) == NULL) { assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
af852ae0	continue;
ffd59a3e	}
de617e3e	messageAddStrAtTop(aMessage,
0856891e	"Received: by clamd (message/rfc822)");
4465fb04	#ifdef SAVE_TO_DISC /* * Save this embedded message * to a temporary file / saveTextPart(aMessage, dir); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL; #else / * Scan in memory, faster but * is open to DoS attacks when * many nested levels are * involved. */
735377bc	body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
bad123c6	/* * We've fininished with the * original copy of the message, * so throw that away and * deal with the encapsulated * message as a message. * This can save a lot of memory */ assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
f54a8635	if(body) {
565c449d	rc = parseEmailBody(body, NULL, dir, rfc821Table, subtypeTable, options);
f54a8635	messageDestroy(body); }
4465fb04	#endif
b151ef55	continue; case MULTIPART: /* * It's a multi part within a multi part * Run the message parser on this bit, it won't * be an attachment */ cli_dbgmsg("Found multipart inside multipart\n");
d32343c3	if(aMessage) { /* * The headers were parsed when reading in the * whole multipart section */
565c449d	rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
d32343c3	cli_dbgmsg("Finished recursion\n"); assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
f54a8635	} else {
565c449d	rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options);
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
f54a8635	mainMessage = NULL; }
b151ef55	continue; case AUDIO: case IMAGE:
c7256385	case VIDEO:
b151ef55	addAttachment = TRUE; break; default:
c7256385	cli_warnmsg("Only text and application attachments are supported, type = %d\n",
b151ef55	messageGetMimeType(aMessage)); continue; } /* * It must be either text or * an attachment. It can't be both */ assert(addToText \|\| addAttachment); assert(!(addToText && addAttachment));
c29ebe66	if(addToText) { cli_dbgmsg("Adding to non mime-part\n");
b151ef55	aText = textAdd(aText, messageGetBody(aMessage));
c29ebe66	} else {
565c449d	fb = messageToFileblob(aMessage, dir);
b151ef55
1e06e1ab	if(fb) fileblobDestroy(fb);
b151ef55	}
6638be41	assert(aMessage == messages[i]); messageDestroy(messages[i]); messages[i] = NULL;
b151ef55	}
565c449d	/* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */
b151ef55	break; case SIGNED: case PARALLEL: /* * If we're here it could be because we have a * multipart/mixed message, consisting of a * message followed by an attachment. That * message itself is a multipart/alternative * message and we need to dig out the plain * text part of that alternative */
0bcad2b1	htmltextPart = getTextPart(messages, multiparts); if(htmltextPart == -1) htmltextPart = 0;
b151ef55
565c449d	rc = parseEmailBody(messages[htmltextPart], aText, dir, rfc821Table, subtypeTable, options);
b151ef55	break;
b62a19da	case ENCRYPTED: rc = 0;
cbc2eaa9	protocol = (char *)messageFindArgument(mainMessage, "protocol");
b62a19da	if(protocol) { if(strcasecmp(protocol, "application/pgp-encrypted") == 0) { /* RFC2015 */ cli_warnmsg("PGP encoded attachment not scanned\n"); rc = 2; } else cli_warnmsg("Unknown encryption protocol '%s' - report to bugs@clamav.net\n"); free(protocol); } else cli_warnmsg("Encryption method missing protocol name - report to bugs@clamav.net\n"); break;
b151ef55	default:
e279f3ea	assert(0);
b151ef55	}
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
c29ebe66	if(aText && (textIn == NULL)) { if((fb = fileblobCreate()) != NULL) { cli_dbgmsg("Save non mime part\n"); fileblobSetFilename(fb, dir, "textpart");
0856891e	fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);
c29ebe66	fb = textToFileblob(aText, fb); fileblobDestroy(fb); }
c6259ac5	textDestroy(aText);
c29ebe66	}
c6259ac5
0856891e	for(i = 0; i < multiparts; i++) if(messages[i]) messageDestroy(messages[i]);
6613d595	if(messages) free(messages);
b151ef55	return rc; case MESSAGE: /* * Check for forbidden encodings */ switch(messageGetEncoding(mainMessage)) { case NOENCODING: case EIGHTBIT: case BINARY: break; default:
c6259ac5	cli_warnmsg("MIME type 'message' cannot be decoded\n");
b151ef55	break; }
9a7398ee	rc = 0;
c6259ac5	if((strcasecmp(mimeSubtype, "rfc822") == 0) \|\| (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
de617e3e	message *m = parseEmailHeaders(mainMessage, rfc821Table);
bad123c6	if(m) { cli_dbgmsg("Decode rfc822");
4465fb04	if(mainMessage && (mainMessage != messageIn)) { messageDestroy(mainMessage); mainMessage = NULL;
74c6f514	} else messageReset(mainMessage);
bad123c6	if(messageGetBody(m))
565c449d	rc = parseEmailBody(m, NULL, dir, rfc821Table, subtypeTable, options);
bad123c6	messageDestroy(m); }
b151ef55	break;
f8c25c7a	} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
d6e30cce	/* RFC 2298 - handle like a normal email */
f8c25c7a	rc = 1;
d6e30cce	break;
f8c25c7a	} else if(strcasecmp(mimeSubtype, "partial") == 0) {
9a7398ee	#ifdef PARTIAL_DIR /* RFC1341 message split over many emails */ if(rfc1341(mainMessage, dir) >= 0) rc = 1; #else
e94471f4	cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
9a7398ee	rc = 0; #endif } else if(strcasecmp(mimeSubtype, "external-body") == 0)
e94471f4	/* TODO */
b151ef55	cli_warnmsg("Attempt to send Content-type message/external-body trapped");
bf8ea488	else
e94471f4	cli_warnmsg("Unsupported message format `%s' - please report to bugs@clamav.net\n", mimeSubtype);
b151ef55
9a7398ee
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
6613d595	if(messages) free(messages);
9a7398ee	return rc;
b151ef55	case APPLICATION:
0bcad2b1	cptr = messageGetMimeSubtype(mainMessage);
04421a14	/if((strcasecmp(cptr, "octet-stream") == 0) \|\| (strcasecmp(cptr, "x-msdownload") == 0)) {/ {
565c449d	fb = messageToFileblob(mainMessage, dir);
b151ef55
1e06e1ab	if(fb) { cli_dbgmsg("Saving main message as attachment\n"); fileblobDestroy(fb);
a446de17	messageClearMarkers(mainMessage);
b151ef55	}
04421a14	} /else cli_warnmsg("Discarded application not sent as attachment\n");/
b151ef55	break; case AUDIO: case VIDEO: case IMAGE: break; default: cli_warnmsg("Message received with unknown mime encoding"); break; } }
d32343c3	if(aText && (textIn == NULL)) {
0856891e	cli_dbgmsg("Non mime part not scanned - if you believe this file contains a virus report to bugs@clamav.net\n"); /if((fb = fileblobCreate()) != NULL) { cli_dbgmsg("Save non mime part\n"); fileblobSetFilename(fb, dir, "textpart"); fileblobAddData(fb, "Received: by clamd (textpart)\n", 30); fb = textToFileblob(aText, fb); fileblobDestroy(fb); }/
d32343c3	textDestroy(aText); aText = NULL; }
565c449d	/* * No attachments - scan the text portions, often files * are hidden in HTML code */ cli_dbgmsg("%d multiparts found\n", multiparts); for(i = 0; i < multiparts; i++) { fb = messageToFileblob(messages[i], dir);
b151ef55
565c449d	if(fb) {
0e3b08fc	cli_dbgmsg("Saving multipart %d\n", i);
0bcad2b1
565c449d	fileblobDestroy(fb); } } if(mainMessage) {
b151ef55	/*
565c449d	* Look for uu-encoded main file
b151ef55	*/
565c449d	const text *t_line; if((t_line = uuencodeBegin(mainMessage)) != NULL) { cli_dbgmsg("Found uuencoded file\n");
0bcad2b1
565c449d	/* * Main part contains uuencoded section */ messageSetEncoding(mainMessage, "x-uuencode");
0bcad2b1
565c449d	if((fb = messageToFileblob(mainMessage, dir)) != NULL) { if((cptr = fileblobGetFilename(fb)) != NULL) cli_dbgmsg("Found uuencoded message %s\n", cptr);
1e06e1ab	fileblobDestroy(fb); }
0856891e	rc = 1;
565c449d	} else if((encodingLine(mainMessage) != NULL) &&
f1c33aa0	((t_line = bounceBegin(mainMessage)) != NULL)) {
0856891e	const text t, start;
0bcad2b1	/*
565c449d	* Attempt to save the original (unbounced) * message - clamscan will find that in the * directory and call us again (with any luck)
8386482b	* having found an e-mail message to handle.
565c449d	* * This finds a lot of false positives, the
8386482b	* search that a content type is in the
565c449d	* bounce (i.e. it's after the bounce header)
8386482b	* helps a bit. * * messageAddLine
565c449d	* optimisation could help here, but needs * careful thought, do it with line numbers * would be best, since the current method in * messageAddLine of checking encoding first * must remain otherwise non bounce messages * won't be scanned
0bcad2b1	*/
0856891e	for(t = start = t_line; t; t = t->t_next) { char cmd[LINE_LENGTH + 1];
565c449d	const char *txt = lineGetData(t->t_line);
0856891e	if(txt == NULL) continue; if(cli_strtokbuf(txt, 0, ":", cmd) == NULL) continue; switch(tableFind(rfc821Table, cmd)) { case CONTENT_TRANSFER_ENCODING: if((strstr(txt, "7bit") == NULL) && (strstr(txt, "8bit") == NULL)) break; continue; case CONTENT_DISPOSITION: break; case CONTENT_TYPE: if(strstr(txt, "text/plain") != NULL) t = NULL; break; default: if(strcasecmp(cmd, "From") == 0) start = t_line; else if(strcasecmp(cmd, "Received") == 0) start = t_line; continue;
8386482b	}
0856891e	break;
565c449d	} if(t && ((fb = fileblobCreate()) != NULL)) { cli_dbgmsg("Found a bounce message\n"); fileblobSetFilename(fb, dir, "bounce");
0856891e	fb = textToFileblob(start, fb);
565c449d	fileblobDestroy(fb);
0856891e	rc = 1;
e745ac7e	} else cli_dbgmsg("Not found a bounce message\n");
565c449d	} else { bool saveIt;
0bcad2b1
565c449d	cli_dbgmsg("Not found uuencoded file\n");
2227f20e
565c449d	if(messageGetMimeType(mainMessage) == MESSAGE)
15c8cace	/*
565c449d	* Quick peek, if the encapsulated * message has no * content encoding statement don't * bother saving to scan, it's safe
15c8cace	*/
565c449d	saveIt = (encodingLine(mainMessage) != NULL); else if((t_line = encodingLine(mainMessage)) != NULL) {
92915cee	/*
565c449d	* Some bounces include the message * body without the headers. * Unfortunately this generates a * lot of false positives that a bounce * has been found when it hasn't.
92915cee	*/
565c449d	if((fb = fileblobCreate()) != NULL) { cli_dbgmsg("Found a bounce message with no header\n");
1e06e1ab	fileblobSetFilename(fb, dir, "bounce");
0856891e	fileblobAddData(fb, "Received: by clamd (bounce)\n", 28);
5a01973c
565c449d	fb = textToFileblob(t_line, fb);
b759d5eb
565c449d	fileblobDestroy(fb);
b759d5eb	}
565c449d	saveIt = FALSE;
74c6f514	} else if(multiparts == 0)
565c449d	/* * Save the entire text portion, * since it it may be an HTML file with * a JavaScript virus */ saveIt = TRUE;
74c6f514	else saveIt = FALSE;
b151ef55
565c449d	if(saveIt) { cli_dbgmsg("Saving text part to scan\n"); /* * TODO: May be better to save aText */ saveTextPart(mainMessage, dir);
74c6f514	if(mainMessage != messageIn) { messageDestroy(mainMessage); mainMessage = NULL; } else messageReset(mainMessage); rc = 1;
c6259ac5	}
b151ef55	}
565c449d	} else rc = (multiparts) ? 1 : 2; /* anything saved? */
b151ef55
2250ea69	if(mainMessage && (mainMessage != messageIn)) messageDestroy(mainMessage);
6613d595	if(messages) free(messages);
68be129f	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
b151ef55
68be129f	return rc;
b151ef55	} /* * Is the current line the start of a new section? * * New sections start with --boundary / static int boundaryStart(const char line, const char *boundary) {
ad642304	char ptr, p;
2ed1bc5a	int rc;
ad642304
80a8c7d8	if(line == NULL) return 0; /* empty line */
c29ebe66	/cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);/
ad642304	p = ptr = rfc822comments(line); if(ptr == NULL)
aa479b7d	ptr = (char *)line;
ad642304	if(*ptr++ != '-') { if(p) free(p);
80a8c7d8	return 0;
ad642304	}
80a8c7d8
b151ef55	/*
80a8c7d8	* Gibe.B3 is broken, it has:
b151ef55	* boundary="---- =_NextPart_000_01C31177.9DC7C000" * but it's boundaries look like * ------ =_NextPart_000_01C31177.9DC7C000
80a8c7d8	* notice the one too few '-'. * Presumably this is a deliberate exploitation of a bug in some mail * clients. * * The trouble is that this creates a lot of false positives for * boundary conditions, if we're too lax about matches. We do our level * best to avoid these false positives. For example if we have * boundary="1" we want to ensure that we don't break out of every line * that has -1 in it instead of starting --1. This needs some more work.
b151ef55	*/
2ed1bc5a	if(strstr(ptr, boundary) != NULL) rc = 1; else if(*ptr++ != '-') rc = 0; else rc = (strcasecmp(line, boundary) == 0);
ad642304	if(p) free(p);
2ed1bc5a	if(rc == 1) cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line); return rc;
b151ef55	} /* * Is the current line the end? * * The message ends with with --boundary-- / static int endOfMessage(const char line, const char *boundary) { size_t len;
98685ac1	if(line == NULL) return 0;
c29ebe66	/cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);/
b151ef55	if(line++ != '-') return 0; if(line++ != '-') return 0; len = strlen(boundary);
c6259ac5	if(strncasecmp(line, boundary, len) != 0) return 0;
ef3cf57d	/* * Use < rather than == because some broken mails have white * space after the boundary */
39b5a552	if(strlen(line) < (len + 2))
b151ef55	return 0; line = &line[len]; if(line++ != '-') return 0; return line == '-'; } /* * Initialise the various lookup tables / static int initialiseTables(table_t rfc821Table, table_t subtypeTable) { const struct tableinit tableinit; /* * Initialise the various look up tables / rfc821Table = tableCreate(); assert(*rfc821Table != NULL); for(tableinit = rfc821headers; tableinit->key; tableinit++)
7b8fb055	if(tableInsert(rfc821Table, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
4d9c0ca8	*rfc821Table = NULL;
b151ef55	return -1;
7b8fb055	}
b151ef55	subtypeTable = tableCreate(); assert(subtypeTable != NULL); for(tableinit = mimeSubtypes; tableinit->key; tableinit++) if(tableInsert(subtypeTable, tableinit->key, tableinit->value) < 0) { tableDestroy(rfc821Table);
7b8fb055	tableDestroy(*subtypeTable);
4d9c0ca8	rfc821Table = NULL; subtypeTable = NULL;
b151ef55	return -1; } return 0; } /*
0bcad2b1	* If there's a HTML text version use that, otherwise
b151ef55	* use the first text part, otherwise just use the
0bcad2b1	* first one around. HTML text is most likely to include * a scripting worm
b151ef55	* * If we can't find one, return -1 / static int getTextPart(message const messages[], size_t size) { size_t i;
b4cb4486	int textpart = -1;
b151ef55	for(i = 0; i < size; i++) { assert(messages[i] != NULL);
b4cb4486	if(messageGetMimeType(messages[i]) == TEXT) { if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0) return (int)i; textpart = (int)i; }
b151ef55	}
b4cb4486	return textpart;
b151ef55	} /* * strip -
4d9c0ca8	* Remove the trailing spaces from a buffer. Don't call this directly, * always call strstrip() which is a wrapper to this routine to be used with * NUL terminated strings. This code looks a bit strange because of it's * heritage from code that worked on strings that weren't necessarily NUL * terminated. * TODO: rewrite for clamAV *
b151ef55	* Returns it's new length (a la strlen) * * len must be int not size_t because of the >= 0 test, it is sizeof(buf) * not strlen(buf) / static size_t strip(char buf, int len) { register char *ptr; register size_t i; if((buf == NULL) \|\| (len <= 0))
4d9c0ca8	return 0;
b151ef55	i = strlen(buf); if(len > (int)(i + 1))
4d9c0ca8	return i;
b151ef55	ptr = &buf[--len]; #if defined(UNIX) \|\| defined(C_LINUX) \|\| defined(C_DARWIN) /* watch - it may be in shared text area / do if(ptr) *ptr = '\0';
6b93ea0c	while((--len >= 0) && (!isgraph(--ptr)) && (ptr != '\n') && (*ptr != '\r'));
b151ef55	#else /* more characters can be displayed on DOS / do #ifndef REAL_MODE_DOS if(ptr) /* C8.0 puts into a text area / #endif ptr = '\0'; while((--len >= 0) && ((--ptr == '\0') \|\| (isspace((int)ptr)))); #endif return((size_t)(len + 1)); } /* * strstrip: * Strip a given string */
3db105a2	size_t
b151ef55	strstrip(char s) { if(s == (char )NULL) return(0);
98685ac1
b151ef55	return(strip(s, strlen(s) + 1)); } /* * When parsing a MIME header see if this spans more than one line. A * semi-colon at the end of the line indicates that the MIME information * is continued on the next line. * * Some clients are broken and put white space after the ; / static bool continuationMarker(const char line) { const char *ptr;
98685ac1	if(line == NULL) return FALSE;
b151ef55	#ifdef CL_DEBUG cli_dbgmsg("continuationMarker(%s)\n", line); #endif if(strlen(line) == 0) return FALSE; ptr = strchr(line, '\0'); assert(ptr != NULL);
752c34b9	while(ptr > line)
b151ef55	switch(--ptr) { case '\n': case '\r': case ' ': case '\t': continue; case ';': return TRUE; default: return FALSE; } return FALSE; } static int parseMimeHeader(message m, const char cmd, const table_t rfc821Table, const char arg) { #ifdef CL_THREAD_SAFE char strptr; #endif
56d8328d	char copy, ptr; int commandNumber;
f2b068fb
b151ef55	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
56d8328d
90905415	ptr = rfc822comments(cmd);
e9bdeb72	if(ptr) { commandNumber = tableFind(rfc821Table, ptr); free(ptr); } else commandNumber = tableFind(rfc821Table, cmd);
56d8328d
90905415	copy = rfc822comments(arg);
e9bdeb72	if(copy == NULL) copy = strdup(arg);
56d8328d	if(copy == NULL) return -1;
b151ef55
ad3d1172	ptr = copy;
56d8328d	switch(commandNumber) {
b151ef55	case CONTENT_TYPE: /* * Fix for non RFC1521 compliant mailers * that send content-type: Text instead * of content-type: Text/Plain, or * just simply "Content-Type:" */
a8c7e017	if(arg == NULL)
f1c33aa0	/* * According to section 4 of RFC1521: * "Note also that a subtype specification is * MANDATORY. There are no default subtypes" *
21cd233d	* We have to break this and make an assumption
f1c33aa0	* for the subtype because virus writers and * email client writers don't get it right */ cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
b151ef55	else if(strchr(copy, '/') == NULL)
f1c33aa0	/* * Empty field, such as * Content-Type: * which I believe is illegal according to * RFC1521 */
b4cb4486	cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
b151ef55	else {
137740e1	int i;
6fd6d771	char mimeArgs; / RHS of the ; */
b151ef55	/* * Some clients are broken and * put white space after the ; */
7e572372	if(arg == '/') { cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n"); messageSetMimeType(m, "application"); messageSetMimeSubtype(m, "octet-stream"); } else { /
de509b8e	* The content type could be in quotes: * Content-Type: "multipart/mixed" * FIXME: this is a hack in that ignores * the quotes, it doesn't handle * them properly
7e572372	*/
e9bdeb72	while(isspace(*copy)) copy++; if(copy[0] == '\"') copy++;
de509b8e
e9bdeb72	if(copy[0] != '/') { char s; char mimeType; /* LHS of the ; */ s = mimeType = cli_strtok(copy, 0, ";");
de509b8e	/*
6fd6d771	* Handle * Content-Type: foo/bar multipart/mixed * and * Content-Type: multipart/mixed foo/bar
de509b8e	*/
6fd6d771	for(;;) {
897fd9c7	#ifdef CL_THREAD_SAFE
6fd6d771	int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
897fd9c7	#else int set = messageSetMimeType(m, strtok(s, "/")); #endif
6fd6d771	/* * Stephen White <stephen@earth.li> * Some clients put space after * the mime type but before * the ; */
897fd9c7	#ifdef CL_THREAD_SAFE
6fd6d771	s = strtok_r(NULL, ";", &strptr);
897fd9c7	#else s = strtok(NULL, ";"); #endif
6fd6d771	if(s == NULL) break; if(set) {
e9bdeb72	size_t len = strstrip(s) - 1;
6fd6d771	if(s[len] == '\"') { s[len] = '\0'; len = strstrip(s); } if(len) {
e9bdeb72	if(strchr(s, ' ')) { char *t = cli_strtok(s, 0, " ");
6fd6d771
e9bdeb72	messageSetMimeSubtype(m, t); free(t); } else messageSetMimeSubtype(m, s);
6fd6d771	}
de509b8e	}
6fd6d771	while(s && !isspace(s)) s++; if(s++ == '\0') break; if(s == '\0') break;
de509b8e	}
e9bdeb72	free(mimeType); }
7e572372	}
b151ef55	/*
0704dad8	* Add in all rest of the the arguments. * e.g. if the header is this: * Content-Type:', arg='multipart/mixed; boundary=foo * we find the boundary argument set it
b151ef55	*/
137740e1	i = 1; while((mimeArgs = cli_strtok(copy, i++, ";")) != NULL) { cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);
6fd6d771	messageAddArguments(m, mimeArgs); free(mimeArgs); }
b151ef55	} break; case CONTENT_TRANSFER_ENCODING: messageSetEncoding(m, copy); break; case CONTENT_DISPOSITION:
897fd9c7	#ifdef CL_THREAD_SAFE
a980b067	arg = strtok_r(copy, ";", &strptr); if(arg && *arg) { messageSetDispositionType(m, arg); messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr)); }
897fd9c7	#else arg = strtok(copy, ";"); if(arg && *arg) { messageSetDispositionType(m, arg); messageAddArgument(m, strtok(NULL, "\r\n")); } #endif
b151ef55	}
c6259ac5	free(ptr);
b151ef55
f2b068fb	return 0;
b151ef55	}
68be129f	/*
5a01973c	* Save the text portion of the message / static void saveTextPart(message m, const char *dir) {
1e06e1ab	fileblob *fb;
5a01973c	messageAddArgument(m, "filename=textportion");
1e06e1ab	if((fb = messageToFileblob(m, dir)) != NULL) {
5a01973c	/* * Save main part to scan that */
37819555	cli_dbgmsg("Saving main message\n");
5a01973c
1e06e1ab	fileblobDestroy(fb);
5a01973c	} }
/*
 * Handle RFC822 comments in headers.
 * Returns a buffer without the comments or NULL on error or if the input
 * has no comments. The caller must free the returned buffer
 * See section 3.4.3 of RFC822
 *
 * Parentheses inside a quoted string are not comments, so they and the
 * quotes themselves are copied through. A backslash quotes the following
 * character: the backslash is dropped and the character is copied, unless it
 * is inside a comment.
 * Trailing white space is removed from the result.
 *
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in)
{
	const char *iptr;
	char *out, *optr;
	int backslash, inquote, commentlevel;

	if(in == NULL)
		return NULL;

	if(strchr(in, '(') == NULL)
		return NULL;

	/* the output can never be longer than the input */
	out = cli_malloc(strlen(in) + 1);
	if(out == NULL)
		return NULL;

	backslash = commentlevel = inquote = 0;
	optr = out;

	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(iptr = in; *iptr; iptr++)
		if(backslash) {
			if(commentlevel == 0)
				*optr++ = *iptr;
			backslash = 0;
		} else switch(*iptr) {
			case '\\':
				backslash = 1;
				break;
			case '\"':
				*optr++ = '\"';
				inquote = !inquote;
				break;
			case '(':
				if(inquote)
					*optr++ = '(';
				else
					commentlevel++;
				break;
			case ')':
				if(inquote)
					*optr++ = ')';
				else if(commentlevel > 0)
					commentlevel--;
				break;
			default:
				if(commentlevel == 0)
					*optr++ = *iptr;
		}

	if(backslash)	/* last character was a single backslash */
		*optr++ = '\\';
	*optr = '\0';

	strstrip(out);

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
0674e2af	/* * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must * free, or NULL on error / static char rfc2047(const char in) { char out, *pout; size_t len;
95f98162	if((strstr(in, "=?") == NULL) \|\| (strstr(in, "?=") == NULL))
0674e2af	return strdup(in); cli_dbgmsg("rfc2047 '%s'\n", in); out = cli_malloc(strlen(in) + 1); if(out == NULL) return NULL; pout = out; /* For each RFC2047 string / while(in) {
291ac47f	char encoding, ptr, enctext;
0674e2af	message m; blob b; /* Find next RFC2047 string / while(in) { if((in == '=') && (in[1] == '?')) { in += 2; break; } pout++ = in++; } / Skip over charset, find encoding / while((in != '?') && in) in++; if(in == '\0') break; encoding = *++in; encoding = tolower(encoding); if((encoding != 'q') && (encoding != 'b')) {
c3400886	cli_warnmsg("Unsupported RFC2047 encoding type '%c' - report to bugs@clamav.net\n", encoding); free(out); out = NULL;
0674e2af	break; } /* Skip to encoded text / if(++in != '?') break; if(*++in == '\0') break;
291ac47f	enctext = strdup(in); if(enctext == NULL) { free(out); out = NULL; break; }
0674e2af	in = strstr(in, "?=");
291ac47f	if(in == NULL) { free(enctext);
0674e2af	break;
291ac47f	}
0674e2af	in += 2; ptr = strstr(enctext, "?="); assert(ptr != NULL); ptr = '\0'; /cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/ m = messageCreate();
37819555	if(m == NULL)
0674e2af	break; messageAddStr(m, enctext);
291ac47f	free(enctext);
4d9c0ca8	switch(encoding) {
0674e2af	case 'q': messageSetEncoding(m, "quoted-printable"); break; case 'b': messageSetEncoding(m, "base64"); break; } b = messageToBlob(m); len = blobGetDataSize(b); cli_dbgmsg("Decoded as '%.s'\n", len, len, blobGetData(b)); memcpy(pout, blobGetData(b), len); blobDestroy(b); messageDestroy(m); if(pout[len - 1] == '\n') pout += len - 1; else pout += len; }
5e5a162c	if(out == NULL) return NULL; *pout = '\0';
0674e2af
5e5a162c	cli_dbgmsg("rfc2047 returns '%s'\n", out);
0674e2af	return out; }
#ifdef	PARTIAL_DIR
/*
 * Handle partial messages
 *
 * The part held in m is saved in the directory "clamav-partial" below the
 * temporary directory, under a name built from the message's "id" and
 * "number" arguments. When the part whose number equals "total" arrives, the
 * saved parts are concatenated in order into the file dir/id and, unless
 * cli_leavetemps_flag is set, removed.
 *
 * Returns 0 on success, -1 on error
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *oldfilename;
	char *id = NULL, *number = NULL, *total = NULL;
	const char *tmpdir;
	char *pdir;
	struct stat statb;
	int rc = -1;

#ifdef  CYGWIN
	if((tmpdir = getenv("TEMP")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
				tmpdir = "C:\\";
#else
	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TEMP")) == (char *)NULL)
#ifdef	P_tmpdir
				tmpdir = P_tmpdir;
#else
				tmpdir = "/tmp";
#endif
#endif

	/* 16 == strlen("/clamav-partial") + 1 */
	pdir = cli_malloc(strlen(tmpdir) + 16);
	if(pdir == NULL)
		return -1;

	sprintf(pdir, "%s/clamav-partial", tmpdir);

	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", pdir);
		goto done;
	}
	if(stat(pdir, &statb) < 0) {
		cli_errmsg("Can't stat the directory '%s'\n", pdir);
		goto done;
	}
	if(statb.st_mode & 077)
		cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
			pdir, (unsigned int)(statb.st_mode & 0777));

	/* the strings returned by messageFindArgument() are freed at done: */
	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		goto done;

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL)
		goto done;

	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 == strlen("filename=") + 1 */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg == NULL) {
		free(oldfilename);
		goto done;
	}
	sprintf(arg, "filename=%s%s", id, number);
	messageAddArgument(m, arg);
	free(arg);

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, pdir)) == NULL)
		goto done;

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		int n = atoi(number);
		int t = atoi(total);
		DIR *dd = NULL;

		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
			FILE *fout;
			char outname[NAME_MAX + 1];

			snprintf(outname, sizeof(outname), "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing", outname);
				closedir(dd);
				goto done;
			}

			for(n = 1; n <= t; n++) {
				char filename[NAME_MAX + 1];
				const struct dirent *dent;
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
				union {
					struct dirent d;
					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
				} result;
#endif

				snprintf(filename, sizeof(filename), "%s%d", id, n);

#ifdef HAVE_READDIR_R_3
				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
#elif defined(HAVE_READDIR_R_2)
				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					char fullname[NAME_MAX + 1];
					FILE *fin;
					char buffer[BUFSIZ];
					int nblanks;
					extern short cli_leavetemps_flag;

					if(dent->d_ino == 0)
						continue;

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
						continue;

					/*
					 * Bounded: pdir plus the entry name
					 * may not fit. A truncated name is
					 * left for fopen() to reject.
					 */
					snprintf(fullname, sizeof(fullname), "%s/%s", pdir, dent->d_name);
					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading", fullname);
						fclose(fout);
						unlink(outname);
						closedir(dd);
						goto done;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer), fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied
						 */
						if(buffer[0] == '\n') {
							nblanks++;
						} else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* don't unlink if leave temps */
					if(!cli_leavetemps_flag)
						unlink(fullname);
					break;
				}
				rewinddir(dd);
			}
			closedir(dd);
			fclose(fout);
		}
	}
	rc = 0;

done:
	/* single exit so that nothing leaks, whichever path got us here */
	free(id);
	free(number);
	free(total);
	free(pdir);

	return rc;
}
#endif
3eb12bae	#ifdef FOLLOWURLS
c5ed8336	static void checkURLs(message m, const char dir) { blob *b = messageToBlob(m); size_t len;
6b93ea0c	table_t *t;
6da40aa1	int i, n;
314ff77b	#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) pthread_t tid[MAX_URLS]; struct arg args[MAX_URLS]; #endif
6da40aa1	tag_arguments_t hrefs;
c5ed8336	if(b == NULL) return; len = blobGetDataSize(b);
e94471f4	if(len == 0) { blobDestroy(b);
3eb12bae	return;
e94471f4	}
3eb12bae
6b93ea0c	/* TODO: make this size customisable / if(len > 1001024) { cli_warnmsg("Viruses pointed to by URL not scanned in large message\n"); blobDestroy(b);
e94471f4	return;
6b93ea0c	}
e94471f4	blobClose(b);
6b93ea0c	t = tableCreate();
3a0ef2ee	if(t == NULL) { blobDestroy(b); return; }
3eb12bae
a2d786fc	hrefs.count = 0; hrefs.tag = hrefs.value = NULL;
6da40aa1	cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
3a0ef2ee	if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
7d3d11d0	blobDestroy(b); tableDestroy(t); return;
3a0ef2ee	} cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
e745ac7e	/* TODO: Do we need to call remove_html_comments? */
6b93ea0c
6da40aa1	n = 0; for(i = 0; i < hrefs.count; i++) {
a77dc192	const char url = (const char )hrefs.value[i];
6da40aa1	if(strncasecmp("http://", url, 7) == 0) { char *ptr;
314ff77b	#ifdef WITH_CURL #ifndef CL_THREAD_SAFE struct arg arg; #endif #else /!WITH_CURL/
bf6f653d	#ifdef CL_THREAD_SAFE static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER; #endif
6b93ea0c	struct stat statb; char cmd[512];
314ff77b	#endif /WITH_CURL/
a95c894a	char name[NAME_MAX + 1];
6da40aa1	if(tableFind(t, url) == 1) { cli_dbgmsg("URL %s already downloaded\n", url);
f2b068fb	continue; }
de617e3e	if(n == MAX_URLS) { cli_warnmsg("Not all URLs will be scanned\n"); break; }
6da40aa1	(void)tableInsert(t, url, 1); cli_dbgmsg("Downloading URL %s to be scanned\n", url);
2176c0e5	strncpy(name, url, sizeof(name) - 1);
39d09964	name[sizeof(name) - 1] = '\0';
6da40aa1	for(ptr = name; ptr; ptr++) if(ptr == '/') *ptr = '_';
c5ed8336
da812a6a	#ifdef WITH_CURL
314ff77b	#ifdef CL_THREAD_SAFE
a95c894a	args[n].dir = dir; args[n].url = url;
314ff77b	args[n].filename = strdup(name); pthread_create(&tid[n], NULL, getURL, &args[n]); #else
6da40aa1	arg.url = url;
314ff77b	arg.dir = dir; arg.filename = name; getURL(&arg); #endif
3fa72383	#else /* * TODO: maximum size and timeouts */
2176c0e5	len = sizeof(cmd) - 26 - strlen(dir) - strlen(name); #ifdef CL_DEBUG snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.s >%s/%s", len, url, dir, name); #else snprintf(cmd, sizeof(cmd) - 1, "GET -t10 %.s >%s/%s 2>/dev/null", len, url, dir, name); #endif cmd[sizeof(cmd) - 1] = '\0'; #ifndef WITH_CURL for(ptr = cmd; ptr; ptr++) if(strchr(";&", ptr)) *ptr = '_'; #endif
c5ed8336	cli_dbgmsg("%s\n", cmd);
bf6f653d	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&system_mutex); #endif
c5ed8336	system(cmd);
bf6f653d	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&system_mutex); #endif snprintf(cmd, sizeof(cmd), "%s/%s", dir, name); if(stat(cmd, &statb) >= 0) if(statb.st_size == 0) {
6da40aa1	cli_warnmsg("URL %s failed to download\n", url);
bf6f653d	/* * Don't bother scanning an empty file */ (void)unlink(cmd); }
3fa72383	#endif
314ff77b	++n;
c5ed8336	} } blobDestroy(b);
f2b068fb	tableDestroy(t);
314ff77b	#if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
d28e1902	assert(n <= MAX_URLS);
314ff77b	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); while(--n >= 0) { pthread_join(tid[n], NULL); free(args[n].filename); } #endif
a95c894a	html_tag_arg_free(&hrefs);
c5ed8336	}
da812a6a	#ifdef WITH_CURL
314ff77b	static void * #ifdef CL_THREAD_SAFE getURL(void a) #else getURL(struct arg arg) #endif
3fa72383	{ char *fout;
6b93ea0c	CURL *curl;
3fa72383	FILE *fp;
6b93ea0c	struct curl_slist *headers; static int initialised = 0;
314ff77b	#ifdef CL_THREAD_SAFE static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; struct arg arg = (struct arg )a; #endif const char url = arg->url; const char dir = arg->dir; const char *filename = arg->filename;
3fa72383
314ff77b	#ifdef CL_THREAD_SAFE pthread_mutex_lock(&init_mutex); #endif
6b93ea0c	if(!initialised) {
314ff77b	if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) { #ifdef CL_THREAD_SAFE pthread_mutex_unlock(&init_mutex); #endif return NULL; }
6b93ea0c	initialised = 1;
3fa72383	}
314ff77b	#ifdef CL_THREAD_SAFE pthread_mutex_unlock(&init_mutex); #endif
6b93ea0c	/* easy isn't the word I'd use... */ curl = curl_easy_init(); if(curl == NULL)
314ff77b	return NULL;
da812a6a
6b93ea0c	(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net"); if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0)
314ff77b	return NULL;
6b93ea0c
3fa72383	fout = cli_malloc(strlen(dir) + strlen(filename) + 2);
da812a6a	if(fout == NULL) { curl_easy_cleanup(curl);
314ff77b	return NULL;
da812a6a	}
3fa72383
a95c894a	snprintf(fout, NAME_MAX, "%s/%s", dir, filename);
3fa72383	fp = fopen(fout, "w"); if(fp == NULL) {
138b73f6	cli_errmsg("Can't open '%s' for writing", fout);
3fa72383	free(fout);
da812a6a	curl_easy_cleanup(curl);
314ff77b	return NULL;
3fa72383	}
05ea2522	#ifdef CURLOPT_WRITEDATA
314ff77b	if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) { fclose(fp); free(fout); curl_easy_cleanup(curl); return NULL; }
05ea2522	#else if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) { fclose(fp); free(fout); curl_easy_cleanup(curl); return NULL; } #endif
314ff77b
6b93ea0c	/*
3eb12bae	* If an item is in squid's cache get it from there (TCP_HIT/200)
6b93ea0c	* by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200) */ headers = curl_slist_append(NULL, "Pragma:"); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
3fa72383
6b93ea0c	/* These should be customisable */ curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
49674596	#ifdef CURLOPT_MAXFILESIZE curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024); #endif
3fa72383
314ff77b	#ifdef CL_THREAD_SAFE
c07de365	#ifdef CURLOPT_DNS_USE_GLOBAL_CACHE
314ff77b	curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0); #endif
c07de365	#endif
02406150	/* * Prevent password: prompting with older versions * FIXME: a better username? */
66df01fa	curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
02406150
314ff77b	/* * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked" * from gethostbyaddr_r within this. It may be a bug in libcurl * rather than this code, but I need to check, see Curl_resolv() * If pushed really hard it will sometimes say * Conditional jump or move depends on uninitialised value(s) and * quit. But the program seems to work OK without valgrind... * Perhaps Curl_resolv() isn't thread safe? */
8386482b	/*
aa479b7d	* On some C libraries (notably with FC3, glibc-2.3.3-74) you get a * memory leak * here in getaddrinfo(), see
0856891e	* https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
8386482b	*/
6b93ea0c	if(curl_easy_perform(curl) != CURLE_OK) { cli_warnmsg("URL %s failed to download\n", url); unlink(fout); } fclose(fp);
314ff77b	curl_slist_free_all(headers);
6b93ea0c	curl_easy_cleanup(curl); free(fout);
314ff77b	return NULL;
3fa72383	} #endif #else static void checkURLs(message m, const char dir) { } #endif
d1382234	#ifdef HAVE_BACKTRACE
f2b068fb	static void
98685ac1	sigsegv(int sig) { signal(SIGSEGV, SIG_DFL);
d1382234	print_trace(1);
98685ac1	exit(SIGSEGV); }
f2b068fb	static void
98685ac1	print_trace(int use_syslog) { void array[10]; size_t size; char *strings; size_t i; pid_t pid = getpid(); size = backtrace(array, 10); strings = backtrace_symbols(array, size); if(use_syslog == 0) cli_dbgmsg("Backtrace of pid %d:\n", pid);
735377bc	else
98685ac1	syslog(LOG_ERR, "Backtrace of pid %d:", pid); for(i = 0; i < size; i++) if(use_syslog)
4d9c0ca8	syslog(LOG_ERR, "bt[%d]: %s", (int)i, strings[i]);
98685ac1	else cli_dbgmsg("%s\n", strings[i]);
7d3d11d0	/* TODO: dump the current email */
98685ac1	free(strings); } #endif