e3aaff8e |
/*
* Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
3eaab452 |
*
* Change History:
* $Log: mbox.c,v $ |
f1c1300c |
* Revision 1.163 2004/10/31 09:28:56 nigelhorne
* Handle unbalanced quotes in multipart headers
* |
c79a2273 |
* Revision 1.162 2004/10/24 04:35:15 nigelhorne
* Handle multipart/knowbot as multipart/mixed
* |
fdc6adbe |
* Revision 1.161 2004/10/21 10:18:40 nigelhorne
* PARTIAL: readdir_r even more options :-(
* |
a0b21816 |
* Revision 1.160 2004/10/21 09:41:07 nigelhorne
* PARTIAL: add readdir_r fix to BeOS
* |
13462674 |
* Revision 1.159 2004/10/20 10:35:41 nigelhorne
* Partial mode: fix possible stack corruption with Solaris
* |
a7398c9c |
* Revision 1.158 2004/10/17 09:29:21 nigelhorne
* Advise to report broken emails
* |
0356cdc0 |
* Revision 1.157 2004/10/16 20:53:28 nigelhorne
* Tidy up
* |
6e5d95eb |
* Revision 1.156 2004/10/16 19:09:39 nigelhorne
* Handle BeMail (BeOS) files
* |
1815e490 |
* Revision 1.155 2004/10/16 17:23:04 nigelhorne
* Handle colons in quotes in headers
* |
15033cb6 |
* Revision 1.154 2004/10/16 09:01:05 nigelhorne
* Improved handling of wraparound headers
* |
4de5fffd |
* Revision 1.153 2004/10/14 21:18:49 nigelhorne
* Harden the test for RFC2047 encoded headers
* |
cf569541 |
* Revision 1.152 2004/10/14 17:45:13 nigelhorne
* RFC2047 on long lines produced by continuation headers
* |
3a0f75c6 |
* Revision 1.151 2004/10/10 11:10:20 nigelhorne
* Remove perror - replace with cli_errmsg
* |
1d3d7dd9 |
* Revision 1.150 2004/10/09 08:01:37 nigelhorne
* Needs libcurl >= 7.11
* |
5a15955b |
* Revision 1.149 2004/10/06 17:21:30 nigelhorne
* Fix RFC2298 handling broken by RFC1341 code
* |
f10460ed |
* Revision 1.148 2004/10/05 15:41:53 nigelhorne
* First draft of code to handle RFC1341
* |
9a729c80 |
* Revision 1.147 2004/10/04 12:18:09 nigelhorne
* Better warning message about PGP attachments not being scanned
* |
1b00d9a4 |
* Revision 1.146 2004/10/04 10:52:39 nigelhorne
* Better error message on RFC2047 decode error
* |
7c5a7a47 |
* Revision 1.145 2004/10/01 13:49:22 nigelhorne
* Minor code tidy
* |
22080fa5 |
* Revision 1.144 2004/10/01 07:55:36 nigelhorne
* Better error message on message/partial
* |
2c7d1edd |
* Revision 1.143 2004/09/30 21:47:35 nigelhorne
* Removed unneeded strdups
* |
c77c8809 |
* Revision 1.142 2004/09/28 18:40:12 nigelhorne
* Use stack rather than heap where possible
* |
ba867aed |
* Revision 1.141 2004/09/23 08:43:25 nigelhorne
* Scan multipart/digest messages
* |
00d46ae6 |
* Revision 1.140 2004/09/22 16:09:51 nigelhorne
* Build if CURLOPT_DNS_USE_GLOBAL_CACHE isn't supported
* |
12f3689d |
* Revision 1.139 2004/09/22 15:49:13 nigelhorne
* Handle RFC2298 messages
* |
cd483c9b |
* Revision 1.138 2004/09/22 15:21:50 nigelhorne
* Fix typo
* |
15f4aa67 |
* Revision 1.137 2004/09/21 20:47:38 nigelhorne
* FOLLOWURL: Set a default username and password for password protected pages
* |
7e492164 |
* Revision 1.136 2004/09/21 12:18:52 nigelhorne
* Fallback to CURLOPT_FILE if CURLOPT_WRITEDATA isn't defined
* |
548a5f96 |
* Revision 1.135 2004/09/21 08:14:00 nigelhorne
* Now compiles in machines with libcurl but without threads
* |
59da5a4f |
* Revision 1.134 2004/09/20 17:08:43 nigelhorne
* Some performance enhancements
* |
8037334b |
* Revision 1.133 2004/09/20 12:44:03 nigelhorne
* Fix parsing error on mime arguments
* |
f017fbdd |
* Revision 1.132 2004/09/20 08:31:56 nigelhorne
* FOLLOWURLS now compiled if libcurl is found
* |
767f16ab |
* Revision 1.131 2004/09/18 14:59:25 nigelhorne
* Code tidy
* |
6d312569 |
* Revision 1.130 2004/09/17 10:56:29 nigelhorne
* Handle multiple content-type headers and use the most likely
* |
53bfac08 |
* Revision 1.129 2004/09/17 09:48:53 nigelhorne
* Handle attempts to hide mime type
* |
a9714c49 |
* Revision 1.128 2004/09/17 09:09:44 nigelhorne
* Better handling of RFC822 comments
* |
50df4118 |
* Revision 1.127 2004/09/16 18:00:43 nigelhorne
* Handle RFC2047
* |
0960ff5e |
* Revision 1.126 2004/09/16 14:23:57 nigelhorne
* Handle quotes around mime type
* |
97867f21 |
* Revision 1.125 2004/09/16 12:59:36 nigelhorne
* Handle = and space as header separaters
* |
1eec55a6 |
* Revision 1.124 2004/09/16 11:20:33 nigelhorne
* Better handling of folded headers in multipart messages
* |
8b3563f2 |
* Revision 1.123 2004/09/16 08:56:19 nigelhorne
* Handle RFC822 Comments
* |
3499d81e |
* Revision 1.122 2004/09/15 22:09:26 nigelhorne
* Handle spaces before colons
* |
c7b69776 |
* Revision 1.121 2004/09/15 18:08:23 nigelhorne
* Handle multiple encoding types
* |
d77c655d |
* Revision 1.120 2004/09/15 08:47:07 nigelhorne
* Cleaner way to initialise hrefs
* |
15021325 |
* Revision 1.119 2004/09/14 20:47:28 nigelhorne
* Use new normalise code
* |
90bb9c3e |
* Revision 1.118 2004/09/14 12:09:37 nigelhorne
* Include old normalise code
* |
3805ebcb |
* Revision 1.117 2004/09/13 16:44:01 kojm
* minor cleanup
* |
b143af46 |
* Revision 1.116 2004/09/13 13:16:28 nigelhorne
* Return CL_EFORMAT on bad format
* |
ef822cfc |
* Revision 1.115 2004/09/06 11:02:08 nigelhorne
* Normalise HTML before scanning for URLs to download
* |
28010d29 |
* Revision 1.114 2004/09/03 15:59:00 nigelhorne
* Handle boundary= "foo"
* |
69543a9d |
* Revision 1.113 2004/08/26 09:33:20 nigelhorne
* Scan Communigate Pro files
* |
09e05292 |
* Revision 1.112 2004/08/23 13:15:16 nigelhorne
* messageClearMarkers
* |
b0b860f1 |
* Revision 1.111 2004/08/22 20:20:14 nigelhorne
* Tidy
* |
78e302e1 |
* Revision 1.110 2004/08/22 15:08:59 nigelhorne
* messageExport
* |
0e5a0129 |
* Revision 1.109 2004/08/22 10:34:24 nigelhorne
* Use fileblob
* |
b2223aad |
* Revision 1.108 2004/08/21 11:57:57 nigelhorne
* Use line.[ch]
* |
1df4cbc7 |
* Revision 1.107 2004/08/20 04:55:07 nigelhorne
* FOLLOWURL
* |
ab3107bc |
* Revision 1.106 2004/08/20 04:53:18 nigelhorne
* Tidy up
* |
f121cb96 |
* Revision 1.105 2004/08/18 21:35:08 nigelhorne
* Multithread the FollowURL calls
* |
55a3f03b |
* Revision 1.104 2004/08/18 15:53:43 nigelhorne
* Honour CL_MAILURL
* |
38cf81a6 |
* Revision 1.103 2004/08/18 10:49:45 nigelhorne
* CHECKURLs was mistakenly turned on
* |
65684cec |
* Revision 1.102 2004/08/18 07:45:20 nigelhorne
* Use configure WITH_CURL value
* |
393a6d67 |
* Revision 1.101 2004/08/17 08:28:32 nigelhorne
* Support multitype/fax-message
* |
87c9313e |
* Revision 1.100 2004/08/12 10:36:09 nigelhorne
* LIBCURL completed
* |
9558d802 |
* Revision 1.99 2004/08/11 15:28:39 nigelhorne
* No longer needs curl.h
* |
4f1d0bfc |
* Revision 1.98 2004/08/11 14:46:22 nigelhorne
* Better handling of false positive emails
* |
928579ad |
* Revision 1.97 2004/08/10 14:02:22 nigelhorne
* *** empty log message ***
* |
9b4bb8b7 |
* Revision 1.96 2004/08/10 08:14:00 nigelhorne
* Enable CHECKURL
* |
bbd2d959 |
* Revision 1.95 2004/08/09 21:37:21 kojm
* libclamav: add new option CL_MAILURL
* |
6eedb434 |
* Revision 1.94 2004/08/09 08:26:36 nigelhorne
* Thread safe checkURL
* |
71ba1dcd |
* Revision 1.93 2004/08/08 21:30:47 nigelhorne
* First draft of CheckURL
* |
ebfb4048 |
* Revision 1.92 2004/08/08 19:13:14 nigelhorne
* Better handling of bounces
* |
f12d2498 |
* Revision 1.91 2004/08/04 18:59:19 nigelhorne
* Tidy up multipart handling
* |
1a74d4df |
* Revision 1.90 2004/07/26 17:02:56 nigelhorne
* Fix crash when debugging on SPARC
* |
f7fa3820 |
* Revision 1.89 2004/07/26 09:12:12 nigelhorne
* Fix crash when debugging on Solaris
* |
0c0894b8 |
* Revision 1.88 2004/07/20 14:35:29 nigelhorne
* Some MYDOOM.I were getting through
* |
8000d078 |
* Revision 1.87 2004/07/19 17:54:40 kojm
* Use new patter matching algorithm. Cleanup.
* |
0e4e16d4 |
* Revision 1.86 2004/07/06 09:32:45 nigelhorne
* Better handling of Gibe.3 boundary exploit
* |
b9ce9639 |
* Revision 1.85 2004/06/30 19:48:58 nigelhorne
* Some TR.Happy99.SKA were getting through
* |
3f3f9085 |
* Revision 1.84 2004/06/30 14:30:40 nigelhorne
* Fix compilation error on Solaris
* |
f2f25418 |
* Revision 1.83 2004/06/28 11:44:45 nigelhorne
* Remove empty parts
* |
f73920a4 |
* Revision 1.82 2004/06/25 13:56:38 nigelhorne
* Optimise messages without other messages encapsulated within them
* |
21631591 |
* Revision 1.81 2004/06/24 21:36:38 nigelhorne
* Plug memory leak with large number of attachments
* |
d79597e3 |
* Revision 1.80 2004/06/23 16:23:25 nigelhorne
* Further empty line optimisation
* |
02927896 |
* Revision 1.79 2004/06/22 04:08:01 nigelhorne
* Optimise empty lines
* |
640ed140 |
* Revision 1.78 2004/06/21 10:21:19 nigelhorne
* Fix crash when a multipart/mixed message contains many parts that need to be scanned as attachments
* |
b726511f |
* Revision 1.77 2004/06/18 10:07:12 nigelhorne
* Allow any number of alternatives in multipart messages
* |
e2875303 |
* Revision 1.76 2004/06/16 08:07:39 nigelhorne
* Added thread safety
* |
cb5a87e0 |
* Revision 1.75 2004/06/14 09:07:10 nigelhorne
* Handle spam using broken e-mail generators for multipart/alternative
* |
51fc2aa8 |
* Revision 1.74 2004/06/09 18:18:59 nigelhorne
* Find uuencoded viruses in multipart/mixed that have no start of message boundaries
* |
d46678ed |
* Revision 1.73 2004/05/14 08:15:55 nigelhorne
* Use mkstemp on cygwin
* |
187061d8 |
* Revision 1.72 2004/05/12 11:20:37 nigelhorne
* More bounce message false positives handled
* |
a7527b1f |
* Revision 1.71 2004/05/10 11:35:11 nigelhorne
* No need to update mbox.c for cli_filetype problem |
ba888390 |
* |
2f4737ed |
* Revision 1.69 2004/05/06 11:26:49 nigelhorne
* Force attachments marked as RFC822 messages to be scanned
* |
f0627588 |
* Revision 1.68 2004/04/29 08:59:24 nigelhorne
* Tidied up SetDispositionType
* |
aeca3893 |
* Revision 1.67 2004/04/23 10:47:41 nigelhorne
* If an inline text portion has a filename treat is as an attachment
* |
f35bc674 |
* Revision 1.66 2004/04/14 08:32:21 nigelhorne
* When debugging print the email number in mailboxes
* |
3a978f7d |
* Revision 1.65 2004/04/07 18:18:07 nigelhorne
* Some occurances of W97M.Lexar were let through
* |
93d41ee4 |
* Revision 1.64 2004/04/05 09:32:20 nigelhorne
* Added SCAN_TO_DISC define
* |
12f5aef2 |
* Revision 1.63 2004/04/01 15:32:34 nigelhorne
* Graceful exit if messageAddLine fails in strdup
* |
7c1eb3bf |
* Revision 1.62 2004/03/31 17:00:20 nigelhorne
* Code tidy up free memory earlier
* |
1bfbedd4 |
* Revision 1.61 2004/03/30 22:45:13 nigelhorne
* Better handling of multipart/multipart messages
* |
d879a7b0 |
* Revision 1.60 2004/03/29 09:22:03 nigelhorne
* Tidy up code and reduce shuffling of data
* |
4d825c09 |
* Revision 1.59 2004/03/26 11:08:36 nigelhorne
* Use cli_writen
* |
c81143fc |
* Revision 1.58 2004/03/25 22:40:46 nigelhorne
* Removed even more calls to realloc and some duplicated code
* |
61485e09 |
* Revision 1.57 2004/03/21 17:19:49 nigelhorne
* Handle bounce messages with no headers
* |
ae3bda56 |
* Revision 1.56 2004/03/21 09:41:26 nigelhorne
* Faster scanning for non MIME messages
* |
891d6e39 |
* Revision 1.55 2004/03/20 17:39:23 nigelhorne
* First attempt to handle all bounces
* |
ef704fb3 |
* Revision 1.54 2004/03/19 15:40:45 nigelhorne
* Handle empty content-disposition types
* |
705e985c |
* Revision 1.53 2004/03/19 08:08:02 nigelhorne
* If a message part of a multipart contains an RFC822 message that has no encoding don't scan it
* |
5c1150ac |
* Revision 1.52 2004/03/18 21:51:41 nigelhorne
* If a message only contains a single RFC822 message that has no encoding don't save for scanning
* |
c693116d |
* Revision 1.51 2004/03/17 19:48:12 nigelhorne
* Improved embedded RFC822 message handling
* |
e17491b2 |
* Revision 1.50 2004/03/10 22:05:39 nigelhorne
* Fix seg fault when a message in a multimessage mailbox fails to scan
* |
97e8ea68 |
* Revision 1.49 2004/03/04 13:01:58 nigelhorne
* Ensure all bounces are rescanned by cl_mbox
* |
c19dc6cd |
* Revision 1.48 2004/02/27 12:16:26 nigelhorne
* Catch lines just containing ':'
* |
a9f386ed |
* Revision 1.47 2004/02/23 10:13:08 nigelhorne
* Handle spaces before : in headers
* |
d5f16694 |
* Revision 1.46 2004/02/18 13:29:19 nigelhorne
* Stop buffer overflows for files with very long suffixes
* |
56ae62e2 |
* Revision 1.45 2004/02/18 10:07:40 nigelhorne
* Find some Yaha
* |
8ef734d4 |
* Revision 1.44 2004/02/15 08:45:54 nigelhorne
* Avoid scanning the same file twice
* |
20d3dde9 |
* Revision 1.43 2004/02/14 19:04:05 nigelhorne
* Handle spaces in boundaries
* |
bac883ff |
* Revision 1.42 2004/02/14 17:23:45 nigelhorne
* Had deleted O_BINARY by mistake
* |
d8f615d7 |
* Revision 1.41 2004/02/12 18:43:58 nigelhorne
* Use mkstemp on Solaris
* |
547b89de |
* Revision 1.40 2004/02/11 08:15:59 nigelhorne
* Use O_BINARY for cygwin
* |
6d6e8271 |
* Revision 1.39 2004/02/06 13:46:08 kojm
* Support for clamav-config.h
* |
a7e8f192 |
* Revision 1.38 2004/02/04 13:29:48 nigelhorne
* Handle partial writes - and print when write fails
* |
8ba634a9 |
* Revision 1.37 2004/02/03 22:54:59 nigelhorne
* Catch another example of Worm.Dumaru.Y
* |
c76810dc |
* Revision 1.36 2004/02/02 09:52:57 nigelhorne
* Some instances of Worm.Dumaru.Y got through the net
* |
cca4efe4 |
* Revision 1.35 2004/01/28 10:15:24 nigelhorne
* Added support to scan some bounce messages
* |
6b8999f0 |
* Revision 1.34 2004/01/24 17:43:37 nigelhorne
* Removed (incorrect) warning about uninitialised variable
* |
ad9c6836 |
* Revision 1.33 2004/01/23 10:38:22 nigelhorne
* Fixed memory leak in handling some multipart messages
* |
ab74690c |
* Revision 1.32 2004/01/23 08:51:19 nigelhorne
* Add detection of uuencoded viruses in single part multipart/mixed files
* |
9d2797b6 |
* Revision 1.31 2004/01/22 22:13:06 nigelhorne
* Prevent infinite recursion on broken uuencoded files
* |
4c60b74f |
* Revision 1.30 2004/01/13 10:12:05 nigelhorne
* Remove duplicate code when handling multipart messages
* |
3f07cba4 |
* Revision 1.29 2004/01/09 18:27:11 nigelhorne
* ParseMimeHeader could corrupt arg
* |
2625d6a0 |
* Revision 1.28 2004/01/09 15:07:42 nigelhorne
* Re-engineered update 1.11 lost in recent changes
* |
303f9be9 |
* Revision 1.27 2004/01/09 14:45:59 nigelhorne
* Removed duplicated code in multipart handler
* |
d110fe1c |
* Revision 1.26 2004/01/09 10:20:54 nigelhorne
* Locate uuencoded viruses hidden in text poritions of multipart/mixed mime messages
* |
d2a47316 |
* Revision 1.25 2004/01/06 14:41:18 nigelhorne
* Handle headers which do not not have a space after the ':'
* |
89670d69 |
* Revision 1.24 2003/12/20 13:55:36 nigelhorne
* Ensure multipart just save the bodies of attachments
* |
e06d34dc |
* Revision 1.23 2003/12/14 18:07:01 nigelhorne
* Some viruses in embedded messages were not being found
* |
5e8a92bb |
* Revision 1.22 2003/12/13 16:42:23 nigelhorne
* call new cli_chomp
* |
7e577f26 |
* Revision 1.21 2003/12/11 14:35:48 nigelhorne
* Better handling of encapsulated messages
* |
98cb5cba |
* Revision 1.20 2003/12/06 04:03:26 nigelhorne
* Handle hand crafted emails that incorrectly set multipart headers
* |
e2e7ebf5 |
* Revision 1.19 2003/11/21 07:26:31 nigelhorne
* Scan multipart alternatives that have no boundaries, finds some uuencoded happy99
* |
7d34e010 |
* Revision 1.18 2003/11/17 08:13:21 nigelhorne
* Handle spaces at the end of lines of MIME headers
* |
aa0210b6 |
* Revision 1.17 2003/11/06 05:06:42 nigelhorne
* Some applications weren't being scanned
* |
294d0774 |
* Revision 1.16 2003/11/04 08:24:00 nigelhorne
* Handle multipart messages that have no text portion
* |
c2b2d8af |
* Revision 1.15 2003/10/12 20:13:49 nigelhorne
* Use NO_STRTOK_R consistent with message.c
* |
c9b8f252 |
* Revision 1.14 2003/10/12 12:37:11 nigelhorne
* Appledouble encoded EICAR now found
* |
098d38f1 |
* Revision 1.13 2003/10/01 09:27:42 nigelhorne
* Handle content-type header going over to a new line
* |
5f0d267f |
* Revision 1.12 2003/09/29 17:10:19 nigelhorne
* Moved stub from heap to stack since its maximum size is known
* |
94b6160c |
* Revision 1.11 2003/09/29 12:58:32 nigelhorne
* Handle Content-Type: /; name="eicar.com"
* |
3eaab452 |
* Revision 1.10 2003/09/28 10:06:34 nigelhorne
* Compilable under SCO; removed duplicate code with message.c
* |
e3aaff8e |
*/ |
f1c1300c |
static char const rcsid[] = "$Id: mbox.c,v 1.163 2004/10/31 09:28:56 nigelhorne Exp $"; |
6d6e8271 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif |
e3aaff8e |
#ifndef CL_DEBUG |
548a5f96 |
#define NDEBUG /* map CLAMAV debug onto standard */ |
e3aaff8e |
#endif
#ifdef CL_THREAD_SAFE |
98cb5cba |
#ifndef _REENTRANT |
e3aaff8e |
#define _REENTRANT /* for Solaris 2.8 */
#endif |
98cb5cba |
#endif |
e3aaff8e |
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h> |
d4d14218 |
#include <sys/param.h> |
e3aaff8e |
#include <clamav.h> |
f10460ed |
#include <dirent.h> |
a0b21816 |
#include <limits.h> |
e3aaff8e |
|
e2875303 |
#ifdef CL_THREAD_SAFE
#include <pthread.h>
#endif
|
e3aaff8e |
#include "table.h"
#include "mbox.h"
#include "blob.h" |
b2223aad |
#include "line.h" |
e3aaff8e |
#include "text.h"
#include "message.h"
#include "others.h"
#include "defaults.h" |
7e577f26 |
#include "str.h" |
e3aaff8e |
|
02927896 |
#ifdef CL_DEBUG
#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
#define HAVE_BACKTRACE
#endif |
3f3f9085 |
#endif |
02927896 |
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#include <signal.h>
#include <syslog.h>
static void sigsegv(int sig);
static void print_trace(int use_syslog);
#endif
|
c2b2d8af |
#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE) |
e3aaff8e |
#undef strtok_r
#undef __strtok_r
#define strtok_r(a,b,c) strtok(a,b)
#endif
/* required for AIX and Tru64 */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
|
87c9313e |
typedef enum { FALSE = 0, TRUE = 1 } bool;
|
9b4bb8b7 |
#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */ |
393a6d67 |
|
f017fbdd |
/*
* Code does exist to run FOLLOWURLS on systems without libcurl, however that
* is not recommended so it is not compiled by default
*/
#ifdef WITH_CURL
#define FOLLOWURLS /* |
65684cec |
* If an email contains URLs, check them - helps to
* find Dialer.gen-45 |
87c9313e |
*/ |
f017fbdd |
#endif |
9b4bb8b7 |
|
55a3f03b |
#ifdef FOLLOWURLS |
65684cec |
|
15021325 |
#include "htmlnorm.h"
|
65684cec |
#define MAX_URLS 5 /* |
393a6d67 |
* Maximum number of URLs scanned in a message
* part
*/ |
65684cec |
#ifdef WITH_CURL /* Set in configure */
/*
* To build with WITH_CURL:
* LDFLAGS=`curl-config --libs` ./configure ...
*/ |
9558d802 |
#include <curl/curl.h> |
1d3d7dd9 |
/*
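* Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and 7.10 is
* untested).
* NOTE(review): the version test below only rejects releases older than
* 7.10, so 7.10 itself is still accepted - confirm which minimum is intended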
*/
#if (LIBCURL_VERSION_MAJOR < 7)
#undef WITH_CURL /* also undef FOLLOWURLS? */ |
9558d802 |
#endif |
55a3f03b |
|
3a0f75c6 |
#if (LIBCURL_VERSION_MAJOR == 7) && (LIBCURL_VERSION_MINOR < 10) |
1d3d7dd9 |
#undef WITH_CURL /* also undef FOLLOWURLS? */
#endif
#endif /*WITH_CURL*/
|
55a3f03b |
#else /*!FOLLOWURLS*/
#undef WITH_CURL |
1d3d7dd9 |
#endif /*FOLLOWURLS*/ |
9558d802 |
|
f10460ed |
/*
* Define this to handle RFC1341 messages.
* This is experimental code so it is up to YOU to (1) ensure it's secure |
cf569541 |
* (2) periodically trim the directory of old files
*
* If you use the load balancing feature of clamav-milter to run clamd on
* more than one machine you must make sure that /tmp/partial is on a shared
* network filesystem |
f10460ed |
*/ |
1d3d7dd9 |
/*#define PARTIAL_DIR "/tmp/partial" /* FIXME: should be config based on TMPDIR */ |
f10460ed |
|
b2223aad |
static message *parseEmailHeaders(const message *m, const table_t *rfc821Table); |
4c60b74f |
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); |
b0b860f1 |
static int parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options); |
e3aaff8e |
static int boundaryStart(const char *line, const char *boundary);
static int endOfMessage(const char *line, const char *boundary);
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static int getTextPart(message *const messages[], size_t size);
static size_t strip(char *buf, int len);
static bool continuationMarker(const char *line);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg); |
cca4efe4 |
static void saveTextPart(message *m, const char *dir); |
50df4118 |
static char *rfc2047(const char *in); |
a9714c49 |
static char *rfc822comments(const char *in); |
f10460ed |
#ifdef PARTIAL_DIR
static int rfc1341(message *m, const char *dir);
#endif |
9b4bb8b7 |
|
71ba1dcd |
static void checkURLs(message *m, const char *dir); |
65684cec |
#ifdef WITH_CURL |
f121cb96 |
struct arg { |
2c7d1edd |
const char *url;
const char *dir; |
f121cb96 |
char *filename;
};
#ifdef CL_THREAD_SAFE
static void *getURL(void *a);
#else
static void *getURL(struct arg *arg);
#endif |
9b4bb8b7 |
#endif
|
e3aaff8e |
/* Longest line we expect to read, following the RFC821 limit */
#define	LINE_LENGTH	1000

/* Values stored in the header-name lookup table */
#define	CONTENT_TYPE			1
#define	CONTENT_TRANSFER_ENCODING	2
#define	CONTENT_DISPOSITION		3

/* Values stored in the MIME subtype lookup table */
#define	PLAIN		1
#define	ENRICHED	2
#define	HTML		3
#define	RICHTEXT	4
#define	MIXED		5
#define	ALTERNATIVE	6
#define	DIGEST		7
#define	SIGNED		8
#define	PARALLEL	9
#define	RELATED		10	/* RFC2387 */
#define	REPORT		11	/* RFC1892 */
#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
/*
 * RFC3458
 * Drafts stated to treat it as mixed if it is not known. This disappeared
 * in the final version (except when talking about voice-message), but it
 * is good enough for us since we do no validation of coversheet presence
 * etc. (which also has disappeared in the final version)
 */
#define	FAX		MIXED
/*
 * e.g. RFC2015
 *	Content-Type: multipart/encrypted;
 *	boundary="nextPart1383049.XCRrrar2yq";
 *	protocol="application/pgp-encrypted"
 */
#define	ENCRYPTED	13
/*
 * BeOS, expect two parts: the file and its attributes. The attributes
 * part comes as
 *	Content-Type: application/x-be_attribute
 *		name="foo"
 * I can't find where it is defined, any pointers would be appreciated.
 * For now we treat it as multipart/related
 */
#define	X_BFILE		RELATED
#define	KNOWBOT		14	/* Unknown and undocumented format? */

/* One key -> value pair used to seed a lookup table */
struct tableinit {
	const char	*key;
	int	value;
};

/*
 * Header names we act on; the list ends with a NULL key
 * TODO: make these regular expressions
 */
static const struct tableinit rfc821headers[] = {
	{	"Content-Type",			CONTENT_TYPE			},
	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
	{	"Content-Disposition",		CONTENT_DISPOSITION		},
	{	NULL,				0				}
};

/* MIME subtypes we recognise, see RFC2045; the list ends with a NULL key */
static const struct tableinit mimeSubtypes[] = {
	/* subtypes of Text */
	{	"plain",		PLAIN		},
	{	"enriched",		ENRICHED	},
	{	"html",			HTML		},
	{	"richtext",		RICHTEXT	},
	/* subtypes of Multipart */
	{	"mixed",		MIXED		},
	{	"alternative",		ALTERNATIVE	},
	{	"digest",		DIGEST		},
	{	"signed",		SIGNED		},
	{	"parallel",		PARALLEL	},
	{	"related",		RELATED		},
	{	"report",		REPORT		},
	{	"appledouble",		APPLEDOUBLE	},
	{	"fax-message",		FAX		},
	{	"encrypted",		ENCRYPTED	},
	{	"x-bfile",		X_BFILE		},	/* BeOS */
	{	"knowbot",		KNOWBOT		},	/* ??? */
	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
	{	"knowbot-code",		KNOWBOT		},	/* ??? */
	{	"knowbot-state",	KNOWBOT		},	/* ??? */
	{	NULL,			0		}
};
e2875303 |
#ifdef CL_THREAD_SAFE
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif |
e3aaff8e |
|
3eaab452 |
/* Maximum filenames under various systems */
#ifndef NAME_MAX /* e.g. Linux */
#ifdef MAXNAMELEN /* e.g. Solaris */
#define NAME_MAX MAXNAMELEN
#else
#ifdef FILENAME_MAX /* e.g. SCO */
#define NAME_MAX FILENAME_MAX
#endif
#endif
#endif
|
bac883ff |
#ifndef O_BINARY
#define O_BINARY 0
#endif
|
e3aaff8e |
/*
* TODO: when signal handling is added, need to remove temp files when a |
ef822cfc |
* signal is received |
e3aaff8e |
* TODO: add option to scan in memory not via temp files, perhaps with a |
1bfbedd4 |
* named pipe or memory mapped file, though this won't work on big e-mails
* containing many levels of encapsulated messages - it'd just take too much
* RAM |
049a18b9 |
* TODO: parse .msg format files |
c9b8f252 |
* TODO: fully handle AppleDouble format, see |
ef822cfc |
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf |
89670d69 |
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody |
69543a9d |
* TODO: Look into TNEF. Is there anything that needs to be done here? |
e3aaff8e |
*/
int |
bbd2d959 |
cli_mbox(const char *dir, int desc, unsigned int options) |
e3aaff8e |
{ |
049a18b9 |
int retcode, i; |
89670d69 |
message *m, *body; |
e3aaff8e |
FILE *fd; |
049a18b9 |
char buffer[LINE_LENGTH]; |
c7b69776 |
#ifdef HAVE_BACKTRACE |
02927896 |
void (*segv)(int);
#endif |
393a6d67 |
static table_t *rfc821, *subtype; |
e3aaff8e |
cli_dbgmsg("in mbox()\n");
|
049a18b9 |
i = dup(desc);
if((fd = fdopen(i, "rb")) == NULL) {
cli_errmsg("Can't open descriptor %d\n", desc);
close(i); |
ef822cfc |
return CL_EOPEN; |
049a18b9 |
}
if(fgets(buffer, sizeof(buffer), fd) == NULL) {
/* empty message */
fclose(fd); |
ef822cfc |
return CL_CLEAN; |
049a18b9 |
} |
e3aaff8e |
m = messageCreate(); |
51fc2aa8 |
if(m == NULL) { |
049a18b9 |
fclose(fd); |
ef822cfc |
return CL_EMEM; |
51fc2aa8 |
}
|
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&tables_mutex);
#endif |
393a6d67 |
if(rfc821 == NULL) {
assert(subtype == NULL); |
51fc2aa8 |
|
393a6d67 |
if(initialiseTables(&rfc821, &subtype) < 0) {
rfc821 = NULL;
subtype = NULL; |
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
51fc2aa8 |
messageDestroy(m);
fclose(fd); |
ef822cfc |
return CL_EMEM; |
51fc2aa8 |
} |
e3aaff8e |
} |
e2875303 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
#endif |
e3aaff8e |
|
3f3f9085 |
#ifdef HAVE_BACKTRACE |
02927896 |
segv = signal(SIGSEGV, sigsegv);
#endif
|
89670d69 |
/*
* is it a UNIX style mbox with more than one
* mail message, or just a single mail message?
*/
if(strncmp(buffer, "From ", 5) == 0) { |
e3aaff8e |
/* |
049a18b9 |
* Have been asked to check a UNIX style mbox file, which
* may contain more than one e-mail message to decode |
e3aaff8e |
*/ |
89670d69 |
bool lastLineWasEmpty = FALSE; |
f35bc674 |
int messagenumber = 1; |
e3aaff8e |
|
049a18b9 |
do {
/*cli_dbgmsg("read: %s", buffer);*/ |
e3aaff8e |
|
89670d69 |
cli_chomp(buffer);
if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) { |
f35bc674 |
cli_dbgmsg("Deal with email number %d\n", messagenumber++); |
e3aaff8e |
/* |
89670d69 |
* End of a message in the mail box |
e3aaff8e |
*/ |
b2223aad |
body = parseEmailHeaders(m, rfc821); |
4f1d0bfc |
if(body == NULL) {
messageReset(m);
continue;
} |
89670d69 |
messageDestroy(m);
if(messageGetBody(body)) |
b0b860f1 |
if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) { |
e17491b2 |
messageReset(body);
m = body;
continue;
} |
e3aaff8e |
/* |
89670d69 |
* Starting a new message, throw away all the
* information about the old one |
e3aaff8e |
*/ |
89670d69 |
m = body;
messageReset(body); |
e3aaff8e |
|
049a18b9 |
cli_dbgmsg("Finished processing message\n"); |
89670d69 |
} else |
547b89de |
lastLineWasEmpty = (bool)(buffer[0] == '\0'); |
b2223aad |
if(messageAddStr(m, buffer) < 0) |
12f5aef2 |
break; |
049a18b9 |
} while(fgets(buffer, sizeof(buffer), fd) != NULL); |
f35bc674 |
cli_dbgmsg("Deal with email number %d\n", messagenumber); |
4f1d0bfc |
} else { |
7e577f26 |
/*
* It's a single message, parse the headers then the body |
4f1d0bfc |
* Ignore blank lines at the start of the message
*/ |
69543a9d |
if(strncmp(buffer, "P I ", 4) == 0)
/*
* CommuniGate Pro format: ignore headers until
* blank line
*/
while((fgets(buffer, sizeof(buffer), fd) != NULL) &&
(strchr("\r\n", buffer[0]) == NULL))
;
/*
* Ignore any blank lines at the top of the message
*/ |
4f1d0bfc |
while(strchr("\r\n", buffer[0]) &&
(fgets(buffer, sizeof(buffer), fd) != NULL)) |
87c9313e |
;
|
4f1d0bfc |
/*
* FIXME: files full of new lines and nothing else are
* handled ungracefully... |
7e577f26 |
*/ |
b2223aad |
do { |
5c1150ac |
/* |
93d41ee4 |
* TODO: this needlessly creates a message object,
* it'd be better if parseEmailHeaders could also
* read in from a file. I do not want to lump the
* parseEmailHeaders code here, that'd be a duplication
* of code I want to avoid |
5c1150ac |
*/ |
b2223aad |
(void)cli_chomp(buffer);
if(messageAddStr(m, buffer) < 0) |
12f5aef2 |
break; |
b2223aad |
} while(fgets(buffer, sizeof(buffer), fd) != NULL); |
4f1d0bfc |
} |
7e577f26 |
|
e3aaff8e |
fclose(fd);
|
ef822cfc |
/*
* This is not necessarily true, but since the only options are
* CL_CLEAN and CL_VIRUS this is the better choice. It would be
* nice to have CL_CONTINUESCANNING or something like that
*/
retcode = CL_CLEAN; |
049a18b9 |
|
b2223aad |
body = parseEmailHeaders(m, rfc821); |
89670d69 |
messageDestroy(m); |
4f1d0bfc |
if(body) {
/*
* Write out the last entry in the mailbox
*/
if(messageGetBody(body)) |
b0b860f1 |
if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) |
b143af46 |
retcode = CL_EFORMAT; |
e3aaff8e |
|
4f1d0bfc |
/*
* Tidy up and quit
*/
messageDestroy(body);
} |
e3aaff8e |
cli_dbgmsg("cli_mbox returning %d\n", retcode);
|
3f3f9085 |
#ifdef HAVE_BACKTRACE |
02927896 |
signal(SIGSEGV, segv);
#endif
|
e3aaff8e |
return retcode;
}
/* |
7e577f26 |
* The given message contains a raw e-mail.
*
* This function parses the headers of m and sets the message's arguments |
e06d34dc |
*
* Returns the message's body with the correct arguments set |
f73920a4 |
*
* The downside of this approach is that for a short time we have two copies
* of the message in memory, the upside is that it makes for easier parsing
* of encapsulated messages, and in the long run uses less memory in those
* scenarios |
7e577f26 |
*/ |
e06d34dc |
static message * |
b2223aad |
parseEmailHeaders(const message *m, const table_t *rfc821) |
7e577f26 |
{ |
e06d34dc |
bool inHeader = TRUE; |
0356cdc0 |
bool contMarker = FALSE; |
b2223aad |
const text *t; |
89670d69 |
message *ret; |
4f1d0bfc |
bool anyHeadersFound = FALSE; |
28010d29 |
bool Xheader = FALSE; |
15033cb6 |
int commandNumber = -1; |
89670d69 |
|
02927896 |
cli_dbgmsg("parseEmailHeaders\n");
|
89670d69 |
if(m == NULL)
return NULL;
ret = messageCreate(); |
7e577f26 |
|
b2223aad |
for(t = messageGetBody(m); t; t = t->t_next) {
const char *buffer; |
7e577f26 |
|
b2223aad |
if(t->t_line)
buffer = lineGetData(t->t_line);
else |
02927896 |
buffer = NULL; |
7e577f26 |
|
28010d29 |
if(inHeader) { |
d79597e3 |
if(buffer == NULL) { |
28010d29 |
/*
* A blank line signifies the end of the header
* and the start of the text
*/ |
7e577f26 |
cli_dbgmsg("End of header information\n"); |
f12d2498 |
inHeader = FALSE; |
0356cdc0 |
} else if(((buffer[0] == '\t') || (buffer[0] == ' ') || contMarker) && |
28010d29 |
(!Xheader)) {
/*
* Section B.2 of RFC822 says TAB or SPACE means
* a continuation of the previous entry.
*
* Add all the arguments on the line
*/ |
1815e490 |
char *ptr; |
c77c8809 |
char copy[LINE_LENGTH + 1]; |
1815e490 |
bool inquotes = FALSE; |
13462674 |
bool arequotes = FALSE; |
f10460ed |
#ifdef CL_THREAD_SAFE
char *strptr;
#endif |
13462674 |
|
0356cdc0 |
contMarker = continuationMarker(buffer); |
15033cb6 |
switch(commandNumber) {
case CONTENT_TRANSFER_ENCODING:
case CONTENT_DISPOSITION:
case CONTENT_TYPE:
break;
default:
continue;
} |
c77c8809 |
assert(strlen(buffer) < sizeof(copy));
strcpy(copy, buffer); |
28010d29 |
|
1815e490 |
/*
* Ensure that the colon in headers such as
* this doesn't get mistaken for a token
* separator
* boundary="=.J:gysAG)N(3_zv"
*/
for(ptr = copy; *ptr; ptr++)
if(*ptr == '\"')
inquotes = !inquotes; |
13462674 |
else if(inquotes) { |
1815e490 |
*ptr |= '\200'; |
13462674 |
arequotes = TRUE;
} |
1815e490 |
|
548a5f96 |
#ifdef CL_THREAD_SAFE |
28010d29 |
for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) |
1815e490 |
if(strchr(ptr, '=')) { |
13462674 |
if(arequotes) {
char *p2;
for(p2 = ptr; *p2; p2++)
*p2 &= '\177';
} |
28010d29 |
messageAddArguments(ret, ptr); |
1815e490 |
} |
548a5f96 |
#else
for(ptr = strtok(copy, ";"); ptr; ptr = strtok(NULL, ":")) |
1815e490 |
if(strchr(ptr, '=')) { |
13462674 |
if(arequotes) {
char *p2;
for(p2 = ptr; *p2; p2++)
*p2 &= '\177';
} |
548a5f96 |
messageAddArguments(ret, ptr); |
1815e490 |
} |
548a5f96 |
#endif |
f12d2498 |
} else { |
28010d29 |
Xheader = (bool)(buffer[0] == 'X'); |
0356cdc0 |
contMarker = continuationMarker(buffer); |
393a6d67 |
if((parseEmailHeader(ret, buffer, rfc821) >= 0) || |
15033cb6 |
(strncasecmp(buffer, "From ", 5) == 0)) {
char cmd[LINE_LENGTH + 1];
if(cli_strtokbuf(buffer, 0, ":", cmd) != NULL) {
anyHeadersFound = TRUE;
commandNumber = tableFind(rfc821, cmd);
}
} |
f12d2498 |
} |
e17491b2 |
} else {
/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/ |
b2223aad |
if(messageAddLine(ret, t->t_line) < 0) |
0e4e16d4 |
break; |
e17491b2 |
} |
d879a7b0 |
} |
e06d34dc |
|
4f1d0bfc |
if(!anyHeadersFound) {
/*
* False positive in believing we have an e-mail when we don't
*/
messageDestroy(ret);
cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
return NULL;
}
|
93d41ee4 |
messageClean(ret);
|
e17491b2 |
cli_dbgmsg("parseEmailHeaders: return\n");
|
e06d34dc |
return ret; |
7e577f26 |
}
/* |
4c60b74f |
* Handle a header line of an email message
*/
static int |
393a6d67 |
parseEmailHeader(message *m, const char *line, const table_t *rfc821) |
4c60b74f |
{ |
0960ff5e |
char *cmd; |
4c60b74f |
int ret = -1;
#ifdef CL_THREAD_SAFE
char *strptr;
#endif |
97867f21 |
const char *separater; |
50df4118 |
char *copy, tokenseparater[2]; |
4c60b74f |
|
20d3dde9 |
cli_dbgmsg("parseEmailHeader '%s'\n", line);
|
97867f21 |
/*
* In RFC822 the separater between the key a value is a colon,
* e.g. Content-Transfer-Encoding: base64
* However some MUA's are lapse about this and virus writers exploit
* this hole, so we need to check all known possiblities
*/
for(separater = ":= "; *separater; separater++)
if(strchr(line, *separater) != NULL)
break;
if(*separater == '\0') |
1bfbedd4 |
return -1;
|
50df4118 |
copy = rfc2047(line);
if(copy == NULL)
return -1; |
f2f25418 |
|
97867f21 |
tokenseparater[0] = *separater;
tokenseparater[1] = '\0';
|
548a5f96 |
#ifdef CL_THREAD_SAFE |
97867f21 |
cmd = strtok_r(copy, tokenseparater, &strptr); |
548a5f96 |
#else
cmd = strtok(copy, tokenseparater);
#endif |
4c60b74f |
|
3499d81e |
if(cmd && (strstrip(cmd) > 0)) { |
548a5f96 |
#ifdef CL_THREAD_SAFE |
4c60b74f |
char *arg = strtok_r(NULL, "", &strptr); |
548a5f96 |
#else
char *arg = strtok(NULL, "");
#endif |
4c60b74f |
if(arg)
/*
* Found a header such as
* Content-Type: multipart/mixed;
* set arg to be
* "multipart/mixed" and cmd to |
a9f386ed |
* be "Content-Type" |
4c60b74f |
*/ |
393a6d67 |
ret = parseMimeHeader(m, cmd, rfc821, arg); |
4c60b74f |
} |
50df4118 |
free(copy); |
4c60b74f |
return ret;
}
/* |
e3aaff8e |
* This is a recursive routine.
* |
7e577f26 |
* This function parses the body of mainMessage and saves its attachments in dir
* |
e06d34dc |
* mainMessage is the buffer to be parsed, it contains an e-mail's body, without |
f12d2498 |
* any headers. First time of calling it'll be
* the whole message. Later it'll be parts of a multipart message |
e3aaff8e |
* textIn is the plain text message being built up so far
* |
d4d14218 |
* Returns: |
e3aaff8e |
* 0 for fail |
d110fe1c |
* 1 for success, attachments saved
* 2 for success, attachments not saved |
e3aaff8e |
*/
static int /* success or fail */ |
b0b860f1 |
parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options) |
e3aaff8e |
{ |
b726511f |
message **messages; /* parts of a multipart message */ |
6b8999f0 |
int inhead, inMimeHead, i, rc = 1, htmltextPart, multiparts = 0; |
e3aaff8e |
text *aText;
const char *cptr; |
ad9c6836 |
message *mainMessage; |
b0b860f1 |
fileblob *fb; |
e3aaff8e |
|
b0b860f1 |
cli_dbgmsg("in parseEmailBody\n"); |
e3aaff8e |
aText = textIn; |
b726511f |
messages = NULL; |
ad9c6836 |
mainMessage = messageIn; |
e3aaff8e |
/* Anything left to be parsed? */ |
d4d14218 |
if(mainMessage && (messageGetBody(mainMessage) != NULL)) { |
e3aaff8e |
mime_type mimeType; |
7c5a7a47 |
int subtype; |
9a729c80 |
const char *mimeSubtype, *boundary;
char *protocol; |
e3aaff8e |
const text *t_line; |
98cb5cba |
/*bool isAlternative;*/ |
e3aaff8e |
message *aMessage;
|
049a18b9 |
cli_dbgmsg("Parsing mail file\n");
|
e3aaff8e |
mimeType = messageGetMimeType(mainMessage);
mimeSubtype = messageGetMimeSubtype(mainMessage);
|
7c5a7a47 |
subtype = tableFind(subtypeTable, mimeSubtype);
if((mimeType == TEXT) && (subtype == PLAIN)) { |
e3aaff8e |
/*
* This is effectively no encoding, notice that we
* don't check that charset is us-ascii
*/
cli_dbgmsg("assume no encoding\n");
mimeType = NOMIME; |
7c5a7a47 |
messageSetMimeSubtype(mainMessage, NULL); |
e3aaff8e |
}
|
049a18b9 |
cli_dbgmsg("mimeType = %d\n", mimeType);
|
e3aaff8e |
switch(mimeType) {
case NOMIME:
aText = textAddMessage(aText, mainMessage);
break;
case TEXT: |
7c5a7a47 |
if(subtype == PLAIN) |
3f3f9085 |
/*
* Consider what to do if this fails
* (i.e. aText == NULL):
* We mustn't just return since that could
* cause a virus to be missed that we
* could be capable of scanning. Ignoring
* the error is probably the safest, we may be
* able to scan anyway and we lose nothing
*/ |
e3aaff8e |
aText = textCopy(messageGetBody(mainMessage)); |
7c5a7a47 |
else if((options&CL_SCAN_MAILURL) && (subtype == HTML))
checkURLs(mainMessage, dir); |
e3aaff8e |
break;
case MULTIPART:
boundary = messageFindArgument(mainMessage, "boundary");
if(boundary == NULL) {
cli_warnmsg("Multipart MIME message contains no boundaries\n"); |
e2e7ebf5 |
/* Broken e-mail message */
mimeType = NOMIME;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break; |
e3aaff8e |
}
|
c79a2273 |
/* Perhaps it should assume mixed? */ |
cb5a87e0 |
if(mimeSubtype[0] == '\0') {
cli_warnmsg("Multipart has no subtype assuming alternative\n");
mimeSubtype = "alternative";
messageSetMimeSubtype(mainMessage, "alternative");
}
|
e3aaff8e |
/*
* Get to the start of the first message
*/ |
20d3dde9 |
t_line = messageGetBody(mainMessage);
if(t_line == NULL) {
cli_warnmsg("Multipart MIME message has no body\n");
free((char *)boundary);
mimeType = NOMIME;
break;
}
do |
b2223aad |
if(boundaryStart(lineGetData(t_line->t_line), boundary)) |
e3aaff8e |
break; |
20d3dde9 |
while((t_line = t_line->t_next) != NULL); |
e3aaff8e |
if(t_line == NULL) { |
28010d29 |
cli_dbgmsg("Multipart MIME message contains no boundary lines\n"); |
5a642650 |
/*
* Free added by Thomas Lamy
* <Thomas.Lamy@in-online.net>
*/
free((char *)boundary); |
e2e7ebf5 |
mimeType = NOMIME;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break; |
e3aaff8e |
}
/*
* Build up a table of all of the parts of this
* multipart message. Remember, each part may itself
* be a multipart message.
*/
inhead = 1;
inMimeHead = 0;
|
e06d34dc |
/* |
9a729c80 |
* Parse the mainMessage object and create an array
* of objects called messages, one for each of the
* multiparts that mainMessage contains
* |
e06d34dc |
* This looks like parseEmailHeaders() - maybe there's
* some duplication of code to be cleaned up
*/ |
b726511f |
for(multiparts = 0; t_line; multiparts++) { |
56ae62e2 |
int lines = 0; |
1a74d4df |
message **m; |
56ae62e2 |
|
1a74d4df |
m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *))); |
f12d2498 |
if(m == NULL) |
1a74d4df |
break;
messages = m; |
b726511f |
|
e3aaff8e |
aMessage = messages[multiparts] = messageCreate(); |
3f3f9085 |
if(aMessage == NULL) {
multiparts--;
continue;
} |
e3aaff8e |
cli_dbgmsg("Now read in part %d\n", multiparts);
|
8ba634a9 |
/*
* Ignore blank lines. There shouldn't be ANY
* but some viruses insert them
*/ |
02927896 |
while((t_line = t_line->t_next) != NULL) |
b2223aad |
if(t_line->t_line &&
/*(cli_chomp(t_line->t_text) > 0))*/
(strlen(lineGetData(t_line->t_line)) > 0)) |
d79597e3 |
break; |
8ba634a9 |
if(t_line == NULL) {
cli_dbgmsg("Empty part\n"); |
b9ce9639 |
/*
* Remove this part unless there's
* a uuencoded portion somewhere in
* the complete message that we may
* throw away by mistake if the MIME
* encoding information is incorrect
*/
if(uuencodeBegin(mainMessage) == NULL) {
messageDestroy(aMessage);
--multiparts;
} |
8ba634a9 |
continue;
}
do { |
b2223aad |
const char *line = lineGetData(t_line->t_line); |
e3aaff8e |
|
e06d34dc |
/*cli_dbgmsg("inMimeHead %d inhead %d boundary %s line '%s' next '%s'\n",
inMimeHead, inhead, boundary, line, t_line->t_next ? t_line->t_next->t_text : "(null)");*/ |
e3aaff8e |
|
f1c1300c |
if(inMimeHead) { /* continuation line */ |
02927896 |
if(line == NULL) {
inhead = inMimeHead = 0;
continue;
} |
3a978f7d |
/*
* Handle continuation lines
* because the previous line |
1eec55a6 |
* ended with a ; or this line
* starts with a white space |
3a978f7d |
*/ |
1eec55a6 |
cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
multiparts, line); |
3a978f7d |
/*
* Handle the case when it
* isn't really a continuation
* line:
* Content-Type: application/octet-stream;
* Content-Transfer-Encoding: base64
*/
parseEmailHeader(aMessage, line, rfc821Table);
|
e3aaff8e |
while(isspace((int)*line))
line++;
if(*line == '\0') {
inhead = inMimeHead = 0;
continue;
}
/*
* This may cause a trailing ';'
* to be added if this test
* fails - TODO: verify this
*/
inMimeHead = continuationMarker(line);
messageAddArgument(aMessage, line); |
f1c1300c |
} else if(inhead) { /* handling normal headers */ |
02927896 |
if(line == NULL) {
/* empty line */ |
e3aaff8e |
inhead = 0;
continue;
} |
c76810dc |
if(isspace((int)*line)) {
/*
* The first line is
* continuation line.
* This is tricky
* to handle, but
* all we can do is our
* best
*/
cli_dbgmsg("Part %d starts with a continuation line\n",
multiparts);
messageAddArgument(aMessage, line);
/*
* Give it a default
* MIME type since
* that may be the
* missing line
*
* Choose application to
* force a save
*/
if(messageGetMimeType(aMessage) == NOMIME)
messageSetMimeType(aMessage, "application");
continue;
}
|
e3aaff8e |
/*
* Some clients are broken and
* put white space after the ;
*/
inMimeHead = continuationMarker(line); |
b2223aad |
if(!inMimeHead) {
const text *next = t_line->t_next; |
1eec55a6 |
char *fullline = strdup(line); |
f1c1300c |
int quotes = 0;
const char *qptr; |
b2223aad |
|
c77c8809 |
assert(strlen(line) <= LINE_LENGTH); |
f1c1300c |
for(qptr = line; *qptr; qptr++)
if(*qptr == '\"')
quotes++;
|
1eec55a6 |
/*
* Fold next lines to the end of this
* if they start with a white space |
f1c1300c |
* or if this line has an odd number of quotes:
* Content-Type: application/octet-stream; name="foo
* " |
1eec55a6 |
*/
while(next && next->t_line) { |
b2223aad |
const char *data = lineGetData(next->t_line); |
1eec55a6 |
char *ptr;
|
f1c1300c |
if((!isspace(data[0])) &&
((quotes & 1) == 0)) |
1eec55a6 |
break;
ptr = cli_realloc(fullline,
strlen(fullline) + strlen(data) + 1);
if(ptr == NULL)
break; |
b2223aad |
|
1eec55a6 |
fullline = ptr;
strcat(fullline, data);
|
f1c1300c |
for(qptr = data; *qptr; qptr++)
if(*qptr == '\"')
quotes++;
|
1eec55a6 |
t_line = next;
next = next->t_next; |
b2223aad |
} |
1eec55a6 |
cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
multiparts, fullline); |
303f9be9 |
|
1eec55a6 |
parseEmailHeader(aMessage, fullline, rfc821Table);
free(fullline);
} else {
cli_dbgmsg("Multipart %d: About to parse header '%s'\n",
multiparts, line);
parseEmailHeader(aMessage, line, rfc821Table);
} |
e3aaff8e |
} else if(boundaryStart(line, boundary)) {
inhead = 1;
break;
} else if(endOfMessage(line, boundary)) {
/*
* Some viruses put information
* *after* the end of message,
* which presumably some broken
* mail clients find, so we
* can't assume that this
* is the end of the message
*/
/* t_line = NULL;*/
break; |
56ae62e2 |
} else { |
b2223aad |
if(messageAddLine(aMessage, t_line->t_line) < 0) |
1a74d4df |
break; |
56ae62e2 |
lines++;
} |
8ba634a9 |
} while((t_line = t_line->t_next) != NULL);
|
e3aaff8e |
messageClean(aMessage); |
56ae62e2 |
cli_dbgmsg("Part %d has %d lines\n",
multiparts, lines); |
e3aaff8e |
}
free((char *)boundary);
|
7c1eb3bf |
/* |
c79a2273 |
* Preprocess. Anything special to be done before
* we handle the multiparts? |
9a729c80 |
*/ |
c79a2273 |
switch(tableFind(subtypeTable, mimeSubtype)) {
case KNOWBOT:
/* TODO */
cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
mimeSubtype = "mixed";
break;
} |
9a729c80 |
/* |
7c1eb3bf |
* We've finished message we're parsing
*/
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL; |
ad9c6836 |
} |
e3aaff8e |
|
b726511f |
if(multiparts == 0) {
if(messages)
free(messages); |
7c1eb3bf |
return 2; /* Nothing to do */ |
b726511f |
} |
7c1eb3bf |
|
e3aaff8e |
cli_dbgmsg("The message has %d parts\n", multiparts); |
393a6d67 |
cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype); |
e3aaff8e |
|
9a729c80 |
/*
* We now have all the parts of the multipart message
* in the messages array:
* message *messages[multiparts]
* Let's decide what to do with them all
*/ |
e3aaff8e |
switch(tableFind(subtypeTable, mimeSubtype)) {
case RELATED: |
e06d34dc |
cli_dbgmsg("Multipart related handler\n"); |
e3aaff8e |
/* |
294d0774 |
* Have a look to see if there's HTML code
* which will need scanning |
e3aaff8e |
*/
aMessage = NULL;
assert(multiparts > 0);
|
d4d14218 |
htmltextPart = getTextPart(messages, multiparts); |
e3aaff8e |
|
d4d14218 |
if(htmltextPart >= 0)
aText = textAddMessage(aText, messages[htmltextPart]); |
e3aaff8e |
else
/* |
294d0774 |
* There isn't an HTML bit. If there's a
* multipart bit, it'll may be in there
* somewhere |
e3aaff8e |
*/
for(i = 0; i < multiparts; i++)
if(messageGetMimeType(messages[i]) == MULTIPART) {
aMessage = messages[i]; |
d4d14218 |
htmltextPart = i; |
e3aaff8e |
break;
}
|
59da5a4f |
if(htmltextPart == -1) |
294d0774 |
cli_dbgmsg("No HTML code found to be scanned"); |
59da5a4f |
else { |
b0b860f1 |
rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options); |
59da5a4f |
if(rc == 1) {
assert(aMessage == messages[htmltextPart]);
messageDestroy(aMessage);
messages[htmltextPart] = NULL;
}
} |
e3aaff8e |
/*
* Fixed based on an idea from Stephen White <stephen@earth.li>
* The message is confused about the difference
* between alternative and related. Badtrans.B
* suffers from this problem.
*
* Fall through in this case:
* Content-Type: multipart/related;
* type="multipart/alternative"
*/ |
98cb5cba |
/*
* Changed to always fall through based on
* an idea from Michael Dankov <misha@btrc.ru>
* that some viruses are completely confused
* about the difference between related
* and mixed
*/
/*cptr = messageFindArgument(mainMessage, "type"); |
e3aaff8e |
if(cptr == NULL)
break;
isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
free((char *)cptr);
if(!isAlternative) |
98cb5cba |
break;*/ |
ba867aed |
case DIGEST:
/*
* According to section 5.1.5 RFC2046, the
* default mime type of multipart/digest parts
* is message/rfc822
*
* We consider them as alternative, wrong in
* the strictest sense since they aren't
* alternatives - all parts a valid - but it's
* OK for our needs since it means each part
* will be scanned
*/ |
e3aaff8e |
case ALTERNATIVE:
cli_dbgmsg("Multipart alternative handler\n");
|
59da5a4f |
#if 0 |
d4d14218 |
htmltextPart = getTextPart(messages, multiparts); |
e3aaff8e |
|
d4d14218 |
if(htmltextPart == -1)
htmltextPart = 0; |
e3aaff8e |
|
d4d14218 |
aMessage = messages[htmltextPart]; |
e3aaff8e |
aText = textAddMessage(aText, aMessage);
|
b0b860f1 |
rc = parseEmailBody(NULL, aText, dir, rfc821Table, subtypeTable, options); |
51fc2aa8 |
|
b0b860f1 |
if(rc == 1) |
e3aaff8e |
/*
* Alternative message has saved its
* attachments, ensure we don't do
* the same thing
*/
rc = 2; |
59da5a4f |
#endif
|
e3aaff8e |
/*
* Fall through - some clients are broken and
* say alternative instead of mixed. The Klez
* virus is broken that way
*/
case REPORT:
/*
* According to section 1 of RFC1892, the
* syntax of multipart/report is the same
* as multipart/mixed. There are some required
* parameters, but there's no need for us to
* verify that they exist
*/
case MIXED: |
c9b8f252 |
case APPLEDOUBLE: /* not really supported */ |
e3aaff8e |
/*
* Look for attachments
*
* Not all formats are supported. If an
* unsupported format turns out to be
* common enough to implement, it is a simple
* matter to add it
*/ |
ad9c6836 |
if(aText) {
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
e3aaff8e |
mainMessage = NULL; |
ad9c6836 |
} |
e3aaff8e |
cli_dbgmsg("Mixed message with %d parts\n", multiparts);
for(i = 0; i < multiparts; i++) {
bool addAttachment = FALSE;
bool addToText = FALSE;
const char *dtype; |
b0b860f1 |
#ifndef SAVE_TO_DISC |
89670d69 |
message *body; |
f12d2498 |
#endif |
e3aaff8e |
aMessage = messages[i];
|
59da5a4f |
if(aMessage == NULL)
continue; |
e3aaff8e |
dtype = messageGetDispositionType(aMessage); |
d4d14218 |
cptr = messageGetMimeSubtype(aMessage); |
e3aaff8e |
cli_dbgmsg("Mixed message part %d is of type %d\n",
i, messageGetMimeType(aMessage));
switch(messageGetMimeType(aMessage)) {
case APPLICATION: |
049a18b9 |
#if 0
/* strict checking... */ |
e3aaff8e |
if((strcasecmp(dtype, "attachment") == 0) || |
d4d14218 |
(strcasecmp(cptr, "x-msdownload") == 0) || |
049a18b9 |
(strcasecmp(cptr, "octet-stream") == 0) || |
d4d14218 |
(strcasecmp(dtype, "octet-stream") == 0)) |
e3aaff8e |
addAttachment = TRUE;
else { |
049a18b9 |
cli_dbgmsg("Discarded mixed/application not sent as attachment\n"); |
e3aaff8e |
continue;
} |
049a18b9 |
#endif
addAttachment = TRUE; |
e3aaff8e |
break;
case NOMIME: |
51fc2aa8 |
if(mainMessage) { |
393a6d67 |
const text *u_line = uuencodeBegin(mainMessage);
if(u_line) { |
51fc2aa8 |
cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n");
messageSetEncoding(mainMessage, "x-uuencode"); |
0e5a0129 |
fb = messageToFileblob(mainMessage, dir); |
51fc2aa8 |
|
0e5a0129 |
if(fb)
fileblobDestroy(fb); |
51fc2aa8 |
}
if(mainMessage != messageIn)
messageDestroy(mainMessage);
mainMessage = NULL;
} |
e3aaff8e |
addToText = TRUE;
if(messageGetBody(aMessage) == NULL)
/*
* No plain text version
*/ |
b2223aad |
messageAddStr(aMessage, "No plain text alternative"); |
e3aaff8e |
assert(messageGetBody(aMessage) != NULL);
break;
case TEXT: |
d110fe1c |
cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
dtype); |
e3aaff8e |
if(strcasecmp(dtype, "attachment") == 0)
addAttachment = TRUE;
else if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) { |
393a6d67 |
const text *u_line = uuencodeBegin(aMessage); |
d110fe1c |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
e3aaff8e |
mainMessage = NULL; |
78e302e1 |
cli_dbgmsg("Mime subtype \"%s\"\n", cptr); |
393a6d67 |
if(u_line) { |
d110fe1c |
cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n");
messageSetEncoding(aMessage, "x-uuencode");
addAttachment = TRUE; |
ab3107bc |
} else if(tableFind(subtypeTable, cptr) == PLAIN) { |
aeca3893 |
char *filename; |
d110fe1c |
/*
* Strictly speaking |
ab3107bc |
* a text/plain part is |
d110fe1c |
* not an attachment. We
* pretend it is so that
* we can decode and
* scan it
*/ |
aeca3893 |
filename = (char *)messageFindArgument(aMessage, "filename");
if(filename == NULL)
filename = (char *)messageFindArgument(aMessage, "name");
if(filename == NULL) {
cli_dbgmsg("Adding part to main message\n");
addToText = TRUE;
} else {
cli_dbgmsg("Treating %s as attachment\n",
filename);
free(filename);
addAttachment = TRUE;
} |
d110fe1c |
} else { |
3805ebcb |
if(options&CL_SCAN_MAILURL) |
ab3107bc |
if(tableFind(subtypeTable, cptr) == HTML)
checkURLs(aMessage, dir); |
e3aaff8e |
messageAddArgument(aMessage, "filename=textportion");
addAttachment = TRUE;
}
} else { |
28010d29 |
cli_dbgmsg("Text type %s is not supported\n", dtype); |
e3aaff8e |
continue;
}
break;
case MESSAGE: |
2f4737ed |
/* Content-Type: message/rfc822 */ |
e3aaff8e |
cli_dbgmsg("Found message inside multipart\n"); |
d879a7b0 |
if(encodingLine(aMessage) == NULL) {
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
705e985c |
continue; |
d879a7b0 |
} |
b2223aad |
messageAddStrAtTop(aMessage, |
2f4737ed |
"Received: by clamd"); |
93d41ee4 |
#ifdef SAVE_TO_DISC
/*
* Save this embedded message
* to a temporary file
*/
saveTextPart(aMessage, dir);
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL;
#else
/*
* Scan in memory, faster but
* is open to DoS attacks when
* many nested levels are
* involved.
*/ |
f73920a4 |
body = parseEmailHeaders(aMessage, rfc821Table, TRUE); |
c693116d |
/*
* We've fininished with the
* original copy of the message,
* so throw that away and
* deal with the encapsulated
* message as a message.
* This can save a lot of memory
*/
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
89670d69 |
if(body) { |
b0b860f1 |
rc = parseEmailBody(body, NULL, dir, rfc821Table, subtypeTable, options); |
89670d69 |
messageDestroy(body);
} |
93d41ee4 |
#endif |
e3aaff8e |
continue;
case MULTIPART:
/*
* It's a multi part within a multi part
* Run the message parser on this bit, it won't
* be an attachment
*/
cli_dbgmsg("Found multipart inside multipart\n"); |
f12d2498 |
if(aMessage) {
/*
* The headers were parsed when reading in the
* whole multipart section
*/ |
b0b860f1 |
rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options); |
f12d2498 |
cli_dbgmsg("Finished recursion\n");
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
89670d69 |
} else { |
b0b860f1 |
rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); |
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
89670d69 |
mainMessage = NULL;
} |
e3aaff8e |
continue;
case AUDIO:
case IMAGE: |
8ef734d4 |
case VIDEO: |
e3aaff8e |
addAttachment = TRUE;
break;
default: |
8ef734d4 |
cli_warnmsg("Only text and application attachments are supported, type = %d\n", |
e3aaff8e |
messageGetMimeType(aMessage));
continue;
}
/*
* It must be either text or
* an attachment. It can't be both
*/
assert(addToText || addAttachment);
assert(!(addToText && addAttachment));
|
640ed140 |
if(addToText) |
e3aaff8e |
aText = textAdd(aText, messageGetBody(aMessage)); |
b0b860f1 |
else {
fb = messageToFileblob(aMessage, dir); |
e3aaff8e |
|
0e5a0129 |
if(fb)
fileblobDestroy(fb); |
e3aaff8e |
} |
7c1eb3bf |
assert(aMessage == messages[i]);
messageDestroy(messages[i]);
messages[i] = NULL; |
e3aaff8e |
}
|
b0b860f1 |
/* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */ |
e3aaff8e |
break;
case SIGNED:
case PARALLEL:
/*
* If we're here it could be because we have a
* multipart/mixed message, consisting of a
* message followed by an attachment. That
* message itself is a multipart/alternative
* message and we need to dig out the plain
* text part of that alternative
*/ |
d4d14218 |
htmltextPart = getTextPart(messages, multiparts);
if(htmltextPart == -1)
htmltextPart = 0; |
e3aaff8e |
|
b0b860f1 |
rc = parseEmailBody(messages[htmltextPart], aText, dir, rfc821Table, subtypeTable, options); |
e3aaff8e |
break; |
9a729c80 |
case ENCRYPTED:
rc = 0; |
c79a2273 |
protocol = (char *)messageFindArgument(mainMessage, "protocol"); |
9a729c80 |
if(protocol) {
if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
/* RFC2015 */
cli_warnmsg("PGP encoded attachment not scanned\n");
rc = 2;
} else
cli_warnmsg("Unknown encryption protocol '%s' - report to bugs@clamav.net\n");
free(protocol);
} else
cli_warnmsg("Encryption method missing protocol name - report to bugs@clamav.net\n");
break; |
e3aaff8e |
default:
/*
* According to section 7.2.6 of RFC1521,
* unrecognised multiparts should be treated as
* multipart/mixed. I don't do this yet so
* that I can see what comes along...
*/ |
a7398c9c |
cli_warnmsg("Unsupported multipart format `%s' - report to bugs@clamav.net\n", mimeSubtype); |
e3aaff8e |
rc = 0;
}
for(i = 0; i < multiparts; i++) |
c693116d |
if(messages[i])
messageDestroy(messages[i]); |
e3aaff8e |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
049a18b9 |
if(aText && (textIn == NULL))
textDestroy(aText);
|
b726511f |
if(messages)
free(messages);
|
e3aaff8e |
return rc;
case MESSAGE:
/*
* Check for forbidden encodings
*/
switch(messageGetEncoding(mainMessage)) {
case NOENCODING:
case EIGHTBIT:
case BINARY:
break;
default: |
049a18b9 |
cli_warnmsg("MIME type 'message' cannot be decoded\n"); |
e3aaff8e |
break;
} |
f10460ed |
rc = 0; |
049a18b9 |
if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
(strcasecmp(mimeSubtype, "delivery-status") == 0)) { |
b2223aad |
message *m = parseEmailHeaders(mainMessage, rfc821Table); |
c693116d |
if(m) {
cli_dbgmsg("Decode rfc822");
|
93d41ee4 |
if(mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
mainMessage = NULL; |
59da5a4f |
} else
messageReset(mainMessage); |
c693116d |
if(messageGetBody(m)) |
b0b860f1 |
rc = parseEmailBody(m, NULL, dir, rfc821Table, subtypeTable, options); |
c693116d |
messageDestroy(m);
} |
e3aaff8e |
break; |
5a15955b |
} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) { |
12f3689d |
/* RFC 2298 - handle like a normal email */ |
5a15955b |
rc = 1; |
12f3689d |
break; |
5a15955b |
} else if(strcasecmp(mimeSubtype, "partial") == 0) { |
f10460ed |
#ifdef PARTIAL_DIR
/* RFC1341 message split over many emails */
if(rfc1341(mainMessage, dir) >= 0)
rc = 1;
#else |
22080fa5 |
cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n"); |
f10460ed |
rc = 0;
#endif
} else if(strcasecmp(mimeSubtype, "external-body") == 0) |
22080fa5 |
/* TODO */ |
e3aaff8e |
cli_warnmsg("Attempt to send Content-type message/external-body trapped"); |
5a642650 |
else |
22080fa5 |
cli_warnmsg("Unsupported message format `%s' - please report to bugs@clamav.net\n", mimeSubtype); |
e3aaff8e |
|
f10460ed |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage); |
b726511f |
if(messages)
free(messages); |
f10460ed |
return rc; |
e3aaff8e |
case APPLICATION: |
d4d14218 |
cptr = messageGetMimeSubtype(mainMessage);
|
aa0210b6 |
/*if((strcasecmp(cptr, "octet-stream") == 0) ||
(strcasecmp(cptr, "x-msdownload") == 0)) {*/
{ |
b0b860f1 |
fb = messageToFileblob(mainMessage, dir); |
e3aaff8e |
|
0e5a0129 |
if(fb) {
cli_dbgmsg("Saving main message as attachment\n");
fileblobDestroy(fb); |
09e05292 |
messageClearMarkers(mainMessage); |
e3aaff8e |
} |
aa0210b6 |
} /*else
cli_warnmsg("Discarded application not sent as attachment\n");*/ |
e3aaff8e |
break;
case AUDIO:
case VIDEO:
case IMAGE:
break;
default:
cli_warnmsg("Message received with unknown mime encoding");
break;
}
}
|
f12d2498 |
if(aText && (textIn == NULL)) {
textDestroy(aText);
aText = NULL;
}
|
b0b860f1 |
/*
* No attachments - scan the text portions, often files
* are hidden in HTML code
*/
cli_dbgmsg("%d multiparts found\n", multiparts);
for(i = 0; i < multiparts; i++) {
fb = messageToFileblob(messages[i], dir); |
e3aaff8e |
|
b0b860f1 |
if(fb) { |
c7b69776 |
cli_dbgmsg("Saving multipart %d\n", i); |
d4d14218 |
|
b0b860f1 |
fileblobDestroy(fb);
}
}
if(mainMessage) { |
e3aaff8e |
/* |
b0b860f1 |
* Look for uu-encoded main file |
e3aaff8e |
*/ |
b0b860f1 |
const text *t_line;
if((t_line = uuencodeBegin(mainMessage)) != NULL) {
cli_dbgmsg("Found uuencoded file\n"); |
d4d14218 |
|
b0b860f1 |
/*
* Main part contains uuencoded section
*/
messageSetEncoding(mainMessage, "x-uuencode"); |
d4d14218 |
|
b0b860f1 |
if((fb = messageToFileblob(mainMessage, dir)) != NULL) {
if((cptr = fileblobGetFilename(fb)) != NULL)
cli_dbgmsg("Found uuencoded message %s\n", cptr); |
0e5a0129 |
fileblobDestroy(fb);
} |
b0b860f1 |
} else if((encodingLine(mainMessage) != NULL) && |
69543a9d |
((t_line = bounceBegin(mainMessage)) != NULL)) { |
b0b860f1 |
const text *t;
static const char encoding[] = "Content-Transfer-Encoding"; |
d4d14218 |
/* |
b0b860f1 |
* Attempt to save the original (unbounced)
* message - clamscan will find that in the
* directory and call us again (with any luck)
* having found an e-mail message to handle
*
* This finds a lot of false positives, the
* search that an encoding line is in the
* bounce (i.e. it's after the bounce header)
* helps a bit, but at the expense of scanning
* the entire message. messageAddLine
* optimisation could help here, but needs
* careful thought, do it with line numbers
* would be best, since the current method in
* messageAddLine of checking encoding first
* must remain otherwise non bounce messages
* won't be scanned |
d4d14218 |
*/ |
b0b860f1 |
for(t = t_line; t; t = t->t_next) {
const char *txt = lineGetData(t->t_line);
if(txt &&
(strncasecmp(txt, encoding, sizeof(encoding) - 1) == 0) &&
(strstr(txt, "7bit") == NULL) &&
(strstr(txt, "8bit") == NULL))
break;
}
if(t && ((fb = fileblobCreate()) != NULL)) {
cli_dbgmsg("Found a bounce message\n");
fileblobSetFilename(fb, dir, "bounce");
fb = textToFileblob(t_line, fb);
fileblobDestroy(fb); |
b143af46 |
} else
cli_dbgmsg("Not found a bounce message\n"); |
b0b860f1 |
} else {
bool saveIt; |
d4d14218 |
|
b0b860f1 |
cli_dbgmsg("Not found uuencoded file\n"); |
e2e7ebf5 |
|
b0b860f1 |
if(messageGetMimeType(mainMessage) == MESSAGE) |
f01bbfe8 |
/* |
b0b860f1 |
* Quick peek, if the encapsulated
* message has no
* content encoding statement don't
* bother saving to scan, it's safe |
f01bbfe8 |
*/ |
b0b860f1 |
saveIt = (encodingLine(mainMessage) != NULL);
else if((t_line = encodingLine(mainMessage)) != NULL) { |
a7527b1f |
/* |
b0b860f1 |
* Some bounces include the message
* body without the headers.
* Unfortunately this generates a
* lot of false positives that a bounce
* has been found when it hasn't. |
a7527b1f |
*/ |
b0b860f1 |
if((fb = fileblobCreate()) != NULL) {
cli_dbgmsg("Found a bounce message with no header\n"); |
0e5a0129 |
fileblobSetFilename(fb, dir, "bounce"); |
b0b860f1 |
fileblobAddData(fb, "Received: by clamd\n", 19); |
cca4efe4 |
|
b0b860f1 |
fb = textToFileblob(t_line, fb); |
5c1150ac |
|
b0b860f1 |
fileblobDestroy(fb); |
5c1150ac |
} |
b0b860f1 |
saveIt = FALSE; |
59da5a4f |
} else if(multiparts == 0) |
b0b860f1 |
/*
* Save the entire text portion,
* since it it may be an HTML file with
* a JavaScript virus
*/
saveIt = TRUE; |
59da5a4f |
else
saveIt = FALSE; |
e3aaff8e |
|
b0b860f1 |
if(saveIt) {
cli_dbgmsg("Saving text part to scan\n");
/*
* TODO: May be better to save aText
*/
saveTextPart(mainMessage, dir); |
59da5a4f |
if(mainMessage != messageIn) {
messageDestroy(mainMessage);
mainMessage = NULL;
} else
messageReset(mainMessage);
rc = 1; |
049a18b9 |
} |
e3aaff8e |
} |
b0b860f1 |
} else
rc = (multiparts) ? 1 : 2; /* anything saved? */ |
e3aaff8e |
|
ad9c6836 |
if(mainMessage && (mainMessage != messageIn))
messageDestroy(mainMessage);
|
b726511f |
if(messages)
free(messages);
|
e06d34dc |
cli_dbgmsg("parseEmailBody() returning %d\n", rc); |
e3aaff8e |
|
e06d34dc |
return rc; |
e3aaff8e |
}
/*
 * Is the current line the start of a new section?
 *
 * New sections start with --boundary
 *
 * Returns non-zero if line is taken to be a boundary line, 0 if it isn't
 * or if line is NULL (an empty line)
 */
static int
boundaryStart(const char *line, const char *boundary)
{
	const char *afterDash;

	if(line == NULL)
		return 0;	/* empty line */

	cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);

	/* Every boundary line starts with at least one '-' */
	if(line[0] != '-')
		return 0;
	afterDash = &line[1];

	/*
	 * Gibe.B3 is broken, it has:
	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
	 * but its boundaries look like
	 *	------ =_NextPart_000_01C31177.9DC7C000
	 * notice the one too few '-'.
	 * Presumably this is a deliberate exploitation of a bug in some mail
	 * clients.
	 *
	 * To catch that, the boundary is accepted anywhere after the first
	 * '-'. The trouble is that this creates a lot of false positives for
	 * boundary conditions, if we're too lax about matches. For example
	 * if we have boundary="1" we want to ensure that we don't break out
	 * of every line that has -1 in it instead of starting --1. This
	 * needs some more work.
	 */
	if(strstr(afterDash, boundary) != NULL) {
		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, afterDash);
		return 1;
	}

	/* Otherwise it has to be "--" followed by the boundary, in any case */
	if(afterDash[0] != '-')
		return 0;

	return strcasecmp(&afterDash[1], boundary) == 0;
}
/*
 * endOfMessage:
 *	Is the current line the end of the multipart message?
 *
 * The message ends with --boundary--, the boundary being compared without
 * regard to case. Nothing may follow the closing "--".
 *
 * Returns 1 if the line is the terminator, 0 if it isn't or if line is NULL
 */
static int
endOfMessage(const char *line, const char *boundary)
{
	const char *rest;
	size_t blen;

	if(line == NULL)
		return 0;

	cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);

	/* Leading "--" */
	if((line[0] != '-') || (line[1] != '-'))
		return 0;

	rest = &line[2];
	blen = strlen(boundary);

	if(strncasecmp(rest, boundary, blen) != 0)
		return 0;

	/* Exactly two characters are allowed after the boundary */
	if(strlen(rest) != (blen + 2))
		return 0;

	/* Trailing "--" */
	return (rest[blen] == '-') && (rest[blen + 1] == '-');
}
/*
 * initialiseTables:
 *	Initialise the various lookup tables.
 *
 * *rfc821Table is filled from rfc821headers and *subtypeTable from
 * mimeSubtypes, both of which are terminated by an entry with a NULL key.
 *
 * Returns 0 on success. On failure -1 is returned, every table created so
 * far has been destroyed and the matching pointer has been set to NULL, so
 * the caller has nothing to clean up.
 *
 * Failure to create a table is reported with -1 rather than with assert(),
 * since assert() vanishes when NDEBUG is defined and running out of memory
 * isn't a programming error.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
	const struct tableinit *tableinit;

	*rfc821Table = tableCreate();
	if(*rfc821Table == NULL)
		return -1;

	for(tableinit = rfc821headers; tableinit->key; tableinit++)
		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			*rfc821Table = NULL;
			return -1;
		}

	*subtypeTable = tableCreate();
	if(*subtypeTable == NULL) {
		/* Don't leak the table that has already been built */
		tableDestroy(*rfc821Table);
		*rfc821Table = NULL;
		return -1;
	}

	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			tableDestroy(*subtypeTable);
			*rfc821Table = NULL;
			*subtypeTable = NULL;
			return -1;
		}

	return 0;
}
/*
 * getTextPart:
 *	Choose which of the parts is to be treated as the text of the message.
 *
 * If there's an HTML text part its index is returned straight away, since
 * HTML text is the most likely to include a scripting worm. Otherwise the
 * index of the last part whose MIME type is TEXT is returned.
 *
 * If there is no text part at all, return -1
 */
static int
getTextPart(message *const messages[], size_t size)
{
	int lastText = -1;
	size_t idx = 0;

	while(idx < size) {
		message *const part = messages[idx];

		assert(part != NULL);

		if(messageGetMimeType(part) == TEXT) {
			if(strcasecmp(messageGetMimeSubtype(part), "html") == 0)
				return (int)idx;	/* HTML wins outright */
			lastText = (int)idx;
		}
		idx++;
	}

	return lastText;
}
/*
 * strip -
 *	Remove the trailing spaces from a buffer. Don't call this directly,
 * always call strstrip() which is a wrapper to this routine to be used with
 * NUL terminated strings. This code looks a bit strange because of its
 * heritage from code that worked on strings that weren't necessarily NUL
 * terminated.
 * TODO: rewrite for clamAV
 *
 * Returns its new length (a la strlen). 0 is returned if buf is NULL or len
 * isn't positive. If len is larger than strlen(buf) + 1 the buffer is left
 * alone and strlen(buf) is returned.
 *
 * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
 * not strlen(buf)
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> tests, passing a negative char to them is undefined.
 */
static size_t
strip(char *buf, int len)
{
	char *ptr;
	size_t i;

	if((buf == NULL) || (len <= 0))
		return 0;

	i = strlen(buf);
	if(len > (int)(i + 1))
		return i;

	/* Start at the last byte of the buffer and work backwards */
	ptr = &buf[--len];

#if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
	/* Stops at the first printable character or end of line marker */
	do
		if(*ptr)
			*ptr = '\0';
	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else	/* more characters can be displayed on DOS */
	do
#ifndef	REAL_MODE_DOS
		if(*ptr)	/* C8.0 puts into a text area */
#endif
			*ptr = '\0';
	while((--len >= 0) && ((*--ptr == '\0') || (isspace((unsigned char)*ptr))));
#endif
	return((size_t)(len + 1));
}
/*
 * strstrip:
 *	Strip the trailing white space from a NUL terminated string, in place.
 *
 * Returns the value returned by strip(), i.e. the new length of the string,
 * or 0 if s is NULL
 */
size_t
strstrip(char *s)
{
	size_t bufsize;

	if(s == NULL)
		return 0;

	/* strip() wants the size of the buffer, so count the NUL as well */
	bufsize = strlen(s) + 1;

	return strip(s, bufsize);
}
/*
* When parsing a MIME header see if this spans more than one line. A
* semi-colon at the end of the line indicates that the MIME information
* is continued on the next line.
*
* Some clients are broken and put white space after the ;
*/
static bool
continuationMarker(const char *line)
{
const char *ptr;
|
02927896 |
if(line == NULL)
return FALSE; |
e3aaff8e |
#ifdef CL_DEBUG
cli_dbgmsg("continuationMarker(%s)\n", line);
#endif
if(strlen(line) == 0)
return FALSE;
ptr = strchr(line, '\0');
assert(ptr != NULL);
|
28c29d59 |
while(ptr > line) |
e3aaff8e |
switch(*--ptr) {
case '\n':
case '\r':
case ' ':
case '\t':
continue;
case ';':
return TRUE;
default:
return FALSE;
}
return FALSE;
}
/*
 * parseMimeHeader:
 *	Apply one MIME header to the message m.
 *
 * cmd is the name of the header and arg is its value. RFC822 comments are
 * removed from both. cmd is looked up in rfc821Table, and Content-Type,
 * Content-Transfer-Encoding and Content-Disposition are acted upon. Any
 * other header is ignored.
 *
 * Returns 0 on success (including when the header is ignored or has no
 * value), -1 if the working copy of arg can't be allocated
 */
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
{
#ifdef	CL_THREAD_SAFE
	char *strptr;
#endif
	char *copy, *ptr;
	int commandNumber;

	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, (arg) ? arg : "(null)");

	ptr = rfc822comments(cmd);
	if(ptr) {
		commandNumber = tableFind(rfc821Table, ptr);
		free(ptr);
	} else
		commandNumber = tableFind(rfc821Table, cmd);

	if(arg == NULL) {
		/*
		 * This must be caught before arg is copied, strdup(NULL) is
		 * undefined.
		 *
		 * Empty field, such as
		 *	Content-Type:
		 * which I believe is illegal according to RFC1521
		 */
		if(commandNumber == CONTENT_TYPE)
			cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
		return 0;
	}

	copy = rfc822comments(arg);
	if(copy == NULL)
		copy = strdup(arg);
	if(copy == NULL)
		return -1;

	/* copy is moved along the buffer below, ptr is kept for free() */
	ptr = copy;

	switch(commandNumber) {
		case CONTENT_TYPE:
			/*
			 * Fix for non RFC1521 compliant mailers
			 * that send content-type: Text instead
			 * of content-type: Text/Plain, or
			 * just simply "Content-Type:"
			 */
			if(strchr(copy, '/') == NULL)
				/*
				 * According to section 4 of RFC1521:
				 * "Note also that a subtype specification is
				 * MANDATORY. There are no default subtypes"
				 *
				 * We have to break this and make an assumption
				 * for the subtype because virus writers and
				 * email client writers don't get it right
				 */
				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
			else {
				int i;
				char *mimeArgs;	/* RHS of the ; */

				if(*arg == '/') {
					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
					messageSetMimeType(m, "application");
					messageSetMimeSubtype(m, "octet-stream");
				} else {
					/*
					 * The content type could be in quotes:
					 *	Content-Type: "multipart/mixed"
					 * FIXME: this is a hack in that ignores
					 *	the quotes, it doesn't handle
					 *	them properly
					 */
					while(isspace((unsigned char)*copy))
						copy++;
					if(copy[0] == '\"')
						copy++;

					if(copy[0] != '/') {
						char *s;
						char *mimeType;	/* LHS of the ; */

						s = mimeType = cli_strtok(copy, 0, ";");
						/*
						 * Handle
						 * Content-Type: foo/bar multipart/mixed
						 * and
						 * Content-Type: multipart/mixed foo/bar
						 *
						 * Nothing to do if there is no
						 * first field to tokenise
						 */
						if(s && *s) for(;;) {
#ifdef	CL_THREAD_SAFE
							int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
#else
							int set = messageSetMimeType(m, strtok(s, "/"));
#endif

							/*
							 * Stephen White <stephen@earth.li>
							 * Some clients put space after
							 * the mime type but before
							 * the ;
							 */
#ifdef	CL_THREAD_SAFE
							s = strtok_r(NULL, ";", &strptr);
#else
							s = strtok(NULL, ";");
#endif
							if(s == NULL)
								break;
							if(set) {
								size_t len = strstrip(s);

								/*
								 * Remove the closing
								 * quote. len is tested
								 * first so that an
								 * empty subtype isn't
								 * indexed at -1
								 */
								if((len > 0) && (s[len - 1] == '\"')) {
									s[len - 1] = '\0';
									len = strstrip(s);
								}
								if(len) {
									if(strchr(s, ' ')) {
										char *t = cli_strtok(s, 0, " ");

										messageSetMimeSubtype(m, t);
										free(t);
									} else
										messageSetMimeSubtype(m, s);
								}
							}

							/* Move on to the next type, if any */
							while(*s && !isspace((unsigned char)*s))
								s++;
							if(*s++ == '\0')
								break;
							if(*s == '\0')
								break;
						}
						free(mimeType);
					}
				}

				/*
				 * Add in all rest of the arguments.
				 * e.g. if the header is this:
				 * Content-Type:', arg='multipart/mixed; boundary=foo
				 * we find the boundary argument set it
				 */
				i = 1;
				while((mimeArgs = cli_strtok(copy, i++, ";")) != NULL) {
					cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);

					messageAddArguments(m, mimeArgs);
					free(mimeArgs);
				}
			}
			break;
		case CONTENT_TRANSFER_ENCODING:
			messageSetEncoding(m, copy);
			break;
		case CONTENT_DISPOSITION:
#ifdef	CL_THREAD_SAFE
			arg = strtok_r(copy, ";", &strptr);
			if(arg && *arg) {
				messageSetDispositionType(m, arg);
				messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr));
			}
#else
			arg = strtok(copy, ";");
			if(arg && *arg) {
				messageSetDispositionType(m, arg);
				messageAddArgument(m, strtok(NULL, "\r\n"));
			}
#endif
			break;
		default:
			/* Not a header that we act upon */
			break;
	}
	free(ptr);

	return 0;
}
|
e06d34dc |
/*
 * saveTextPart:
 *	Save the text portion of the message into dir so that it can be
 * scanned. The "filename=textportion" argument is added to the message
 * before it is written out
 */
static void
saveTextPart(message *m, const char *dir)
{
	fileblob *saved;

	messageAddArgument(m, "filename=textportion");

	saved = messageToFileblob(m, dir);
	if(saved == NULL)
		return;

	/*
	 * Save main part to scan that
	 */
	cli_dbgmsg("Saving main message\n");

	fileblobDestroy(saved);
}
|
a9714c49 |
/*
 * rfc822comments:
 *	Handle RFC822 comments in headers.
 *
 * Returns a buffer without the comments, or NULL on error or if the input
 * has no '(' in it and so can't contain a comment. The caller must free the
 * returned buffer. Trailing white space is removed from the result.
 *
 * Comments may be nested. A backslash quotes the next character, the
 * backslash itself is dropped. Parentheses inside a quoted string are not
 * comments, so quoted strings (and their quotes) are copied as they are, e.g.
 *	name="foo (1).txt" (a comment)
 * becomes
 *	name="foo (1).txt"
 *
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in)
{
	const char *iptr;
	char *out, *optr;
	int backslash, inquote, commentlevel;

	if(in == NULL)
		return NULL;

	if(strchr(in, '(') == NULL)
		return NULL;

	/* The output can never be longer than the input */
	out = cli_malloc(strlen(in) + 1);
	if(out == NULL)
		return NULL;

	backslash = commentlevel = inquote = 0;
	optr = out;

	cli_dbgmsg("rfc822comments: contains a comment\n");

	for(iptr = in; *iptr; iptr++)
		if(backslash) {
			*optr++ = *iptr;
			backslash = 0;
		} else switch(*iptr) {
			case '\\':
				backslash = 1;
				break;
			case '\"':
				/* A quote within a comment is just comment text */
				if(commentlevel == 0) {
					*optr++ = '\"';
					inquote = !inquote;
				}
				break;
			case '(':
				if(inquote)
					*optr++ = '(';
				else
					commentlevel++;
				break;
			case ')':
				if(inquote)
					*optr++ = ')';
				else if(commentlevel > 0)
					commentlevel--;
				break;
			default:
				if(commentlevel == 0)
					*optr++ = *iptr;
		}

	if(backslash)	/* last character was a single backslash */
		*optr++ = '\\';
	*optr = '\0';

	strstrip(out);

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
50df4118 |
/*
* Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
* free, or NULL on error
*/
static char *
rfc2047(const char *in)
{
char *out, *pout;
size_t len;
|
4de5fffd |
if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL)) |
50df4118 |
return strdup(in);
cli_dbgmsg("rfc2047 '%s'\n", in);
out = cli_malloc(strlen(in) + 1);
if(out == NULL)
return NULL;
pout = out;
/* For each RFC2047 string */
while(*in) { |
cf569541 |
char encoding, *ptr, *enctext; |
50df4118 |
message *m;
blob *b;
/* Find next RFC2047 string */
while(*in) {
if((*in == '=') && (in[1] == '?')) {
in += 2;
break;
}
*pout++ = *in++;
}
/* Skip over charset, find encoding */
while((*in != '?') && *in)
in++;
if(*in == '\0')
break;
encoding = *++in;
encoding = tolower(encoding);
if((encoding != 'q') && (encoding != 'b')) { |
1b00d9a4 |
cli_warnmsg("Unsupported RFC2047 encoding type '%c' - report to bugs@clamav.net\n", encoding);
free(out);
out = NULL; |
50df4118 |
break;
}
/* Skip to encoded text */
if(*++in != '?')
break;
if(*++in == '\0')
break;
|
cf569541 |
enctext = strdup(in);
if(enctext == NULL) {
free(out);
out = NULL;
break;
} |
50df4118 |
in = strstr(in, "?="); |
cf569541 |
if(in == NULL) {
free(enctext); |
50df4118 |
break; |
cf569541 |
} |
50df4118 |
in += 2;
ptr = strstr(enctext, "?=");
assert(ptr != NULL);
*ptr = '\0';
/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
m = messageCreate(); |
c77c8809 |
if(m == NULL) |
50df4118 |
break;
messageAddStr(m, enctext); |
cf569541 |
free(enctext); |
767f16ab |
switch(encoding) { |
50df4118 |
case 'q':
messageSetEncoding(m, "quoted-printable");
break;
case 'b':
messageSetEncoding(m, "base64");
break;
}
b = messageToBlob(m);
len = blobGetDataSize(b);
cli_dbgmsg("Decoded as '%*.*s'\n", len, len, blobGetData(b));
memcpy(pout, blobGetData(b), len);
blobDestroy(b);
messageDestroy(m);
if(pout[len - 1] == '\n')
pout += len - 1;
else
pout += len;
}
*pout = '\0';
cli_dbgmsg("rfc2047 returns '%s'\n", out);
return out;
}
|
f10460ed |
#ifdef PARTIAL_DIR
/*
 * rfc1341:
 *	Handle partial messages (message/partial).
 *
 * The part held in m is saved in PARTIAL_DIR under the name made from its
 * "id" and "number" arguments. If the message also has a "total" argument
 * and this is the last part (number == total), parts 1 to total are looked
 * for in PARTIAL_DIR, appended in order to the file dir/id and removed from
 * PARTIAL_DIR.
 *
 * Returns 0 on success, -1 on error: PARTIAL_DIR can't be created or
 * examined, the message has no id or number, out of memory, the part can't
 * be saved, or a file needed for the reassembly can't be opened.
 *
 * NOTE(review): id, number and total come from the email and are used to
 *	build file names and the loop count without being validated, that
 *	ought to be checked
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *id, *number, *total, *oldfilename;

	if((mkdir(PARTIAL_DIR, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", PARTIAL_DIR);
		return -1;
	} else {
		struct stat statb;

		if(stat(PARTIAL_DIR, &statb) < 0) {
			cli_errmsg("Can't stat the directory '%s'\n", PARTIAL_DIR);
			return -1;
		}
		if(statb.st_mode & 077)
			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
				PARTIAL_DIR, (unsigned int)(statb.st_mode & 0777));
	}

	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		return -1;

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL) {
		free(id);
		return -1;
	}

	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 is strlen("filename=") plus the NUL */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg == NULL) {
		free(oldfilename);
		free(id);
		free(number);
		return -1;
	}
	sprintf(arg, "filename=%s%s", id, number);
	messageAddArgument(m, arg);
	free(arg);

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, PARTIAL_DIR)) == NULL) {
		free(id);
		free(number);
		return -1;
	}

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		int n = atoi(number);
		int t = atoi(total);
		DIR *dd = NULL;

		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if((n == t) && ((dd = opendir(PARTIAL_DIR)) != NULL)) {
			FILE *fout;
			char outname[NAME_MAX + 1];

			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing", outname);
				free(id);
				free(total);
				free(number);
				closedir(dd);
				return -1;
			}

			for(n = 1; n <= t; n++) {
				char filename[NAME_MAX + 1];
				/* not const, readdir_r() writes through &dent */
				struct dirent *dent;
#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
#if defined(C_SOLARIS) || defined(C_BEOS)
				char result[sizeof(struct dirent) + PATH_MAX + 1];
#else
				struct dirent result;
#endif
#endif

				snprintf(filename, sizeof(filename), "%s%d", id, n);

				/*
				 * &result is the start of the storage whether
				 * result is the array or the structure, so the
				 * same cast does for every platform
				 */
#ifdef HAVE_READDIR_R_3
				while((readdir_r(dd, (struct dirent *)&result, &dent) == 0) && dent) {
#elif defined(HAVE_READDIR_R_2)
				while((dent = (struct dirent *)readdir_r(dd, (struct dirent *)&result))) {
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					char fullname[NAME_MAX + 1];
					FILE *fin;
					char buffer[BUFSIZ];
					int nblanks, pathlen;

					if(dent->d_ino == 0)
						continue;

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
						continue;

					/* d_name alone can fill fullname */
					pathlen = snprintf(fullname, sizeof(fullname), "%s/%s", PARTIAL_DIR, dent->d_name);
					if((pathlen < 0) || ((size_t)pathlen >= sizeof(fullname))) {
						cli_errmsg("Path '%s/%s' is too long\n", PARTIAL_DIR, dent->d_name);
						continue;
					}
					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading", fullname);
						fclose(fout);
						unlink(outname);
						free(id);
						free(total);
						free(number);
						closedir(dd);
						return -1;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer), fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied
						 */
						if(buffer[0] == '\n') {
							nblanks++;
						} else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* FIXME: don't unlink if leave temps */
					unlink(fullname);
					break;
				}
				/* Start from the top to look for the next part */
				rewinddir(dd);
			}
			closedir(dd);
			fclose(fout);
		}
	}
	/* number must be freed whether or not there was a total */
	free(number);
	free(id);
	free(total);

	return 0;
}
#endif
|
55a3f03b |
#ifdef FOLLOWURLS |
71ba1dcd |
/*
 * checkURLs:
 *	Download what the http:// links in a message point to, so that the
 * downloaded files can be scanned.
 *
 * The hrefs found by html_normalise_mem() in the message are fetched into
 * dir, each into a file named after its URL with every '/' replaced by '_'.
 * A URL is only fetched once and no more than MAX_URLS are fetched. Nothing
 * is fetched for a message of more than 100K or an empty one.
 *
 * With WITH_CURL the download is done by getURL(), in one thread per URL if
 * CL_THREAD_SAFE is defined, all the threads have finished when this routine
 * returns. Without WITH_CURL the GET program is run through system().
 */
static void
checkURLs(message *m, const char *dir)
{
	blob *b = messageToBlob(m);
	size_t len;
	table_t *t;
	int i, n;
#if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
	pthread_t tid[MAX_URLS];
	struct arg args[MAX_URLS];
#endif
	tag_arguments_t hrefs;

	if(b == NULL)
		return;

	len = blobGetDataSize(b);

	if(len == 0) {
		blobDestroy(b);
		return;
	}

	/* TODO: make this size customisable */
	if(len > 100*1024) {
		cli_warnmsg("Viruses pointed to by URL not scanned in large message\n");
		blobDestroy(b);
		return;
	}

	blobClose(b);
	t = tableCreate();
	if(t == NULL) {
		blobDestroy(b);
		return;
	}

	hrefs.count = 0;
	hrefs.tag = hrefs.value = NULL;

	cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
	if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
		blobDestroy(b);
		tableDestroy(t);
		return;
	}
	cli_dbgmsg("checkURLs: html_normalise_mem returned\n");

	/* TODO: Do we need to call remove_html_comments? */

	/* n counts the downloads that have been started */
	n = 0;

	for(i = 0; i < hrefs.count; i++) {
		const char *url = hrefs.value[i];

		if(strncasecmp("http://", url, 7) == 0) {
			char *ptr;
#ifdef	WITH_CURL
#ifndef	CL_THREAD_SAFE
			struct arg arg;
#endif
#else	/*!WITH_CURL*/
#ifdef	CL_THREAD_SAFE
			static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
			struct stat statb;
			char cmd[512];
			int cmdlen;
#endif	/*WITH_CURL*/
			char name[NAME_MAX + 1];

			if(tableFind(t, url) == 1) {
				cli_dbgmsg("URL %s already downloaded\n", url);
				continue;
			}
			if(n == MAX_URLS) {
				cli_warnmsg("Not all URLs will be scanned\n");
				break;
			}
			(void)tableInsert(t, url, 1);
			cli_dbgmsg("Downloading URL %s to be scanned\n", url);

			/* strncpy() doesn't add the NUL to a name that fills the buffer */
			strncpy(name, url, sizeof(name) - 1);
			name[sizeof(name) - 1] = '\0';
			for(ptr = name; *ptr; ptr++)
				if(*ptr == '/')
					*ptr = '_';

#ifdef	WITH_CURL
#ifdef	CL_THREAD_SAFE
			args[n].dir = dir;
			args[n].url = url;
			args[n].filename = strdup(name);
			if(args[n].filename == NULL)
				continue;
			if(pthread_create(&tid[n], NULL, getURL, &args[n]) != 0) {
				/* Not counted in n, so it will not be joined */
				cli_warnmsg("URL %s: can't start the download thread\n", url);
				free(args[n].filename);
				continue;
			}
#else
			arg.url = url;
			arg.dir = dir;
			arg.filename = name;
			getURL(&arg);
#endif

#else
			/*
			 * The URL comes from the email and is handed to the
			 * shell, so it is put in single quotes, inside which
			 * the shell treats everything but a single quote
			 * literally. Anything with a single quote in it is
			 * refused.
			 * FIXME: run GET without going through the shell
			 */
			if((strchr(url, '\'') != NULL) || (strchr(dir, '\'') != NULL)) {
				cli_warnmsg("URL %s not downloaded, it contains a quote\n", url);
				continue;
			}
			/*
			 * TODO: maximum size and timeouts
			 */
			cmdlen = snprintf(cmd, sizeof(cmd), "GET -t10 '%s' > '%s/%s' 2>/dev/null", url, dir, name);
			if((cmdlen < 0) || ((size_t)cmdlen >= sizeof(cmd))) {
				/* A truncated command must not be run */
				cli_warnmsg("URL %s not downloaded, it is too long\n", url);
				continue;
			}
			cli_dbgmsg("%s\n", cmd);
#ifdef	CL_THREAD_SAFE
			pthread_mutex_lock(&system_mutex);
#endif
			system(cmd);
#ifdef	CL_THREAD_SAFE
			pthread_mutex_unlock(&system_mutex);
#endif
			snprintf(cmd, sizeof(cmd), "%s/%s", dir, name);
			if(stat(cmd, &statb) >= 0)
				if(statb.st_size == 0) {
					cli_warnmsg("URL %s failed to download\n", url);
					/*
					 * Don't bother scanning an empty file
					 */
					(void)unlink(cmd);
				}
#endif
			++n;
		}
	}
	blobDestroy(b);
	tableDestroy(t);

#if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
	assert(n <= MAX_URLS);
	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
	while(--n >= 0) {
		pthread_join(tid[n], NULL);
		free(args[n].filename);
	}
#endif
	/* The threads use the URLs held in hrefs, so free it after the joins */
	html_tag_arg_free(&hrefs);
}
|
65684cec |
#ifdef WITH_CURL |
f121cb96 |
static void *
#ifdef CL_THREAD_SAFE
getURL(void *a)
#else
getURL(struct arg *arg)
#endif |
9b4bb8b7 |
{
char *fout; |
87c9313e |
CURL *curl; |
9b4bb8b7 |
FILE *fp; |
87c9313e |
struct curl_slist *headers;
static int initialised = 0; |
f121cb96 |
#ifdef CL_THREAD_SAFE
static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
struct arg *arg = (struct arg *)a;
#endif
const char *url = arg->url;
const char *dir = arg->dir;
const char *filename = arg->filename; |
9b4bb8b7 |
|
f121cb96 |
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&init_mutex);
#endif |
87c9313e |
if(!initialised) { |
f121cb96 |
if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) {
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&init_mutex);
#endif
return NULL;
} |
87c9313e |
initialised = 1; |
9b4bb8b7 |
} |
f121cb96 |
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&init_mutex);
#endif
|
87c9313e |
/* easy isn't the word I'd use... */
curl = curl_easy_init();
if(curl == NULL) |
f121cb96 |
return NULL; |
65684cec |
|
87c9313e |
(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net");
if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0) |
f121cb96 |
return NULL; |
87c9313e |
|
9b4bb8b7 |
fout = cli_malloc(strlen(dir) + strlen(filename) + 2);
|
65684cec |
if(fout == NULL) {
curl_easy_cleanup(curl); |
f121cb96 |
return NULL; |
65684cec |
} |
9b4bb8b7 |
|
2c7d1edd |
snprintf(fout, NAME_MAX, "%s/%s", dir, filename); |
9b4bb8b7 |
fp = fopen(fout, "w");
if(fp == NULL) { |
3a0f75c6 |
cli_errmsg("Can't open '%s' for writing", fout); |
9b4bb8b7 |
free(fout); |
65684cec |
curl_easy_cleanup(curl); |
f121cb96 |
return NULL; |
9b4bb8b7 |
} |
7e492164 |
#ifdef CURLOPT_WRITEDATA |
f121cb96 |
if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) {
fclose(fp);
free(fout);
curl_easy_cleanup(curl);
return NULL;
} |
7e492164 |
#else
if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) {
fclose(fp);
free(fout);
curl_easy_cleanup(curl);
return NULL;
}
#endif |
f121cb96 |
|
87c9313e |
/* |
55a3f03b |
* If an item is in squid's cache get it from there (TCP_HIT/200) |
87c9313e |
* by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200)
*/
headers = curl_slist_append(NULL, "Pragma:");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); |
9b4bb8b7 |
|
87c9313e |
/* These should be customisable */
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); |
393a6d67 |
#ifdef CURLOPT_MAXFILESIZE
curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024);
#endif |
9b4bb8b7 |
|
f121cb96 |
#ifdef CL_THREAD_SAFE |
00d46ae6 |
#ifdef CURLOPT_DNS_USE_GLOBAL_CACHE |
f121cb96 |
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
#endif |
00d46ae6 |
#endif |
15f4aa67 |
/*
* Prevent password: prompting with older versions
* FIXME: a better username?
*/ |
cd483c9b |
curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password"); |
15f4aa67 |
|
f121cb96 |
/*
* FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked"
* from gethostbyaddr_r within this. It may be a bug in libcurl
* rather than this code, but I need to check, see Curl_resolv()
* If pushed really hard it will sometimes say
* Conditional jump or move depends on uninitialised value(s) and
* quit. But the program seems to work OK without valgrind...
* Perhaps Curl_resolv() isn't thread safe?
*/ |
87c9313e |
if(curl_easy_perform(curl) != CURLE_OK) {
cli_warnmsg("URL %s failed to download\n", url);
unlink(fout);
}
fclose(fp); |
f121cb96 |
curl_slist_free_all(headers); |
87c9313e |
curl_easy_cleanup(curl);
free(fout); |
f121cb96 |
return NULL; |
9b4bb8b7 |
}
#endif
#else
/*
 * checkURLs:
 *	FOLLOWURLS isn't defined, so links in the message are not followed
 * and this routine does nothing
 */
static void
checkURLs(message *m, const char *dir)
{
	(void)m;
	(void)dir;
}
#endif
|
f2f25418 |
#ifdef HAVE_BACKTRACE |
4f1d0bfc |
/*
 * sigsegv:
 *	Handler for SIGSEGV. The default action for the signal is restored
 * first, then a backtrace is sent to syslog and the program exits with the
 * status SIGSEGV
 */
static void
sigsegv(int sig)
{
	(void)sig;	/* only ever installed for SIGSEGV */

	(void)signal(SIGSEGV, SIG_DFL);

	print_trace(1);	/* 1 => report through syslog */

	exit(SIGSEGV);
}
|
4f1d0bfc |
/*
 * print_trace:
 *	Report the call stack of this process, at most 10 frames deep.
 *
 * If use_syslog is non-zero the trace goes to syslog at LOG_ERR, otherwise
 * it is printed with cli_dbgmsg(). Only the heading is output if the names
 * of the frames can't be obtained.
 */
static void
print_trace(int use_syslog)
{
	void *array[10];
	int size, i;	/* backtrace() works in ints */
	char **strings;
	pid_t pid = getpid();

	size = backtrace(array, 10);
	strings = backtrace_symbols(array, size);

	if(use_syslog == 0)
		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
	else
		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

	/* backtrace_symbols() returns NULL if it can't get the memory */
	if(strings == NULL)
		return;

	for(i = 0; i < size; i++)
		if(use_syslog)
			syslog(LOG_ERR, "bt[%d]: %s", i, strings[i]);
		else
			cli_dbgmsg("%s\n", strings[i]);

	/* TODO: dump the current email */

	/* Only the array is freed, the strings are part of the same block */
	free(strings);
}
#endif |