libclamav/mbox.c
b151ef55
 /*
  *  Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7cef72ea
  *
  * Change History:
  * $Log: mbox.c,v $
e279f3ea
  * Revision 1.189  2004/11/27 14:39:01  nigelhorne
  * Honour section 7.2.6 of RFC1521
  *
9f43cc75
  * Revision 1.188  2004/11/27 14:17:35  nigelhorne
  * Handle attachments before the first mime section
  *
ef3cf57d
  * Revision 1.187  2004/11/27 13:16:56  nigelhorne
  * uuencode failures no longer fatal
  *
a1c924f9
  * Revision 1.186  2004/11/27 11:59:28  nigelhorne
  * Handle comments in the command part of headers
  *
39b5a552
  * Revision 1.185  2004/11/26 23:00:29  nigelhorne
  * Handle spaces after the final MIME boundary and binHex attachments after that boundary
  *
30fb8a0b
  * Revision 1.184  2004/11/26 21:51:48  nigelhorne
  * Scan uuencodes after the final MIME section
  *
699fafc3
  * Revision 1.183  2004/11/26 17:32:42  nigelhorne
  * Add debug message for end of multipart headers
  *
aa479b7d
  * Revision 1.182  2004/11/26 12:05:17  nigelhorne
  * Remove warning message
  *
0856891e
  * Revision 1.181  2004/11/22 15:18:51  nigelhorne
  * Performance work
  *
c29ebe66
  * Revision 1.180  2004/11/19 11:32:16  nigelhorne
  * Scan email footers (portions after the last MIME boundary)
  *
8386482b
  * Revision 1.179  2004/11/18 18:09:07  nigelhorne
  * First draft of binhex.c
  *
2ed1bc5a
  * Revision 1.178  2004/11/15 13:58:50  nigelhorne
  * Fix obscure chance of memory leak
  *
9180b8bb
  * Revision 1.177  2004/11/12 22:22:21  nigelhorne
  * Performance speeded up
  *
d85c1fad
  * Revision 1.176  2004/11/12 09:41:45  nigelhorne
  * Partial mode now on by default
  *
ad642304
  * Revision 1.175  2004/11/11 22:15:46  nigelhorne
  * Rewrite handling of folded headers
  *
0a94ffaf
  * Revision 1.174  2004/11/10 10:08:45  nigelhorne
  * Fix escaped parenthesis in rfc822 comments
  *
74ca33e9
  * Revision 1.173  2004/11/09 19:40:06  nigelhorne
  * Find uuencoded files in preambles to multipart messages
  *
39d09964
  * Revision 1.172  2004/11/09 13:33:38  nigelhorne
  * Tidy
  *
2176c0e5
  * Revision 1.171  2004/11/09 12:24:32  nigelhorne
  * Better handling of mail-follow-urls when CURL is not installed
  *
d768ac5a
  * Revision 1.170  2004/11/09 10:08:02  nigelhorne
  * Added basic handling of folded headers in the main message
  *
28ea5910
  * Revision 1.169  2004/11/08 16:27:09  nigelhorne
  * Fix crash with correctly encoded uuencode files
  *
802c37fc
  * Revision 1.168  2004/11/08 10:26:22  nigelhorne
  * Fix crash if x-yencode is mistakenly guessed
  *
ad3d1172
  * Revision 1.167  2004/11/07 16:59:42  nigelhorne
  * Tidy
  *
5e5a162c
  * Revision 1.166  2004/11/07 16:39:00  nigelhorne
  * Handle para 4 of RFC2231
  *
0eb8bafc
  * Revision 1.165  2004/11/06 21:43:23  nigelhorne
  * Fix possible segfault in handling broken RFC2047 headers
  *
a77dc192
  * Revision 1.164  2004/11/04 10:13:41  nigelhorne
  * Rehashed readdir_r patch
  *
e0377124
  * Revision 1.163  2004/10/31 09:28:56  nigelhorne
  * Handle unbalanced quotes in multipart headers
  *
cbc2eaa9
  * Revision 1.162  2004/10/24 04:35:15  nigelhorne
  * Handle multipart/knowbot as multipart/mixed
  *
2c7b958d
  * Revision 1.161  2004/10/21 10:18:40  nigelhorne
  * PARTIAL: readdir_r even more options :-(
  *
67a25177
  * Revision 1.160  2004/10/21 09:41:07  nigelhorne
  * PARTIAL: add readdir_r fix to BeOS
  *
3a0946f5
  * Revision 1.159  2004/10/20 10:35:41  nigelhorne
  * Partial mode: fix possible stack corruption with Solaris
  *
e2a46f19
  * Revision 1.158  2004/10/17 09:29:21  nigelhorne
  * Advise to report broken emails
  *
9fc8173e
  * Revision 1.157  2004/10/16 20:53:28  nigelhorne
  * Tidy up
  *
db09f781
  * Revision 1.156  2004/10/16 19:09:39  nigelhorne
  * Handle BeMail (BeOS) files
  *
5f72fd3b
  * Revision 1.155  2004/10/16 17:23:04  nigelhorne
  * Handle colons in quotes in headers
  *
4fc38d69
  * Revision 1.154  2004/10/16 09:01:05  nigelhorne
  * Improved handling of wraparound headers
  *
95f98162
  * Revision 1.153  2004/10/14 21:18:49  nigelhorne
  * Harden the test for RFC2047 encoded headers
  *
291ac47f
  * Revision 1.152  2004/10/14 17:45:13  nigelhorne
  * RFC2047 on long lines produced by continuation headers
  *
138b73f6
  * Revision 1.151  2004/10/10 11:10:20  nigelhorne
  * Remove perror - replace with cli_errmsg
  *
6736d46f
  * Revision 1.150  2004/10/09 08:01:37  nigelhorne
  * Needs libcurl >= 7.11
  *
f8c25c7a
  * Revision 1.149  2004/10/06 17:21:30  nigelhorne
  * Fix RFC2298 handling broken by RFC1341 code
  *
9a7398ee
  * Revision 1.148  2004/10/05 15:41:53  nigelhorne
  * First draft of code to handle RFC1341
  *
b62a19da
  * Revision 1.147  2004/10/04 12:18:09  nigelhorne
  * Better warning message about PGP attachments not being scanned
  *
c3400886
  * Revision 1.146  2004/10/04 10:52:39  nigelhorne
  * Better error message on RFC2047 decode error
  *
5eeffbb9
  * Revision 1.145  2004/10/01 13:49:22  nigelhorne
  * Minor code tidy
  *
e94471f4
  * Revision 1.144  2004/10/01 07:55:36  nigelhorne
  * Better error message on message/partial
  *
a95c894a
  * Revision 1.143  2004/09/30 21:47:35  nigelhorne
  * Removed unneeded strdups
  *
37819555
  * Revision 1.142  2004/09/28 18:40:12  nigelhorne
  * Use stack rather than heap where possible
  *
d28e1902
  * Revision 1.141  2004/09/23 08:43:25  nigelhorne
  * Scan multipart/digest messages
  *
c07de365
  * Revision 1.140  2004/09/22 16:09:51  nigelhorne
  * Build if CURLOPT_DNS_USE_GLOBAL_CACHE isn't supported
  *
d6e30cce
  * Revision 1.139  2004/09/22 15:49:13  nigelhorne
  * Handle RFC2298 messages
  *
66df01fa
  * Revision 1.138  2004/09/22 15:21:50  nigelhorne
  * Fix typo
  *
02406150
  * Revision 1.137  2004/09/21 20:47:38  nigelhorne
  * FOLLOWURL: Set a default username and password for password protected pages
  *
05ea2522
  * Revision 1.136  2004/09/21 12:18:52  nigelhorne
  * Fallback to CURLOPT_FILE if CURLOPT_WRITEDATA isn't defined
  *
897fd9c7
  * Revision 1.135  2004/09/21 08:14:00  nigelhorne
  * Now compiles in machines with libcurl but without threads
  *
74c6f514
  * Revision 1.134  2004/09/20 17:08:43  nigelhorne
  * Some performance enhancements
  *
137740e1
  * Revision 1.133  2004/09/20 12:44:03  nigelhorne
  * Fix parsing error on mime arguments
  *
e9bdeb72
  * Revision 1.132  2004/09/20 08:31:56  nigelhorne
  * FOLLOWURLS now compiled if libcurl is found
  *
4d9c0ca8
  * Revision 1.131  2004/09/18 14:59:25  nigelhorne
  * Code tidy
  *
6fd6d771
  * Revision 1.130  2004/09/17 10:56:29  nigelhorne
  * Handle multiple content-type headers and use the most likely
  *
2bcec72b
  * Revision 1.129  2004/09/17 09:48:53  nigelhorne
  * Handle attempts to hide mime type
  *
90905415
  * Revision 1.128  2004/09/17 09:09:44  nigelhorne
  * Better handling of RFC822 comments
  *
0674e2af
  * Revision 1.127  2004/09/16 18:00:43  nigelhorne
  * Handle RFC2047
  *
de509b8e
  * Revision 1.126  2004/09/16 14:23:57  nigelhorne
  * Handle quotes around mime type
  *
31b05bcb
  * Revision 1.125  2004/09/16 12:59:36  nigelhorne
  * Handle = and space as header separators
  *
21cd233d
  * Revision 1.124  2004/09/16 11:20:33  nigelhorne
  * Better handling of folded headers in multipart messages
  *
56d8328d
  * Revision 1.123  2004/09/16 08:56:19  nigelhorne
  * Handle RFC822 Comments
  *
3a0ef2ee
  * Revision 1.122  2004/09/15 22:09:26  nigelhorne
  * Handle spaces before colons
  *
0e3b08fc
  * Revision 1.121  2004/09/15 18:08:23  nigelhorne
  * Handle multiple encoding types
  *
a2d786fc
  * Revision 1.120  2004/09/15 08:47:07  nigelhorne
  * Cleaner way to initialise hrefs
  *
6da40aa1
  * Revision 1.119  2004/09/14 20:47:28  nigelhorne
  * Use new normalise code
  *
73b2c34c
  * Revision 1.118  2004/09/14 12:09:37  nigelhorne
  * Include old normalise code
  *
06d4e856
  * Revision 1.117  2004/09/13 16:44:01  kojm
  * minor cleanup
  *
e745ac7e
  * Revision 1.116  2004/09/13 13:16:28  nigelhorne
  * Return CL_EFORMAT on bad format
  *
7d3d11d0
  * Revision 1.115  2004/09/06 11:02:08  nigelhorne
  * Normalise HTML before scanning for URLs to download
  *
b4cb4486
  * Revision 1.114  2004/09/03 15:59:00  nigelhorne
  * Handle boundary= "foo"
  *
f1c33aa0
  * Revision 1.113  2004/08/26 09:33:20  nigelhorne
  * Scan Communigate Pro files
  *
a446de17
  * Revision 1.112  2004/08/23 13:15:16  nigelhorne
  * messageClearMarkers
  *
565c449d
  * Revision 1.111  2004/08/22 20:20:14  nigelhorne
  * Tidy
  *
e6b25cd3
  * Revision 1.110  2004/08/22 15:08:59  nigelhorne
  * messageExport
  *
1e06e1ab
  * Revision 1.109  2004/08/22 10:34:24  nigelhorne
  * Use fileblob
  *
de617e3e
  * Revision 1.108  2004/08/21 11:57:57  nigelhorne
  * Use line.[ch]
  *
c408cfa5
  * Revision 1.107  2004/08/20 04:55:07  nigelhorne
  * FOLLOWURL
  *
c9ae17be
  * Revision 1.106  2004/08/20 04:53:18  nigelhorne
  * Tidy up
  *
314ff77b
  * Revision 1.105  2004/08/18 21:35:08  nigelhorne
  * Multithread the FollowURL calls
  *
3eb12bae
  * Revision 1.104  2004/08/18 15:53:43  nigelhorne
  * Honour CL_MAILURL
  *
28498221
  * Revision 1.103  2004/08/18 10:49:45  nigelhorne
  * CHECKURLs was mistakenly turned on
  *
da812a6a
  * Revision 1.102  2004/08/18 07:45:20  nigelhorne
  * Use configure WITH_CURL value
  *
49674596
  * Revision 1.101  2004/08/17 08:28:32  nigelhorne
  * Support multitype/fax-message
  *
6b93ea0c
  * Revision 1.100  2004/08/12 10:36:09  nigelhorne
  * LIBCURL completed
  *
88771ffa
  * Revision 1.99  2004/08/11 15:28:39  nigelhorne
  * No longer needs curl.h
  *
f2b068fb
  * Revision 1.98  2004/08/11 14:46:22  nigelhorne
  * Better handling of false positive emails
  *
0fba2555
  * Revision 1.97  2004/08/10 14:02:22  nigelhorne
  * *** empty log message ***
  *
3fa72383
  * Revision 1.96  2004/08/10 08:14:00  nigelhorne
  * Enable CHECKURL
  *
5431ebba
  * Revision 1.95  2004/08/09 21:37:21  kojm
  * libclamav: add new option CL_MAILURL
  *
bf6f653d
  * Revision 1.94  2004/08/09 08:26:36  nigelhorne
  * Thread safe checkURL
  *
c5ed8336
  * Revision 1.93  2004/08/08 21:30:47  nigelhorne
  * First draft of CheckURL
  *
bac2c10a
  * Revision 1.92  2004/08/08 19:13:14  nigelhorne
  * Better handling of bounces
  *
d32343c3
  * Revision 1.91  2004/08/04 18:59:19  nigelhorne
  * Tidy up multipart handling
  *
79e432d2
  * Revision 1.90  2004/07/26 17:02:56  nigelhorne
  * Fix crash when debugging on SPARC
  *
9c70ef30
  * Revision 1.89  2004/07/26 09:12:12  nigelhorne
  * Fix crash when debugging on Solaris
  *
285a69b4
  * Revision 1.88  2004/07/20 14:35:29  nigelhorne
  * Some MYDOOM.I were getting through
  *
f91f55e0
  * Revision 1.87  2004/07/19 17:54:40  kojm
  * Use new pattern matching algorithm. Cleanup.
  *
80a8c7d8
  * Revision 1.86  2004/07/06 09:32:45  nigelhorne
  * Better handling of Gibe.3 boundary exploit
  *
61db35a1
  * Revision 1.85  2004/06/30 19:48:58  nigelhorne
  * Some TR.Happy99.SKA were getting through
  *
89e9a596
  * Revision 1.84  2004/06/30 14:30:40  nigelhorne
  * Fix compilation error on Solaris
  *
d1382234
  * Revision 1.83  2004/06/28 11:44:45  nigelhorne
  * Remove empty parts
  *
735377bc
  * Revision 1.82  2004/06/25 13:56:38  nigelhorne
  * Optimise messages without other messages encapsulated within them
  *
f84fe2e8
  * Revision 1.81  2004/06/24 21:36:38  nigelhorne
  * Plug memory leak with large number of attachments
  *
784e2335
  * Revision 1.80  2004/06/23 16:23:25  nigelhorne
  * Further empty line optimisation
  *
98685ac1
  * Revision 1.79  2004/06/22 04:08:01  nigelhorne
  * Optimise empty lines
  *
006f738e
  * Revision 1.78  2004/06/21 10:21:19  nigelhorne
  * Fix crash when a multipart/mixed message contains many parts that need to be scanned as attachments
  *
6613d595
  * Revision 1.77  2004/06/18 10:07:12  nigelhorne
  * Allow any number of alternatives in multipart messages
  *
8a88fb93
  * Revision 1.76  2004/06/16 08:07:39  nigelhorne
  * Added thread safety
  *
93002b48
  * Revision 1.75  2004/06/14 09:07:10  nigelhorne
  * Handle spam using broken e-mail generators for multipart/alternative
  *
7b8fb055
  * Revision 1.74  2004/06/09 18:18:59  nigelhorne
  * Find uuencoded viruses in multipart/mixed that have no start of message boundaries
  *
4b0a2de6
  * Revision 1.73  2004/05/14 08:15:55  nigelhorne
  * Use mkstemp on cygwin
  *
a750c93c
  * Revision 1.72  2004/05/12 11:20:37  nigelhorne
  * More bounce message false positives handled
  *
92915cee
  * Revision 1.71  2004/05/10 11:35:11  nigelhorne
  * No need to update mbox.c for cli_filetype problem
0b244177
  *
2e0f78a6
  * Revision 1.69  2004/05/06 11:26:49  nigelhorne
  * Force attachments marked as RFC822 messages to be scanned
  *
3db105a2
  * Revision 1.68  2004/04/29 08:59:24  nigelhorne
  * Tidied up SetDispositionType
  *
7584963d
  * Revision 1.67  2004/04/23 10:47:41  nigelhorne
  * If an inline text portion has a filename treat it as an attachment
  *
bf497d0a
  * Revision 1.66  2004/04/14 08:32:21  nigelhorne
  * When debugging print the email number in mailboxes
  *
7baeb4a6
  * Revision 1.65  2004/04/07 18:18:07  nigelhorne
  * Some occurrences of W97M.Lexar were let through
  *
4465fb04
  * Revision 1.64  2004/04/05 09:32:20  nigelhorne
  * Added SCAN_TO_DISC define
  *
4c927f11
  * Revision 1.63  2004/04/01 15:32:34  nigelhorne
  * Graceful exit if messageAddLine fails in strdup
  *
6638be41
  * Revision 1.62  2004/03/31 17:00:20  nigelhorne
  * Code tidy up free memory earlier
  *
74b5c349
  * Revision 1.61  2004/03/30 22:45:13  nigelhorne
  * Better handling of multipart/multipart messages
  *
ffd59a3e
  * Revision 1.60  2004/03/29 09:22:03  nigelhorne
  * Tidy up code and reduce shuffling of data
  *
c95ae98b
  * Revision 1.59  2004/03/26 11:08:36  nigelhorne
  * Use cli_writen
  *
02c9dc2a
  * Revision 1.58  2004/03/25 22:40:46  nigelhorne
  * Removed even more calls to realloc and some duplicated code
  *
627465e7
  * Revision 1.57  2004/03/21 17:19:49  nigelhorne
  * Handle bounce messages with no headers
  *
f5a4d7e8
  * Revision 1.56  2004/03/21 09:41:26  nigelhorne
  * Faster scanning for non MIME messages
  *
3e556ea8
  * Revision 1.55  2004/03/20 17:39:23  nigelhorne
  * First attempt to handle all bounces
  *
a980b067
  * Revision 1.54  2004/03/19 15:40:45  nigelhorne
  * Handle empty content-disposition types
  *
af852ae0
  * Revision 1.53  2004/03/19 08:08:02  nigelhorne
  * If a message part of a multipart contains an RFC822 message that has no encoding don't scan it
  *
b759d5eb
  * Revision 1.52  2004/03/18 21:51:41  nigelhorne
  * If a message only contains a single RFC822 message that has no encoding don't save for scanning
  *
bad123c6
  * Revision 1.51  2004/03/17 19:48:12  nigelhorne
  * Improved embedded RFC822 message handling
  *
09ccd6e0
  * Revision 1.50  2004/03/10 22:05:39  nigelhorne
  * Fix seg fault when a message in a multimessage mailbox fails to scan
  *
b0d8b0db
  * Revision 1.49  2004/03/04 13:01:58  nigelhorne
  * Ensure all bounces are rescanned by cl_mbox
  *
6e07998e
  * Revision 1.48  2004/02/27 12:16:26  nigelhorne
  * Catch lines just containing ':'
  *
39ff42ee
  * Revision 1.47  2004/02/23 10:13:08  nigelhorne
  * Handle spaces before : in headers
  *
1d53a315
  * Revision 1.46  2004/02/18 13:29:19  nigelhorne
  * Stop buffer overflows for files with very long suffixes
  *
26564cf5
  * Revision 1.45  2004/02/18 10:07:40  nigelhorne
  * Find some Yaha
  *
c7256385
  * Revision 1.44  2004/02/15 08:45:54  nigelhorne
  * Avoid scanning the same file twice
  *
0704dad8
  * Revision 1.43  2004/02/14 19:04:05  nigelhorne
  * Handle spaces in boundaries
  *
0dbec6b9
  * Revision 1.42  2004/02/14 17:23:45  nigelhorne
  * Had deleted O_BINARY by mistake
  *
d32e668f
  * Revision 1.41  2004/02/12 18:43:58  nigelhorne
  * Use mkstemp on Solaris
  *
a66ca28a
  * Revision 1.40  2004/02/11 08:15:59  nigelhorne
  * Use O_BINARY for cygwin
  *
8b242bb9
  * Revision 1.39  2004/02/06 13:46:08  kojm
  * Support for clamav-config.h
  *
b9ec1705
  * Revision 1.38  2004/02/04 13:29:48  nigelhorne
  * Handle partial writes - and print when write fails
  *
0bf1353d
  * Revision 1.37  2004/02/03 22:54:59  nigelhorne
  * Catch another example of Worm.Dumaru.Y
  *
a64bf87e
  * Revision 1.36  2004/02/02 09:52:57  nigelhorne
  * Some instances of Worm.Dumaru.Y got through the net
  *
5a01973c
  * Revision 1.35  2004/01/28 10:15:24  nigelhorne
  * Added support to scan some bounce messages
  *
5c7cf3f1
  * Revision 1.34  2004/01/24 17:43:37  nigelhorne
  * Removed (incorrect) warning about uninitialised variable
  *
2250ea69
  * Revision 1.33  2004/01/23 10:38:22  nigelhorne
  * Fixed memory leak in handling some multipart messages
  *
4e7ca2b1
  * Revision 1.32  2004/01/23 08:51:19  nigelhorne
  * Add detection of uuencoded viruses in single part multipart/mixed files
  *
9a35912c
  * Revision 1.31  2004/01/22 22:13:06  nigelhorne
  * Prevent infinite recursion on broken uuencoded files
  *
8c0250d5
  * Revision 1.30  2004/01/13 10:12:05  nigelhorne
  * Remove duplicate code when handling multipart messages
  *
0ada8f3e
  * Revision 1.29  2004/01/09 18:27:11  nigelhorne
  * ParseMimeHeader could corrupt arg
  *
7e572372
  * Revision 1.28  2004/01/09 15:07:42  nigelhorne
  * Re-engineered update 1.11 lost in recent changes
  *
68badbc1
  * Revision 1.27  2004/01/09 14:45:59  nigelhorne
  * Removed duplicated code in multipart handler
  *
852e3ce4
  * Revision 1.26  2004/01/09 10:20:54  nigelhorne
  * Locate uuencoded viruses hidden in text portions of multipart/mixed mime messages
  *
441992ed
  * Revision 1.25  2004/01/06 14:41:18  nigelhorne
  * Handle headers which do not have a space after the ':'
  *
f54a8635
  * Revision 1.24  2003/12/20 13:55:36  nigelhorne
  * Ensure multipart just save the bodies of attachments
  *
68be129f
  * Revision 1.23  2003/12/14 18:07:01  nigelhorne
  * Some viruses in embedded messages were not being found
  *
062ba8b0
  * Revision 1.22  2003/12/13 16:42:23  nigelhorne
  * call new cli_chomp
  *
7fca6080
  * Revision 1.21  2003/12/11 14:35:48  nigelhorne
  * Better handling of encapsulated messages
  *
f5e9abc8
  * Revision 1.20  2003/12/06 04:03:26  nigelhorne
  * Handle hand crafted emails that incorrectly set multipart headers
  *
2227f20e
  * Revision 1.19  2003/11/21 07:26:31  nigelhorne
  * Scan multipart alternatives that have no boundaries, finds some uuencoded happy99
  *
181c7548
  * Revision 1.18  2003/11/17 08:13:21  nigelhorne
  * Handle spaces at the end of lines of MIME headers
  *
04421a14
  * Revision 1.17  2003/11/06 05:06:42  nigelhorne
  * Some applications weren't being scanned
  *
295e425f
  * Revision 1.16  2003/11/04 08:24:00  nigelhorne
  * Handle multipart messages that have no text portion
  *
07cbf822
  * Revision 1.15  2003/10/12 20:13:49  nigelhorne
  * Use NO_STRTOK_R consistent with message.c
  *
fdc8a467
  * Revision 1.14  2003/10/12 12:37:11  nigelhorne
  * Appledouble encoded EICAR now found
  *
4674dc9a
  * Revision 1.13  2003/10/01 09:27:42  nigelhorne
  * Handle content-type header going over to a new line
  *
6ecba059
  * Revision 1.12  2003/09/29 17:10:19  nigelhorne
  * Moved stub from heap to stack since its maximum size is known
  *
47ab99fa
  * Revision 1.11  2003/09/29 12:58:32  nigelhorne
  * Handle Content-Type: /; name="eicar.com"
  *
7cef72ea
  * Revision 1.10  2003/09/28 10:06:34  nigelhorne
  * Compilable under SCO; removed duplicate code with message.c
  *
b151ef55
  */
e279f3ea
 static	char	const	rcsid[] = "$Id: mbox.c,v 1.189 2004/11/27 14:39:01 nigelhorne Exp $";
8b242bb9
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
b151ef55
 
 #ifndef	CL_DEBUG
897fd9c7
 #define	NDEBUG	/* map CLAMAV debug onto standard */
b151ef55
 #endif
 
 #ifdef CL_THREAD_SAFE
f5e9abc8
 #ifndef	_REENTRANT
b151ef55
 #define	_REENTRANT	/* for Solaris 2.8 */
 #endif
f5e9abc8
 #endif
b151ef55
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <assert.h>
 #include <string.h>
 #include <strings.h>
 #include <ctype.h>
 #include <time.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
0bcad2b1
 #include <sys/param.h>
b151ef55
 #include <clamav.h>
9a7398ee
 #include <dirent.h>
67a25177
 #include <limits.h>
b151ef55
 
a77dc192
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 #include <stddef.h>
 #endif
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 #include <pthread.h>
 #endif
 
b151ef55
 #include "table.h"
 #include "mbox.h"
 #include "blob.h"
de617e3e
 #include "line.h"
b151ef55
 #include "text.h"
 #include "message.h"
 #include "others.h"
 #include "defaults.h"
7fca6080
 #include "str.h"
b151ef55
 
98685ac1
 #ifdef	CL_DEBUG
 #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
 #define HAVE_BACKTRACE
 #endif
89e9a596
 #endif
98685ac1
 
 #ifdef HAVE_BACKTRACE
 #include <execinfo.h>
 #include <signal.h>
 #include <syslog.h>
 
 static	void	sigsegv(int sig);
 static	void	print_trace(int use_syslog);
 #endif
 
07cbf822
 #if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
b151ef55
 #undef strtok_r
 #undef __strtok_r
 #define strtok_r(a,b,c)	strtok(a,b)
 #endif
 
 /* required for AIX and Tru64 */
 #ifdef TRUE
 #undef TRUE
 #endif
 #ifdef FALSE
 #undef FALSE
 #endif
 
6b93ea0c
 /*
  * File-local boolean type. Any TRUE/FALSE macros that are already defined
  * are removed just above (the existing note says AIX and Tru64 need this)
  * so that the two enumerators can be declared here.
  */
 typedef enum	{ FALSE = 0, TRUE = 1 } bool;
 
3fa72383
 #define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
49674596
 
e9bdeb72
 /*
  * Code does exist to run FOLLOWURLS on systems without libcurl, however that
  * is not recommended so it is not compiled by default
  */
 #ifdef	WITH_CURL
 #define	FOLLOWURLS	/*
da812a6a
 			 * If an email contains URLs, check them - helps to
 			 * find Dialer.gen-45
6b93ea0c
 			 */
e9bdeb72
 #endif
3fa72383
 
3eb12bae
 #ifdef	FOLLOWURLS
da812a6a
 
6da40aa1
 #include "htmlnorm.h"
 
da812a6a
 #define	MAX_URLS	5	/*
49674596
 				 * Maximum number of URLs scanned in a message
 				 * part
 				 */
da812a6a
 #ifdef	WITH_CURL	/* Set in configure */
 /*
  * To build with WITH_CURL:
  * LDFLAGS=`curl-config --libs` ./configure ...
  */
88771ffa
 #include <curl/curl.h>
6736d46f
 
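 /*
  * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and 7.10 is
  * untested). Note that the version checks below only reject releases older
  * than 7.10, so 7.10 itself is still accepted.
  */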
 #if	(LIBCURL_VERSION_MAJOR < 7)
 #undef	WITH_CURL	/* also undef FOLLOWURLS? */
88771ffa
 #endif
3eb12bae
 
138b73f6
 #if	(LIBCURL_VERSION_MAJOR == 7) && (LIBCURL_VERSION_MINOR < 10)
6736d46f
 #undef	WITH_CURL	/* also undef FOLLOWURLS? */
 #endif
 
 #endif	/*WITH_CURL*/
 
3eb12bae
 #else	/*!FOLLOWURLS*/
 #undef	WITH_CURL
6736d46f
 #endif	/*FOLLOWURLS*/
88771ffa
 
9a7398ee
 /*
c29ebe66
  * Define this to handle messages covered by section 7.3.2 of RFC1341.
9a7398ee
  *	This is experimental code so it is up to YOU to (1) ensure it's secure
291ac47f
  * (2) periodically trim the directory of old files
  *
  * If you use the load balancing feature of clamav-milter to run clamd on
d85c1fad
  * more than one machine you must make sure that .../partial is on a shared
291ac47f
  * network filesystem
9a7398ee
  */
d85c1fad
 #define	PARTIAL_DIR
9a7398ee
 
de617e3e
 static	message	*parseEmailHeaders(const message *m, const table_t *rfc821Table);
8c0250d5
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
565c449d
 static	int	parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options);
b151ef55
 static	int	boundaryStart(const char *line, const char *boundary);
 static	int	endOfMessage(const char *line, const char *boundary);
 static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
 static	int	getTextPart(message *const messages[], size_t size);
 static	size_t	strip(char *buf, int len);
 static	bool	continuationMarker(const char *line);
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
5a01973c
 static	void	saveTextPart(message *m, const char *dir);
0674e2af
 static	char	*rfc2047(const char *in);
90905415
 static	char	*rfc822comments(const char *in);
9a7398ee
 #ifdef	PARTIAL_DIR
 static	int	rfc1341(message *m, const char *dir);
 #endif
3fa72383
 
c5ed8336
 static	void	checkURLs(message *m, const char *dir);
da812a6a
 #ifdef	WITH_CURL
314ff77b
 /*
  * Argument bundle taken by getURL() (see its prototypes immediately below;
  * the CL_THREAD_SAFE variant receives it through a void pointer).
  * NOTE(review): the per-field notes are inferred from the field names only,
  * confirm them against getURL() and checkURLs().
  */
 struct arg {
a95c894a
 	const char *url;	/* presumably the URL to download - confirm */
 	const char *dir;	/* presumably the directory to save into - confirm */
314ff77b
 	char *filename;	/* presumably the name of the saved file - confirm */
 };
 #ifdef	CL_THREAD_SAFE
 static	void	*getURL(void *a);
 #else
 static	void	*getURL(struct arg *arg);
 #endif
3fa72383
 #endif
 
b151ef55
 /* Maximum line length according to RFC821 */
 #define	LINE_LENGTH	1000
 
 /* Hashcodes for our hash tables */
 #define	CONTENT_TYPE			1
 #define	CONTENT_TRANSFER_ENCODING	2
 #define	CONTENT_DISPOSITION		3
 
 /* Mime sub types */
 #define	PLAIN		1
 #define	ENRICHED	2
 #define	HTML		3
 #define	RICHTEXT	4
 #define	MIXED		5
 #define	ALTERNATIVE	6
 #define	DIGEST		7
 #define	SIGNED		8
 #define	PARALLEL	9
 #define	RELATED		10	/* RFC2387 */
 #define	REPORT		11	/* RFC1892 */
fdc8a467
 #define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
49674596
 #define	FAX		MIXED	/*
 				 * RFC3458
 				 * Drafts stated to treat it as mixed if it is
 				 * not known.  This disappeared in the final
 				 * version (except when talking about
 				 * voice-message), but it is good enough for us
 				 * since we do no validation of coversheet
 				 * presence etc. (which also has disappeared
 				 * in the final version)
 				 */
b62a19da
 #define	ENCRYPTED	13	/*
 				 * e.g. RFC2015
 				 * Content-Type: multipart/encrypted;
 				 * boundary="nextPart1383049.XCRrrar2yq";
 				 * protocol="application/pgp-encrypted"
 				 */
db09f781
 #define	X_BFILE		RELATED	/*
 				 * BeOS, expects two parts: the file and its
 				 * attributes. The attributes part comes as
 				 *	Content-Type: application/x-be_attribute
 				 *		name="foo"
 				 * I can't find where it is defined, any
 				 * pointers would be appreciated. For now
 				 * we treat it as multipart/related
 				 */
cbc2eaa9
 #define	KNOWBOT		14	/* Unknown and undocumented format? */
b151ef55
 
 /*
  * Static name -> code lookup data.
  *	rfc821headers:	header names mapped to the CONTENT_* codes
  *	mimeSubtypes:	MIME subtype names mapped to the subtype codes
  * Both arrays end with an entry whose key is NULL. Several subtype names
  * share a code: FAX is defined as MIXED, X_BFILE as RELATED, and all of the
  * knowbot variants map to KNOWBOT.
  * NOTE(review): presumably these are loaded into the lookup tables by
  * initialiseTables() - confirm, that function is not shown here.
  */
 static	const	struct tableinit {
 	const	char	*key;	/* name to look up */
 	int	value;	/* code associated with that name */
 } rfc821headers[] = {
68badbc1
 	/* TODO: make these regular expressions */
b759d5eb
 	{	"Content-Type",			CONTENT_TYPE		},
39ff42ee
 	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
 	{	"Content-Disposition",		CONTENT_DISPOSITION	},
b151ef55
 	{	NULL,				0			}	/* end of table marker */
4fc38d69
 }, mimeSubtypes[] = {	/* see RFC2045 */
b151ef55
 		/* subtypes of Text */
 	{	"plain",	PLAIN		},
 	{	"enriched",	ENRICHED	},
 	{	"html",		HTML		},
 	{	"richtext",	RICHTEXT	},
 		/* subtypes of Multipart */
 	{	"mixed",	MIXED		},
 	{	"alternative",	ALTERNATIVE	},
 	{	"digest",	DIGEST		},
 	{	"signed",	SIGNED		},
 	{	"parallel",	PARALLEL	},
 	{	"related",	RELATED		},
 	{	"report",	REPORT		},
fdc8a467
 	{	"appledouble",	APPLEDOUBLE	},
49674596
 	{	"fax-message",	FAX		},	/* FAX is an alias for MIXED */
b62a19da
 	{	"encrypted",	ENCRYPTED	},
db09f781
 	{	"x-bfile",	X_BFILE		},	/* BeOS; X_BFILE is an alias for RELATED */
cbc2eaa9
 	{	"knowbot",		KNOWBOT		},	/* ??? */
 	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
 	{	"knowbot-code",		KNOWBOT		},	/* ??? */
 	{	"knowbot-state",	KNOWBOT		},	/* ??? */
b151ef55
 	{	NULL,		0		}	/* end of table marker */
 };
8a88fb93
 
 #ifdef	CL_THREAD_SAFE
 static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
b151ef55
 
7cef72ea
 /* Maximum filenames under various systems */
 #ifndef	NAME_MAX	/* e.g. Linux */
 
 #ifdef	MAXNAMELEN	/* e.g. Solaris */
 #define	NAME_MAX	MAXNAMELEN
 #else
 
 #ifdef	FILENAME_MAX	/* e.g. SCO */
 #define	NAME_MAX	FILENAME_MAX
 #endif
 
 #endif
 
 #endif
 
0dbec6b9
 #ifndef	O_BINARY
 #define	O_BINARY	0
 #endif
 
b151ef55
 /*
  * TODO: when signal handling is added, need to remove temp files when a
7d3d11d0
  *	signal is received
b151ef55
  * TODO: add option to scan in memory not via temp files, perhaps with a
74b5c349
  * named pipe or memory mapped file, though this won't work on big e-mails
  * containing many levels of encapsulated messages - it'd just take too much
  * RAM
c6259ac5
  * TODO: parse .msg format files
fdc8a467
  * TODO: fully handle AppleDouble format, see
7d3d11d0
  *	http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
f54a8635
  * TODO: ensure parseEmailHeaders is always called before parseEmailBody
  * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
f1c33aa0
  * TODO: Look into TNEF. Is there anything that needs to be done here?
b151ef55
  */
 int
5431ebba
 cli_mbox(const char *dir, int desc, unsigned int options)
b151ef55
 {
c6259ac5
 	int retcode, i;
f54a8635
 	message *m, *body;
b151ef55
 	FILE *fd;
802c37fc
 	char buffer[LINE_LENGTH + 1];
0e3b08fc
 #ifdef HAVE_BACKTRACE
98685ac1
 	void (*segv)(int);
 #endif
49674596
 	static table_t *rfc821, *subtype;
b151ef55
 
 	cli_dbgmsg("in mbox()\n");
 
c6259ac5
 	i = dup(desc);
 	if((fd = fdopen(i, "rb")) == NULL) {
 		cli_errmsg("Can't open descriptor %d\n", desc);
 		close(i);
7d3d11d0
 		return CL_EOPEN;
c6259ac5
 	}
802c37fc
 	if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
c6259ac5
 		/* empty message */
 		fclose(fd);
7d3d11d0
 		return CL_CLEAN;
c6259ac5
 	}
b151ef55
 	m = messageCreate();
7b8fb055
 	if(m == NULL) {
c6259ac5
 		fclose(fd);
7d3d11d0
 		return CL_EMEM;
7b8fb055
 	}
 
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&tables_mutex);
 #endif
49674596
 	if(rfc821 == NULL) {
 		assert(subtype == NULL);
7b8fb055
 
49674596
 		if(initialiseTables(&rfc821, &subtype) < 0) {
 			rfc821 = NULL;
 			subtype = NULL;
8a88fb93
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&tables_mutex);
 #endif
7b8fb055
 			messageDestroy(m);
 			fclose(fd);
7d3d11d0
 			return CL_EMEM;
7b8fb055
 		}
b151ef55
 	}
8a88fb93
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&tables_mutex);
 #endif
b151ef55
 
89e9a596
 #ifdef HAVE_BACKTRACE
98685ac1
 	segv = signal(SIGSEGV, sigsegv);
 #endif
 
f54a8635
 	/*
 	 * is it a UNIX style mbox with more than one
 	 * mail message, or just a single mail message?
 	 */
 	if(strncmp(buffer, "From ", 5) == 0) {
b151ef55
 		/*
c6259ac5
 		 * Have been asked to check a UNIX style mbox file, which
 		 * may contain more than one e-mail message to decode
b151ef55
 		 */
f54a8635
 		bool lastLineWasEmpty = FALSE;
bf497d0a
 		int messagenumber = 1;
b151ef55
 
c6259ac5
 		do {
 			/*cli_dbgmsg("read: %s", buffer);*/
b151ef55
 
f54a8635
 			cli_chomp(buffer);
 			if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
bf497d0a
 				cli_dbgmsg("Deal with email number %d\n", messagenumber++);
b151ef55
 				/*
f54a8635
 				 * End of a message in the mail box
b151ef55
 				 */
de617e3e
 				body = parseEmailHeaders(m, rfc821);
f2b068fb
 				if(body == NULL) {
 					messageReset(m);
 					continue;
 				}
f54a8635
 				messageDestroy(m);
 				if(messageGetBody(body))
565c449d
 					if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) {
09ccd6e0
 						messageReset(body);
 						m = body;
 						continue;
 					}
b151ef55
 				/*
f54a8635
 				 * Starting a new message, throw away all the
 				 * information about the old one
b151ef55
 				 */
f54a8635
 				m = body;
 				messageReset(body);
b151ef55
 
c6259ac5
 				cli_dbgmsg("Finished processing message\n");
f54a8635
 			} else
a66ca28a
 				lastLineWasEmpty = (bool)(buffer[0] == '\0');
de617e3e
 			if(messageAddStr(m, buffer) < 0)
4c927f11
 				break;
802c37fc
 		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
bf497d0a
 
 		cli_dbgmsg("Deal with email number %d\n", messagenumber);
f2b068fb
 	} else {
7fca6080
 		/*
 		 * It's a single message, parse the headers then the body
f2b068fb
 		 * Ignore blank lines at the start of the message
 		 */
f1c33aa0
 		if(strncmp(buffer, "P I ", 4) == 0)
 			/*
 			 * CommuniGate Pro format: ignore headers until
 			 * blank line
 			 */
802c37fc
 			while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
f1c33aa0
 				(strchr("\r\n", buffer[0]) == NULL))
 					;
 		/*
 		 * Ignore any blank lines at the top of the message
 		 */
f2b068fb
 		while(strchr("\r\n", buffer[0]) &&
802c37fc
 		     (fgets(buffer, sizeof(buffer) - 1, fd) != NULL))
6b93ea0c
 			;
 
802c37fc
 		buffer[LINE_LENGTH] = '\0';
 
f2b068fb
 		/*
 		 * FIXME: files full of new lines and nothing else are
 		 * handled ungracefully...
7fca6080
 		 */
de617e3e
 		do {
b759d5eb
 			/*
4465fb04
 			 * TODO: this needlessly creates a message object,
 			 * it'd be better if parseEmailHeaders could also
 			 * read in from a file. I do not want to lump the
 			 * parseEmailHeaders code here, that'd be a duplication
 			 * of code I want to avoid
b759d5eb
 			 */
de617e3e
 			(void)cli_chomp(buffer);
 			if(messageAddStr(m, buffer) < 0)
4c927f11
 				break;
802c37fc
 		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
f2b068fb
 	}
7fca6080
 
b151ef55
 	fclose(fd);
 
7d3d11d0
 	/*
 	 * This is not necessarily true, but since the only options are
 	 * CL_CLEAN and CL_VIRUS this is the better choice. It would be
 	 * nice to have CL_CONTINUESCANNING or something like that
 	 */
 	retcode = CL_CLEAN;
c6259ac5
 
de617e3e
 	body = parseEmailHeaders(m, rfc821);
f54a8635
 	messageDestroy(m);
f2b068fb
 	if(body) {
 		/*
 		 * Write out the last entry in the mailbox
 		 */
 		if(messageGetBody(body))
565c449d
 			if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options))
e745ac7e
 				retcode = CL_EFORMAT;
b151ef55
 
f2b068fb
 		/*
 		 * Tidy up and quit
 		 */
 		messageDestroy(body);
 	}
b151ef55
 
 	cli_dbgmsg("cli_mbox returning %d\n", retcode);
 
89e9a596
 #ifdef HAVE_BACKTRACE
98685ac1
 	signal(SIGSEGV, segv);
 #endif
 
b151ef55
 	return retcode;
 }
 
 /*
7fca6080
  * The given message contains a raw e-mail.
  *
  * This function parses the headers of m and sets the message's arguments
68be129f
  *
  * Returns the message's body with the correct arguments set
735377bc
  *
  * The downside of this approach is that for a short time we have two copies
  * of the message in memory, the upside is that it makes for easier parsing
  * of encapsulated messages, and in the long run uses less memory in those
  * scenarios
7fca6080
  */
68be129f
 static message *
de617e3e
 parseEmailHeaders(const message *m, const table_t *rfc821)
7fca6080
 {
68be129f
 	bool inHeader = TRUE;
9fc8173e
 	bool contMarker = FALSE;
de617e3e
 	const text *t;
f54a8635
 	message *ret;
f2b068fb
 	bool anyHeadersFound = FALSE;
4fc38d69
 	int commandNumber = -1;
d768ac5a
 	char *fullline = NULL;
ad642304
 	size_t fulllinelength = 0;
f54a8635
 
98685ac1
 	cli_dbgmsg("parseEmailHeaders\n");
 
f54a8635
 	if(m == NULL)
 		return NULL;
 
 	ret = messageCreate();
7fca6080
 
de617e3e
 	for(t = messageGetBody(m); t; t = t->t_next) {
 		const char *buffer;
7fca6080
 
de617e3e
 		if(t->t_line)
 			buffer = lineGetData(t->t_line);
 		else
98685ac1
 			buffer = NULL;
7fca6080
 
a1c924f9
 		cli_dbgmsg("parseEmailHeaders: check '%s'\n", buffer ? buffer : "");
 
b4cb4486
 		if(inHeader) {
9180b8bb
 			if((buffer == NULL) && !contMarker) {
b4cb4486
 				/*
 				 * A blank line signifies the end of the header
 				 * and the start of the text
 				 */
7fca6080
 				cli_dbgmsg("End of header information\n");
d32343c3
 				inHeader = FALSE;
ad642304
 			} else {
0856891e
 				char *ptr;
ad642304
 				const char *qptr;
 				int quotes;
0856891e
 
 				if(buffer == NULL) {
 					contMarker = FALSE;
 					continue;
 				}
ad642304
 
 				if(fullline == NULL) {
9180b8bb
 					char cmd[LINE_LENGTH + 1];
 
 					/*
 					 * Continuation of line we're ignoring?
 					 */
 					if((buffer[0] == '\t') || (buffer[0] == ' ') || contMarker) {
 						contMarker = continuationMarker(buffer);
 						continue;
 					}
 
 					/*
 					 * Is this a header we're interested in?
 					 */
0856891e
 					if((strchr(buffer, ':') == NULL) ||
 					   (cli_strtokbuf(buffer, 0, ":", cmd) == NULL)) {
 						if(strncmp(buffer, "From ", 5) == 0)
 							anyHeadersFound = TRUE;
9180b8bb
 						continue;
0856891e
 					}
9180b8bb
 
a1c924f9
 					ptr = rfc822comments(cmd);
 					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
 					if(ptr)
 						free(ptr);
9180b8bb
 
 					switch(commandNumber) {
 						case CONTENT_TRANSFER_ENCODING:
 						case CONTENT_DISPOSITION:
 						case CONTENT_TYPE:
0856891e
 							anyHeadersFound = TRUE;
9180b8bb
 							break;
 						default:
0856891e
 							if(strcasecmp(cmd, "From") == 0)
 								anyHeadersFound = TRUE;
 							else if(strcasecmp(cmd, "Received") == 0)
 								anyHeadersFound = TRUE;
 							else if(strcasecmp(cmd, "De") == 0)
 								anyHeadersFound = TRUE;
9180b8bb
 							continue;
 					}
 					fullline = strdup(buffer);
 					fulllinelength = strlen(buffer) + 1;
 				} else if(buffer) {
 					fulllinelength += strlen(buffer);
 					fullline = cli_realloc(fullline, fulllinelength);
 					strcat(fullline, buffer);
ad642304
 				}
3a0946f5
 
9fc8173e
 				contMarker = continuationMarker(buffer);
9180b8bb
 
ad642304
 				if(contMarker)
 					continue;
 
 				if(t->t_next && (t->t_next->t_line != NULL)) {
 					const char *next = lineGetData(t->t_next->t_line);
 
 					/*
0856891e
 					 * Section B.2 of RFC822 says TAB or
 					 * SPACE means a continuation of the
 					 * previous entry.
ad642304
 					 *
 					 * Add all the arguments on the line
 					 */
 					if((next[0] == '\t') || (next[0] == ' '))
 						continue;
 				}
 
 				quotes = 0;
 				for(qptr = buffer; *qptr; qptr++)
 					if(*qptr == '\"')
 						quotes++;
 
9180b8bb
 				if(quotes & 1)
ad642304
 					continue;
 
 				ptr = rfc822comments(fullline);
 				if(ptr) {
 					free(fullline);
 					fullline = ptr;
 				}
37819555
 
ad642304
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
 					continue;
b4cb4486
 
9180b8bb
 				free(fullline);
 				fullline = NULL;
d32343c3
 			}
9180b8bb
 		} else
09ccd6e0
 			/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
de617e3e
 			if(messageAddLine(ret, t->t_line) < 0)
80a8c7d8
 				break;
ffd59a3e
 	}
68be129f
 
d768ac5a
 	if(fullline) {
ad642304
 		if(*fullline) switch(commandNumber) {
 			case CONTENT_TRANSFER_ENCODING:
 			case CONTENT_DISPOSITION:
 			case CONTENT_TYPE:
 				cli_warnmsg("parseEmailHeaders: Fullline set '%s' - report to bugs@clamav.net\n", fullline);
 		}
d768ac5a
 		free(fullline);
 	}
 
f2b068fb
 	if(!anyHeadersFound) {
 		/*
 		 * False positive in believing we have an e-mail when we don't
 		 */
 		messageDestroy(ret);
 		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
 		return NULL;
 	}
 
4465fb04
 	messageClean(ret);
 
09ccd6e0
 	cli_dbgmsg("parseEmailHeaders: return\n");
 
68be129f
 	return ret;
7fca6080
 }
 
 /*
8c0250d5
  * Handle a header line of an email message
  */
 static int
49674596
 parseEmailHeader(message *m, const char *line, const table_t *rfc821)
8c0250d5
 {
de509b8e
 	char *cmd;
8c0250d5
 	int ret = -1;
 #ifdef CL_THREAD_SAFE
 	char *strptr;
 #endif
31b05bcb
 	const char *separater;
0674e2af
 	char *copy, tokenseparater[2];
8c0250d5
 
0704dad8
 	cli_dbgmsg("parseEmailHeader '%s'\n", line);
 
31b05bcb
 	/*
 	 * In RFC822 the separater between the key a value is a colon,
 	 * e.g.	Content-Transfer-Encoding: base64
 	 * However some MUA's are lapse about this and virus writers exploit
 	 * this hole, so we need to check all known possiblities
 	 */
 	for(separater = ":= "; *separater; separater++)
 		if(strchr(line, *separater) != NULL)
 			break;
 
 	if(*separater == '\0')
74b5c349
 		return -1;
 
0674e2af
 	copy = rfc2047(line);
 	if(copy == NULL)
 		return -1;
d1382234
 
31b05bcb
 	tokenseparater[0] = *separater;
 	tokenseparater[1] = '\0';
 
897fd9c7
 #ifdef	CL_THREAD_SAFE
31b05bcb
 	cmd = strtok_r(copy, tokenseparater, &strptr);
897fd9c7
 #else
 	cmd = strtok(copy, tokenseparater);
 #endif
8c0250d5
 
3a0ef2ee
 	if(cmd && (strstrip(cmd) > 0)) {
897fd9c7
 #ifdef	CL_THREAD_SAFE
8c0250d5
 		char *arg = strtok_r(NULL, "", &strptr);
897fd9c7
 #else
 		char *arg = strtok(NULL, "");
 #endif
8c0250d5
 
 		if(arg)
 			/*
 			 * Found a header such as
 			 * Content-Type: multipart/mixed;
 			 * set arg to be
 			 * "multipart/mixed" and cmd to
39ff42ee
 			 * be "Content-Type"
8c0250d5
 			 */
49674596
 			ret = parseMimeHeader(m, cmd, rfc821, arg);
8c0250d5
 	}
0674e2af
 	free(copy);
8c0250d5
 	return ret;
 }
 
 /*
b151ef55
  * This is a recursive routine.
  *
7fca6080
  * This function parses the body of mainMessage and saves its attachments in dir
  *
68be129f
  * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
d32343c3
  * any headers. First time of calling it'll be
  * the whole message. Later it'll be parts of a multipart message
b151ef55
  * textIn is the plain text message being built up so far
  *
0bcad2b1
  * Returns:
b151ef55
  *	0 for fail
852e3ce4
  *	1 for success, attachments saved
  *	2 for success, attachments not saved
b151ef55
  */
 static int	/* success or fail */
565c449d
 parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t *rfc821Table, const table_t *subtypeTable, unsigned int options)
b151ef55
 {
6613d595
 	message **messages;	/* parts of a multipart message */
30fb8a0b
 	int inMimeHead, i, rc = 1, htmltextPart, multiparts = 0;
b151ef55
 	text *aText;
 	const char *cptr;
2250ea69
 	message *mainMessage;
565c449d
 	fileblob *fb;
b151ef55
 
565c449d
 	cli_dbgmsg("in parseEmailBody\n");
b151ef55
 
 	aText = textIn;
6613d595
 	messages = NULL;
2250ea69
 	mainMessage = messageIn;
b151ef55
 
 	/* Anything left to be parsed? */
0bcad2b1
 	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
b151ef55
 		mime_type mimeType;
30fb8a0b
 		int subtype, inhead;
b62a19da
 		const char *mimeSubtype, *boundary;
 		char *protocol;
b151ef55
 		const text *t_line;
f5e9abc8
 		/*bool isAlternative;*/
b151ef55
 		message *aMessage;
 
c6259ac5
 		cli_dbgmsg("Parsing mail file\n");
 
b151ef55
 		mimeType = messageGetMimeType(mainMessage);
 		mimeSubtype = messageGetMimeSubtype(mainMessage);
 
5eeffbb9
 		subtype = tableFind(subtypeTable, mimeSubtype);
 		if((mimeType == TEXT) && (subtype == PLAIN)) {
b151ef55
 			/*
 			 * This is effectively no encoding, notice that we
 			 * don't check that charset is us-ascii
 			 */
 			cli_dbgmsg("assume no encoding\n");
 			mimeType = NOMIME;
5eeffbb9
 			messageSetMimeSubtype(mainMessage, NULL);
b151ef55
 		}
 
c6259ac5
 		cli_dbgmsg("mimeType = %d\n", mimeType);
 
b151ef55
 		switch(mimeType) {
 		case NOMIME:
 			aText = textAddMessage(aText, mainMessage);
 			break;
 		case TEXT:
5eeffbb9
 			if(subtype == PLAIN)
89e9a596
 				/*
 				 * Consider what to do if this fails
 				 * (i.e. aText == NULL):
 				 * We mustn't just return since that could
 				 * cause a virus to be missed that we
 				 * could be capable of scanning. Ignoring
 				 * the error is probably the safest, we may be
 				 * able to scan anyway and we lose nothing
 				 */
b151ef55
 				aText = textCopy(messageGetBody(mainMessage));
5eeffbb9
 			else if((options&CL_SCAN_MAILURL) && (subtype == HTML))
 				checkURLs(mainMessage, dir);
b151ef55
 			break;
 		case MULTIPART:
 			boundary = messageFindArgument(mainMessage, "boundary");
 
 			if(boundary == NULL) {
 				cli_warnmsg("Multipart MIME message contains no boundaries\n");
2227f20e
 				/* Broken e-mail message */
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
 				 * check if the file contains a uuencoded file
 				 */
 				break;
b151ef55
 			}
 
cbc2eaa9
 			/* Perhaps it should assume mixed? */
93002b48
 			if(mimeSubtype[0] == '\0') {
 				cli_warnmsg("Multipart has no subtype assuming alternative\n");
 				mimeSubtype = "alternative";
 				messageSetMimeSubtype(mainMessage, "alternative");
 			}
 
b151ef55
 			/*
 			 * Get to the start of the first message
 			 */
0704dad8
 			t_line = messageGetBody(mainMessage);
 
 			if(t_line == NULL) {
 				cli_warnmsg("Multipart MIME message has no body\n");
 				free((char *)boundary);
 				mimeType = NOMIME;
 				break;
 			}
 
 			do
74ca33e9
 				if(t_line->t_line) {
 					if(boundaryStart(lineGetData(t_line->t_line), boundary))
 						break;
 					/*
0856891e
 					 * Found a uuencoded/binhex file before the first multipart
 					 * TODO: check yEnc
74ca33e9
 					 */
0856891e
 					if(uuencodeBegin(mainMessage) == t_line) {
74ca33e9
 						if(messageGetEncoding(mainMessage) == NOENCODING) {
 							messageSetEncoding(mainMessage, "x-uuencode");
 							fb = messageToFileblob(mainMessage, dir);
 
 							if(fb)
 								fileblobDestroy(fb);
 						}
0856891e
 					} else if(binhexBegin(mainMessage) == t_line) {
 						if(messageGetEncoding(mainMessage) == NOENCODING) {
 							messageSetEncoding(mainMessage, "x-binhex");
 							fb = messageToFileblob(mainMessage, dir);
 
 							if(fb)
 								fileblobDestroy(fb);
 						}
9f43cc75
 					} else if(encodingLine(mainMessage) == t_line->t_next) {
 						/*
 						 * We look for the next line
 						 * since later on we'll skip
 						 * over the important line when
 						 * we think it's a blank line
 						 * at the top of the message -
 						 * which it would have been in
 						 * an RFC compliant world
 						 */
 						cli_dbgmsg("Found MIME attachment before the first MIME section\n");
 						if(messageGetEncoding(mainMessage) == NOENCODING)
 							break;
0856891e
 					}
74ca33e9
 				}
0704dad8
 			while((t_line = t_line->t_next) != NULL);
b151ef55
 
 			if(t_line == NULL) {
b4cb4486
 				cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
bf8ea488
 				/*
 				 * Free added by Thomas Lamy
 				 * <Thomas.Lamy@in-online.net>
 				 */
 				free((char *)boundary);
2227f20e
 				mimeType = NOMIME;
 				/*
 				 * The break means that we will still
 				 * check if the file contains a uuencoded file
 				 */
 				break;
b151ef55
 			}
 			/*
 			 * Build up a table of all of the parts of this
 			 * multipart message. Remember, each part may itself
 			 * be a multipart message.
 			 */
 			inhead = 1;
 			inMimeHead = 0;
 
68be129f
 			/*
b62a19da
 			 * Parse the mainMessage object and create an array
 			 * of objects called messages, one for each of the
 			 * multiparts that mainMessage contains
 			 *
68be129f
 			 * This looks like parseEmailHeaders() - maybe there's
 			 * some duplication of code to be cleaned up
 			 */
6613d595
 			for(multiparts = 0; t_line; multiparts++) {
26564cf5
 				int lines = 0;
79e432d2
 				message **m;
26564cf5
 
79e432d2
 				m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
d32343c3
 				if(m == NULL)
79e432d2
 					break;
 				messages = m;
6613d595
 
b151ef55
 				aMessage = messages[multiparts] = messageCreate();
89e9a596
 				if(aMessage == NULL) {
 					multiparts--;
 					continue;
 				}
b151ef55
 
 				cli_dbgmsg("Now read in part %d\n", multiparts);
 
0bf1353d
 				/*
 				 * Ignore blank lines. There shouldn't be ANY
 				 * but some viruses insert them
 				 */
98685ac1
 				while((t_line = t_line->t_next) != NULL)
de617e3e
 					if(t_line->t_line &&
 					   /*(cli_chomp(t_line->t_text) > 0))*/
 					   (strlen(lineGetData(t_line->t_line)) > 0))
784e2335
 						break;
0bf1353d
 
 				if(t_line == NULL) {
 					cli_dbgmsg("Empty part\n");
61db35a1
 					/*
 					 * Remove this part unless there's
 					 * a uuencoded portion somewhere in
 					 * the complete message that we may
 					 * throw away by mistake if the MIME
 					 * encoding information is incorrect
 					 */
 					if(uuencodeBegin(mainMessage) == NULL) {
 						messageDestroy(aMessage);
 						--multiparts;
 					}
0bf1353d
 					continue;
 				}
 
 				do {
de617e3e
 					const char *line = lineGetData(t_line->t_line);
b151ef55
 
39b5a552
 					/*printf("inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
30fb8a0b
 						inMimeHead, inhead, boundary, line,
 						t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
b151ef55
 
e0377124
 					if(inMimeHead) {	/* continuation line */
98685ac1
 						if(line == NULL) {
699fafc3
 							/*inhead =*/ inMimeHead = 0;
98685ac1
 							continue;
 						}
7baeb4a6
 						/*
 						 * Handle continuation lines
 						 * because the previous line
21cd233d
 						 * ended with a ; or this line
 						 * starts with a white space
7baeb4a6
 						 */
21cd233d
 						cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
 							multiparts, line);
7baeb4a6
 						/*
 						 * Handle the case when it
 						 * isn't really a continuation
 						 * line:
 						 * Content-Type: application/octet-stream;
 						 * Content-Transfer-Encoding: base64
 						 */
 						parseEmailHeader(aMessage, line, rfc821Table);
 
b151ef55
 						while(isspace((int)*line))
 							line++;
 
 						if(*line == '\0') {
 							inhead = inMimeHead = 0;
 							continue;
 						}
 						/*
 						 * This may cause a trailing ';'
 						 * to be added if this test
 						 * fails - TODO: verify this
 						 */
 						inMimeHead = continuationMarker(line);
 						messageAddArgument(aMessage, line);
e0377124
 					} else if(inhead) {	/* handling normal headers */
ad642304
 						char *ptr;
 
98685ac1
 						if(line == NULL) {
 							/* empty line */
699fafc3
 							cli_dbgmsg("Multipart %d: End of header information\n",
 								multiparts);
b151ef55
 							inhead = 0;
 							continue;
 						}
a64bf87e
 						if(isspace((int)*line)) {
 							/*
 							 * The first line is
 							 * continuation line.
 							 * This is tricky
 							 * to handle, but
 							 * all we can do is our
 							 * best
 							 */
 							cli_dbgmsg("Part %d starts with a continuation line\n",
 								multiparts);
 							messageAddArgument(aMessage, line);
 							/*
 							 * Give it a default
 							 * MIME type since
 							 * that may be the
 							 * missing line
 							 *
 							 * Choose application to
 							 * force a save
 							 */
 							if(messageGetMimeType(aMessage) == NOMIME)
 								messageSetMimeType(aMessage, "application");
 							continue;
 						}
 
b151ef55
 						/*
 						 * Some clients are broken and
 						 * put white space after the ;
 						 */
 						inMimeHead = continuationMarker(line);
de617e3e
 						if(!inMimeHead) {
 							const text *next = t_line->t_next;
ad642304
 							char *fullline;
e0377124
 							int quotes = 0;
 							const char *qptr;
de617e3e
 
37819555
 							assert(strlen(line) <= LINE_LENGTH);
e0377124
 
ad642304
 							fullline = rfc822comments(line);
 							if(fullline == NULL)
 								fullline = strdup(line);
 
 							for(qptr = fullline; *qptr; qptr++)
e0377124
 								if(*qptr == '\"')
 									quotes++;
 
21cd233d
 							/*
 							 * Fold next lines to the end of this
 							 * if they start with a white space
e0377124
 							 * or if this line has an odd number of quotes:
 							 * Content-Type: application/octet-stream; name="foo
 							 * "
21cd233d
 							 */
 							while(next && next->t_line) {
de617e3e
 								const char *data = lineGetData(next->t_line);
21cd233d
 
e0377124
 								if((!isspace(data[0])) &&
 								   ((quotes & 1) == 0))
21cd233d
 									break;
 
 								ptr = cli_realloc(fullline,
 									strlen(fullline) + strlen(data) + 1);
 
 								if(ptr == NULL)
 									break;
de617e3e
 
21cd233d
 								fullline = ptr;
 								strcat(fullline, data);
 
e0377124
 								for(qptr = data; *qptr; qptr++)
 									if(*qptr == '\"')
 										quotes++;
 
21cd233d
 								t_line = next;
 								next = next->t_next;
de617e3e
 							}
21cd233d
 							cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
 								multiparts, fullline);
68badbc1
 
21cd233d
 							parseEmailHeader(aMessage, fullline, rfc821Table);
 							free(fullline);
 						} else {
 							cli_dbgmsg("Multipart %d: About to parse header '%s'\n",
 								multiparts, line);
 
ad642304
 							ptr = rfc822comments(line);
 
 							parseEmailHeader(aMessage, (ptr) ? ptr : line, rfc821Table);
 
 							if(ptr)
 								free(ptr);
21cd233d
 						}
b151ef55
 					} else if(endOfMessage(line, boundary)) {
 						/*
 						 * Some viruses put information
 						 * *after* the end of message,
 						 * which presumably some broken
 						 * mail clients find, so we
 						 * can't assume that this
 						 * is the end of the message
 						 */
 						/* t_line = NULL;*/
 						break;
30fb8a0b
 					} else if(boundaryStart(line, boundary)) {
 						inhead = 1;
 						break;
26564cf5
 					} else {
de617e3e
 						if(messageAddLine(aMessage, t_line->t_line) < 0)
79e432d2
 							break;
26564cf5
 						lines++;
 					}
0bf1353d
 				} while((t_line = t_line->t_next) != NULL);
 
b151ef55
 				messageClean(aMessage);
26564cf5
 
 				cli_dbgmsg("Part %d has %d lines\n",
 					multiparts, lines);
b151ef55
 			}
 
 			free((char *)boundary);
 
6638be41
 			/*
cbc2eaa9
 			 * Preprocess. Anything special to be done before
 			 * we handle the multiparts?
b62a19da
 			 */
cbc2eaa9
 			switch(tableFind(subtypeTable, mimeSubtype)) {
 				case KNOWBOT:
 					/* TODO */
 					cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
 					mimeSubtype = "mixed";
 					break;
e279f3ea
 				case -1:
 					/*
 					 * According to section 7.2.6 of
 					 * RFC1521, unrecognised multiparts
 					 * should be treated as multipart/mixed.
 					 */
 					cli_warnmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
 					mimeSubtype = "mixed";
 					break;
cbc2eaa9
 			}
b62a19da
 
 			/*
6638be41
 			 * We've finished message we're parsing
 			 */
 			if(mainMessage && (mainMessage != messageIn)) {
 				messageDestroy(mainMessage);
 				mainMessage = NULL;
2250ea69
 			}
b151ef55
 
6613d595
 			if(multiparts == 0) {
 				if(messages)
 					free(messages);
6638be41
 				return 2;	/* Nothing to do */
6613d595
 			}
6638be41
 
b151ef55
 			cli_dbgmsg("The message has %d parts\n", multiparts);
49674596
 			cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
b151ef55
 
b62a19da
 			/*
 			 * We now have all the parts of the multipart message
 			 * in the messages array:
 			 *	message *messages[multiparts]
 			 * Let's decide what to do with them all
 			 */
b151ef55
 			switch(tableFind(subtypeTable, mimeSubtype)) {
 			case RELATED:
68be129f
 				cli_dbgmsg("Multipart related handler\n");
b151ef55
 				/*
295e425f
 				 * Have a look to see if there's HTML code
 				 * which will need scanning
b151ef55
 				 */
 				aMessage = NULL;
 				assert(multiparts > 0);
 
0bcad2b1
 				htmltextPart = getTextPart(messages, multiparts);
b151ef55
 
0bcad2b1
 				if(htmltextPart >= 0)
 					aText = textAddMessage(aText, messages[htmltextPart]);
b151ef55
 				else
 					/*
295e425f
 					 * There isn't an HTML bit. If there's a
 					 * multipart bit, it'll may be in there
 					 * somewhere
b151ef55
 					 */
 					for(i = 0; i < multiparts; i++)
 						if(messageGetMimeType(messages[i]) == MULTIPART) {
 							aMessage = messages[i];
0bcad2b1
 							htmltextPart = i;
b151ef55
 							break;
 						}
 
74c6f514
 				if(htmltextPart == -1)
295e425f
 					cli_dbgmsg("No HTML code found to be scanned");
74c6f514
 				else {
565c449d
 					rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
74c6f514
 					if(rc == 1) {
 						assert(aMessage == messages[htmltextPart]);
 						messageDestroy(aMessage);
 						messages[htmltextPart] = NULL;
 					}
 				}
b151ef55
 
 				/*
 				 * Fixed based on an idea from Stephen White <stephen@earth.li>
 				 * The message is confused about the difference
 				 * between alternative and related. Badtrans.B
 				 * suffers from this problem.
 				 *
 				 * Fall through in this case:
 				 * Content-Type: multipart/related;
 				 *	type="multipart/alternative"
 				 */
f5e9abc8
 				/*
 				 * Changed to always fall through based on
 				 * an idea from Michael Dankov <misha@btrc.ru>
 				 * that some viruses are completely confused
 				 * about the difference between related
 				 * and mixed
 				 */
 				/*cptr = messageFindArgument(mainMessage, "type");
b151ef55
 				if(cptr == NULL)
 					break;
 				isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
 				free((char *)cptr);
 				if(!isAlternative)
f5e9abc8
 					break;*/
d28e1902
 			case DIGEST:
 				/*
 				 * According to section 5.1.5 RFC2046, the
 				 * default mime type of multipart/digest parts
 				 * is message/rfc822
 				 *
 				 * We consider them as alternative, wrong in
 				 * the strictest sense since they aren't
 				 * alternatives - all parts a valid - but it's
 				 * OK for our needs since it means each part
 				 * will be scanned
 				 */
b151ef55
 			case ALTERNATIVE:
 				cli_dbgmsg("Multipart alternative handler\n");
 
 				/*
 				 * Fall through - some clients are broken and
 				 * say alternative instead of mixed. The Klez
e279f3ea
 				 * virus is broken that way, and anyway we
 				 * wish to scan all of the alternatives
b151ef55
 				 */
 			case REPORT:
 				/*
 				 * According to section 1 of RFC1892, the
 				 * syntax of multipart/report is the same
 				 * as multipart/mixed. There are some required
 				 * parameters, but there's no need for us to
 				 * verify that they exist
 				 */
 			case MIXED:
fdc8a467
 			case APPLEDOUBLE:	/* not really supported */
b151ef55
 				/*
 				 * Look for attachments
 				 *
 				 * Not all formats are supported. If an
 				 * unsupported format turns out to be
 				 * common enough to implement, it is a simple
 				 * matter to add it
 				 */
2250ea69
 				if(aText) {
 					if(mainMessage && (mainMessage != messageIn))
 						messageDestroy(mainMessage);
b151ef55
 					mainMessage = NULL;
2250ea69
 				}
b151ef55
 
 				cli_dbgmsg("Mixed message with %d parts\n", multiparts);
 				for(i = 0; i < multiparts; i++) {
 					bool addAttachment = FALSE;
 					bool addToText = FALSE;
 					const char *dtype;
565c449d
 #ifndef	SAVE_TO_DISC
f54a8635
 					message *body;
d32343c3
 #endif
b151ef55
 
 					aMessage = messages[i];
 
74c6f514
 					if(aMessage == NULL)
 						continue;
b151ef55
 
 					cli_dbgmsg("Mixed message part %d is of type %d\n",
 						i, messageGetMimeType(aMessage));
 
 					switch(messageGetMimeType(aMessage)) {
 					case APPLICATION:
c6259ac5
 						addAttachment = TRUE;
b151ef55
 						break;
 					case NOMIME:
c29ebe66
 						cli_dbgmsg("No mime headers found in multipart part %d\n", i);
7b8fb055
 						if(mainMessage) {
39b5a552
 							if(uuencodeBegin(aMessage)) {
7b8fb055
 								cli_dbgmsg("Found uuencoded message in multipart/mixed mainMessage\n");
 								messageSetEncoding(mainMessage, "x-uuencode");
1e06e1ab
 								fb = messageToFileblob(mainMessage, dir);
7b8fb055
 
1e06e1ab
 								if(fb)
 									fileblobDestroy(fb);
7b8fb055
 							}
 							if(mainMessage != messageIn)
 								messageDestroy(mainMessage);
 							mainMessage = NULL;
30fb8a0b
 						} else if(aMessage) {
39b5a552
 							if(uuencodeBegin(aMessage)) {
30fb8a0b
 								cli_dbgmsg("Found uuencoded message in multipart/mixed non mime part\n");
 								messageSetEncoding(aMessage, "x-uuencode");
 								fb = messageToFileblob(aMessage, dir);
 
 								if(fb)
 									fileblobDestroy(fb);
 								assert(aMessage == messages[i]);
 								messageReset(messages[i]);
39b5a552
 							} else if(binhexBegin(aMessage)) {
 								cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
 								messageSetEncoding(aMessage, "x-binhex");
 								fb = messageToFileblob(aMessage, dir);
 
 								if(fb)
 									fileblobDestroy(fb);
 								assert(aMessage == messages[i]);
 								messageReset(messages[i]);
30fb8a0b
 							}
7b8fb055
 						}
b151ef55
 						addToText = TRUE;
 						if(messageGetBody(aMessage) == NULL)
 							/*
 							 * No plain text version
 							 */
de617e3e
 							messageAddStr(aMessage, "No plain text alternative");
b151ef55
 						assert(messageGetBody(aMessage) != NULL);
 						break;
 					case TEXT:
ef3cf57d
 						dtype = messageGetDispositionType(aMessage);
852e3ce4
 						cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
 							dtype);
b151ef55
 						if(strcasecmp(dtype, "attachment") == 0)
 							addAttachment = TRUE;
 						else if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
2250ea69
 							if(mainMessage && (mainMessage != messageIn))
 								messageDestroy(mainMessage);
b151ef55
 							mainMessage = NULL;
ef3cf57d
 							cptr = messageGetMimeSubtype(aMessage);
e6b25cd3
 							cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
ef3cf57d
 							if(uuencodeBegin(aMessage)) {
852e3ce4
 								cli_dbgmsg("Found uuencoded message in multipart/mixed text portion\n");
 								messageSetEncoding(aMessage, "x-uuencode");
 								addAttachment = TRUE;
c9ae17be
 							} else if(tableFind(subtypeTable, cptr) == PLAIN) {
7584963d
 								char *filename;
852e3ce4
 								/*
 								 * Strictly speaking
c9ae17be
 								 * a text/plain part is
852e3ce4
 								 * not an attachment. We
 								 * pretend it is so that
 								 * we can decode and
 								 * scan it
 								 */
7584963d
 								filename = (char *)messageFindArgument(aMessage, "filename");
 								if(filename == NULL)
 									filename = (char *)messageFindArgument(aMessage, "name");
 
 								if(filename == NULL) {
 									cli_dbgmsg("Adding part to main message\n");
 									addToText = TRUE;
 								} else {
 									cli_dbgmsg("Treating %s as attachment\n",
 										filename);
 									free(filename);
 									addAttachment = TRUE;
 								}
852e3ce4
 							} else {
06d4e856
 								if(options&CL_SCAN_MAILURL)
c9ae17be
 									if(tableFind(subtypeTable, cptr) == HTML)
 										checkURLs(aMessage, dir);
b151ef55
 								messageAddArgument(aMessage, "filename=textportion");
 								addAttachment = TRUE;
 							}
 						} else {
b4cb4486
 							cli_dbgmsg("Text type %s is not supported\n", dtype);
b151ef55
 							continue;
 						}
 						break;
 					case MESSAGE:
2e0f78a6
 						/* Content-Type: message/rfc822 */
b151ef55
 						cli_dbgmsg("Found message inside multipart\n");
ffd59a3e
 						if(encodingLine(aMessage) == NULL) {
 							assert(aMessage == messages[i]);
 							messageDestroy(messages[i]);
 							messages[i] = NULL;
af852ae0
 							continue;
ffd59a3e
 						}
de617e3e
 						messageAddStrAtTop(aMessage,
0856891e
 							"Received: by clamd (message/rfc822)");
4465fb04
 #ifdef	SAVE_TO_DISC
 						/*
 						 * Save this embedded message
 						 * to a temporary file
 						 */
 						saveTextPart(aMessage, dir);
 						assert(aMessage == messages[i]);
 						messageDestroy(messages[i]);
 						messages[i] = NULL;
 #else
 						/*
 						 * Scan in memory, faster but
 						 * is open to DoS attacks when
 						 * many nested levels are
 						 * involved.
 						 */
735377bc
 						body = parseEmailHeaders(aMessage, rfc821Table, TRUE);
bad123c6
 						/*
 						 * We've fininished with the
 						 * original copy of the message,
 						 * so throw that away and
 						 * deal with the encapsulated
 						 * message as a message.
 						 * This can save a lot of memory
 						 */
 						assert(aMessage == messages[i]);
 						messageDestroy(messages[i]);
 						messages[i] = NULL;
f54a8635
 						if(body) {
565c449d
 							rc = parseEmailBody(body, NULL, dir, rfc821Table, subtypeTable, options);
f54a8635
 							messageDestroy(body);
 						}
4465fb04
 #endif
b151ef55
 						continue;
 					case MULTIPART:
 						/*
 						 * It's a multi part within a multi part
 						 * Run the message parser on this bit, it won't
 						 * be an attachment
 						 */
 						cli_dbgmsg("Found multipart inside multipart\n");
d32343c3
 						if(aMessage) {
 							/*
 							 * The headers were parsed when reading in the
 							 * whole multipart section
 							 */
565c449d
 							rc = parseEmailBody(aMessage, aText, dir, rfc821Table, subtypeTable, options);
d32343c3
 							cli_dbgmsg("Finished recursion\n");
 							assert(aMessage == messages[i]);
 							messageDestroy(messages[i]);
 							messages[i] = NULL;
f54a8635
 						} else {
565c449d
 							rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options);
2250ea69
 							if(mainMessage && (mainMessage != messageIn))
 								messageDestroy(mainMessage);
f54a8635
 							mainMessage = NULL;
 						}
b151ef55
 						continue;
 					case AUDIO:
 					case IMAGE:
c7256385
 					case VIDEO:
b151ef55
 						addAttachment = TRUE;
 						break;
 					default:
c7256385
 						cli_warnmsg("Only text and application attachments are supported, type = %d\n",
b151ef55
 							messageGetMimeType(aMessage));
 						continue;
 					}
 
 					/*
 					 * It must be either text or
 					 * an attachment. It can't be both
 					 */
 					assert(addToText || addAttachment);
 					assert(!(addToText && addAttachment));
 
c29ebe66
 					if(addToText) {
 						cli_dbgmsg("Adding to non mime-part\n");
b151ef55
 						aText = textAdd(aText, messageGetBody(aMessage));
c29ebe66
 					} else {
565c449d
 						fb = messageToFileblob(aMessage, dir);
b151ef55
 
1e06e1ab
 						if(fb)
 							fileblobDestroy(fb);
b151ef55
 					}
6638be41
 					assert(aMessage == messages[i]);
 					messageDestroy(messages[i]);
 					messages[i] = NULL;
b151ef55
 				}
 
565c449d
 				/* rc = parseEmailBody(NULL, NULL, dir, rfc821Table, subtypeTable, options); */
b151ef55
 				break;
 			case SIGNED:
 			case PARALLEL:
 				/*
 				 * If we're here it could be because we have a
 				 * multipart/mixed message, consisting of a
 				 * message followed by an attachment. That
 				 * message itself is a multipart/alternative
 				 * message and we need to dig out the plain
 				 * text part of that alternative
 				 */
0bcad2b1
 				htmltextPart = getTextPart(messages, multiparts);
 				if(htmltextPart == -1)
 					htmltextPart = 0;
b151ef55
 
565c449d
 				rc = parseEmailBody(messages[htmltextPart], aText, dir, rfc821Table, subtypeTable, options);
b151ef55
 				break;
b62a19da
 			case ENCRYPTED:
 				rc = 0;
cbc2eaa9
 				protocol = (char *)messageFindArgument(mainMessage, "protocol");
b62a19da
 				if(protocol) {
 					if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
 						/* RFC2015 */
 						cli_warnmsg("PGP encoded attachment not scanned\n");
 						rc = 2;
 					} else
 						cli_warnmsg("Unknown encryption protocol '%s' - report to bugs@clamav.net\n");
 					free(protocol);
 				} else
 					cli_warnmsg("Encryption method missing protocol name - report to bugs@clamav.net\n");
 
 				break;
b151ef55
 			default:
e279f3ea
 				assert(0);
b151ef55
 			}
 
2250ea69
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
 
c29ebe66
 			if(aText && (textIn == NULL)) {
 				if((fb = fileblobCreate()) != NULL) {
 					cli_dbgmsg("Save non mime part\n");
 					fileblobSetFilename(fb, dir, "textpart");
0856891e
 					fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);
c29ebe66
 
 					fb = textToFileblob(aText, fb);
 
 					fileblobDestroy(fb);
 				}
c6259ac5
 				textDestroy(aText);
c29ebe66
 			}
c6259ac5
 
0856891e
 			for(i = 0; i < multiparts; i++)
 				if(messages[i])
 					messageDestroy(messages[i]);
 
6613d595
 			if(messages)
 				free(messages);
 
b151ef55
 			return rc;
 
 		case MESSAGE:
 			/*
 			 * Check for forbidden encodings
 			 */
 			switch(messageGetEncoding(mainMessage)) {
 				case NOENCODING:
 				case EIGHTBIT:
 				case BINARY:
 					break;
 				default:
c6259ac5
 					cli_warnmsg("MIME type 'message' cannot be decoded\n");
b151ef55
 					break;
 			}
9a7398ee
 			rc = 0;
c6259ac5
 			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
 			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
de617e3e
 				message *m = parseEmailHeaders(mainMessage, rfc821Table);
bad123c6
 				if(m) {
 					cli_dbgmsg("Decode rfc822");
 
4465fb04
 					if(mainMessage && (mainMessage != messageIn)) {
 						messageDestroy(mainMessage);
 						mainMessage = NULL;
74c6f514
 					} else
 						messageReset(mainMessage);
bad123c6
 					if(messageGetBody(m))
565c449d
 						rc = parseEmailBody(m, NULL, dir, rfc821Table, subtypeTable, options);
bad123c6
 
 					messageDestroy(m);
 				}
b151ef55
 				break;
f8c25c7a
 			} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
d6e30cce
 				/* RFC 2298 - handle like a normal email */
f8c25c7a
 				rc = 1;
d6e30cce
 				break;
f8c25c7a
 			} else if(strcasecmp(mimeSubtype, "partial") == 0) {
9a7398ee
 #ifdef	PARTIAL_DIR
 				/* RFC1341 message split over many emails */
 				if(rfc1341(mainMessage, dir) >= 0)
 					rc = 1;
 #else
e94471f4
 				cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
9a7398ee
 				rc = 0;
 #endif
 			} else if(strcasecmp(mimeSubtype, "external-body") == 0)
e94471f4
 				/* TODO */
b151ef55
 				cli_warnmsg("Attempt to send Content-type message/external-body trapped");
bf8ea488
 			else
e94471f4
 				cli_warnmsg("Unsupported message format `%s' - please report to bugs@clamav.net\n", mimeSubtype);
b151ef55
 
9a7398ee
 
2250ea69
 			if(mainMessage && (mainMessage != messageIn))
 				messageDestroy(mainMessage);
6613d595
 			if(messages)
 				free(messages);
9a7398ee
 			return rc;
b151ef55
 
 		case APPLICATION:
0bcad2b1
 			cptr = messageGetMimeSubtype(mainMessage);
 
04421a14
 			/*if((strcasecmp(cptr, "octet-stream") == 0) ||
 			   (strcasecmp(cptr, "x-msdownload") == 0)) {*/
 			{
565c449d
 				fb = messageToFileblob(mainMessage, dir);
b151ef55
 
1e06e1ab
 				if(fb) {
 					cli_dbgmsg("Saving main message as attachment\n");
 					fileblobDestroy(fb);
a446de17
 					messageClearMarkers(mainMessage);
b151ef55
 				}
04421a14
 			} /*else
 				cli_warnmsg("Discarded application not sent as attachment\n");*/
b151ef55
 			break;
 
 		case AUDIO:
 		case VIDEO:
 		case IMAGE:
 			break;
 
 		default:
 			cli_warnmsg("Message received with unknown mime encoding");
 			break;
 		}
 	}
 
d32343c3
 	if(aText && (textIn == NULL)) {
0856891e
 		cli_dbgmsg("Non mime part not scanned - if you believe this file contains a virus report to bugs@clamav.net\n");
 		/*if((fb = fileblobCreate()) != NULL) {
 			cli_dbgmsg("Save non mime part\n");
 			fileblobSetFilename(fb, dir, "textpart");
 			fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);
 
 			fb = textToFileblob(aText, fb);
 
 			fileblobDestroy(fb);
 		}*/
d32343c3
 		textDestroy(aText);
 		aText = NULL;
 	}
 
565c449d
 	/*
 	 * No attachments - scan the text portions, often files
 	 * are hidden in HTML code
 	 */
 	cli_dbgmsg("%d multiparts found\n", multiparts);
 	for(i = 0; i < multiparts; i++) {
 		fb = messageToFileblob(messages[i], dir);
b151ef55
 
565c449d
 		if(fb) {
0e3b08fc
 			cli_dbgmsg("Saving multipart %d\n", i);
0bcad2b1
 
565c449d
 			fileblobDestroy(fb);
 		}
 	}
 
 	if(mainMessage) {
b151ef55
 		/*
565c449d
 		 * Look for uu-encoded main file
b151ef55
 		 */
565c449d
 		const text *t_line;
 
 		if((t_line = uuencodeBegin(mainMessage)) != NULL) {
 			cli_dbgmsg("Found uuencoded file\n");
0bcad2b1
 
565c449d
 			/*
 			 * Main part contains uuencoded section
 			 */
 			messageSetEncoding(mainMessage, "x-uuencode");
0bcad2b1
 
565c449d
 			if((fb = messageToFileblob(mainMessage, dir)) != NULL) {
 				if((cptr = fileblobGetFilename(fb)) != NULL)
 					cli_dbgmsg("Found uuencoded message %s\n", cptr);
1e06e1ab
 				fileblobDestroy(fb);
 			}
0856891e
 			rc = 1;
565c449d
 		} else if((encodingLine(mainMessage) != NULL) &&
f1c33aa0
 			  ((t_line = bounceBegin(mainMessage)) != NULL)) {
0856891e
 			const text *t, *start;
0bcad2b1
 			/*
565c449d
 			 * Attempt to save the original (unbounced)
 			 * message - clamscan will find that in the
 			 * directory and call us again (with any luck)
8386482b
 			 * having found an e-mail message to handle.
565c449d
 			 *
 			 * This finds a lot of false positives, the
8386482b
 			 * search that a content type is in the
565c449d
 			 * bounce (i.e. it's after the bounce header)
8386482b
 			 * helps a bit.
 			 *
 			 * messageAddLine
565c449d
 			 * optimisation could help here, but needs
 			 * careful thought, do it with line numbers
 			 * would be best, since the current method in
 			 * messageAddLine of checking encoding first
 			 * must remain otherwise non bounce messages
 			 * won't be scanned
0bcad2b1
 			 */
0856891e
 			for(t = start = t_line; t; t = t->t_next) {
 				char cmd[LINE_LENGTH + 1];
565c449d
 				const char *txt = lineGetData(t->t_line);
 
0856891e
 				if(txt == NULL)
 					continue;
 				if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
 					continue;
 
 				switch(tableFind(rfc821Table, cmd)) {
 					case CONTENT_TRANSFER_ENCODING:
 						if((strstr(txt, "7bit") == NULL) &&
 						   (strstr(txt, "8bit") == NULL))
 							break;
 						continue;
 					case CONTENT_DISPOSITION:
 						break;
 					case CONTENT_TYPE:
 						if(strstr(txt, "text/plain") != NULL)
 							t = NULL;
 						break;
 					default:
 						if(strcasecmp(cmd, "From") == 0)
 							start = t_line;
 						else if(strcasecmp(cmd, "Received") == 0)
 							start = t_line;
 						continue;
8386482b
 				}
0856891e
 				break;
565c449d
 			}
 			if(t && ((fb = fileblobCreate()) != NULL)) {
 				cli_dbgmsg("Found a bounce message\n");
 				fileblobSetFilename(fb, dir, "bounce");
0856891e
 				fb = textToFileblob(start, fb);
565c449d
 				fileblobDestroy(fb);
0856891e
 				rc = 1;
e745ac7e
 			} else
 				cli_dbgmsg("Not found a bounce message\n");
565c449d
 		} else {
 			bool saveIt;
0bcad2b1
 
565c449d
 			cli_dbgmsg("Not found uuencoded file\n");
2227f20e
 
565c449d
 			if(messageGetMimeType(mainMessage) == MESSAGE)
15c8cace
 				/*
565c449d
 				 * Quick peek, if the encapsulated
 				 * message has no
 				 * content encoding statement don't
 				 * bother saving to scan, it's safe
15c8cace
 				 */
565c449d
 				saveIt = (encodingLine(mainMessage) != NULL);
 			else if((t_line = encodingLine(mainMessage)) != NULL) {
92915cee
 				/*
565c449d
 				 * Some bounces include the message
 				 * body without the headers.
 				 * Unfortunately this generates a
 				 * lot of false positives that a bounce
 				 * has been found when it hasn't.
92915cee
 				 */
565c449d
 				if((fb = fileblobCreate()) != NULL) {
 					cli_dbgmsg("Found a bounce message with no header\n");
1e06e1ab
 					fileblobSetFilename(fb, dir, "bounce");
0856891e
 					fileblobAddData(fb, "Received: by clamd (bounce)\n", 28);
5a01973c
 
565c449d
 					fb = textToFileblob(t_line, fb);
b759d5eb
 
565c449d
 					fileblobDestroy(fb);
b759d5eb
 				}
565c449d
 				saveIt = FALSE;
74c6f514
 			} else if(multiparts == 0)
565c449d
 				/*
 				 * Save the entire text portion,
 				 * since it it may be an HTML file with
 				 * a JavaScript virus
 				 */
 				saveIt = TRUE;
74c6f514
 			else
 				saveIt = FALSE;
b151ef55
 
565c449d
 			if(saveIt) {
 				cli_dbgmsg("Saving text part to scan\n");
 				/*
 				 * TODO: May be better to save aText
 				 */
 				saveTextPart(mainMessage, dir);
74c6f514
 				if(mainMessage != messageIn) {
 					messageDestroy(mainMessage);
 					mainMessage = NULL;
 				} else
 					messageReset(mainMessage);
 				rc = 1;
c6259ac5
 			}
b151ef55
 		}
565c449d
 	} else
 		rc = (multiparts) ? 1 : 2;	/* anything saved? */
b151ef55
 
2250ea69
 	if(mainMessage && (mainMessage != messageIn))
 		messageDestroy(mainMessage);
 
6613d595
 	if(messages)
 		free(messages);
 
68be129f
 	cli_dbgmsg("parseEmailBody() returning %d\n", rc);
b151ef55
 
68be129f
 	return rc;
b151ef55
 }
 
 /*
  * Is the current line the start of a new section?
  *
  * New sections start with --boundary
  */
 static int
 boundaryStart(const char *line, const char *boundary)
 {
 	/*
 	 * Decide whether "line" starts a new MIME section for "boundary".
 	 *
 	 * line:	the line to test, may be NULL (treated as no match)
 	 * boundary:	the boundary string taken from the Content-Type header
 	 *
 	 * Returns 1 if the line is a boundary line, 0 otherwise.
 	 */
 	char *stripped;
 	const char *ptr;
 	int rc;

 	if(line == NULL)
 		return 0;	/* empty line */

 	/*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/

 	/*
 	 * rfc822comments() returns NULL when the line holds no comment (or
 	 * on error), in which case the line is examined as it stands
 	 */
 	stripped = rfc822comments(line);
 	ptr = (stripped) ? stripped : line;

 	if(*ptr++ != '-') {
 		if(stripped)
 			free(stripped);
 		return 0;
 	}

 	/*
 	 * Gibe.B3 is broken, it has:
 	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
 	 * but its boundaries look like
 	 *	------ =_NextPart_000_01C31177.9DC7C000
 	 * notice the one too few '-'.
 	 * Presumably this is a deliberate exploitation of a bug in some mail
 	 * clients.
 	 *
 	 * The trouble is that this creates a lot of false positives for
 	 * boundary conditions, if we're too lax about matches. We do our level
 	 * best to avoid these false positives. For example if we have
 	 * boundary="1" we want to ensure that we don't break out of every line
 	 * that has -1 in it instead of starting --1. This needs some more work.
 	 */
 	if(strstr(ptr, boundary) != NULL)
 		rc = 1;
 	else if(*ptr++ != '-')
 		rc = 0;
 	else
 		/*
 		 * Both leading dashes have been consumed, so compare what
 		 * follows them (not the untouched line, which still starts
 		 * with "--" and could never equal the boundary)
 		 */
 		rc = (strcasecmp(ptr, boundary) == 0);

 	if(stripped)
 		free(stripped);

 	if(rc == 1)
 		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);

 	return rc;
 }
 
 /*
  * Is the current line the end?
  *
  * The message ends with --boundary--
  */
 static int
 endOfMessage(const char *line, const char *boundary)
 {
 	/*
 	 * Returns 1 when "line" has the form --boundary-- (the boundary is
 	 * matched without regard to case and anything may follow the closing
 	 * dashes), 0 otherwise. A NULL line never matches.
 	 */
 	size_t blen;
 	const char *tail;

 	/*cli_dbgmsg("endOfMessage: line = '%s' boundary = '%s'\n", line, boundary);*/

 	/* the line must open with two dashes */
 	if((line == NULL) || (line[0] != '-') || (line[1] != '-'))
 		return 0;
 	line += 2;

 	blen = strlen(boundary);
 	if(strncasecmp(line, boundary, blen) != 0)
 		return 0;

 	/*
 	 * There must be room for the two closing dashes. Longer lines are
 	 * accepted because some broken mails have white space after the
 	 * boundary
 	 */
 	if(strlen(line) < (blen + 2))
 		return 0;

 	tail = line + blen;

 	return (tail[0] == '-') && (tail[1] == '-');
 }
 
 /*
  * Initialise the various lookup tables
  */
 static int
 initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
 {
 	/*
 	 * Create and fill the two lookup tables: *rfc821Table from
 	 * rfc821headers and *subtypeTable from mimeSubtypes.
 	 *
 	 * Returns 0 on success. On failure returns -1, having destroyed
 	 * any table created here and set its pointer back to NULL.
 	 */
 	const struct tableinit *tableinit;

 	/*
 	 * Creation failure is reported to the caller instead of being left
 	 * to assert(), which vanishes when NDEBUG is defined
 	 */
 	*rfc821Table = tableCreate();
 	if(*rfc821Table == NULL)
 		return -1;

 	for(tableinit = rfc821headers; tableinit->key; tableinit++)
 		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
 			tableDestroy(*rfc821Table);
 			*rfc821Table = NULL;
 			return -1;
 		}

 	*subtypeTable = tableCreate();
 	if(*subtypeTable == NULL) {
 		tableDestroy(*rfc821Table);
 		*rfc821Table = NULL;
 		return -1;
 	}

 	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
 		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
 			tableDestroy(*rfc821Table);
 			tableDestroy(*subtypeTable);
 			*rfc821Table = NULL;
 			*subtypeTable = NULL;
 			return -1;
 		}

 	return 0;
 }
 
 /*
0bcad2b1
  * If there's a HTML text version use that, otherwise
b151ef55
  * use the last text part found, otherwise the caller falls back to the
0bcad2b1
  * first one around. HTML text is most likely to include
  * a scripting worm
b151ef55
  *
  * If we can't find one, return -1
  */
 static int
 getTextPart(message *const messages[], size_t size)
 {
 	/*
 	 * Returns the index of the first text/html part. If there is none,
 	 * returns the index of the last part whose MIME type is TEXT, or -1
 	 * when no part is of type TEXT at all.
 	 */
 	int candidate = -1;	/* most recent TEXT part seen so far */
 	size_t n;

 	for(n = 0; n < size; n++) {
 		assert(messages[n] != NULL);

 		if(messageGetMimeType(messages[n]) != TEXT)
 			continue;

 		/* an HTML part wins outright */
 		if(strcasecmp(messageGetMimeSubtype(messages[n]), "html") == 0)
 			return (int)n;

 		candidate = (int)n;
 	}

 	return candidate;
 }
 
 /*
  * strip -
4d9c0ca8
  *	Remove the trailing spaces from a buffer. Don't call this directly,
  * always call strstrip() which is a wrapper to this routine to be used with
  * NUL terminated strings. This code looks a bit strange because of its
  * heritage from code that worked on strings that weren't necessarily NUL
  * terminated.
  * TODO: rewrite for clamAV
  *
b151ef55
  * Returns its new length (a la strlen)
  *
  * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
  *	not strlen(buf)
  */
 static size_t
 strip(char *buf, int len)
 {
 	/*
 	 * Overwrite the trailing white space of buf with NULs.
 	 *
 	 * buf:	NUL terminated buffer to trim in place, may be NULL
 	 * len:	size of the buffer including the terminating NUL
 	 *
 	 * Returns the length of the trimmed string. Returns 0 for a NULL
 	 * buffer or a non-positive len, and strlen(buf) untouched when len
 	 * is larger than the string plus its terminator.
 	 */
 	char *ptr;
 	size_t i;

 	if((buf == NULL) || (len <= 0))
 		return 0;

 	i = strlen(buf);
 	if(len > (int)(i + 1))
 		return i;
 	ptr = &buf[--len];

 	/*
 	 * The <ctype.h> classifiers are only defined for EOF and values
 	 * representable as unsigned char, so plain char must be converted
 	 * before it is handed to them
 	 */
 #if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
 	do
 		if(*ptr)
 			*ptr = '\0';
 	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
 #else	/* more characters can be displayed on DOS */
 	do
 #ifndef	REAL_MODE_DOS
 		if(*ptr)	/* C8.0 puts into a text area */
 #endif
 			*ptr = '\0';
 	while((--len >= 0) && ((*--ptr == '\0') || (isspace((unsigned char)*ptr))));
 #endif
 	return((size_t)(len + 1));
 }
 
 /*
  * strstrip:
  *	Strip a given string
  */
3db105a2
 size_t
 strstrip(char *s)
 {
 	/*
 	 * Trim a NUL terminated string in place via strip(), passing the
 	 * string's size including its terminator. A NULL string yields 0.
 	 */
 	return (s != (char *)NULL) ? strip(s, strlen(s) + 1) : 0;
 }
 
 /*
  * When parsing a MIME header see if this spans more than one line. A
  * semi-colon at the end of the line indicates that the MIME information
  * is continued on the next line.
  *
  * Some clients are broken and put white space after the ;
  */
 static bool
 continuationMarker(const char *line)
 {
 	const char *ptr;
 
98685ac1
 	if(line == NULL)
 		return FALSE;
b151ef55
 
 #ifdef	CL_DEBUG
 	cli_dbgmsg("continuationMarker(%s)\n", line);
 #endif
 
 	if(strlen(line) == 0)
 		return FALSE;
 
 	ptr = strchr(line, '\0');
 
 	assert(ptr != NULL);
 
752c34b9
 	while(ptr > line)
b151ef55
 		switch(*--ptr) {
 			case '\n':
 			case '\r':
 			case ' ':
 			case '\t':
 				continue;
 			case ';':
 				return TRUE;
 			default:
 				return FALSE;
 		}
 
 	return FALSE;
 }
 
 static int
 parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
 {
 	/*
 	 * Apply one MIME header to the message m.
 	 *
 	 * cmd:		the header name, looked up in rfc821Table after any
 	 *		RFC822 comments have been removed
 	 * arg:		the header's value, may be NULL
 	 *
 	 * Content-Type, Content-Transfer-Encoding and Content-Disposition
 	 * are acted upon, any other header is ignored.
 	 *
 	 * Returns 0, or -1 if a working copy of arg can't be allocated.
 	 */
 #ifdef CL_THREAD_SAFE
 	char *strptr;
 #endif
 	char *copy, *ptr;
 	int commandNumber;

 	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, (arg) ? arg : "");

 	ptr = rfc822comments(cmd);
 	if(ptr) {
 		commandNumber = tableFind(rfc821Table, ptr);
 		free(ptr);
 	} else
 		commandNumber = tableFind(rfc821Table, cmd);

 	if(arg == NULL) {
 		/*
 		 * Nothing to copy or tokenise. This must be caught before
 		 * strdup() is reached.
 		 */
 		if(commandNumber == CONTENT_TYPE)
 			/*
 			 * According to section 4 of RFC1521:
 			 * "Note also that a subtype specification is
 			 * MANDATORY. There are no default subtypes"
 			 *
 			 * We have to break this and make an assumption
 			 * for the subtype because virus writers and
 			 * email client writers don't get it right
 			 */
 			cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
 		return 0;
 	}

 	copy = rfc822comments(arg);
 	if(copy == NULL)
 		copy = strdup(arg);
 	if(copy == NULL)
 		return -1;

 	/* copy is advanced below, ptr remembers what has to be freed */
 	ptr = copy;

 	switch(commandNumber) {
 		case CONTENT_TYPE:
 			/*
 			 * Fix for non RFC1521 compliant mailers
 			 * that send content-type: Text instead
 			 * of content-type: Text/Plain, or
 			 * just simply "Content-Type:"
 			 */
 			if(strchr(copy, '/') == NULL)
 				/*
 				 * Empty field, such as
 				 *	Content-Type:
 				 * which I believe is illegal according to
 				 * RFC1521
 				 */
 				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
 			else {
 				int i;
 				char *mimeArgs;	/* RHS of the ; */

 				/*
 				 * Some clients are broken and
 				 * put white space after the ;
 				 */
 				if(*arg == '/') {
 					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
 					messageSetMimeType(m, "application");
 					messageSetMimeSubtype(m, "octet-stream");
 				} else {
 					/*
 					 * The content type could be in quotes:
 					 *	Content-Type: "multipart/mixed"
 					 * FIXME: this is a hack in that ignores
 					 *	the quotes, it doesn't handle
 					 *	them properly
 					 */
 					while(isspace((unsigned char)*copy))
 						copy++;
 					if(copy[0] == '\"')
 						copy++;

 					if(copy[0] != '/') {
 						char *s;
 						char *mimeType;	/* LHS of the ; */

 						mimeType = cli_strtok(copy, 0, ";");
 						/*
 						 * Handle
 						 * Content-Type: foo/bar multipart/mixed
 						 * and
 						 * Content-Type: multipart/mixed foo/bar
 						 *
 						 * The loop is skipped entirely if
 						 * the type could not be extracted
 						 */
 						for(s = mimeType; s != NULL; ) {
 #ifdef	CL_THREAD_SAFE
 							int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
 #else
 							int set = messageSetMimeType(m, strtok(s, "/"));
 #endif

 							/*
 							 * Stephen White <stephen@earth.li>
 							 * Some clients put space after
 							 * the mime type but before
 							 * the ;
 							 */
 #ifdef	CL_THREAD_SAFE
 							s = strtok_r(NULL, ";", &strptr);
 #else
 							s = strtok(NULL, ";");
 #endif
 							if(s == NULL)
 								break;
 							if(set) {
 								/*
 								 * len is the full
 								 * stripped length, so
 								 * it can't wrap when
 								 * nothing is left and
 								 * one character
 								 * subtypes are kept
 								 */
 								size_t len = strstrip(s);

 								if(len && (s[len - 1] == '\"')) {
 									s[len - 1] = '\0';
 									len = strstrip(s);
 								}
 								if(len) {
 									if(strchr(s, ' ')) {
 										char *t = cli_strtok(s, 0, " ");

 										messageSetMimeSubtype(m, t);
 										free(t);
 									} else
 										messageSetMimeSubtype(m, s);
 								}
 							}

 							/* move on to the next type/subtype pair, if any */
 							while(*s && !isspace((unsigned char)*s))
 								s++;
 							if(*s++ == '\0')
 								break;
 							if(*s == '\0')
 								break;
 						}
 						free(mimeType);
 					}
 				}

 				/*
 				 * Add in all rest of the arguments.
 				 * e.g. if the header is this:
 				 * Content-Type:', arg='multipart/mixed; boundary=foo
 				 * we find the boundary argument set it
 				 */
 				i = 1;
 				while((mimeArgs = cli_strtok(copy, i++, ";")) != NULL) {
 					cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);

 					messageAddArguments(m, mimeArgs);
 					free(mimeArgs);
 				}
 			}
 			break;
 		case CONTENT_TRANSFER_ENCODING:
 			messageSetEncoding(m, copy);
 			break;
 		case CONTENT_DISPOSITION:
 			/* the type comes before the first ';', the argument after it */
 #ifdef	CL_THREAD_SAFE
 			arg = strtok_r(copy, ";", &strptr);
 			if(arg && *arg) {
 				messageSetDispositionType(m, arg);
 				messageAddArgument(m, strtok_r(NULL, "\r\n", &strptr));
 			}
 #else
 			arg = strtok(copy, ";");
 			if(arg && *arg) {
 				messageSetDispositionType(m, arg);
 				messageAddArgument(m, strtok(NULL, "\r\n"));
 			}
 #endif
 	}
 	free(ptr);

 	return 0;
 }
 
68be129f
 /*
5a01973c
  * Save the text portion of the message
  */
 static void
 saveTextPart(message *m, const char *dir)
 {
 	/*
 	 * Give the message the filename argument "textportion" and hand
 	 * it to messageToFileblob() for the directory dir.
 	 */
 	fileblob *saved;

 	messageAddArgument(m, "filename=textportion");

 	saved = messageToFileblob(m, dir);
 	if(saved == NULL)
 		return;

 	/*
 	 * Save main part to scan that
 	 */
 	cli_dbgmsg("Saving main message\n");

 	fileblobDestroy(saved);
 }
 
90905415
 /*
e9bdeb72
  * Handle RFC822 comments in headers.
  * Returns a buffer without the comments or NULL on error or if the input
  * has no comments. The caller must free the returned buffer
  * See section 3.4.3 of RFC822
90905415
  * TODO: handle comments that go on to more than one line
  */
 static char *
 rfc822comments(const char *in)
 {
 	/*
 	 * Return a malloc'd copy of "in" with its parenthesised comments
 	 * removed and trailing white space stripped, or NULL if "in" is
 	 * NULL, contains no '(' at all, or memory can't be allocated.
 	 *
 	 * Comments nest. A backslash quotes the next character: the
 	 * backslash itself is dropped and the character kept unless it is
 	 * inside a comment. Parentheses inside a quoted string are ordinary
 	 * text, since comments are not recognised there, and the quotes
 	 * themselves are kept.
 	 */
 	const char *iptr;
 	char *out, *optr;
 	int backslash, inquote, commentlevel;

 	if(in == NULL)
 		return NULL;

 	if(strchr(in, '(') == NULL)
 		return NULL;

 	/* the result is never longer than the input */
 	out = cli_malloc(strlen(in) + 1);
 	if(out == NULL)
 		return NULL;

 	backslash = commentlevel = inquote = 0;
 	optr = out;

 	cli_dbgmsg("rfc822comments: contains a comment\n");

 	for(iptr = in; *iptr; iptr++)
 		if(backslash) {
 			if(commentlevel == 0)
 				*optr++ = *iptr;
 			backslash = 0;
 		} else switch(*iptr) {
 			case '\\':
 				backslash = 1;
 				break;
 			case '\"':
 				/* quotes mean nothing inside a comment */
 				if(commentlevel == 0) {
 					*optr++ = '\"';
 					inquote = !inquote;
 				}
 				break;
 			case '(':
 				if(inquote)
 					*optr++ = '(';
 				else
 					commentlevel++;
 				break;
 			case ')':
 				if(inquote)
 					*optr++ = ')';
 				else if(commentlevel > 0)
 					commentlevel--;
 				break;
 			default:
 				if(commentlevel == 0)
 					*optr++ = *iptr;
 		}

 	if(backslash)	/* last character was a single backslash */
 		*optr++ = '\\';
 	*optr = '\0';

 	strstrip(out);

 	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

 	return out;
 }
0674e2af
 
 /*
  * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
  * free, or NULL on error
  */
 static char *
 rfc2047(const char *in)
 {
 	char *out, *pout;
 	size_t len;
 
95f98162
 	if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
0674e2af
 		return strdup(in);
 
 	cli_dbgmsg("rfc2047 '%s'\n", in);
 	out = cli_malloc(strlen(in) + 1);
 
 	if(out == NULL)
 		return NULL;
 
 	pout = out;
 
 	/* For each RFC2047 string */
 	while(*in) {
291ac47f
 		char encoding, *ptr, *enctext;
0674e2af
 		message *m;
 		blob *b;
 
 		/* Find next RFC2047 string */
 		while(*in) {
 			if((*in == '=') && (in[1] == '?')) {
 				in += 2;
 				break;
 			}
 			*pout++ = *in++;
 		}
 		/* Skip over charset, find encoding */
 		while((*in != '?') && *in)
 			in++;
 		if(*in == '\0')
 			break;
 		encoding = *++in;
 		encoding = tolower(encoding);
 
 		if((encoding != 'q') && (encoding != 'b')) {
c3400886
 			cli_warnmsg("Unsupported RFC2047 encoding type '%c' - report to bugs@clamav.net\n", encoding);
 			free(out);
 			out = NULL;
0674e2af
 			break;
 		}
 		/* Skip to encoded text */
 		if(*++in != '?')
 			break;
 		if(*++in == '\0')
 			break;
 
291ac47f
 		enctext = strdup(in);
 		if(enctext == NULL) {
 			free(out);
 			out = NULL;
 			break;
 		}
0674e2af
 		in = strstr(in, "?=");
291ac47f
 		if(in == NULL) {
 			free(enctext);
0674e2af
 			break;
291ac47f
 		}
0674e2af
 		in += 2;
 		ptr = strstr(enctext, "?=");
 		assert(ptr != NULL);
 		*ptr = '\0';
 		/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
 
 		m = messageCreate();
37819555
 		if(m == NULL)
0674e2af
 			break;
 		messageAddStr(m, enctext);
291ac47f
 		free(enctext);
4d9c0ca8
 		switch(encoding) {
0674e2af
 			case 'q':
 				messageSetEncoding(m, "quoted-printable");
 				break;
 			case 'b':
 				messageSetEncoding(m, "base64");
 				break;
 		}
 		b = messageToBlob(m);
 		len = blobGetDataSize(b);
 		cli_dbgmsg("Decoded as '%*.*s'\n", len, len, blobGetData(b));
 		memcpy(pout, blobGetData(b), len);
 		blobDestroy(b);
 		messageDestroy(m);
 		if(pout[len - 1] == '\n')
 			pout += len - 1;
 		else
 			pout += len;
 
 	}
5e5a162c
 	if(out == NULL)
 		return NULL;
 
 	*pout = '\0';
0674e2af
 
5e5a162c
 	cli_dbgmsg("rfc2047 returns '%s'\n", out);
0674e2af
 	return out;
 }
 
9a7398ee
 #ifdef	PARTIAL_DIR
 /*
  * Handle partial messages
  */
 static int
 rfc1341(message *m, const char *dir)
 {
 	/*
 	 * Save one part of a message/partial mail under
 	 * <tmpdir>/clamav-partial, named from the message's "id" and
 	 * "number" arguments. When the part's number equals its "total"
 	 * argument, concatenate parts 1..total into the file dir/id.
 	 *
 	 * Returns 0 once the part has been saved, or -1 if the partial
 	 * directory can't be created or examined, the "id" or "number"
 	 * argument is missing, memory runs out, the part can't be saved or
 	 * a file can't be opened during reassembly.
 	 */
 	fileblob *fb;
 	char *arg, *id, *number, *total, *oldfilename;
 	const char *tmpdir;
 	char *pdir;

 #ifdef  CYGWIN
 	if((tmpdir = getenv("TEMP")) == (char *)NULL)
 		if((tmpdir = getenv("TMP")) == (char *)NULL)
 			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 				tmpdir = "C:\\";
 #else
 	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
 		if((tmpdir = getenv("TMP")) == (char *)NULL)
 			if((tmpdir = getenv("TEMP")) == (char *)NULL)
 #ifdef	P_tmpdir
 				tmpdir = P_tmpdir;
 #else
 				tmpdir = "/tmp";
 #endif
 #endif

 	/* 16 = strlen("/clamav-partial") + 1 for the terminating NUL */
 	pdir = cli_malloc(strlen(tmpdir) + 16);
 	if(pdir == NULL)
 		return -1;

 	sprintf(pdir, "%s/clamav-partial", tmpdir);

 	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
 		cli_errmsg("Can't create the directory '%s'\n", pdir);
 		free(pdir);
 		return -1;
 	} else {
 		struct stat statb;

 		if(stat(pdir, &statb) < 0) {
 			cli_errmsg("Can't stat the directory '%s'\n", pdir);
 			free(pdir);
 			return -1;
 		}
 		if(statb.st_mode & 077)
 			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
 				pdir, (unsigned int)(statb.st_mode & 0777));
 	}

 	id = (char *)messageFindArgument(m, "id");
 	if(id == NULL) {
 		free(pdir);
 		return -1;
 	}
 	number = (char *)messageFindArgument(m, "number");
 	if(number == NULL) {
 		free(id);
 		free(pdir);
 		return -1;
 	}

 	oldfilename = (char *)messageFindArgument(m, "filename");
 	if(oldfilename == NULL)
 		oldfilename = (char *)messageFindArgument(m, "name");

 	/* 10 = strlen("filename=") + 1 for the terminating NUL */
 	arg = cli_malloc(10 + strlen(id) + strlen(number));
 	if(arg == NULL) {
 		if(oldfilename)
 			free(oldfilename);
 		free(id);
 		free(number);
 		free(pdir);
 		return -1;
 	}
 	sprintf(arg, "filename=%s%s", id, number);
 	messageAddArgument(m, arg);
 	free(arg);

 	if(oldfilename) {
 		cli_warnmsg("Must reset to %s\n", oldfilename);
 		free(oldfilename);
 	}

 	if((fb = messageToFileblob(m, pdir)) == NULL) {
 		free(id);
 		free(number);
 		free(pdir);
 		return -1;
 	}

 	fileblobDestroy(fb);

 	total = (char *)messageFindArgument(m, "total");
 	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
 	if(total) {
 		int n = atoi(number);
 		int t = atoi(total);
 		DIR *dd = NULL;

 		/*
 		 * If it's the last one - reassemble it
 		 * FIXME: this assumes that we receive the parts in order
 		 */
 		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
 			FILE *fout;
 			char outname[NAME_MAX + 1];

 			/*
 			 * NOTE(review): id comes from the message and is used
 			 * as a file name here as it stands - confirm that it
 			 * can't contain directory separators
 			 */
 			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

 			cli_dbgmsg("outname: %s\n", outname);

 			fout = fopen(outname, "wb");
 			if(fout == NULL) {
 				cli_errmsg("Can't open '%s' for writing", outname);
 				free(id);
 				free(total);
 				free(number);
 				closedir(dd);
 				free(pdir);
 				return -1;
 			}

 			for(n = 1; n <= t; n++) {
 				char filename[NAME_MAX + 1];
 				const struct dirent *dent;
 #if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
 				union {
 					struct dirent d;
 					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
 				} result;
 #endif

 				/* saved parts have names starting <id><number> */
 				snprintf(filename, sizeof(filename), "%s%d", id, n);

 #ifdef HAVE_READDIR_R_3
 				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
 #elif defined(HAVE_READDIR_R_2)
 				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
 #else	/*!HAVE_READDIR_R*/
 				while((dent = readdir(dd))) {
 #endif
 					char fullname[NAME_MAX + 1];
 					FILE *fin;
 					char buffer[BUFSIZ];
 					int nblanks, namelen;
 					extern short cli_leavetemps_flag;

 					if(dent->d_ino == 0)
 						continue;

 					if(strncmp(filename, dent->d_name, strlen(filename)) != 0)
 						continue;

 					/*
 					 * The directory and entry name together
 					 * can be longer than fullname, so the
 					 * write must be bounded
 					 */
 					namelen = snprintf(fullname, sizeof(fullname), "%s/%s", pdir, dent->d_name);
 					if((namelen < 0) || ((size_t)namelen >= sizeof(fullname))) {
 						cli_warnmsg("rfc1341: file name too long: %s/%s\n", pdir, dent->d_name);
 						continue;
 					}
 					fin = fopen(fullname, "rb");
 					if(fin == NULL) {
 						cli_errmsg("Can't open '%s' for reading", fullname);
 						fclose(fout);
 						unlink(outname);
 						free(id);
 						free(total);
 						free(number);
 						closedir(dd);
 						free(pdir);
 						return -1;
 					}
 					nblanks = 0;
 					while(fgets(buffer, sizeof(buffer), fin) != NULL)
 						/*
 						 * Ensure that trailing newlines
 						 * aren't copied
 						 */
 						if(buffer[0] == '\n') {
 							nblanks++;
 						} else {
 							if(nblanks)
 								do
 									putc('\n', fout);
 								while(--nblanks > 0);
 							fputs(buffer, fout);
 						}
 					fclose(fin);

 					/* don't unlink if leave temps */
 					if(!cli_leavetemps_flag)
 						unlink(fullname);
 					break;
 				}
 				rewinddir(dd);
 			}
 			closedir(dd);
 			fclose(fout);
 		}
 	}
 	/* number must be released whether or not a total was given */
 	free(number);
 	free(id);
 	free(total);
 	free(pdir);

 	return 0;
 }
 #endif
 
3eb12bae
 #ifdef	FOLLOWURLS
c5ed8336
 /*
  * checkURLs: fetch the http:// links found in a message.
  *
  * The message is flattened to a blob and passed through
  * html_normalise_mem() to collect its href values. Every distinct value
  * starting with "http://" (at most MAX_URLS of them) is downloaded into
  * dir, under a file name made from the URL with each '/' turned into '_'.
  * Messages bigger than 100K are not processed.
  *
  * With WITH_CURL the download is done by getURL(), in one thread per URL
  * when CL_THREAD_SAFE is defined; all threads are joined before returning.
  * Without WITH_CURL the external "GET" command is run through system().
  *
  * m:	the message whose links are to be followed
  * dir:	directory that receives the downloaded files
  */
 static void
 checkURLs(message *m, const char *dir)
 {
 	blob *b = messageToBlob(m);
 	size_t len;
 	table_t *t;
 	int i, n;
 #if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
 	pthread_t tid[MAX_URLS];
 	struct arg args[MAX_URLS];
 #endif
 	tag_arguments_t hrefs;
 
 	if(b == NULL)
 		return;
 
 	len = blobGetDataSize(b);
 
 	if(len == 0) {
 		blobDestroy(b);
 		return;
 	}
 
 	/* TODO: make this size customisable */
 	if(len > 100*1024) {
 		cli_warnmsg("Viruses pointed to by URL not scanned in large message\n");
 		blobDestroy(b);
 		return;
 	}
 
 	blobClose(b);
 	/* t remembers the URLs already handled so each is fetched only once */
 	t = tableCreate();
 	if(t == NULL) {
 		blobDestroy(b);
 		return;
 	}
 
 	hrefs.count = 0;
 	hrefs.tag = hrefs.value = NULL;
 
 	cli_dbgmsg("checkURLs: calling html_normalise_mem\n");
 	if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) {
 		blobDestroy(b);
 		tableDestroy(t);
 		return;
 	}
 	cli_dbgmsg("checkURLs: html_normalise_mem returned\n");
 
 	/* TODO: Do we need to call remove_html_comments? */
 
 	/* n counts the downloads started (and, when threaded, the threads to join) */
 	n = 0;
 
 	for(i = 0; i < hrefs.count; i++) {
 		const char *url = (const char *)hrefs.value[i];
 
 		if(strncasecmp("http://", url, 7) == 0) {
 			char *ptr;
 #ifdef	WITH_CURL
 #ifndef	CL_THREAD_SAFE
 			struct arg arg;
 #endif
 
 #else	/*!WITH_CURL*/
 #ifdef	CL_THREAD_SAFE
 			static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
 			struct stat statb;
 			char cmd[512];
 			size_t overhead;
 #endif	/*WITH_CURL*/
 			char name[NAME_MAX + 1];
 
 			if(tableFind(t, url) == 1) {
 				cli_dbgmsg("URL %s already downloaded\n", url);
 				continue;
 			}
 			if(n == MAX_URLS) {
 				cli_warnmsg("Not all URLs will be scanned\n");
 				break;
 			}
 			(void)tableInsert(t, url, 1);
 			cli_dbgmsg("Downloading URL %s to be scanned\n", url);
 			/* Build a flat file name from the URL */
 			strncpy(name, url, sizeof(name) - 1);
 			name[sizeof(name) - 1] = '\0';
 			for(ptr = name; *ptr; ptr++)
 				if(*ptr == '/')
 					*ptr = '_';
 
 #ifdef	WITH_CURL
 #ifdef	CL_THREAD_SAFE
 			args[n].dir = dir;
 			args[n].url = url;
 			/*
 			 * name is reused on the next iteration, so the thread
 			 * needs its own copy
 			 */
 			args[n].filename = strdup(name);
 			if(args[n].filename == NULL) {
 				cli_warnmsg("URL %s not downloaded: out of memory\n", url);
 				continue;
 			}
 			/*
 			 * Only count the slot when the thread really exists,
 			 * otherwise pthread_join() below would be handed an
 			 * uninitialised thread id
 			 */
 			if(pthread_create(&tid[n], NULL, getURL, &args[n]) != 0) {
 				cli_warnmsg("URL %s not downloaded: can't create thread\n", url);
 				free(args[n].filename);
 				continue;
 			}
 #else
 			arg.url = url;
 			arg.dir = dir;
 			arg.filename = name;
 			getURL(&arg);
 #endif
 
 #else
 			/*
 			 * TODO: maximum size and timeouts
 			 */
 			/*
 			 * The URL comes from the message being scanned and is
 			 * handed to the shell, so it must not be able to break
 			 * out of the command. Both the URL and the output file
 			 * are placed inside single quotes, where the shell
 			 * treats every character literally; the only character
 			 * that could end the quoting is the single quote
 			 * itself, so refuse anything containing one.
 			 * name is derived from url, so it needs no extra check.
 			 */
 			if(strchr(url, '\'') || strchr(dir, '\'')) {
 				cli_warnmsg("URL %s not downloaded: unsafe characters\n", url);
 				continue;
 			}
 			/*
 			 * The longer of the two formats below has 28 fixed
 			 * characters, plus one for the terminating NUL.
 			 * Whatever is left in cmd is the room for the URL.
 			 */
 			overhead = strlen(dir) + strlen(name) + 29;
 			if(overhead >= sizeof(cmd)) {
 				cli_warnmsg("URL %s not downloaded: path too long\n", url);
 				continue;
 			}
 #ifdef	CL_DEBUG
 			snprintf(cmd, sizeof(cmd), "GET -t10 '%.*s' >'%s/%s'", (int)(sizeof(cmd) - overhead), url, dir, name);
 #else
 			snprintf(cmd, sizeof(cmd), "GET -t10 '%.*s' >'%s/%s' 2>/dev/null", (int)(sizeof(cmd) - overhead), url, dir, name);
 #endif
 
 			cli_dbgmsg("%s\n", cmd);
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_lock(&system_mutex);
 #endif
 			system(cmd);
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&system_mutex);
 #endif
 			/* cmd is reused to hold the name of the downloaded file */
 			snprintf(cmd, sizeof(cmd), "%s/%s", dir, name);
 			if(stat(cmd, &statb) >= 0)
 				if(statb.st_size == 0) {
 					cli_warnmsg("URL %s failed to download\n", url);
 					/*
 					 * Don't bother scanning an empty file
 					 */
 					(void)unlink(cmd);
 				}
 #endif
 			++n;
 		}
 	}
 	blobDestroy(b);
 	tableDestroy(t);
 
 #if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
 	assert(n <= MAX_URLS);
 	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
 	while(--n >= 0) {
 		pthread_join(tid[n], NULL);
 		free(args[n].filename);
 	}
 #endif
 	/*
 	 * The url pointers given to the threads point into hrefs, so it can
 	 * only be released once they have all finished
 	 */
 	html_tag_arg_free(&hrefs);
 }
 
da812a6a
 #ifdef	WITH_CURL
314ff77b
 static void *
 #ifdef	CL_THREAD_SAFE
 getURL(void *a)
 #else
 getURL(struct arg *arg)
 #endif
3fa72383
 {
 	char *fout;
6b93ea0c
 	CURL *curl;
3fa72383
 	FILE *fp;
6b93ea0c
 	struct curl_slist *headers;
 	static int initialised = 0;
314ff77b
 #ifdef	CL_THREAD_SAFE
 	static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
 	struct arg *arg = (struct arg *)a;
 #endif
 	const char *url = arg->url;
 	const char *dir = arg->dir;
 	const char *filename = arg->filename;
3fa72383
 
314ff77b
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_lock(&init_mutex);
 #endif
6b93ea0c
 	if(!initialised) {
314ff77b
 		if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) {
 #ifdef	CL_THREAD_SAFE
 			pthread_mutex_unlock(&init_mutex);
 #endif
 			return NULL;
 		}
6b93ea0c
 		initialised = 1;
3fa72383
 	}
314ff77b
 #ifdef	CL_THREAD_SAFE
 	pthread_mutex_unlock(&init_mutex);
 #endif
 
6b93ea0c
 	/* easy isn't the word I'd use... */
 	curl = curl_easy_init();
 	if(curl == NULL)
314ff77b
 		return NULL;
da812a6a
 
6b93ea0c
 	(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net");
 
 	if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0)
314ff77b
 		return NULL;
6b93ea0c
 
3fa72383
 	fout = cli_malloc(strlen(dir) + strlen(filename) + 2);
 
da812a6a
 	if(fout == NULL) {
 		curl_easy_cleanup(curl);
314ff77b
 		return NULL;
da812a6a
 	}
3fa72383
 
a95c894a
 	snprintf(fout, NAME_MAX, "%s/%s", dir, filename);
3fa72383
 
 	fp = fopen(fout, "w");
 
 	if(fp == NULL) {
138b73f6
 		cli_errmsg("Can't open '%s' for writing", fout);
3fa72383
 		free(fout);
da812a6a
 		curl_easy_cleanup(curl);
314ff77b
 		return NULL;
3fa72383
 	}
05ea2522
 #ifdef	CURLOPT_WRITEDATA
314ff77b
 	if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) {
 		fclose(fp);
 		free(fout);
 		curl_easy_cleanup(curl);
 		return NULL;
 	}
05ea2522
 #else
 	if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) {
 		fclose(fp);
 		free(fout);
 		curl_easy_cleanup(curl);
 		return NULL;
 	}
 #endif
314ff77b
 
6b93ea0c
 	/*
3eb12bae
 	 * If an item is in squid's cache get it from there (TCP_HIT/200)
6b93ea0c
 	 * by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200)
 	 */
 	headers = curl_slist_append(NULL, "Pragma:");
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
3fa72383
 
6b93ea0c
 	/* These should be customisable */
 	curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30);
 	curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10);
49674596
 #ifdef	CURLOPT_MAXFILESIZE
 	curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024);
 #endif
3fa72383
 
314ff77b
 #ifdef  CL_THREAD_SAFE
c07de365
 #ifdef	CURLOPT_DNS_USE_GLOBAL_CACHE
314ff77b
 	curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
 #endif
c07de365
 #endif
02406150
 
 	/*
 	 * Prevent password: prompting with older versions
 	 * FIXME: a better username?
 	 */
66df01fa
 	curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password");
02406150
 
314ff77b
 	/*
 	 * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked"
 	 * from gethostbyaddr_r within this. It may be a bug in libcurl
 	 * rather than this code, but I need to check, see Curl_resolv()
 	 * If pushed really hard it will sometimes say
 	 * Conditional jump or move depends on uninitialised value(s) and
 	 * quit. But the program seems to work OK without valgrind...
 	 * Perhaps Curl_resolv() isn't thread safe?
 	 */
8386482b
 	/*
aa479b7d
 	 * On some C libraries (notably with FC3, glibc-2.3.3-74) you get a
 	 * memory leak * here in getaddrinfo(), see
0856891e
 	 *	https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
8386482b
 	 */
 
6b93ea0c
 	if(curl_easy_perform(curl) != CURLE_OK) {
 		cli_warnmsg("URL %s failed to download\n", url);
 		unlink(fout);
 	}
 
 	fclose(fp);
314ff77b
 	curl_slist_free_all(headers);
6b93ea0c
 	curl_easy_cleanup(curl);
 	free(fout);
314ff77b
 
 	return NULL;
3fa72383
 }
 #endif
 
 #else
 /*
  * Built when FOLLOWURLS is not defined: links in the message are not
  * followed, so there is nothing to do
  */
 static void
 checkURLs(message *m, const char *dir)
 {
 	(void)m;
 	(void)dir;
 }
 #endif
 
d1382234
 #ifdef HAVE_BACKTRACE
f2b068fb
 /*
  * Handler for SIGSEGV: log a backtrace through syslog, then terminate
  * the process with SIGSEGV as the exit status.
  *
  * sig is not used
  */
 static void
 sigsegv(int sig)
 {
 	/*
 	 * Put the default action back before doing anything else
 	 * (presumably so that a further fault isn't delivered here again)
 	 */
 	(void)signal(SIGSEGV, SIG_DFL);

 	/* non-zero: send the trace to syslog */
 	print_trace(1);

 	exit(SIGSEGV);
 }
 
f2b068fb
 /*
  * print_trace: report the call stack (up to 10 frames) of this process.
  *
  * use_syslog:	zero sends the trace to cli_dbgmsg(), non-zero sends it to
  *		syslog() at LOG_ERR
  */
 static void
 print_trace(int use_syslog)
 {
 	void *array[10];
 	size_t size;
 	char **strings;
 	size_t i;
 	pid_t pid = getpid();
 
 	size = backtrace(array, 10);
 	strings = backtrace_symbols(array, size);
 
 	/* pid_t isn't necessarily an int, so cast it to match %d */
 	if(use_syslog == 0)
 		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
 	else
 		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);
 
 	/*
 	 * backtrace_symbols() returns NULL if it can't allocate memory for
 	 * the strings, in which case there's nothing more to print
 	 */
 	if(strings == NULL)
 		return;
 
 	for(i = 0; i < size; i++)
 		if(use_syslog)
 			syslog(LOG_ERR, "bt[%d]: %s", (int)i, strings[i]);
 		else
 			cli_dbgmsg("%s\n", strings[i]);
 
 	/* TODO: dump the current email */
 
 	/* one allocation holds all the strings, so one free() releases them */
 	free(strings);
 }
 #endif