...
|
...
|
@@ -106,7 +106,7 @@
|
106
|
106
|
static void sigsegv(int sig);
|
107
|
107
|
static void print_trace(int use_syslog);
|
108
|
108
|
|
109
|
|
-/*#define SAVE_TMP */ /* Save the file being worked on in tmp */
|
|
109
|
+/*#define SAVE_TMP */ /* Save the file being worked on in tmp */
|
110
|
110
|
#endif
|
111
|
111
|
|
112
|
112
|
#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
|
...
|
...
|
@@ -164,7 +164,7 @@ typedef enum {
|
164
|
164
|
|
165
|
165
|
/*
|
166
|
166
|
* Use CL_SCAN_MAIL_PARTIAL_MESSAGE to handle messages covered by section 7.3.2 of RFC1341.
|
167
|
|
- * This is experimental code so it is up to YOU to (1) ensure it's secure
|
|
167
|
+ * This is experimental code so it is up to YOU to (1) ensure it's secure
|
168
|
168
|
* (2) periodically trim the directory of old files
|
169
|
169
|
*
|
170
|
170
|
* If you use the load balancing feature of clamav-milter to run clamd on
|
...
|
...
|
@@ -176,7 +176,7 @@ typedef enum {
|
176
|
176
|
* Slows things down a lot and only catches unencoded copies
|
177
|
177
|
* of EICAR within bounces, which don't matter
|
178
|
178
|
*/
|
179
|
|
-//#define SCAN_UNENCODED_BOUNCES
|
|
179
|
+//#define SCAN_UNENCODED_BOUNCES
|
180
|
180
|
|
181
|
181
|
typedef struct mbox_ctx {
|
182
|
182
|
const char *dir;
|
...
|
...
|
@@ -278,14 +278,14 @@ static bool haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristi
|
278
|
278
|
* boundary="nextPart1383049.XCRrrar2yq"; \
|
279
|
279
|
* protocol="application/pgp-encrypted" \
|
280
|
280
|
*/
|
281
|
|
-#define X_BFILE RELATED /* \
|
282
|
|
- * BeOS, expert two parts: the file and it's \
|
283
|
|
- * attributes. The attributes part comes as \
|
284
|
|
- * Content-Type: application/x-be_attribute \
|
285
|
|
- * name="foo" \
|
286
|
|
- * I can't find where it is defined, any \
|
287
|
|
- * pointers would be appreciated. For now \
|
288
|
|
- * we treat it as multipart/related \
|
|
281
|
+#define X_BFILE RELATED /* \
|
|
282
|
+ * BeOS, expert two parts: the file and it's \
|
|
283
|
+ * attributes. The attributes part comes as \
|
|
284
|
+ * Content-Type: application/x-be_attribute \
|
|
285
|
+ * name="foo" \
|
|
286
|
+ * I can't find where it is defined, any \
|
|
287
|
+ * pointers would be appreciated. For now \
|
|
288
|
+ * we treat it as multipart/related \
|
289
|
289
|
*/
|
290
|
290
|
#define KNOWBOT 14 /* Unknown and undocumented format? */
|
291
|
291
|
|
...
|
...
|
@@ -346,18 +346,18 @@ int cli_mbox(const char *dir, cli_ctx *ctx)
|
346
|
346
|
|
347
|
347
|
/*
|
348
|
348
|
* TODO: when signal handling is added, need to remove temp files when a
|
349
|
|
- * signal is received
|
|
349
|
+ * signal is received
|
350
|
350
|
* TODO: add option to scan in memory not via temp files, perhaps with a
|
351
|
351
|
* named pipe or memory mapped file, though this won't work on big e-mails
|
352
|
352
|
* containing many levels of encapsulated messages - it'd just take too much
|
353
|
353
|
* RAM
|
354
|
354
|
* TODO: parse .msg format files
|
355
|
355
|
* TODO: fully handle AppleDouble format, see
|
356
|
|
- * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
|
|
356
|
+ * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
|
357
|
357
|
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
|
358
|
358
|
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
|
359
|
359
|
* TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
|
360
|
|
- * e.g. \0Content-Type: application/binary;
|
|
360
|
+ * e.g. \0Content-Type: application/binary;
|
361
|
361
|
*/
|
362
|
362
|
static int
|
363
|
363
|
cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
...
|
...
|
@@ -407,34 +407,34 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
407
|
407
|
#endif
|
408
|
408
|
|
409
|
409
|
/*
|
410
|
|
- * Is it a UNIX style mbox with more than one
|
411
|
|
- * mail message, or just a single mail message?
|
412
|
|
- *
|
413
|
|
- * TODO: It would be better if we called cli_magic_scan_dir here rather than
|
414
|
|
- * in cli_scanmail. Then we could improve the way mailboxes with more
|
415
|
|
- * than one message is handled, e.g. giving a better indication of
|
416
|
|
- * which message within the mailbox is infected
|
417
|
|
- */
|
|
410
|
+ * Is it a UNIX style mbox with more than one
|
|
411
|
+ * mail message, or just a single mail message?
|
|
412
|
+ *
|
|
413
|
+ * TODO: It would be better if we called cli_magic_scan_dir here rather than
|
|
414
|
+ * in cli_scanmail. Then we could improve the way mailboxes with more
|
|
415
|
+ * than one message is handled, e.g. giving a better indication of
|
|
416
|
+ * which message within the mailbox is infected
|
|
417
|
+ */
|
418
|
418
|
/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
|
419
|
419
|
if (strncmp(buffer, "From ", 5) == 0) {
|
420
|
420
|
/*
|
421
|
|
- * Have been asked to check a UNIX style mbox file, which
|
422
|
|
- * may contain more than one e-mail message to decode
|
423
|
|
- *
|
424
|
|
- * It would be far better for scanners.c to do this splitting
|
425
|
|
- * and do this
|
426
|
|
- * FOR EACH mail in the mailbox
|
427
|
|
- * DO
|
428
|
|
- * pass this mail to cli_mbox --
|
429
|
|
- * scan this file
|
430
|
|
- * IF this file has a virus quit
|
431
|
|
- * THEN
|
432
|
|
- * return CL_VIRUS
|
433
|
|
- * FI
|
434
|
|
- * END
|
435
|
|
- * This would remove a problem with this code that it can
|
436
|
|
- * fill up the tmp directory before it starts scanning
|
437
|
|
- */
|
|
421
|
+ * Have been asked to check a UNIX style mbox file, which
|
|
422
|
+ * may contain more than one e-mail message to decode
|
|
423
|
+ *
|
|
424
|
+ * It would be far better for scanners.c to do this splitting
|
|
425
|
+ * and do this
|
|
426
|
+ * FOR EACH mail in the mailbox
|
|
427
|
+ * DO
|
|
428
|
+ * pass this mail to cli_mbox --
|
|
429
|
+ * scan this file
|
|
430
|
+ * IF this file has a virus quit
|
|
431
|
+ * THEN
|
|
432
|
+ * return CL_VIRUS
|
|
433
|
+ * FI
|
|
434
|
+ * END
|
|
435
|
+ * This would remove a problem with this code that it can
|
|
436
|
+ * fill up the tmp directory before it starts scanning
|
|
437
|
+ */
|
438
|
438
|
bool lastLineWasEmpty;
|
439
|
439
|
int messagenumber;
|
440
|
440
|
message *m = messageCreate(); /*Create an empty email */
|
...
|
...
|
@@ -452,8 +452,8 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
452
|
452
|
if (lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
|
453
|
453
|
cli_dbgmsg("Deal with message number %d\n", messagenumber++);
|
454
|
454
|
/*
|
455
|
|
- * End of a message in the mail box
|
456
|
|
- */
|
|
455
|
+ * End of a message in the mail box
|
|
456
|
+ */
|
457
|
457
|
bool heuristicFound = FALSE;
|
458
|
458
|
body = parseEmailHeaders(m, rfc821, &heuristicFound);
|
459
|
459
|
if (body == NULL) {
|
...
|
...
|
@@ -481,13 +481,13 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
481
|
481
|
}
|
482
|
482
|
}
|
483
|
483
|
/*
|
484
|
|
- * Starting a new message, throw away all the
|
485
|
|
- * information about the old one. It would
|
486
|
|
- * be best to be able to scan this message
|
487
|
|
- * now, but cli_magic_scan_file needs arguments
|
488
|
|
- * that haven't been passed here so it can't be
|
489
|
|
- * called
|
490
|
|
- */
|
|
484
|
+ * Starting a new message, throw away all the
|
|
485
|
+ * information about the old one. It would
|
|
486
|
+ * be best to be able to scan this message
|
|
487
|
+ * now, but cli_magic_scan_file needs arguments
|
|
488
|
+ * that haven't been passed here so it can't be
|
|
489
|
+ * called
|
|
490
|
+ */
|
491
|
491
|
m = body;
|
492
|
492
|
messageReset(body);
|
493
|
493
|
messageSetCTX(body, ctx);
|
...
|
...
|
@@ -498,9 +498,9 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
498
|
498
|
|
499
|
499
|
if (isuuencodebegin(buffer)) {
|
500
|
500
|
/*
|
501
|
|
- * Fast track visa to uudecode.
|
502
|
|
- * TODO: binhex, yenc
|
503
|
|
- */
|
|
501
|
+ * Fast track visa to uudecode.
|
|
502
|
+ * TODO: binhex, yenc
|
|
503
|
+ */
|
504
|
504
|
if (uudecodeFile(m, buffer, dir, map, &at) < 0)
|
505
|
505
|
if (messageAddStr(m, buffer) < 0)
|
506
|
506
|
break;
|
...
|
...
|
@@ -522,21 +522,21 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
522
|
522
|
messageDestroy(m);
|
523
|
523
|
} else {
|
524
|
524
|
/*
|
525
|
|
- * It's a single message, parse the headers then the body
|
526
|
|
- */
|
|
525
|
+ * It's a single message, parse the headers then the body
|
|
526
|
+ */
|
527
|
527
|
if (strncmp(buffer, "P I ", 4) == 0)
|
528
|
528
|
/*
|
529
|
|
- * CommuniGate Pro format: ignore headers until
|
530
|
|
- * blank line
|
531
|
|
- */
|
|
529
|
+ * CommuniGate Pro format: ignore headers until
|
|
530
|
+ * blank line
|
|
531
|
+ */
|
532
|
532
|
while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1) &&
|
533
|
533
|
(strchr("\r\n", buffer[0]) == NULL))
|
534
|
534
|
;
|
535
|
535
|
/* getline_from_mbox could be using unlocked_stdio(3),
|
536
|
|
- * so lock file here */
|
|
536
|
+ * so lock file here */
|
537
|
537
|
/*
|
538
|
|
- * Ignore any blank lines at the top of the message
|
539
|
|
- */
|
|
538
|
+ * Ignore any blank lines at the top of the message
|
|
539
|
+ */
|
540
|
540
|
while (strchr("\r\n", buffer[0]) &&
|
541
|
541
|
(getline_from_mbox(buffer, sizeof(buffer) - 1, map, &at) != NULL))
|
542
|
542
|
;
|
...
|
...
|
@@ -552,8 +552,8 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
552
|
552
|
|
553
|
553
|
if (body) {
|
554
|
554
|
/*
|
555
|
|
- * Write out the last entry in the mailbox
|
556
|
|
- */
|
|
555
|
+ * Write out the last entry in the mailbox
|
|
556
|
+ */
|
557
|
557
|
if ((retcode == CL_SUCCESS) && messageGetBody(body)) {
|
558
|
558
|
messageSetCTX(body, ctx);
|
559
|
559
|
switch (parseEmailBody(body, NULL, &mctx, 0)) {
|
...
|
...
|
@@ -562,14 +562,14 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
562
|
562
|
break;
|
563
|
563
|
case FAIL:
|
564
|
564
|
/*
|
565
|
|
- * beware: cli_magic_scan_desc(),
|
566
|
|
- * changes this into CL_CLEAN, so only
|
567
|
|
- * use it to inform the higher levels
|
568
|
|
- * that we couldn't decode it because
|
569
|
|
- * it isn't an mbox, not to signal
|
570
|
|
- * decoding errors on what *is* a valid
|
571
|
|
- * mbox
|
572
|
|
- */
|
|
565
|
+ * beware: cli_magic_scan_desc(),
|
|
566
|
+ * changes this into CL_CLEAN, so only
|
|
567
|
+ * use it to inform the higher levels
|
|
568
|
+ * that we couldn't decode it because
|
|
569
|
+ * it isn't an mbox, not to signal
|
|
570
|
+ * decoding errors on what *is* a valid
|
|
571
|
+ * mbox
|
|
572
|
+ */
|
573
|
573
|
retcode = CL_EFORMAT;
|
574
|
574
|
break;
|
575
|
575
|
case MAXREC:
|
...
|
...
|
@@ -587,8 +587,8 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
587
|
587
|
if (body->isTruncated && retcode == CL_SUCCESS)
|
588
|
588
|
retcode = CL_EMEM;
|
589
|
589
|
/*
|
590
|
|
- * Tidy up and quit
|
591
|
|
- */
|
|
590
|
+ * Tidy up and quit
|
|
591
|
+ */
|
592
|
592
|
messageDestroy(body);
|
593
|
593
|
}
|
594
|
594
|
|
...
|
...
|
@@ -912,9 +912,9 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
912
|
912
|
}
|
913
|
913
|
|
914
|
914
|
/*
|
915
|
|
- * Don't blank lines which are only spaces from headers,
|
916
|
|
- * otherwise they'll be treated as the end of header marker
|
917
|
|
- */
|
|
915
|
+ * Don't blank lines which are only spaces from headers,
|
|
916
|
+ * otherwise they'll be treated as the end of header marker
|
|
917
|
+ */
|
918
|
918
|
if (lastWasBlank) {
|
919
|
919
|
lastWasBlank = FALSE;
|
920
|
920
|
if (boundaryStart(buffer, boundary)) {
|
...
|
...
|
@@ -926,9 +926,9 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
926
|
926
|
cli_dbgmsg("parseEmailFile: check '%s'\n", buffer);
|
927
|
927
|
|
928
|
928
|
/*
|
929
|
|
- * Ensure wide characters are handled where
|
930
|
|
- * sizeof(char) > 1
|
931
|
|
- */
|
|
929
|
+ * Ensure wide characters are handled where
|
|
930
|
+ * sizeof(char) > 1
|
|
931
|
+ */
|
932
|
932
|
if (line && isspace(line[0] & 0xFF)) {
|
933
|
933
|
char copy[sizeof(buffer)];
|
934
|
934
|
|
...
|
...
|
@@ -936,18 +936,18 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
936
|
936
|
strstrip(copy);
|
937
|
937
|
if (copy[0] == '\0') {
|
938
|
938
|
/*
|
939
|
|
- * The header line contains only white
|
940
|
|
- * space. This is not the end of the
|
941
|
|
- * headers according to RFC2822, but
|
942
|
|
- * some MUAs will handle it as though
|
943
|
|
- * it were, and virus writers exploit
|
944
|
|
- * this bug. We can't just break from
|
945
|
|
- * the loop here since that would allow
|
946
|
|
- * other exploits such as inserting a
|
947
|
|
- * white space line before the
|
948
|
|
- * content-type line. So we just have
|
949
|
|
- * to make a best guess. Sigh.
|
950
|
|
- */
|
|
939
|
+ * The header line contains only white
|
|
940
|
+ * space. This is not the end of the
|
|
941
|
+ * headers according to RFC2822, but
|
|
942
|
+ * some MUAs will handle it as though
|
|
943
|
+ * it were, and virus writers exploit
|
|
944
|
+ * this bug. We can't just break from
|
|
945
|
+ * the loop here since that would allow
|
|
946
|
+ * other exploits such as inserting a
|
|
947
|
+ * white space line before the
|
|
948
|
+ * content-type line. So we just have
|
|
949
|
+ * to make a best guess. Sigh.
|
|
950
|
+ */
|
951
|
951
|
if (head->bufferLen) {
|
952
|
952
|
char *header = getMallocedBufferFromList(head);
|
953
|
953
|
int needContinue = 0;
|
...
|
...
|
@@ -981,9 +981,9 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
981
|
981
|
}
|
982
|
982
|
if ((line == NULL) && (0 == head->bufferLen)) { /* empty line */
|
983
|
983
|
/*
|
984
|
|
- * A blank line signifies the end of
|
985
|
|
- * the header and the start of the text
|
986
|
|
- */
|
|
984
|
+ * A blank line signifies the end of
|
|
985
|
+ * the header and the start of the text
|
|
986
|
+ */
|
987
|
987
|
if (!anyHeadersFound)
|
988
|
988
|
/* Ignore the junk at the top */
|
989
|
989
|
continue;
|
...
|
...
|
@@ -1000,14 +1000,14 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
1000
|
1000
|
char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
|
1001
|
1001
|
|
1002
|
1002
|
/*
|
1003
|
|
- * Continuation of line we're ignoring?
|
1004
|
|
- */
|
|
1003
|
+ * Continuation of line we're ignoring?
|
|
1004
|
+ */
|
1005
|
1005
|
if (isblank(line[0]))
|
1006
|
1006
|
continue;
|
1007
|
1007
|
|
1008
|
1008
|
/*
|
1009
|
|
- * Is this a header we're interested in?
|
1010
|
|
- */
|
|
1009
|
+ * Is this a header we're interested in?
|
|
1010
|
+ */
|
1011
|
1011
|
if ((strchr(line, ':') == NULL) ||
|
1012
|
1012
|
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
|
1013
|
1013
|
if (strncmp(line, "From ", 5) == 0)
|
...
|
...
|
@@ -1051,20 +1051,20 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
1051
|
1051
|
|
1052
|
1052
|
if ((lookahead = fmap_need_off_once(map, *at, 1))) {
|
1053
|
1053
|
/*
|
1054
|
|
- * Section B.2 of RFC822 says TAB or
|
1055
|
|
- * SPACE means a continuation of the
|
1056
|
|
- * previous entry.
|
1057
|
|
- *
|
1058
|
|
- * Add all the arguments on the line
|
1059
|
|
- */
|
|
1054
|
+ * Section B.2 of RFC822 says TAB or
|
|
1055
|
+ * SPACE means a continuation of the
|
|
1056
|
+ * previous entry.
|
|
1057
|
+ *
|
|
1058
|
+ * Add all the arguments on the line
|
|
1059
|
+ */
|
1060
|
1060
|
if (isblank(*lookahead))
|
1061
|
1061
|
continue;
|
1062
|
1062
|
}
|
1063
|
1063
|
|
1064
|
1064
|
/*
|
1065
|
|
- * Handle broken headers, where the next
|
1066
|
|
- * line isn't indented by whitespace
|
1067
|
|
- */
|
|
1065
|
+ * Handle broken headers, where the next
|
|
1066
|
+ * line isn't indented by whitespace
|
|
1067
|
+ */
|
1068
|
1068
|
{
|
1069
|
1069
|
char *header = getMallocedBufferFromList(head); /*This is the issue */
|
1070
|
1070
|
int needContinue = 0;
|
...
|
...
|
@@ -1098,9 +1098,9 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
1098
|
1098
|
}
|
1099
|
1099
|
} else if (line && isuuencodebegin(line)) {
|
1100
|
1100
|
/*
|
1101
|
|
- * Fast track visa to uudecode.
|
1102
|
|
- * TODO: binhex, yenc
|
1103
|
|
- */
|
|
1101
|
+ * Fast track visa to uudecode.
|
|
1102
|
+ * TODO: binhex, yenc
|
|
1103
|
+ */
|
1104
|
1104
|
bodyIsEmpty = FALSE;
|
1105
|
1105
|
if (uudecodeFile(ret, line, dir, map, at) < 0)
|
1106
|
1106
|
if (messageAddStr(ret, line) < 0)
|
...
|
...
|
@@ -1108,10 +1108,10 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
1108
|
1108
|
} else {
|
1109
|
1109
|
if (line == NULL) {
|
1110
|
1110
|
/*
|
1111
|
|
- * Although this would save time and RAM, some
|
1112
|
|
- * phish signatures have been built which need
|
1113
|
|
- * the blank lines
|
1114
|
|
- */
|
|
1111
|
+ * Although this would save time and RAM, some
|
|
1112
|
+ * phish signatures have been built which need
|
|
1113
|
+ * the blank lines
|
|
1114
|
+ */
|
1115
|
1115
|
if (lastBodyLineWasBlank &&
|
1116
|
1116
|
(messageGetMimeType(ret) != TEXT)) {
|
1117
|
1117
|
cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
|
...
|
...
|
@@ -1121,11 +1121,11 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
|
1121
|
1121
|
} else {
|
1122
|
1122
|
if (bodyIsEmpty) {
|
1123
|
1123
|
/*
|
1124
|
|
- * Broken message: new line in the
|
1125
|
|
- * middle of the headers, so the first
|
1126
|
|
- * line of the body is in fact
|
1127
|
|
- * the last lines of the header
|
1128
|
|
- */
|
|
1124
|
+ * Broken message: new line in the
|
|
1125
|
+ * middle of the headers, so the first
|
|
1126
|
+ * line of the body is in fact
|
|
1127
|
+ * the last lines of the header
|
|
1128
|
+ */
|
1129
|
1129
|
if (newline_in_header(line))
|
1130
|
1130
|
continue;
|
1131
|
1131
|
bodyIsEmpty = FALSE;
|
...
|
...
|
@@ -1151,8 +1151,8 @@ done:
|
1151
|
1151
|
|
1152
|
1152
|
if (!anyHeadersFound) {
|
1153
|
1153
|
/*
|
1154
|
|
- * False positive in believing we have an e-mail when we don't
|
1155
|
|
- */
|
|
1154
|
+ * False positive in believing we have an e-mail when we don't
|
|
1155
|
+ */
|
1156
|
1156
|
messageDestroy(ret);
|
1157
|
1157
|
cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
|
1158
|
1158
|
return NULL;
|
...
|
...
|
@@ -1222,9 +1222,9 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
|
1222
|
1222
|
line ? line : "");
|
1223
|
1223
|
if (line == NULL) {
|
1224
|
1224
|
/*
|
1225
|
|
- * A blank line signifies the end of
|
1226
|
|
- * the header and the start of the text
|
1227
|
|
- */
|
|
1225
|
+ * A blank line signifies the end of
|
|
1226
|
+ * the header and the start of the text
|
|
1227
|
+ */
|
1228
|
1228
|
cli_dbgmsg("End of header information\n");
|
1229
|
1229
|
if (!anyHeadersFound) {
|
1230
|
1230
|
cli_dbgmsg("Nothing interesting in the header\n");
|
...
|
...
|
@@ -1240,14 +1240,14 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
|
1240
|
1240
|
char cmd[RFC2821LENGTH + 1];
|
1241
|
1241
|
|
1242
|
1242
|
/*
|
1243
|
|
- * Continuation of line we're ignoring?
|
1244
|
|
- */
|
|
1243
|
+ * Continuation of line we're ignoring?
|
|
1244
|
+ */
|
1245
|
1245
|
if (isblank(line[0]))
|
1246
|
1246
|
continue;
|
1247
|
1247
|
|
1248
|
1248
|
/*
|
1249
|
|
- * Is this a header we're interested in?
|
1250
|
|
- */
|
|
1249
|
+ * Is this a header we're interested in?
|
|
1250
|
+ */
|
1251
|
1251
|
if ((strchr(line, ':') == NULL) ||
|
1252
|
1252
|
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
|
1253
|
1253
|
if (strncmp(line, "From ", 5) == 0)
|
...
|
...
|
@@ -1331,17 +1331,17 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
|
1331
|
1331
|
/* throw away leading blank lines */
|
1332
|
1332
|
continue;
|
1333
|
1333
|
/*
|
1334
|
|
- * Broken message: new line in the
|
1335
|
|
- * middle of the headers, so the first
|
1336
|
|
- * line of the body is in fact
|
1337
|
|
- * the last lines of the header
|
1338
|
|
- */
|
|
1334
|
+ * Broken message: new line in the
|
|
1335
|
+ * middle of the headers, so the first
|
|
1336
|
+ * line of the body is in fact
|
|
1337
|
+ * the last lines of the header
|
|
1338
|
+ */
|
1339
|
1339
|
if (newline_in_header(line))
|
1340
|
1340
|
continue;
|
1341
|
1341
|
bodyIsEmpty = FALSE;
|
1342
|
1342
|
}
|
1343
|
1343
|
/*if(t->t_line && isuuencodebegin(t->t_line))
|
1344
|
|
- puts("FIXME: add fast visa here");*/
|
|
1344
|
+ puts("FIXME: add fast visa here");*/
|
1345
|
1345
|
cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
|
1346
|
1346
|
messageMoveText(ret, t, m);
|
1347
|
1347
|
break;
|
...
|
...
|
@@ -1360,8 +1360,8 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
|
1360
|
1360
|
|
1361
|
1361
|
if (!anyHeadersFound) {
|
1362
|
1362
|
/*
|
1363
|
|
- * False positive in believing we have an e-mail when we don't
|
1364
|
|
- */
|
|
1363
|
+ * False positive in believing we have an e-mail when we don't
|
|
1364
|
+ */
|
1365
|
1365
|
messageDestroy(ret);
|
1366
|
1366
|
cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
|
1367
|
1367
|
return NULL;
|
...
|
...
|
@@ -1393,11 +1393,11 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *c
|
1393
|
1393
|
cli_dbgmsg("parseEmailHeader '%s'\n", line);
|
1394
|
1394
|
|
1395
|
1395
|
/*
|
1396
|
|
- * In RFC822 the separator between the key a value is a colon,
|
1397
|
|
- * e.g. Content-Transfer-Encoding: base64
|
1398
|
|
- * However some MUA's are lapse about this and virus writers exploit
|
1399
|
|
- * this hole, so we need to check all known possibilities
|
1400
|
|
- */
|
|
1396
|
+ * In RFC822 the separator between the key a value is a colon,
|
|
1397
|
+ * e.g. Content-Transfer-Encoding: base64
|
|
1398
|
+ * However some MUA's are lapse about this and virus writers exploit
|
|
1399
|
+ * this hole, so we need to check all known possibilities
|
|
1400
|
+ */
|
1401
|
1401
|
for (separator = ":= "; *separator; separator++)
|
1402
|
1402
|
if (strchr(line, *separator) != NULL)
|
1403
|
1403
|
break;
|
...
|
...
|
@@ -1432,12 +1432,12 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *c
|
1432
|
1432
|
|
1433
|
1433
|
if (arg) {
|
1434
|
1434
|
/*
|
1435
|
|
- * Found a header such as
|
1436
|
|
- * Content-Type: multipart/mixed;
|
1437
|
|
- * set arg to be
|
1438
|
|
- * "multipart/mixed" and cmd to
|
1439
|
|
- * be "Content-Type"
|
1440
|
|
- */
|
|
1435
|
+ * Found a header such as
|
|
1436
|
+ * Content-Type: multipart/mixed;
|
|
1437
|
+ * set arg to be
|
|
1438
|
+ * "multipart/mixed" and cmd to
|
|
1439
|
+ * be "Content-Type"
|
|
1440
|
+ */
|
1441
|
1441
|
ret = parseMimeHeader(m, cmd, rfc821, arg, ctx, heuristicFound);
|
1442
|
1442
|
}
|
1443
|
1443
|
}
|
...
|
...
|
@@ -1694,8 +1694,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1694
|
1694
|
/* FIXMELIMITS: this should be better integrated */
|
1695
|
1695
|
if (engine->maxreclevel)
|
1696
|
1696
|
/*
|
1697
|
|
- * This is approximate
|
1698
|
|
- */
|
|
1697
|
+ * This is approximate
|
|
1698
|
+ */
|
1699
|
1699
|
if (recursion_level > engine->maxreclevel) {
|
1700
|
1700
|
|
1701
|
1701
|
cli_dbgmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
|
...
|
...
|
@@ -1703,9 +1703,9 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1703
|
1703
|
}
|
1704
|
1704
|
if (engine->maxfiles && (mctx->files >= engine->maxfiles)) {
|
1705
|
1705
|
/*
|
1706
|
|
- * FIXME: This is only approx - it may have already
|
1707
|
|
- * been exceeded
|
1708
|
|
- */
|
|
1706
|
+ * FIXME: This is only approx - it may have already
|
|
1707
|
+ * been exceeded
|
|
1708
|
+ */
|
1709
|
1709
|
cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", engine->maxfiles);
|
1710
|
1710
|
return MAXFILES;
|
1711
|
1711
|
}
|
...
|
...
|
@@ -1743,20 +1743,20 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1743
|
1743
|
subtype = tableFind(mctx->subtypeTable, mimeSubtype);
|
1744
|
1744
|
if ((mimeType == TEXT) && (subtype == PLAIN)) {
|
1745
|
1745
|
/*
|
1746
|
|
- * This is effectively no encoding, notice that we
|
1747
|
|
- * don't check that charset is us-ascii
|
1748
|
|
- */
|
|
1746
|
+ * This is effectively no encoding, notice that we
|
|
1747
|
+ * don't check that charset is us-ascii
|
|
1748
|
+ */
|
1749
|
1749
|
cli_dbgmsg("text/plain: Assume no attachments\n");
|
1750
|
1750
|
mimeType = NOMIME;
|
1751
|
1751
|
messageSetMimeSubtype(mainMessage, "");
|
1752
|
1752
|
} else if ((mimeType == MESSAGE) &&
|
1753
|
1753
|
(strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
|
1754
|
1754
|
/*
|
1755
|
|
- * RFC1892/RFC3462: section 2 text/rfc822-headers
|
1756
|
|
- * incorrectly sent as message/rfc822-headers
|
1757
|
|
- *
|
1758
|
|
- * Parse as text/plain, i.e. no mime
|
1759
|
|
- */
|
|
1755
|
+ * RFC1892/RFC3462: section 2 text/rfc822-headers
|
|
1756
|
+ * incorrectly sent as message/rfc822-headers
|
|
1757
|
+ *
|
|
1758
|
+ * Parse as text/plain, i.e. no mime
|
|
1759
|
+ */
|
1760
|
1760
|
cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
|
1761
|
1761
|
mimeType = NOMIME;
|
1762
|
1762
|
messageSetMimeSubtype(mainMessage, "");
|
...
|
...
|
@@ -1772,23 +1772,23 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1772
|
1772
|
break;
|
1773
|
1773
|
}
|
1774
|
1774
|
/*
|
1775
|
|
- * Fall through: some phishing mails claim they are
|
1776
|
|
- * text/plain, when they are in fact html
|
1777
|
|
- */
|
|
1775
|
+ * Fall through: some phishing mails claim they are
|
|
1776
|
+ * text/plain, when they are in fact html
|
|
1777
|
+ */
|
1778
|
1778
|
/* fall through */
|
1779
|
1779
|
case TEXT:
|
1780
|
1780
|
/* text/plain has been preprocessed as no encoding */
|
1781
|
1781
|
if (doPhishingScan) {
|
1782
|
1782
|
/*
|
1783
|
|
- * It would be better to save and scan the
|
1784
|
|
- * file and only checkURLs if it's found to be
|
1785
|
|
- * clean
|
1786
|
|
- */
|
|
1783
|
+ * It would be better to save and scan the
|
|
1784
|
+ * file and only checkURLs if it's found to be
|
|
1785
|
+ * clean
|
|
1786
|
+ */
|
1787
|
1787
|
checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
|
1788
|
1788
|
/*
|
1789
|
|
- * There might be html sent without subtype
|
1790
|
|
- * html too, so scan them for phishing
|
1791
|
|
- */
|
|
1789
|
+ * There might be html sent without subtype
|
|
1790
|
+ * html too, so scan them for phishing
|
|
1791
|
+ */
|
1792
|
1792
|
if (rc == VIRUS)
|
1793
|
1793
|
infected = TRUE;
|
1794
|
1794
|
}
|
...
|
...
|
@@ -1808,9 +1808,9 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1808
|
1808
|
/* Broken e-mail message */
|
1809
|
1809
|
mimeType = NOMIME;
|
1810
|
1810
|
/*
|
1811
|
|
- * The break means that we will still
|
1812
|
|
- * check if the file contains a uuencoded file
|
1813
|
|
- */
|
|
1811
|
+ * The break means that we will still
|
|
1812
|
+ * check if the file contains a uuencoded file
|
|
1813
|
+ */
|
1814
|
1814
|
break;
|
1815
|
1815
|
}
|
1816
|
1816
|
|
...
|
...
|
@@ -1824,8 +1824,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1824
|
1824
|
}
|
1825
|
1825
|
|
1826
|
1826
|
/*
|
1827
|
|
- * Get to the start of the first message
|
1828
|
|
- */
|
|
1827
|
+ * Get to the start of the first message
|
|
1828
|
+ */
|
1829
|
1829
|
t_line = messageGetBody(mainMessage);
|
1830
|
1830
|
|
1831
|
1831
|
if (t_line == NULL) {
|
...
|
...
|
@@ -1840,10 +1840,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1840
|
1840
|
if (boundaryStart(lineGetData(t_line->t_line), boundary))
|
1841
|
1841
|
break;
|
1842
|
1842
|
/*
|
1843
|
|
- * Found a binhex file before
|
1844
|
|
- * the first multipart
|
1845
|
|
- * TODO: check yEnc
|
1846
|
|
- */
|
|
1843
|
+ * Found a binhex file before
|
|
1844
|
+ * the first multipart
|
|
1845
|
+ * TODO: check yEnc
|
|
1846
|
+ */
|
1847
|
1847
|
if (binhexBegin(mainMessage) == t_line) {
|
1848
|
1848
|
if (exportBinhexMessage(mctx, mainMessage)) {
|
1849
|
1849
|
/* virus found */
|
...
|
...
|
@@ -1854,14 +1854,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1854
|
1854
|
} else if (t_line->t_next &&
|
1855
|
1855
|
(encodingLine(mainMessage) == t_line->t_next)) {
|
1856
|
1856
|
/*
|
1857
|
|
- * We look for the next line
|
1858
|
|
- * since later on we'll skip
|
1859
|
|
- * over the important line when
|
1860
|
|
- * we think it's a blank line
|
1861
|
|
- * at the top of the message -
|
1862
|
|
- * which it would have been in
|
1863
|
|
- * an RFC compliant world
|
1864
|
|
- */
|
|
1857
|
+ * We look for the next line
|
|
1858
|
+ * since later on we'll skip
|
|
1859
|
+ * over the important line when
|
|
1860
|
+ * we think it's a blank line
|
|
1861
|
+ * at the top of the message -
|
|
1862
|
+ * which it would have been in
|
|
1863
|
+ * an RFC compliant world
|
|
1864
|
+ */
|
1865
|
1865
|
cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
|
1866
|
1866
|
lineGetData(t_line->t_next->t_line));
|
1867
|
1867
|
if (messageGetEncoding(mainMessage) == NOENCODING)
|
...
|
...
|
@@ -1876,38 +1876,38 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1876
|
1876
|
free((char *)boundary);
|
1877
|
1877
|
mimeType = NOMIME;
|
1878
|
1878
|
/*
|
1879
|
|
- * The break means that we will still
|
1880
|
|
- * check if the file contains a yEnc/binhex file
|
1881
|
|
- */
|
|
1879
|
+ * The break means that we will still
|
|
1880
|
+ * check if the file contains a yEnc/binhex file
|
|
1881
|
+ */
|
1882
|
1882
|
break;
|
1883
|
1883
|
}
|
1884
|
1884
|
/*
|
1885
|
|
- * Build up a table of all of the parts of this
|
1886
|
|
- * multipart message. Remember, each part may itself
|
1887
|
|
- * be a multipart message.
|
1888
|
|
- */
|
|
1885
|
+ * Build up a table of all of the parts of this
|
|
1886
|
+ * multipart message. Remember, each part may itself
|
|
1887
|
+ * be a multipart message.
|
|
1888
|
+ */
|
1889
|
1889
|
inhead = 1;
|
1890
|
1890
|
inMimeHead = 0;
|
1891
|
1891
|
|
1892
|
1892
|
/*
|
1893
|
|
- * Re-read this variable in case mimeSubtype has changed
|
1894
|
|
- */
|
|
1893
|
+ * Re-read this variable in case mimeSubtype has changed
|
|
1894
|
+ */
|
1895
|
1895
|
subtype = tableFind(mctx->subtypeTable, mimeSubtype);
|
1896
|
1896
|
|
1897
|
1897
|
/*
|
1898
|
|
- * Parse the mainMessage object and create an array
|
1899
|
|
- * of objects called messages, one for each of the
|
1900
|
|
- * multiparts that mainMessage contains.
|
1901
|
|
- *
|
1902
|
|
- * This looks like parseEmailHeaders() - maybe there's
|
1903
|
|
- * some duplication of code to be cleaned up
|
1904
|
|
- *
|
1905
|
|
- * We may need to create an array rather than just
|
1906
|
|
- * save each part as it is found because not all
|
1907
|
|
- * elements will need scanning, and we don't yet know
|
1908
|
|
- * which of those elements it will be, except in
|
1909
|
|
- * the case of mixed, when all parts need to be scanned.
|
1910
|
|
- */
|
|
1898
|
+ * Parse the mainMessage object and create an array
|
|
1899
|
+ * of objects called messages, one for each of the
|
|
1900
|
+ * multiparts that mainMessage contains.
|
|
1901
|
+ *
|
|
1902
|
+ * This looks like parseEmailHeaders() - maybe there's
|
|
1903
|
+ * some duplication of code to be cleaned up
|
|
1904
|
+ *
|
|
1905
|
+ * We may need to create an array rather than just
|
|
1906
|
+ * save each part as it is found because not all
|
|
1907
|
+ * elements will need scanning, and we don't yet know
|
|
1908
|
+ * which of those elements it will be, except in
|
|
1909
|
+ * the case of mixed, when all parts need to be scanned.
|
|
1910
|
+ */
|
1911
|
1911
|
for (multiparts = 0; t_line && !infected; multiparts++) {
|
1912
|
1912
|
int lines = 0;
|
1913
|
1913
|
message **m;
|
...
|
...
|
@@ -1922,8 +1922,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1922
|
1922
|
if (aMessage == NULL) {
|
1923
|
1923
|
multiparts--;
|
1924
|
1924
|
/* if allocation failed the first time,
|
1925
|
|
- * there's no point in retrying, just
|
1926
|
|
- * break out */
|
|
1925
|
+ * there's no point in retrying, just
|
|
1926
|
+ * break out */
|
1927
|
1927
|
break;
|
1928
|
1928
|
}
|
1929
|
1929
|
messageSetCTX(aMessage, mctx->ctx);
|
...
|
...
|
@@ -1931,9 +1931,9 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1931
|
1931
|
cli_dbgmsg("Now read in part %d\n", multiparts);
|
1932
|
1932
|
|
1933
|
1933
|
/*
|
1934
|
|
- * Ignore blank lines. There shouldn't be ANY
|
1935
|
|
- * but some viruses insert them
|
1936
|
|
- */
|
|
1934
|
+ * Ignore blank lines. There shouldn't be ANY
|
|
1935
|
+ * but some viruses insert them
|
|
1936
|
+ */
|
1937
|
1937
|
while ((t_line = t_line->t_next) != NULL)
|
1938
|
1938
|
if (t_line->t_line &&
|
1939
|
1939
|
/*(cli_chomp(t_line->t_text) > 0))*/
|
...
|
...
|
@@ -1943,12 +1943,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1943
|
1943
|
if (t_line == NULL) {
|
1944
|
1944
|
cli_dbgmsg("Empty part\n");
|
1945
|
1945
|
/*
|
1946
|
|
- * Remove this part unless there's
|
1947
|
|
- * a binhex portion somewhere in
|
1948
|
|
- * the complete message that we may
|
1949
|
|
- * throw away by mistake if the MIME
|
1950
|
|
- * encoding information is incorrect
|
1951
|
|
- */
|
|
1946
|
+ * Remove this part unless there's
|
|
1947
|
+ * a binhex portion somewhere in
|
|
1948
|
+ * the complete message that we may
|
|
1949
|
+ * throw away by mistake if the MIME
|
|
1950
|
+ * encoding information is incorrect
|
|
1951
|
+ */
|
1952
|
1952
|
if (mainMessage &&
|
1953
|
1953
|
(binhexBegin(mainMessage) == NULL)) {
|
1954
|
1954
|
messageDestroy(aMessage);
|
...
|
...
|
@@ -1960,9 +1960,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1960
|
1960
|
do {
|
1961
|
1961
|
const char *line = lineGetData(t_line->t_line);
|
1962
|
1962
|
|
1963
|
|
- /*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
|
1964
|
|
- multiparts, inMimeHead, inhead, boundary, line,
|
1965
|
|
- t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
|
|
1963
|
+ /*
|
|
1964
|
+ cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
|
|
1965
|
+ multiparts, inMimeHead, inhead, boundary, line,
|
|
1966
|
+ t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");
|
|
1967
|
+ */
|
1966
|
1968
|
|
1967
|
1969
|
if (inMimeHead) { /* continuation line */
|
1968
|
1970
|
if (line == NULL) {
|
...
|
...
|
@@ -1970,20 +1972,20 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
1970
|
1970
|
continue;
|
1971
|
1971
|
}
|
1972
|
1972
|
/*
|
1973
|
|
- * Handle continuation lines
|
1974
|
|
- * because the previous line
|
1975
|
|
- * ended with a ; or this line
|
1976
|
|
- * starts with a white space
|
1977
|
|
- */
|
|
1973
|
+ * Handle continuation lines
|
|
1974
|
+ * because the previous line
|
|
1975
|
+ * ended with a ; or this line
|
|
1976
|
+ * starts with a white space
|
|
1977
|
+ */
|
1978
|
1978
|
cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
|
1979
|
1979
|
multiparts, line);
|
1980
|
1980
|
/*
|
1981
|
|
- * Handle the case when it
|
1982
|
|
- * isn't really a continuation
|
1983
|
|
- * line:
|
1984
|
|
- * Content-Type: application/octet-stream;
|
1985
|
|
- * Content-Transfer-Encoding: base64
|
1986
|
|
- */
|
|
1981
|
+ * Handle the case when it
|
|
1982
|
+ * isn't really a continuation
|
|
1983
|
+ * line:
|
|
1984
|
+ * Content-Type: application/octet-stream;
|
|
1985
|
+ * Content-Transfer-Encoding: base64
|
|
1986
|
+ */
|
1987
|
1987
|
parseEmailHeader(aMessage, line, mctx->rfc821Table, mctx->ctx, &heuristicFound);
|
1988
|
1988
|
if (heuristicFound) {
|
1989
|
1989
|
rc = VIRUS;
|
...
|
...
|
@@ -2005,22 +2007,22 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2005
|
2005
|
|
2006
|
2006
|
if (line == NULL) {
|
2007
|
2007
|
/*
|
2008
|
|
- * empty line, should the end of the headers,
|
2009
|
|
- * but some base64 decoders, e.g. uudeview, are broken
|
2010
|
|
- * and will handle this type of entry, decoding the
|
2011
|
|
- * base64 content...
|
2012
|
|
- * Content-Type: application/octet-stream; name=text.zip
|
2013
|
|
- * Content-Transfer-Encoding: base64
|
2014
|
|
- * Content-Disposition: attachment; filename="text.zip"
|
2015
|
|
- *
|
2016
|
|
- * Content-Disposition: attachment;
|
2017
|
|
- * filename=text.zip
|
2018
|
|
- * Content-Type: application/octet-stream;
|
2019
|
|
- * name=text.zip
|
2020
|
|
- * Content-Transfer-Encoding: base64
|
2021
|
|
- *
|
2022
|
|
- * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
|
2023
|
|
- */
|
|
2008
|
+ * empty line, should be the end of the headers,
|
|
2009
|
+ * but some base64 decoders, e.g. uudeview, are broken
|
|
2010
|
+ * and will handle this type of entry, decoding the
|
|
2011
|
+ * base64 content...
|
|
2012
|
+ * Content-Type: application/octet-stream; name=text.zip
|
|
2013
|
+ * Content-Transfer-Encoding: base64
|
|
2014
|
+ * Content-Disposition: attachment; filename="text.zip"
|
|
2015
|
+ *
|
|
2016
|
+ * Content-Disposition: attachment;
|
|
2017
|
+ * filename=text.zip
|
|
2018
|
+ * Content-Type: application/octet-stream;
|
|
2019
|
+ * name=text.zip
|
|
2020
|
+ * Content-Transfer-Encoding: base64
|
|
2021
|
+ *
|
|
2022
|
+ * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
|
|
2023
|
+ */
|
2024
|
2024
|
const text *next = t_line->t_next;
|
2025
|
2025
|
|
2026
|
2026
|
if (next && next->t_line) {
|
...
|
...
|
@@ -2030,15 +2032,15 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2030
|
2030
|
(messageGetMimeType(aMessage) == APPLICATION) &&
|
2031
|
2031
|
data && strstr(data, "base64")) {
|
2032
|
2032
|
/*
|
2033
|
|
- * Handle this nightmare (note the blank
|
2034
|
|
- * line in the header and the incorrect
|
2035
|
|
- * content-transfer-encoding header)
|
2036
|
|
- *
|
2037
|
|
- * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
|
2038
|
|
- *
|
2039
|
|
- * r-Encoding: base64
|
2040
|
|
- * Content-Disposition: attachment; filename="zipped_files.EXE"
|
2041
|
|
- */
|
|
2033
|
+ * Handle this nightmare (note the blank
|
|
2034
|
+ * line in the header and the incorrect
|
|
2035
|
+ * content-transfer-encoding header)
|
|
2036
|
+ *
|
|
2037
|
+ * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
|
|
2038
|
+ *
|
|
2039
|
+ * r-Encoding: base64
|
|
2040
|
+ * Content-Disposition: attachment; filename="zipped_files.EXE"
|
|
2041
|
+ */
|
2042
|
2042
|
messageSetEncoding(aMessage, "base64");
|
2043
|
2043
|
cli_dbgmsg("Ignoring fake end of headers\n");
|
2044
|
2044
|
continue;
|
...
|
...
|
@@ -2056,25 +2058,25 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2056
|
2056
|
}
|
2057
|
2057
|
if (isspace((int)*line)) {
|
2058
|
2058
|
/*
|
2059
|
|
- * The first line is
|
2060
|
|
- * continuation line.
|
2061
|
|
- * This is tricky
|
2062
|
|
- * to handle, but
|
2063
|
|
- * all we can do is our
|
2064
|
|
- * best
|
2065
|
|
- */
|
|
2059
|
+ * The first line is a
|
|
2060
|
+ * continuation line.
|
|
2061
|
+ * This is tricky
|
|
2062
|
+ * to handle, but
|
|
2063
|
+ * all we can do is our
|
|
2064
|
+ * best
|
|
2065
|
+ */
|
2066
|
2066
|
cli_dbgmsg("Part %d starts with a continuation line\n",
|
2067
|
2067
|
multiparts);
|
2068
|
2068
|
messageAddArgument(aMessage, line);
|
2069
|
2069
|
/*
|
2070
|
|
- * Give it a default
|
2071
|
|
- * MIME type since
|
2072
|
|
- * that may be the
|
2073
|
|
- * missing line
|
2074
|
|
- *
|
2075
|
|
- * Choose application to
|
2076
|
|
- * force a save
|
2077
|
|
- */
|
|
2070
|
+ * Give it a default
|
|
2071
|
+ * MIME type since
|
|
2072
|
+ * that may be the
|
|
2073
|
+ * missing line
|
|
2074
|
+ *
|
|
2075
|
+ * Choose application to
|
|
2076
|
+ * force a save
|
|
2077
|
+ */
|
2078
|
2078
|
if (messageGetMimeType(aMessage) == NOMIME)
|
2079
|
2079
|
messageSetMimeType(aMessage, "application");
|
2080
|
2080
|
continue;
|
...
|
...
|
@@ -2091,12 +2093,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2091
|
2091
|
/*quotes = count_quotes(fullline);*/
|
2092
|
2092
|
|
2093
|
2093
|
/*
|
2094
|
|
- * Fold next lines to the end of this
|
2095
|
|
- * if they start with a white space
|
2096
|
|
- * or if this line has an odd number of quotes:
|
2097
|
|
- * Content-Type: application/octet-stream; name="foo
|
2098
|
|
- * "
|
2099
|
|
- */
|
|
2094
|
+ * Fold next lines to the end of this
|
|
2095
|
+ * if they start with a white space
|
|
2096
|
+ * or if this line has an odd number of quotes:
|
|
2097
|
+ * Content-Type: application/octet-stream; name="foo
|
|
2098
|
+ * "
|
|
2099
|
+ */
|
2100
|
2100
|
while (t_line && next_is_folded_header(t_line)) {
|
2101
|
2101
|
const char *data;
|
2102
|
2102
|
size_t datasz;
|
...
|
...
|
@@ -2107,11 +2109,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2107
|
2107
|
|
2108
|
2108
|
if (data[1] == '\0') {
|
2109
|
2109
|
/*
|
2110
|
|
- * Broken message: the
|
2111
|
|
- * blank line at the end
|
2112
|
|
- * of the headers isn't blank -
|
2113
|
|
- * it contains a space
|
2114
|
|
- */
|
|
2110
|
+ * Broken message: the
|
|
2111
|
+ * blank line at the end
|
|
2112
|
+ * of the headers isn't blank -
|
|
2113
|
+ * it contains a space
|
|
2114
|
+ */
|
2115
|
2115
|
cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
|
2116
|
2116
|
multiparts);
|
2117
|
2117
|
inhead = FALSE;
|
...
|
...
|
@@ -2140,13 +2142,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2140
|
2140
|
}
|
2141
|
2141
|
} else if (boundaryEnd(line, boundary)) {
|
2142
|
2142
|
/*
|
2143
|
|
- * Some viruses put information
|
2144
|
|
- * *after* the end of message,
|
2145
|
|
- * which presumably some broken
|
2146
|
|
- * mail clients find, so we
|
2147
|
|
- * can't assume that this
|
2148
|
|
- * is the end of the message
|
2149
|
|
- */
|
|
2143
|
+ * Some viruses put information
|
|
2144
|
+ * *after* the end of message,
|
|
2145
|
+ * which presumably some broken
|
|
2146
|
+ * mail clients find, so we
|
|
2147
|
+ * can't assume that this
|
|
2148
|
+ * is the end of the message
|
|
2149
|
+ */
|
2150
|
2150
|
/* t_line = NULL;*/
|
2151
|
2151
|
break;
|
2152
|
2152
|
} else if (boundaryStart(line, boundary)) {
|
...
|
...
|
@@ -2163,11 +2165,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2163
|
2163
|
multiparts, lines, (int)rc);
|
2164
|
2164
|
|
2165
|
2165
|
/*
|
2166
|
|
- * Only save in the array of messages if some
|
2167
|
|
- * decision will be taken on whether to scan.
|
2168
|
|
- * If all parts will be scanned then save to
|
2169
|
|
- * file straight away
|
2170
|
|
- */
|
|
2166
|
+ * Only save in the array of messages if some
|
|
2167
|
+ * decision will be taken on whether to scan.
|
|
2168
|
+ * If all parts will be scanned then save to
|
|
2169
|
+ * file straight away
|
|
2170
|
+ */
|
2171
|
2171
|
switch (subtype) {
|
2172
|
2172
|
case MIXED:
|
2173
|
2173
|
case ALTERNATIVE:
|
...
|
...
|
@@ -2197,13 +2199,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2197
|
2197
|
case SIGNED:
|
2198
|
2198
|
case PARALLEL:
|
2199
|
2199
|
/* all the subtypes that we handle
|
2200
|
|
- * (all from the switch(tableFind...) below)
|
2201
|
|
- * must be listed here */
|
|
2200
|
+ * (all from the switch(tableFind...) below)
|
|
2201
|
+ * must be listed here */
|
2202
|
2202
|
break;
|
2203
|
2203
|
default:
|
2204
|
2204
|
/* this is a subtype that we
|
2205
|
|
- * don't handle anyway,
|
2206
|
|
- * don't store */
|
|
2205
|
+ * don't handle anyway,
|
|
2206
|
+ * don't store */
|
2207
|
2207
|
if (messages[multiparts]) {
|
2208
|
2208
|
messageDestroy(messages[multiparts]);
|
2209
|
2209
|
messages[multiparts] = NULL;
|
...
|
...
|
@@ -2220,9 +2222,9 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2220
|
2220
|
}
|
2221
|
2221
|
|
2222
|
2222
|
/*
|
2223
|
|
- * Preprocess. Anything special to be done before
|
2224
|
|
- * we handle the multiparts?
|
2225
|
|
- */
|
|
2223
|
+ * Preprocess. Anything special to be done before
|
|
2224
|
+ * we handle the multiparts?
|
|
2225
|
+ */
|
2226
|
2226
|
switch (subtype) {
|
2227
|
2227
|
case KNOWBOT:
|
2228
|
2228
|
/* TODO */
|
...
|
...
|
@@ -2231,18 +2233,18 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2231
|
2231
|
break;
|
2232
|
2232
|
case -1:
|
2233
|
2233
|
/*
|
2234
|
|
- * According to section 7.2.6 of
|
2235
|
|
- * RFC1521, unrecognized multiparts
|
2236
|
|
- * should be treated as multipart/mixed.
|
2237
|
|
- */
|
|
2234
|
+ * According to section 7.2.6 of
|
|
2235
|
+ * RFC1521, unrecognized multiparts
|
|
2236
|
+ * should be treated as multipart/mixed.
|
|
2237
|
+ */
|
2238
|
2238
|
cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
|
2239
|
2239
|
mimeSubtype = "mixed";
|
2240
|
2240
|
break;
|
2241
|
2241
|
}
|
2242
|
2242
|
|
2243
|
2243
|
/*
|
2244
|
|
- * We've finished message we're parsing
|
2245
|
|
- */
|
|
2244
|
+ * We've finished message we're parsing
|
|
2245
|
+ */
|
2246
|
2246
|
if (mainMessage && (mainMessage != messageIn)) {
|
2247
|
2247
|
messageDestroy(mainMessage);
|
2248
|
2248
|
mainMessage = NULL;
|
...
|
...
|
@@ -2264,8 +2266,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2264
|
2264
|
mctx->wrkobj = saveobj;
|
2265
|
2265
|
#endif
|
2266
|
2266
|
/*
|
2267
|
|
- * Nothing to do
|
2268
|
|
- */
|
|
2267
|
+ * Nothing to do
|
|
2268
|
+ */
|
2269
|
2269
|
switch (rc) {
|
2270
|
2270
|
case VIRUS:
|
2271
|
2271
|
return VIRUS;
|
...
|
...
|
@@ -2279,18 +2281,18 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2279
|
2279
|
cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
|
2280
|
2280
|
|
2281
|
2281
|
/*
|
2282
|
|
- * We now have all the parts of the multipart message
|
2283
|
|
- * in the messages array:
|
2284
|
|
- * message *messages[multiparts]
|
2285
|
|
- * Let's decide what to do with them all
|
2286
|
|
- */
|
|
2282
|
+ * We now have all the parts of the multipart message
|
|
2283
|
+ * in the messages array:
|
|
2284
|
+ * message *messages[multiparts]
|
|
2285
|
+ * Let's decide what to do with them all
|
|
2286
|
+ */
|
2287
|
2287
|
switch (tableFind(mctx->subtypeTable, mimeSubtype)) {
|
2288
|
2288
|
case RELATED:
|
2289
|
2289
|
cli_dbgmsg("Multipart related handler\n");
|
2290
|
2290
|
/*
|
2291
|
|
- * Have a look to see if there's HTML code
|
2292
|
|
- * which will need scanning
|
2293
|
|
- */
|
|
2291
|
+ * Have a look to see if there's HTML code
|
|
2292
|
+ * which will need scanning
|
|
2293
|
+ */
|
2294
|
2294
|
aMessage = NULL;
|
2295
|
2295
|
assert(multiparts > 0);
|
2296
|
2296
|
|
...
|
...
|
@@ -2303,10 +2305,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2303
|
2303
|
}
|
2304
|
2304
|
} else {
|
2305
|
2305
|
/*
|
2306
|
|
- * There isn't an HTML bit. If there's a
|
2307
|
|
- * multipart bit, it'll may be in there
|
2308
|
|
- * somewhere
|
2309
|
|
- */
|
|
2306
|
+ * There isn't an HTML bit. If there's a
|
|
2307
|
+ * multipart bit, it may be in there
|
|
2308
|
+ * somewhere
|
|
2309
|
+ */
|
2310
|
2310
|
for (i = 0; i < multiparts; i++) {
|
2311
|
2311
|
if (messageGetMimeType(messages[i]) == MULTIPART) {
|
2312
|
2312
|
aMessage = messages[i];
|
...
|
...
|
@@ -2336,58 +2338,58 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2336
|
2336
|
}
|
2337
|
2337
|
|
2338
|
2338
|
/*
|
2339
|
|
- * The message is confused about the difference
|
2340
|
|
- * between alternative and related. Badtrans.B
|
2341
|
|
- * suffers from this problem.
|
2342
|
|
- *
|
2343
|
|
- * Fall through in this case:
|
2344
|
|
- * Content-Type: multipart/related;
|
2345
|
|
- * type="multipart/alternative"
|
2346
|
|
- */
|
|
2339
|
+ * The message is confused about the difference
|
|
2340
|
+ * between alternative and related. Badtrans.B
|
|
2341
|
+ * suffers from this problem.
|
|
2342
|
+ *
|
|
2343
|
+ * Fall through in this case:
|
|
2344
|
+ * Content-Type: multipart/related;
|
|
2345
|
+ * type="multipart/alternative"
|
|
2346
|
+ */
|
2347
|
2347
|
/* fall through */
|
2348
|
2348
|
case DIGEST:
|
2349
|
2349
|
/*
|
2350
|
|
- * According to section 5.1.5 RFC2046, the
|
2351
|
|
- * default mime type of multipart/digest parts
|
2352
|
|
- * is message/rfc822
|
2353
|
|
- *
|
2354
|
|
- * We consider them as alternative, wrong in
|
2355
|
|
- * the strictest sense since they aren't
|
2356
|
|
- * alternatives - all parts a valid - but it's
|
2357
|
|
- * OK for our needs since it means each part
|
2358
|
|
- * will be scanned
|
2359
|
|
- */
|
|
2350
|
+ * According to section 5.1.5 RFC2046, the
|
|
2351
|
+ * default mime type of multipart/digest parts
|
|
2352
|
+ * is message/rfc822
|
|
2353
|
+ *
|
|
2354
|
+ * We consider them as alternative, wrong in
|
|
2355
|
+ * the strictest sense since they aren't
|
|
2356
|
+ * alternatives - all parts are valid - but it's
|
|
2357
|
+ * OK for our needs since it means each part
|
|
2358
|
+ * will be scanned
|
|
2359
|
+ */
|
2360
|
2360
|
case ALTERNATIVE:
|
2361
|
2361
|
cli_dbgmsg("Multipart alternative handler\n");
|
2362
|
2362
|
|
2363
|
2363
|
/*
|
2364
|
|
- * Fall through - some clients are broken and
|
2365
|
|
- * say alternative instead of mixed. The Klez
|
2366
|
|
- * virus is broken that way, and anyway we
|
2367
|
|
- * wish to scan all of the alternatives
|
2368
|
|
- */
|
|
2364
|
+ * Fall through - some clients are broken and
|
|
2365
|
+ * say alternative instead of mixed. The Klez
|
|
2366
|
+ * virus is broken that way, and anyway we
|
|
2367
|
+ * wish to scan all of the alternatives
|
|
2368
|
+ */
|
2369
|
2369
|
/* fall through */
|
2370
|
2370
|
case REPORT:
|
2371
|
2371
|
/*
|
2372
|
|
- * According to section 1 of RFC1892, the
|
2373
|
|
- * syntax of multipart/report is the same
|
2374
|
|
- * as multipart/mixed. There are some required
|
2375
|
|
- * parameters, but there's no need for us to
|
2376
|
|
- * verify that they exist
|
2377
|
|
- */
|
|
2372
|
+ * According to section 1 of RFC1892, the
|
|
2373
|
+ * syntax of multipart/report is the same
|
|
2374
|
+ * as multipart/mixed. There are some required
|
|
2375
|
+ * parameters, but there's no need for us to
|
|
2376
|
+ * verify that they exist
|
|
2377
|
+ */
|
2378
|
2378
|
case ENCRYPTED:
|
2379
|
2379
|
/* MUAs without encryption plugins can display as multipart/mixed,
|
2380
|
|
- * just scan it*/
|
|
2380
|
+ * just scan it*/
|
2381
|
2381
|
case MIXED:
|
2382
|
2382
|
case APPLEDOUBLE: /* not really supported */
|
2383
|
2383
|
/*
|
2384
|
|
- * Look for attachments
|
2385
|
|
- *
|
2386
|
|
- * Not all formats are supported. If an
|
2387
|
|
- * unsupported format turns out to be
|
2388
|
|
- * common enough to implement, it is a simple
|
2389
|
|
- * matter to add it
|
2390
|
|
- */
|
|
2384
|
+ * Look for attachments
|
|
2385
|
+ *
|
|
2386
|
+ * Not all formats are supported. If an
|
|
2387
|
+ * unsupported format turns out to be
|
|
2388
|
+ * common enough to implement, it is a simple
|
|
2389
|
+ * matter to add it
|
|
2390
|
+ */
|
2391
|
2391
|
if (aText) {
|
2392
|
2392
|
if (mainMessage && (mainMessage != messageIn))
|
2393
|
2393
|
messageDestroy(mainMessage);
|
...
|
...
|
@@ -2414,13 +2416,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2414
|
2414
|
case SIGNED:
|
2415
|
2415
|
case PARALLEL:
|
2416
|
2416
|
/*
|
2417
|
|
- * If we're here it could be because we have a
|
2418
|
|
- * multipart/mixed message, consisting of a
|
2419
|
|
- * message followed by an attachment. That
|
2420
|
|
- * message itself is a multipart/alternative
|
2421
|
|
- * message and we need to dig out the plain
|
2422
|
|
- * text part of that alternative
|
2423
|
|
- */
|
|
2417
|
+ * If we're here it could be because we have a
|
|
2418
|
+ * multipart/mixed message, consisting of a
|
|
2419
|
+ * message followed by an attachment. That
|
|
2420
|
+ * message itself is a multipart/alternative
|
|
2421
|
+ * message and we need to dig out the plain
|
|
2422
|
+ * text part of that alternative
|
|
2423
|
+ */
|
2424
|
2424
|
if (messages) {
|
2425
|
2425
|
htmltextPart = getTextPart(messages, multiparts);
|
2426
|
2426
|
if (htmltextPart == -1)
|
...
|
...
|
@@ -2463,8 +2465,8 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2463
|
2463
|
|
2464
|
2464
|
case MESSAGE:
|
2465
|
2465
|
/*
|
2466
|
|
- * Check for forbidden encodings
|
2467
|
|
- */
|
|
2466
|
+ * Check for forbidden encodings
|
|
2467
|
+ */
|
2468
|
2468
|
switch (messageGetEncoding(mainMessage)) {
|
2469
|
2469
|
case NOENCODING:
|
2470
|
2470
|
case EIGHTBIT:
|
...
|
...
|
@@ -2526,18 +2528,18 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2526
|
2526
|
default:
|
2527
|
2527
|
cli_dbgmsg("Message received with unknown mime encoding - assume application\n");
|
2528
|
2528
|
/*
|
2529
|
|
- * Some Yahoo emails attach as
|
2530
|
|
- * Content-Type: X-unknown/unknown;
|
2531
|
|
- * instead of
|
2532
|
|
- * Content-Type: application/unknown;
|
2533
|
|
- * so let's try our best to salvage something
|
2534
|
|
- */
|
|
2529
|
+ * Some Yahoo emails attach as
|
|
2530
|
+ * Content-Type: X-unknown/unknown;
|
|
2531
|
+ * instead of
|
|
2532
|
+ * Content-Type: application/unknown;
|
|
2533
|
+ * so let's try our best to salvage something
|
|
2534
|
+ */
|
2535
|
2535
|
/* fall through */
|
2536
|
2536
|
case APPLICATION:
|
2537
|
2537
|
/*cptr = messageGetMimeSubtype(mainMessage);
|
2538
|
2538
|
|
2539
|
|
- if((strcasecmp(cptr, "octet-stream") == 0) ||
|
2540
|
|
- (strcasecmp(cptr, "x-msdownload") == 0)) {*/
|
|
2539
|
+ if((strcasecmp(cptr, "octet-stream") == 0) ||
|
|
2540
|
+ (strcasecmp(cptr, "x-msdownload") == 0)) {*/
|
2541
|
2541
|
{
|
2542
|
2542
|
fb = messageToFileblob(mainMessage, mctx->dir, 1);
|
2543
|
2543
|
|
...
|
...
|
@@ -2553,7 +2555,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2553
|
2553
|
messageReset(mainMessage);
|
2554
|
2554
|
}
|
2555
|
2555
|
} /*else
|
2556
|
|
- cli_warnmsg("Discarded application not sent as attachment\n");*/
|
|
2556
|
+ cli_warnmsg("Discarded application not sent as attachment\n");*/
|
2557
|
2557
|
break;
|
2558
|
2558
|
|
2559
|
2559
|
case AUDIO:
|
...
|
...
|
@@ -2602,12 +2604,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2602
|
2602
|
break;
|
2603
|
2603
|
|
2604
|
2604
|
/*
|
2605
|
|
- * We've found what looks like the start of a bounce
|
2606
|
|
- * message. Only bother saving if it really is a bounce
|
2607
|
|
- * message, this helps to speed up scanning of ping-pong
|
2608
|
|
- * messages that have lots of bounces within bounces in
|
2609
|
|
- * them
|
2610
|
|
- */
|
|
2605
|
+ * We've found what looks like the start of a bounce
|
|
2606
|
+ * message. Only bother saving if it really is a bounce
|
|
2607
|
+ * message, this helps to speed up scanning of ping-pong
|
|
2608
|
+ * messages that have lots of bounces within bounces in
|
|
2609
|
+ * them
|
|
2610
|
+ */
|
2611
|
2611
|
for (; lookahead; lookahead = lookahead->t_next) {
|
2612
|
2612
|
l = lookahead->t_line;
|
2613
|
2613
|
|
...
|
...
|
@@ -2616,17 +2618,17 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2616
|
2616
|
s = lineGetData(l);
|
2617
|
2617
|
if (strncasecmp(s, "Content-Type:", 13) == 0) {
|
2618
|
2618
|
/*
|
2619
|
|
- * Don't bother with text/plain or
|
2620
|
|
- * text/html
|
2621
|
|
- */
|
|
2619
|
+ * Don't bother with text/plain or
|
|
2620
|
+ * text/html
|
|
2621
|
+ */
|
2622
|
2622
|
if (CLI_STRCASESTR(s, "text/plain") != NULL)
|
2623
|
2623
|
/*
|
2624
|
|
- * Don't bother to save the
|
2625
|
|
- * unuseful part, read past
|
2626
|
|
- * the headers then we'll go
|
2627
|
|
- * on to look for the next
|
2628
|
|
- * bounce message
|
2629
|
|
- */
|
|
2624
|
+ * Don't bother to save the
|
|
2625
|
+ * unuseful part, read past
|
|
2626
|
+ * the headers then we'll go
|
|
2627
|
+ * on to look for the next
|
|
2628
|
+ * bounce message
|
|
2629
|
+ */
|
2630
|
2630
|
continue;
|
2631
|
2631
|
if ((!doPhishingScan) &&
|
2632
|
2632
|
(CLI_STRCASESTR(s, "text/html") != NULL))
|
...
|
...
|
@@ -2643,14 +2645,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2643
|
2643
|
}
|
2644
|
2644
|
|
2645
|
2645
|
/*
|
2646
|
|
- * Prescan the bounce message to see if there's likely
|
2647
|
|
- * to be anything nasty.
|
2648
|
|
- * This algorithm is hand crafted and may be breakable
|
2649
|
|
- * so all submissions are welcome. It's best NOT to
|
2650
|
|
- * remove this however you may be tempted, because it
|
2651
|
|
- * significantly speeds up the scanning of multiple
|
2652
|
|
- * bounces (i.e. bounces within many bounces)
|
2653
|
|
- */
|
|
2646
|
+ * Prescan the bounce message to see if there's likely
|
|
2647
|
+ * to be anything nasty.
|
|
2648
|
+ * This algorithm is hand crafted and may be breakable
|
|
2649
|
+ * so all submissions are welcome. It's best NOT to
|
|
2650
|
+ * remove this however you may be tempted, because it
|
|
2651
|
+ * significantly speeds up the scanning of multiple
|
|
2652
|
+ * bounces (i.e. bounces within many bounces)
|
|
2653
|
+ */
|
2654
|
2654
|
for (; lookahead; lookahead = lookahead->t_next) {
|
2655
|
2655
|
l = lookahead->t_line;
|
2656
|
2656
|
|
...
|
...
|
@@ -2717,15 +2719,15 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2717
|
2717
|
}
|
2718
|
2718
|
|
2719
|
2719
|
/*
|
2720
|
|
- * No attachments - scan the text portions, often files
|
2721
|
|
- * are hidden in HTML code
|
2722
|
|
- */
|
|
2720
|
+ * No attachments - scan the text portions, often files
|
|
2721
|
+ * are hidden in HTML code
|
|
2722
|
+ */
|
2723
|
2723
|
if (mainMessage && (rc != VIRUS)) {
|
2724
|
2724
|
text *t_line;
|
2725
|
2725
|
|
2726
|
2726
|
/*
|
2727
|
|
- * Look for uu-encoded main file
|
2728
|
|
- */
|
|
2727
|
+ * Look for uu-encoded main file
|
|
2728
|
+ */
|
2729
|
2729
|
if (mainMessage->body_first != NULL &&
|
2730
|
2730
|
(encodingLine(mainMessage) != NULL) &&
|
2731
|
2731
|
((t_line = bounceBegin(mainMessage)) != NULL))
|
...
|
...
|
@@ -2735,20 +2737,20 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2735
|
2735
|
|
2736
|
2736
|
if (messageGetMimeType(mainMessage) == MESSAGE)
|
2737
|
2737
|
/*
|
2738
|
|
- * Quick peek, if the encapsulated
|
2739
|
|
- * message has no
|
2740
|
|
- * content encoding statement don't
|
2741
|
|
- * bother saving to scan, it's safe
|
2742
|
|
- */
|
|
2738
|
+ * Quick peek, if the encapsulated
|
|
2739
|
+ * message has no
|
|
2740
|
+ * content encoding statement don't
|
|
2741
|
+ * bother saving to scan, it's safe
|
|
2742
|
+ */
|
2743
|
2743
|
saveIt = (bool)(encodingLine(mainMessage) != NULL);
|
2744
|
2744
|
else if (mainMessage->body_last != NULL && (t_line = encodingLine(mainMessage)) != NULL) {
|
2745
|
2745
|
/*
|
2746
|
|
- * Some bounces include the message
|
2747
|
|
- * body without the headers.
|
2748
|
|
- * FIXME: Unfortunately this generates a
|
2749
|
|
- * lot of false positives that a bounce
|
2750
|
|
- * has been found when it hasn't.
|
2751
|
|
- */
|
|
2746
|
+ * Some bounces include the message
|
|
2747
|
+ * body without the headers.
|
|
2748
|
+ * FIXME: Unfortunately this generates a
|
|
2749
|
+ * lot of false positives that a bounce
|
|
2750
|
+ * has been found when it hasn't.
|
|
2751
|
+ */
|
2752
|
2752
|
if ((fb = fileblobCreate()) != NULL) {
|
2753
|
2753
|
cli_dbgmsg("Found a bounce message with no header at '%s'\n",
|
2754
|
2754
|
lineGetData(t_line->t_line));
|
...
|
...
|
@@ -2765,10 +2767,10 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2765
|
2765
|
saveIt = FALSE;
|
2766
|
2766
|
} else
|
2767
|
2767
|
/*
|
2768
|
|
- * Save the entire text portion,
|
2769
|
|
- * since it it may be an HTML file with
|
2770
|
|
- * a JavaScript virus or a phish
|
2771
|
|
- */
|
|
2768
|
+ * Save the entire text portion,
|
|
2769
|
+ * since it may be an HTML file with
|
|
2770
|
+ * a JavaScript virus or a phish
|
|
2771
|
+ */
|
2772
|
2772
|
saveIt = TRUE;
|
2773
|
2773
|
|
2774
|
2774
|
if (saveIt) {
|
...
|
...
|
@@ -2785,7 +2787,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
|
2785
|
2785
|
}
|
2786
|
2786
|
}
|
2787
|
2787
|
} /*else
|
2788
|
|
- rc = OK_ATTACHMENTS_NOT_SAVED; */
|
|
2788
|
+ rc = OK_ATTACHMENTS_NOT_SAVED; */
|
2789
|
2789
|
/* nothing saved */
|
2790
|
2790
|
|
2791
|
2791
|
if (mainMessage && (mainMessage != messageIn))
|
...
|
...
|
@@ -2870,33 +2872,33 @@ boundaryStart(const char *line, const char *boundary)
|
2870
|
2870
|
}
|
2871
|
2871
|
|
2872
|
2872
|
/*
|
2873
|
|
- * Gibe.B3 is broken, it has:
|
2874
|
|
- * boundary="---- =_NextPart_000_01C31177.9DC7C000"
|
2875
|
|
- * but it's boundaries look like
|
2876
|
|
- * ------ =_NextPart_000_01C31177.9DC7C000
|
2877
|
|
- * notice the one too few '-'.
|
2878
|
|
- * Presumably this is a deliberate exploitation of a bug in some mail
|
2879
|
|
- * clients.
|
2880
|
|
- *
|
2881
|
|
- * The trouble is that this creates a lot of false positives for
|
2882
|
|
- * boundary conditions, if we're too lax about matches. We do our level
|
2883
|
|
- * best to avoid these false positives. For example if we have
|
2884
|
|
- * boundary="1" we want to ensure that we don't break out of every line
|
2885
|
|
- * that has -1 in it instead of starting --1. This needs some more work.
|
2886
|
|
- *
|
2887
|
|
- * Look with and without RFC822 comments stripped, I've seen some
|
2888
|
|
- * samples where () are taken as comments in boundaries and some where
|
2889
|
|
- * they're not. Irrespective of whatever RFC2822 says, we need to find
|
2890
|
|
- * viruses in both types of mails.
|
2891
|
|
- */
|
|
2873
|
+ * Gibe.B3 is broken, it has:
|
|
2874
|
+ * boundary="---- =_NextPart_000_01C31177.9DC7C000"
|
|
2875
|
+ * but its boundaries look like
|
|
2876
|
+ * ------ =_NextPart_000_01C31177.9DC7C000
|
|
2877
|
+ * notice the one too few '-'.
|
|
2878
|
+ * Presumably this is a deliberate exploitation of a bug in some mail
|
|
2879
|
+ * clients.
|
|
2880
|
+ *
|
|
2881
|
+ * The trouble is that this creates a lot of false positives for
|
|
2882
|
+ * boundary conditions, if we're too lax about matches. We do our level
|
|
2883
|
+ * best to avoid these false positives. For example if we have
|
|
2884
|
+ * boundary="1" we want to ensure that we don't break out of every line
|
|
2885
|
+ * that has -1 in it instead of starting --1. This needs some more work.
|
|
2886
|
+ *
|
|
2887
|
+ * Look with and without RFC822 comments stripped, I've seen some
|
|
2888
|
+ * samples where () are taken as comments in boundaries and some where
|
|
2889
|
+ * they're not. Irrespective of whatever RFC2822 says, we need to find
|
|
2890
|
+ * viruses in both types of mails.
|
|
2891
|
+ */
|
2892
|
2892
|
if ((strstr(&ptr[1], boundary) != NULL) || (strstr(newline, boundary) != NULL)) {
|
2893
|
2893
|
const char *k = ptr;
|
2894
|
2894
|
|
2895
|
2895
|
/*
|
2896
|
|
- * We need to ensure that we don't match --11=-=-=11 when
|
2897
|
|
- * looking for --1=-=-=1 in well behaved headers, that's a
|
2898
|
|
- * false positive problem mentioned above
|
2899
|
|
- */
|
|
2896
|
+ * We need to ensure that we don't match --11=-=-=11 when
|
|
2897
|
+ * looking for --1=-=-=1 in well behaved headers, that's a
|
|
2898
|
+ * false positive problem mentioned above
|
|
2899
|
+ */
|
2900
|
2900
|
rc = 0;
|
2901
|
2901
|
do
|
2902
|
2902
|
if (strcmp(++k, boundary) == 0) {
|
...
|
...
|
@@ -2980,9 +2982,9 @@ boundaryEnd(const char *line, const char *boundary)
|
2980
|
2980
|
return 0;
|
2981
|
2981
|
}
|
2982
|
2982
|
/*
|
2983
|
|
- * Use < rather than == because some broken mails have white
|
2984
|
|
- * space after the boundary
|
2985
|
|
- */
|
|
2983
|
+ * Use < rather than == because some broken mails have white
|
|
2984
|
+ * space after the boundary
|
|
2985
|
+ */
|
2986
|
2986
|
if (strlen(p) < (len + 2)) {
|
2987
|
2987
|
if (newline != line)
|
2988
|
2988
|
free(newline);
|
...
|
...
|
@@ -3021,8 +3023,8 @@ initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
|
3021
|
3021
|
const struct tableinit *tableinit;
|
3022
|
3022
|
|
3023
|
3023
|
/*
|
3024
|
|
- * Initialise the various look up tables
|
3025
|
|
- */
|
|
3024
|
+ * Initialise the various look up tables
|
|
3025
|
+ */
|
3026
|
3026
|
*rfc821Table = tableCreate();
|
3027
|
3027
|
assert(*rfc821Table != NULL);
|
3028
|
3028
|
|
...
|
...
|
@@ -3074,7 +3076,7 @@ getTextPart(message *const messages[], size_t size)
|
3074
|
3074
|
|
3075
|
3075
|
/*
|
3076
|
3076
|
* strip -
|
3077
|
|
- * Remove the trailing spaces from a buffer. Don't call this directly,
|
|
3077
|
+ * Remove the trailing spaces from a buffer. Don't call this directly,
|
3078
|
3078
|
* always call strstrip() which is a wrapper to this routine to be used with
|
3079
|
3079
|
* NUL terminated strings. This code looks a bit strange because of it's
|
3080
|
3080
|
* heritage from code that worked on strings that weren't necessarily NUL
|
...
|
...
|
@@ -3084,7 +3086,7 @@ getTextPart(message *const messages[], size_t size)
|
3084
|
3084
|
* Returns it's new length (a la strlen)
|
3085
|
3085
|
*
|
3086
|
3086
|
* len must be int not size_t because of the >= 0 test, it is sizeof(buf)
|
3087
|
|
- * not strlen(buf)
|
|
3087
|
+ * not strlen(buf)
|
3088
|
3088
|
*/
|
3089
|
3089
|
static size_t
|
3090
|
3090
|
strip(char *buf, int len)
|
...
|
...
|
@@ -3118,7 +3120,7 @@ strip(char *buf, int len)
|
3118
|
3118
|
|
3119
|
3119
|
/*
|
3120
|
3120
|
* strstrip:
|
3121
|
|
- * Strip a given string
|
|
3121
|
+ * Strip a given string
|
3122
|
3122
|
*/
|
3123
|
3123
|
size_t
|
3124
|
3124
|
strstrip(char *s)
|
...
|
...
|
@@ -3165,29 +3167,29 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
|
3165
|
3165
|
switch (commandNumber) {
|
3166
|
3166
|
case CONTENT_TYPE:
|
3167
|
3167
|
/*
|
3168
|
|
- * Fix for non RFC1521 compliant mailers
|
3169
|
|
- * that send content-type: Text instead
|
3170
|
|
- * of content-type: Text/Plain, or
|
3171
|
|
- * just simply "Content-Type:"
|
3172
|
|
- */
|
|
3168
|
+ * Fix for non RFC1521 compliant mailers
|
|
3169
|
+ * that send content-type: Text instead
|
|
3170
|
+ * of content-type: Text/Plain, or
|
|
3171
|
+ * just simply "Content-Type:"
|
|
3172
|
+ */
|
3173
|
3173
|
if (arg == NULL)
|
3174
|
3174
|
/*
|
3175
|
|
- * According to section 4 of RFC1521:
|
3176
|
|
- * "Note also that a subtype specification is
|
3177
|
|
- * MANDATORY. There are no default subtypes"
|
3178
|
|
- *
|
3179
|
|
- * We have to break this and make an assumption
|
3180
|
|
- * for the subtype because virus writers and
|
3181
|
|
- * email client writers don't get it right
|
3182
|
|
- */
|
|
3175
|
+ * According to section 4 of RFC1521:
|
|
3176
|
+ * "Note also that a subtype specification is
|
|
3177
|
+ * MANDATORY. There are no default subtypes"
|
|
3178
|
+ *
|
|
3179
|
+ * We have to break this and make an assumption
|
|
3180
|
+ * for the subtype because virus writers and
|
|
3181
|
+ * email client writers don't get it right
|
|
3182
|
+ */
|
3183
|
3183
|
cli_dbgmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
|
3184
|
3184
|
else if (strchr(ptr, '/') == NULL)
|
3185
|
3185
|
/*
|
3186
|
|
- * Empty field, such as
|
3187
|
|
- * Content-Type:
|
3188
|
|
- * which I believe is illegal according to
|
3189
|
|
- * RFC1521
|
3190
|
|
- */
|
|
3186
|
+ * Empty field, such as
|
|
3187
|
+ * Content-Type:
|
|
3188
|
+ * which I believe is illegal according to
|
|
3189
|
+ * RFC1521
|
|
3190
|
+ */
|
3191
|
3191
|
cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
|
3192
|
3192
|
else {
|
3193
|
3193
|
int i;
|
...
|
...
|
@@ -3200,21 +3202,21 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
|
3200
|
3200
|
return -1;
|
3201
|
3201
|
}
|
3202
|
3202
|
/*
|
3203
|
|
- * Some clients are broken and
|
3204
|
|
- * put white space after the ;
|
3205
|
|
- */
|
|
3203
|
+ * Some clients are broken and
|
|
3204
|
+ * put white space after the ;
|
|
3205
|
+ */
|
3206
|
3206
|
if (*arg == '/') {
|
3207
|
3207
|
cli_dbgmsg("Content-type '/' received, assuming application/octet-stream\n");
|
3208
|
3208
|
messageSetMimeType(m, "application");
|
3209
|
3209
|
messageSetMimeSubtype(m, "octet-stream");
|
3210
|
3210
|
} else {
|
3211
|
3211
|
/*
|
3212
|
|
- * The content type could be in quotes:
|
3213
|
|
- * Content-Type: "multipart/mixed"
|
3214
|
|
- * FIXME: this is a hack in that ignores
|
3215
|
|
- * the quotes, it doesn't handle
|
3216
|
|
- * them properly
|
3217
|
|
- */
|
|
3212
|
+ * The content type could be in quotes:
|
|
3213
|
+ * Content-Type: "multipart/mixed"
|
|
3214
|
+ * FIXME: this is a hack in that ignores
|
|
3215
|
+ * the quotes, it doesn't handle
|
|
3216
|
+ * them properly
|
|
3217
|
+ */
|
3218
|
3218
|
while (isspace(*ptr))
|
3219
|
3219
|
ptr++;
|
3220
|
3220
|
if (ptr[0] == '\"')
|
...
|
...
|
@@ -3228,11 +3230,11 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
|
3228
|
3228
|
|
3229
|
3229
|
s = cli_strtokbuf(ptr, 0, ";", buf);
|
3230
|
3230
|
/*
|
3231
|
|
- * Handle
|
3232
|
|
- * Content-Type: foo/bar multipart/mixed
|
3233
|
|
- * and
|
3234
|
|
- * Content-Type: multipart/mixed foo/bar
|
3235
|
|
- */
|
|
3231
|
+ * Handle
|
|
3232
|
+ * Content-Type: foo/bar multipart/mixed
|
|
3233
|
+ * and
|
|
3234
|
+ * Content-Type: multipart/mixed foo/bar
|
|
3235
|
+ */
|
3236
|
3236
|
if (s && *s) {
|
3237
|
3237
|
char *buf2 = cli_strdup(buf);
|
3238
|
3238
|
|
...
|
...
|
@@ -3284,11 +3286,11 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
|
3284
|
3284
|
}
|
3285
|
3285
|
|
3286
|
3286
|
/*
|
3287
|
|
- * Add in all rest of the the arguments.
|
3288
|
|
- * e.g. if the header is this:
|
3289
|
|
- * Content-Type:', arg='multipart/mixed; boundary=foo
|
3290
|
|
- * we find the boundary argument set it
|
3291
|
|
- */
|
|
3287
|
+ * Add in all rest of the the arguments.
|
|
3288
|
+ * e.g. if the header is this:
|
|
3289
|
+ * Content-Type:', arg='multipart/mixed; boundary=foo
|
|
3290
|
+ * we find the boundary argument set it
|
|
3291
|
+ */
|
3292
|
3292
|
i = 1;
|
3293
|
3293
|
while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
|
3294
|
3294
|
cli_dbgmsg("mimeArgs = '%s'\n", buf);
|
...
|
...
|
@@ -3319,12 +3321,12 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
|
3319
|
3319
|
}
|
3320
|
3320
|
if (!messageHasFilename(m))
|
3321
|
3321
|
/*
|
3322
|
|
- * Handle this type of header, without
|
3323
|
|
- * a filename (e.g. some Worm.Torvil.D)
|
3324
|
|
- * Content-ID: <nRfkHdrKsAxRU>
|
3325
|
|
- * Content-Transfer-Encoding: base64
|
3326
|
|
- * Content-Disposition: attachment
|
3327
|
|
- */
|
|
3322
|
+ * Handle this type of header, without
|
|
3323
|
+ * a filename (e.g. some Worm.Torvil.D)
|
|
3324
|
+ * Content-ID: <nRfkHdrKsAxRU>
|
|
3325
|
+ * Content-Transfer-Encoding: base64
|
|
3326
|
+ * Content-Disposition: attachment
|
|
3327
|
+ */
|
3328
|
3328
|
messageAddArgument(m, "filename=unknown");
|
3329
|
3329
|
}
|
3330
|
3330
|
if (copy)
|
...
|
...
|
@@ -3346,8 +3348,8 @@ saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
|
3346
|
3346
|
messageAddArgument(m, "filename=textportion");
|
3347
|
3347
|
if ((fb = messageToFileblob(m, mctx->dir, destroy_text)) != NULL) {
|
3348
|
3348
|
/*
|
3349
|
|
- * Save main part to scan that
|
3350
|
|
- */
|
|
3349
|
+ * Save main part to scan that
|
|
3350
|
+ */
|
3351
|
3351
|
cli_dbgmsg("Saving main message\n");
|
3352
|
3352
|
|
3353
|
3353
|
mctx->files++;
|
...
|
...
|
@@ -3359,7 +3361,7 @@ saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
|
3359
|
3359
|
/*
|
3360
|
3360
|
* Handle RFC822 comments in headers.
|
3361
|
3361
|
* If out == NULL, return a buffer without the comments, the caller must free
|
3362
|
|
- * the returned buffer
|
|
3362
|
+ * the returned buffer
|
3363
|
3363
|
* Return NULL on error or if the input * has no comments.
|
3364
|
3364
|
* See section 3.4.3 of RFC822
|
3365
|
3365
|
* TODO: handle comments that go on to more than one line
|
...
|
...
|
@@ -3653,9 +3655,9 @@ rfc1341(mbox_ctx *mctx, message *m)
|
3653
|
3653
|
|
3654
|
3654
|
free(total);
|
3655
|
3655
|
/*
|
3656
|
|
- * If it's the last one - reassemble it
|
3657
|
|
- * FIXME: this assumes that we receive the parts in order
|
3658
|
|
- */
|
|
3656
|
+ * If it's the last one - reassemble it
|
|
3657
|
+ * FIXME: this assumes that we receive the parts in order
|
|
3658
|
+ */
|
3659
|
3659
|
if ((n == t) && ((dd = opendir(pdir)) != NULL)) {
|
3660
|
3660
|
FILE *fout;
|
3661
|
3661
|
char outname[PATH_MAX + 1];
|
...
|
...
|
@@ -3746,9 +3748,9 @@ rfc1341(mbox_ctx *mctx, message *m)
|
3746
|
3746
|
nblanks = 0;
|
3747
|
3747
|
while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
|
3748
|
3748
|
/*
|
3749
|
|
- * Ensure that trailing newlines
|
3750
|
|
- * aren't copied
|
3751
|
|
- */
|
|
3749
|
+ * Ensure that trailing newlines
|
|
3750
|
+ * aren't copied
|
|
3751
|
+ */
|
3752
|
3752
|
if (buffer[0] == '\n')
|
3753
|
3753
|
nblanks++;
|
3754
|
3754
|
else {
|
...
|
...
|
@@ -3822,7 +3824,7 @@ static void extract_text_urls(const unsigned char *mem, size_t len, tag_argument
|
3822
|
3822
|
for (url_len = 4; off + url_len < len && url_len < (sizeof(url) - 1); url_len++) {
|
3823
|
3823
|
unsigned char c = mem[off + url_len];
|
3824
|
3824
|
/* smart compilers will compile this if into
|
3825
|
|
- * a single bt + jb instruction */
|
|
3825
|
+ * a single bt + jb instruction */
|
3826
|
3826
|
if (c == ' ' || c == '\n' || c == '\t')
|
3827
|
3827
|
break;
|
3828
|
3828
|
}
|
...
|
...
|
@@ -3901,9 +3903,9 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
|
3901
|
3901
|
|
3902
|
3902
|
if (!hrefs.scanContents)
|
3903
|
3903
|
/*
|
3904
|
|
- * Don't waste time extracting hrefs (parsing html), nobody
|
3905
|
|
- * will need it
|
3906
|
|
- */
|
|
3904
|
+ * Don't waste time extracting hrefs (parsing html), nobody
|
|
3905
|
+ * will need it
|
|
3906
|
+ */
|
3907
|
3907
|
return;
|
3908
|
3908
|
|
3909
|
3909
|
hrefs.count = 0;
|
...
|
...
|
@@ -3915,10 +3917,10 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
|
3915
|
3915
|
if (hrefs.scanContents) {
|
3916
|
3916
|
if (phishingScan(mctx->ctx, &hrefs) == CL_VIRUS) {
|
3917
|
3917
|
/*
|
3918
|
|
- * FIXME: message objects' contents are
|
3919
|
|
- * encapsulated so we should not access
|
3920
|
|
- * the members directly
|
3921
|
|
- */
|
|
3918
|
+ * FIXME: message objects' contents are
|
|
3919
|
+ * encapsulated so we should not access
|
|
3920
|
+ * the members directly
|
|
3921
|
+ */
|
3922
|
3922
|
mainMessage->isInfected = TRUE;
|
3923
|
3923
|
*rc = VIRUS;
|
3924
|
3924
|
cli_dbgmsg("PH:Phishing found\n");
|
...
|
...
|
@@ -4003,9 +4005,9 @@ getline_from_mbox(char *buffer, size_t buffer_len, fmap_t *map, size_t *at)
|
4003
|
4003
|
size_t input_len = MIN(map->len - *at, buffer_len + 1);
|
4004
|
4004
|
src = cursrc = fmap_need_off_once(map, *at, input_len);
|
4005
|
4005
|
|
4006
|
|
- /* we check for eof from the result of GETC()
|
4007
|
|
- * if(feof(fin))
|
4008
|
|
- return NULL;*/
|
|
4006
|
+ /* we check for eof from the result of GETC()
|
|
4007
|
+ if(feof(fin))
|
|
4008
|
+ return NULL;*/
|
4009
|
4009
|
if (!src) {
|
4010
|
4010
|
cli_dbgmsg("getline_from_mbox: fmap need failed\n");
|
4011
|
4011
|
return NULL;
|
...
|
...
|
@@ -4070,9 +4072,9 @@ isBounceStart(mbox_ctx *mctx, const char *line)
|
4070
|
4070
|
if (*line == '\0')
|
4071
|
4071
|
return FALSE;
|
4072
|
4072
|
/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
|
4073
|
|
- return FALSE;
|
4074
|
|
- if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
|
4075
|
|
- return FALSE;*/
|
|
4073
|
+ return FALSE;
|
|
4074
|
+ if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
|
|
4075
|
+ return FALSE;*/
|
4076
|
4076
|
|
4077
|
4077
|
len = strlen(line);
|
4078
|
4078
|
if ((len < 6) || (len >= 72))
|
...
|
...
|
@@ -4102,7 +4104,7 @@ isBounceStart(mbox_ctx *mctx, const char *line)
|
4102
|
4102
|
|
4103
|
4103
|
/*
|
4104
|
4104
|
* Extract a binhexEncoded message, return if it's found to be infected as we
|
4105
|
|
- * extract it
|
|
4105
|
+ * extract it
|
4106
|
4106
|
*/
|
4107
|
4107
|
static bool
|
4108
|
4108
|
exportBinhexMessage(mbox_ctx *mctx, message *m)
|
...
|
...
|
@@ -4139,24 +4141,24 @@ exportBounceMessage(mbox_ctx *mctx, text *start)
|
4139
|
4139
|
fileblob *fb;
|
4140
|
4140
|
|
4141
|
4141
|
/*
|
4142
|
|
- * Attempt to save the original (unbounced)
|
4143
|
|
- * message - clamscan will find that in the
|
4144
|
|
- * directory and call us again (with any luck)
|
4145
|
|
- * having found an e-mail message to handle.
|
4146
|
|
- *
|
4147
|
|
- * This finds a lot of false positives, the
|
4148
|
|
- * search that a content type is in the
|
4149
|
|
- * bounce (i.e. it's after the bounce header)
|
4150
|
|
- * helps a bit.
|
4151
|
|
- *
|
4152
|
|
- * messageAddLine
|
4153
|
|
- * optimization could help here, but needs
|
4154
|
|
- * careful thought, do it with line numbers
|
4155
|
|
- * would be best, since the current method in
|
4156
|
|
- * messageAddLine of checking encoding first
|
4157
|
|
- * must remain otherwise non bounce messages
|
4158
|
|
- * won't be scanned
|
4159
|
|
- */
|
|
4142
|
+ * Attempt to save the original (unbounced)
|
|
4143
|
+ * message - clamscan will find that in the
|
|
4144
|
+ * directory and call us again (with any luck)
|
|
4145
|
+ * having found an e-mail message to handle.
|
|
4146
|
+ *
|
|
4147
|
+ * This finds a lot of false positives, the
|
|
4148
|
+ * search that a content type is in the
|
|
4149
|
+ * bounce (i.e. it's after the bounce header)
|
|
4150
|
+ * helps a bit.
|
|
4151
|
+ *
|
|
4152
|
+ * messageAddLine
|
|
4153
|
+ * optimization could help here, but needs
|
|
4154
|
+ * careful thought, do it with line numbers
|
|
4155
|
+ * would be best, since the current method in
|
|
4156
|
+ * messageAddLine of checking encoding first
|
|
4157
|
+ * must remain otherwise non bounce messages
|
|
4158
|
+ * won't be scanned
|
|
4159
|
+ */
|
4160
|
4160
|
for (t = start; t; t = t->t_next) {
|
4161
|
4161
|
const char *txt = lineGetData(t->t_line);
|
4162
|
4162
|
char cmd[RFC2821LENGTH + 1];
|
...
|
...
|
@@ -4327,8 +4329,8 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4327
|
4327
|
addToText = TRUE;
|
4328
|
4328
|
if (messageGetBody(aMessage) == NULL)
|
4329
|
4329
|
/*
|
4330
|
|
- * No plain text version
|
4331
|
|
- */
|
|
4330
|
+ * No plain text version
|
|
4331
|
+ */
|
4332
|
4332
|
cli_dbgmsg("No plain text alternative\n");
|
4333
|
4333
|
break;
|
4334
|
4334
|
case TEXT:
|
...
|
...
|
@@ -4348,10 +4350,10 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4348
|
4348
|
if ((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
|
4349
|
4349
|
(messageGetEncoding(aMessage) == NOENCODING)) {
|
4350
|
4350
|
/*
|
4351
|
|
- * Strictly speaking, a text/plain part
|
4352
|
|
- * is not an attachment. We pretend it
|
4353
|
|
- * is so that we can decode and scan it
|
4354
|
|
- */
|
|
4351
|
+ * Strictly speaking, a text/plain part
|
|
4352
|
+ * is not an attachment. We pretend it
|
|
4353
|
+ * is so that we can decode and scan it
|
|
4354
|
+ */
|
4355
|
4355
|
if (!messageHasFilename(aMessage)) {
|
4356
|
4356
|
cli_dbgmsg("Adding part to main message\n");
|
4357
|
4357
|
addToText = TRUE;
|
...
|
...
|
@@ -4379,15 +4381,15 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4379
|
4379
|
case BINARY:
|
4380
|
4380
|
if (encodingLine(aMessage) == NULL) {
|
4381
|
4381
|
/*
|
4382
|
|
- * This means that the message
|
4383
|
|
- * has no attachments
|
4384
|
|
- *
|
4385
|
|
- * The test for
|
4386
|
|
- * messageGetEncoding is needed
|
4387
|
|
- * since encodingLine won't have
|
4388
|
|
- * been set if the message
|
4389
|
|
- * itself has been encoded
|
4390
|
|
- */
|
|
4382
|
+ * This means that the message
|
|
4383
|
+ * has no attachments
|
|
4384
|
+ *
|
|
4385
|
+ * The test for
|
|
4386
|
+ * messageGetEncoding is needed
|
|
4387
|
+ * since encodingLine won't have
|
|
4388
|
+ * been set if the message
|
|
4389
|
+ * itself has been encoded
|
|
4390
|
+ */
|
4391
|
4391
|
cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
|
4392
|
4392
|
assert(aMessage == messages[i]);
|
4393
|
4393
|
messageDestroy(messages[i]);
|
...
|
...
|
@@ -4400,14 +4402,14 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4400
|
4400
|
}
|
4401
|
4401
|
#endif
|
4402
|
4402
|
#if 0
|
4403
|
|
- messageAddStrAtTop(aMessage,
|
4404
|
|
- "Received: by clamd (message/rfc822)");
|
|
4403
|
+ messageAddStrAtTop(aMessage,
|
|
4404
|
+ "Received: by clamd (message/rfc822)");
|
4405
|
4405
|
#endif
|
4406
|
4406
|
#ifdef SAVE_TO_DISC
|
4407
|
4407
|
/*
|
4408
|
|
- * Save this embedded message
|
4409
|
|
- * to a temporary file
|
4410
|
|
- */
|
|
4408
|
+ * Save this embedded message
|
|
4409
|
+ * to a temporary file
|
|
4410
|
+ */
|
4411
|
4411
|
if (saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
|
4412
|
4412
|
*rc = VIRUS;
|
4413
|
4413
|
assert(aMessage == messages[i]);
|
...
|
...
|
@@ -4415,19 +4417,19 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4415
|
4415
|
messages[i] = NULL;
|
4416
|
4416
|
#else
|
4417
|
4417
|
/*
|
4418
|
|
- * Scan in memory, faster but is open to DoS attacks
|
4419
|
|
- * when many nested levels are involved.
|
4420
|
|
- */
|
|
4418
|
+ * Scan in memory, faster but is open to DoS attacks
|
|
4419
|
+ * when many nested levels are involved.
|
|
4420
|
+ */
|
4421
|
4421
|
body = parseEmailHeaders(aMessage, mctx->rfc821Table);
|
4422
|
4422
|
|
4423
|
4423
|
/*
|
4424
|
|
- * We've finished with the
|
4425
|
|
- * original copy of the message,
|
4426
|
|
- * so throw that away and
|
4427
|
|
- * deal with the encapsulated
|
4428
|
|
- * message as a message.
|
4429
|
|
- * This can save a lot of memory
|
4430
|
|
- */
|
|
4424
|
+ * We've finished with the
|
|
4425
|
+ * original copy of the message,
|
|
4426
|
+ * so throw that away and
|
|
4427
|
+ * deal with the encapsulated
|
|
4428
|
+ * message as a message.
|
|
4429
|
+ * This can save a lot of memory
|
|
4430
|
+ */
|
4431
|
4431
|
assert(aMessage == messages[i]);
|
4432
|
4432
|
messageDestroy(messages[i]);
|
4433
|
4433
|
messages[i] = NULL;
|
...
|
...
|
@@ -4448,19 +4450,19 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
|
4448
|
4448
|
return mainMessage;
|
4449
|
4449
|
case MULTIPART:
|
4450
|
4450
|
/*
|
4451
|
|
- * It's a multi part within a multi part
|
4452
|
|
- * Run the message parser on this bit, it won't
|
4453
|
|
- * be an attachment
|
4454
|
|
- */
|
|
4451
|
+ * It's a multi part within a multi part
|
|
4452
|
+ * Run the message parser on this bit, it won't
|
|
4453
|
+ * be an attachment
|
|
4454
|
+ */
|
4455
|
4455
|
cli_dbgmsg("Found multipart inside multipart\n");
|
4456
|
4456
|
#if HAVE_JSON
|
4457
|
4457
|
mctx->wrkobj = thisobj;
|
4458
|
4458
|
#endif
|
4459
|
4459
|
if (aMessage) {
|
4460
|
4460
|
/*
|
4461
|
|
- * The headers were parsed when reading in the
|
4462
|
|
- * whole multipart section
|
4463
|
|
- */
|
|
4461
|
+ * The headers were parsed when reading in the
|
|
4462
|
+ * whole multipart section
|
|
4463
|
+ */
|
4464
|
4464
|
*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
|
4465
|
4465
|
cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
|
4466
|
4466
|
assert(aMessage == messages[i]);
|
...
|
...
|
@@ -4575,35 +4577,35 @@ next_is_folded_header(const text *t)
|
4575
|
4575
|
data = lineGetData(next->t_line);
|
4576
|
4576
|
|
4577
|
4577
|
/*
|
4578
|
|
- * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
|
4579
|
|
- * previous entry.
|
4580
|
|
- */
|
|
4578
|
+ * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
|
|
4579
|
+ * previous entry.
|
|
4580
|
+ */
|
4581
|
4581
|
if (isblank(data[0]))
|
4582
|
4582
|
return TRUE;
|
4583
|
4583
|
|
4584
|
4584
|
if (strchr(data, '=') == NULL)
|
4585
|
4585
|
/*
|
4586
|
|
- * Avoid false positives with
|
4587
|
|
- * Content-Type: text/html;
|
4588
|
|
- * Content-Transfer-Encoding: quoted-printable
|
4589
|
|
- */
|
|
4586
|
+ * Avoid false positives with
|
|
4587
|
+ * Content-Type: text/html;
|
|
4588
|
+ * Content-Transfer-Encoding: quoted-printable
|
|
4589
|
+ */
|
4590
|
4590
|
return FALSE;
|
4591
|
4591
|
|
4592
|
4592
|
/*
|
4593
|
|
- * Some are broken and don't fold headers lines
|
4594
|
|
- * correctly as per section 2.2.3 of RFC2822.
|
4595
|
|
- * Generally they miss the white space at
|
4596
|
|
- * the start of the fold line:
|
4597
|
|
- * Content-Type: multipart/related;
|
4598
|
|
- * type="multipart/alternative";
|
4599
|
|
- * boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
4600
|
|
- * should read:
|
4601
|
|
- * Content-Type: multipart/related;
|
4602
|
|
- * type="multipart/alternative";
|
4603
|
|
- * boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
4604
|
|
- * Since we're a virus checker not an RFC
|
4605
|
|
- * verifier we need to handle these
|
4606
|
|
- */
|
|
4593
|
+ * Some are broken and don't fold headers lines
|
|
4594
|
+ * correctly as per section 2.2.3 of RFC2822.
|
|
4595
|
+ * Generally they miss the white space at
|
|
4596
|
+ * the start of the fold line:
|
|
4597
|
+ * Content-Type: multipart/related;
|
|
4598
|
+ * type="multipart/alternative";
|
|
4599
|
+ * boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
|
4600
|
+ * should read:
|
|
4601
|
+ * Content-Type: multipart/related;
|
|
4602
|
+ * type="multipart/alternative";
|
|
4603
|
+ * boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
|
4604
|
+ * Since we're a virus checker not an RFC
|
|
4605
|
+ * verifier we need to handle these
|
|
4606
|
+ */
|
4607
|
4607
|
data = lineGetData(t->t_line);
|
4608
|
4608
|
|
4609
|
4609
|
ptr = strchr(data, '\0');
|