git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@834 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/09/06 20:05:44... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Mon Sep 6 12:04:08 BST 2004 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: Normalise the HTML before looking for URLs to scan |
|
4 |
+ |
|
1 | 5 |
Sun Sep 5 18:16:13 CEST 2004 (tk) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/pe.c: fix pointer arithmetic in FSG block (bug reported by Nigel) |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.115 2004/09/06 11:02:08 nigelhorne |
|
21 |
+ * Normalise HTML before scanning for URLs to download |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.114 2004/09/03 15:59:00 nigelhorne |
21 | 24 |
* Handle boundary= "foo" |
22 | 25 |
* |
... | ... |
@@ -330,7 +333,7 @@ |
330 | 330 |
* Compilable under SCO; removed duplicate code with message.c |
331 | 331 |
* |
332 | 332 |
*/ |
333 |
-static char const rcsid[] = "$Id: mbox.c,v 1.114 2004/09/03 15:59:00 nigelhorne Exp $"; |
|
333 |
+static char const rcsid[] = "$Id: mbox.c,v 1.115 2004/09/06 11:02:08 nigelhorne Exp $"; |
|
334 | 334 |
|
335 | 335 |
#if HAVE_CONFIG_H |
336 | 336 |
#include "clamav-config.h" |
... | ... |
@@ -542,16 +545,14 @@ static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER; |
542 | 542 |
|
543 | 543 |
/* |
544 | 544 |
* TODO: when signal handling is added, need to remove temp files when a |
545 |
- * signal is received |
|
545 |
+ * signal is received |
|
546 | 546 |
* TODO: add option to scan in memory not via temp files, perhaps with a |
547 | 547 |
* named pipe or memory mapped file, though this won't work on big e-mails |
548 | 548 |
* containing many levels of encapsulated messages - it'd just take too much |
549 | 549 |
* RAM |
550 |
- * TODO: if debug is enabled, catch a segfault and dump the current e-mail |
|
551 |
- * in it's entirety, then call abort() |
|
552 | 550 |
* TODO: parse .msg format files |
553 | 551 |
* TODO: fully handle AppleDouble format, see |
554 |
- * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf |
|
552 |
+ * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf |
|
555 | 553 |
* TODO: ensure parseEmailHeaders is always called before parseEmailBody |
556 | 554 |
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody |
557 | 555 |
* TODO: Look into TNEF. Is there anything that needs to be done here? |
... | ... |
@@ -574,17 +575,17 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
574 | 574 |
if((fd = fdopen(i, "rb")) == NULL) { |
575 | 575 |
cli_errmsg("Can't open descriptor %d\n", desc); |
576 | 576 |
close(i); |
577 |
- return -1; |
|
577 |
+ return CL_EOPEN; |
|
578 | 578 |
} |
579 | 579 |
if(fgets(buffer, sizeof(buffer), fd) == NULL) { |
580 | 580 |
/* empty message */ |
581 | 581 |
fclose(fd); |
582 |
- return 0; |
|
582 |
+ return CL_CLEAN; |
|
583 | 583 |
} |
584 | 584 |
m = messageCreate(); |
585 | 585 |
if(m == NULL) { |
586 | 586 |
fclose(fd); |
587 |
- return -1; |
|
587 |
+ return CL_EMEM; |
|
588 | 588 |
} |
589 | 589 |
|
590 | 590 |
#ifdef CL_THREAD_SAFE |
... | ... |
@@ -601,7 +602,7 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
601 | 601 |
#endif |
602 | 602 |
messageDestroy(m); |
603 | 603 |
fclose(fd); |
604 |
- return -1; |
|
604 |
+ return CL_EMEM; |
|
605 | 605 |
} |
606 | 606 |
} |
607 | 607 |
#ifdef CL_THREAD_SAFE |
... | ... |
@@ -700,7 +701,12 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
700 | 700 |
|
701 | 701 |
fclose(fd); |
702 | 702 |
|
703 |
- retcode = 0; |
|
703 |
+ /* |
|
704 |
+ * This is not necessarily true, but since the only options are |
|
705 |
+ * CL_CLEAN and CL_VIRUS this is the better choice. It would be |
|
706 |
+ * nice to have CL_CONTINUESCANNING or something like that |
|
707 |
+ */ |
|
708 |
+ retcode = CL_CLEAN; |
|
704 | 709 |
|
705 | 710 |
body = parseEmailHeaders(m, rfc821); |
706 | 711 |
messageDestroy(m); |
... | ... |
@@ -710,6 +716,9 @@ cli_mbox(const char *dir, int desc, unsigned int options) |
710 | 710 |
*/ |
711 | 711 |
if(messageGetBody(body)) |
712 | 712 |
if(!parseEmailBody(body, NULL, dir, rfc821, subtype, options)) |
713 |
+ /* |
|
714 |
+ * There is no malformed e-mail return code |
|
715 |
+ */ |
|
713 | 716 |
retcode = -1; |
714 | 717 |
|
715 | 718 |
/* |
... | ... |
@@ -2098,7 +2107,7 @@ static void |
2098 | 2098 |
checkURLs(message *m, const char *dir) |
2099 | 2099 |
{ |
2100 | 2100 |
blob *b = messageToBlob(m); |
2101 |
- char *ptr; |
|
2101 |
+ char *ptr, *normalised; |
|
2102 | 2102 |
size_t len; |
2103 | 2103 |
table_t *t; |
2104 | 2104 |
int n; |
... | ... |
@@ -2124,7 +2133,13 @@ checkURLs(message *m, const char *dir) |
2124 | 2124 |
t = tableCreate(); |
2125 | 2125 |
|
2126 | 2126 |
n = 0; |
2127 |
- ptr = (char *)blobGetData(b); |
|
2127 |
+ normalised = ptr = html_normalize(blobGetData(b), len); |
|
2128 |
+ |
|
2129 |
+ if(normalised == NULL) { |
|
2130 |
+ blobDestroy(b); |
|
2131 |
+ tableDestroy(t); |
|
2132 |
+ return; |
|
2133 |
+ } |
|
2128 | 2134 |
|
2129 | 2135 |
/* |
2130 | 2136 |
* cli_memstr(ptr, len, "<a href=", 8) |
... | ... |
@@ -2132,7 +2147,6 @@ checkURLs(message *m, const char *dir) |
2132 | 2132 |
* and it returns the place that the 'needle' was found |
2133 | 2133 |
*/ |
2134 | 2134 |
while(len >= 8) { |
2135 |
- /* FIXME: allow any number of white space */ |
|
2136 | 2135 |
if(strncasecmp(ptr, "<a href=", 8) == 0) { |
2137 | 2136 |
#ifdef WITH_CURL |
2138 | 2137 |
#ifndef CL_THREAD_SAFE |
... | ... |
@@ -2227,6 +2241,7 @@ checkURLs(message *m, const char *dir) |
2227 | 2227 |
} |
2228 | 2228 |
blobDestroy(b); |
2229 | 2229 |
tableDestroy(t); |
2230 |
+ free(normalised); |
|
2230 | 2231 |
|
2231 | 2232 |
#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) |
2232 | 2233 |
cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); |
... | ... |
@@ -2390,6 +2405,8 @@ print_trace(int use_syslog) |
2390 | 2390 |
else |
2391 | 2391 |
cli_dbgmsg("%s\n", strings[i]); |
2392 | 2392 |
|
2393 |
+ /* TODO: dump the current email */ |
|
2394 |
+ |
|
2393 | 2395 |
free(strings); |
2394 | 2396 |
} |
2395 | 2397 |
#endif |