Browse code

Better handling of false positive emails

git-svn: trunk@742

Nigel Horne authored on 2004/08/11 23:48:13
Showing 2 changed files
... ...
@@ -1,3 +1,10 @@
1
+Wed Aug 11 15:46:56 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Better handling of false positive emails, that is
4
+				parts of data embedded in emails which look
5
+				like other emails to be scanned, but aren't.
6
+				Thanks to Trog for the idea
7
+
1 8
 Wed Aug 11 11:34:57 BST 2004 (njh)
2 9
 ----------------------------------
3 10
   * clamav-milter:	Installed a new isLocalAddr checker written by
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.98  2004/08/11 14:46:22  nigelhorne
21
+ * Better handling of false positive emails
22
+ *
20 23
  * Revision 1.97  2004/08/10 14:02:22  nigelhorne
21 24
  * *** empty log message ***
22 25
  *
... ...
@@ -279,7 +282,7 @@
279 279
  * Compilable under SCO; removed duplicate code with message.c
280 280
  *
281 281
  */
282
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.97 2004/08/10 14:02:22 nigelhorne Exp $";
282
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.98 2004/08/11 14:46:22 nigelhorne Exp $";
283 283
 
284 284
 #if HAVE_CONFIG_H
285 285
 #include "clamav-config.h"
... ...
@@ -358,7 +361,7 @@ static	void	print_trace(int use_syslog);
358 358
 /*#define	CHECKURLS	/* If an email contains URLs, check them */
359 359
 /*#define	LIBCURL	/* Needs support from "configure" */
360 360
 
361
-typedef enum    { FALSE = 0, TRUE = 1 } bool;
361
+typedef enum	{ FALSE = 0, TRUE = 1 } bool;
362 362
 
363 363
 static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy);
364 364
 static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
... ...
@@ -373,12 +376,10 @@ static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Tab
373 373
 static	void	saveTextPart(message *m, const char *dir);
374 374
 static	bool	saveFile(const blob *b, const char *dir);
375 375
 
376
-#ifdef	CHECKURLS
377 376
 static	void	checkURLs(message *m, const char *dir);
378 377
 #ifdef	LIBCURL
379 378
 static	void	getURL(const char *url, const char *dir, const char *filename);
380 379
 #endif
381
-#endif
382 380
 
383 381
 
384 382
 /* Maximum number of attachments that we accept */
... ...
@@ -549,6 +550,10 @@ cli_mbox(const char *dir, int desc, unsigned int options)
549 549
 				 * End of a message in the mail box
550 550
 				 */
551 551
 				body = parseEmailHeaders(m, rfc821Table, TRUE);
552
+				if(body == NULL) {
553
+					messageReset(m);
554
+					continue;
555
+				}
552 556
 				messageDestroy(m);
553 557
 				if(messageGetBody(body))
554 558
 					if(!parseEmailBody(body,  NULL, 0, NULL, dir, rfc821Table, subtypeTable, options)) {
... ...
@@ -571,9 +576,17 @@ cli_mbox(const char *dir, int desc, unsigned int options)
571 571
 		} while(fgets(buffer, sizeof(buffer), fd) != NULL);
572 572
 
573 573
 		cli_dbgmsg("Deal with email number %d\n", messagenumber);
574
-	} else
574
+	} else {
575 575
 		/*
576 576
 		 * It's a single message, parse the headers then the body
577
+		 * Ignore blank lines at the start of the message
578
+		 */
579
+		while(strchr("\r\n", buffer[0]) &&
580
+		     (fgets(buffer, sizeof(buffer), fd) != NULL))
581
+		     	;
582
+		/*
583
+		 * FIXME: files full of newlines and nothing else are
584
+		 * handled ungracefully...
577 585
 		 */
578 586
 		do
579 587
 			/*
... ...
@@ -589,6 +602,7 @@ cli_mbox(const char *dir, int desc, unsigned int options)
589 589
 			if(messageAddLine(m, buffer, 1) < 0)
590 590
 				break;
591 591
 		while(fgets(buffer, sizeof(buffer), fd) != NULL);
592
+	}
592 593
 
593 594
 	fclose(fd);
594 595
 
... ...
@@ -596,17 +610,19 @@ cli_mbox(const char *dir, int desc, unsigned int options)
596 596
 
597 597
 	body = parseEmailHeaders(m, rfc821Table, TRUE);
598 598
 	messageDestroy(m);
599
-	/*
600
-	 * Write out the last entry in the mailbox
601
-	 */
602
-	if(messageGetBody(body))
603
-		if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable, options))
604
-			retcode = -1;
599
+	if(body) {
600
+		/*
601
+		 * Write out the last entry in the mailbox
602
+		 */
603
+		if(messageGetBody(body))
604
+			if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable, options))
605
+				retcode = -1;
605 606
 
606
-	/*
607
-	 * Tidy up and quit
608
-	 */
609
-	messageDestroy(body);
607
+		/*
608
+		 * Tidy up and quit
609
+		 */
610
+		messageDestroy(body);
611
+	}
610 612
 
611 613
 	cli_dbgmsg("cli_mbox returning %d\n", retcode);
612 614
 
... ...
@@ -637,6 +653,7 @@ parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy)
637 637
 	bool inHeader = TRUE;
638 638
 	text *t;
639 639
 	message *ret;
640
+	bool anyHeadersFound = FALSE;
640 641
 
641 642
 	cli_dbgmsg("parseEmailHeaders\n");
642 643
 
... ...
@@ -692,7 +709,9 @@ parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy)
692 692
 				cli_dbgmsg("End of header information\n");
693 693
 				inHeader = FALSE;
694 694
 			} else {
695
-				(void)parseEmailHeader(ret, buffer, rfc821Table);
695
+				if((parseEmailHeader(ret, buffer, rfc821Table) >= 0) ||
696
+				   (strncasecmp(buffer, "From ", 5) == 0))
697
+					anyHeadersFound = TRUE;
696 698
 				free(buffer);
697 699
 			}
698 700
 		} else {
... ...
@@ -702,6 +721,15 @@ parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy)
702 702
 		}
703 703
 	}
704 704
 
705
+	if(!anyHeadersFound) {
706
+		/*
707
+		 * False positive in believing we have an e-mail when we don't
708
+		 */
709
+		messageDestroy(ret);
710
+		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
711
+		return NULL;
712
+	}
713
+
705 714
 	messageClean(ret);
706 715
 
707 716
 	cli_dbgmsg("parseEmailHeaders: return\n");
... ...
@@ -2033,6 +2061,9 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
2033 2033
 	char *copy = strdup(arg);
2034 2034
 	char *ptr = copy;
2035 2035
 
2036
+	if(copy == NULL)
2037
+		return -1;
2038
+
2036 2039
 	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
2037 2040
 	strstrip(copy);
2038 2041
 
... ...
@@ -2098,7 +2129,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
2098 2098
 	}
2099 2099
 	free(ptr);
2100 2100
 
2101
-	return type;
2101
+	return 0;
2102 2102
 }
2103 2103
 
2104 2104
 /*
... ...
@@ -2228,6 +2259,7 @@ checkURLs(message *m, const char *dir)
2228 2228
 	blob *b = messageToBlob(m);
2229 2229
 	char *ptr;
2230 2230
 	size_t len;
2231
+	table_t *t = tableCreate();
2231 2232
 
2232 2233
 	if(b == NULL)
2233 2234
 		return;
... ...
@@ -2259,7 +2291,7 @@ checkURLs(message *m, const char *dir)
2259 2259
 			if(len == 0)
2260 2260
 				break;
2261 2261
 			ptr = p2;
2262
-			while((len > 0) && (isalnum(*ptr) || strchr("?/:.", *ptr))) {
2262
+			while((len > 0) && (isalnum(*ptr) || strchr("./?:%", *ptr))) {
2263 2263
 				ptr++;
2264 2264
 				len--;
2265 2265
 			}
... ...
@@ -2268,6 +2300,13 @@ checkURLs(message *m, const char *dir)
2268 2268
 			*ptr = '\0';
2269 2269
 			if(strncasecmp(p2, "mailto:", 7) == 0)
2270 2270
 				continue;
2271
+			if(*p2 == '\0')
2272
+				continue;
2273
+			if(tableFind(t, p2) == 1) {
2274
+				cli_dbgmsg("URL %s already downloaded\n", p2);
2275
+				continue;
2276
+			}
2277
+			(void)tableInsert(t, p2, 1);
2271 2278
 			cli_dbgmsg("Downloading URL %s to be scanned\n", p2);
2272 2279
 			strncpy(name, p2, sizeof(name));
2273 2280
 			for(p3 = name; *p3; p3++)
... ...
@@ -2280,7 +2319,7 @@ checkURLs(message *m, const char *dir)
2280 2280
 			/*
2281 2281
 			 * TODO: maximum size and timeouts
2282 2282
 			 */
2283
-			snprintf(cmd, sizeof(cmd), "GET %s > %s/%s", p2, dir, name);
2283
+			snprintf(cmd, sizeof(cmd), "GET -t10 %s > %s/%s 2>/dev/null", p2, dir, name);
2284 2284
 			cli_dbgmsg("%s\n", cmd);
2285 2285
 #ifdef	CL_THREAD_SAFE
2286 2286
 			pthread_mutex_lock(&system_mutex);
... ...
@@ -2304,6 +2343,7 @@ checkURLs(message *m, const char *dir)
2304 2304
 		len--;
2305 2305
 	}
2306 2306
 	blobDestroy(b);
2307
+	tableDestroy(t);
2307 2308
 }
2308 2309
 
2309 2310
 #ifdef	LIBCURL
... ...
@@ -2357,7 +2397,7 @@ checkURLs(message *m, const char *dir)
2357 2357
 #endif
2358 2358
 
2359 2359
 #ifdef HAVE_BACKTRACE
2360
-	static void
2360
+static void
2361 2361
 sigsegv(int sig)
2362 2362
 {
2363 2363
 	signal(SIGSEGV, SIG_DFL);
... ...
@@ -2365,7 +2405,7 @@ sigsegv(int sig)
2365 2365
 	exit(SIGSEGV);
2366 2366
 }
2367 2367
 
2368
-	static void
2368
+static void
2369 2369
 print_trace(int use_syslog)
2370 2370
 {
2371 2371
 	void *array[10];