Browse code

Fuzzy logic lookup of content-type

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1230 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2005/01/06 06:57:44
Showing 2 changed files
... ...
@@ -1,3 +1,10 @@
1
+Wed Jan  5 21:55:49 GMT 2005 (njh)
2
+----------------------------------
3
+  * libclamav/message.c:	Guess incorrect content-type, e.g.
4
+			Content-Type: ultipart/mixed
5
+				instead of
6
+			Content-Type: multipart/mixed
7
+
1 8
 Wed Jan  5 21:09:14 GMT 2005 (njh)
2 9
 ----------------------------------
3 10
   * libclamav/message.c:	Fix crash caused when looking for non-existent
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.137  2005/01/05 21:54:05  nigelhorne
21
+ * Fuzzy logic lookup of content-type
22
+ *
20 23
  * Revision 1.136  2005/01/05 21:07:15  nigelhorne
21 24
  * Fix crash when looking for uuencoded attachment fails
22 25
  *
... ...
@@ -405,7 +408,7 @@
405 405
  * uuencodebegin() no longer static
406 406
  *
407 407
  */
408
-static	char	const	rcsid[] = "$Id: message.c,v 1.136 2005/01/05 21:07:15 nigelhorne Exp $";
408
+static	char	const	rcsid[] = "$Id: message.c,v 1.137 2005/01/05 21:54:05 nigelhorne Exp $";
409 409
 
410 410
 #if HAVE_CONFIG_H
411 411
 #include "clamav-config.h"
... ...
@@ -597,8 +600,9 @@ messageSetMimeType(message *mess, const char *type)
597 597
 #ifdef	CL_THREAD_SAFE
598 598
 	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
599 599
 #endif
600
-	static table_t *mime_table;
600
+	const struct mime_map *m;
601 601
 	int typeval;
602
+	static table_t *mime_table;
602 603
 
603 604
 	assert(mess != NULL);
604 605
 	if(type == NULL) {
... ...
@@ -617,8 +621,6 @@ messageSetMimeType(message *mess, const char *type)
617 617
 	pthread_mutex_lock(&mime_mutex);
618 618
 #endif
619 619
 	if(mime_table == NULL) {
620
-		const struct mime_map *m;
621
-
622 620
 		mime_table = tableCreate();
623 621
 		if(mime_table == NULL) {
624 622
 #ifdef	CL_THREAD_SAFE
... ...
@@ -646,7 +648,8 @@ messageSetMimeType(message *mess, const char *type)
646 646
 	if(typeval != -1) {
647 647
 		mess->mimeType = (mime_type)typeval;
648 648
 		return 1;
649
-	} else if(mess->mimeType == NOMIME) {
649
+	}
650
+	if(mess->mimeType == NOMIME) {
650 651
 		if(strncasecmp(type, "x-", 2) == 0)
651 652
 			mess->mimeType = MEXTENSION;
652 653
 		else {
... ...
@@ -666,8 +669,26 @@ messageSetMimeType(message *mess, const char *type)
666 666
 				 *	Content-Type: text/plain
667 667
 				 * as an attachment
668 668
 				 */
669
-				cli_warnmsg("Unknown MIME type: `%s', set to Application - report to bugs@clamav.net\n", type);
670
-				mess->mimeType = APPLICATION;
669
+				int highestSimil = 0, t = -1;
670
+				const char *closest = NULL;
671
+
672
+				for(m = mime_map; m->string; m++) {
673
+					const int s = simil(m->string, type);
674
+
675
+					if(s > highestSimil) {
676
+						highestSimil = s;
677
+						closest = m->string;
678
+						t = m->type;
679
+					}
680
+				}
681
+				if(highestSimil >= 50) {
682
+					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n",
683
+						type, closest, highestSimil);
684
+					mess->mimeType = t;
685
+				} else {
686
+					cli_warnmsg("Unknown MIME type: `%s', set to Application - report to bugs@clamav.net\n", type);
687
+					mess->mimeType = APPLICATION;
688
+				}
671 689
 			}
672 690
 		}
673 691
 		return 1;
... ...
@@ -1121,7 +1142,7 @@ messageSetEncoding(message *m, const char *enctype)
1121 1121
 			 * 50% is arbitrary. For example 7bi will match as
1122 1122
 			 * 66% certain to be 7bit
1123 1123
 			 */
1124
-			if(closest && (highestSimil >= 50)) {
1124
+			if(highestSimil >= 50) {
1125 1125
 				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
1126 1126
 					type, closest, highestSimil);
1127 1127
 				messageSetEncoding(m, closest);