Browse code

Don't scan comment.html and script.html; add a failsafe dconf option (bb #851). Set the scanscript limit to 512k. TODO: don't generate the above 2 files. Better condition for bb #849.

git-svn: trunk@3659

Török Edvin authored on 2008/02/20 03:43:42
Showing 5 changed files
... ...
@@ -1,3 +1,12 @@
1
+Tue Feb 19 19:42:23 EET 2008 (edwin)
2
+------------------------------------
3
+  * libclamav/scanners.c, dconf.c:
4
+	don't scan comment.html and script.html.
5
+	add failsafe dconf option. (bb #851) 
6
+	set scanscript limit to 512k
7
+  * libclamav/htmlnorm.c: TODO don't generate above 2 files
8
+  * libclamav/regex/regcomp.c: better condition for bb #849
9
+
1 10
 Tue Feb 19 15:35:47 CET 2008 (tk)
2 11
 ---------------------------------
3 12
   * libclamunrar/unrarvm.c: fix declaration of rar_dbgmsg() (bb#848)
... ...
@@ -87,6 +87,7 @@ static struct dconf_module modules[] = {
87 87
     { "DOCUMENT",   "RTF",	    DOC_CONF_RTF,	    1 },
88 88
     { "DOCUMENT",   "PDF",	    DOC_CONF_PDF,	    1 },
89 89
     { "DOCUMENT",   "SCRIPT",	    DOC_CONF_SCRIPT,	    1 },
90
+    { "DOCUMENT",   "HTMLSKIPRAW",  DOC_CONF_HTML_SKIPRAW,  1 },
90 91
 
91 92
     { "MAIL",	    "MBOX",	    MAIL_CONF_MBOX,	    1 },
92 93
     { "MAIL",	    "TNEF",	    MAIL_CONF_TNEF,	    1 },
... ...
@@ -71,10 +71,11 @@ struct cli_dconf {
71 71
 #define ARCH_CONF_AUTOIT    0x2000
72 72
 
73 73
 /* Document flags */
74
-#define DOC_CONF_HTML	    0x1
75
-#define DOC_CONF_RTF	    0x2
76
-#define DOC_CONF_PDF	    0x4
77
-#define DOC_CONF_SCRIPT	    0x8
74
+#define DOC_CONF_HTML		0x1
75
+#define DOC_CONF_RTF		0x2
76
+#define DOC_CONF_PDF		0x4
77
+#define DOC_CONF_SCRIPT 	0x8
78
+#define DOC_CONF_HTML_SKIPRAW	0x10	/* distinct single bit; 0x16 would overlap DOC_CONF_RTF|DOC_CONF_PDF */
78 79
 
79 80
 /* Mail flags */
80 81
 #define MAIL_CONF_MBOX	    0x1
... ...
@@ -1060,7 +1060,7 @@ allocset(struct parse *p)
1060 1060
 		(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
1061 1061
 	}
1062 1062
 
1063
-	if(!p->g->sets)
1063
+	if(!p->g->sets || !p->g->setbits)
1064 1064
 		goto nomem;
1065 1065
 
1066 1066
 	cs = &p->g->sets[no];
... ...
@@ -885,8 +885,6 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
885 885
 	char *tempname, fullname[1024];
886 886
 	int ret=CL_CLEAN, fd;
887 887
 	struct stat sb;
888
-	struct stat first_stat;
889
-
890 888
 
891 889
     cli_dbgmsg("in cli_scanhtml()\n");
892 890
 
... ...
@@ -898,6 +896,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
898 898
     /* Because HTML detection is FP-prone and html_normalise_fd() needs to
899 899
      * mmap the file don't normalise files larger than 10 MB.
900 900
      */
901
+
901 902
     if(sb.st_size > 10485760) {
902 903
 	cli_dbgmsg("cli_scanhtml: exiting (file larger than 10 MB)\n");
903 904
 	return CL_CLEAN;
... ...
@@ -914,47 +913,13 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
914 914
     snprintf(fullname, 1024, "%s/nocomment.html", tempname);
915 915
     fd = open(fullname, O_RDONLY|O_BINARY);
916 916
     if (fd >= 0) {
917
-	if(fstat(fd, &first_stat) == -1) {
918
-		cli_errmsg("cli_scanhtml: fstat() failed for %s: %d\n", fullname, fd);
919
-		close(fd);
920
-		ret = CL_EIO;
921
-	} else {
922
-		ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
923
-		close(fd);
924
-	}
925
-    }
926
-
927
-    if (ret == CL_CLEAN) {
928
-	snprintf(fullname, 1024, "%s/comment.html", tempname);
929
-	fd = open(fullname, O_RDONLY|O_BINARY);
930
-	if (fd >= 0) {
931
-	    if(fstat(fd, &sb) == -1) {
932
-		cli_errmsg("cli_scanhtml: fstat() failed for %s: %d\n", fullname, fd);
933
-		close(fd);
934
-		ret = CL_EIO;
935
-	    } else {
936
-		    if(sb.st_size != first_stat.st_size) {
937
-			    /* scan only if HTML contained comments, otherwise we already scanned it
938
-			     * above */
939
-			    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
940
-		    } else {
941
-			    cli_dbgmsg("Skipping comment.html because it is identical to nocomment.html\n");
942
-		    }
943
-		    close(fd);
944
-	    }
945
-	}
946
-    }
947
-
948
-    if (ret == CL_CLEAN) {
949
-	snprintf(fullname, 1024, "%s/script.html", tempname);
950
-	fd = open(fullname, O_RDONLY|O_BINARY);
951
-	if (fd >= 0) {
952 917
 	    ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
953 918
 	    close(fd);
954
-	}
955 919
     }
956 920
 
957
-    if(ret == CL_CLEAN) {
921
+    if(ret == CL_CLEAN && sb.st_size < 2097152) {
922
+	    /* limit to 2 MB, we're not interested in scanning large files in notags form */
923
+	    /* TODO: don't even create notags if file is over 2 MB */
958 924
 	    snprintf(fullname, 1024, "%s/notags.html", tempname);
959 925
 	    fd = open(fullname, O_RDONLY|O_BINARY);
960 926
 	    if(fd >= 0) {
... ...
@@ -993,7 +958,7 @@ static int cli_scanscript(int desc, cli_ctx *ctx)
993 993
 	}
994 994
 
995 995
 	/* don't normalize files that are too large */
996
-	if(sb.st_size > 409600) {
996
+	if(sb.st_size > 524288) {
997 997
 		cli_dbgmsg("cli_scanscript: exiting (file larger than 400 kB)\n");
998 998
 		return CL_CLEAN;
999 999
 	}
... ...
@@ -1923,8 +1888,8 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1923 1923
 	}
1924 1924
     }
1925 1925
 
1926
-    /* CL_TYPE_HTML: raw HTML file already scanned in cli_scanhtml() */
1927
-    if(type != CL_TYPE_IGNORED && type != CL_TYPE_HTML && ret != CL_VIRUS && !ctx->engine->sdb) {
1926
+    /* CL_TYPE_HTML: raw HTML files are not scanned unless the DOC_CONF_HTML_SKIPRAW failsafe flag is cleared via DCONF */
1927
+    if(type != CL_TYPE_IGNORED && (type != CL_TYPE_HTML || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) && ret != CL_VIRUS && !ctx->engine->sdb) {
1928 1928
 	if(cli_scanraw(desc, ctx, type, typercg) == CL_VIRUS)
1929 1929
 	    return CL_VIRUS;
1930 1930
     }