Browse code

add htmlnorm unit tests and fix jsnorm space normalization

git-svn: trunk@4201

Török Edvin authored on 2008/09/24 05:52:22
Showing 25 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+<a href="data:application/octet-stream;base64,TVpQAAIAAAAEAA8A//8AALgAAAAhAAAAQAAaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAALtxEEAAM8BQUIvzU1NQsClAMARmrHn5ujEAeA2tUP9mcA4fvjEA6eX/tAnNIbRMzSFiDAoBAnB2FwIeTgwEL9rMEAAAAAAAAAAAAAAAAAAAwBAAAIAQAAAAAAAAAAAAAAAAAADaEAAA9BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAS0VSTkVMMzIuRExMAABFeGl0UHJvY2VzcwBVU0VSMzIuRExMAENMQU1lc3NhZ2VCb3hBAOYQAAAAAAAAPz8/P1BFAABMAQEAYUNhQgAAAAAAAAAA4ACOgQsBAhkABAAAAAYAAAAAAABAEAAAABAAAEAAAAAAAEAAABAAAAACAAABAAAAAAAAAAMACgAAAAAAACAAAAAEAAAAAAAAAgAAAAAAEAAAIAAAAAAQAAAQAAAAAAAAEAAAAAAAAAAAAAAAhBAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAW0NMQU1BVl0AEAAAABAAAAACAAABAAAAAAAAAAAAAAAAAAAAAAAAwA==">t</a>
... ...
@@ -337,7 +337,11 @@ static inline int buf_outs(const char *s, struct buf *buf)
337 337
 	i = buf->pos;
338 338
 	while(*s) {
339 339
 		while(i < buf_len && *s) {
340
-			buf->buf[i++] = tolower((unsigned char)(*s++));
340
+			if(isspace(*s))
341
+				buf->buf[i++] = ' ';
342
+			else
343
+				buf->buf[i++] = tolower((unsigned char)(*s));
344
+			++s;
341 345
 		}
342 346
 		if(i == buf_len) {
343 347
 			if(write(buf->outfd, buf->buf, buf_len) < 0)
... ...
@@ -882,11 +886,15 @@ void cli_js_output(struct parser_state *state, const char *tempdir)
882 882
 		/* separate multiple scripts with \n */
883 883
 		buf_outc('\n', &buf);
884 884
 	}
885
+	buf_outs("<script>", &buf);
885 886
 	state->current = state->global;
886 887
 	for(i = 0; i < state->tokens.cnt; i++) {
887 888
 		if(state_update_scope(state, &state->tokens.data[i]))
888 889
 			lastchar = output_token(&state->tokens.data[i], state->current, &buf, lastchar);
889 890
 	}
891
+	/* add /script if not already there */
892
+	if(buf.pos < 9 || memcmp(buf.buf + buf.pos - 9, "</script>", 9))
893
+		buf_outs("</script>", &buf);
890 894
 	if(write(buf.outfd, buf.buf, buf.pos) < 0) {
891 895
 		cli_dbgmsg(MODULE "I/O error");
892 896
 	}
... ...
@@ -1512,7 +1520,9 @@ static int yylex(YYSTYPE *lvalp, yyscan_t  scanner)
1512 1512
 				return parseNumber(lvalp, scanner);
1513 1513
 			case SinglelineComment:
1514 1514
 				while(scanner->pos < scanner->insize) {
1515
-					if(in[scanner->pos] == '\n')
1515
+					/* htmlnorm converts \n to space, so
1516
+					 * stop on space too */
1517
+					if(in[scanner->pos] == '\n' || in[scanner->pos] == ' ')
1516 1518
 						break;
1517 1519
 					scanner->pos++;
1518 1520
 				}
... ...
@@ -4,7 +4,7 @@ FILES = clam-v2.rar clam-v3.rar clam.cab clam.exe.bz2 clam.exe clam.zip \
4 4
 	clam-nsis.exe clam-petite.exe clam-upack.exe clam-wwpack.exe clam.pdf\
5 5
 	clam.mail clam.ppt clam.tnef clam.ea05.exe clam.ea06.exe clam.d64.zip\
6 6
 	clam.exe.mbox.base64 clam.exe.mbox.uu clam.exe.binhex clam.ole.doc \
7
-	clam.impl.zip
7
+	clam.impl.zip clam.exe.html
8 8
 
9 9
 SPLIT_DIR=$(top_srcdir)/test/.split
10 10
 
... ...
@@ -197,7 +197,7 @@ FILES = clam-v2.rar clam-v3.rar clam.cab clam.exe.bz2 clam.exe clam.zip \
197 197
 	clam-nsis.exe clam-petite.exe clam-upack.exe clam-wwpack.exe clam.pdf\
198 198
 	clam.mail clam.ppt clam.tnef clam.ea05.exe clam.ea06.exe clam.d64.zip\
199 199
 	clam.exe.mbox.base64 clam.exe.mbox.uu clam.exe.binhex clam.ole.doc \
200
-	clam.impl.zip
200
+	clam.impl.zip clam.exe.html
201 201
 
202 202
 SPLIT_DIR = $(top_srcdir)/test/.split
203 203
 EXTRA_DIST = .split
... ...
@@ -15,7 +15,10 @@ check_PROGRAMS = $(programs)
15 15
 check_SCRIPTS = $(scripts)
16 16
 
17 17
 if HAVE_LIBCHECK
18
-check_clamav_SOURCES = check_clamav.c check_jsnorm.c check_str.c check_regex.c checks.h $(top_builddir)/libclamav/clamav.h check_disasm.c check_uniq.c check_matchers.c
18
+check_clamav_SOURCES = check_clamav.c checks.h $(top_builddir)/libclamav/clamav.h\
19
+		       check_jsnorm.c check_str.c check_regex.c\
20
+		       check_disasm.c check_uniq.c check_matchers.c\
21
+		       check_htmlnorm.c
19 22
 check_clamav_CPPFLAGS = @CHECK_CPPFLAGS@ -DSRCDIR=\"$(abs_srcdir)\"
20 23
 check_clamav_LDADD = $(top_builddir)/libclamav/libclamav.la @THREAD_LIBS@ @CHECK_LIBS@
21 24
 else
... ...
@@ -41,7 +44,7 @@ lcov: $(LCOV_HTML)
41 41
 DIRECTORIES=--directory . --directory ../libclamav --directory ../clamd --directory ../freshclam --directory ../sigtool --directory ../clamscan --directory ../clamdscan
42 42
 .libs/check_clamav.gcda: $(TESTS)
43 43
 	$(LCOV_LCOV) $(DIRECTORIES) --zerocounters
44
-	@$(MAKE) check VALGRIND=
44
+	@$(MAKE) check VALGRIND= LIBDUMA=no LIBEFENCE=no
45 45
 
46 46
 $(LCOV_OUTPUT): .libs/check_clamav.gcda
47 47
 	$(LCOV_LCOV) --capture $(DIRECTORIES) --output-file $@
... ...
@@ -51,9 +51,9 @@ CONFIG_HEADER = $(top_builddir)/clamav-config.h
51 51
 CONFIG_CLEAN_FILES =
52 52
 am__EXEEXT_1 = check_clamav$(EXEEXT)
53 53
 am__check_clamav_SOURCES_DIST = check_clamav_skip.c check_clamav.c \
54
-	check_jsnorm.c check_str.c check_regex.c checks.h \
55
-	$(top_builddir)/libclamav/clamav.h check_disasm.c check_uniq.c \
56
-	check_matchers.c
54
+	checks.h $(top_builddir)/libclamav/clamav.h check_jsnorm.c \
55
+	check_str.c check_regex.c check_disasm.c check_uniq.c \
56
+	check_matchers.c check_htmlnorm.c
57 57
 @HAVE_LIBCHECK_FALSE@am_check_clamav_OBJECTS =  \
58 58
 @HAVE_LIBCHECK_FALSE@	check_clamav-check_clamav_skip.$(OBJEXT)
59 59
 @HAVE_LIBCHECK_TRUE@am_check_clamav_OBJECTS =  \
... ...
@@ -63,7 +63,8 @@ am__check_clamav_SOURCES_DIST = check_clamav_skip.c check_clamav.c \
63 63
 @HAVE_LIBCHECK_TRUE@	check_clamav-check_regex.$(OBJEXT) \
64 64
 @HAVE_LIBCHECK_TRUE@	check_clamav-check_disasm.$(OBJEXT) \
65 65
 @HAVE_LIBCHECK_TRUE@	check_clamav-check_uniq.$(OBJEXT) \
66
-@HAVE_LIBCHECK_TRUE@	check_clamav-check_matchers.$(OBJEXT)
66
+@HAVE_LIBCHECK_TRUE@	check_clamav-check_matchers.$(OBJEXT) \
67
+@HAVE_LIBCHECK_TRUE@	check_clamav-check_htmlnorm.$(OBJEXT)
67 68
 check_clamav_OBJECTS = $(am_check_clamav_OBJECTS)
68 69
 @HAVE_LIBCHECK_TRUE@check_clamav_DEPENDENCIES =  \
69 70
 @HAVE_LIBCHECK_TRUE@	$(top_builddir)/libclamav/libclamav.la
... ...
@@ -234,7 +235,11 @@ scripts = check_clamd.sh check_freshclam.sh check_sigtool.sh check_clamscan.sh\
234 234
 TESTS_ENVIRONMENT = export abs_srcdir=$(abs_srcdir) AWK=$(AWK);
235 235
 check_SCRIPTS = $(scripts)
236 236
 @HAVE_LIBCHECK_FALSE@check_clamav_SOURCES = check_clamav_skip.c
237
-@HAVE_LIBCHECK_TRUE@check_clamav_SOURCES = check_clamav.c check_jsnorm.c check_str.c check_regex.c checks.h $(top_builddir)/libclamav/clamav.h check_disasm.c check_uniq.c check_matchers.c
237
+@HAVE_LIBCHECK_TRUE@check_clamav_SOURCES = check_clamav.c checks.h $(top_builddir)/libclamav/clamav.h\
238
+@HAVE_LIBCHECK_TRUE@		       check_jsnorm.c check_str.c check_regex.c\
239
+@HAVE_LIBCHECK_TRUE@		       check_disasm.c check_uniq.c check_matchers.c\
240
+@HAVE_LIBCHECK_TRUE@		       check_htmlnorm.c
241
+
238 242
 @HAVE_LIBCHECK_TRUE@check_clamav_CPPFLAGS = @CHECK_CPPFLAGS@ -DSRCDIR=\"$(abs_srcdir)\"
239 243
 @HAVE_LIBCHECK_TRUE@check_clamav_LDADD = $(top_builddir)/libclamav/libclamav.la @THREAD_LIBS@ @CHECK_LIBS@
240 244
 CLEANFILES = lcov.out *.gcno *.gcda *.log $(FILES) test-stderr.log clamscan.log valgrind-*.log duma.log duma2.log
... ...
@@ -298,6 +303,7 @@ distclean-compile:
298 298
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_clamav.Po@am__quote@
299 299
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_clamav_skip.Po@am__quote@
300 300
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_disasm.Po@am__quote@
301
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_htmlnorm.Po@am__quote@
301 302
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_jsnorm.Po@am__quote@
302 303
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_matchers.Po@am__quote@
303 304
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check_clamav-check_regex.Po@am__quote@
... ...
@@ -437,6 +443,20 @@ check_clamav-check_matchers.obj: check_matchers.c
437 437
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
438 438
 @am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(check_clamav_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o check_clamav-check_matchers.obj `if test -f 'check_matchers.c'; then $(CYGPATH_W) 'check_matchers.c'; else $(CYGPATH_W) '$(srcdir)/check_matchers.c'; fi`
439 439
 
440
+check_clamav-check_htmlnorm.o: check_htmlnorm.c
441
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(check_clamav_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT check_clamav-check_htmlnorm.o -MD -MP -MF $(DEPDIR)/check_clamav-check_htmlnorm.Tpo -c -o check_clamav-check_htmlnorm.o `test -f 'check_htmlnorm.c' || echo '$(srcdir)/'`check_htmlnorm.c
442
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/check_clamav-check_htmlnorm.Tpo $(DEPDIR)/check_clamav-check_htmlnorm.Po
443
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='check_htmlnorm.c' object='check_clamav-check_htmlnorm.o' libtool=no @AMDEPBACKSLASH@
444
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
445
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(check_clamav_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o check_clamav-check_htmlnorm.o `test -f 'check_htmlnorm.c' || echo '$(srcdir)/'`check_htmlnorm.c
446
+
447
+check_clamav-check_htmlnorm.obj: check_htmlnorm.c
448
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(check_clamav_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT check_clamav-check_htmlnorm.obj -MD -MP -MF $(DEPDIR)/check_clamav-check_htmlnorm.Tpo -c -o check_clamav-check_htmlnorm.obj `if test -f 'check_htmlnorm.c'; then $(CYGPATH_W) 'check_htmlnorm.c'; else $(CYGPATH_W) '$(srcdir)/check_htmlnorm.c'; fi`
449
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/check_clamav-check_htmlnorm.Tpo $(DEPDIR)/check_clamav-check_htmlnorm.Po
450
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='check_htmlnorm.c' object='check_clamav-check_htmlnorm.obj' libtool=no @AMDEPBACKSLASH@
451
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
452
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(check_clamav_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o check_clamav-check_htmlnorm.obj `if test -f 'check_htmlnorm.c'; then $(CYGPATH_W) 'check_htmlnorm.c'; else $(CYGPATH_W) '$(srcdir)/check_htmlnorm.c'; fi`
453
+
440 454
 mostlyclean-libtool:
441 455
 	-rm -f *.lo
442 456
 
... ...
@@ -710,7 +730,7 @@ $(top_builddir)/test/clam.exe:
710 710
 @ENABLE_COVERAGE_TRUE@lcov: $(LCOV_HTML)
711 711
 @ENABLE_COVERAGE_TRUE@.libs/check_clamav.gcda: $(TESTS)
712 712
 @ENABLE_COVERAGE_TRUE@	$(LCOV_LCOV) $(DIRECTORIES) --zerocounters
713
-@ENABLE_COVERAGE_TRUE@	@$(MAKE) check VALGRIND=
713
+@ENABLE_COVERAGE_TRUE@	@$(MAKE) check VALGRIND= LIBDUMA=no LIBEFENCE=no
714 714
 
715 715
 @ENABLE_COVERAGE_TRUE@$(LCOV_OUTPUT): .libs/check_clamav.gcda
716 716
 @ENABLE_COVERAGE_TRUE@	$(LCOV_LCOV) --capture $(DIRECTORIES) --output-file $@
717 717
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+<a href="example.com">aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</a><a 
href="example.com">aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</a><a 
href="example.com">aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</a><a href="example.com">aa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa 
bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bba</a>ab 
0 1
\ No newline at end of file
1 2
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+ example.com aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa example.com 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa example.com 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa example.com aa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa 
bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bbaa bba ab  
0 1
\ No newline at end of file
... ...
@@ -326,11 +326,50 @@ int open_testfile(const char *name)
326 326
 	sprintf(str, "%s/%s", srcdir, name);
327 327
 
328 328
 	fd = open(str, O_RDONLY);
329
-	fail_unless(fd >= 0, "open()");
329
+	fail_unless(fd >= 0, "open() failed: %s", str);
330 330
 	free(str);
331 331
 	return fd;
332 332
 }
333 333
 
334
+void diff_file_mem(int fd, const char *ref, size_t len) /* fail the current test unless fd holds exactly the len bytes at ref */
335
+{ /* fd is read from its current offset and is closed before returning */
336
+	size_t p = 0, reflen = len;
337
+	char *buf = cli_malloc(len);
338
+	ssize_t nread;
339
+	fail_unless(!!buf, "unable to malloc buffer: %lu", (unsigned long)len);
340
+	nread = read(fd, buf, len); /* keep the count so the failure message reports what was actually read */
341
+	fail_unless(nread >= 0 && (size_t)nread == len,  "file is smaller: %ld, expected: %lu", (long)nread, (unsigned long)len);
342
+	while(len > 0) {
343
+		char c1 = ref[p];
344
+		char c2 = buf[p];
345
+		fail_unless(c1 == c2, "file contents mismatch at byte: %lu, was: %c, expected: %c", (unsigned long)p, c2, c1);
346
+		p++;
347
+		len--;
348
+	}
349
+	free(buf);
350
+	p = lseek(fd, 0, SEEK_END); /* end offset is the file size; it must equal the reference length */
351
+	fail_unless(p == reflen, "trailing garbage, file size: %lu, expected: %lu", (unsigned long)p, (unsigned long)reflen);
352
+	close(fd);
353
+}
354
+
355
+void diff_files(int fd, int ref_fd) /* fail the current test unless fd and ref_fd have identical contents */
356
+{
357
+	char *ref;
358
+	ssize_t nread;
359
+	off_t siz = lseek(ref_fd, 0, SEEK_END);
360
+	fail_unless(siz != -1, "lseek failed");
361
+	/* load the whole reference file into memory, then reuse diff_file_mem() */
362
+	ref = cli_malloc(siz);
363
+	fail_unless(!!ref, "unable to malloc buffer: %ld", (long)siz);
364
+
365
+	fail_unless(lseek(ref_fd, 0, SEEK_SET) == 0,"lseek failed");
366
+	nread = read(ref_fd, ref, siz);
367
+	fail_unless(nread == siz, "short read, expected: %ld, was: %ld", (long)siz, (long)nread);
368
+	close(ref_fd);
369
+	diff_file_mem(fd, ref, siz); /* compares and also closes fd */
370
+	free(ref);
371
+}
372
+
334 373
 int main(int argc, char **argv)
335 374
 {
336 375
     int nf;
... ...
@@ -347,6 +386,7 @@ int main(int argc, char **argv)
347 347
     srunner_add_suite(sr, test_disasm_suite());
348 348
     srunner_add_suite(sr, test_uniq_suite());
349 349
     srunner_add_suite(sr, test_matchers_suite());
350
+    srunner_add_suite(sr, test_htmlnorm_suite());
350 351
 
351 352
     srunner_set_log(sr, "test.log");
352 353
     if(freopen("test-stderr.log","w+",stderr) == NULL) {
353 354
new file mode 100644
... ...
@@ -0,0 +1,155 @@
0
+/*
1
+ *  Unit tests for the HTML normalizer.
2
+ *
3
+ *  Copyright (C) 2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Török Edvin
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+#include <check.h>
22
+#include <fcntl.h>
23
+#include "checks.h"
24
+#include "../libclamav/dconf.h"
25
+#include "../libclamav/htmlnorm.h"
26
+#include "../libclamav/others.h"
27
+
28
+static char *dir;
29
+static struct cli_dconf *dconf;
30
+
31
+static void htmlnorm_setup(void) /* fixture setup: fills the file-scope dconf and dir used by the tests */
32
+{
33
+	dconf = cli_dconf_init();
34
+	dir = cli_gentemp(NULL); /* presumably only generates a name; the test body calls mkdir(dir) itself - TODO confirm */
35
+	fail_unless(!!dconf, "failed to init dconf");
36
+	fail_unless(!!dir, "cli_gentemp failed");
37
+}
38
+
39
+static void htmlnorm_teardown(void) /* fixture teardown: releases what htmlnorm_setup() stored in dconf and dir */
40
+{
41
+	free(dconf);
42
+	fail_unless(cli_rmdirs(dir) == 0, "rmdirs failed"); /* NOTE(review): the test body already removes dir; confirm cli_rmdirs() returns 0 for a missing directory */
43
+	free(dir);
44
+	dir = NULL;
45
+}
46
+
47
+static struct test { /* one entry per input document; indexed by the loop test's _i */
47
+	const char *input; /* passed to open_testfile() */
48
+	const char *nocommentref; /* reference for <dir>/nocomment.html */
49
+	const char *notagsref; /* reference for <dir>/notags.html */
50
+	const char *jsref; /* reference for <dir>/javascript */
51
+} tests[] = {
52
+	/* NULL means don't test it */
53
+	{"input/htmlnorm_buf.html","buf.nocomment.ref","buf.notags.ref",NULL},
54
+	{"input/htmlnorm_encode.html","encode.nocomment.ref",NULL,"encode.js.ref"},
55
+	{"input/htmlnorm_js_test.html","js.nocomment.ref",NULL,"js.js.ref"},
56
+	{"input/htmlnorm_test.html","test.nocomment.ref","test.notags.ref",NULL},
57
+	{"input/htmlnorm_urls.html","urls.nocomment.ref","urls.notags.ref",NULL}
58
+};
60
+
61
+#ifdef CHECK_HAVE_LOOPS
62
+
63
+static void check_dir(const char *dir, const struct test *test) /* compare the normalizer output files in dir with the reference files named by test */
63
+{
64
+	char filename[4096];
65
+	int fd, reffd;
66
+	/* a NULL reference name means that output file is not checked */
67
+	if (test->nocommentref) {
68
+		snprintf(filename, sizeof(filename), "%s/nocomment.html", dir);
69
+		fd = open(filename, O_RDONLY);
70
+		fail_unless(fd >= 0,"unable to open: %s", filename);
71
+		reffd = open_testfile(test->nocommentref);
72
+
73
+		diff_files(fd, reffd);
74
+		/* diff_files() closes reffd itself and closes fd through
75
+		 * diff_file_mem(), so the descriptors must not be closed
76
+		 * a second time here. */
77
+	}
78
+	if (test->notagsref) {
79
+		snprintf(filename, sizeof(filename), "%s/notags.html", dir);
80
+		fd = open(filename, O_RDONLY);
81
+		fail_unless(fd >= 0,"unable to open: %s", filename);
82
+		reffd = open_testfile(test->notagsref);
83
+
84
+		diff_files(fd, reffd);
85
+		/* as above: diff_files() has already closed
86
+		 * fd and reffd, so there is nothing left to
87
+		 * release here */
88
+	}
89
+	if (test->jsref) {
90
+		snprintf(filename, sizeof(filename), "%s/javascript", dir);
91
+		fd = open(filename, O_RDONLY);
92
+		fail_unless(fd >= 0,"unable to open: %s", filename);
93
+		reffd = open_testfile(test->jsref);
94
+
95
+		diff_files(fd, reffd);
96
+		/* as above: diff_files() has already closed
97
+		 * fd and reffd, so there is nothing left to
98
+		 * release here */
99
+	}
100
+}
102
+
103
+START_TEST (test_htmlnorm_api) /* loop test: runs html_normalise_fd() on tests[_i].input with four argument combinations */
103
+{
104
+	int fd;
105
+	tag_arguments_t hrefs;
106
+
107
+	memset(&hrefs, 0, sizeof(hrefs));
108
+
109
+	fd = open_testfile(tests[_i].input);
110
+	fail_unless(fd >= 0,"open_testfile failed");
111
+	/* NOTE(review): fd is never rewound between the passes below; confirm html_normalise_fd() does not depend on the file offset */
112
+	/* pass 1: no tag-argument collection, dconf supplied; only this pass is compared with the reference files */
113
+	fail_unless(mkdir(dir, 0700) == 0,"mkdir failed");
114
+	fail_unless(html_normalise_fd(fd, dir, NULL, dconf) == 1, "html_normalise_fd failed");
115
+	check_dir(dir, &tests[_i]);
116
+	fail_unless(cli_rmdirs(dir) == 0, "rmdirs failed");
117
+	/* pass 2: neither hrefs nor dconf supplied */
118
+	fail_unless(mkdir(dir, 0700) == 0,"mkdir failed");
119
+	fail_unless(html_normalise_fd(fd, dir, NULL, NULL) == 1, "html_normalise_fd failed");
120
+	fail_unless(cli_rmdirs(dir) == 0, "rmdirs failed");
121
+	/* pass 3: zero-initialized hrefs supplied, released afterwards */
122
+	fail_unless(mkdir(dir, 0700) == 0,"mkdir failed");
123
+	fail_unless(html_normalise_fd(fd, dir, &hrefs, dconf) == 1, "html_normalise_fd failed");
124
+	fail_unless(cli_rmdirs(dir) == 0, "rmdirs failed");
125
+	html_tag_arg_free(&hrefs);
126
+	/* pass 4: same as pass 3 but with hrefs.scanContents set */
127
+	memset(&hrefs, 0, sizeof(hrefs));
128
+	hrefs.scanContents = 1;
129
+	fail_unless(mkdir(dir, 0700) == 0,"mkdir failed");
130
+	fail_unless(html_normalise_fd(fd, dir, &hrefs, dconf) == 1, "html_normalise_fd failed");
131
+	fail_unless(cli_rmdirs(dir) == 0, "rmdirs failed");
132
+	html_tag_arg_free(&hrefs);
133
+
134
+	close(fd);
135
+}
136
+END_TEST
138
+#endif
139
+
140
+Suite *test_htmlnorm_suite(void) /* builds and returns the "htmlnorm" suite with its single "htmlnorm api" test case */
141
+{
142
+	Suite *s = suite_create("htmlnorm");
143
+	TCase *tc_htmlnorm_api;
144
+	/* the loop test is registered only when CHECK_HAVE_LOOPS is defined; otherwise the case has no tests */
145
+	tc_htmlnorm_api = tcase_create("htmlnorm api");
146
+	suite_add_tcase (s, tc_htmlnorm_api);
147
+#ifdef CHECK_HAVE_LOOPS	
148
+	tcase_add_loop_test(tc_htmlnorm_api, test_htmlnorm_api, 0, sizeof(tests)/sizeof(tests[0])); /* one iteration per tests[] entry */
149
+#endif
150
+	tcase_add_unchecked_fixture(tc_htmlnorm_api,
151
+					htmlnorm_setup, htmlnorm_teardown);
152
+
153
+	return s;
154
+}
... ...
@@ -249,83 +249,50 @@ static void tokenizer_test(const char *in, const char *expected, int split)
249 249
 	cli_js_output(state, tmpdir);
250 250
 	snprintf(filename, 1023, "%s/javascript", tmpdir);
251 251
 
252
-	buf = cli_malloc(len + 1);
253
-	if(!buf) {
254
-		jstest_teardown();
255
-		fail("malloc buffer");
256
-	}
257
-
258 252
 	fd = open(filename, O_RDONLY);
259 253
 	if(fd < 0) {
260 254
 		jstest_teardown();
261 255
 		fail("failed to open output file: %s", filename);
262 256
 	}
263 257
 
264
-	p = read(fd, buf, len);
265
-	if(p != len) {
266
-		close(fd);
267
-		jstest_teardown();
268
-		fail("file is smaller: %lu, expected: %lu", p, len);
269
-	}
270
-	p = lseek(fd, 0, SEEK_CUR);
271
-	fail_unless(p == len, "lseek position incorrect: %ld != %ld", p, len);
272
-	p = 0;
273
-	while(len > 0) {
274
-		char c1 = expected[p];
275
-		char c2 = buf[p];
276
-		if(c1 != c2) {
277
-			close(fd);
278
-			jstest_teardown();
279
-			fail("file contents mismatch at byte: %lu, was: %c, expected: %c", p, c2, c1);
280
-		}
281
-		p++;
282
-		len--;
283
-	}
284
-	free(buf);
285
-	p2 = lseek(fd, 0, SEEK_END);
286
-	if(p != p2) {
287
-		close(fd);
288
-		jstest_teardown();
289
-		fail("trailing garbage, file size: %ld, expected: %ld", p2, p);
290
-	}
291
-	close(fd);
258
+	diff_file_mem(fd, expected, len);
292 259
 }
293 260
 
294 261
 static const char jstest_buf0[] =
295 262
 "function foo(a, b) {\n"\
296 263
 "var x = 1.9e2*2*a/ 4.;\n"\
297
-"var y = 'test\\'tst';//var foo=5\n"\
264
+"var y = 'test\\'tst';//var\n"\
298 265
 "x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"\
299 266
 "z=x/y;/* multiline oneline */var t=z/a;\n"\
300 267
 "z=[test;testi];"\
301
-"document.writeln('something');}";
268
+"document.writeln('something\n');}";
302 269
 
303 270
 static const char jstest_expected0[] =
304
-"function n000(n001,n002){"\
271
+"<script>function n000(n001,n002){"\
305 272
 "var n003=190*2*n001/4;"\
306 273
 "var n004=\"test\'tst\";"\
307 274
 "n003=n002[5],"\
308 275
 "z=n003/n004;var n005=z/n001;"\
309 276
 "z=[test;testi];"\
310
-"document.writeln(\"something\");}";
277
+"document.writeln(\"something \");}</script>";
311 278
 
312 279
 static const char jstest_buf1[] =
313 280
 "function () { var id\\u1234tx;}";
314 281
 
315 282
 static const char jstest_expected1[] =
316
-"function(){var n000;}";
283
+"<script>function(){var n000;}</script>";
317 284
 
318 285
 static const char jstest_buf2[] =
319 286
 "function () { var tst=\"a\"+'bc'+     'd'; }";
320 287
 
321 288
 static const char jstest_expected2[] =
322
-"function(){var n000=\"abcd\";}";
289
+"<script>function(){var n000=\"abcd\";}</script>";
323 290
 
324 291
 static const char jstest_buf3[] =
325 292
 "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
326 293
 
327 294
 static const char jstest_expected3[] =
328
-"alert(\"w00t\");";
295
+"<script>alert(\"w00t\");</script>";
329 296
 
330 297
 #define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
331 298
 
... ...
@@ -334,61 +301,61 @@ static char jstest_buf4[] =
334 334
 "qbphzrag.jevgr(harfpncrriny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
335 335
 
336 336
 static char jstest_expected4[] =
337
-"qbphzrag.jevgr(\"<fpevcg ynathntr=\"wninfpevcg\">shapgvba qs(f){ine f1=harfpncr(f.fhofge(0,f.yratgu-1)); ine g='';sbe(v=0;v<f1.yratgu;v++)g+=fgevat.sebzpunepbqr(f1.punepbqrng(v)-f.fhofge(f.yratgu-1,1));qbphzrag.jevgr(harfpncr(g));}</fpevcg>\");riny();nyreg(\"j00g\");";
337
+"<fpevcg>qbphzrag.jevgr(\"<fpevcg ynathntr=\"wninfpevcg\">shapgvba qs(f){ine f1=harfpncr(f.fhofge(0,f.yratgu-1)); ine g='';sbe(v=0;v<f1.yratgu;v++)g+=fgevat.sebzpunepbqr(f1.punepbqrng(v)-f.fhofge(f.yratgu-1,1));qbphzrag.jevgr(harfpncr(g));}</fpevcg>\");riny();nyreg(\"j00g\");</fpevcg>";
338 338
 
339 339
 static char jstest_buf5[] =
340 340
 "shapgvba (c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
341 341
 
342 342
 static const char jstest_expected5[] =
343
-"function(n000,n001,n002,n003,n004,n005){}(alert(\"w00t\"););";
343
+"<script>function(n000,n001,n002,n003,n004,n005){}(alert(\"w00t\"););</script>";
344 344
 
345 345
 static const char jstest_buf6[] =
346 346
 "function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
347 347
 
348 348
 static const char jstest_expected6[] =
349
-"function n000(n001,n002,n003,n004,n005,n006){}something();$(alert(\"w00t\"););";
349
+"<script>function n000(n001,n002,n003,n004,n005,n006){}something();$(alert(\"w00t\"););</script>";
350 350
 
351 351
 static const char jstest_buf7[] =
352 352
 "var z=\"tst" B64 "tst\";";
353 353
 
354 354
 static const char jstest_expected7[] =
355
-"var n000=\"tst" B64 "tst\";";
355
+"<script>var n000=\"tst" B64 "tst\";</script>";
356 356
 
357 357
 static const char jstest_buf8[] =
358 358
 "var z=\'tst" B64 "tst\';";
359 359
 
360 360
 static const char jstest_expected8[] =
361
-"var n000=\"tst" B64 "tst\";";
361
+"<script>var n000=\"tst" B64 "tst\";</script>";
362 362
 
363 363
 static char jstest_buf9[] =
364 364
 "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
365 365
 
366 366
 static const char jstest_expected9[] =
367
-"alert(\"test\");";
367
+"<script>alert(\"test\");</script>";
368 368
 
369 369
 static const char jstest_buf10[] =
370 370
 "function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";
371 371
 
372 372
 static const char jstest_expected10[] =
373
-"function n000 n000()n001(x);function(n002,n003,n004,n005,n006,n007){function n008(){}";
373
+"<script>function n000 n000()n001(x);function(n002,n003,n004,n005,n006,n007){function n008(){}</script>";
374 374
 
375 375
 static const char jstest_buf11[] =
376 376
 "var x=123456789 ;";
377 377
 
378 378
 static const char jstest_expected11[] =
379
-"var n000=123456789;";
379
+"<script>var n000=123456789;</script>";
380 380
 
381 381
 static const char jstest_buf12[] =
382 382
 "var x='test\\u0000test';";
383 383
 
384 384
 static const char jstest_expected12[] =
385
-"var n000=\"test\x1test\";";
385
+"<script>var n000=\"test\x1test\";</script>";
386 386
 
387 387
 static const char jstest_buf13[] =
388 388
 "var x\\s12345";
389 389
 
390 390
 static const char jstest_expected13[] =
391
-"var n000";
391
+"<script>var n000</script>";
392 392
 
393 393
 
394 394
 static struct {
... ...
@@ -430,7 +397,10 @@ START_TEST (js_buffer)
430 430
 	const size_t len = 512*1024;
431 431
 	const char s[] = "x=\"";
432 432
 	const char e[] = "\"";
433
+	const char s_exp[] = "<script>";
434
+	const char e_exp[] = "</script>";
433 435
 	char *tst = malloc(len);
436
+	char *exp = malloc(len + sizeof(s_exp) + sizeof(e_exp) - 2);
434 437
 
435 438
 	fail_unless(!!tst, "malloc");
436 439
 
... ...
@@ -438,7 +408,11 @@ START_TEST (js_buffer)
438 438
 	strncpy(tst, s, strlen(s));
439 439
 	strncpy(tst + len - sizeof(e), e, sizeof(e));
440 440
 
441
-	tokenizer_test(tst,tst,1);
441
+	strncpy(exp, s_exp, len);
442
+	strncpy(exp + sizeof(s_exp) - 1, tst, len-1);
443
+	strncpy(exp + sizeof(s_exp) + len - 2, e_exp, sizeof(e_exp));
444
+
445
+	tokenizer_test(tst,exp,1);
442 446
 	free(tst);
443 447
 }
444 448
 END_TEST
... ...
@@ -10,6 +10,10 @@ Suite *test_regex_suite(void);
10 10
 Suite *test_disasm_suite(void);
11 11
 Suite *test_uniq_suite(void);
12 12
 Suite *test_matchers_suite(void);
13
+Suite *test_htmlnorm_suite(void);
13 14
 void errmsg_expected(void);
14 15
 int open_testfile(const char *name);
16
+void diff_files(int fd, int reffd);
17
+void diff_file_mem(int fd, const char *ref, size_t len);
18
+
15 19
 #endif
16 20
new file mode 100644
... ...
@@ -0,0 +1,2 @@
0
+<script><!--function n000(){alert("test@<test>  ");}</script>
1
+<script><!--function n000(){var n001="";</script>
0 2
\ No newline at end of file
1 3
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+<script language="jscript.encode"><script language="javascript"><!--// //comment1 //comment2functionfoo(){alert("test@<test>");}//--></script><script language="jscript.encode"><script language="javascript"><!--// functionfoo(){varx="";//--></script>
0 1
\ No newline at end of file
1 2
new file mode 100644
2 3
Binary files /dev/null and b/unit_tests/input/htmlnorm_buf.html differ
3 4
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+<script language='JScript.Encode'><!--//
1
+//comment1
2
+//comment2#@~^LAAAAA==W!x^DkKxP6WKc#Pls+MYcvD+/D@$@!D+/D@*@#@&E#p8Vg0AAA==^#~@
3
+//--></script>
4
+<script language='JScript.Encode'><!--//
5
+#@~^JyAAAA==@#@&0;	mDkW	P6GWv#~	@#@&@#@&58gIAA==^#~@ 
6
+//--></script>
0 7
new file mode 100644
... ...
@@ -0,0 +1,13 @@
0
+<script>
1
+	function() { var x = "htmlJS"+"testsignature"; }
2
+</script>
3
+<script>
4
+	<!--
5
+	</something>
6
+	function() { var x = "htmlJS"+"testsignature"; }
7
+	-->
8
+</script>
9
+<script>
10
+	//some comment
11
+	function() { var x = "htmlJS"+"testsignature"; }
12
+</script>
0 13
new file mode 100644
1 14
Binary files /dev/null and b/unit_tests/input/htmlnorm_test.html differ
2 15
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+	<form action="fake.example.com">
1
+		<img src="real.example.com">
2
+		<img dynsrc="real.example.com">
3
+		<a title="&ouml;real.example.com" href="fake.example.com">real.example.com<a
4
+				href="real.example.com">real.example.com</a>
5
+			<iframe src='fake.example.com'>
6
+				<area href='real.example.com'>
7
+		</form>
0 8
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+<script>function(){var n000="htmljstestsignature";}</script>
1
+<script><!--</something>function(){var n000="htmljstestsignature";}--></script>
2
+<script>comment function(){var n000="htmljstestsignature";}</script>
0 3
\ No newline at end of file
1 4
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+<script>function(){varx="htmljs"+"testsignature";}</script><script><!--</something>function(){varx="htmljs"+"testsignature";}--></script><script>//somecommentfunction(){varx="htmljs"+"testsignature";}</script>
0 1
\ No newline at end of file
1 2
new file mode 100644
... ...
@@ -0,0 +1 @@
0

                
0 1
\ No newline at end of file
1 2
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+ htmltexttestsignature html t ags t e st sig na tu r e quote\ s quote\"here\" s entity 
0 1
\ No newline at end of file
1 2
new file mode 100644
... ...
@@ -0,0 +1 @@
0

                
0 1
\ No newline at end of file
1 2
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+ real.example.com fake.example.com real.example.com real.example.com real.example.com 
0 1
\ No newline at end of file