Browse code

JS normalizer fixes (bb #1155)

git-svn: trunk@4202

Török Edvin authored on 2008/09/24 05:52:27
Showing 4 changed files
... ...
@@ -579,6 +579,26 @@ static void screnc_decode(unsigned char *ptr, struct screnc_state *s)
579 579
 	}
580 580
 }
581 581
 
582
+static void js_process(struct parser_state *js_state, const char *js_begin, const char *js_end,
583
+		const char *line, const char *ptr, int in_script, const char *dirname)
584
+{ /* Hands the span [js_begin, js_end) to the JS parser and, when in_script is zero, finishes, outputs and destroys the parser state. */
585
+	if(!js_begin) /* no start recorded: the span begins at line */
586
+		js_begin = line;
587
+	if(!js_end) /* no end recorded: the span ends at ptr */
588
+		js_end = ptr;
589
+	if(js_end > js_begin && /* skip empty or reversed spans */
590
+			CLI_ISCONTAINED(line, 8192, js_begin, 1) && /* NOTE(review): presumably verifies the pointer lies inside the 8192 bytes starting at line - confirm against the macro definition */
591
+			CLI_ISCONTAINED(line, 8192, js_end, 1)) {
592
+		cli_js_process_buffer(js_state, js_begin, js_end - js_begin); /* third argument is the span length in bytes */
593
+	}
594
+	if(!in_script) {
595
+		/* The script has been closed (a /script tag was seen): finish parsing and write the normalized script out now. */
596
+		cli_js_parse_done(js_state);
597
+		cli_js_output(js_state, dirname);
598
+		cli_js_destroy(js_state); /* js_state is handed to cli_js_destroy here; the call sites in this diff reset their own js_state to NULL afterwards */
599
+	}
600
+}
601
+
582 602
 static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
583 603
 {
584 604
 	int fd_tmp, tag_length, tag_arg_length, binary;
... ...
@@ -1021,8 +1041,12 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1021 1021
 					next_state = HTML_NORM;
1022 1022
 					if (strcmp(tag, "/script") == 0) {
1023 1023
 						in_script = FALSE;
1024
-						if(js_state)
1024
+						if(js_state) {
1025 1025
 							js_end = ptr;
1026
+							js_process(js_state, js_begin, js_end, line, ptr, in_script, dirname);
1027
+							js_state = NULL;
1028
+							js_begin = js_end = NULL;
1029
+						}
1026 1030
 						/*don't output newlines in nocomment.html
1027 1031
 						 * html_output_c(file_buff_o2, '\n');*/
1028 1032
 					}
... ...
@@ -1054,15 +1078,15 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1054 1054
 						next_state = HTML_JSDECODE;
1055 1055
 						/* we already output the old tag, output the new tag now */
1056 1056
 						html_output_tag(file_buff_o2, tag, &tag_args);
1057
-					} else if(strcmp(tag, "script") == 0) {
1058
-						in_script = TRUE;
1059
-						if(dconf_js && !js_state) {
1060
-							js_state = cli_js_init();
1061
-							if(!js_state) {
1062
-								cli_dbgmsg("htmlnorm: Failed to initialize js parser");
1063
-							}
1064
-							js_begin = ptr;
1057
+					}
1058
+					in_script = TRUE;
1059
+					if(dconf_js && !js_state) {
1060
+						js_state = cli_js_init();
1061
+						if(!js_state) {
1062
+							cli_dbgmsg("htmlnorm: Failed to initialize js parser");
1065 1063
 						}
1064
+						js_begin = ptr;
1065
+						js_end = NULL;
1066 1066
 					}
1067 1067
 				} else if(strcmp(tag, "%@") == 0) {
1068 1068
 					arg_value = html_tag_arg_value(&tag_args, "language");
... ...
@@ -1332,7 +1356,8 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1332 1332
 				look_for_screnc = TRUE;
1333 1333
 				ptr_screnc = strstr(ptr, "#@~^");
1334 1334
 				if(ptr_screnc) {
1335
-					*ptr_screnc = '\0';
1335
+					ptr_screnc[0] = '/';
1336
+					ptr_screnc[1] = '/';
1336 1337
 					ptr_screnc += 4;
1337 1338
 				}
1338 1339
 				state = next_state;
... ...
@@ -1341,6 +1366,8 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1341 1341
 			case HTML_JSDECODE:
1342 1342
 				/* Check for start marker */
1343 1343
 				if (strncmp(ptr, "#@~^", 4) == 0) {
1344
+					ptr[0] = '/';
1345
+					ptr[1] = '/';
1344 1346
 					ptr += 4;
1345 1347
 					state = HTML_JSDECODE_LENGTH;
1346 1348
 					next_state = HTML_BAD_STATE;
... ...
@@ -1367,12 +1394,13 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1367 1367
 				state = HTML_JSDECODE_DECRYPT;
1368 1368
 				in_screnc = TRUE;
1369 1369
 				next_state = HTML_BAD_STATE;
1370
+				/* for JS normalizer */
1371
+				ptr[7] = '\n';
1370 1372
 				ptr += 8;
1371 1373
 				break;
1372 1374
 			case HTML_JSDECODE_DECRYPT:
1373 1375
 				screnc_decode(ptr, &screnc_state);
1374 1376
 				if(!screnc_state.length) {
1375
-					html_output_str(file_buff_o2, "</script>\n", 10);
1376 1377
 					state = HTML_NORM;
1377 1378
 					next_state = HTML_BAD_STATE;
1378 1379
 					in_screnc = FALSE;
... ...
@@ -1581,22 +1609,9 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1581 1581
 		ptrend = NULL;
1582 1582
 
1583 1583
 		if(js_state) {
1584
-			if(!js_begin)
1585
-				js_begin = line;
1586
-			if(!js_end)
1587
-				js_end = ptr;
1588
-			if(js_end > js_begin &&
1589
-					CLI_ISCONTAINED(line, 8192, js_begin, 1) &&
1590
-					CLI_ISCONTAINED(line, 8192, js_end, 1)) {
1591
-
1592
-				cli_js_process_buffer(js_state, js_begin, js_end - js_begin);
1593
-			}
1584
+			js_process(js_state, js_begin, js_end, line, ptr, in_script, dirname);
1594 1585
 			js_begin = js_end = NULL;
1595 1586
 			if(!in_script) {
1596
-				/*  we found a /script, normalize script now */
1597
-				cli_js_parse_done(js_state);
1598
-				cli_js_output(js_state, dirname);
1599
-				cli_js_destroy(js_state);
1600 1587
 				js_state = NULL;
1601 1588
 			}
1602 1589
 		}
... ...
@@ -337,7 +337,8 @@ void diff_file_mem(int fd, const char *ref, size_t len)
337 337
 	char *buf = cli_malloc(len);
338 338
 
339 339
 	fail_unless(!!buf, "unable to malloc buffer: %d", len);
340
-	fail_unless(read(fd, buf, len) == len,  "file is smaller: %lu, expected: %lu", p, len);
340
+	p = read(fd, buf, len);
341
+	fail_unless(p == len,  "file is smaller: %lu, expected: %lu", p, len);
341 342
 	p = 0;
342 343
 	while(len > 0) {
343 344
 		char c1 = ref[p];
... ...
@@ -21,6 +21,7 @@
21 21
  */
22 22
 #include <check.h>
23 23
 #include <fcntl.h>
24
+#include <string.h>
24 25
 #include "checks.h"
25 26
 #include "../libclamav/dconf.h"
26 27
 #include "../libclamav/htmlnorm.h"
... ...
@@ -413,6 +413,7 @@ START_TEST (js_buffer)
413 413
 	strncpy(exp + sizeof(s_exp) + len - 2, e_exp, sizeof(e_exp));
414 414
 
415 415
 	tokenizer_test(tst,exp,1);
416
+	free(exp);
416 417
 	free(tst);
417 418
 }
418 419
 END_TEST