Browse code

* libclamav/htmlnorm.c: generate only nocomment.html (always contains script too) and notags.html (bb #851) * libclamav/hashtab.h: len and data were reversed, invalidating entitylist.h * libclamav/filetypes_int.h: improve HTML filetype detection (bb #853)

git-svn: trunk@3660

Török Edvin authored on 2008/02/21 00:49:43
Showing 3 changed files
... ...
@@ -99,23 +99,16 @@ static const char *ftypes_int[] = {
99 99
   "*:0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
100 100
   "*:0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20:Mail file:CL_TYPE_MAIL",
101 101
   "*:4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20:Mail file:CL_TYPE_MAIL",
102
-  "*:3c62723e:HTML data:CL_TYPE_HTML",
103
-  "*:3c42723e:HTML data:CL_TYPE_HTML",
104
-  "*:3c42523e:HTML data:CL_TYPE_HTML",
105
-  "*:3c703e:HTML data:CL_TYPE_HTML",
106
-  "*:3c503e:HTML data:CL_TYPE_HTML",
107
-  "*:68726566:HTML data:CL_TYPE_HTML",
108
-  "*:48726566:HTML data:CL_TYPE_HTML",
109
-  "*:48524546:HTML data:CL_TYPE_HTML",
102
+  "*:3c4120*(68|48)(72|52)6566:HTML data:CL_TYPE_HTML",
103
+  "*:3c6120*(68|48)(72|52)6566:HTML data:CL_TYPE_HTML",
104
+  "*:3c6120*(68|48)(72|52)4546:HTML data:CL_TYPE_HTML",
105
+  "*:3c4120*(68|48)(72|52)4546:HTML data:CL_TYPE_HTML",
110 106
   "*:3c68746d6c3e:HTML data:CL_TYPE_HTML",
111 107
   "*:3c48544d4c3e:HTML data:CL_TYPE_HTML",
112 108
   "*:3c48746d6c3e:HTML data:CL_TYPE_HTML",
113 109
   "*:3c686561643e:HTML data:CL_TYPE_HTML",
114 110
   "*:3c484541443e:HTML data:CL_TYPE_HTML",
115 111
   "*:3c486561643e:HTML data:CL_TYPE_HTML",
116
-  "*:3c666f6e74:HTML data:CL_TYPE_HTML",
117
-  "*:3c466f6e74:HTML data:CL_TYPE_HTML",
118
-  "*:3c464f4e54:HTML data:CL_TYPE_HTML",
119 112
   "*:3c696d67:HTML data:CL_TYPE_HTML",
120 113
   "*:3c494d47:HTML data:CL_TYPE_HTML",
121 114
   "*:3c496d67:HTML data:CL_TYPE_HTML",
... ...
@@ -60,8 +60,8 @@ typedef struct {
60 60
 struct element
61 61
 {
62 62
 	const char* key;
63
-	size_t len;
64 63
 	element_data data;
64
+	size_t len;
65 65
 };
66 66
 
67 67
 struct hashtable {
... ...
@@ -292,7 +292,7 @@ static void html_output_flush(file_buff_t *fbuff)
292 292
 	}
293 293
 }
294 294
 
295
-static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c)
295
+static inline void html_output_c(file_buff_t *fbuff1, unsigned char c)
296 296
 {
297 297
 	if (fbuff1) {
298 298
 		if (fbuff1->length == HTML_FILE_BUFF_LEN) {
... ...
@@ -300,12 +300,6 @@ static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned cha
300 300
 		}
301 301
 		fbuff1->buffer[fbuff1->length++] = c;
302 302
 	}
303
-	if (fbuff2) {
304
-		if (fbuff2->length == HTML_FILE_BUFF_LEN) {
305
-			html_output_flush(fbuff2);
306
-		}
307
-		fbuff2->buffer[fbuff2->length++] = c;
308
-	}
309 303
 }
310 304
 
311 305
 static void html_output_str(file_buff_t *fbuff, const unsigned char *str, int len)
... ...
@@ -421,21 +415,21 @@ static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags
421 421
 {
422 422
 	int i, j, len;
423 423
 
424
-	html_output_c(fbuff, NULL, '<');
424
+	html_output_c(fbuff, '<');
425 425
 	html_output_str(fbuff, tag, strlen(tag));
426 426
 	for (i=0; i < tags->count; i++) {
427
-		html_output_c(fbuff, NULL, ' ');
427
+		html_output_c(fbuff, ' ');
428 428
 		html_output_str(fbuff, tags->tag[i], strlen(tags->tag[i]));
429 429
 		if (tags->value[i]) {
430 430
 			html_output_str(fbuff, "=\"", 2);
431 431
 			len = strlen(tags->value[i]);
432 432
 			for (j=0 ; j<len ; j++) {
433
-				html_output_c(fbuff, NULL, tolower(tags->value[i][j]));
433
+				html_output_c(fbuff, tolower(tags->value[i][j]));
434 434
 			}
435
-			html_output_c(fbuff, NULL, '"');
435
+			html_output_c(fbuff, '"');
436 436
 		}
437 437
 	}
438
-	html_output_c(fbuff, NULL, '>');
438
+	html_output_c(fbuff, '>');
439 439
 }
440 440
 
441 441
 void html_tag_arg_free(tag_arguments_t *tags)
... ...
@@ -509,7 +503,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
509 509
 	tag_arguments_t tag_args;
510 510
 	quoted_state quoted;
511 511
 	unsigned long length;
512
-	file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script, *file_buff_text;
512
+	file_buff_t *file_buff_o2, *file_buff_text;
513 513
 	file_buff_t *file_tmp_o1;
514 514
 	int in_ahref=0;/* index of <a> tag, whose contents we are parsing. Indexing starts from 1, 0 means outside of <a>*/
515 515
 	unsigned char* href_contents_begin=NULL;/*beginning of the next portion of <a> contents*/
... ...
@@ -549,94 +543,46 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
549 549
 	if (dirname) {
550 550
 		snprintf(filename, 1024, "%s/rfc2397", dirname);
551 551
 		if (mkdir(filename, 0700) && errno != EEXIST) {
552
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
553
-			goto abort;
554
-		}
555
-		file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
556
-		if (!file_buff_o1) {
557
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
552
+			file_buff_o2 = file_buff_text = NULL;
558 553
 			goto abort;
559 554
 		}
560 555
 
561 556
 		file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
562 557
 		if (!file_buff_o2) {
563
-			free(file_buff_o1);
564
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
565
-			goto abort;
566
-		}
567
-
568
-		file_buff_script = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
569
-		if (!file_buff_script) {
570
-			free(file_buff_o1);
571
-			free(file_buff_o2);
572
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
573
-			goto abort;
574
-		}
575
-
576
-		snprintf(filename, 1024, "%s/comment.html", dirname);
577
-		file_buff_o1->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
578
-		if (!file_buff_o1->fd) {
579
-			cli_dbgmsg("open failed: %s\n", filename);
580
-			free(file_buff_o1);
581
-			free(file_buff_o2);
582
-			free(file_buff_script);
583
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
558
+			file_buff_o2 = file_buff_text = NULL;
584 559
 			goto abort;
585 560
 		}
586 561
 
562
+		/* this will still contain scripts that are inside comments */
587 563
 		snprintf(filename, 1024, "%s/nocomment.html", dirname);
588 564
 		file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
589 565
 		if (!file_buff_o2->fd) {
590 566
 			cli_dbgmsg("open failed: %s\n", filename);
591
-			close(file_buff_o1->fd);
592
-			free(file_buff_o1);
593
-			free(file_buff_o2);
594
-			free(file_buff_script);
595
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
596
-			goto abort;
597
-		}
598
-
599
-		snprintf(filename, 1024, "%s/script.html", dirname);
600
-		file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
601
-		if (!file_buff_script->fd) {
602
-			cli_dbgmsg("open failed: %s\n", filename);
603
-			close(file_buff_o1->fd);
604
-			close(file_buff_o2->fd);
605
-			free(file_buff_o1);
606 567
 			free(file_buff_o2);
607
-			free(file_buff_script);
608
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
568
+			file_buff_o2 = file_buff_text = NULL;
609 569
 			goto abort;
610 570
 		}
611 571
 
612 572
 		file_buff_text = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
613 573
 		if(!file_buff_text) {
614
-			free(file_buff_o1);
615 574
 			free(file_buff_o2);
616
-			free(file_buff_script);
617
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
575
+			file_buff_o2 = file_buff_text = NULL;
618 576
 			goto abort;
619 577
 		}
578
+
620 579
 		snprintf(filename, 1024, "%s/notags.html", dirname);
621 580
 		file_buff_text->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
622 581
 		if(!file_buff_text->fd) {
623 582
 			cli_dbgmsg("open failed: %s\n", filename);
624
-			close(file_buff_o1->fd);
625 583
 			close(file_buff_o2->fd);
626
-			free(file_buff_o1);
627 584
 			free(file_buff_o2);
628
-			free(file_buff_script);
629 585
 			free(file_buff_text);
630
-			file_buff_o1 = file_buff_o2 = file_buff_script = file_buff_text = NULL;
586
+			file_buff_o2 = file_buff_text = NULL;
631 587
 		}
632
-		file_buff_o1->length = 0;
633 588
 		file_buff_o2->length = 0;
634
-		file_buff_script->length = 0;
635 589
 		file_buff_text->length = 0;
636 590
 	} else {
637
-		file_buff_o1 = NULL;
638 591
 		file_buff_o2 = NULL;
639
-		file_buff_script = NULL;
640 592
 		file_buff_text = NULL;
641 593
 	}
642 594
 
... ...
@@ -687,7 +633,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
687 687
 				if (isspace(*ptr)) {
688 688
 					ptr++;
689 689
 				} else {
690
-					html_output_c(file_buff_o1, file_buff_o2, ' ');
690
+					html_output_c(file_buff_o2, ' ');
691 691
 					state = next_state;
692 692
 					next_state = HTML_BAD_STATE;
693 693
 				}
... ...
@@ -695,11 +641,9 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
695 695
 			case HTML_NORM:
696 696
 				if (*ptr == '<') {
697 697
 					ptrend=ptr; /* for use by scanContents */
698
-					html_output_c(file_buff_o1, file_buff_o2, '<');
699
-					if (in_script) {
700
-						html_output_c(file_buff_script, NULL, '<');
701
-					} else if(!text_space_written) {
702
-						html_output_c(file_buff_text, NULL, ' ');
698
+					html_output_c(file_buff_o2, '<');
699
+					if (!in_script && !text_space_written) {
700
+						html_output_c(file_buff_text, ' ');
703 701
 						text_space_written = TRUE;
704 702
 					}
705 703
 					if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
... ...
@@ -713,25 +657,23 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
713 713
 					next_state = HTML_TAG;
714 714
 				} else if (isspace(*ptr)) {
715 715
 					if(!text_space_written && !in_script) {
716
-						html_output_c(file_buff_text, NULL, ' ');
716
+						html_output_c(file_buff_text, ' ');
717 717
 						text_space_written = TRUE;
718 718
 					}
719 719
 					state = HTML_TRIM_WS;
720 720
 					next_state = HTML_NORM;
721 721
 				} else if (*ptr == '&') {
722 722
 					if(!text_space_written && !in_script) {
723
-						html_output_c(file_buff_text, NULL, ' ');
723
+						html_output_c(file_buff_text, ' ');
724 724
 						text_space_written = TRUE;
725 725
 					}
726 726
 					state = HTML_CHAR_REF;
727 727
 					next_state = HTML_NORM;
728 728
 					ptr++;
729 729
 				} else {
730
-					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
731
-					if (in_script) {
732
-						html_output_c(file_buff_script, NULL, tolower(*ptr));
733
-					} else {
734
-						html_output_c(file_buff_text, NULL, tolower(*ptr));
730
+					html_output_c(file_buff_o2, tolower(*ptr));
731
+					if (!in_script) {
732
+						html_output_c(file_buff_text, tolower(*ptr));
735 733
 						text_space_written = FALSE;
736 734
 					}
737 735
 					ptr++;
... ...
@@ -740,31 +682,26 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
740 740
 			case HTML_TAG:
741 741
 				if ((tag_length == 0) && (*ptr == '!')) {
742 742
 					/* Comment */
743
-					html_output_c(file_buff_o1, NULL, '!');
744 743
 					if (in_script) {
745
-						html_output_c(file_buff_script, NULL, '!');
746
-					}
747
-					/* Need to rewind in the no-comment output stream */
748
-					if (file_buff_o2 && (file_buff_o2->length > 0)) {
749
-						file_buff_o2->length--;
744
+						/* we still write scripts to nocomment.html */
745
+						html_output_c(file_buff_o2, '!');
746
+					} else {
747
+						/* Need to rewind in the no-comment output stream */
748
+						if (file_buff_o2 && (file_buff_o2->length > 0)) {
749
+							file_buff_o2->length--;
750
+						}
750 751
 					}
751 752
 					state = HTML_COMMENT;
752 753
 					next_state = HTML_BAD_STATE;
753 754
 					ptr++;
754 755
 				} else if (*ptr == '>') {
755
-					html_output_c(file_buff_o1, file_buff_o2, '>');
756
-					if (in_script) {
757
-						html_output_c(file_buff_script, NULL, '>');
758
-					}
756
+					html_output_c(file_buff_o2, '>');
759 757
 					ptr++;
760 758
 					tag[tag_length] = '\0';
761 759
 					state = HTML_SKIP_WS;
762 760
 					next_state = HTML_PROCESS_TAG;
763 761
 				} else if (!isspace(*ptr)) {
764
-					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
765
-					if (in_script) {
766
-						html_output_c(file_buff_script, NULL, tolower(*ptr));
767
-					}
762
+					html_output_c(file_buff_o2, tolower(*ptr));
768 763
 					if (tag_length < HTML_STR_LENGTH) {
769 764
 						tag[tag_length++] = tolower(*ptr);
770 765
 					}
... ...
@@ -778,7 +715,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
778 778
 				break;
779 779
 			case HTML_TAG_ARG:
780 780
 				if (*ptr == '=') {
781
-					html_output_c(file_buff_o1, file_buff_o2, '=');
781
+					html_output_c(file_buff_o2, '=');
782 782
 					tag_arg[tag_arg_length] = '\0';
783 783
 					ptr++;
784 784
 					state = HTML_SKIP_WS;
... ...
@@ -792,7 +729,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
792 792
 					state = HTML_SKIP_WS;
793 793
 					next_state = HTML_TAG_ARG_EQUAL;
794 794
 				} else if (*ptr == '>') {
795
-					html_output_c(file_buff_o1, file_buff_o2, '>');
795
+					html_output_c(file_buff_o2, '>');
796 796
 					if (tag_arg_length > 0) {
797 797
 						tag_arg[tag_arg_length] = '\0';
798 798
 						html_tag_arg_add(&tag_args, tag_arg, NULL);
... ...
@@ -803,9 +740,9 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
803 803
 				} else {
804 804
 					if (tag_arg_length == 0) {
805 805
 						/* Start of new tag - add space */
806
-						html_output_c(file_buff_o1, file_buff_o2,' ');
806
+						html_output_c(file_buff_o2,' ');
807 807
 					}
808
-					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
808
+					html_output_c(file_buff_o2, tolower(*ptr));
809 809
 					if (tag_arg_length < HTML_STR_LENGTH) {
810 810
 						tag_arg[tag_arg_length++] = tolower(*ptr);
811 811
 					}
... ...
@@ -814,7 +751,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
814 814
 				break;
815 815
 			case HTML_TAG_ARG_EQUAL:
816 816
 				if (*ptr == '=') {
817
-					html_output_c(file_buff_o1, file_buff_o2, '=');
817
+					html_output_c(file_buff_o2, '=');
818 818
 					ptr++;
819 819
 					state = HTML_SKIP_WS;
820 820
 					escape = FALSE;
... ...
@@ -836,15 +773,12 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
836 836
 					/* RFC2397 inline data */
837 837
 
838 838
 					/* Rewind one byte so we don't recurse */
839
-					if (file_buff_o1 && (file_buff_o1->length > 0)) {
840
-						file_buff_o1->length--;
841
-					}
842 839
 					if (file_buff_o2 && (file_buff_o2->length > 0)) {
843 840
 						file_buff_o2->length--;
844 841
 					}
845 842
 
846 843
 					if (quoted != NOT_QUOTED) {
847
-						html_output_c(file_buff_o1, file_buff_o2, '"');
844
+						html_output_c(file_buff_o2, '"');
848 845
 					}
849 846
 					tag_val_length = 0;
850 847
 					state = HTML_RFC2397_TYPE;
... ...
@@ -853,15 +787,12 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
853 853
 					/* RFC2397 inline data */
854 854
 
855 855
 					/* Rewind one byte so we don't recurse */
856
-					if (file_buff_o1 && (file_buff_o1->length > 0)) {
857
-						file_buff_o1->length--;
858
-					}
859 856
 					if (file_buff_o2 && (file_buff_o2->length > 0)) {
860 857
 						file_buff_o2->length--;
861 858
 					}
862 859
 
863 860
 					if (quoted != NOT_QUOTED) {
864
-						html_output_c(file_buff_o1, file_buff_o2, '"');
861
+						html_output_c(file_buff_o2, '"');
865 862
 					}
866 863
 
867 864
 					tag_val_length = 0;
... ...
@@ -874,14 +805,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
874 874
 				} else if (*ptr == '\'') {
875 875
 					if (tag_val_length == 0) {
876 876
 						quoted = SINGLE_QUOTED;
877
-						html_output_c(file_buff_o1, file_buff_o2, '"');
877
+						html_output_c(file_buff_o2, '"');
878 878
 						if (tag_val_length < HTML_STR_LENGTH) {
879 879
 							tag_val[tag_val_length++] = '"';
880 880
 						}
881 881
 						ptr++;
882 882
 					} else {
883 883
 						if (!escape && (quoted==SINGLE_QUOTED)) {
884
-							html_output_c(file_buff_o1, file_buff_o2, '"');
884
+							html_output_c(file_buff_o2, '"');
885 885
 							if (tag_val_length < HTML_STR_LENGTH) {
886 886
 								tag_val[tag_val_length++] = '"';
887 887
 							}
... ...
@@ -892,7 +823,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
892 892
 							tag_arg_length=0;
893 893
 							next_state = HTML_TAG_ARG;
894 894
 						} else {
895
-							html_output_c(file_buff_o1, file_buff_o2, '"');
895
+							html_output_c(file_buff_o2, '"');
896 896
 							if (tag_val_length < HTML_STR_LENGTH) {
897 897
 								tag_val[tag_val_length++] = '"';
898 898
 							}
... ...
@@ -902,14 +833,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
902 902
 				} else if (*ptr == '"') {
903 903
 					if (tag_val_length == 0) {
904 904
 						quoted = DOUBLE_QUOTED;
905
-						html_output_c(file_buff_o1, file_buff_o2, '"');
905
+						html_output_c(file_buff_o2, '"');
906 906
 						if (tag_val_length < HTML_STR_LENGTH) {
907 907
 							tag_val[tag_val_length++] = '"';
908 908
 						}
909 909
 						ptr++;
910 910
 					} else {
911 911
 						if (!escape && (quoted==DOUBLE_QUOTED)) {
912
-							html_output_c(file_buff_o1, file_buff_o2, '"');
912
+							html_output_c(file_buff_o2, '"');
913 913
 							if (tag_val_length < HTML_STR_LENGTH) {
914 914
 								tag_val[tag_val_length++] = '"';
915 915
 							}
... ...
@@ -920,7 +851,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
920 920
 							tag_arg_length=0;
921 921
 							next_state = HTML_TAG_ARG;
922 922
 						} else {
923
-							html_output_c(file_buff_o1, file_buff_o2, '"');
923
+							html_output_c(file_buff_o2, '"');
924 924
 							if (tag_val_length < HTML_STR_LENGTH) {
925 925
 								tag_val[tag_val_length++] = '"';
926 926
 							}
... ...
@@ -935,7 +866,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
935 935
 						tag_arg_length=0;
936 936
 						next_state = HTML_TAG_ARG;
937 937
 					} else {
938
-						html_output_c(file_buff_o1, file_buff_o2, *ptr);
938
+						html_output_c(file_buff_o2, *ptr);
939 939
 						if (tag_val_length < HTML_STR_LENGTH) {
940 940
 							if (isspace(*ptr)) {
941 941
 								tag_val[tag_val_length++] = ' ';
... ...
@@ -950,7 +881,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
950 950
 						ptr++;
951 951
 					}
952 952
 				} else {
953
-					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
953
+					html_output_c(file_buff_o2, tolower(*ptr));
954 954
 					if (tag_val_length < HTML_STR_LENGTH) {
955 955
 						tag_val[tag_val_length++] = *ptr;
956 956
 					}
... ...
@@ -964,13 +895,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
964 964
 				}
965 965
 				break;
966 966
 			case HTML_COMMENT:
967
-				html_output_c(file_buff_o1, NULL, tolower(*ptr));
968 967
 				if (in_script) {
969
-					html_output_c(file_buff_script, NULL, tolower(*ptr));
968
+					/* dump script to nocomment.html, since we no longer have
969
+					 * comment.html/script.html */
970
+					html_output_c(file_buff_o2, tolower(*ptr));
970 971
 				}
971 972
 				if (*ptr == '>') {
972 973
 					state = HTML_SKIP_WS;
973
-					next_state = HTML_NORM;	
974
+					next_state = HTML_NORM;
974 975
 				}
975 976
 				ptr++;
976 977
 				break;
... ...
@@ -985,7 +917,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
985 985
 					next_state = HTML_NORM;
986 986
 					if (strcmp(tag, "/script") == 0) {
987 987
 						in_script=FALSE;
988
-						html_output_c(file_buff_script, NULL, '\n');
988
+						html_output_c(file_buff_o2, '\n');
989 989
 					}
990 990
 					if (hrefs && hrefs->scanContents && in_ahref) {
991 991
 						if(strcmp(tag,"/a") == 0) {
... ...
@@ -1002,18 +934,22 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1002 1002
 					}
1003 1003
 				} else if (strcmp(tag, "script") == 0) {
1004 1004
 					arg_value = html_tag_arg_value(&tag_args, "language");
1005
+					/* TODO: maybe we can output all tags only via html_output_tag */
1005 1006
 					if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
1006 1007
 						html_tag_arg_set(&tag_args, "language", "javascript");
1007 1008
 						state = HTML_SKIP_WS;
1008 1009
 						next_state = HTML_JSDECODE;
1010
+						/* we already output the old tag, output the new tag now */
1011
+						html_output_tag(file_buff_o2, tag, &tag_args);
1009 1012
 					} else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
1010 1013
 						html_tag_arg_set(&tag_args, "language", "vbscript");
1011 1014
 						state = HTML_SKIP_WS;
1012 1015
 						next_state = HTML_JSDECODE;
1016
+						/* we already output the old tag, output the new tag now */
1017
+						html_output_tag(file_buff_o2, tag, &tag_args);
1013 1018
 					} else {
1014 1019
 						in_script = TRUE;
1015 1020
 					}
1016
-					html_output_tag(file_buff_script, tag, &tag_args);
1017 1021
 				} else if (hrefs) {
1018 1022
 					if(in_ahref && !href_contents_begin)
1019 1023
 						href_contents_begin=ptr;
... ...
@@ -1133,13 +1069,13 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1133 1133
 					arg_value = html_tag_arg_value(&tag_args, "href");
1134 1134
 					if(arg_value && arg_value[0]) {
1135 1135
 						html_output_str(file_buff_text, arg_value, strlen(arg_value));
1136
-						html_output_c(file_buff_text, NULL, ' ');
1136
+						html_output_c(file_buff_text, ' ');
1137 1137
 					}
1138 1138
 				} else if (strcmp(tag, "img") == 0) {
1139 1139
 					arg_value = html_tag_arg_value(&tag_args, "src");
1140 1140
 					if(arg_value && arg_value[0]) {
1141 1141
 						html_output_str(file_buff_text, arg_value, strlen(arg_value));
1142
-						html_output_c(file_buff_text, NULL, ' ');
1142
+						html_output_c(file_buff_text, ' ');
1143 1143
 					}
1144 1144
 				}
1145 1145
 				html_tag_arg_free(&tag_args);
... ...
@@ -1157,7 +1093,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1157 1157
 						if(next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1158 1158
 							tag_val[tag_val_length++] = '&';
1159 1159
 						}
1160
-						html_output_c(file_buff_o1, file_buff_o2, '&');
1160
+						html_output_c(file_buff_o2, '&');
1161 1161
 
1162 1162
 						state = next_state;
1163 1163
 						next_state = HTML_BAD_STATE;
... ...
@@ -1173,20 +1109,20 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1173 1173
 					if(normalized) {
1174 1174
 						for(i=0; i < strlen(normalized); i++) {
1175 1175
 							const unsigned char c = normalized[i]&0xff;
1176
-							html_output_c(file_buff_o1, file_buff_o2, c);
1176
+							html_output_c(file_buff_o2, c);
1177 1177
 							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1178 1178
 								tag_val[tag_val_length++] = c;
1179 1179
 							}
1180 1180
 						}
1181 1181
 					}
1182 1182
 					else {
1183
-						html_output_c(file_buff_o1, file_buff_o2, '&');
1183
+						html_output_c(file_buff_o2, '&');
1184 1184
 						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1185 1185
 								tag_val[tag_val_length++] = '&';
1186 1186
 						}
1187 1187
 						for(i=0; i < entity_val_length; i++) {
1188 1188
 							const char c = tolower(entity_val[i]);
1189
-							html_output_c(file_buff_o1, file_buff_o2, c);
1189
+							html_output_c(file_buff_o2, c);
1190 1190
 							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1191 1191
 								tag_val[tag_val_length++] = c;
1192 1192
 							}
... ...
@@ -1194,7 +1130,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1194 1194
 						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1195 1195
 							tag_val[tag_val_length++] = ';';
1196 1196
 						}
1197
-						html_output_c(file_buff_o1, file_buff_o2, ';');
1197
+						html_output_c(file_buff_o2, ';');
1198 1198
 					}
1199 1199
 					entity_val_length = 0;
1200 1200
 					state = next_state;
... ...
@@ -1210,10 +1146,10 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1210 1210
 						if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1211 1211
 								tag_val[tag_val_length++] = '&';
1212 1212
 						}
1213
-						html_output_c(file_buff_o1, file_buff_o2, '&');
1213
+						html_output_c(file_buff_o2, '&');
1214 1214
 						for(i=0; i < entity_val_length; i++) {
1215 1215
 							const char c = tolower(entity_val[i]);
1216
-							html_output_c(file_buff_o1, file_buff_o2, c);
1216
+							html_output_c(file_buff_o2, c);
1217 1217
 							if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
1218 1218
 								tag_val[tag_val_length++] = c;
1219 1219
 							}
... ...
@@ -1235,17 +1171,16 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1235 1235
 					if(dconf_entconv) {
1236 1236
 
1237 1237
 						if(value < 0x80)
1238
-							html_output_c(file_buff_o1, file_buff_o2, tolower(value));
1238
+							html_output_c(file_buff_o2, tolower(value));
1239 1239
 						else {
1240 1240
 							unsigned char buff[10];
1241 1241
 							unsigned char* out = u16_normalize_tobuffer(value, buff, 10);
1242
-							if(out) {
1243
-								html_output_str(file_buff_o1, buff, out-buff);
1244
-								html_output_str(file_buff_o2, buff, out-buff);
1242
+							if(out && out>buff) {
1243
+								html_output_str(file_buff_o2, buff, out-buff-1);
1245 1244
 							}
1246 1245
 						}
1247 1246
 					} else
1248
-							html_output_c(file_buff_o1, file_buff_o2, tolower(value&0xff));
1247
+							html_output_c(file_buff_o2, tolower(value&0xff));
1249 1248
 					state = next_state;
1250 1249
 					next_state = HTML_BAD_STATE;
1251 1250
 					ptr++;
... ...
@@ -1262,7 +1197,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1262 1262
 					}
1263 1263
 					ptr++;
1264 1264
 				} else {
1265
-					html_output_c(file_buff_o1, file_buff_o2, value);
1265
+					html_output_c(file_buff_o2, value);
1266 1266
 					state = next_state;
1267 1267
 					next_state = HTML_BAD_STATE;
1268 1268
 				}
... ...
@@ -1274,8 +1209,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1274 1274
 					state = HTML_JSDECODE_LENGTH;
1275 1275
 					next_state = HTML_BAD_STATE;
1276 1276
 				} else {
1277
-					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
1278
-					html_output_c(file_buff_script, NULL, tolower(*ptr));
1277
+					html_output_c(file_buff_o2, tolower(*ptr));
1279 1278
 					ptr++;
1280 1279
 				}
1281 1280
 				break;
... ...
@@ -1300,7 +1234,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1300 1300
 				break;
1301 1301
 			case HTML_JSDECODE_DECRYPT:
1302 1302
 				if (length == 0) {
1303
-					html_output_str(file_buff_script, "</script>\n", 10);
1303
+					html_output_str(file_buff_o2, "</script>\n", 10);
1304 1304
 					length = 12;
1305 1305
 					state = HTML_SKIP_LENGTH;
1306 1306
 					next_state = HTML_NORM;
... ...
@@ -1317,29 +1251,23 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1317 1317
 							ptr--;
1318 1318
 							break;
1319 1319
 						case 0x21:
1320
-							html_output_c(file_buff_o1, file_buff_o2, 0x3c);
1321
-							html_output_c(file_buff_script, NULL, 0x3c);
1320
+							html_output_c(file_buff_o2, 0x3c);
1322 1321
 							break;
1323 1322
 						case 0x23:
1324
-							html_output_c(file_buff_o1, file_buff_o2, 0x0d);
1325
-							html_output_c(file_buff_script, NULL, 0x0d);
1323
+							html_output_c(file_buff_o2, 0x0d);
1326 1324
 							break;
1327 1325
 						case 0x24:
1328
-							html_output_c(file_buff_o1, file_buff_o2, 0x40);
1329
-							html_output_c(file_buff_script, NULL, 0x40);
1326
+							html_output_c(file_buff_o2, 0x40);
1330 1327
 							break;
1331 1328
 						case 0x26:
1332
-							html_output_c(file_buff_o1, file_buff_o2, 0x0a);
1333
-							html_output_c(file_buff_script, NULL, 0x0a);
1329
+							html_output_c(file_buff_o2, 0x0a);
1334 1330
 							break;
1335 1331
 						case 0x2a:
1336
-							html_output_c(file_buff_o1, file_buff_o2, 0x3e);
1337
-							html_output_c(file_buff_script, NULL, 0x3e);
1332
+							html_output_c(file_buff_o2, 0x3e);
1338 1333
 							break;
1339 1334
 						}
1340 1335
 					} else {
1341
-						html_output_c(file_buff_o1, file_buff_o2, value);
1342
-						html_output_c(file_buff_script, NULL, tolower(value));
1336
+						html_output_c(file_buff_o2, value);
1343 1337
 					}
1344 1338
 				}
1345 1339
 				table_pos = (table_pos + 1) % 64;
... ...
@@ -1438,11 +1366,11 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1438 1438
 						html_output_str(file_tmp_o1, "text/plain\n", 11);
1439 1439
 					}
1440 1440
 					html_output_str(file_tmp_o1, tag_val, tag_val_length);
1441
-					html_output_c(file_tmp_o1, NULL, '\n');
1441
+					html_output_c(file_tmp_o1, '\n');
1442 1442
 					if (strstr(tag_val, ";base64") != NULL) {
1443 1443
 						html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
1444 1444
 					}
1445
-					html_output_c(file_tmp_o1, NULL, '\n');
1445
+					html_output_c(file_tmp_o1, '\n');
1446 1446
 				} else {
1447 1447
 					file_tmp_o1 = NULL;
1448 1448
 				}
... ...
@@ -1465,7 +1393,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1465 1465
 						state = HTML_RFC2397_FINISH;
1466 1466
 						ptr++;
1467 1467
 					} else {
1468
-						html_output_c(file_tmp_o1, NULL, *ptr);
1468
+						html_output_c(file_tmp_o1, *ptr);
1469 1469
 						ptr++;
1470 1470
 					}
1471 1471
 				} else if (*ptr == '\"') {
... ...
@@ -1473,7 +1401,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1473 1473
 						state = HTML_RFC2397_FINISH;
1474 1474
 						ptr++;
1475 1475
 					} else {
1476
-						html_output_c(file_tmp_o1, NULL, *ptr);
1476
+						html_output_c(file_tmp_o1, *ptr);
1477 1477
 						ptr++;
1478 1478
 					}
1479 1479
 				} else if (isspace(*ptr) || (*ptr == '>')) {
... ...
@@ -1481,11 +1409,11 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1481 1481
 						state = HTML_RFC2397_FINISH;
1482 1482
 						ptr++;
1483 1483
 					} else {
1484
-						html_output_c(file_tmp_o1, NULL, *ptr);
1484
+						html_output_c(file_tmp_o1, *ptr);
1485 1485
 						ptr++;
1486 1486
 					}
1487 1487
 				} else {
1488
-					html_output_c(file_tmp_o1, NULL, *ptr);
1488
+					html_output_c(file_tmp_o1, *ptr);
1489 1489
 					ptr++;
1490 1490
 				}
1491 1491
 				if (*ptr == '\\') {
... ...
@@ -1508,12 +1436,12 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1508 1508
 				break;
1509 1509
 			case HTML_RFC2397_ESC:
1510 1510
 				if (length == 2) {
1511
-					html_output_c(file_tmp_o1, NULL, value);
1511
+					html_output_c(file_tmp_o1, value);
1512 1512
 				} else if (length == 1) {
1513
-					html_output_c(file_tmp_o1, NULL, '%');
1514
-					html_output_c(file_tmp_o1, NULL, value+'0');
1513
+					html_output_c(file_tmp_o1, '%');
1514
+					html_output_c(file_tmp_o1, value+'0');
1515 1515
 				} else {
1516
-					html_output_c(file_tmp_o1, NULL, '%');
1516
+					html_output_c(file_tmp_o1, '%');
1517 1517
 				}
1518 1518
 				state = HTML_RFC2397_DATA;
1519 1519
 				break;
... ...
@@ -1552,13 +1480,13 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1552 1552
 		normalized = entity_norm(&conv, entity_val);
1553 1553
 		if(normalized) {
1554 1554
 			for(i=0; i < strlen(normalized); i++)
1555
-				html_output_c(file_buff_o1, file_buff_o2, normalized[i]&0xff);
1555
+				html_output_c(file_buff_o2, normalized[i]&0xff);
1556 1556
 		}
1557 1557
 		else {
1558 1558
 			if(entity_val_length) {
1559
-				html_output_c(file_buff_o1, file_buff_o2, '&');
1559
+				html_output_c(file_buff_o2, '&');
1560 1560
 				for(i=0; i < entity_val_length; i++)
1561
-					html_output_c(file_buff_o1, file_buff_o2, tolower(entity_val[i]));
1561
+					html_output_c(file_buff_o2, tolower(entity_val[i]));
1562 1562
 			}
1563 1563
 		}
1564 1564
 	}
... ...
@@ -1573,21 +1501,11 @@ abort:
1573 1573
 	if (!m_area) {
1574 1574
 		fclose(stream_in);
1575 1575
 	}
1576
-	if (file_buff_o1) {
1577
-		html_output_flush(file_buff_o1);
1578
-		close(file_buff_o1->fd);
1579
-		free(file_buff_o1);
1580
-	}
1581 1576
 	if (file_buff_o2) {
1582 1577
 		html_output_flush(file_buff_o2);
1583 1578
 		close(file_buff_o2->fd);
1584 1579
 		free(file_buff_o2);
1585 1580
 	}
1586
-	if (file_buff_script) {
1587
-		html_output_flush(file_buff_script);
1588
-		close(file_buff_script->fd);
1589
-		free(file_buff_script);
1590
-	}
1591 1581
 	if(file_buff_text) {
1592 1582
 		html_output_flush(file_buff_text);
1593 1583
 		close(file_buff_text->fd);
... ...
@@ -1722,19 +1640,19 @@ int html_screnc_decode(int fd, const char *dirname)
1722 1722
 			case HTML_SPECIAL_CHAR:
1723 1723
 				switch (*ptr) {
1724 1724
 				case 0x21:
1725
-					html_output_c(&file_buff, NULL, 0x3c);
1725
+					html_output_c(&file_buff, 0x3c);
1726 1726
 					break;
1727 1727
 				case 0x23:
1728
-					html_output_c(&file_buff, NULL, 0x0d);
1728
+					html_output_c(&file_buff, 0x0d);
1729 1729
 					break;
1730 1730
 				case 0x24:
1731
-					html_output_c(&file_buff, NULL, 0x40);
1731
+					html_output_c(&file_buff, 0x40);
1732 1732
 					break;				
1733 1733
 				case 0x26:
1734
-					html_output_c(&file_buff, NULL, 0x0a);
1734
+					html_output_c(&file_buff, 0x0a);
1735 1735
 					break;
1736 1736
 				case 0x2a:
1737
-					html_output_c(&file_buff, NULL, 0x3e);
1737
+					html_output_c(&file_buff, 0x3e);
1738 1738
 					break;
1739 1739
 				}
1740 1740
 				ptr++;
... ...
@@ -1747,7 +1665,7 @@ int html_screnc_decode(int fd, const char *dirname)
1747 1747
 					if (result == 0xFF) { /* special character */
1748 1748
 						state = HTML_SPECIAL_CHAR;
1749 1749
 					} else {
1750
-						html_output_c(&file_buff, NULL, (char)result);
1750
+						html_output_c(&file_buff, (char)result);
1751 1751
 					}
1752 1752
 				}
1753 1753
 				ptr++;