git-svn: trunk@3675
Török Edvin authored on 2008/02/26 20:05:11... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Tue Feb 26 12:06:48 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/htmlnorm.c: strip spaces from scripts, normalize screnc |
|
4 |
+ |
|
1 | 5 |
Mon Feb 25 13:01:46 EET 2008 (edwin) |
2 | 6 |
------------------------------------ |
3 | 7 |
* revert the Makefile changes in r3671. |
... | ... |
@@ -633,7 +633,8 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
633 | 633 |
if (isspace(*ptr)) { |
634 | 634 |
ptr++; |
635 | 635 |
} else { |
636 |
- html_output_c(file_buff_o2, ' '); |
|
636 |
+ if(!in_script) |
|
637 |
+ html_output_c(file_buff_o2, ' '); |
|
637 | 638 |
state = next_state; |
638 | 639 |
next_state = HTML_BAD_STATE; |
639 | 640 |
} |
... | ... |
@@ -673,8 +674,15 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
673 | 673 |
} else { |
674 | 674 |
html_output_c(file_buff_o2, tolower(*ptr)); |
675 | 675 |
if (!in_script) { |
676 |
- html_output_c(file_buff_text, tolower(*ptr)); |
|
677 |
- text_space_written = FALSE; |
|
676 |
+ if(*ptr < 0x20) { |
|
677 |
+ if(!text_space_written) { |
|
678 |
+ html_output_c(file_buff_text, ' '); |
|
679 |
+ text_space_written = TRUE; |
|
680 |
+ } |
|
681 |
+ } else { |
|
682 |
+ html_output_c(file_buff_text, tolower(*ptr)); |
|
683 |
+ text_space_written = FALSE; |
|
684 |
+ } |
|
678 | 685 |
} |
679 | 686 |
ptr++; |
680 | 687 |
} |
... | ... |
@@ -710,7 +718,9 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
710 | 710 |
tag[tag_length] = '\0'; |
711 | 711 |
state = HTML_SKIP_WS; |
712 | 712 |
tag_arg_length = 0; |
713 |
- next_state = HTML_TAG_ARG; |
|
713 |
+ /* If we're inside a script, we only care about </script>.
|
714 |
+ * If we went to HTML_TAG_ARG, whitespace would be normalized inconsistently. */
|
715 |
+ next_state = !in_script ? HTML_TAG_ARG : HTML_NORM; |
|
714 | 716 |
} |
715 | 717 |
break; |
716 | 718 |
case HTML_TAG_ARG: |
... | ... |
@@ -895,7 +905,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
895 | 895 |
} |
896 | 896 |
break; |
897 | 897 |
case HTML_COMMENT: |
898 |
- if (in_script) { |
|
898 |
+ if (in_script && !isspace(*ptr)) { |
|
899 | 899 |
/* dump script to nocomment.html, since we no longer have |
900 | 900 |
* comment.html/script.html */ |
901 | 901 |
html_output_c(file_buff_o2, tolower(*ptr)); |
... | ... |
@@ -1070,12 +1080,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1070 | 1070 |
if(arg_value && arg_value[0]) { |
1071 | 1071 |
html_output_str(file_buff_text, arg_value, strlen(arg_value)); |
1072 | 1072 |
html_output_c(file_buff_text, ' '); |
1073 |
+ text_space_written = TRUE; |
|
1073 | 1074 |
} |
1074 | 1075 |
} else if (strcmp(tag, "img") == 0) { |
1075 | 1076 |
arg_value = html_tag_arg_value(&tag_args, "src"); |
1076 | 1077 |
if(arg_value && arg_value[0]) { |
1077 | 1078 |
html_output_str(file_buff_text, arg_value, strlen(arg_value)); |
1078 | 1079 |
html_output_c(file_buff_text, ' '); |
1080 |
+ text_space_written = TRUE; |
|
1079 | 1081 |
} |
1080 | 1082 |
} |
1081 | 1083 |
html_tag_arg_free(&tag_args); |
... | ... |
@@ -1253,21 +1265,27 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1253 | 1253 |
case 0x21: |
1254 | 1254 |
html_output_c(file_buff_o2, 0x3c); |
1255 | 1255 |
break; |
1256 |
+ /* |
|
1256 | 1257 |
case 0x23: |
1257 | 1258 |
html_output_c(file_buff_o2, 0x0d); |
1258 | 1259 |
break; |
1260 |
+ we strip whitespace |
|
1261 |
+ */ |
|
1259 | 1262 |
case 0x24: |
1260 | 1263 |
html_output_c(file_buff_o2, 0x40); |
1261 | 1264 |
break; |
1265 |
+ /* |
|
1262 | 1266 |
case 0x26: |
1263 | 1267 |
html_output_c(file_buff_o2, 0x0a); |
1264 | 1268 |
break; |
1269 |
+ we strip whitespace |
|
1270 |
+ */ |
|
1265 | 1271 |
case 0x2a: |
1266 | 1272 |
html_output_c(file_buff_o2, 0x3e); |
1267 | 1273 |
break; |
1268 | 1274 |
} |
1269 |
- } else { |
|
1270 |
- html_output_c(file_buff_o2, value); |
|
1275 |
+ } else if(!isspace(value&0xff)) { |
|
1276 |
+ html_output_c(file_buff_o2, tolower(value&0xff)); |
|
1271 | 1277 |
} |
1272 | 1278 |
} |
1273 | 1279 |
table_pos = (table_pos + 1) % 64; |
... | ... |
@@ -1642,15 +1660,19 @@ int html_screnc_decode(int fd, const char *dirname) |
1642 | 1642 |
case 0x21: |
1643 | 1643 |
html_output_c(&file_buff, 0x3c); |
1644 | 1644 |
break; |
1645 |
- case 0x23: |
|
1645 |
+ /*case 0x23: |
|
1646 | 1646 |
html_output_c(&file_buff, 0x0d); |
1647 | 1647 |
break; |
1648 |
+ we strip whitespace |
|
1649 |
+ */ |
|
1648 | 1650 |
case 0x24: |
1649 | 1651 |
html_output_c(&file_buff, 0x40); |
1650 |
- break; |
|
1651 |
- case 0x26: |
|
1652 |
+ break; |
|
1653 |
+ /*case 0x26: |
|
1652 | 1654 |
html_output_c(&file_buff, 0x0a); |
1653 | 1655 |
break; |
1656 |
+ we strip whitespace |
|
1657 |
+ */ |
|
1654 | 1658 |
case 0x2a: |
1655 | 1659 |
html_output_c(&file_buff, 0x3e); |
1656 | 1660 |
break; |
... | ... |
@@ -1664,8 +1686,8 @@ int html_screnc_decode(int fd, const char *dirname) |
1664 | 1664 |
result = decrypt_tables[table_order[table_pos]][*ptr]; |
1665 | 1665 |
if (result == 0xFF) { /* special character */ |
1666 | 1666 |
state = HTML_SPECIAL_CHAR; |
1667 |
- } else { |
|
1668 |
- html_output_c(&file_buff, (char)result); |
|
1667 |
+ } else if(!isspace(result&0xff)) { |
|
1668 |
+ html_output_c(&file_buff, tolower(result&0xff)); |
|
1669 | 1669 |
} |
1670 | 1670 |
} |
1671 | 1671 |
ptr++; |