Normalize Big5 dot inside hrefs too
Török Edvin authored on 2012/01/17 03:15:49... | ... |
@@ -479,9 +479,13 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns |
479 | 479 |
if (mbchar && (c < 0x80 || mbchar >= 0x10000)) { |
480 | 480 |
if (mbchar == 0xE38082 || mbchar == 0xEFBC8E |
481 | 481 |
|| mbchar == 0xEFB992 || |
482 |
- mbchar == 0xA143 || mbchar == 0xA144 || |
|
483 |
- mbchar == 0xA14F) { |
|
482 |
+ mbchar == 0xA1 && (c == 0x43 || c == 0x44 || c == 0x4F)) { |
|
484 | 483 |
cont->contents[i++] = '.'; |
484 |
+ if (mbchar == 0xA1) { |
|
485 |
+ --i; |
|
486 |
+ mbchar = 0; |
|
487 |
+ continue; |
|
488 |
+ } |
|
485 | 489 |
} else { |
486 | 490 |
uint8_t c0 = mbchar >> 16; |
487 | 491 |
uint8_t c1 = (mbchar >> 8)&0xff; |
... | ... |
@@ -659,6 +663,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
659 | 659 |
const unsigned char *js_begin = NULL, *js_end = NULL; |
660 | 660 |
struct tag_contents contents; |
661 | 661 |
uint32_t mbchar = 0; |
662 |
+ uint32_t mbchar2 = 0; |
|
662 | 663 |
|
663 | 664 |
tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/ |
664 | 665 |
contents.pos = 0; |
... | ... |
@@ -781,11 +786,15 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
781 | 781 |
if (*ptr < 0x80 || mbchar >= 0x10000) { |
782 | 782 |
if (mbchar == 0xE38082 || mbchar == 0xEFBC8E |
783 | 783 |
|| mbchar == 0xEFB992 || |
784 |
- mbchar == 0xA143 || mbchar == 0xA144 || |
|
785 |
- mbchar == 0xA14F) { |
|
784 |
+ mbchar == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F)) { |
|
786 | 785 |
/* bb #4097 */ |
787 | 786 |
html_output_c(file_buff_o2, '.'); |
788 | 787 |
html_output_c(file_buff_text, '.'); |
788 |
+ if (mbchar == 0xA1) { |
|
789 |
+ ptr++; |
|
790 |
+ mbchar = 0; |
|
791 |
+ continue; |
|
792 |
+ } |
|
789 | 793 |
} else { |
790 | 794 |
uint8_t c0 = mbchar >> 16; |
791 | 795 |
uint8_t c1 = (mbchar >> 8)&0xff; |
... | ... |
@@ -1072,11 +1081,45 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1072 | 1072 |
ptr++; |
1073 | 1073 |
} |
1074 | 1074 |
} else { |
1075 |
+ if (mbchar2 && (*ptr < 0x80 || mbchar2 >= 0x10000)) { |
|
1076 |
+ if (mbchar2 == 0xE38082 || mbchar2 == 0xEFBC8E |
|
1077 |
+ || mbchar2 == 0xEFB992 || |
|
1078 |
+ mbchar2 == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F)) { |
|
1079 |
+ html_output_c(file_buff_o2, '.'); |
|
1080 |
+ if (tag_val_length < HTML_STR_LENGTH) |
|
1081 |
+ tag_val[tag_val_length++] = '.'; |
|
1082 |
+ if (mbchar2 == 0xA1) { |
|
1083 |
+ ptr++; |
|
1084 |
+ mbchar2 = 0; |
|
1085 |
+ continue; |
|
1086 |
+ } |
|
1087 |
+ } else { |
|
1088 |
+ uint8_t c0 = mbchar2 >> 16; |
|
1089 |
+ uint8_t c1 = (mbchar2 >> 8)&0xff; |
|
1090 |
+ uint8_t c2 = (mbchar2 & 0xff); |
|
1091 |
+ if (c0) |
|
1092 |
+ html_output_c(file_buff_o2, c0); |
|
1093 |
+ if (c0 || c1) |
|
1094 |
+ html_output_c(file_buff_o2, c1); |
|
1095 |
+ html_output_c(file_buff_o2, c2); |
|
1096 |
+ if (c0 && tag_val_length < HTML_STR_LENGTH) |
|
1097 |
+ tag_val[tag_val_length++] = c0; |
|
1098 |
+ if ((c0 || c1) && tag_val_length < HTML_STR_LENGTH) |
|
1099 |
+ tag_val[tag_val_length++] = c1; |
|
1100 |
+ if (tag_val_length < HTML_STR_LENGTH) |
|
1101 |
+ tag_val[tag_val_length++] = c2; |
|
1102 |
+ } |
|
1103 |
+ mbchar2 = 0; |
|
1104 |
+ } |
|
1105 |
+ if (*ptr >= 0x80) |
|
1106 |
+ mbchar2 = (mbchar2 << 8) | *ptr; |
|
1107 |
+ else { |
|
1075 | 1108 |
html_output_c(file_buff_o2, tolower(*ptr)); |
1076 | 1109 |
if (tag_val_length < HTML_STR_LENGTH) { |
1077 | 1110 |
tag_val[tag_val_length++] = *ptr; |
1078 | 1111 |
} |
1079 |
- ptr++; |
|
1112 |
+ } |
|
1113 |
+ ptr++; |
|
1080 | 1114 |
} |
1081 | 1115 |
|
1082 | 1116 |
if (*ptr == '\\') { |