Browse code

bb #4097

Normalize Big5 dot inside hrefs too

Török Edvin authored on 2012/01/17 03:15:49
Showing 1 changed file
... ...
@@ -479,9 +479,13 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns
479 479
             if (mbchar && (c < 0x80 || mbchar >= 0x10000)) {
480 480
                 if (mbchar == 0xE38082 || mbchar == 0xEFBC8E
481 481
                     || mbchar == 0xEFB992 ||
482
-                    mbchar == 0xA143 || mbchar == 0xA144 ||
483
-                    mbchar == 0xA14F) {
482
+                    mbchar == 0xA1 && (c == 0x43 || c == 0x44 || c == 0x4F)) {
484 483
                     cont->contents[i++] = '.';
484
+                    if (mbchar == 0xA1) {
485
+                        --i;
486
+                        mbchar = 0;
487
+                        continue;
488
+                    }
485 489
                 } else {
486 490
                     uint8_t c0 = mbchar >> 16;
487 491
                     uint8_t c1 = (mbchar >> 8)&0xff;
... ...
@@ -659,6 +663,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
659 659
 	const unsigned char *js_begin = NULL, *js_end = NULL;
660 660
 	struct tag_contents contents;
661 661
         uint32_t mbchar = 0;
662
+        uint32_t mbchar2 = 0;
662 663
 
663 664
 	tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/
664 665
 	contents.pos = 0;
... ...
@@ -781,11 +786,15 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
781 781
                                 if (*ptr < 0x80 || mbchar >= 0x10000) {
782 782
                                     if (mbchar == 0xE38082 || mbchar == 0xEFBC8E
783 783
                                         || mbchar == 0xEFB992 ||
784
-                                        mbchar == 0xA143 || mbchar == 0xA144 ||
785
-                                        mbchar == 0xA14F) {
784
+                                        mbchar == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F)) {
786 785
                                         /* bb #4097 */
787 786
                                         html_output_c(file_buff_o2, '.');
788 787
                                         html_output_c(file_buff_text, '.');
788
+                                        if (mbchar == 0xA1) {
789
+                                            ptr++;
790
+                                            mbchar = 0;
791
+                                            continue;
792
+                                        }
789 793
                                     } else {
790 794
                                         uint8_t c0 = mbchar >> 16;
791 795
                                         uint8_t c1 = (mbchar >> 8)&0xff;
... ...
@@ -1072,11 +1081,45 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1072 1072
 						ptr++;
1073 1073
 					}
1074 1074
 				} else {
1075
+                                    if (mbchar2 && (*ptr < 0x80 || mbchar2 >= 0x10000)) {
1076
+                                        if (mbchar2 == 0xE38082 || mbchar2 == 0xEFBC8E
1077
+                                            || mbchar2 == 0xEFB992 ||
1078
+                                            mbchar2 == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F)) {
1079
+                                            html_output_c(file_buff_o2, '.');
1080
+                                            if (tag_val_length < HTML_STR_LENGTH)
1081
+						tag_val[tag_val_length++] = '.';
1082
+                                            if (mbchar2 == 0xA1) {
1083
+                                                ptr++;
1084
+                                                mbchar2 = 0;
1085
+                                                continue;
1086
+                                            }
1087
+                                        } else {
1088
+                                            uint8_t c0 = mbchar2 >> 16;
1089
+                                            uint8_t c1 = (mbchar2 >> 8)&0xff;
1090
+                                            uint8_t c2 = (mbchar2 & 0xff);
1091
+                                            if (c0)
1092
+                                                html_output_c(file_buff_o2, c0);
1093
+                                            if (c0 || c1)
1094
+                                                html_output_c(file_buff_o2, c1);
1095
+                                            html_output_c(file_buff_o2, c2);
1096
+                                            if (c0 && tag_val_length < HTML_STR_LENGTH)
1097
+						tag_val[tag_val_length++] = c0;
1098
+                                            if ((c0 || c1) && tag_val_length < HTML_STR_LENGTH)
1099
+						tag_val[tag_val_length++] = c1;
1100
+                                            if (tag_val_length < HTML_STR_LENGTH)
1101
+						tag_val[tag_val_length++] = c2;
1102
+					}
1103
+                                        mbchar2 = 0;
1104
+                                    }
1105
+                                    if (*ptr >= 0x80)
1106
+                                        mbchar2 = (mbchar2 << 8) | *ptr;
1107
+                                    else {
1075 1108
 					html_output_c(file_buff_o2, tolower(*ptr));
1076 1109
 					if (tag_val_length < HTML_STR_LENGTH) {
1077 1110
 						tag_val[tag_val_length++] = *ptr;
1078 1111
 					}
1079
-					ptr++;
1112
+                                    }
1113
+				    ptr++;
1080 1114
 				}
1081 1115
 
1082 1116
 				if (*ptr == '\\') {