... | ... |
@@ -80,14 +80,19 @@ static int td_isascii(const unsigned char *buf, unsigned int len) |
80 | 80 |
{ |
81 | 81 |
unsigned int i; |
82 | 82 |
|
83 |
- /* Check for the Byte-Order-Mark for UTF-8 */ |
|
84 |
- if ((len >= 3) && |
|
85 |
- (buf[0] == 0xEF) && |
|
86 |
- (buf[1] == 0xBB) && |
|
87 |
- (buf[2] == 0xBF)) |
|
88 |
- { |
|
89 |
- return 0; |
|
90 |
- } |
|
83 |
+ // @TODO: UTF8 BOM Detection. |
|
84 |
+ // The BOM check below is disabled because it causes false negatives in regression testing. |

85 |
+ // These could be eliminated by adding a condition to call cli_scanhtml for CL_TYPE_TEXT_UTF8 |

86 |
+ // in scanners.c:cli_scanraw(). However, cli_scanhtml was written for ASCII and has |

87 |
+ // not been validated to correctly handle multibyte UTF-8. |
|
88 |
+ // /* Check for the Byte-Order-Mark for UTF-8 */ |
|
89 |
+ // if ((len >= 3) && |
|
90 |
+ // (buf[0] == 0xEF) && |
|
91 |
+ // (buf[1] == 0xBB) && |
|
92 |
+ // (buf[2] == 0xBF)) |
|
93 |
+ // { |
|
94 |
+ // return 0; |
|
95 |
+ // } |
|
91 | 96 |
|
92 | 97 |
/* Validate that the data all falls within the bounds of |
93 | 98 |
* plain ASCII, ISO-8859 text, and non-ISO extended ASCII (Mac, IBM PC) |