GitList

@@ -80,14 +80,19 @@ static int td_isascii(const unsigned char *buf, unsigned int len)
+                     {
                      	unsigned int i;
                     -	/* Check for the Byte-Order-Mark for UTF-8 */
                     -	if ((len >= 3) &&
                     -		(buf[0] == 0xEF) &&
                     -		(buf[1] == 0xBB) &&
                     -		(buf[2] == 0xBF))
                     -	{
                     -		return 0;
                     -	}
                     +	// @TODO: UTF-8 BOM detection.
                     +	//    When enabled, the BOM detection below causes false negatives in regression testing.
                     +	//    These can be eliminated by adding a condition to call cli_scanhtml for CL_TYPE_TEXT_UTF8
                     +	//    in scanners.c:cli_scanraw().  However, cli_scanhtml was written for ASCII and has
                     +	//    not been validated to handle multibyte UTF-8 correctly.
                     +	// /* Check for the Byte-Order-Mark for UTF-8 */
                     +	// if ((len >= 3) &&
                     +	// 	(buf[0] == 0xEF) &&
                     +	// 	(buf[1] == 0xBB) &&
                     +	// 	(buf[2] == 0xBF))
                     +	// {
                     +	// 	return 0;
                     +	// }
                      	/* Validate that the data all falls within the bounds of
                      	 * plain ASCII, ISO-8859 text, and non-ISO extended ASCII (Mac, IBM PC)

Comment out the UTF-8 BOM detection code for the time being, until cli_scanhtml can be validated to handle multibyte UTF-8.