Browse code

Adding UTF-8 BOM detection to ASCII check

Micah Snyder authored on 2017/11/14 01:50:40
Showing 1 changed file
... ...
@@ -80,9 +80,21 @@ static int td_isascii(const unsigned char *buf, unsigned int len)
80 80
 {
81 81
 	unsigned int i;
82 82
 
83
-    for(i = 0; i < len; i++)
84
-	if(text_chars[buf[i]] == F)
85
-	    return 0;
83
+	/* Check for the Byte-Order-Mark for UTF-8 */
84
+	if ((len >= 3) &&
85
+		(buf[0] == 0xEF) &&
86
+		(buf[1] == 0xBB) &&
87
+		(buf[2] == 0xBF))
88
+	{
89
+		return 0;
90
+	}
91
+
92
+	/* Validate that the data all falls within the bounds of 
93
+	 * plain ASCII, ISO-8859 text, and non-ISO extended ASCII (Mac, IBM PC)
94
+	 */
95
+	for(i = 0; i < len; i++)
96
+		if(text_chars[buf[i]] == F)
97
+			return 0;
86 98
 
87 99
     return 1;
88 100
 }