Browse code

bb1636 - utf-16be detection

Shawn Webb authored on 2012/11/21 00:39:37
Showing 1 changed file
... ...
@@ -142,7 +142,7 @@ static int td_isutf8(const unsigned char *buf, unsigned int len)
142 142
 
143 143
 static int td_isutf16(const unsigned char *buf, unsigned int len)
144 144
 {
145
-	unsigned int be = 1, nobom = 0, i, c, bad = 0;
145
+	unsigned int be = 1, nobom = 0, i, c, bad = 0, high = 0;
146 146
 
147 147
 
148 148
     if(len < 2)
... ...
@@ -169,9 +169,14 @@ static int td_isutf16(const unsigned char *buf, unsigned int len)
169 169
 		return 0;
170 170
 	    else
171 171
 		bad++;
172
-	}
172
+	} else if (c >= 128) {
173
+        high++;
174
+    }
173 175
     }
174 176
 
177
+    if (nobom && high >= len / 4)
178
+        return 0;
179
+
175 180
     if(!nobom && bad >= len / 2)
176 181
 	return 0;
177 182