Browse code

try to detect international text data

git-svn: trunk@1448

Tomasz Kojm authored on 2005/03/29 09:55:06
Showing 2 changed files
... ...
@@ -1,6 +1,10 @@
1
+Tue Mar 29 02:50:30 CEST 2005 (tk)
2
+----------------------------------
3
+  * libclamav/filetypes.c: try to detect international text data
4
+
1 5
 Tue Mar 29 02:02:05 CEST 2005 (tk)
2 6
 ----------------------------------
3
-  * libclamav: detect and ignore ISO 9660/High Sierra CD-ROM data
7
+  * libclamav/filetypes.c: detect and ignore ISO 9660/High Sierra CD-ROM data
4 8
 
5 9
 Mon Mar 28 12:05:17 BST 2005 (njh)
6 10
 ----------------------------------
... ...
@@ -173,6 +173,26 @@ static const struct cli_smagic_s cli_smagic[] = {
173 173
     {NULL,  NULL,   CL_TYPE_UNKNOWN_DATA}
174 174
 };
175 175
 
176
+static char internat[256] = {
177
+    /* TODO: Remember to buy a beer to Joerg Wunsch <joerg@FreeBSD.ORG> */
178
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,  /* 0x0X */
179
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,  /* 0x1X */
180
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x2X */
181
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x3X */
182
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x4X */
183
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x5X */
184
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x6X */
185
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  /* 0x7X */
186
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x8X */
187
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x9X */
188
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xaX */
189
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xbX */
190
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xcX */
191
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xdX */
192
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xeX */
193
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xfX */
194
+};
195
+
176 196
 cli_file_t cli_filetype(const char *buf, size_t buflen)
177 197
 {
178 198
 	int i, ascii = 1, len;
... ...
@@ -189,7 +209,7 @@ cli_file_t cli_filetype(const char *buf, size_t buflen)
189 189
 
190 190
     buflen < 25 ? (len = buflen) : (len = 25);
191 191
     for(i = 0; i < len; i++)
192
-	if(!iscntrl(buf[i]) && !isprint(buf[i])) { /* FIXME: handle international chars */
192
+	if(!iscntrl(buf[i]) && !isprint(buf[i]) && !internat[buf[i] & 0xff]) {
193 193
 	    ascii = 0;
194 194
 	    break;
195 195
 	}