Browse code

win32: UTF #6 - clamd proto

aCaB authored on 2011/04/19 00:25:23
Showing 13 changed files
... ...
@@ -1,3 +1,8 @@
1
+Mon Apr 18 17:23:10 CEST 2011 (acab)
2
+------------------------------------
3
+ * clamd, win32: Conversion to UTF8 is hopefully complete and final.
4
+		 See win32/README for important changes! (bb#2343)
5
+
1 6
 Sun Apr 17 16:09:28 CEST 2011 (acab)
2 7
 ------------------------------------
3 8
  * win32/compat: more UTF8 stuff. Things *appear* to be working ok now.
... ...
@@ -420,7 +420,7 @@ static int dispatch_command(client_conn_t *conn, enum commands cmd, const char *
420 420
 	case COMMAND_SCAN:
421 421
 	case COMMAND_CONTSCAN:
422 422
 	case COMMAND_MULTISCAN:
423
-	    dup_conn->filename = strdup(argument);
423
+	    dup_conn->filename = cli_strdup_to_utf8(argument);
424 424
 	    if (!dup_conn->filename) {
425 425
 		logg("!Failed to allocate memory for filename\n");
426 426
 		ret = -1;
... ...
@@ -2701,31 +2701,17 @@ int cl_scanfile(const char *filename, const char **virname, unsigned long int *s
2701 2701
 int cl_scanfile_callback(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, unsigned int scanoptions, void *context)
2702 2702
 {
2703 2703
 	int fd, ret;
2704
-#ifdef _WIN32
2705
-	char utf8[PATH_MAX+1];
2706
-	wchar_t tmpw[PATH_MAX+1];
2704
+	char *fname = cli_to_utf8_maybe_alloc(filename);
2707 2705
 
2708
-	while(1) {
2709
-	    /* Try UTF8 input first */
2710
-	    if(MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, tmpw, PATH_MAX)) {
2711
-		/* XP acts funny on MB_ERR_INVALID_CHARS, so we translate back and compare */
2712
-		if(WideCharToMultiByte(CP_UTF8, 0, tmpw, -1, utf8, PATH_MAX, NULL, NULL) && !strcmp(filename, utf8))
2713
-		    break;
2714
-	    }
2715
-	    /* Then assume ACP */
2716
-	    if(MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, filename, -1, tmpw, PATH_MAX)) {
2717
-		if(WideCharToMultiByte(CP_UTF8, 0, tmpw, -1, utf8, PATH_MAX, NULL, NULL)) {
2718
-		    filename = utf8;
2719
-		    break;
2720
-		}
2721
-	    }
2722
-	    cli_errmsg("cl_scanfile_callback: Can't translate %s to UTF-8\n", filename);
2706
+    if(!fname)
2723 2707
 	    return CL_EARG;
2724
-	}
2725
-#endif
2726
-    if((fd = safe_open(filename, O_RDONLY|O_BINARY)) == -1)
2708
+
2709
+    if((fd = safe_open(fname, O_RDONLY|O_BINARY)) == -1)
2727 2710
 	return CL_EOPEN;
2728 2711
 
2712
+    if(fname != filename)
2713
+	free(fname);
2714
+
2729 2715
     ret = cl_scandesc_callback(fd, virname, scanned, engine, scanoptions, context);
2730 2716
     close(fd);
2731 2717
 
... ...
@@ -40,6 +40,8 @@ typedef	unsigned	int	in_addr_t;
40 40
 
41 41
 /* Nothing is safe in windows, not even open */
42 42
 #define safe_open open
43
+#define cli_to_utf8_maybe_alloc(x) (x)
44
+#define cli_strdup_to_utf8(x) strdup(x)
43 45
 #ifndef WORDS_BIGENDIAN
44 46
 #define WORDS_BIGENDIAN 0
45 47
 #endif
... ...
@@ -450,7 +450,7 @@ void mprintf(const char *str, ...)
450 450
 	    break;
451 451
 	}
452 452
 	/* FIXME CHECK IT'S REALLY UTF8 */
453
-	nubuff = malloc(tmplen);
453
+	nubuff = (char *)malloc(tmplen);
454 454
 	if(!nubuff) {
455 455
 	    free(tmpw);
456 456
 	    break;
... ...
@@ -1,7 +1,26 @@
1 1
 ClamAV for Win32
2 2
 ----------------
3 3
 
4
-0- Requirements
4
+--- News ---
5
+
6
+Starting from version 0.98 the windows version of ClamAV requires all the
7
+input to be UTF-8 encoded.
8
+This affects:
9
+- the API, notably the cl_scanfile() function
10
+- clamd socket input, e.g. the commands SCAN, CONTSCAN, MULTISCAN, etc.
11
+- clamd socket output, i.e. replies to the above queries
12
+
13
+For legacy reasons ANSI (i.e. CP_ACP) input will still be accepted and
14
+processed as before, but with two important remarks:
15
+First, socket replies to ANSI queries will still be UTF-8 encoded.
16
+Second, ANSI sequences which are also valid UTF-8 sequences will be handled
17
+as UTF-8.
18
+
19
+As a side note, console output (stdout and stderr) will always be OEM encoded,
20
+even when redirected to a file.
21
+
22
+
23
+--- Requirements ---
5 24
 
6 25
 To build the source code you will need:
7 26
 - Git for windows with a git "shell"
... ...
@@ -11,7 +30,8 @@ compile the 64bit target; this configuration is therefore not supported.
11 11
 
12 12
 To run the binaries at least Windows XP is required.
13 13
 
14
-1- Getting the code
14
+
15
+--- Getting the code ---
15 16
 
16 17
 The win32 source code is merged in the ClamAV repository and is available
17 18
 via git.
... ...
@@ -19,14 +39,14 @@ Clone the repository with: git clone git://git.clamav.net/git/clamav-devel
19 19
 See http://www.clamav.net/download/sources for more info.
20 20
 
21 21
 
22
-2- Code configuration
22
+--- Code configuration ---
23 23
 
24 24
 After downloading the source code, minimal configuration is required:
25 25
 just run the win32/configure.bat script *from within the git shell*.
26 26
 Skip this step if you are building from an official release tarball.
27 27
 
28 28
 
29
-3- Compilation
29
+--- Compilation ---
30 30
 
31 31
 Open win32/ClamAV.sln in Visual Studio and build all.
32 32
 The output directory for the binaries is either /win32/(Win32|x64)/Debug or
... ...
@@ -37,24 +57,28 @@ https://connect.microsoft.com/VisualStudio/feedback/details/556158
37 37
 Use MSBuild instead.
38 38
 
39 39
 
40
-4- Special notes
40
+--- Special notes ---
41 41
 
42 42
 The ClamAV tools in win32 are the same as in unix, so refer to their respective
43 43
 manpage for general usage.
44 44
 The major differences are listed below:
45 45
 - Config files path search order:
46
-  1- The content of the registry key "HKEY_LOCAL_MACHINE/Software/ClamAV/ConfDir"
46
+  1- The content of the registry key
47
+     "HKEY_LOCAL_MACHINE/Software/ClamAV/ConfDir"
47 48
   2- The directory where libclamav.dll is located
48 49
   3- "C:\ClamAV"
49 50
 
50 51
 - Database files path search order:
51
-  1- The content of the registry key "HKEY_LOCAL_MACHINE/Software/ClamAV/DataDir"
52
-  2- The directory "database" inside the directory where libclamav.dll is located
52
+  1- The content of the registry key
53
+     "HKEY_LOCAL_MACHINE/Software/ClamAV/DataDir"
54
+  2- The directory "database" inside the directory where libclamav.dll is
55
+     located
53 56
   3- "C:\ClamAV\db"
54 57
 
55 58
 - Globbing
56
-Since the windows command prompt doesn't take care of wildcard expansion, minimal
57
-emulation of unix glob() is performed internally. It supports "*" and "?" only.
59
+Since the windows command prompt doesn't take care of wildcard expansion,
60
+minimal emulation of unix glob() is performed internally.
61
+It supports "*" and "?" only.
58 62
 
59 63
 - File paths
60 64
 Please always use the backslash as the path separator.
... ...
@@ -66,7 +90,7 @@ chunks; such builds won't be able to handle large databases.
66 66
 Just do yourself a favour and always build in release mode.
67 67
 
68 68
 
69
-5- Special thanks
69
+--- Special thanks ---
70 70
 
71 71
 Special thanks to Gianluigi Tiesi and Mark Pizzolato for their valuable help in
72 72
 coding and testing.
... ...
@@ -67,7 +67,7 @@
67 67
     #define wrapper.
68 68
 */
69 69
 
70
-static int glob_add(const char *path, int *argc, char ***argv) {
70
+static int glob_add(char *path, int *argc, char ***argv) {
71 71
     char *tail = strchr(path, '*'), *tailqmark;
72 72
     char *dup1, *dup2, *dir, *base, *taildirsep, *tailwldsep;
73 73
     struct dirent *de;
... ...
@@ -123,8 +123,11 @@ static int glob_add(const char *path, int *argc, char ***argv) {
123 123
     if(!tailwldsep)
124 124
 	tailwldsep = tail + taillen;
125 125
 
126
-    dup1 = strdup(path);
127
-    dup2 = strdup(path);
126
+    baselen = strlen(path) + 1;
127
+    dup1 = (char *)_alloca(baselen * 2);
128
+    memcpy(dup1, path, baselen);
129
+    dup2 = dup1 + baselen;
130
+    memcpy(dup2, path, baselen);
128 131
 
129 132
     if(!mergedir) {
130 133
 	dir = dirname(dup1);
... ...
@@ -177,8 +180,7 @@ static int glob_add(const char *path, int *argc, char ***argv) {
177 177
 	}
178 178
     }
179 179
     if(d) closedir(d);
180
-    free(dup1);
181
-    free(dup2);
180
+    _freea(dup1);
182 181
     free(path);
183 182
     return outlen;
184 183
 }
185 184
new file mode 100644
... ...
@@ -0,0 +1,86 @@
0
+/*
1
+ *  Copyright (C) 2011 Sourcefire, Inc.
2
+ *
3
+ *  Authors: aCaB <acab@clamav.net>
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ */
19
+
20
+#if HAVE_CONFIG_H
21
+#include "clamav-config.h"
22
+#endif
23
+
24
+#include "utf8_util.h"
25
+
26
/*
 * Always-allocating variant of cli_to_utf8_maybe_alloc().
 *
 * Returns a heap string holding the UTF-8 form of s, or NULL when the
 * conversion (or the copy) fails.  The caller owns the result and must
 * free() it.
 */
char *cli_strdup_to_utf8(const char *s) {
    char *converted = cli_to_utf8_maybe_alloc(s);

    /* The converter handed the input back untouched: give out a private copy */
    if(converted && converted == s)
        return strdup(converted);

    /* Either NULL (failure) or a buffer the converter already allocated */
    return converted;
}
32
+
33
+#define MAYBE_FREE_W do { if(wdup != tmpw) free(wdup); } while (0)
34
+#define MAYBE_FREE_U do { if(utf8 != tmpu) free(utf8); } while (0)
35
+char *cli_to_utf8_maybe_alloc(const char *s) {
36
+    int len = strlen(s) + 1;
37
+    wchar_t tmpw[1024], *wdup;
38
+    char tmpu[1024], *utf8;
39
+
40
+    if(len >= sizeof(tmpw) / sizeof(*tmpw)) {
41
+	wdup = (wchar_t *)malloc(len * sizeof(wchar_t));
42
+	if(!wdup) return NULL;
43
+    } else
44
+	wdup = tmpw;
45
+
46
+    /* Check if already UTF8 first... */
47
+    if(MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
48
+	/* XP acts funny on MB_ERR_INVALID_CHARS, so we translate back and compare
49
+	   On Vista+ the flag is honored and there is no such overhead */
50
+	int ulen;
51
+	if((ulen = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
52
+	    if(ulen > sizeof(tmpu)) {
53
+		utf8 = (char *)malloc(ulen);
54
+		if(!utf8) {
55
+		    MAYBE_FREE_W;
56
+		    return NULL;
57
+		}
58
+	    } else
59
+		utf8 = tmpu;
60
+	    if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, ulen, NULL, NULL) && !strcmp(s, utf8)) {
61
+		    MAYBE_FREE_W;
62
+		    MAYBE_FREE_U;
63
+		    return s;
64
+	    }
65
+	    MAYBE_FREE_U;
66
+	}
67
+	/* We should never land here */
68
+    }
69
+
70
+    /* ... then assume ANSI */
71
+    if(MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
72
+	if((len = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
73
+	    if((utf8 = (char *)malloc(len))) {
74
+		if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, len, NULL, NULL)) {
75
+		    MAYBE_FREE_W;
76
+		    return utf8;
77
+		}
78
+		free(utf8);
79
+	    }
80
+	}
81
+    }
82
+    MAYBE_FREE_W;
83
+    return NULL;
84
+}
85
+
0 86
new file mode 100644
... ...
@@ -0,0 +1,27 @@
0
+/*
1
+ *  Copyright (C) 2011 Sourcefire, Inc.
2
+ *
3
+ *  Authors: aCaB <acab@clamav.net>
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ */
19
+
20
/* Helpers for normalising input strings to UTF-8 on win32. */
+#ifndef __UTF8_UTIL_H
21
+#define __UTF8_UTIL_H
22
+
/*
 * Returns s itself when s is already UTF-8, a malloc()ed UTF-8 copy when s
 * had to be converted from the ANSI code page, or NULL on failure.
 * The caller frees the result only when it differs from s.
 */
23
+char *cli_to_utf8_maybe_alloc(const char *s);
/*
 * Same conversion, but the result is always a separate heap string (or
 * NULL on failure) that the caller must free().
 */
24
+char *cli_strdup_to_utf8(const char *s);
25
+
26
+#endif
0 27
\ No newline at end of file
... ...
@@ -212,4 +212,5 @@ EXPORTS w32_strerror @44341 NONAME
212 212
 EXPORTS w32_strerror_r @44342 NONAME
213 213
 EXPORTS inet_addr @44343 NONAME
214 214
 EXPORTS fcntl @44344 NONAME
215
-
215
+EXPORTS cli_to_utf8_maybe_alloc @44345 NONAME
216
+EXPORTS cli_strdup_to_utf8 @44346 NONAME
... ...
@@ -1,5 +1,4 @@
1 1
 <?xml version="1.0" encoding="utf-8"?>
2
-
3 2
 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
4 3
   <ItemGroup Label="ProjectConfigurations">
5 4
     <ProjectConfiguration Include="Debug|Win32">
... ...
@@ -333,6 +332,7 @@
333 333
     <ClCompile Include="compat\net.c"/>
334 334
     <ClCompile Include="compat\random.c"/>
335 335
     <ClCompile Include="compat\snprintf.c"/>
336
+    <ClCompile Include="compat\utf8_util.c" />
336 337
     <ClCompile Include="compat\w32_errno.c"/>
337 338
     <ClCompile Include="compat\w32_stat.c"/>
338 339
   </ItemGroup>
... ...
@@ -356,4 +356,4 @@
356 356
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets"/>
357 357
   <ImportGroup Label="ExtensionTargets">
358 358
   </ImportGroup>
359
-</Project>
359
+</Project>
360 360
\ No newline at end of file
... ...
@@ -159,9 +159,6 @@
159 159
     <ClCompile Include="..\libclamav\matcher-bm.c">
160 160
       <Filter>Source Files</Filter>
161 161
     </ClCompile>
162
-    <ClCompile Include="..\libclamav\matcher-md5.c">
163
-      <Filter>Source Files</Filter>
164
-    </ClCompile>
165 162
     <ClCompile Include="..\libclamav\mbox.c">
166 163
       <Filter>Source Files</Filter>
167 164
     </ClCompile>
... ...
@@ -477,5 +474,26 @@
477 477
     <ClCompile Include="3rdparty\zlib\gzwrite.c">
478 478
       <Filter>Source Files\zlib</Filter>
479 479
     </ClCompile>
480
+    <ClCompile Include="..\libclamav\png.c">
481
+      <Filter>Source Files</Filter>
482
+    </ClCompile>
483
+    <ClCompile Include="..\libclamav\jpeg.c">
484
+      <Filter>Source Files</Filter>
485
+    </ClCompile>
486
+    <ClCompile Include="..\libclamav\swf.c">
487
+      <Filter>Source Files</Filter>
488
+    </ClCompile>
489
+    <ClCompile Include="..\libclamav\matcher-hash.c">
490
+      <Filter>Source Files</Filter>
491
+    </ClCompile>
492
+    <ClCompile Include="..\libclamav\sha1.c">
493
+      <Filter>Source Files</Filter>
494
+    </ClCompile>
495
+    <ClCompile Include="..\libclamav\events.c">
496
+      <Filter>Source Files</Filter>
497
+    </ClCompile>
498
+    <ClCompile Include="compat\utf8_util.c">
499
+      <Filter>Source Files\compat</Filter>
500
+    </ClCompile>
480 501
   </ItemGroup>
481 502
 </Project>
482 503
\ No newline at end of file
... ...
@@ -22,6 +22,7 @@ extern "C"
22 22
 #include "w32_errno.h"
23 23
 #include "w32_stat.h"
24 24
 #include "random.h"
25
+#include "utf8_util.h"
25 26
 
26 27
 #ifdef __cplusplus
27 28
 }