Browse code

Windows: Fix unicode filename and file share scans

At least some unicode filenames may fail to scan in 0.102.4+ because
while Windows char* strings may be UTF8, the GetFinalPathNameByHandleA
function does not return UTF8 strings and instead does lossy conversion
to ASCII. To fix this, we need to use GetFinalPathNameByHandleW instead
and then convert from UTF16-LE to UTF8.

While fixing this bug, I found and fixed a couple of other serious issues
with the Win32 implementation of cli_codepage_to_utf8().

If a file is on a network share, the realpath comes back with a path
name that looks like "\\\\?\\UNC\\<host>\\<share>\\...". In this case,
the "\\\\?\\UNC\\" prefix is critical or else clamscan.exe won't be able
to open the file. This patch checks for the "\\\\?\\UNC" prefix and if
it exists, it keeps the prefix, else it trims the "\\\\?\\" portion as
before. This should fix scanning of files on network shares.

Micah Snyder authored on 2020/09/11 06:14:22
Showing 4 changed files
... ...
@@ -772,7 +772,6 @@ int encoding_normalize_toascii(const m_area_t* in_m_area, const char* initial_en
772 772
 }
773 773
 
774 774
 cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, char** out, size_t* out_size)
775
-
776 775
 {
777 776
     cl_error_t status = CL_BREAK;
778 777
 
... ...
@@ -907,7 +906,7 @@ cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, cha
907 907
             }
908 908
 
909 909
             out_utf8 = cli_malloc(out_utf8_size + 1);
910
-            if (NULL == lpWideCharStr) {
910
+            if (NULL == out_utf8) {
911 911
                 cli_dbgmsg("cli_codepage_to_utf8: failed to allocate memory for wide char to utf-8 string.\n");
912 912
                 status = CL_EMEM;
913 913
                 goto done;
... ...
@@ -928,6 +927,9 @@ cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, cha
928 928
                 goto done;
929 929
             }
930 930
 
931
+            /* Set a null byte, since null-terminator is not provided when in_size is provided */
932
+            out_utf8[out_utf8_size] = '\0';
933
+
931 934
 #elif defined(HAVE_ICONV)
932 935
 
933 936
             uint32_t attempt, i;
... ...
@@ -970,7 +970,7 @@ static cl_error_t hfsplus_walk_catalog(cli_ctx *ctx, hfsPlusVolumeHeader *volHea
970 970
                     /*
971 971
                      * The name is contained in nodeBuf[recordStart + 2 + 4 + 2 : recordStart + 2 + 4 + 2 + name_length * 2] encoded as UTF-16BE.
972 972
                      */
973
-                    if (CL_SUCCESS != cli_codepage_to_utf8((char *)index, name_length * 2, 1201, &name_utf8, &name_utf8_size)) {
973
+                    if (CL_SUCCESS != cli_codepage_to_utf8((char *)index, name_length * 2, CODEPAGE_UTF16_BE, &name_utf8, &name_utf8_size)) {
974 974
                         cli_errmsg("hfsplus_walk_catalog: failed to convert UTF-16BE to UTF-8\n");
975 975
                         name_utf8 = NULL;
976 976
                     }
... ...
@@ -60,6 +60,7 @@
60 60
 #include "regex/regex.h"
61 61
 #include "matcher-ac.h"
62 62
 #include "str.h"
63
+#include "entconv.h"
63 64
 
64 65
 #define MSGBUFSIZ 8192
65 66
 
... ...
@@ -1245,47 +1246,64 @@ cl_error_t cli_get_filepath_from_filedesc(int desc, char **filepath)
1245 1245
     }
1246 1246
 
1247 1247
 #elif _WIN32
1248
-    DWORD dwRet                   = 0;
1249
-    intptr_t hFile                = _get_osfhandle(desc);
1250
-    char *long_evaluated_filepath = NULL;
1248
+    DWORD dwRet                     = 0;
1249
+    intptr_t hFile                  = _get_osfhandle(desc);
1250
+    WCHAR *long_evaluated_filepathW = NULL;
1251
+    char *long_evaluated_filepathA  = NULL;
1252
+    size_t evaluated_filepath_len   = 0;
1253
+    cl_error_t conv_result;
1251 1254
 
1252 1255
     if (NULL == filepath) {
1253 1256
         cli_errmsg("cli_get_filepath_from_filedesc: Invalid args.\n");
1254 1257
         goto done;
1255 1258
     }
1256 1259
 
1257
-    dwRet = GetFinalPathNameByHandleA((HANDLE)hFile, NULL, 0, VOLUME_NAME_DOS);
1260
+    dwRet = GetFinalPathNameByHandleW((HANDLE)hFile, NULL, 0, VOLUME_NAME_DOS);
1258 1261
     if (dwRet == 0) {
1259 1262
         cli_errmsg("cli_get_filepath_from_filedesc: Failed to resolve filename for descriptor %d\n", desc);
1260 1263
         status = CL_EOPEN;
1261 1264
         goto done;
1262 1265
     }
1263 1266
 
1264
-    long_evaluated_filepath = calloc(dwRet + 1, 1);
1265
-    if (NULL == long_evaluated_filepath) {
1267
+    long_evaluated_filepathW = calloc(dwRet + 1, sizeof(WCHAR));
1268
+    if (NULL == long_evaluated_filepathW) {
1266 1269
         cli_errmsg("cli_get_filepath_from_filedesc: Failed to allocate %u bytes to store filename\n", dwRet + 1);
1267 1270
         status = CL_EMEM;
1268 1271
         goto done;
1269 1272
     }
1270 1273
 
1271
-    dwRet = GetFinalPathNameByHandleA((HANDLE)hFile, long_evaluated_filepath, dwRet + 1, VOLUME_NAME_DOS);
1274
+    dwRet = GetFinalPathNameByHandleW((HANDLE)hFile, long_evaluated_filepathW, dwRet + 1, VOLUME_NAME_DOS);
1272 1275
     if (dwRet == 0) {
1273 1276
         cli_errmsg("cli_get_filepath_from_filedesc: Failed to resolve filename for descriptor %d\n", desc);
1274
-        free(long_evaluated_filepath);
1275
-        long_evaluated_filepath = NULL;
1276
-        status                  = CL_EOPEN;
1277
+        status = CL_EOPEN;
1277 1278
         goto done;
1278 1279
     }
1279 1280
 
1280
-    evaluated_filepath = calloc(strlen(long_evaluated_filepath) - strlen("\\\\?\\") + 1, 1);
1281
-    if (NULL == evaluated_filepath) {
1282
-        cli_errmsg("cli_get_filepath_from_filedesc: Failed to allocate %u bytes to store filename\n", dwRet + 1);
1283
-        status = CL_EMEM;
1284
-        goto done;
1281
+    if (0 == wcsncmp(L"\\\\?\\UNC", long_evaluated_filepathW, wcslen(L"\\\\?\\UNC"))) {
1282
+        conv_result = cli_codepage_to_utf8(
1283
+            long_evaluated_filepathW,
1284
+            (wcslen(long_evaluated_filepathW)) * sizeof(WCHAR),
1285
+            CODEPAGE_UTF16_LE,
1286
+            &evaluated_filepath,
1287
+            &evaluated_filepath_len);
1288
+        if (CL_SUCCESS != conv_result) {
1289
+            cli_errmsg("cli_get_filepath_from_filedesc: Failed to convert UTF16_LE filename to UTF8\n", dwRet + 1);
1290
+            status = CL_EOPEN;
1291
+            goto done;
1292
+        }
1293
+    } else {
1294
+        conv_result = cli_codepage_to_utf8(
1295
+            long_evaluated_filepathW + wcslen(L"\\\\?\\"),
1296
+            (wcslen(long_evaluated_filepathW) - wcslen(L"\\\\?\\")) * sizeof(WCHAR),
1297
+            CODEPAGE_UTF16_LE,
1298
+            &evaluated_filepath,
1299
+            &evaluated_filepath_len);
1300
+        if (CL_SUCCESS != conv_result) {
1301
+            cli_errmsg("cli_get_filepath_from_filedesc: Failed to convert UTF16_LE filename to UTF8\n", dwRet + 1);
1302
+            status = CL_EOPEN;
1303
+            goto done;
1304
+        }
1285 1305
     }
1286
-    memcpy(evaluated_filepath,
1287
-           long_evaluated_filepath + strlen("\\\\?\\"),
1288
-           strlen(long_evaluated_filepath) - strlen("\\\\?\\"));
1289 1306
 
1290 1307
 #else
1291 1308
 
... ...
@@ -1295,15 +1313,15 @@ cl_error_t cli_get_filepath_from_filedesc(int desc, char **filepath)
1295 1295
 
1296 1296
 #endif
1297 1297
 
1298
-    cli_dbgmsg("cli_get_filepath_from_filedesc: File path for fd [%d] is: %s\n", desc, *filepath);
1298
+    cli_dbgmsg("cli_get_filepath_from_filedesc: File path for fd [%d] is: %s\n", desc, evaluated_filepath);
1299 1299
     status    = CL_SUCCESS;
1300 1300
     *filepath = evaluated_filepath;
1301 1301
 
1302 1302
 done:
1303 1303
 
1304 1304
 #ifdef _WIN32
1305
-    if (NULL != long_evaluated_filepath) {
1306
-        free(long_evaluated_filepath);
1305
+    if (NULL != long_evaluated_filepathW) {
1306
+        free(long_evaluated_filepathW);
1307 1307
     }
1308 1308
 #endif
1309 1309
     return status;
... ...
@@ -3773,7 +3773,7 @@ static cl_error_t parse_formula(FILE *out_file, char data[], unsigned data_size)
3773 3773
                     if (str_len > data_size - data_pos) {
3774 3774
                         str_len = data_size - data_pos;
3775 3775
                     }
3776
-                    if (CL_SUCCESS == cli_codepage_to_utf8(&data[data_pos + 3], str_len, 1200, &utf8, &utf8_size)) {
3776
+                    if (CL_SUCCESS == cli_codepage_to_utf8(&data[data_pos + 3], str_len, CODEPAGE_UTF16_LE, &utf8, &utf8_size)) {
3777 3777
                         if (0 < utf8_size) {
3778 3778
                             size_written = fwrite(utf8, 1, utf8_size, out_file);
3779 3779
                             free(utf8);
... ...
@@ -4326,7 +4326,7 @@ cl_error_t cli_xlm_extract_macros(const char *dir, cli_ctx *ctx, struct uniq *U,
4326 4326
                             string_length = biff_header.length - 3;
4327 4327
                         }
4328 4328
 
4329
-                        if (CL_SUCCESS == cli_codepage_to_utf8(&data[3], string_length, 1200, &utf8, &utf8_size)) {
4329
+                        if (CL_SUCCESS == cli_codepage_to_utf8(&data[3], string_length, CODEPAGE_UTF16_LE, &utf8, &utf8_size)) {
4330 4330
                             if (0 < utf8_size) {
4331 4331
                                 size_written = fwrite(utf8, 1, utf8_size, out_file);
4332 4332
                                 free(utf8);