At least some Unicode filenames may fail to scan in 0.102.4+ because,
while Windows char* strings may be UTF-8, the GetFinalPathNameByHandleA
function does not return UTF-8 strings and instead does a lossy conversion
to ASCII. To fix this, we need to use GetFinalPathNameByHandleW instead
and then convert from UTF-16LE to UTF-8.
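While fixing this bug, I found and fixed a couple of other serious issues
with the Win32 implementation of cli_codepage_to_utf8(): the allocation
check for the UTF-8 output buffer tested the wrong pointer, and the UTF-8
output was not null-terminated.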
If a file is on a network share, the realpath comes back with a path
name that looks like "\\\\?\\UNC\\<host>\\<share>\\...". In this case,
the "\\\\?\\UNC\\" prefix is critical, or else clamscan.exe won't be able
to open the file. This patch checks for the "\\\\?\\UNC" prefix and, if
it exists, keeps the prefix; otherwise it trims the "\\\\?\\" portion as
before. This should fix scanning of files on network shares.
... | ... |
@@ -772,7 +772,6 @@ int encoding_normalize_toascii(const m_area_t* in_m_area, const char* initial_en |
772 | 772 |
} |
773 | 773 |
|
774 | 774 |
cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, char** out, size_t* out_size) |
775 |
- |
|
776 | 775 |
{ |
777 | 776 |
cl_error_t status = CL_BREAK; |
778 | 777 |
|
... | ... |
@@ -907,7 +906,7 @@ cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, cha |
907 | 907 |
} |
908 | 908 |
|
909 | 909 |
out_utf8 = cli_malloc(out_utf8_size + 1); |
910 |
- if (NULL == lpWideCharStr) { |
|
910 |
+ if (NULL == out_utf8) { |
|
911 | 911 |
cli_dbgmsg("cli_codepage_to_utf8: failed to allocate memory for wide char to utf-8 string.\n"); |
912 | 912 |
status = CL_EMEM; |
913 | 913 |
goto done; |
... | ... |
@@ -928,6 +927,9 @@ cl_error_t cli_codepage_to_utf8(char* in, size_t in_size, uint16_t codepage, cha |
928 | 928 |
goto done; |
929 | 929 |
} |
930 | 930 |
|
931 |
+ /* Set a null byte, since null-terminator is not provided when in_size is provided */ |
|
932 |
+ out_utf8[out_utf8_size] = '\0'; |
|
933 |
+ |
|
931 | 934 |
#elif defined(HAVE_ICONV) |
932 | 935 |
|
933 | 936 |
uint32_t attempt, i; |
... | ... |
@@ -970,7 +970,7 @@ static cl_error_t hfsplus_walk_catalog(cli_ctx *ctx, hfsPlusVolumeHeader *volHea |
970 | 970 |
/* |
971 | 971 |
* The name is contained in nodeBuf[recordStart + 2 + 4 + 2 : recordStart + 2 + 4 + 2 + name_length * 2] encoded as UTF-16BE. |
972 | 972 |
*/ |
973 |
- if (CL_SUCCESS != cli_codepage_to_utf8((char *)index, name_length * 2, 1201, &name_utf8, &name_utf8_size)) { |
|
973 |
+ if (CL_SUCCESS != cli_codepage_to_utf8((char *)index, name_length * 2, CODEPAGE_UTF16_BE, &name_utf8, &name_utf8_size)) { |
|
974 | 974 |
cli_errmsg("hfsplus_walk_catalog: failed to convert UTF-16BE to UTF-8\n"); |
975 | 975 |
name_utf8 = NULL; |
976 | 976 |
} |
... | ... |
@@ -60,6 +60,7 @@ |
60 | 60 |
#include "regex/regex.h" |
61 | 61 |
#include "matcher-ac.h" |
62 | 62 |
#include "str.h" |
63 |
+#include "entconv.h" |
|
63 | 64 |
|
64 | 65 |
#define MSGBUFSIZ 8192 |
65 | 66 |
|
... | ... |
@@ -1245,47 +1246,64 @@ cl_error_t cli_get_filepath_from_filedesc(int desc, char **filepath) |
1245 | 1245 |
} |
1246 | 1246 |
|
1247 | 1247 |
#elif _WIN32 |
1248 |
- DWORD dwRet = 0; |
|
1249 |
- intptr_t hFile = _get_osfhandle(desc); |
|
1250 |
- char *long_evaluated_filepath = NULL; |
|
1248 |
+ DWORD dwRet = 0; |
|
1249 |
+ intptr_t hFile = _get_osfhandle(desc); |
|
1250 |
+ WCHAR *long_evaluated_filepathW = NULL; |
|
1251 |
+ char *long_evaluated_filepathA = NULL; |
|
1252 |
+ size_t evaluated_filepath_len = 0; |
|
1253 |
+ cl_error_t conv_result; |
|
1251 | 1254 |
|
1252 | 1255 |
if (NULL == filepath) { |
1253 | 1256 |
cli_errmsg("cli_get_filepath_from_filedesc: Invalid args.\n"); |
1254 | 1257 |
goto done; |
1255 | 1258 |
} |
1256 | 1259 |
|
1257 |
- dwRet = GetFinalPathNameByHandleA((HANDLE)hFile, NULL, 0, VOLUME_NAME_DOS); |
|
1260 |
+ dwRet = GetFinalPathNameByHandleW((HANDLE)hFile, NULL, 0, VOLUME_NAME_DOS); |
|
1258 | 1261 |
if (dwRet == 0) { |
1259 | 1262 |
cli_errmsg("cli_get_filepath_from_filedesc: Failed to resolve filename for descriptor %d\n", desc); |
1260 | 1263 |
status = CL_EOPEN; |
1261 | 1264 |
goto done; |
1262 | 1265 |
} |
1263 | 1266 |
|
1264 |
- long_evaluated_filepath = calloc(dwRet + 1, 1); |
|
1265 |
- if (NULL == long_evaluated_filepath) { |
|
1267 |
+ long_evaluated_filepathW = calloc(dwRet + 1, sizeof(WCHAR)); |
|
1268 |
+ if (NULL == long_evaluated_filepathW) { |
|
1266 | 1269 |
cli_errmsg("cli_get_filepath_from_filedesc: Failed to allocate %u bytes to store filename\n", dwRet + 1); |
1267 | 1270 |
status = CL_EMEM; |
1268 | 1271 |
goto done; |
1269 | 1272 |
} |
1270 | 1273 |
|
1271 |
- dwRet = GetFinalPathNameByHandleA((HANDLE)hFile, long_evaluated_filepath, dwRet + 1, VOLUME_NAME_DOS); |
|
1274 |
+ dwRet = GetFinalPathNameByHandleW((HANDLE)hFile, long_evaluated_filepathW, dwRet + 1, VOLUME_NAME_DOS); |
|
1272 | 1275 |
if (dwRet == 0) { |
1273 | 1276 |
cli_errmsg("cli_get_filepath_from_filedesc: Failed to resolve filename for descriptor %d\n", desc); |
1274 |
- free(long_evaluated_filepath); |
|
1275 |
- long_evaluated_filepath = NULL; |
|
1276 |
- status = CL_EOPEN; |
|
1277 |
+ status = CL_EOPEN; |
|
1277 | 1278 |
goto done; |
1278 | 1279 |
} |
1279 | 1280 |
|
1280 |
- evaluated_filepath = calloc(strlen(long_evaluated_filepath) - strlen("\\\\?\\") + 1, 1); |
|
1281 |
- if (NULL == evaluated_filepath) { |
|
1282 |
- cli_errmsg("cli_get_filepath_from_filedesc: Failed to allocate %u bytes to store filename\n", dwRet + 1); |
|
1283 |
- status = CL_EMEM; |
|
1284 |
- goto done; |
|
1281 |
+ if (0 == wcsncmp(L"\\\\?\\UNC", long_evaluated_filepathW, wcslen(L"\\\\?\\UNC"))) { |
|
1282 |
+ conv_result = cli_codepage_to_utf8( |
|
1283 |
+ long_evaluated_filepathW, |
|
1284 |
+ (wcslen(long_evaluated_filepathW)) * sizeof(WCHAR), |
|
1285 |
+ CODEPAGE_UTF16_LE, |
|
1286 |
+ &evaluated_filepath, |
|
1287 |
+ &evaluated_filepath_len); |
|
1288 |
+ if (CL_SUCCESS != conv_result) { |
|
1289 |
+ cli_errmsg("cli_get_filepath_from_filedesc: Failed to convert UTF16_LE filename to UTF8\n", dwRet + 1); |
|
1290 |
+ status = CL_EOPEN; |
|
1291 |
+ goto done; |
|
1292 |
+ } |
|
1293 |
+ } else { |
|
1294 |
+ conv_result = cli_codepage_to_utf8( |
|
1295 |
+ long_evaluated_filepathW + wcslen(L"\\\\?\\"), |
|
1296 |
+ (wcslen(long_evaluated_filepathW) - wcslen(L"\\\\?\\")) * sizeof(WCHAR), |
|
1297 |
+ CODEPAGE_UTF16_LE, |
|
1298 |
+ &evaluated_filepath, |
|
1299 |
+ &evaluated_filepath_len); |
|
1300 |
+ if (CL_SUCCESS != conv_result) { |
|
1301 |
+ cli_errmsg("cli_get_filepath_from_filedesc: Failed to convert UTF16_LE filename to UTF8\n", dwRet + 1); |
|
1302 |
+ status = CL_EOPEN; |
|
1303 |
+ goto done; |
|
1304 |
+ } |
|
1285 | 1305 |
} |
1286 |
- memcpy(evaluated_filepath, |
|
1287 |
- long_evaluated_filepath + strlen("\\\\?\\"), |
|
1288 |
- strlen(long_evaluated_filepath) - strlen("\\\\?\\")); |
|
1289 | 1306 |
|
1290 | 1307 |
#else |
1291 | 1308 |
|
... | ... |
@@ -1295,15 +1313,15 @@ cl_error_t cli_get_filepath_from_filedesc(int desc, char **filepath) |
1295 | 1295 |
|
1296 | 1296 |
#endif |
1297 | 1297 |
|
1298 |
- cli_dbgmsg("cli_get_filepath_from_filedesc: File path for fd [%d] is: %s\n", desc, *filepath); |
|
1298 |
+ cli_dbgmsg("cli_get_filepath_from_filedesc: File path for fd [%d] is: %s\n", desc, evaluated_filepath); |
|
1299 | 1299 |
status = CL_SUCCESS; |
1300 | 1300 |
*filepath = evaluated_filepath; |
1301 | 1301 |
|
1302 | 1302 |
done: |
1303 | 1303 |
|
1304 | 1304 |
#ifdef _WIN32 |
1305 |
- if (NULL != long_evaluated_filepath) { |
|
1306 |
- free(long_evaluated_filepath); |
|
1305 |
+ if (NULL != long_evaluated_filepathW) { |
|
1306 |
+ free(long_evaluated_filepathW); |
|
1307 | 1307 |
} |
1308 | 1308 |
#endif |
1309 | 1309 |
return status; |
... | ... |
@@ -3773,7 +3773,7 @@ static cl_error_t parse_formula(FILE *out_file, char data[], unsigned data_size) |
3773 | 3773 |
if (str_len > data_size - data_pos) { |
3774 | 3774 |
str_len = data_size - data_pos; |
3775 | 3775 |
} |
3776 |
- if (CL_SUCCESS == cli_codepage_to_utf8(&data[data_pos + 3], str_len, 1200, &utf8, &utf8_size)) { |
|
3776 |
+ if (CL_SUCCESS == cli_codepage_to_utf8(&data[data_pos + 3], str_len, CODEPAGE_UTF16_LE, &utf8, &utf8_size)) { |
|
3777 | 3777 |
if (0 < utf8_size) { |
3778 | 3778 |
size_written = fwrite(utf8, 1, utf8_size, out_file); |
3779 | 3779 |
free(utf8); |
... | ... |
@@ -4326,7 +4326,7 @@ cl_error_t cli_xlm_extract_macros(const char *dir, cli_ctx *ctx, struct uniq *U, |
4326 | 4326 |
string_length = biff_header.length - 3; |
4327 | 4327 |
} |
4328 | 4328 |
|
4329 |
- if (CL_SUCCESS == cli_codepage_to_utf8(&data[3], string_length, 1200, &utf8, &utf8_size)) { |
|
4329 |
+ if (CL_SUCCESS == cli_codepage_to_utf8(&data[3], string_length, CODEPAGE_UTF16_LE, &utf8, &utf8_size)) { |
|
4330 | 4330 |
if (0 < utf8_size) { |
4331 | 4331 |
size_written = fwrite(utf8, 1, utf8_size, out_file); |
4332 | 4332 |
free(utf8); |