This commit resolves https://bugzilla.clamav.net/show_bug.cgi?id=12673
Changes in 0.103 to the order of operations for creating fmaps and
performing hashes of fmaps resulted in errors when scanning files that are
4096M and a different (but related) error when scanning files > 4096M.
This is despite the fact that scanning is supposed to be limited to
--max-scansize (MaxScanSize) and was also apparently limited to
INT_MAX - 2 (aka ~1.999999G) back in 2014 to alleviate reported crashes
for a few large file formats.
(see https://bugzilla.clamav.net/show_bug.cgi?id=10960)
This last limitation was not documented, so I added it to the sample
clamd.conf.
Anyways, the main issue is that the fmap module was using "unsigned int"
and was then enforcing a limitation (verbose error messages) when
a map length exceeded the capacity of an unsigned int. This commit
switches the associated variables over to uint64_t, and while fmaps are
still limited to size_t in other places, the fmap module will at least
work with files > 4G on 64bit systems.
In testing this, I found that hashing a file, particularly when
hashing a file on an NTFS partition from Linux, was really slow because
we were hashing in FILEBUFF chunks (about 8K) at a time. Increasing
this to 10MB chunks speeds up scanning of large files.
Finally, now that hashing is performed immediately when an fmap is
created for a file, hashing of files larger than max-scansize was
occurring. This commit adds checks to bail out early if the file size
exceeds the maximum before creating an fmap. It will alert with the
Heuristics.Limits.Exceeded name if the heuristic is enabled.
Also fixed the CheckFmapFeatures.cmake module that detects if
sysconf(_SC_PAGESIZE) is available.
... | ... |
@@ -540,7 +540,7 @@ Sets the maximum amount of data to be scanned for each input file. Archives and |
540 | 540 |
Default: 100M |
541 | 541 |
.TP |
542 | 542 |
\fBMaxFileSize SIZE\fR |
543 |
-Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system.\fR |
|
543 |
+Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system. Technical design limitations prevent ClamAV from scanning files greater than 2 GB at this time.\fR |
|
544 | 544 |
.br |
545 | 545 |
Default: 25M |
546 | 546 |
.TP |
... | ... |
@@ -524,6 +524,8 @@ Example |
524 | 524 |
# Value of 0 disables the limit. |
525 | 525 |
# Note: disabling this limit or setting it too high may result in severe damage |
526 | 526 |
# to the system. |
527 |
+# Technical design limitations prevent ClamAV from scanning files greater than |
|
528 |
+# 2 GB at this time. |
|
527 | 529 |
# Default: 25M |
528 | 530 |
#MaxFileSize 30M |
529 | 531 |
|
... | ... |
@@ -788,4 +790,3 @@ Example |
788 | 788 |
# |
789 | 789 |
# Default: 5000 |
790 | 790 |
# BytecodeTimeout 1000 |
791 |
- |
... | ... |
@@ -85,9 +85,9 @@ pthread_mutex_t fmap_mutex = PTHREAD_MUTEX_INITIALIZER; |
85 | 85 |
|
86 | 86 |
#define fmap_bitmap (m->bitmap) |
87 | 87 |
|
88 |
-static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al); |
|
89 |
-static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al); |
|
90 |
-static inline unsigned int fmap_which_page(fmap_t *m, size_t at); |
|
88 |
+static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al); |
|
89 |
+static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al); |
|
90 |
+static inline uint64_t fmap_which_page(fmap_t *m, size_t at); |
|
91 | 91 |
|
92 | 92 |
static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock); |
93 | 93 |
static void handle_unneed_off(fmap_t *m, size_t at, size_t len); |
... | ... |
@@ -164,7 +164,7 @@ static void unmap_win32(fmap_t *m) |
164 | 164 |
|
165 | 165 |
fmap_t *fmap_check_empty(int fd, off_t offset, size_t len, int *empty, const char *name) |
166 | 166 |
{ /* WIN32 */ |
167 |
- unsigned int pages, mapsz; |
|
167 |
+ uint64_t pages, mapsz; |
|
168 | 168 |
int pgsz = cli_getpagesize(); |
169 | 169 |
STATBUF st; |
170 | 170 |
fmap_t *m = NULL; |
... | ... |
@@ -346,7 +346,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len, |
346 | 346 |
clcb_pread pread_cb, int use_aging) |
347 | 347 |
{ |
348 | 348 |
cl_error_t status = CL_EMEM; |
349 |
- unsigned int pages; |
|
349 |
+ uint64_t pages; |
|
350 | 350 |
size_t mapsz, bitmap_size; |
351 | 351 |
cl_fmap_t *m = NULL; |
352 | 352 |
int pgsz = cli_getpagesize(); |
... | ... |
@@ -368,7 +368,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len, |
368 | 368 |
|
369 | 369 |
pages = fmap_align_items(len, pgsz); |
370 | 370 |
|
371 |
- bitmap_size = pages * sizeof(uint32_t); |
|
371 |
+ bitmap_size = pages * sizeof(uint64_t); |
|
372 | 372 |
mapsz = pages * pgsz; |
373 | 373 |
|
374 | 374 |
m = cli_calloc(1, sizeof(fmap_t)); |
... | ... |
@@ -450,10 +450,10 @@ static void fmap_aging(fmap_t *m) |
450 | 450 |
#ifdef ANONYMOUS_MAP |
451 | 451 |
if (!m->aging) return; |
452 | 452 |
if (m->paged * m->pgsz > UNPAGE_THRSHLD_HI) { /* we alloc'd too much */ |
453 |
- unsigned int i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1; |
|
453 |
+ uint64_t i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1; |
|
454 | 454 |
|
455 | 455 |
for (i = 0; i < m->pages; i++) { |
456 |
- uint32_t s = fmap_bitmap[i]; |
|
456 |
+ uint64_t s = fmap_bitmap[i]; |
|
457 | 457 |
if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == FM_MASK_PAGED) { |
458 | 458 |
/* page is paged and not locked: dec age */ |
459 | 459 |
if (s & FM_MASK_COUNT) fmap_bitmap[i]--; |
... | ... |
@@ -464,7 +464,7 @@ static void fmap_aging(fmap_t *m) |
464 | 464 |
avail++; |
465 | 465 |
} else { |
466 | 466 |
/* Insert sort onto a stack'd array - same performance as quickselect */ |
467 |
- unsigned int insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT; |
|
467 |
+ uint64_t insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT; |
|
468 | 468 |
if (avail <= maxavail || (fmap_bitmap[freeme[maxavail]] & FM_MASK_COUNT) > age) { |
469 | 469 |
while ((fmap_bitmap[freeme[insert_to]] & FM_MASK_COUNT) > age) { |
470 | 470 |
freeme[insert_to + 1] = freeme[insert_to]; |
... | ... |
@@ -513,15 +513,15 @@ static void fmap_aging(fmap_t *m) |
513 | 513 |
#endif |
514 | 514 |
} |
515 | 515 |
|
516 |
-static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_t lock_count) |
|
516 |
+static int fmap_readpage(fmap_t *m, uint64_t first_page, uint64_t count, uint64_t lock_count) |
|
517 | 517 |
{ |
518 | 518 |
size_t readsz = 0, eintr_off; |
519 | 519 |
char *pptr = NULL, errtxt[256]; |
520 |
- uint32_t sbitmap; |
|
520 |
+ uint64_t sbitmap; |
|
521 | 521 |
uint64_t i, page = first_page, force_read = 0; |
522 | 522 |
|
523 |
- if ((size_t)(m->real_len) > (size_t)(UINT_MAX)) { |
|
524 |
- cli_dbgmsg("fmap_readage: size of file exceeds total prefaultible page size (unpacked file is too large)\n"); |
|
523 |
+ if ((uint64_t)(m->real_len) > (uint64_t)(m->pages * m->pgsz)) { |
|
524 |
+ cli_dbgmsg("fmap_readpage: size of file exceeds total prefaultible page size (unpacked file is too large)\n"); |
|
525 | 525 |
return 1; |
526 | 526 |
} |
527 | 527 |
|
... | ... |
@@ -573,7 +573,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_ |
573 | 573 |
if (force_read) { |
574 | 574 |
/* we have some pending reads to perform */ |
575 | 575 |
if (m->handle_is_fd) { |
576 |
- unsigned int j; |
|
576 |
+ uint64_t j; |
|
577 | 577 |
int _fd = (int)(ptrdiff_t)m->handle; |
578 | 578 |
for (j = first_page; j < page; j++) { |
579 | 579 |
if (fmap_bitmap[j] & FM_MASK_SEEN) { |
... | ... |
@@ -613,7 +613,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_ |
613 | 613 |
cli_strerror(errno, errtxt, sizeof(errtxt)); |
614 | 614 |
cli_errmsg("fmap_readpage: pread error: %s\n", errtxt); |
615 | 615 |
} else { |
616 |
- cli_warnmsg("fmap_readpage: pread fail: asked for %lu bytes @ offset %lu, got %lu\n", (long unsigned int)readsz, (long unsigned int)target_offset, (long unsigned int)got); |
|
616 |
+ cli_warnmsg("fmap_readpage: pread fail: asked for %zu bytes @ offset %zu, got %zd\n", readsz, (size_t)target_offset, got); |
|
617 | 617 |
} |
618 | 618 |
return 1; |
619 | 619 |
} |
... | ... |
@@ -672,9 +672,9 @@ static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock) |
672 | 672 |
return (void *)ret; |
673 | 673 |
} |
674 | 674 |
|
675 |
-static void fmap_unneed_page(fmap_t *m, unsigned int page) |
|
675 |
+static void fmap_unneed_page(fmap_t *m, uint64_t page) |
|
676 | 676 |
{ |
677 |
- uint32_t s = fmap_bitmap[page]; |
|
677 |
+ uint64_t s = fmap_bitmap[page]; |
|
678 | 678 |
|
679 | 679 |
if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == (FM_MASK_PAGED | FM_MASK_LOCKED)) { |
680 | 680 |
/* page is paged and locked: check lock count */ |
... | ... |
@@ -693,7 +693,7 @@ static void fmap_unneed_page(fmap_t *m, unsigned int page) |
693 | 693 |
|
694 | 694 |
static void handle_unneed_off(fmap_t *m, size_t at, size_t len) |
695 | 695 |
{ |
696 |
- unsigned int i, first_page, last_page; |
|
696 |
+ uint64_t i, first_page, last_page; |
|
697 | 697 |
if (!m->aging) return; |
698 | 698 |
if (!len) { |
699 | 699 |
cli_warnmsg("fmap_unneed: attempted void unneed\n"); |
... | ... |
@@ -737,7 +737,7 @@ static void unmap_malloc(fmap_t *m) |
737 | 737 |
|
738 | 738 |
static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint) |
739 | 739 |
{ |
740 |
- unsigned int i, first_page, last_page; |
|
740 |
+ uint64_t i, first_page, last_page; |
|
741 | 741 |
void *ptr = (void *)((char *)m->data + at); |
742 | 742 |
|
743 | 743 |
if (!len_hint || len_hint > m->real_len - at) |
... | ... |
@@ -753,7 +753,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint) |
753 | 753 |
|
754 | 754 |
for (i = first_page; i <= last_page; i++) { |
755 | 755 |
char *thispage = (char *)m->data + i * m->pgsz; |
756 |
- unsigned int scanat, scansz; |
|
756 |
+ uint64_t scanat, scansz; |
|
757 | 757 |
|
758 | 758 |
if (fmap_readpage(m, i, 1, 1)) { |
759 | 759 |
last_page = i - 1; |
... | ... |
@@ -777,7 +777,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint) |
777 | 777 |
|
778 | 778 |
static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len) |
779 | 779 |
{ |
780 |
- unsigned int i, first_page, last_page; |
|
780 |
+ uint64_t i, first_page, last_page; |
|
781 | 781 |
char *src = (void *)((char *)m->data + *at); |
782 | 782 |
char *endptr = NULL; |
783 | 783 |
size_t len = MIN(max_len - 1, m->real_len - *at); |
... | ... |
@@ -793,7 +793,7 @@ static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len) |
793 | 793 |
|
794 | 794 |
for (i = first_page; i <= last_page; i++) { |
795 | 795 |
char *thispage = (char *)m->data + i * m->pgsz; |
796 |
- unsigned int scanat, scansz; |
|
796 |
+ uint64_t scanat, scansz; |
|
797 | 797 |
|
798 | 798 |
if (fmap_readpage(m, i, 1, 0)) |
799 | 799 |
return NULL; |
... | ... |
@@ -941,17 +941,17 @@ fmap_t *fmap(int fd, off_t offset, size_t len, const char *name) |
941 | 941 |
return fmap_check_empty(fd, offset, len, &unused, name); |
942 | 942 |
} |
943 | 943 |
|
944 |
-static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al) |
|
944 |
+static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al) |
|
945 | 945 |
{ |
946 | 946 |
return sz / al + (sz % al != 0); |
947 | 947 |
} |
948 | 948 |
|
949 |
-static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al) |
|
949 |
+static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al) |
|
950 | 950 |
{ |
951 | 951 |
return al * fmap_align_items(sz, al); |
952 | 952 |
} |
953 | 953 |
|
954 |
-static inline unsigned int fmap_which_page(fmap_t *m, size_t at) |
|
954 |
+static inline uint64_t fmap_which_page(fmap_t *m, size_t at) |
|
955 | 955 |
{ |
956 | 956 |
return at / m->pgsz; |
957 | 957 |
} |
... | ... |
@@ -984,8 +984,8 @@ cl_error_t fmap_dump_to_file(fmap_t *map, const char *filepath, const char *tmpd |
984 | 984 |
} else if ((start_offset != 0) && (end_offset != map->real_len)) { |
985 | 985 |
/* If we're only dumping a portion of the file, inlcude the offsets in the prefix,... |
986 | 986 |
* e.g. tmp filename will become something like: filebase.500-1200.<randhex> */ |
987 |
- uint32_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1; |
|
988 |
- prefix = malloc(prefix_len); |
|
987 |
+ size_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1; |
|
988 |
+ prefix = malloc(prefix_len); |
|
989 | 989 |
if (NULL == prefix) { |
990 | 990 |
cli_errmsg("fmap_dump_to_file: Failed to allocate memory for tempfile prefix.\n"); |
991 | 991 |
if (NULL != filebase) |
... | ... |
@@ -1086,7 +1086,7 @@ cl_error_t fmap_get_MD5(unsigned char *hash, fmap_t *map) |
1086 | 1086 |
|
1087 | 1087 |
while (todo) { |
1088 | 1088 |
const void *buf; |
1089 |
- size_t readme = todo < FILEBUFF ? todo : FILEBUFF; |
|
1089 |
+ size_t readme = todo < 1024 * 1024 * 10 ? todo : 1024 * 1024 * 10; |
|
1090 | 1090 |
|
1091 | 1091 |
if (!(buf = fmap_need_off_once(map, at, readme))) { |
1092 | 1092 |
cl_hash_destroy(hashctx); |
... | ... |
@@ -46,12 +46,12 @@ struct cl_fmap { |
46 | 46 |
|
47 | 47 |
/* internal */ |
48 | 48 |
time_t mtime; |
49 |
- unsigned int pages; |
|
49 |
+ uint64_t pages; |
|
50 | 50 |
uint64_t pgsz; |
51 |
- unsigned int paged; |
|
52 |
- unsigned short aging; |
|
53 |
- unsigned short dont_cache_flag; |
|
54 |
- unsigned short handle_is_fd; |
|
51 |
+ uint64_t paged; |
|
52 |
+ uint16_t aging; |
|
53 |
+ uint16_t dont_cache_flag; |
|
54 |
+ uint16_t handle_is_fd; |
|
55 | 55 |
|
56 | 56 |
/* memory interface */ |
57 | 57 |
const void *data; |
... | ... |
@@ -81,7 +81,7 @@ struct cl_fmap { |
81 | 81 |
HANDLE mh; |
82 | 82 |
#endif |
83 | 83 |
unsigned char maphash[16]; |
84 |
- uint32_t *bitmap; |
|
84 |
+ uint64_t *bitmap; |
|
85 | 85 |
char *name; |
86 | 86 |
}; |
87 | 87 |
|
... | ... |
@@ -1506,7 +1506,8 @@ static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *c |
1506 | 1506 |
cli_ac_freedata(&tmdata); |
1507 | 1507 |
cli_ac_freedata(&gmdata); |
1508 | 1508 |
|
1509 |
- return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS : CL_CLEAN; |
|
1509 |
+ return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS |
|
1510 |
+ : CL_CLEAN; |
|
1510 | 1511 |
} |
1511 | 1512 |
|
1512 | 1513 |
#define min(x, y) ((x) < (y) ? (x) : (y)) |
... | ... |
@@ -4746,7 +4747,12 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char * |
4746 | 4746 |
return CL_ENULLARG; |
4747 | 4747 |
} |
4748 | 4748 |
|
4749 |
- /* We have a limit of around 2.17GB (INT_MAX - 2). Enforce it here. */ |
|
4749 |
+ /* We have a limit of around 2GB (INT_MAX - 2). Enforce it here. */ |
|
4750 |
+ /* TODO: Large file support is large-ly untested. Remove this restriction |
|
4751 |
+ * and test with a large set of large files of various types. libclamav's |
|
4752 |
+ * integer type safety has come a long way since 2014, so it's possible |
|
4753 |
+ * we could lift this restriction, but at least one of the parsers is |
|
4754 |
+ * bound to behave badly with large files. */ |
|
4750 | 4755 |
if ((size_t)(map->real_len) > (size_t)(INT_MAX - 2)) |
4751 | 4756 |
return CL_CLEAN; |
4752 | 4757 |
|
... | ... |
@@ -5023,10 +5029,20 @@ cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **vir |
5023 | 5023 |
goto done; |
5024 | 5024 |
} |
5025 | 5025 |
if (sb.st_size <= 5) { |
5026 |
- cli_dbgmsg("cl_scandesc_callback: File too small (%u bytes), ignoring\n", (unsigned int)sb.st_size); |
|
5026 |
+ cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size); |
|
5027 | 5027 |
status = CL_CLEAN; |
5028 | 5028 |
goto done; |
5029 | 5029 |
} |
5030 |
+ if ((uint64_t)sb.st_size > engine->maxfilesize) { |
|
5031 |
+ cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size); |
|
5032 |
+ if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) { |
|
5033 |
+ engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded", context); |
|
5034 |
+ status = CL_VIRUS; |
|
5035 |
+ } else { |
|
5036 |
+ status = CL_CLEAN; |
|
5037 |
+ } |
|
5038 |
+ goto done; |
|
5039 |
+ } |
|
5030 | 5040 |
|
5031 | 5041 |
if (NULL != filename) { |
5032 | 5042 |
(void)cli_basename(filename, strlen(filename), &filename_base); |
... | ... |
@@ -5053,6 +5069,15 @@ done: |
5053 | 5053 |
|
5054 | 5054 |
cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context) |
5055 | 5055 |
{ |
5056 |
+ if (map->real_len > engine->maxfilesize) { |
|
5057 |
+ cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->real_len); |
|
5058 |
+ if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) { |
|
5059 |
+ engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded", context); |
|
5060 |
+ return CL_VIRUS; |
|
5061 |
+ } |
|
5062 |
+ return CL_CLEAN; |
|
5063 |
+ } |
|
5064 |
+ |
|
5056 | 5065 |
return scan_common(map, filename, virname, scanned, engine, scanoptions, context); |
5057 | 5066 |
} |
5058 | 5067 |
|
... | ... |
@@ -497,6 +497,8 @@ TCPAddr 127.0.0.1 |
497 | 497 |
# Value of 0 disables the limit. |
498 | 498 |
# Note: disabling this limit or setting it too high may result in severe damage |
499 | 499 |
# to the system. |
500 |
+# Technical design limitations prevent ClamAV from scanning files greater than |
|
501 |
+# 2 GB at this time. |
|
500 | 502 |
# Default: 25M |
501 | 503 |
#MaxFileSize 30M |
502 | 504 |
|
... | ... |
@@ -653,4 +655,3 @@ TCPAddr 127.0.0.1 |
653 | 653 |
# |
654 | 654 |
# Default: 5000 |
655 | 655 |
# BytecodeTimeout 1000 |
656 |
- |