Browse code

Fix errors when scanning files > 4G

This commit resolves https://bugzilla.clamav.net/show_bug.cgi?id=12673

Changes in 0.103 to the order of operations for creating fmaps and
performing hashes of fmaps resulted in errors when scanning files that are
4096M and a different (but related) error when scanning files > 4096M.
This is despite the fact that scanning is supposed to be limited to
--max-scansize (MaxScanSize) and was also apparently limited to
INT_MAX - 2 (aka ~1.999999G) back in 2014 to alleviate reported crashes
for a few large file formats.
(see https://bugzilla.clamav.net/show_bug.cgi?id=10960)
This last limitation was not documented, so I added it to the sample
clamd.conf.

Anyways, the main issue is that the fmap module was using "unsigned int"
and was then enforcing a limitation (verbose error messages) when
a map length exceeded the capacity of an unsigned int. This commit
switches the associated variables over to uint64_t, and while fmaps are
still limited to size_t in other places, the fmap module will at least
work with files > 4G on 64bit systems.

In testing this, I found that the time to hash a file, particularly when
hashing a file on an NTFS partition from Linux, was really slow because
we were hashing in FILEBUFF chunks (about 8K) at a time. Increasing
this to 10MB chunks speeds up scanning of large files.

Finally, now that hashing is performed immediately when an fmap is
created for a file, hashing of files larger than max-scansize was
occurring. This commit adds checks to bail out early if the file size
exceeds the maximum before creating an fmap. It will alert with the
Heuristics.Limits.Exceeded name if the heuristic is enabled.

Also fixed CheckFmapFeatures.cmake module that detects if
sysconf(_SC_PAGESIZE) is available.

Micah Snyder authored on 2021/03/30 13:40:48
Showing 7 changed files
... ...
@@ -124,9 +124,7 @@ check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
124 124
 check_c_source_compiles(
125 125
     "
126 126
         #include <sys/types.h>
127
-        #if HAVE_UNISTD_H
128 127
         #include <unistd.h>
129
-        #endif
130 128
         int main(void)
131 129
         {
132 130
             int x = sysconf(_SC_PAGESIZE);
... ...
@@ -540,7 +540,7 @@ Sets the maximum amount of data to be scanned for each input file. Archives and
540 540
 Default: 100M
541 541
 .TP
542 542
 \fBMaxFileSize SIZE\fR
543
-Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system.\fR
543
+Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system. Technical design limitations prevent ClamAV from scanning files greater than 2 GB at this time.\fR
544 544
 .br
545 545
 Default: 25M
546 546
 .TP
... ...
@@ -524,6 +524,8 @@ Example
524 524
 # Value of 0 disables the limit.
525 525
 # Note: disabling this limit or setting it too high may result in severe damage
526 526
 # to the system.
527
+# Technical design limitations prevent ClamAV from scanning files greater than
528
+# 2 GB at this time.
527 529
 # Default: 25M
528 530
 #MaxFileSize 30M
529 531
 
... ...
@@ -788,4 +790,3 @@ Example
788 788
 #
789 789
 # Default: 5000
790 790
 # BytecodeTimeout 1000
791
-
... ...
@@ -85,9 +85,9 @@ pthread_mutex_t fmap_mutex = PTHREAD_MUTEX_INITIALIZER;
85 85
 
86 86
 #define fmap_bitmap (m->bitmap)
87 87
 
88
-static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al);
89
-static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al);
90
-static inline unsigned int fmap_which_page(fmap_t *m, size_t at);
88
+static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al);
89
+static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al);
90
+static inline uint64_t fmap_which_page(fmap_t *m, size_t at);
91 91
 
92 92
 static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock);
93 93
 static void handle_unneed_off(fmap_t *m, size_t at, size_t len);
... ...
@@ -164,7 +164,7 @@ static void unmap_win32(fmap_t *m)
164 164
 
165 165
 fmap_t *fmap_check_empty(int fd, off_t offset, size_t len, int *empty, const char *name)
166 166
 { /* WIN32 */
167
-    unsigned int pages, mapsz;
167
+    uint64_t pages, mapsz;
168 168
     int pgsz = cli_getpagesize();
169 169
     STATBUF st;
170 170
     fmap_t *m = NULL;
... ...
@@ -346,7 +346,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len,
346 346
                                       clcb_pread pread_cb, int use_aging)
347 347
 {
348 348
     cl_error_t status = CL_EMEM;
349
-    unsigned int pages;
349
+    uint64_t pages;
350 350
     size_t mapsz, bitmap_size;
351 351
     cl_fmap_t *m = NULL;
352 352
     int pgsz     = cli_getpagesize();
... ...
@@ -368,7 +368,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len,
368 368
 
369 369
     pages = fmap_align_items(len, pgsz);
370 370
 
371
-    bitmap_size = pages * sizeof(uint32_t);
371
+    bitmap_size = pages * sizeof(uint64_t);
372 372
     mapsz       = pages * pgsz;
373 373
 
374 374
     m = cli_calloc(1, sizeof(fmap_t));
... ...
@@ -450,10 +450,10 @@ static void fmap_aging(fmap_t *m)
450 450
 #ifdef ANONYMOUS_MAP
451 451
     if (!m->aging) return;
452 452
     if (m->paged * m->pgsz > UNPAGE_THRSHLD_HI) { /* we alloc'd too much */
453
-        unsigned int i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1;
453
+        uint64_t i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1;
454 454
 
455 455
         for (i = 0; i < m->pages; i++) {
456
-            uint32_t s = fmap_bitmap[i];
456
+            uint64_t s = fmap_bitmap[i];
457 457
             if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == FM_MASK_PAGED) {
458 458
                 /* page is paged and not locked: dec age */
459 459
                 if (s & FM_MASK_COUNT) fmap_bitmap[i]--;
... ...
@@ -464,7 +464,7 @@ static void fmap_aging(fmap_t *m)
464 464
                     avail++;
465 465
                 } else {
466 466
                     /* Insert sort onto a stack'd array - same performance as quickselect */
467
-                    unsigned int insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT;
467
+                    uint64_t insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT;
468 468
                     if (avail <= maxavail || (fmap_bitmap[freeme[maxavail]] & FM_MASK_COUNT) > age) {
469 469
                         while ((fmap_bitmap[freeme[insert_to]] & FM_MASK_COUNT) > age) {
470 470
                             freeme[insert_to + 1] = freeme[insert_to];
... ...
@@ -513,15 +513,15 @@ static void fmap_aging(fmap_t *m)
513 513
 #endif
514 514
 }
515 515
 
516
-static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_t lock_count)
516
+static int fmap_readpage(fmap_t *m, uint64_t first_page, uint64_t count, uint64_t lock_count)
517 517
 {
518 518
     size_t readsz = 0, eintr_off;
519 519
     char *pptr    = NULL, errtxt[256];
520
-    uint32_t sbitmap;
520
+    uint64_t sbitmap;
521 521
     uint64_t i, page = first_page, force_read = 0;
522 522
 
523
-    if ((size_t)(m->real_len) > (size_t)(UINT_MAX)) {
524
-        cli_dbgmsg("fmap_readage: size of file exceeds total prefaultible page size (unpacked file is too large)\n");
523
+    if ((uint64_t)(m->real_len) > (uint64_t)(m->pages * m->pgsz)) {
524
+        cli_dbgmsg("fmap_readpage: size of file exceeds total prefaultible page size (unpacked file is too large)\n");
525 525
         return 1;
526 526
     }
527 527
 
... ...
@@ -573,7 +573,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_
573 573
         if (force_read) {
574 574
             /* we have some pending reads to perform */
575 575
             if (m->handle_is_fd) {
576
-                unsigned int j;
576
+                uint64_t j;
577 577
                 int _fd = (int)(ptrdiff_t)m->handle;
578 578
                 for (j = first_page; j < page; j++) {
579 579
                     if (fmap_bitmap[j] & FM_MASK_SEEN) {
... ...
@@ -613,7 +613,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_
613 613
                     cli_strerror(errno, errtxt, sizeof(errtxt));
614 614
                     cli_errmsg("fmap_readpage: pread error: %s\n", errtxt);
615 615
                 } else {
616
-                    cli_warnmsg("fmap_readpage: pread fail: asked for %lu bytes @ offset %lu, got %lu\n", (long unsigned int)readsz, (long unsigned int)target_offset, (long unsigned int)got);
616
+                    cli_warnmsg("fmap_readpage: pread fail: asked for %zu bytes @ offset %zu, got %zd\n", readsz, (size_t)target_offset, got);
617 617
                 }
618 618
                 return 1;
619 619
             }
... ...
@@ -672,9 +672,9 @@ static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock)
672 672
     return (void *)ret;
673 673
 }
674 674
 
675
-static void fmap_unneed_page(fmap_t *m, unsigned int page)
675
+static void fmap_unneed_page(fmap_t *m, uint64_t page)
676 676
 {
677
-    uint32_t s = fmap_bitmap[page];
677
+    uint64_t s = fmap_bitmap[page];
678 678
 
679 679
     if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == (FM_MASK_PAGED | FM_MASK_LOCKED)) {
680 680
         /* page is paged and locked: check lock count */
... ...
@@ -693,7 +693,7 @@ static void fmap_unneed_page(fmap_t *m, unsigned int page)
693 693
 
694 694
 static void handle_unneed_off(fmap_t *m, size_t at, size_t len)
695 695
 {
696
-    unsigned int i, first_page, last_page;
696
+    uint64_t i, first_page, last_page;
697 697
     if (!m->aging) return;
698 698
     if (!len) {
699 699
         cli_warnmsg("fmap_unneed: attempted void unneed\n");
... ...
@@ -737,7 +737,7 @@ static void unmap_malloc(fmap_t *m)
737 737
 
738 738
 static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)
739 739
 {
740
-    unsigned int i, first_page, last_page;
740
+    uint64_t i, first_page, last_page;
741 741
     void *ptr = (void *)((char *)m->data + at);
742 742
 
743 743
     if (!len_hint || len_hint > m->real_len - at)
... ...
@@ -753,7 +753,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)
753 753
 
754 754
     for (i = first_page; i <= last_page; i++) {
755 755
         char *thispage = (char *)m->data + i * m->pgsz;
756
-        unsigned int scanat, scansz;
756
+        uint64_t scanat, scansz;
757 757
 
758 758
         if (fmap_readpage(m, i, 1, 1)) {
759 759
             last_page = i - 1;
... ...
@@ -777,7 +777,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)
777 777
 
778 778
 static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len)
779 779
 {
780
-    unsigned int i, first_page, last_page;
780
+    uint64_t i, first_page, last_page;
781 781
     char *src     = (void *)((char *)m->data + *at);
782 782
     char *endptr  = NULL;
783 783
     size_t len    = MIN(max_len - 1, m->real_len - *at);
... ...
@@ -793,7 +793,7 @@ static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len)
793 793
 
794 794
     for (i = first_page; i <= last_page; i++) {
795 795
         char *thispage = (char *)m->data + i * m->pgsz;
796
-        unsigned int scanat, scansz;
796
+        uint64_t scanat, scansz;
797 797
 
798 798
         if (fmap_readpage(m, i, 1, 0))
799 799
             return NULL;
... ...
@@ -941,17 +941,17 @@ fmap_t *fmap(int fd, off_t offset, size_t len, const char *name)
941 941
     return fmap_check_empty(fd, offset, len, &unused, name);
942 942
 }
943 943
 
944
-static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al)
944
+static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al)
945 945
 {
946 946
     return sz / al + (sz % al != 0);
947 947
 }
948 948
 
949
-static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al)
949
+static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al)
950 950
 {
951 951
     return al * fmap_align_items(sz, al);
952 952
 }
953 953
 
954
-static inline unsigned int fmap_which_page(fmap_t *m, size_t at)
954
+static inline uint64_t fmap_which_page(fmap_t *m, size_t at)
955 955
 {
956 956
     return at / m->pgsz;
957 957
 }
... ...
@@ -984,8 +984,8 @@ cl_error_t fmap_dump_to_file(fmap_t *map, const char *filepath, const char *tmpd
984 984
         } else if ((start_offset != 0) && (end_offset != map->real_len)) {
985 985
             /* If we're only dumping a portion of the file, inlcude the offsets in the prefix,...
986 986
 			 * e.g. tmp filename will become something like:  filebase.500-1200.<randhex> */
987
-            uint32_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1;
988
-            prefix              = malloc(prefix_len);
987
+            size_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1;
988
+            prefix            = malloc(prefix_len);
989 989
             if (NULL == prefix) {
990 990
                 cli_errmsg("fmap_dump_to_file: Failed to allocate memory for tempfile prefix.\n");
991 991
                 if (NULL != filebase)
... ...
@@ -1086,7 +1086,7 @@ cl_error_t fmap_get_MD5(unsigned char *hash, fmap_t *map)
1086 1086
 
1087 1087
     while (todo) {
1088 1088
         const void *buf;
1089
-        size_t readme = todo < FILEBUFF ? todo : FILEBUFF;
1089
+        size_t readme = todo < 1024 * 1024 * 10 ? todo : 1024 * 1024 * 10;
1090 1090
 
1091 1091
         if (!(buf = fmap_need_off_once(map, at, readme))) {
1092 1092
             cl_hash_destroy(hashctx);
... ...
@@ -46,12 +46,12 @@ struct cl_fmap {
46 46
 
47 47
     /* internal */
48 48
     time_t mtime;
49
-    unsigned int pages;
49
+    uint64_t pages;
50 50
     uint64_t pgsz;
51
-    unsigned int paged;
52
-    unsigned short aging;
53
-    unsigned short dont_cache_flag;
54
-    unsigned short handle_is_fd;
51
+    uint64_t paged;
52
+    uint16_t aging;
53
+    uint16_t dont_cache_flag;
54
+    uint16_t handle_is_fd;
55 55
 
56 56
     /* memory interface */
57 57
     const void *data;
... ...
@@ -81,7 +81,7 @@ struct cl_fmap {
81 81
     HANDLE mh;
82 82
 #endif
83 83
     unsigned char maphash[16];
84
-    uint32_t *bitmap;
84
+    uint64_t *bitmap;
85 85
     char *name;
86 86
 };
87 87
 
... ...
@@ -1506,7 +1506,8 @@ static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *c
1506 1506
     cli_ac_freedata(&tmdata);
1507 1507
     cli_ac_freedata(&gmdata);
1508 1508
 
1509
-    return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS : CL_CLEAN;
1509
+    return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS
1510
+                                                   : CL_CLEAN;
1510 1511
 }
1511 1512
 
1512 1513
 #define min(x, y) ((x) < (y) ? (x) : (y))
... ...
@@ -4746,7 +4747,12 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char *
4746 4746
         return CL_ENULLARG;
4747 4747
     }
4748 4748
 
4749
-    /* We have a limit of around 2.17GB (INT_MAX - 2). Enforce it here. */
4749
+    /* We have a limit of around 2GB (INT_MAX - 2). Enforce it here. */
4750
+    /* TODO: Large file support is large-ly untested. Remove this restriction
4751
+     * and test with a large set of large files of various types. libclamav's
4752
+     * integer type safety has come a long way since 2014, so it's possible
4753
+     * we could lift this restriction, but at least one of the parsers is
4754
+     * bound to behave badly with large files. */
4750 4755
     if ((size_t)(map->real_len) > (size_t)(INT_MAX - 2))
4751 4756
         return CL_CLEAN;
4752 4757
 
... ...
@@ -5023,10 +5029,20 @@ cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **vir
5023 5023
         goto done;
5024 5024
     }
5025 5025
     if (sb.st_size <= 5) {
5026
-        cli_dbgmsg("cl_scandesc_callback: File too small (%u bytes), ignoring\n", (unsigned int)sb.st_size);
5026
+        cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5027 5027
         status = CL_CLEAN;
5028 5028
         goto done;
5029 5029
     }
5030
+    if ((uint64_t)sb.st_size > engine->maxfilesize) {
5031
+        cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5032
+        if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5033
+            engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded", context);
5034
+            status = CL_VIRUS;
5035
+        } else {
5036
+            status = CL_CLEAN;
5037
+        }
5038
+        goto done;
5039
+    }
5030 5040
 
5031 5041
     if (NULL != filename) {
5032 5042
         (void)cli_basename(filename, strlen(filename), &filename_base);
... ...
@@ -5053,6 +5069,15 @@ done:
5053 5053
 
5054 5054
 cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5055 5055
 {
5056
+    if (map->real_len > engine->maxfilesize) {
5057
+        cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->real_len);
5058
+        if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5059
+            engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded", context);
5060
+            return CL_VIRUS;
5061
+        }
5062
+        return CL_CLEAN;
5063
+    }
5064
+
5056 5065
     return scan_common(map, filename, virname, scanned, engine, scanoptions, context);
5057 5066
 }
5058 5067
 
... ...
@@ -497,6 +497,8 @@ TCPAddr 127.0.0.1
497 497
 # Value of 0 disables the limit.
498 498
 # Note: disabling this limit or setting it too high may result in severe damage
499 499
 # to the system.
500
+# Technical design limitations prevent ClamAV from scanning files greater than
501
+# 2 GB at this time.
500 502
 # Default: 25M
501 503
 #MaxFileSize 30M
502 504
 
... ...
@@ -653,4 +655,3 @@ TCPAddr 127.0.0.1
653 653
 #
654 654
 # Default: 5000
655 655
 # BytecodeTimeout 1000
656
-