Browse code

Fix invalid zip & macho scan recursion

If zip content is detected within a file by way of the embedded file
type recognition scan (in `scanraw()`), a raw scan of that "ZIPSFX" will
detect all subsequent zip entries as new ZIPSFXs. Though they aren't
actually scanned later, they show up in the metadata JSON. This commit
prevents embedded file type detection for ZIPSFX, as we already do
for ZIP.

Semi-related, the mach-o unibin parser presently allows scanning of FAT
partitions anywhere in the fmap, including the very beginning of the
fmap. This would be an infinite loop, scanning the same file over and
over again, were it not for the scan recursion limit. The recursion
limit keeps it from looping forever, but it is still bad behavior. This
commit prevents scanning FAT files from the mach-o unibin parser where
the offset is less than the end of the headers.

Also fixed an unsigned integer addition in an OLE2 parser file-count
limit comparison that might overflow (wrap around).

Micah Snyder (micasnyd) authored on 2021/06/19 03:23:46
Showing 3 changed files
... ...
@@ -562,6 +562,15 @@ int cli_scanmacho_unibin(cli_ctx *ctx)
562 562
         cli_dbgmsg("UNIBIN: Binary %u of %u\n", i + 1, fat_header.nfats);
563 563
         cli_dbgmsg("UNIBIN: File offset: %u\n", fat_arch.offset);
564 564
         cli_dbgmsg("UNIBIN: File size: %u\n", fat_arch.size);
565
+
566
+        /* The offset must be greater than the location of the header or we risk
567
+           re-scanning the same data over and over again. The scan recursion max
568
+           will save us, but it will still cause other problems and waste CPU. */
569
+        if (fat_arch.offset < at) {
570
+            cli_dbgmsg("Invalid fat offset: %d\n", fat_arch.offset);
571
+            RETURN_BROKEN;
572
+        }
573
+
565 574
         ret = cli_magic_scan_nested_fmap_type(map, fat_arch.offset, fat_arch.size, ctx, CL_TYPE_ANY, NULL);
566 575
         if (ret == CL_VIRUS)
567 576
             break;
... ...
@@ -673,7 +673,7 @@ ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index,
673 673
                 break;
674 674
             case 2: /* File */
675 675
                 ole2_listmsg("file node\n");
676
-                if (ctx && ctx->engine->maxfiles && ctx->scannedfiles + *file_count > ctx->engine->maxfiles) {
676
+                if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) {
677 677
                     cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles);
678 678
                     ole2_list_delete(&node_list);
679 679
                     return CL_EMAXFILES;
... ...
@@ -3123,6 +3123,7 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
3123 3123
         (type != CL_TYPE_GPT) &&       /* Omit GPT files because it's an image format that we can extract and scan manually. */
3124 3124
         (type != CL_TYPE_CPIO_OLD) &&  /* Omit CPIO_OLD files because it's an image format that we can extract and scan manually. */
3125 3125
         (type != CL_TYPE_ZIP) &&       /* Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan. */
3126
+        (type != CL_TYPE_ZIPSFX) &&    /* Omit ZIPSFX files because we should've already detected each entry with embedded file type recognition already! */
3126 3127
         (type != CL_TYPE_OLD_TAR) &&   /* Omit OLD TAR files because it's a raw archive format that we can extract and scan manually. */
3127 3128
         (type != CL_TYPE_POSIX_TAR)) { /* Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually. */
3128 3129
         /*