Browse code

Modifies zip scanning behavior so that it scans files using the zip records from the catalogue. This provides deduplication of file records, resulting in faster extraction and scan times, and reduces the likelihood of alerting on non-malicious duplicate file entries as overlapping files.

Micah Snyder authored on 2019/09/09 12:17:43
Showing 1 changed files
... ...
@@ -61,7 +61,7 @@
61 61
 #define ZIP_MAGIC_FILE_BEGIN_SPLIT_OR_SPANNED       (0x08074b50)
62 62
 // clang-format on
63 63
 
64
-#define ZIP_MAX_NUM_OVERLAPPING_FILES 100
64
+#define ZIP_MAX_NUM_OVERLAPPING_FILES 5
65 65
 
66 66
 #define ZIP_CRC32(r, c, b, l) \
67 67
     do {                      \
... ...
@@ -71,9 +71,13 @@
71 71
 
72 72
 #define ZIP_RECORDS_CHECK_BLOCKSIZE 100
73 73
 struct zip_record {
74
-    uint32_t local_offset;
74
+    uint32_t local_header_offset;
75 75
     uint32_t local_header_size;
76
-    uint32_t local_data_size;
76
+    uint32_t compressed_size;
77
+    uint32_t uncompressed_size;
78
+    uint16_t method;
79
+    uint16_t flags;
80
+    int encrypted;
77 81
 };
78 82
 
79 83
 static int wrap_inflateinit2(void *a, int b)
... ...
@@ -685,11 +689,6 @@ static unsigned int parse_local_file_header(
685 685
     zip += LOCAL_HEADER_elen;
686 686
     zsize -= LOCAL_HEADER_elen;
687 687
 
688
-    if (NULL != record) {
689
-        record->local_header_size = zip - local_header;
690
-        record->local_data_size   = csize;
691
-    }
692
-
693 688
     if (!csize) { /* FIXME: what's used for method0 files? csize or usize? Nothing in the specs, needs testing */
694 689
         cli_dbgmsg("cli_unzip: local header - skipping empty file\n");
695 690
     } else {
... ...
@@ -708,7 +707,16 @@ static unsigned int parse_local_file_header(
708 708
                 if (fmap_need_ptr_once(map, zip, csize))
709 709
                     *ret = unz(zip, csize, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags, num_files_unzipped, ctx, tmpd, zcb);
710 710
             }
711
+        } else {
712
+            record->local_header_offset = loff;
713
+            record->local_header_size   = zip - local_header;
714
+            record->compressed_size     = csize;
715
+            record->uncompressed_size   = usize;
716
+            record->method              = LOCAL_HEADER_method;
717
+            record->flags               = LOCAL_HEADER_flags;
718
+            record->encrypted           = (LOCAL_HEADER_flags & F_ENCR) ? 1 : 0;
711 719
         }
720
+
712 721
         zip += csize;
713 722
         zsize -= csize;
714 723
     }
... ...
@@ -751,9 +759,8 @@ static unsigned int parse_local_file_header(
751 751
  * @param[in,out] ctx                   scan context
752 752
  * @param tmpd                          temp directory path name
753 753
  * @param requests                      (optional) structure use to search the zip for files by name
754
- * @return unsigned int                 returns the size of the file header in the central directory, or 0 if no more files
755 754
  * @param record                        (optional) a pointer to a struct to store file record information.
756
- * @return unsigned int
755
+ * @return unsigned int                 returns the size of the file header in the central directory, or 0 if no more files
757 756
  */
758 757
 static unsigned int
759 758
 parse_central_directory_file_header(
... ...
@@ -818,9 +825,6 @@ parse_central_directory_file_header(
818 818
 
819 819
     if (!requests) {
820 820
         if (CENTRAL_HEADER_off < zsize - SIZEOF_LOCAL_HEADER) {
821
-            if (NULL != record) {
822
-                record->local_offset = CENTRAL_HEADER_off;
823
-            }
824 821
             parse_local_file_header(map,
825 822
                                     CENTRAL_HEADER_off,
826 823
                                     zsize - CENTRAL_HEADER_off,
... ...
@@ -876,46 +880,63 @@ static int sort_by_file_offset(const void *first, const void *second)
876 876
 
877 877
     /* Avoid return x - y, which can cause undefined behaviour
878 878
        because of signed integer overflow. */
879
-    if (a->local_offset < b->local_offset)
879
+    if (a->local_header_offset < b->local_header_offset)
880 880
         return -1;
881
-    else if (a->local_offset > b->local_offset)
881
+    else if (a->local_header_offset > b->local_header_offset)
882 882
         return 1;
883 883
 
884 884
     return 0;
885 885
 }
886 886
 
887 887
 /**
888
- * @brief Search the central directory for overlapping files.
888
+ * @brief Create a catalogue of the central directory.
889 889
  *
890
- * This function indexes every file in the central directory and sorts them by file entry offset.
890
+ * This function indexes every file in the central directory.
891
+ * It creates a zip record catalogue and sorts them by file entry offset.
891 892
  * Then it iterates the sorted file records looking for overlapping files.
892 893
  *
893
- * @param ctx
894
- * @param map
895
- * @param fsize
896
- * @param coff
894
+ * The caller is responsible for freeing the catalogue.
895
+ * The catalogue may contain duplicate items, which should be skipped.
896
+ *
897
+ * @param ctx               The scanning context
898
+ * @param map               The file map
899
+ * @param fsize             The file size
900
+ * @param coff              The central directory offset
901
+ * @param[out] catalogue    A catalogue of zip_records found in the central directory.
902
+ * @param[out] num_records  The number of records in the catalogue.
897 903
  * @return cl_error_t  CL_CLEAN if no overlapping files
898 904
  * @return cl_error_t  CL_VIRUS if overlapping files and heuristic alerts are enabled
899 905
  * @return cl_error_t  CL_EFORMAT if overlapping files and heuristic alerts are disabled
900 906
  * @return cl_error_t  CL_ETIMEOUT if the scan time limit is exceeded.
901 907
  * @return cl_error_t  CL_EMEM for memory allocation errors.
902 908
  */
903
-cl_error_t check_for_overlapping_files(cli_ctx *ctx,
904
-                                       fmap_t *map,
905
-                                       uint32_t fsize,
906
-                                       uint32_t coff)
909
+cl_error_t index_the_central_directory(
910
+    cli_ctx *ctx,
911
+    fmap_t *map,
912
+    uint32_t fsize,
913
+    uint32_t coff,
914
+    struct zip_record **catalogue,
915
+    size_t *num_records)
907 916
 {
908 917
     cl_error_t status = CL_CLEAN;
909 918
     cl_error_t ret    = CL_CLEAN;
910 919
 
920
+    size_t num_record_blocks = 0;
921
+    size_t index             = 0;
922
+
911 923
     struct zip_record *zip_catalogue = NULL;
912
-    size_t num_record_blocks         = 0;
913
-    size_t index                     = 0;
914 924
     size_t records_count             = 0;
925
+    struct zip_record *curr_record   = NULL;
926
+    struct zip_record *prev_record   = NULL;
927
+    uint32_t num_overlapping_files   = 0;
915 928
 
916
-    struct zip_record *curr_record = NULL;
917
-    struct zip_record *prev_record = NULL;
918
-    uint32_t nOverlappingFiles     = 0;
929
+    if (NULL == catalogue || NULL == num_records) {
930
+        cli_errmsg("index_the_central_directory: Invalid NULL arguments\n");
931
+        goto done;
932
+    }
933
+
934
+    *catalogue   = NULL;
935
+    *num_records = 0;
919 936
 
920 937
     zip_catalogue = (struct zip_record *)cli_malloc(sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE);
921 938
     if (NULL == zip_catalogue) {
... ...
@@ -936,7 +957,7 @@ cl_error_t check_for_overlapping_files(cli_ctx *ctx,
936 936
                                                             ctx,
937 937
                                                             NULL, // tmpd not required
938 938
                                                             NULL,
939
-                                                            &zip_catalogue[records_count]))) {
939
+                                                            &(zip_catalogue[records_count])))) {
940 940
         index++;
941 941
 
942 942
         if (cli_checktimelimit(ctx) != CL_SUCCESS) {
... ...
@@ -970,64 +991,75 @@ cl_error_t check_for_overlapping_files(cli_ctx *ctx,
970 970
             }
971 971
             num_record_blocks++;
972 972
             /* zero out the memory for the new records */
973
-            memset(&zip_catalogue[records_count], 0, sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - records_count));
973
+            memset(&(zip_catalogue[records_count]), 0, sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - records_count));
974 974
         }
975 975
     }
976 976
 
977
-    if (records_count < 2) {
978
-        goto done;
979
-    }
980
-
981
-    /*
982
-     * Sort the records by local file offset
983
-     */
984
-    cli_qsort(zip_catalogue, records_count, sizeof(struct zip_record), sort_by_file_offset);
985
-
986
-    /*
987
-     * Detect overlapping files.
988
-     */
989
-    for (index = 1; index < records_count; index++) {
990
-        prev_record = &zip_catalogue[index - 1];
991
-        curr_record = &zip_catalogue[index];
992
-
993
-        /* Check for integer overflow in 32bit size & offset values */
994
-        if ((UINT32_MAX - (prev_record->local_header_size + prev_record->local_data_size) < prev_record->local_offset) ||
995
-            (UINT32_MAX - (curr_record->local_header_size + curr_record->local_data_size) < curr_record->local_offset)) {
996
-            cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n");
997
-            status = CL_EFORMAT;
998
-            goto done;
999
-        }
1000
-
1001
-        if (((curr_record->local_offset >= prev_record->local_offset) && (curr_record->local_offset < prev_record->local_offset + prev_record->local_header_size + prev_record->local_data_size)) ||
1002
-            ((prev_record->local_offset >= curr_record->local_offset) && (prev_record->local_offset < curr_record->local_offset + curr_record->local_header_size + curr_record->local_data_size))) {
1003
-            /* Overlapping file detected */
1004
-            nOverlappingFiles++;
977
+    if (records_count > 1) {
978
+        /*
979
+         * Sort the records by local file offset
980
+         */
981
+        cli_qsort(zip_catalogue, records_count, sizeof(struct zip_record), sort_by_file_offset);
982
+
983
+        /*
984
+         * Detect overlapping files.
985
+         */
986
+        for (index = 1; index < records_count; index++) {
987
+            prev_record = &(zip_catalogue[index - 1]);
988
+            curr_record = &(zip_catalogue[index]);
989
+
990
+            /* Check for integer overflow in 32bit size & offset values */
991
+            if ((UINT32_MAX - (prev_record->local_header_size + prev_record->compressed_size) < prev_record->local_header_offset) ||
992
+                (UINT32_MAX - (curr_record->local_header_size + curr_record->compressed_size) < curr_record->local_header_offset)) {
993
+                cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n");
994
+                status = CL_EFORMAT;
995
+                goto done;
996
+            }
1005 997
 
1006
-            cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
1007
-            cli_dbgmsg("    previous file end:  %u\n", prev_record->local_offset + prev_record->local_header_size + prev_record->local_data_size);
1008
-            cli_dbgmsg("    current file start: %u\n", curr_record->local_offset);
998
+            if (((curr_record->local_header_offset >= prev_record->local_header_offset) && (curr_record->local_header_offset < prev_record->local_header_offset + prev_record->local_header_size + prev_record->compressed_size)) ||
999
+                ((prev_record->local_header_offset >= curr_record->local_header_offset) && (prev_record->local_header_offset < curr_record->local_header_offset + curr_record->local_header_size + curr_record->compressed_size))) {
1000
+                /* Overlapping file detected */
1001
+                num_overlapping_files++;
1009 1002
 
1010
-            if (ZIP_MAX_NUM_OVERLAPPING_FILES < nOverlappingFiles) {
1011
-                if (SCAN_HEURISTICS) {
1012
-                    status = cli_append_virus(ctx, "Heuristics.Zip.OverlappingFiles");
1003
+                if ((curr_record->local_header_offset == prev_record->local_header_offset) &&
1004
+                    (curr_record->local_header_size == prev_record->local_header_size) &&
1005
+                    (curr_record->compressed_size == prev_record->compressed_size)) {
1006
+                    cli_dbgmsg("cli_unzip: Ignoring duplicate file entry @ 0x%x.\n", curr_record->local_header_offset);
1013 1007
                 } else {
1014
-                    status = CL_EFORMAT;
1008
+                    cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
1009
+                    cli_dbgmsg("    previous file end:  %u\n", prev_record->local_header_offset + prev_record->local_header_size + prev_record->compressed_size);
1010
+                    cli_dbgmsg("    current file start: %u\n", curr_record->local_header_offset);
1011
+
1012
+                    if (ZIP_MAX_NUM_OVERLAPPING_FILES < num_overlapping_files) {
1013
+                        if (SCAN_HEURISTICS) {
1014
+                            status = cli_append_virus(ctx, "Heuristics.Zip.OverlappingFiles");
1015
+                        } else {
1016
+                            status = CL_EFORMAT;
1017
+                        }
1018
+                        goto done;
1019
+                    }
1015 1020
                 }
1016
-                break;
1017 1021
             }
1018
-        }
1019 1022
 
1020
-        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
1021
-            cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
1022
-            status = CL_ETIMEOUT;
1023
-            goto done;
1023
+            if (cli_checktimelimit(ctx) != CL_SUCCESS) {
1024
+                cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
1025
+                status = CL_ETIMEOUT;
1026
+                goto done;
1027
+            }
1024 1028
         }
1025 1029
     }
1026 1030
 
1031
+    *catalogue   = zip_catalogue;
1032
+    *num_records = records_count;
1033
+    status       = CL_SUCCESS;
1034
+
1027 1035
 done:
1028 1036
 
1029
-    if (NULL != zip_catalogue) {
1030
-        free(zip_catalogue);
1037
+    if (CL_SUCCESS != status) {
1038
+        if (NULL != zip_catalogue) {
1039
+            free(zip_catalogue);
1040
+            zip_catalogue = NULL;
1041
+        }
1031 1042
     }
1032 1043
 
1033 1044
     return status;
... ...
@@ -1045,6 +1077,9 @@ cl_error_t cli_unzip(cli_ctx *ctx)
1045 1045
 #if HAVE_JSON
1046 1046
     int toval = 0;
1047 1047
 #endif
1048
+    struct zip_record *zip_catalogue = NULL;
1049
+    size_t records_count             = 0;
1050
+    size_t i;
1048 1051
 
1049 1052
     cli_dbgmsg("in cli_unzip\n");
1050 1053
     fsize = (uint32_t)map->len;
... ...
@@ -1082,21 +1117,63 @@ cl_error_t cli_unzip(cli_ctx *ctx)
1082 1082
     if (coff) {
1083 1083
         cli_dbgmsg("cli_unzip: central directory header offset: @%x\n", coff);
1084 1084
 
1085
-        ret = check_for_overlapping_files(ctx, map, fsize, coff);
1085
+        /*
1086
+         * Index the central directory first.
1087
+         */
1088
+        ret = index_the_central_directory(
1089
+            ctx,
1090
+            map,
1091
+            fsize,
1092
+            coff,
1093
+            &zip_catalogue,
1094
+            &records_count);
1086 1095
         if (CL_SUCCESS != ret) {
1087 1096
             goto done;
1088 1097
         }
1089 1098
 
1090
-        while ((coff = parse_central_directory_file_header(map,
1091
-                                                           coff,
1092
-                                                           fsize,
1093
-                                                           &num_files_unzipped,
1094
-                                                           file_count + 1,
1095
-                                                           &ret,
1096
-                                                           ctx,
1097
-                                                           tmpd,
1098
-                                                           NULL,
1099
-                                                           NULL))) {
1099
+        /*
1100
+         * Then decrypt/unzip & scan each unique file entry.
1101
+         */
1102
+        for (i = 0; i < records_count; i++) {
1103
+            const uint8_t *compressed_data = NULL;
1104
+
1105
+            if ((i > 0) &&
1106
+                (zip_catalogue[i].local_header_offset == zip_catalogue[i - 1].local_header_offset) &&
1107
+                (zip_catalogue[i].local_header_size == zip_catalogue[i - 1].local_header_size) &&
1108
+                (zip_catalogue[i].compressed_size == zip_catalogue[i - 1].compressed_size)) {
1109
+
1110
+                /* Duplicate file entry, skip. */
1111
+                cli_dbgmsg("cli_unzip: Skipping unzipping of duplicate file entry: @ 0x%x\n", zip_catalogue[i].local_header_offset);
1112
+                continue;
1113
+            }
1114
+
1115
+            compressed_data = fmap_need_off(map, zip_catalogue[i].local_header_offset + zip_catalogue[i].local_header_size, SIZEOF_LOCAL_HEADER);
1116
+
1117
+            if (zip_catalogue[i].encrypted) {
1118
+                if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size))
1119
+                    ret = zdecrypt(
1120
+                        compressed_data,
1121
+                        zip_catalogue[i].compressed_size,
1122
+                        zip_catalogue[i].uncompressed_size,
1123
+                        fmap_need_off(map, zip_catalogue[i].local_header_offset, SIZEOF_LOCAL_HEADER),
1124
+                        &num_files_unzipped,
1125
+                        ctx,
1126
+                        tmpd,
1127
+                        zip_scan_cb);
1128
+            } else {
1129
+                if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size))
1130
+                    ret = unz(
1131
+                        compressed_data,
1132
+                        zip_catalogue[i].compressed_size,
1133
+                        zip_catalogue[i].uncompressed_size,
1134
+                        zip_catalogue[i].method,
1135
+                        zip_catalogue[i].flags,
1136
+                        &num_files_unzipped,
1137
+                        ctx,
1138
+                        tmpd,
1139
+                        zip_scan_cb);
1140
+            }
1141
+
1100 1142
             file_count++;
1101 1143
             if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) {
1102 1144
                 cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
... ...
@@ -1164,6 +1241,12 @@ cl_error_t cli_unzip(cli_ctx *ctx)
1164 1164
     }
1165 1165
 
1166 1166
 done:
1167
+
1168
+    if (NULL != zip_catalogue) {
1169
+        free(zip_catalogue);
1170
+        zip_catalogue = NULL;
1171
+    }
1172
+
1167 1173
     if (NULL != tmpd) {
1168 1174
         if (!ctx->engine->keeptmp) {
1169 1175
             cli_rmdirs(tmpd);