Browse code

Added the zip scanning improvements found in v0.102.0: files are now scanned using zip records from a sorted catalogue, which deduplicates file records. This results in faster extraction and scan times and reduces the likelihood of alerting on non-malicious duplicate file entries as overlapping files.

Micah Snyder authored on 2019/10/29 23:20:24
Showing 4 changed files
... ...
@@ -7,6 +7,12 @@ Note: This file refers to the source tarball. Things described here may differ
7 7
 
8 8
 ClamAV 0.101.5 is a security patch release that addresses the following issues.
9 9
 
10
+- Added the zip scanning improvements found in v0.102.0: files are now scanned
11
+  using zip records from a sorted catalogue, which deduplicates file records.
12
+  This results in faster extraction and scan times and reduces the
13
+  likelihood of alerting on non-malicious duplicate file entries as overlapping
14
+  files.
15
+
10 16
 - Signature load time is significantly reduced by changing to a more efficient
11 17
   algorithm for loading signature patterns and allocating the AC trie.
12 18
   Patch courtesy of Alberto Wu.
... ...
@@ -216,7 +216,7 @@ cli_file_t cli_filetype(const unsigned char *buf, size_t buflen, const struct cl
216 216
 
217 217
 int is_tar(const unsigned char *buf, unsigned int nbytes);
218 218
 
219
-/* organize by length, cannot exceed SIZEOF_LH */
219
+/* organize by length, cannot exceed SIZEOF_LOCAL_HEADER */
220 220
 const struct ooxml_ftcodes {
221 221
     const char *entry;
222 222
     size_t len;
... ...
@@ -319,7 +319,7 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
319 319
 		    cli_dbgmsg("Recognized POSIX tar file\n");
320 320
 		    return CL_TYPE_POSIX_TAR;
321 321
 	    }
322
-	} else if (ret == CL_TYPE_ZIP && bread > 2*(SIZEOF_LH+5)) {
322
+	} else if (ret == CL_TYPE_ZIP && bread > 2*(SIZEOF_LOCAL_HEADER+5)) {
323 323
             const char lhdr_magic[4] = {0x50,0x4b,0x03,0x04};
324 324
             const unsigned char *zbuff = buff;
325 325
             uint32_t zread = bread;
... ...
@@ -329,11 +329,11 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
329 329
             int lhc = 0;
330 330
             int zi, i, likely_ooxml = 0;
331 331
             cli_file_t ret2;
332
-            
332
+
333 333
             for (zi=0; zi<32; zi++) {
334 334
                 znamep = (const unsigned char *)cli_memstr((const char *)znamep, zlen, lhdr_magic, 4);
335 335
                 if (NULL != znamep) {
336
-                    znamep += SIZEOF_LH;
336
+                    znamep += SIZEOF_LOCAL_HEADER;
337 337
                     zlen = zread - (znamep - zbuff);
338 338
                     if (zlen > OOXML_DETECT_MAXLEN) {
339 339
                         for (i = 0; ooxml_detect[i].entry; i++) {
... ...
@@ -367,8 +367,8 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
367 367
                 }
368 368
 
369 369
                 if (znamep == NULL) {
370
-                    if (map->len-zoff > SIZEOF_LH) {
371
-                        zoff -= SIZEOF_LH+OOXML_DETECT_MAXLEN+1; /* remap for SIZEOF_LH+filelen for header overlap map boundary */ 
370
+                    if (map->len-zoff > SIZEOF_LOCAL_HEADER) {
371
+                        zoff -= SIZEOF_LOCAL_HEADER+OOXML_DETECT_MAXLEN+1; /* remap for SIZEOF_LOCAL_HEADER+filelen for header overlap map boundary */
372 372
                         zread = MIN(MAGIC_BUFFER_SIZE, map->len-zoff);
373 373
                         zbuff = fmap_need_off_once(map, zoff, zread);
374 374
                         if (zbuff == NULL) {
... ...
@@ -440,7 +440,7 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
440 440
 		    if((encoding = encoding_detect_bom(buff, bread))) {
441 441
 			    unsigned char decodedbuff[(MAGIC_BUFFER_SIZE+1)*2];
442 442
 			    m_area_t in_area, out_area;
443
-			    
443
+
444 444
 			    memset(decodedbuff, 0, sizeof(decodedbuff));
445 445
 
446 446
 			    in_area.buffer = (unsigned char *) buff;
... ...
@@ -450,7 +450,7 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
450 450
 			    out_area.length = sizeof(decodedbuff);
451 451
 			    out_area.offset = 0;
452 452
 
453
-			    /* in htmlnorm we simply skip over \0 chars, allowing HTML parsing in any unicode 
453
+			    /* in htmlnorm we simply skip over \0 chars, allowing HTML parsing in any unicode
454 454
 			     * (multibyte characters will not be exactly handled, but that is not a problem).
455 455
 			     * However when detecting whether a file is HTML or not, we need exact conversion.
456 456
 			     * (just eliminating zeros and matching would introduce false positives */
... ...
@@ -54,6 +54,13 @@
54 54
 #define UNZIP_PRIVATE
55 55
 #include "unzip.h"
56 56
 
57
+// clang-format off
58
+#define ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN    (0x02014b50)
59
+#define ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_END      (0x06054b50)
60
+#define ZIP_MAGIC_LOCAL_FILE_HEADER                 (0x04034b50)
61
+#define ZIP_MAGIC_FILE_BEGIN_SPLIT_OR_SPANNED       (0x08074b50)
62
+// clang-format on
63
+
57 64
 #define ZIP_MAX_NUM_OVERLAPPING_FILES 5
58 65
 
59 66
 #define ZIP_CRC32(r, c, b, l) \
... ...
@@ -62,25 +69,60 @@
62 62
         r = ~r;               \
63 63
     } while (0)
64 64
 
65
+#define ZIP_RECORDS_CHECK_BLOCKSIZE 100
66
+struct zip_record {
67
+    uint32_t local_header_offset;
68
+    uint32_t local_header_size;
69
+    uint32_t compressed_size;
70
+    uint32_t uncompressed_size;
71
+    uint16_t method;
72
+    uint16_t flags;
73
+    int encrypted;
74
+};
75
+
65 76
 static int wrap_inflateinit2(void *a, int b)
66 77
 {
67 78
     return inflateInit2(a, b);
68 79
 }
69 80
 
70
-static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, uint16_t flags, unsigned int *fu, cli_ctx *ctx, char *tmpd, zip_cb zcb)
81
+/**
82
+ * @brief uncompress file from zip
83
+ *
84
+ * @param src                           pointer to compressed data
85
+ * @param csize                         size of compressed data
86
+ * @param usize                         expected size of uncompressed data
87
+ * @param method                        compression method
88
+ * @param flags                         local header flags
89
+ * @param[in,out] num_files_unzipped    current number of files that have been unzipped
90
+ * @param[in,out] ctx                   scan context
91
+ * @param tmpd                          temp directory path name
92
+ * @param zcb                           callback function to invoke after extraction (default: scan)
93
+ * @return cl_error_t                   CL_EPARSE = could not apply a password
94
+ */
95
+static cl_error_t unz(
96
+    const uint8_t *src,
97
+    uint32_t csize,
98
+    uint32_t usize,
99
+    uint16_t method,
100
+    uint16_t flags,
101
+    unsigned int *num_files_unzipped,
102
+    cli_ctx *ctx,
103
+    char *tmpd,
104
+    zip_cb zcb)
71 105
 {
72 106
     char name[1024], obuf[BUFSIZ];
73 107
     char *tempfile = name;
74
-    int of, ret = CL_CLEAN;
75
-    unsigned int res = 1, written = 0;
108
+    int out_file, ret = CL_CLEAN;
109
+    int res              = 1;
110
+    unsigned int written = 0;
76 111
 
77 112
     if (tmpd) {
78
-        snprintf(name, sizeof(name), "%s" PATHSEP "zip.%03u", tmpd, *fu);
113
+        snprintf(name, sizeof(name), "%s" PATHSEP "zip.%03u", tmpd, *num_files_unzipped);
79 114
         name[sizeof(name) - 1] = '\0';
80 115
     } else {
81 116
         if (!(tempfile = cli_gentemp(ctx->engine->tmpdir))) return CL_EMEM;
82 117
     }
83
-    if ((of = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
118
+    if ((out_file = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
84 119
         cli_warnmsg("cli_unzip: failed to create temporary file %s\n", tempfile);
85 120
         if (!tmpd) free(tempfile);
86 121
         return CL_ETMPFILE;
... ...
@@ -88,11 +130,11 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
88 88
     switch (method) {
89 89
         case ALG_STORED:
90 90
             if (csize < usize) {
91
-                unsigned int fake = *fu + 1;
91
+                unsigned int fake = *num_files_unzipped + 1;
92 92
                 cli_dbgmsg("cli_unzip: attempting to inflate stored file with inconsistent size\n");
93 93
                 if ((ret = unz(src, csize, usize, ALG_DEFLATE, 0, &fake, ctx, tmpd, zcb)) == CL_CLEAN) {
94
-                    (*fu)++;
95
-                    res = fake - (*fu);
94
+                    (*num_files_unzipped)++;
95
+                    res = fake - (*num_files_unzipped);
96 96
                 } else
97 97
                     break;
98 98
             }
... ...
@@ -101,7 +143,7 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
101 101
                     cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (%lu)\n", (long unsigned int)ctx->engine->maxfilesize);
102 102
                     csize = ctx->engine->maxfilesize;
103 103
                 }
104
-                if (cli_writen(of, src, csize) != (int)csize)
104
+                if (cli_writen(out_file, src, csize) != (int)csize)
105 105
                     ret = CL_EWRITE;
106 106
                 else
107 107
                     res = 0;
... ...
@@ -166,7 +208,7 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
166 166
                         res = Z_STREAM_END;
167 167
                         break;
168 168
                     }
169
-                    if (cli_writen(of, obuf, sizeof(obuf) - (*avail_out)) != (int)(sizeof(obuf) - (*avail_out))) {
169
+                    if (cli_writen(out_file, obuf, sizeof(obuf) - (*avail_out)) != (int)(sizeof(obuf) - (*avail_out))) {
170 170
                         cli_warnmsg("cli_unzip: falied to write %lu inflated bytes\n", (unsigned long int)sizeof(obuf) - (*avail_out));
171 171
                         ret = CL_EWRITE;
172 172
                         res = 100;
... ...
@@ -209,7 +251,7 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
209 209
                         res = BZ_STREAM_END;
210 210
                         break;
211 211
                     }
212
-                    if (cli_writen(of, obuf, sizeof(obuf) - strm.avail_out) != (int)(sizeof(obuf) - strm.avail_out)) {
212
+                    if (cli_writen(out_file, obuf, sizeof(obuf) - strm.avail_out) != (int)(sizeof(obuf) - strm.avail_out)) {
213 213
                         cli_warnmsg("cli_unzip: falied to write %lu bunzipped bytes\n", (long unsigned int)sizeof(obuf) - strm.avail_out);
214 214
                         ret = CL_EWRITE;
215 215
                         res = 100;
... ...
@@ -245,7 +287,7 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
245 245
                         res = 0;
246 246
                         break;
247 247
                     }
248
-                    if (cli_writen(of, obuf, sizeof(obuf) - strm.avail_out) != (int)(sizeof(obuf) - strm.avail_out)) {
248
+                    if (cli_writen(out_file, obuf, sizeof(obuf) - strm.avail_out) != (int)(sizeof(obuf) - strm.avail_out)) {
249 249
                         cli_warnmsg("cli_unzip: falied to write %lu exploded bytes\n", (unsigned long int)sizeof(obuf) - strm.avail_out);
250 250
                         ret = CL_EWRITE;
251 251
                         res = 100;
... ...
@@ -290,24 +332,24 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
290 290
     }
291 291
 
292 292
     if (!res) {
293
-        (*fu)++;
293
+        (*num_files_unzipped)++;
294 294
         cli_dbgmsg("cli_unzip: extracted to %s\n", tempfile);
295
-        if (lseek(of, 0, SEEK_SET) == -1) {
295
+        if (lseek(out_file, 0, SEEK_SET) == -1) {
296 296
             cli_dbgmsg("cli_unzip: call to lseek() failed\n");
297 297
             if (!(tmpd))
298 298
                 free(tempfile);
299
-            close(of);
299
+            close(out_file);
300 300
             return CL_ESEEK;
301 301
         }
302
-        ret = zcb(of, tempfile, ctx);
303
-        close(of);
302
+        ret = zcb(out_file, tempfile, ctx);
303
+        close(out_file);
304 304
         if (!ctx->engine->keeptmp)
305 305
             if (cli_unlink(tempfile)) ret = CL_EUNLINK;
306 306
         if (!tmpd) free(tempfile);
307 307
         return ret;
308 308
     }
309 309
 
310
-    close(of);
310
+    close(out_file);
311 311
     if (!ctx->engine->keeptmp)
312 312
         if (cli_unlink(tempfile)) ret = CL_EUNLINK;
313 313
     if (!tmpd) free(tempfile);
... ...
@@ -319,7 +361,6 @@ static int unz(const uint8_t *src, uint32_t csize, uint32_t usize, uint16_t meth
319 319
 static inline void zupdatekey(uint32_t key[3], unsigned char input)
320 320
 {
321 321
     unsigned char tmp[1];
322
-    unsigned long crctmp;
323 322
 
324 323
     tmp[0] = input;
325 324
     ZIP_CRC32(key[0], key[0], tmp, 1);
... ...
@@ -354,13 +395,36 @@ static inline unsigned char zdecryptbyte(uint32_t key[3])
354 354
     return ((temp * (temp ^ 1)) >> 8);
355 355
 }
356 356
 
357
-/* zip decrypt, CL_EPARSE = could not apply a password, csize includes the decryption header */
358
-/* TODO - search for strong encryption header (0x0017) and handle them */
359
-static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, const uint8_t *lh, unsigned int *fu, cli_ctx *ctx, char *tmpd, zip_cb zcb)
357
+/**
358
+ * @brief zip decrypt.
359
+ *
360
+ * TODO - search for strong encryption header (0x0017) and handle them
361
+ *
362
+ * @param src
363
+ * @param csize                         size of compressed data; includes the decryption header
364
+ * @param usize                         expected size of uncompressed data
365
+ * @param local_header
366
+ * @param[in,out] num_files_unzipped    current number of files that have been unzipped
367
+ * @param[in,out] ctx                   scan context
368
+ * @param tmpd                          temp directory path name
369
+ * @param zcb                           callback function to invoke after extraction (default: scan)
370
+ * @return cl_error_t                   CL_EPARSE = could not apply a password
371
+ */
372
+static inline cl_error_t zdecrypt(
373
+    const uint8_t *src,
374
+    uint32_t csize,
375
+    uint32_t usize,
376
+    const uint8_t *local_header,
377
+    unsigned int *num_files_unzipped,
378
+    cli_ctx *ctx,
379
+    char *tmpd,
380
+    zip_cb zcb)
360 381
 {
361
-    int i, ret, v = 0;
382
+    cl_error_t ret;
383
+    int v = 0;
384
+    uint32_t i;
362 385
     uint32_t key[3];
363
-    uint8_t eh[12]; /* encryption header buffer */
386
+    uint8_t encryption_header[12]; /* encryption header buffer */
364 387
     struct cli_pwdb *password, *pass_any, *pass_zip;
365 388
 
366 389
     if (!ctx || !ctx->engine)
... ...
@@ -381,58 +445,58 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
381 381
         zinitkey(key, password);
382 382
 
383 383
         /* decrypting the encryption header */
384
-        memcpy(eh, src, SIZEOF_EH);
384
+        memcpy(encryption_header, src, SIZEOF_ENCRYPTION_HEADER);
385 385
 
386
-        for (i = 0; i < SIZEOF_EH; i++) {
387
-            eh[i] ^= zdecryptbyte(key);
388
-            zupdatekey(key, eh[i]);
386
+        for (i = 0; i < SIZEOF_ENCRYPTION_HEADER; i++) {
387
+            encryption_header[i] ^= zdecryptbyte(key);
388
+            zupdatekey(key, encryption_header[i]);
389 389
         }
390 390
 
391 391
         /* verify that the password is correct */
392
-        if (LH_version > 20) { /* higher than 2.0 */
393
-            uint16_t a = eh[SIZEOF_EH - 1];
392
+        if (LOCAL_HEADER_version > 20) { /* higher than 2.0 */
393
+            uint16_t a = encryption_header[SIZEOF_ENCRYPTION_HEADER - 1];
394 394
 
395
-            if (LH_flags & F_USEDD) {
396
-                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (moddate)\n", LH_version, a, LH_mtime);
397
-                if (a == ((LH_mtime >> 8) & 0xff))
395
+            if (LOCAL_HEADER_flags & F_USEDD) {
396
+                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (moddate)\n", LOCAL_HEADER_version, a, LOCAL_HEADER_mtime);
397
+                if (a == ((LOCAL_HEADER_mtime >> 8) & 0xff))
398 398
                     v = 1;
399 399
             } else {
400
-                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (crc32)\n", LH_version, a, LH_crc32);
401
-                if (a == ((LH_crc32 >> 24) & 0xff))
400
+                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (crc32)\n", LOCAL_HEADER_version, a, LOCAL_HEADER_crc32);
401
+                if (a == ((LOCAL_HEADER_crc32 >> 24) & 0xff))
402 402
                     v = 1;
403 403
             }
404 404
         } else {
405
-            uint16_t a = eh[SIZEOF_EH - 1], b = eh[SIZEOF_EH - 2];
405
+            uint16_t a = encryption_header[SIZEOF_ENCRYPTION_HEADER - 1], b = encryption_header[SIZEOF_ENCRYPTION_HEADER - 2];
406 406
 
407
-            if (LH_flags & F_USEDD) {
408
-                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (moddate)\n", LH_version, a, b, LH_mtime);
409
-                if ((b | (a << 8)) == (LH_mtime & 0xffff))
407
+            if (LOCAL_HEADER_flags & F_USEDD) {
408
+                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (moddate)\n", LOCAL_HEADER_version, a, b, LOCAL_HEADER_mtime);
409
+                if ((b | (a << 8)) == (LOCAL_HEADER_mtime & 0xffff))
410 410
                     v = 1;
411 411
             } else {
412
-                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (crc32)\n", LH_version, eh[SIZEOF_EH - 1], eh[SIZEOF_EH - 2], LH_crc32);
413
-                if ((b | (a << 8)) == ((LH_crc32 >> 16) & 0xffff))
412
+                cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (crc32)\n", LOCAL_HEADER_version, encryption_header[SIZEOF_ENCRYPTION_HEADER - 1], encryption_header[SIZEOF_ENCRYPTION_HEADER - 2], LOCAL_HEADER_crc32);
413
+                if ((b | (a << 8)) == ((LOCAL_HEADER_crc32 >> 16) & 0xffff))
414 414
                     v = 1;
415 415
             }
416 416
         }
417 417
 
418 418
         if (v) {
419 419
             char name[1024], obuf[BUFSIZ];
420
-            char *tempfile       = name;
420
+            char *tempfile = name;
421 421
             unsigned int written = 0, total = 0;
422 422
             fmap_t *dcypt_map;
423 423
             const uint8_t *dcypt_zip;
424
-            int of;
424
+            int out_file;
425 425
 
426 426
             cli_dbgmsg("cli_unzip: decrypt - password [%s] matches\n", password->name);
427 427
 
428 428
             /* output decrypted data to tempfile */
429 429
             if (tmpd) {
430
-                snprintf(name, sizeof(name), "%s" PATHSEP "zip.decrypt.%03u", tmpd, *fu);
430
+                snprintf(name, sizeof(name), "%s" PATHSEP "zip.decrypt.%03u", tmpd, *num_files_unzipped);
431 431
                 name[sizeof(name) - 1] = '\0';
432 432
             } else {
433 433
                 if (!(tempfile = cli_gentemp(ctx->engine->tmpdir))) return CL_EMEM;
434 434
             }
435
-            if ((of = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
435
+            if ((out_file = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
436 436
                 cli_warnmsg("cli_unzip: decrypt - failed to create temporary file %s\n", tempfile);
437 437
                 if (!tmpd) free(tempfile);
438 438
                 return CL_ETMPFILE;
... ...
@@ -444,7 +508,7 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
444 444
 
445 445
                 written++;
446 446
                 if (written >= BUFSIZ) {
447
-                    if (cli_writen(of, obuf, written) != (int)written) {
447
+                    if (cli_writen(out_file, obuf, written) != (int)written) {
448 448
                         ret = CL_EWRITE;
449 449
                         goto zd_clean;
450 450
                     }
... ...
@@ -453,7 +517,7 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
453 453
                 }
454 454
             }
455 455
             if (written) {
456
-                if (cli_writen(of, obuf, written) != (int)written) {
456
+                if (cli_writen(out_file, obuf, written) != (int)written) {
457 457
                     ret = CL_EWRITE;
458 458
                     goto zd_clean;
459 459
                 }
... ...
@@ -464,7 +528,7 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
464 464
             cli_dbgmsg("cli_unzip: decrypt - decrypted %u bytes to %s\n", total, tempfile);
465 465
 
466 466
             /* decrypt data to new fmap -> buffer */
467
-            if (!(dcypt_map = fmap(of, 0, total))) {
467
+            if (!(dcypt_map = fmap(out_file, 0, total))) {
468 468
                 cli_warnmsg("cli_unzip: decrypt - failed to create fmap on decrypted file %s\n", tempfile);
469 469
                 ret = CL_EMAP;
470 470
                 goto zd_clean;
... ...
@@ -478,12 +542,12 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
478 478
             }
479 479
 
480 480
             /* call unz on decrypted output */
481
-            ret = unz(dcypt_zip, csize - SIZEOF_EH, usize, LH_method, LH_flags, fu, ctx, tmpd, zcb);
481
+            ret = unz(dcypt_zip, csize - SIZEOF_ENCRYPTION_HEADER, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags, num_files_unzipped, ctx, tmpd, zcb);
482 482
 
483 483
             /* clean-up and return */
484 484
             funmap(dcypt_map);
485 485
         zd_clean:
486
-            close(of);
486
+            close(out_file);
487 487
             if (!ctx->engine->keeptmp)
488 488
                 if (cli_unlink(tempfile)) {
489 489
                     if (!tmpd) free(tempfile);
... ...
@@ -503,36 +567,67 @@ static inline int zdecrypt(const uint8_t *src, uint32_t csize, uint32_t usize, c
503 503
     return CL_SUCCESS;
504 504
 }
505 505
 
506
-static unsigned int lhdr(fmap_t *map, uint32_t loff, uint32_t zsize, unsigned int *fu, unsigned int fc, const uint8_t *ch, int *ret, cli_ctx *ctx, char *tmpd, int detect_encrypted, zip_cb zcb, uint32_t *file_local_header_size, uint32_t *file_local_data_size)
506
+/**
507
+ * @brief Parse, extract, and scan a file using the local file header.
508
+ *
509
+ * Usage of the `record` parameter will alter behavior so it only collects file record metadata and does not extract or scan any files.
510
+ *
511
+ * @param map                           fmap for the file
512
+ * @param loff                          offset of the local file header
513
+ * @param zsize                         size of the zip file
514
+ * @param[in,out] num_files_unzipped    current number of files that have been unzipped
515
+ * @param file_count                    current number of files that have been discovered
516
+ * @param central_header                pointer to the central directory header; may be NULL
517
+ * @param[out] ret                      The status code
518
+ * @param[in,out] ctx                   scan context
519
+ * @param tmpd                          temp directory path name
520
+ * @param detect_encrypted              bool: if encrypted files should raise heuristic alert
521
+ * @param zcb                           callback function to invoke after extraction (default: scan)
522
+ * @param record                        (optional) a pointer to a struct to store file record information.
523
+ * @return unsigned int                 returns the size of the file header + file data, so zip file can be indexed without the central directory
524
+ */
525
+static unsigned int parse_local_file_header(
526
+    fmap_t *map,
527
+    uint32_t loff,
528
+    uint32_t zsize,
529
+    unsigned int *num_files_unzipped,
530
+    unsigned int file_count,
531
+    const uint8_t *central_header, /* pointer to central header. */
532
+    cl_error_t *ret,
533
+    cli_ctx *ctx,
534
+    char *tmpd,
535
+    int detect_encrypted,
536
+    zip_cb zcb,
537
+    struct zip_record *record)
507 538
 {
508
-    const uint8_t *lh, *zip;
539
+    const uint8_t *local_header, *zip;
509 540
     char name[256];
510 541
     uint32_t csize, usize;
511 542
     int virus_found = 0;
512 543
 
513
-    if (!(lh = fmap_need_off(map, loff, SIZEOF_LH))) {
514
-        cli_dbgmsg("cli_unzip: lh - out of file\n");
544
+    if (!(local_header = fmap_need_off(map, loff, SIZEOF_LOCAL_HEADER))) {
545
+        cli_dbgmsg("cli_unzip: local header - out of file\n");
515 546
         return 0;
516 547
     }
517
-    if (LH_magic != 0x04034b50) {
518
-        if (!ch)
519
-            cli_dbgmsg("cli_unzip: lh - wrkcomplete\n");
548
+    if (LOCAL_HEADER_magic != ZIP_MAGIC_LOCAL_FILE_HEADER) {
549
+        if (!central_header)
550
+            cli_dbgmsg("cli_unzip: local header - wrkcomplete\n");
520 551
         else
521
-            cli_dbgmsg("cli_unzip: lh - bad magic\n");
522
-        fmap_unneed_off(map, loff, SIZEOF_LH);
552
+            cli_dbgmsg("cli_unzip: local header - bad magic\n");
553
+        fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
523 554
         return 0;
524 555
     }
525 556
 
526
-    zip = lh + SIZEOF_LH;
527
-    zsize -= SIZEOF_LH;
557
+    zip = local_header + SIZEOF_LOCAL_HEADER;
558
+    zsize -= SIZEOF_LOCAL_HEADER;
528 559
 
529
-    if (zsize <= LH_flen) {
530
-        cli_dbgmsg("cli_unzip: lh - fname out of file\n");
531
-        fmap_unneed_off(map, loff, SIZEOF_LH);
560
+    if (zsize <= LOCAL_HEADER_flen) {
561
+        cli_dbgmsg("cli_unzip: local header - fname out of file\n");
562
+        fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
532 563
         return 0;
533 564
     }
534 565
     if (ctx->engine->cdb || cli_debug_flag) {
535
-        uint32_t nsize = (LH_flen >= sizeof(name)) ? sizeof(name) - 1 : LH_flen;
566
+        uint32_t nsize = (LOCAL_HEADER_flen >= sizeof(name)) ? sizeof(name) - 1 : LOCAL_HEADER_flen;
536 567
         const char *src;
537 568
         if (nsize && (src = fmap_need_ptr_once(map, zip, nsize))) {
538 569
             memcpy(name, zip, nsize);
... ...
@@ -540,79 +635,88 @@ static unsigned int lhdr(fmap_t *map, uint32_t loff, uint32_t zsize, unsigned in
540 540
         } else
541 541
             name[0] = '\0';
542 542
     }
543
-    zip += LH_flen;
544
-    zsize -= LH_flen;
543
+    zip += LOCAL_HEADER_flen;
544
+    zsize -= LOCAL_HEADER_flen;
545 545
 
546
-    cli_dbgmsg("cli_unzip: lh - ZMDNAME:%d:%s:%u:%u:%x:%u:%u:%u\n", ((LH_flags & F_ENCR) != 0), name, LH_usize, LH_csize, LH_crc32, LH_method, fc, ctx->recursion);
546
+    cli_dbgmsg("cli_unzip: local header - ZMDNAME:%d:%s:%u:%u:%x:%u:%u:%u\n",
547
+               ((LOCAL_HEADER_flags & F_ENCR) != 0), name, LOCAL_HEADER_usize, LOCAL_HEADER_csize, LOCAL_HEADER_crc32, LOCAL_HEADER_method, file_count, ctx->recursion);
547 548
     /* ZMDfmt virname:encrypted(0-1):filename(exact|*):usize(exact|*):csize(exact|*):crc32(exact|*):method(exact|*):fileno(exact|*):maxdepth(exact|*) */
548 549
 
549
-    if (cli_matchmeta(ctx, name, LH_csize, LH_usize, (LH_flags & F_ENCR) != 0, fc, LH_crc32, NULL) == CL_VIRUS) {
550
+    /* Scan file header metadata. */
551
+    if (cli_matchmeta(ctx, name, LOCAL_HEADER_csize, LOCAL_HEADER_usize, (LOCAL_HEADER_flags & F_ENCR) != 0, file_count, LOCAL_HEADER_crc32, NULL) == CL_VIRUS) {
550 552
         *ret = CL_VIRUS;
551 553
         if (!SCAN_ALLMATCHES)
552 554
             return 0;
553 555
         virus_found = 1;
554 556
     }
555 557
 
556
-    if (LH_flags & F_MSKED) {
557
-        cli_dbgmsg("cli_unzip: lh - header has got unusable masked data\n");
558
+    if (LOCAL_HEADER_flags & F_MSKED) {
559
+        cli_dbgmsg("cli_unzip: local header - header has got unusable masked data\n");
558 560
         /* FIXME: need to find/craft a sample */
559
-        fmap_unneed_off(map, loff, SIZEOF_LH);
561
+        fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
560 562
         return 0;
561 563
     }
562 564
 
563
-    if (detect_encrypted && (LH_flags & F_ENCR) && SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
565
+    if (detect_encrypted && (LOCAL_HEADER_flags & F_ENCR) && SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
564 566
         cli_dbgmsg("cli_unzip: Encrypted files found in archive.\n");
565 567
         *ret = cli_append_virus(ctx, "Heuristics.Encrypted.Zip");
566 568
         if ((*ret == CL_VIRUS && !SCAN_ALLMATCHES) || *ret != CL_CLEAN) {
567
-            fmap_unneed_off(map, loff, SIZEOF_LH);
569
+            fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
568 570
             return 0;
569 571
         }
570 572
         virus_found = 1;
571 573
     }
572 574
 
573
-    if (LH_flags & F_USEDD) {
574
-        cli_dbgmsg("cli_unzip: lh - has data desc\n");
575
-        if (!ch) {
576
-            fmap_unneed_off(map, loff, SIZEOF_LH);
575
+    if (LOCAL_HEADER_flags & F_USEDD) {
576
+        cli_dbgmsg("cli_unzip: local header - has data desc\n");
577
+        if (!central_header) {
578
+            fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
577 579
             return 0;
578 580
         } else {
579
-            usize = CH_usize;
580
-            csize = CH_csize;
581
+            usize = CENTRAL_HEADER_usize;
582
+            csize = CENTRAL_HEADER_csize;
581 583
         }
582 584
     } else {
583
-        usize = LH_usize;
584
-        csize = LH_csize;
585
+        usize = LOCAL_HEADER_usize;
586
+        csize = LOCAL_HEADER_csize;
585 587
     }
586 588
 
587
-    if (zsize <= LH_elen) {
588
-        cli_dbgmsg("cli_unzip: lh - extra out of file\n");
589
-        fmap_unneed_off(map, loff, SIZEOF_LH);
589
+    if (zsize <= LOCAL_HEADER_elen) {
590
+        cli_dbgmsg("cli_unzip: local header - extra out of file\n");
591
+        fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
590 592
         return 0;
591 593
     }
592
-    zip += LH_elen;
593
-    zsize -= LH_elen;
594
-
595
-    if (NULL != file_local_header_size)
596
-        *file_local_header_size = zip - lh;
597
-    if (NULL != file_local_data_size)
598
-        *file_local_data_size = csize;
594
+    zip += LOCAL_HEADER_elen;
595
+    zsize -= LOCAL_HEADER_elen;
599 596
 
600 597
     if (!csize) { /* FIXME: what's used for method0 files? csize or usize? Nothing in the specs, needs testing */
601
-        cli_dbgmsg("cli_unzip: lh - skipping empty file\n");
598
+        cli_dbgmsg("cli_unzip: local header - skipping empty file\n");
602 599
     } else {
603 600
         if (zsize < csize) {
604
-            cli_dbgmsg("cli_unzip: lh - stream out of file\n");
605
-            fmap_unneed_off(map, loff, SIZEOF_LH);
601
+            cli_dbgmsg("cli_unzip: local header - stream out of file\n");
602
+            fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER);
606 603
             return 0;
607 604
         }
608 605
 
609
-        if (LH_flags & F_ENCR) {
610
-            if (fmap_need_ptr_once(map, zip, csize))
611
-                *ret = zdecrypt(zip, csize, usize, lh, fu, ctx, tmpd, zcb);
606
+        /* Don't actually unzip if we're just collecting the file record information (offset, sizes) */
607
+        if (NULL == record) {
608
+            if (LOCAL_HEADER_flags & F_ENCR) {
609
+                if (fmap_need_ptr_once(map, zip, csize))
610
+                    *ret = zdecrypt(zip, csize, usize, local_header, num_files_unzipped, ctx, tmpd, zcb);
611
+            } else {
612
+                if (fmap_need_ptr_once(map, zip, csize))
613
+                    *ret = unz(zip, csize, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags, num_files_unzipped, ctx, tmpd, zcb);
614
+            }
612 615
         } else {
613
-            if (fmap_need_ptr_once(map, zip, csize))
614
-                *ret = unz(zip, csize, usize, LH_method, LH_flags, fu, ctx, tmpd, zcb);
616
+            record->local_header_offset = loff;
617
+            record->local_header_size   = zip - local_header;
618
+            record->compressed_size     = csize;
619
+            record->uncompressed_size   = usize;
620
+            record->method              = LOCAL_HEADER_method;
621
+            record->flags               = LOCAL_HEADER_flags;
622
+            record->encrypted           = (LOCAL_HEADER_flags & F_ENCR) ? 1 : 0;
615 623
         }
624
+
616 625
         zip += csize;
617 626
         zsize -= csize;
618 627
     }
... ...
@@ -620,17 +724,17 @@ static unsigned int lhdr(fmap_t *map, uint32_t loff, uint32_t zsize, unsigned in
620 620
     if (virus_found != 0)
621 621
         *ret = CL_VIRUS;
622 622
 
623
-    fmap_unneed_off(map, loff, SIZEOF_LH); /* unneed now. block is guaranteed to exists till the next need */
624
-    if (LH_flags & F_USEDD) {
623
+    fmap_unneed_off(map, loff, SIZEOF_LOCAL_HEADER); /* unneed now. block is guaranteed to exists till the next need */
624
+    if (LOCAL_HEADER_flags & F_USEDD) {
625 625
         if (zsize < 12) {
626
-            cli_dbgmsg("cli_unzip: lh - data desc out of file\n");
626
+            cli_dbgmsg("cli_unzip: local header - data desc out of file\n");
627 627
             return 0;
628 628
         }
629 629
         zsize -= 12;
630 630
         if (fmap_need_ptr_once(map, zip, 4)) {
631
-            if (cli_readint32(zip) == 0x08074b50) {
631
+            if (cli_readint32(zip) == ZIP_MAGIC_FILE_BEGIN_SPLIT_OR_SPANNED) {
632 632
                 if (zsize < 4) {
633
-                    cli_dbgmsg("cli_unzip: lh - data desc out of file\n");
633
+                    cli_dbgmsg("cli_unzip: local header - data desc out of file\n");
634 634
                     return 0;
635 635
                 }
636 636
                 zip += 4;
... ...
@@ -638,85 +742,117 @@ static unsigned int lhdr(fmap_t *map, uint32_t loff, uint32_t zsize, unsigned in
638 638
         }
639 639
         zip += 12;
640 640
     }
641
-    return zip - lh;
641
+    return zip - local_header;
642 642
 }
643 643
 
644
-static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned int *fu, unsigned int fc, int *ret, cli_ctx *ctx, char *tmpd, struct zip_requests *requests, uint32_t *file_local_offset, uint32_t *file_local_header_size, uint32_t *file_local_data_size)
644
+/**
645
+ * @brief Parse, extract, and scan a file by iterating the central directory.
646
+ *
647
+ * Usage of the `record` parameter will alter behavior so it only collects file record metadata and does not extract or scan any files.
648
+ *
649
+ * @param map                           fmap for the file
650
+ * @param coff                          offset of the file header in the central directory
651
+ * @param zsize                         size of the zip file
652
+ * @param[in,out] num_files_unzipped    current number of files that have been unzipped
653
+ * @param file_count                    current number of files that have been discovered
654
+ * @param[out] ret                      The status code
655
+ * @param[in,out] ctx                   scan context
656
+ * @param tmpd                          temp directory path name
657
+ * @param requests                      (optional) structure used to search the zip for files by name
658
+ * @param record                        (optional) a pointer to a struct to store file record information.
659
+ * @return unsigned int                 returns the offset just past this file header in the central directory (i.e. where the next entry starts), or 0 if no more files
660
+ */
661
+static unsigned int
662
+parse_central_directory_file_header(
663
+    fmap_t *map,
664
+    uint32_t coff,
665
+    uint32_t zsize,
666
+    unsigned int *num_files_unzipped,
667
+    unsigned int file_count,
668
+    cl_error_t *ret,
669
+    cli_ctx *ctx,
670
+    char *tmpd,
671
+    struct zip_requests *requests,
672
+    struct zip_record *record)
645 673
 {
646 674
     char name[256];
647 675
     int last = 0;
648
-    const uint8_t *ch;
676
+    const uint8_t *central_header;
649 677
     int virus_found = 0;
650 678
 
651
-    if (NULL != file_local_offset)
652
-        *file_local_offset = 0;
653
-    if (NULL != file_local_header_size)
654
-        *file_local_header_size = 0;
655
-    if (NULL != file_local_data_size)
656
-        *file_local_data_size = 0;
657
-
658
-    if (!(ch = fmap_need_off(map, coff, SIZEOF_CH)) || CH_magic != 0x02014b50) {
659
-        if (ch) fmap_unneed_ptr(map, ch, SIZEOF_CH);
660
-        cli_dbgmsg("cli_unzip: ch - wrkcomplete\n");
679
+    if (!(central_header = fmap_need_off(map, coff, SIZEOF_CENTRAL_HEADER)) || CENTRAL_HEADER_magic != ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN) {
680
+        if (central_header) fmap_unneed_ptr(map, central_header, SIZEOF_CENTRAL_HEADER);
681
+        cli_dbgmsg("cli_unzip: central header - wrkcomplete\n");
661 682
         return 0;
662 683
     }
663
-    coff += SIZEOF_CH;
684
+    coff += SIZEOF_CENTRAL_HEADER;
664 685
 
665
-    cli_dbgmsg("cli_unzip: ch - flags %x - method %x - csize %x - usize %x - flen %x - elen %x - clen %x - disk %x - off %x\n", CH_flags, CH_method, CH_csize, CH_usize, CH_flen, CH_elen, CH_clen, CH_dsk, CH_off);
686
+    cli_dbgmsg("cli_unzip: central header - flags %x - method %x - csize %x - usize %x - flen %x - elen %x - clen %x - disk %x - off %x\n",
687
+               CENTRAL_HEADER_flags, CENTRAL_HEADER_method, CENTRAL_HEADER_csize, CENTRAL_HEADER_usize, CENTRAL_HEADER_flen, CENTRAL_HEADER_extra_len, CENTRAL_HEADER_comment_len, CENTRAL_HEADER_disk_num, CENTRAL_HEADER_off);
666 688
 
667
-    if (zsize - coff <= CH_flen) {
668
-        cli_dbgmsg("cli_unzip: ch - fname out of file\n");
689
+    if (zsize - coff <= CENTRAL_HEADER_flen) {
690
+        cli_dbgmsg("cli_unzip: central header - fname out of file\n");
669 691
         last = 1;
670 692
     }
671 693
 
672 694
     name[0] = '\0';
673 695
     if (!last) {
674
-        unsigned int size = (CH_flen >= sizeof(name)) ? sizeof(name) - 1 : CH_flen;
696
+        unsigned int size = (CENTRAL_HEADER_flen >= sizeof(name)) ? sizeof(name) - 1 : CENTRAL_HEADER_flen;
675 697
         const char *src   = fmap_need_off_once(map, coff, size);
676 698
         if (src) {
677 699
             memcpy(name, src, size);
678 700
             name[size] = '\0';
679
-            cli_dbgmsg("cli_unzip: ch - fname: %s\n", name);
701
+            cli_dbgmsg("cli_unzip: central header - fname: %s\n", name);
680 702
         }
681 703
     }
682
-    coff += CH_flen;
704
+    coff += CENTRAL_HEADER_flen;
683 705
 
684 706
     /* requests do not supply a ctx; also prevent multiple scans */
685
-    if (ctx && cli_matchmeta(ctx, name, CH_csize, CH_usize, (CH_flags & F_ENCR) != 0, fc, CH_crc32, NULL) == CL_VIRUS)
707
+    if (ctx && cli_matchmeta(ctx, name, CENTRAL_HEADER_csize, CENTRAL_HEADER_usize, (CENTRAL_HEADER_flags & F_ENCR) != 0, file_count, CENTRAL_HEADER_crc32, NULL) == CL_VIRUS)
686 708
         virus_found = 1;
687 709
 
688
-    if (zsize - coff <= CH_elen && !last) {
689
-        cli_dbgmsg("cli_unzip: ch - extra out of file\n");
710
+    if (zsize - coff <= CENTRAL_HEADER_extra_len && !last) {
711
+        cli_dbgmsg("cli_unzip: central header - extra out of file\n");
690 712
         last = 1;
691 713
     }
692
-    coff += CH_elen;
714
+    coff += CENTRAL_HEADER_extra_len;
693 715
 
694
-    if (zsize - coff < CH_clen && !last) {
695
-        cli_dbgmsg("cli_unzip: ch - comment out of file\n");
716
+    if (zsize - coff < CENTRAL_HEADER_comment_len && !last) {
717
+        cli_dbgmsg("cli_unzip: central header - comment out of file\n");
696 718
         last = 1;
697 719
     }
698
-    coff += CH_clen;
720
+    coff += CENTRAL_HEADER_comment_len;
699 721
 
700 722
     if (!requests) {
701
-        if (CH_off < zsize - SIZEOF_LH) {
702
-            if (NULL != file_local_offset)
703
-                *file_local_offset = CH_off;
704
-            lhdr(map, CH_off, zsize - CH_off, fu, fc, ch, ret, ctx, tmpd, 1, zip_scan_cb, file_local_header_size, file_local_data_size);
705
-        } else
706
-            cli_dbgmsg("cli_unzip: ch - local hdr out of file\n");
723
+        if (CENTRAL_HEADER_off < zsize - SIZEOF_LOCAL_HEADER) {
724
+            parse_local_file_header(map,
725
+                                    CENTRAL_HEADER_off,
726
+                                    zsize - CENTRAL_HEADER_off,
727
+                                    num_files_unzipped,
728
+                                    file_count,
729
+                                    central_header,
730
+                                    ret,
731
+                                    ctx,
732
+                                    tmpd,
733
+                                    1,
734
+                                    zip_scan_cb,
735
+                                    record);
736
+        } else {
737
+            cli_dbgmsg("cli_unzip: central header - local hdr out of file\n");
738
+        }
707 739
     } else {
708 740
         int i;
709 741
         size_t len;
710 742
 
711 743
         if (!last) {
712 744
             for (i = 0; i < requests->namecnt; ++i) {
713
-                cli_dbgmsg("checking for %i: %s\n", i, requests->names[i]);
745
+                cli_dbgmsg("cli_unzip: central header - checking for %i: %s\n", i, requests->names[i]);
714 746
 
715 747
                 len = MIN(sizeof(name) - 1, requests->namelens[i]);
716 748
                 if (!strncmp(requests->names[i], name, len)) {
717 749
                     requests->match = 1;
718 750
                     requests->found = i;
719
-                    requests->loff  = CH_off;
751
+                    requests->loff  = CENTRAL_HEADER_off;
720 752
                 }
721 753
             }
722 754
         }
... ...
@@ -724,67 +860,322 @@ static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned in
724 724
 
725 725
     if (virus_found == 1)
726 726
         *ret = CL_VIRUS;
727
-    fmap_unneed_ptr(map, ch, SIZEOF_CH);
727
+    fmap_unneed_ptr(map, central_header, SIZEOF_CENTRAL_HEADER);
728 728
     return (last ? 0 : coff);
729 729
 }
730 730
 
731
-int cli_unzip(cli_ctx *ctx)
731
+/**
732
+ * @brief Sort zip_record structures based on local file offset.
733
+ *
734
+ * @param first
735
+ * @param second
736
+ * @return int 1 if first record's offset is higher than second's.
737
+ * @return int 0 if first and second record offsets are equal.
738
+ * @return int -1 if first record's offset is less than second's.
739
+ */
740
+static int sort_by_file_offset(const void *first, const void *second)
741
+{
742
+    const struct zip_record *a = (const struct zip_record *)first;
743
+    const struct zip_record *b = (const struct zip_record *)second;
744
+
745
+    /* Avoid return x - y, which can cause undefined behaviour
746
+       because of signed integer overflow. */
747
+    if (a->local_header_offset < b->local_header_offset)
748
+        return -1;
749
+    else if (a->local_header_offset > b->local_header_offset)
750
+        return 1;
751
+
752
+    return 0;
753
+}
754
+
755
+/**
756
+ * @brief Create a catalogue of the central directory.
757
+ *
758
+ * This function indexes every file in the central directory.
759
+ * It creates a zip record catalogue and sorts them by file entry offset.
760
+ * Then it iterates the sorted file records looking for overlapping files.
761
+ *
762
+ * The caller is responsible for freeing the catalogue.
763
+ * The catalogue may contain duplicate items, which should be skipped.
764
+ *
765
+ * @param ctx               The scanning context
766
+ * @param map               The file map
767
+ * @param fsize             The file size
768
+ * @param coff              The central directory offset
769
+ * @param[out] catalogue    A catalogue of zip_records found in the central directory.
770
+ * @param[out] num_records  The number of records in the catalogue.
771
+ * @return cl_error_t  CL_CLEAN if no overlapping files
772
+ * @return cl_error_t  CL_VIRUS if overlapping files and heuristic alerts are enabled
773
+ * @return cl_error_t  CL_EFORMAT if overlapping files and heuristic alerts are disabled
774
+ * @return cl_error_t  CL_ETIMEOUT if the scan time limit is exceeded.
775
+ * @return cl_error_t  CL_EMEM for memory allocation errors.
776
+ */
777
+cl_error_t index_the_central_directory(
778
+    cli_ctx *ctx,
779
+    fmap_t *map,
780
+    uint32_t fsize,
781
+    uint32_t coff,
782
+    struct zip_record **catalogue,
783
+    size_t *num_records)
784
+{
785
+    cl_error_t status = CL_CLEAN;
786
+    cl_error_t ret    = CL_CLEAN;
787
+
788
+    size_t num_record_blocks = 0;
789
+    size_t index             = 0;
790
+
791
+    struct zip_record *zip_catalogue = NULL;
792
+    size_t records_count             = 0;
793
+    struct zip_record *curr_record   = NULL;
794
+    struct zip_record *prev_record   = NULL;
795
+    uint32_t num_overlapping_files   = 0;
796
+
797
+    if (NULL == catalogue || NULL == num_records) {
798
+        cli_errmsg("index_the_central_directory: Invalid NULL arguments\n");
799
+        goto done;
800
+    }
801
+
802
+    *catalogue   = NULL;
803
+    *num_records = 0;
804
+
805
+    zip_catalogue = (struct zip_record *)cli_malloc(sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE);
806
+    if (NULL == zip_catalogue) {
807
+        status = CL_EMEM;
808
+        goto done;
809
+    }
810
+    num_record_blocks = 1;
811
+    memset(zip_catalogue, 0, sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE);
812
+
813
+    cli_dbgmsg("cli_unzip: checking for non-recursive zip bombs...\n");
814
+
815
+    while (0 != (coff = parse_central_directory_file_header(map,
816
+                                                            coff,
817
+                                                            fsize,
818
+                                                            NULL, // num_files_unziped not required
819
+                                                            index + 1,
820
+                                                            &ret,
821
+                                                            ctx,
822
+                                                            NULL, // tmpd not required
823
+                                                            NULL,
824
+                                                            &(zip_catalogue[records_count])))) {
825
+        index++;
826
+
827
+        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
828
+            cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
829
+            status = CL_ETIMEOUT;
830
+            goto done;
831
+        }
832
+
833
+        /* stop checking file entries if we'll exceed maxfiles */
834
+        if (ctx->engine->maxfiles && records_count >= ctx->engine->maxfiles) {
835
+            cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
836
+            break;
837
+        }
838
+        records_count++;
839
+
840
+        if (records_count % ZIP_RECORDS_CHECK_BLOCKSIZE == 0) {
841
+            cli_dbgmsg("   cli_unzip: Exceeded zip record block size, allocating more space...\n");
842
+
843
+            /* allocate more space for zip records */
844
+            if (sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1) <
845
+                sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks)) {
846
+                cli_errmsg("cli_unzip: Number of file records in zip will exceed the max for current architecture (integer overflow)\n");
847
+                status = CL_EFORMAT;
848
+                goto done;
849
+            }
850
+
851
+            zip_catalogue = cli_realloc2(zip_catalogue, sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1));
852
+            if (NULL == zip_catalogue) {
853
+                status = CL_EMEM;
854
+                goto done;
855
+            }
856
+            num_record_blocks++;
857
+            /* zero out the memory for the new records */
858
+            memset(&(zip_catalogue[records_count]), 0, sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - records_count));
859
+        }
860
+    }
861
+
862
+    if (records_count > 1) {
863
+        /*
864
+         * Sort the records by local file offset
865
+         */
866
+        cli_qsort(zip_catalogue, records_count, sizeof(struct zip_record), sort_by_file_offset);
867
+
868
+        /*
869
+         * Detect overlapping files.
870
+         */
871
+        for (index = 1; index < records_count; index++) {
872
+            prev_record = &(zip_catalogue[index - 1]);
873
+            curr_record = &(zip_catalogue[index]);
874
+
875
+            /* Check for integer overflow in 32bit size & offset values */
876
+            if ((UINT32_MAX - (prev_record->local_header_size + prev_record->compressed_size) < prev_record->local_header_offset) ||
877
+                (UINT32_MAX - (curr_record->local_header_size + curr_record->compressed_size) < curr_record->local_header_offset)) {
878
+                cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n");
879
+                status = CL_EFORMAT;
880
+                goto done;
881
+            }
882
+
883
+            if (((curr_record->local_header_offset >= prev_record->local_header_offset) && (curr_record->local_header_offset < prev_record->local_header_offset + prev_record->local_header_size + prev_record->compressed_size)) ||
884
+                ((prev_record->local_header_offset >= curr_record->local_header_offset) && (prev_record->local_header_offset < curr_record->local_header_offset + curr_record->local_header_size + curr_record->compressed_size))) {
885
+                /* Overlapping file detected */
886
+                num_overlapping_files++;
887
+
888
+                if ((curr_record->local_header_offset == prev_record->local_header_offset) &&
889
+                    (curr_record->local_header_size == prev_record->local_header_size) &&
890
+                    (curr_record->compressed_size == prev_record->compressed_size)) {
891
+                    cli_dbgmsg("cli_unzip: Ignoring duplicate file entry @ 0x%x.\n", curr_record->local_header_offset);
892
+                } else {
893
+                    cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
894
+                    cli_dbgmsg("    previous file end:  %u\n", prev_record->local_header_offset + prev_record->local_header_size + prev_record->compressed_size);
895
+                    cli_dbgmsg("    current file start: %u\n", curr_record->local_header_offset);
896
+
897
+                    if (ZIP_MAX_NUM_OVERLAPPING_FILES < num_overlapping_files) {
898
+                        if (SCAN_HEURISTICS) {
899
+                            status = cli_append_virus(ctx, "Heuristics.Zip.OverlappingFiles");
900
+                        } else {
901
+                            status = CL_EFORMAT;
902
+                        }
903
+                        goto done;
904
+                    }
905
+                }
906
+            }
907
+
908
+            if (cli_checktimelimit(ctx) != CL_SUCCESS) {
909
+                cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
910
+                status = CL_ETIMEOUT;
911
+                goto done;
912
+            }
913
+        }
914
+    }
915
+
916
+    *catalogue   = zip_catalogue;
917
+    *num_records = records_count;
918
+    status       = CL_SUCCESS;
919
+
920
+done:
921
+
922
+    if (CL_SUCCESS != status) {
923
+        if (NULL != zip_catalogue) {
924
+            free(zip_catalogue);
925
+            zip_catalogue = NULL;
926
+        }
927
+    }
928
+
929
+    return status;
930
+}
931
+
932
+cl_error_t cli_unzip(cli_ctx *ctx)
732 933
 {
733
-    unsigned int fc = 0, fu = 0;
734
-    int ret = CL_CLEAN;
934
+    unsigned int file_count = 0, num_files_unzipped = 0;
935
+    cl_error_t ret = CL_CLEAN;
735 936
     uint32_t fsize, lhoff = 0, coff = 0;
736 937
     fmap_t *map = *ctx->fmap;
737
-    char *tmpd;
938
+    char *tmpd  = NULL;
738 939
     const char *ptr;
739 940
     int virus_found = 0;
740 941
 #if HAVE_JSON
741 942
     int toval = 0;
742 943
 #endif
743
-    int bZipBombDetected                 = 0;
744
-    uint32_t cur_file_local_offset       = 0;
745
-    uint32_t cur_file_local_header_size  = 0;
746
-    uint32_t cur_file_local_data_size    = 0;
747
-    uint32_t prev_file_local_offset      = 0;
748
-    uint32_t prev_file_local_header_size = 0;
749
-    uint32_t prev_file_local_data_size   = 0;
944
+    struct zip_record *zip_catalogue = NULL;
945
+    size_t records_count             = 0;
946
+    size_t i;
750 947
 
751 948
     cli_dbgmsg("in cli_unzip\n");
752 949
     fsize = (uint32_t)map->len;
753 950
     if (sizeof(off_t) != sizeof(uint32_t) && (size_t)fsize != map->len) {
754 951
         cli_dbgmsg("cli_unzip: file too big\n");
755
-        return CL_CLEAN;
952
+        ret = CL_CLEAN;
953
+        goto done;
756 954
     }
757
-    if (fsize < SIZEOF_CH) {
955
+    if (fsize < SIZEOF_CENTRAL_HEADER) {
758 956
         cli_dbgmsg("cli_unzip: file too short\n");
759
-        return CL_CLEAN;
957
+        ret = CL_CLEAN;
958
+        goto done;
760 959
     }
761 960
     if (!(tmpd = cli_gentemp(ctx->engine->tmpdir))) {
762
-        return CL_ETMPDIR;
961
+        ret = CL_ETMPDIR;
962
+        goto done;
763 963
     }
764 964
     if (mkdir(tmpd, 0700)) {
765 965
         cli_dbgmsg("cli_unzip: Can't create temporary directory %s\n", tmpd);
766
-        free(tmpd);
767
-        return CL_ETMPDIR;
966
+        ret = CL_ETMPDIR;
967
+        goto done;
768 968
     }
769 969
 
770 970
     for (coff = fsize - 22; coff > 0; coff--) { /* sizeof(EOC)==22 */
771 971
         if (!(ptr = fmap_need_off_once(map, coff, 20)))
772 972
             continue;
773
-        if (cli_readint32(ptr) == 0x06054b50) {
973
+        if (cli_readint32(ptr) == ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_END) {
774 974
             uint32_t chptr = cli_readint32(&ptr[16]);
775
-            if (!CLI_ISCONTAINED(0, fsize, chptr, SIZEOF_CH)) continue;
975
+            if (!CLI_ISCONTAINED(0, fsize, chptr, SIZEOF_CENTRAL_HEADER)) continue;
776 976
             coff = chptr;
777 977
             break;
778 978
         }
779 979
     }
780 980
 
781 981
     if (coff) {
782
-        uint32_t nOverlappingFiles = 0;
982
+        cli_dbgmsg("cli_unzip: central directory header offset: @%x\n", coff);
983
+
984
+        /*
985
+         * Index the central directory first.
986
+         */
987
+        ret = index_the_central_directory(
988
+            ctx,
989
+            map,
990
+            fsize,
991
+            coff,
992
+            &zip_catalogue,
993
+            &records_count);
994
+        if (CL_SUCCESS != ret) {
995
+            goto done;
996
+        }
997
+
998
+        /*
999
+         * Then decrypt/unzip & scan each unique file entry.
1000
+         */
1001
+        for (i = 0; i < records_count; i++) {
1002
+            const uint8_t *compressed_data = NULL;
783 1003
 
784
-        cli_dbgmsg("cli_unzip: central @%x\n", coff);
785
-        while ((coff = chdr(map, coff, fsize, &fu, fc + 1, &ret, ctx, tmpd, NULL, &cur_file_local_offset, &cur_file_local_header_size, &cur_file_local_data_size))) {
786
-            fc++;
787
-            if (ctx->engine->maxfiles && fu >= ctx->engine->maxfiles) {
1004
+            if ((i > 0) &&
1005
+                (zip_catalogue[i].local_header_offset == zip_catalogue[i - 1].local_header_offset) &&
1006
+                (zip_catalogue[i].local_header_size == zip_catalogue[i - 1].local_header_size) &&
1007
+                (zip_catalogue[i].compressed_size == zip_catalogue[i - 1].compressed_size)) {
1008
+
1009
+                /* Duplicate file entry, skip. */
1010
+                cli_dbgmsg("cli_unzip: Skipping unzipping of duplicate file entry: @ 0x%x\n", zip_catalogue[i].local_header_offset);
1011
+                continue;
1012
+            }
1013
+
1014
+            compressed_data = fmap_need_off(map, zip_catalogue[i].local_header_offset + zip_catalogue[i].local_header_size, SIZEOF_LOCAL_HEADER);
1015
+
1016
+            if (zip_catalogue[i].encrypted) {
1017
+                if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size))
1018
+                    ret = zdecrypt(
1019
+                        compressed_data,
1020
+                        zip_catalogue[i].compressed_size,
1021
+                        zip_catalogue[i].uncompressed_size,
1022
+                        fmap_need_off(map, zip_catalogue[i].local_header_offset, SIZEOF_LOCAL_HEADER),
1023
+                        &num_files_unzipped,
1024
+                        ctx,
1025
+                        tmpd,
1026
+                        zip_scan_cb);
1027
+            } else {
1028
+                if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size))
1029
+                    ret = unz(
1030
+                        compressed_data,
1031
+                        zip_catalogue[i].compressed_size,
1032
+                        zip_catalogue[i].uncompressed_size,
1033
+                        zip_catalogue[i].method,
1034
+                        zip_catalogue[i].flags,
1035
+                        &num_files_unzipped,
1036
+                        ctx,
1037
+                        tmpd,
1038
+                        zip_scan_cb);
1039
+            }
1040
+
1041
+            file_count++;
1042
+            if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) {
788 1043
                 cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
789 1044
                 ret = CL_EMAXFILES;
790 1045
             }
... ...
@@ -793,31 +1184,6 @@ int cli_unzip(cli_ctx *ctx)
793 793
                 cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
794 794
                 ret = CL_ETIMEOUT;
795 795
             }
796
-            /*
797
-     * Detect overlapping files and zip bombs.
798
-     */
799
-            if ((((cur_file_local_offset > prev_file_local_offset) && (cur_file_local_offset < prev_file_local_offset + prev_file_local_header_size + prev_file_local_data_size)) ||
800
-                 ((prev_file_local_offset > cur_file_local_offset) && (prev_file_local_offset < cur_file_local_offset + cur_file_local_header_size + cur_file_local_data_size))) &&
801
-                (cur_file_local_header_size + cur_file_local_data_size > 0)) {
802
-                /* Overlapping file detected */
803
-                nOverlappingFiles++;
804
-
805
-                cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
806
-                cli_dbgmsg("    previous file end:  %u\n", prev_file_local_offset + prev_file_local_header_size + prev_file_local_data_size);
807
-                cli_dbgmsg("    current file start: %u\n", cur_file_local_offset);
808
-                if (ZIP_MAX_NUM_OVERLAPPING_FILES < nOverlappingFiles) {
809
-                    if (SCAN_HEURISTICS) {
810
-                        ret         = cli_append_virus(ctx, "Heuristics.Zip.OverlappingFiles");
811
-                        virus_found = 1;
812
-                    } else {
813
-                        ret = CL_EFORMAT;
814
-                    }
815
-                    bZipBombDetected = 1;
816
-                }
817
-            }
818
-            prev_file_local_offset      = cur_file_local_offset;
819
-            prev_file_local_header_size = cur_file_local_header_size;
820
-            prev_file_local_data_size   = cur_file_local_data_size;
821 796
 
822 797
 #if HAVE_JSON
823 798
             if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
... ...
@@ -825,27 +1191,44 @@ int cli_unzip(cli_ctx *ctx)
825 825
             }
826 826
 #endif
827 827
             if (ret != CL_CLEAN) {
828
-                if (ret == CL_VIRUS && SCAN_ALLMATCHES && !bZipBombDetected) {
828
+                if (ret == CL_VIRUS && SCAN_ALLMATCHES) {
829 829
                     ret         = CL_CLEAN;
830 830
                     virus_found = 1;
831
-                } else
831
+                } else {
832 832
                     break;
833
+                }
833 834
             }
834 835
         }
835
-    } else
836
+    } else {
836 837
         cli_dbgmsg("cli_unzip: central not found, using localhdrs\n");
837
-    if (virus_found == 1)
838
+    }
839
+
840
+    if (virus_found == 1) {
838 841
         ret = CL_VIRUS;
839
-    if (fu <= (fc / 4)) { /* FIXME: make up a sane ratio or remove the whole logic */
840
-        fc = 0;
841
-        while (ret == CL_CLEAN && lhoff < fsize && (coff = lhdr(map, lhoff, fsize - lhoff, &fu, fc + 1, NULL, &ret, ctx, tmpd, 1, zip_scan_cb, NULL, NULL))) {
842
-            fc++;
842
+    }
843
+    if (num_files_unzipped <= (file_count / 4)) { /* FIXME: make up a sane ratio or remove the whole logic */
844
+        file_count = 0;
845
+        while ((ret == CL_CLEAN) &&
846
+               (lhoff < fsize) &&
847
+               (0 != (coff = parse_local_file_header(map,
848
+                                                     lhoff,
849
+                                                     fsize - lhoff,
850
+                                                     &num_files_unzipped,
851
+                                                     file_count + 1,
852
+                                                     NULL,
853
+                                                     &ret,
854
+                                                     ctx,
855
+                                                     tmpd,
856
+                                                     1,
857
+                                                     zip_scan_cb,
858
+                                                     NULL)))) {
859
+            file_count++;
843 860
             lhoff += coff;
844 861
             if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
845 862
                 ret         = CL_CLEAN;
846 863
                 virus_found = 1;
847 864
             }
848
-            if (ctx->engine->maxfiles && fu >= ctx->engine->maxfiles) {
865
+            if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) {
849 866
                 cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
850 867
                 ret = CL_EMAXFILES;
851 868
             }
... ...
@@ -857,8 +1240,19 @@ int cli_unzip(cli_ctx *ctx)
857 857
         }
858 858
     }
859 859
 
860
-    if (!ctx->engine->keeptmp) cli_rmdirs(tmpd);
861
-    free(tmpd);
860
+done:
861
+
862
+    if (NULL != zip_catalogue) {
863
+        free(zip_catalogue);
864
+        zip_catalogue = NULL;
865
+    }
866
+
867
+    if (NULL != tmpd) {
868
+        if (!ctx->engine->keeptmp) {
869
+            cli_rmdirs(tmpd);
870
+        }
871
+        free(tmpd);
872
+    }
862 873
 
863 874
     if (ret == CL_CLEAN && virus_found)
864 875
         ret = CL_VIRUS;
... ...
@@ -866,35 +1260,50 @@ int cli_unzip(cli_ctx *ctx)
866 866
     return ret;
867 867
 }
868 868
 
869
-int unzip_single_internal(cli_ctx *ctx, off_t lhoffl, zip_cb zcb)
869
+cl_error_t unzip_single_internal(cli_ctx *ctx, off_t local_header_offset, zip_cb zcb)
870 870
 {
871
-    int ret         = CL_CLEAN;
872
-    unsigned int fu = 0;
871
+    cl_error_t ret = CL_CLEAN;
872
+
873
+    unsigned int num_files_unzipped = 0;
873 874
     uint32_t fsize;
874 875
     fmap_t *map = *ctx->fmap;
875 876
 
876 877
     cli_dbgmsg("in cli_unzip_single\n");
877
-    fsize = (uint32_t)(map->len - lhoffl);
878
-    if (lhoffl < 0 || (size_t)lhoffl > map->len || (sizeof(off_t) != sizeof(uint32_t) && (size_t)fsize != map->len - lhoffl)) {
878
+    fsize = (uint32_t)(map->len - local_header_offset);
879
+    if ((local_header_offset < 0) ||
880
+        ((size_t)local_header_offset > map->len) ||
881
+        ((sizeof(off_t) != sizeof(uint32_t)) && ((size_t)fsize != map->len - local_header_offset))) {
882
+
879 883
         cli_dbgmsg("cli_unzip: bad offset\n");
880 884
         return CL_CLEAN;
881 885
     }
882
-    if (fsize < SIZEOF_LH) {
886
+    if (fsize < SIZEOF_LOCAL_HEADER) {
883 887
         cli_dbgmsg("cli_unzip: file too short\n");
884 888
         return CL_CLEAN;
885 889
     }
886 890
 
887
-    lhdr(map, lhoffl, fsize, &fu, 0, NULL, &ret, ctx, NULL, 0, zcb, NULL, NULL);
891
+    parse_local_file_header(map,
892
+                            local_header_offset,
893
+                            fsize,
894
+                            &num_files_unzipped,
895
+                            0,
896
+                            NULL,
897
+                            &ret,
898
+                            ctx,
899
+                            NULL,
900
+                            0,
901
+                            zcb,
902
+                            NULL);
888 903
 
889 904
     return ret;
890 905
 }
891 906
 
892
-int cli_unzip_single(cli_ctx *ctx, off_t lhoffl)
907
+cl_error_t cli_unzip_single(cli_ctx *ctx, off_t local_header_offset)
893 908
 {
894
-    return unzip_single_internal(ctx, lhoffl, zip_scan_cb);
909
+    return unzip_single_internal(ctx, local_header_offset, zip_scan_cb);
895 910
 }
896 911
 
897
-int unzip_search_add(struct zip_requests *requests, const char *name, size_t nlen)
912
+cl_error_t unzip_search_add(struct zip_requests *requests, const char *name, size_t nlen)
898 913
 {
899 914
     cli_dbgmsg("in unzip_search_add\n");
900 915
 
... ...
@@ -912,14 +1321,14 @@ int unzip_search_add(struct zip_requests *requests, const char *name, size_t nle
912 912
     return CL_SUCCESS;
913 913
 }
914 914
 
915
-int unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
915
+cl_error_t unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
916 916
 {
917
-    unsigned int fc = 0;
918
-    fmap_t *zmap    = map;
917
+    unsigned int file_count = 0;
918
+    fmap_t *zmap            = map;
919 919
     size_t fsize;
920 920
     uint32_t coff = 0;
921 921
     const char *ptr;
922
-    int ret = CL_CLEAN;
922
+    cl_error_t ret = CL_CLEAN;
923 923
 #if HAVE_JSON
924 924
     uint32_t toval = 0;
925 925
 #endif
... ...
@@ -937,7 +1346,7 @@ int unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
937 937
         cli_dbgmsg("unzip_search: file too big\n");
938 938
         return CL_CLEAN;
939 939
     }
940
-    if (fsize < SIZEOF_CH) {
940
+    if (fsize < SIZEOF_CENTRAL_HEADER) {
941 941
         cli_dbgmsg("unzip_search: file too short\n");
942 942
         return CL_CLEAN;
943 943
     }
... ...
@@ -945,23 +1354,32 @@ int unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
945 945
     for (coff = fsize - 22; coff > 0; coff--) { /* sizeof(EOC)==22 */
946 946
         if (!(ptr = fmap_need_off_once(zmap, coff, 20)))
947 947
             continue;
948
-        if (cli_readint32(ptr) == 0x06054b50) {
948
+        if (cli_readint32(ptr) == ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_END) {
949 949
             uint32_t chptr = cli_readint32(&ptr[16]);
950
-            if (!CLI_ISCONTAINED(0, fsize, chptr, SIZEOF_CH)) continue;
950
+            if (!CLI_ISCONTAINED(0, fsize, chptr, SIZEOF_CENTRAL_HEADER)) continue;
951 951
             coff = chptr;
952 952
             break;
953 953
         }
954 954
     }
955 955
 
956 956
     if (coff) {
957
-        cli_dbgmsg("unzip_search: central @%x\n", coff);
958
-        while (ret == CL_CLEAN && (coff = chdr(zmap, coff, fsize, NULL, fc + 1, &ret, ctx, NULL, requests, NULL, NULL, NULL))) {
957
+        cli_dbgmsg("unzip_search: central directory header offset: @%x\n", coff);
958
+        while (ret == CL_CLEAN && (coff = parse_central_directory_file_header(zmap,
959
+                                                                              coff,
960
+                                                                              fsize,
961
+                                                                              NULL,
962
+                                                                              file_count + 1,
963
+                                                                              &ret,
964
+                                                                              ctx,
965
+                                                                              NULL,
966
+                                                                              requests,
967
+                                                                              NULL))) {
959 968
             if (requests->match) {
960 969
                 ret = CL_VIRUS;
961 970
             }
962 971
 
963
-            fc++;
964
-            if (ctx && ctx->engine->maxfiles && fc >= ctx->engine->maxfiles) {
972
+            file_count++;
973
+            if (ctx && ctx->engine->maxfiles && file_count >= ctx->engine->maxfiles) {
965 974
                 cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
966 975
                 ret = CL_EMAXFILES;
967 976
             }
... ...
@@ -978,10 +1396,10 @@ int unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
978 978
     return ret;
979 979
 }
980 980
 
981
-int unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
981
+cl_error_t unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
982 982
 {
983 983
     struct zip_requests requests;
984
-    int ret;
984
+    cl_error_t ret;
985 985
 
986 986
     cli_dbgmsg("in unzip_search_single\n");
987 987
     if (!ctx) {
... ...
@@ -41,16 +41,15 @@ struct zip_requests {
41 41
     int found, match;
42 42
 };
43 43
 
44
-int cli_unzip(cli_ctx *);
45
-int cli_unzip_single_internal(cli_ctx *, off_t, zip_cb);
46
-int unzip_single_internal(cli_ctx *, off_t, zip_cb);
47
-int cli_unzip_single(cli_ctx *, off_t);
48
-
49
-int unzip_search_add(struct zip_requests *, const char *, size_t);
50
-int unzip_search(cli_ctx *, fmap_t *, struct zip_requests *);
51
-int unzip_search_single(cli_ctx *, const char *, size_t, uint32_t *);
44
+cl_error_t cli_unzip(cli_ctx *);
45
+cl_error_t unzip_single_internal(cli_ctx *, off_t, zip_cb);
46
+cl_error_t cli_unzip_single(cli_ctx *, off_t);
52 47
 
48
+cl_error_t unzip_search_add(struct zip_requests *, const char *, size_t);
49
+cl_error_t unzip_search(cli_ctx *, fmap_t *, struct zip_requests *);
50
+cl_error_t unzip_search_single(cli_ctx *, const char *, size_t, uint32_t *);
53 51
 
52
+// clang-format off
54 53
 #ifdef UNZIP_PRIVATE
55 54
 #define F_ENCR  (1<<0)
56 55
 #define F_ALGO1 (1<<1)
... ...
@@ -68,33 +67,36 @@ int unzip_search_single(cli_ctx *, const char *, size_t, uint32_t *);
68 68
 #define F_MSKED (1<<13)
69 69
 #define F_RSVD3 (1<<14)
70 70
 #define F_RSVD4 (1<<15)
71
+// clang-format on
71 72
 
72 73
 enum ALGO {
73
-  ALG_STORED,
74
-  ALG_SHRUNK,
75
-  ALG_REDUCE1,
76
-  ALG_REDUCE2,
77
-  ALG_REDUCE3,
78
-  ALG_REDUCE4,
79
-  ALG_IMPLODE,
80
-  ALG_TOKENZD,
81
-  ALG_DEFLATE,
82
-  ALG_DEFLATE64,
83
-  ALG_OLDTERSE,
84
-  ALG_RSVD1,
85
-  ALG_BZIP2,
86
-  ALG_RSVD2,
87
-  ALG_LZMA,
88
-  ALG_RSVD3,
89
-  ALG_RSVD4,
90
-  ALG_RSVD5,
91
-  ALG_NEWTERSE,
92
-  ALG_LZ77,
93
-  ALG_WAVPACK = 97,
94
-  ALG_PPMD
74
+    ALG_STORED,
75
+    ALG_SHRUNK,
76
+    ALG_REDUCE1,
77
+    ALG_REDUCE2,
78
+    ALG_REDUCE3,
79
+    ALG_REDUCE4,
80
+    ALG_IMPLODE,
81
+    ALG_TOKENZD,
82
+    ALG_DEFLATE,
83
+    ALG_DEFLATE64,
84
+    ALG_OLDTERSE,
85
+    ALG_RSVD1,
86
+    ALG_BZIP2,
87
+    ALG_RSVD2,
88
+    ALG_LZMA,
89
+    ALG_RSVD3,
90
+    ALG_RSVD4,
91
+    ALG_RSVD5,
92
+    ALG_NEWTERSE,
93
+    ALG_LZ77,
94
+    ALG_WAVPACK = 97,
95
+    ALG_PPMD
95 96
 };
96 97
 
97
-
98
+/*
99
+ * Local File Header format:
100
+ */
98 101
 /* struct LH { */
99 102
 /*   uint32_t magic; */
100 103
 /*   uint16_t version; */
... ...
@@ -110,18 +112,26 @@ enum ALGO {
110 110
 /*   char extra[elen] */
111 111
 /* } __attribute__((packed)); */
112 112
 
113
-#define LH_magic	((uint32_t)cli_readint32((uint8_t *)(lh)+0))
114
-#define LH_version	((uint16_t)cli_readint16((uint8_t *)(lh)+4))
115
-#define LH_flags	((uint16_t)cli_readint16((uint8_t *)(lh)+6))
116
-#define LH_method	((uint16_t)cli_readint16((uint8_t *)(lh)+8))
117
-#define LH_mtime	((uint32_t)cli_readint32((uint8_t *)(lh)+10))
118
-#define LH_crc32	((uint32_t)cli_readint32((uint8_t *)(lh)+14))
119
-#define LH_csize	((uint32_t)cli_readint32((uint8_t *)(lh)+18))
120
-#define LH_usize	((uint32_t)cli_readint32((uint8_t *)(lh)+22))
121
-#define LH_flen 	((uint16_t)cli_readint16((uint8_t *)(lh)+26))
122
-#define LH_elen 	((uint16_t)cli_readint16((uint8_t *)(lh)+28))
123
-#define SIZEOF_LH 30
113
+/*
114
+ * Local File Header convenience macros:
115
+ */
116
+// clang-format off
117
+#define LOCAL_HEADER_magic   ((uint32_t)cli_readint32((uint8_t *)(local_header)+0))
118
+#define LOCAL_HEADER_version ((uint16_t)cli_readint16((uint8_t *)(local_header)+4))
119
+#define LOCAL_HEADER_flags   ((uint16_t)cli_readint16((uint8_t *)(local_header)+6))
120
+#define LOCAL_HEADER_method  ((uint16_t)cli_readint16((uint8_t *)(local_header)+8))
121
+#define LOCAL_HEADER_mtime   ((uint32_t)cli_readint32((uint8_t *)(local_header)+10))
122
+#define LOCAL_HEADER_crc32   ((uint32_t)cli_readint32((uint8_t *)(local_header)+14))
123
+#define LOCAL_HEADER_csize   ((uint32_t)cli_readint32((uint8_t *)(local_header)+18))
124
+#define LOCAL_HEADER_usize   ((uint32_t)cli_readint32((uint8_t *)(local_header)+22))
125
+#define LOCAL_HEADER_flen    ((uint16_t)cli_readint16((uint8_t *)(local_header)+26))
126
+#define LOCAL_HEADER_elen    ((uint16_t)cli_readint16((uint8_t *)(local_header)+28))
127
+#define SIZEOF_LOCAL_HEADER 30
128
+// clang-format on
124 129
 
130
+/*
131
+ * Central Directory File Header format:
132
+ */
125 133
 /* struct CH { */
126 134
 /*   uint32_t magic; */
127 135
 /*   uint16_t vermade; */
... ...
@@ -144,25 +154,30 @@ enum ALGO {
144 144
 /*   char comment[clen] */
145 145
 /* } __attribute__((packed)); */
146 146
 
147
-#define CH_magic	((uint32_t)cli_readint32((uint8_t *)(ch)+0))
148
-#define CH_vermade	((uint16_t)cli_readint16((uint8_t *)(ch)+4))
149
-#define CH_verneed	((uint16_t)cli_readint16((uint8_t *)(ch)+6))
150
-#define CH_flags	((uint16_t)cli_readint16((uint8_t *)(ch)+8))
151
-#define CH_method	((uint16_t)cli_readint16((uint8_t *)(ch)+10))
152
-#define CH_mtime	((uint32_t)cli_readint32((uint8_t *)(ch)+12))
153
-#define CH_crc32	((uint32_t)cli_readint32((uint8_t *)(ch)+16))
154
-#define CH_csize	((uint32_t)cli_readint32((uint8_t *)(ch)+20))
155
-#define CH_usize	((uint32_t)cli_readint32((uint8_t *)(ch)+24))
156
-#define CH_flen 	((uint16_t)cli_readint16((uint8_t *)(ch)+28))
157
-#define CH_elen 	((uint16_t)cli_readint16((uint8_t *)(ch)+30))
158
-#define CH_clen 	((uint16_t)cli_readint16((uint8_t *)(ch)+32))
159
-#define CH_dsk  	((uint16_t)cli_readint16((uint8_t *)(ch)+34))
160
-#define CH_iattrib	((uint16_t)cli_readint16((uint8_t *)(ch)+36))
161
-#define CH_eattrib	((uint32_t)cli_readint32((uint8_t *)(ch)+38))
162
-#define CH_off  	((uint32_t)cli_readint32((uint8_t *)(ch)+42))
163
-#define SIZEOF_CH 46
164
-
165
-#define SIZEOF_EH 12
147
+/*
148
+ * Central Directory File Header convenience macros:
149
+ */
150
+// clang-format off
151
+#define CENTRAL_HEADER_magic        ((uint32_t)cli_readint32((uint8_t *)(central_header)+0))
152
+#define CENTRAL_HEADER_vermade      ((uint16_t)cli_readint16((uint8_t *)(central_header)+4))
153
+#define CENTRAL_HEADER_verneed      ((uint16_t)cli_readint16((uint8_t *)(central_header)+6))
154
+#define CENTRAL_HEADER_flags        ((uint16_t)cli_readint16((uint8_t *)(central_header)+8))
155
+#define CENTRAL_HEADER_method       ((uint16_t)cli_readint16((uint8_t *)(central_header)+10))
156
+#define CENTRAL_HEADER_mtime        ((uint32_t)cli_readint32((uint8_t *)(central_header)+12))
157
+#define CENTRAL_HEADER_crc32        ((uint32_t)cli_readint32((uint8_t *)(central_header)+16))
158
+#define CENTRAL_HEADER_csize        ((uint32_t)cli_readint32((uint8_t *)(central_header)+20))
159
+#define CENTRAL_HEADER_usize        ((uint32_t)cli_readint32((uint8_t *)(central_header)+24))
160
+#define CENTRAL_HEADER_flen         ((uint16_t)cli_readint16((uint8_t *)(central_header)+28))
161
+#define CENTRAL_HEADER_extra_len    ((uint16_t)cli_readint16((uint8_t *)(central_header)+30))
162
+#define CENTRAL_HEADER_comment_len  ((uint16_t)cli_readint16((uint8_t *)(central_header)+32))
163
+#define CENTRAL_HEADER_disk_num     ((uint16_t)cli_readint16((uint8_t *)(central_header)+34))
164
+#define CENTRAL_HEADER_iattrib      ((uint16_t)cli_readint16((uint8_t *)(central_header)+36))
165
+#define CENTRAL_HEADER_eattrib      ((uint32_t)cli_readint32((uint8_t *)(central_header)+38))
166
+#define CENTRAL_HEADER_off          ((uint32_t)cli_readint32((uint8_t *)(central_header)+42))
167
+// clang-format on
168
+
169
+#define SIZEOF_CENTRAL_HEADER 46
170
+#define SIZEOF_ENCRYPTION_HEADER 12
166 171
 #endif /* UNZIP_PRIVATE */
167 172
 
168 173
 #endif /* __UNZIP_H */