Browse code

Tests: add tests for malformed and concatenated zips

Val Snyder authored on 2025/03/23 11:40:57
Showing 1 changed file
... ...
@@ -7,6 +7,7 @@ Run clamscan tests.
7 7
 import os
8 8
 from zipfile import ZIP_DEFLATED, ZipFile
9 9
 import sys
10
+import hashlib
10 11
 
11 12
 sys.path.append('../unit_tests')
12 13
 import testcase
... ...
@@ -274,6 +275,192 @@ class TC(testcase.TestCase):
274 274
         ]
275 275
         self.verify_output(output.out, expected=expected_results)
276 276
 
277
+    def test_zip_plus_zip(self):
278
+        self.step_name('Test that clam will the clam.zip and also another zip concatenated to the end.')
279
+
280
+        # Build a file that is the clam.zip archive with a zip concatenated on that contains the not_eicar test string file.
281
+        clam_zip = TC.path_build / 'unit_tests' / 'input' / 'clamav_hdb_scanfiles' / 'clam.zip'
282
+
283
+        not_eicar_zip = TC.path_tmp / 'not-eicar.zip'
284
+        with ZipFile(str(not_eicar_zip), 'w', ZIP_DEFLATED) as zf:
285
+            zf.writestr('not-eicar.txt', b"CLAMAV-TEST-STRING-NOT-EICAR")
286
+
287
+        testfile = TC.path_tmp / 'clam.zip.not_eicar.zipsfx'
288
+        testfile.write_bytes(clam_zip.read_bytes() + not_eicar_zip.read_bytes())
289
+
290
+        command = '{valgrind} {valgrind_args} {clamscan} -d {clam_exe_db} -d {not_eicar_db} --allmatch {testfiles}'.format(
291
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
292
+            # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip.
293
+            clam_exe_db=TC.path_db / 'clam.ndb',
294
+            not_eicar_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'Clamav-Unit-Test-Signature.ndb',
295
+            testfiles=testfile,
296
+        )
297
+        output = self.execute_command(command)
298
+
299
+        assert output.ec == 1  # virus
300
+
301
+        expected_results = [
302
+            'Test.NDB.UNOFFICIAL FOUND',
303
+            'NDB.Clamav-Unit-Test-Signature.UNOFFICIAL FOUND',
304
+        ]
305
+        self.verify_output(output.out, expected=expected_results)
306
+
307
+    def test_zip_all_files(self):
308
+        self.step_name('Test that clam will extract all files from a zip.')
309
+
310
+        testfile = TC.path_tmp / 'multi-file.zip'
311
+        with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf:
312
+            zf.writestr('file-0.txt', b"Test file 0")
313
+            zf.writestr('file-1.txt', b"Test file 1")
314
+            zf.writestr('file-2.txt', b"Test file 2")
315
+            zf.writestr('file-3.txt', b"Test file 3")
316
+
317
+        # Calculate sha256 and len for all files
318
+        sha256s = {}
319
+        with ZipFile(str(testfile), 'r') as zf:
320
+            for name in zf.namelist():
321
+                data = zf.read(name)
322
+                sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) )
323
+
324
+        # Make sha256 signatures for all files
325
+        with open(TC.path_db / 'missing_entries.hsb', 'w') as f:
326
+            for name, data in sha256s.items():
327
+                f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n")
328
+
329
+        command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format(
330
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
331
+            # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip.
332
+            missing_entries_db=TC.path_db / 'missing_entries.hsb',
333
+            testfiles=testfile,
334
+        )
335
+        output = self.execute_command(command)
336
+
337
+        assert output.ec == 1  # virus
338
+
339
+        expected_results = [
340
+            'file-0.txt.NDB.UNOFFICIAL FOUND',
341
+            'file-1.txt.NDB.UNOFFICIAL FOUND',
342
+            'file-2.txt.NDB.UNOFFICIAL FOUND',
343
+            'file-3.txt.NDB.UNOFFICIAL FOUND',
344
+        ]
345
+        self.verify_output(output.out, expected=expected_results)
346
+
347
+    def test_zip_no_central_directory(self):
348
+        self.step_name('Test that clam will extract files from a zip with no central directory.')
349
+
350
+        testfile = TC.path_tmp / 'multi-file-no-central.zip'
351
+        with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf:
352
+            zf.writestr('file-0.txt', b"Test file 0")
353
+            zf.writestr('file-1.txt', b"Test file 1")
354
+            zf.writestr('file-2.txt', b"Test file 2")
355
+            zf.writestr('file-3.txt', b"Test file 3")
356
+
357
+        # Calculate sha256 and len for all files
358
+        sha256s = {}
359
+        with ZipFile(str(testfile), 'r') as zf:
360
+            for name in zf.namelist():
361
+                data = zf.read(name)
362
+                sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) )
363
+
364
+        # Make sha256 signatures for all files
365
+        with open(TC.path_db / 'missing_entries.hsb', 'w') as f:
366
+            for name, data in sha256s.items():
367
+                f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n")
368
+
369
+        # Remove the central directory
370
+        with open(str(testfile), 'r+b') as f:
371
+            # find the start of the central directory, which has a 4-byte signature 'PK\x05\x06'
372
+            while f.read(4) != b'PK\x01\x02':
373
+                pass
374
+            # rewind 4 bytes
375
+            f.seek(-4, os.SEEK_CUR)
376
+            # truncate the central directory
377
+            f.truncate()
378
+
379
+        command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format(
380
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
381
+            # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip.
382
+            missing_entries_db=TC.path_db / 'missing_entries.hsb',
383
+            testfiles=testfile,
384
+        )
385
+        output = self.execute_command(command)
386
+
387
+        assert output.ec == 1  # virus
388
+
389
+        expected_results = [
390
+            'file-0.txt.NDB.UNOFFICIAL FOUND',
391
+            'file-1.txt.NDB.UNOFFICIAL FOUND',
392
+            'file-2.txt.NDB.UNOFFICIAL FOUND',
393
+            'file-3.txt.NDB.UNOFFICIAL FOUND',
394
+        ]
395
+        self.verify_output(output.out, expected=expected_results)
396
+
397
+    def test_zip_missing_centrals(self):
398
+        self.step_name('Test that clam will detect files omitted from zip central directory.')
399
+
400
+        testfile = TC.path_tmp / 'multi-file-missing-centrals.zip'
401
+        with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf:
402
+            zf.writestr('file-0.txt', b"Test file 0")
403
+            zf.writestr('file-1.txt', b"Test file 1")
404
+            zf.writestr('file-2.txt', b"Test file 2")
405
+            zf.writestr('file-3.txt', b"Test file 3")
406
+
407
+        # Calculate sha256 and len for all files
408
+        sha256s = {}
409
+        with ZipFile(str(testfile), 'r') as zf:
410
+            for name in zf.namelist():
411
+                data = zf.read(name)
412
+                sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) )
413
+
414
+        # Make sha256 signatures for all files
415
+        with open(TC.path_db / 'missing_entries.hsb', 'w') as f:
416
+            for name, data in sha256s.items():
417
+                f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n")
418
+
419
+        # Remove the central directory entries for file-2.txt and file-4.txt
420
+        with open(str(testfile), 'r+b') as f:
421
+            # find the first central directory record. Each will have a 4-byte signature 'PK\x01\x02'
422
+            while f.read(4) != b'PK\x01\x02':
423
+                # rewind 3 bytes, because it might not be aligned
424
+                f.seek(-3, os.SEEK_CUR)
425
+
426
+            # get the offset
427
+            central_dir_offset = f.tell()
428
+
429
+            # read the central directory
430
+            central_dir = f.read()
431
+
432
+            # truncate the central directory
433
+            f.truncate(central_dir_offset)
434
+
435
+            # seek to the end of the file
436
+            f.seek(0, os.SEEK_END)
437
+
438
+            # write just the central directory entries for file-1.txt and file-3.txt
439
+            split_central_dir = central_dir.split(b'PK\x01\x02')
440
+            #f.write(split_central_dir[0])
441
+            f.write(split_central_dir[1])
442
+            #f.write(split_central_dir[2])
443
+            f.write(split_central_dir[3]) # note the last one also has the end of central directory record. That's fine.
444
+
445
+        command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format(
446
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
447
+            # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip.
448
+            missing_entries_db=TC.path_db / 'missing_entries.hsb',
449
+            testfiles=testfile,
450
+        )
451
+        output = self.execute_command(command)
452
+
453
+        assert output.ec == 1  # virus
454
+
455
+        expected_results = [
456
+            'file-0.txt.NDB.UNOFFICIAL FOUND',
457
+            'file-1.txt.NDB.UNOFFICIAL FOUND',
458
+            'file-2.txt.NDB.UNOFFICIAL FOUND',
459
+            'file-3.txt.NDB.UNOFFICIAL FOUND',
460
+        ]
461
+        self.verify_output(output.out, expected=expected_results)
462
+
277 463
     def test_pe_allmatch(self):
278 464
         self.step_name('Test that clam will detect a string in test.exe with a wide variety of signatures written or generated for the file.')
279 465