Browse code

bb#11145 - added function to determine ooxml filetype; unzip: adjusted dir/file searching mechanism

Kevin Lin authored on 2014/10/15 06:15:18
Showing 5 changed files
... ...
@@ -267,7 +267,8 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
267 267
             const unsigned char * znamep = buff;
268 268
             int32_t zlen = bread;
269 269
             int lhc = 0;
270
-            int zi;
270
+            int zi, likely_ooxml = 0;
271
+            cli_file_t ret2;
271 272
             
272 273
             for (zi=0; zi<32; zi++) {
273 274
                 znamep = (const unsigned char *)cli_memstr((const char *)znamep, zlen, lhdr_magic, 4);
... ...
@@ -284,9 +285,34 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
284 284
                         } else if (0 == memcmp(znamep, "word/", 5)) {
285 285
                             cli_dbgmsg("Recognized OOXML Word file\n");
286 286
                             return CL_TYPE_OOXML_WORD;
287
+                        } else if (0 == memcmp(znamep, "docProps/", 5)) {
288
+                            likely_ooxml = 1;
289
+                        }
290
+
291
+                        if (++lhc > 2) {
292
+                            /* only check first three zip headers unless likely ooxml */
293
+                            if (likely_ooxml) {
294
+                                cli_dbgmsg("Likely OOXML, checking additional zip headers\n");
295
+                                if ((ret2 = cli_ooxml_filetype(NULL, map)) != CL_SUCCESS) {
296
+                                    /* either an error or retyping has occurred, return error or just CL_TYPE_ZIP? */
297
+                                    switch (ret2) {
298
+                                    case CL_TYPE_OOXML_XL:
299
+                                        cli_dbgmsg("Recognized OOXML XL file\n");
300
+                                        break;
301
+                                    case CL_TYPE_OOXML_PPT:
302
+                                        cli_dbgmsg("Recognized OOXML PPT file\n");
303
+                                        break;
304
+                                    case CL_TYPE_OOXML_WORD:
305
+                                        cli_dbgmsg("Recognized OOXML WORD file\n");
306
+                                        break;
307
+                                    default:
308
+                                        cli_dbgmsg("unexpected ooxml_filetype return: %i\n", ret2);
309
+                                    }
310
+                                    return ret2;
311
+                                }
312
+                            }
313
+                            break;
287 314
                         }
288
-                        if (++lhc > 2)
289
-                            break; /* only check first three zip headers */
290 315
                     }
291 316
                     else {
292 317
                         znamep = NULL; /* force to map more */
... ...
@@ -482,7 +482,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
482 482
         if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-package.core-properties+xml")) {
483 483
             if (!core) {
484 484
                 /* default: /docProps/core.xml*/
485
-                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
485
+                tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
486 486
                 if (tmp == CL_ETIMEOUT) {
487 487
                     ret = tmp;
488 488
                 }
... ...
@@ -500,7 +500,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
500 500
         else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.extended-properties+xml")) {
501 501
             if (!extn) {
502 502
                 /* default: /docProps/app.xml */
503
-                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
503
+                tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
504 504
                 if (tmp == CL_ETIMEOUT) {
505 505
                     ret = tmp;
506 506
                 }
... ...
@@ -518,7 +518,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
518 518
         else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.custom-properties+xml")) {
519 519
             if (!cust) {
520 520
                 /* default: /docProps/custom.xml */
521
-                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
521
+                tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
522 522
                 if (tmp == CL_ETIMEOUT) {
523 523
                     ret = tmp;
524 524
                 }
... ...
@@ -573,6 +573,39 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
573 573
 }
574 574
 #endif /* HAVE_LIBXML2 && HAVE_JSON */
575 575
 
576
+int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map)
577
+{
578
+    struct zip_requests requests;
579
+    int ret;
580
+
581
+    memset(&requests, 0, sizeof(struct zip_requests));
582
+
583
+    if ((ret = unzip_search_add(&requests, "xl/", 3)) != CL_SUCCESS) {
584
+        return CL_SUCCESS;
585
+    }
586
+    if ((ret = unzip_search_add(&requests, "ppt/", 4)) != CL_SUCCESS) {
587
+        return CL_SUCCESS;
588
+    }
589
+    if ((ret = unzip_search_add(&requests, "word/", 5)) != CL_SUCCESS) {
590
+        return CL_SUCCESS;
591
+    }
592
+
593
+    if ((ret = unzip_search(ctx, map, &requests)) == CL_VIRUS) {
594
+        switch (requests.found) {
595
+        case 0:
596
+            return CL_TYPE_OOXML_XL;
597
+        case 1:
598
+            return CL_TYPE_OOXML_PPT;
599
+        case 2:
600
+            return CL_TYPE_OOXML_WORD;
601
+        default:
602
+            return CL_SUCCESS;
603
+        }
604
+    }
605
+
606
+    return CL_SUCCESS;
607
+}
608
+
576 609
 int cli_process_ooxml(cli_ctx *ctx)
577 610
 {
578 611
 #if HAVE_LIBXML2 && HAVE_JSON
... ...
@@ -585,7 +618,7 @@ int cli_process_ooxml(cli_ctx *ctx)
585 585
     }
586 586
 
587 587
     /* find "[Content_Types].xml" */
588
-    tmp = unzip_search(ctx, "[Content_Types].xml", 18, &loff);
588
+    tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff);
589 589
     if (tmp == CL_ETIMEOUT) {
590 590
         return CL_ETIMEOUT;
591 591
     }
... ...
@@ -26,6 +26,7 @@
26 26
 #endif
27 27
 
28 28
 #include "others.h"
29
+int cli_ooxml_filetype(cli_ctx *, fmap_t *);
29 30
 int cli_process_ooxml(cli_ctx *);
30 31
 
31 32
 #endif
... ...
@@ -53,14 +53,6 @@
53 53
 #define UNZIP_PRIVATE
54 54
 #include "unzip.h"
55 55
 
56
-typedef struct zip_request {
57
-    const char *name;
58
-    size_t namelen;
59
-    uint32_t loff;
60
-
61
-    int found;
62
-} zip_request_t;
63
-
64 56
 static int wrap_inflateinit2(void *a, int b) {
65 57
   return inflateInit2(a, b);
66 58
 }
... ...
@@ -427,7 +419,7 @@ static unsigned int lhdr(fmap_t *map, uint32_t loff,uint32_t zsize, unsigned int
427 427
   return zip-lh;
428 428
 }
429 429
 
430
-static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned int *fu, unsigned int fc, int *ret, cli_ctx *ctx, char *tmpd, zip_request_t *request) {
430
+static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned int *fu, unsigned int fc, int *ret, cli_ctx *ctx, char *tmpd, struct zip_requests *requests) {
431 431
   char name[256];
432 432
   int last = 0;
433 433
   const uint8_t *ch;
... ...
@@ -447,7 +439,7 @@ static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned in
447 447
   }
448 448
 
449 449
   name[0]='\0';
450
-  if((cli_debug_flag && !last) || request) {
450
+  if((cli_debug_flag && !last) || requests) {
451 451
       unsigned int size = (CH_flen>=sizeof(name))?sizeof(name)-1:CH_flen;
452 452
       const char *src = fmap_need_off_once(map, coff, size);
453 453
       if(src) {
... ...
@@ -470,16 +462,26 @@ static unsigned int chdr(fmap_t *map, uint32_t coff, uint32_t zsize, unsigned in
470 470
   }
471 471
   coff+=CH_clen;
472 472
 
473
-  if (!request) {
473
+  if (!requests) {
474 474
       if(CH_off<zsize-SIZEOF_LH) {
475 475
           lhdr(map, CH_off, zsize-CH_off, fu, fc, ch, ret, ctx, tmpd, 1, zip_scan_cb);
476 476
       } else cli_dbgmsg("cli_unzip: ch - local hdr out of file\n");
477 477
   }
478 478
   else {
479
-      size_t len = MIN(sizeof(name)-1, request->namelen);
480
-      if (!last && !strncmp(request->name, name, len)) {
481
-          request->found = 1;
482
-          request->loff = CH_off;
479
+      int i;
480
+      size_t len;
481
+
482
+      if (!last) {
483
+          for (i = 0; i < requests->namecnt; ++i) {
484
+              cli_dbgmsg("checking for %i: %s\n", i, requests->names[i]);
485
+
486
+              len = MIN(sizeof(name)-1, requests->namelens[i]);      
487
+              if (!strncmp(requests->names[i], name, len)) {
488
+                  requests->match = 1;
489
+                  requests->found = i;
490
+                  requests->loff = CH_off;
491
+              }
492
+          }
483 493
       }
484 494
   }
485 495
 
... ...
@@ -603,27 +605,46 @@ int cli_unzip_single(cli_ctx *ctx, off_t lhoffl) {
603 603
     return unzip_single_internal(ctx, lhoffl, zip_scan_cb);
604 604
 }
605 605
 
606
-int unzip_search(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
606
+int unzip_search_add(struct zip_requests *requests, const char *name, size_t nlen)
607
+{
608
+    cli_dbgmsg("in unzip_search_add\n");
609
+
610
+    if (requests->namecnt >= MAX_ZIP_REQUESTS) {
611
+        cli_dbgmsg("DEBUGGING MESSAGE GOES HERE!\n");
612
+        return CL_BREAK;
613
+    }
614
+
615
+    cli_dbgmsg("unzip_search_add: adding %s (len %llu)\n", name, (long long unsigned)nlen);
616
+
617
+    requests->names[requests->namecnt] = name;
618
+    requests->namelens[requests->namecnt] = nlen;
619
+    requests->namecnt++;
620
+
621
+    return CL_SUCCESS;
622
+}
623
+
624
+int unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests)
607 625
 {
608 626
     unsigned int fc = 0;
609
-    fmap_t *map;
627
+    fmap_t *zmap = map;
610 628
     size_t fsize;
611 629
     uint32_t coff = 0;
612 630
     const char *ptr;
613
-    zip_request_t request; 
614 631
     int ret = CL_CLEAN;
615 632
 #if HAVE_JSON
616 633
     uint32_t toval = 0;
617 634
 #endif
618
-
619 635
     cli_dbgmsg("in unzip_search\n");
620
-    if (!ctx) {
636
+
637
+    if ((!ctx && !map) || !requests) {
621 638
         return CL_ENULLARG;
622 639
     }
623 640
 
624
-    map = *ctx->fmap;
625
-    fsize = map->len;
626
-    if(sizeof(off_t)!=sizeof(uint32_t) && fsize!=map->len) {
641
+    /* give priority to the given map over *ctx->fmap */
642
+    if (ctx && !map)
643
+        zmap = *ctx->fmap;
644
+    fsize = zmap->len;
645
+    if(sizeof(off_t)!=sizeof(uint32_t) && fsize!=zmap->len) {
627 646
         cli_dbgmsg("unzip_search: file too big\n");
628 647
         return CL_CLEAN;
629 648
     }
... ...
@@ -633,7 +654,7 @@ int unzip_search(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
633 633
     }
634 634
 
635 635
     for(coff=fsize-22 ; coff>0 ; coff--) { /* sizeof(EOC)==22 */
636
-        if(!(ptr = fmap_need_off_once(map, coff, 20)))
636
+        if(!(ptr = fmap_need_off_once(zmap, coff, 20)))
637 637
             continue;
638 638
         if(cli_readint32(ptr)==0x06054b50) {
639 639
             uint32_t chptr = cli_readint32(&ptr[16]);
... ...
@@ -643,25 +664,20 @@ int unzip_search(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
643 643
         }
644 644
     }
645 645
 
646
-    request.name = name;
647
-    request.namelen = nlen;
648
-    request.found = 0;
649
-
650 646
     if(coff) {
651 647
         cli_dbgmsg("unzip_search: central @%x\n", coff);
652
-        while(ret==CL_CLEAN && (coff=chdr(map, coff, fsize, NULL, fc+1, &ret, ctx, NULL, &request))) {
653
-            if (request.found) {
654
-                *loff = request.loff;
648
+        while(ret==CL_CLEAN && (coff=chdr(zmap, coff, fsize, NULL, fc+1, &ret, ctx, NULL, requests))) {
649
+            if (requests->match) {
655 650
                 return CL_VIRUS;
656 651
             }
657 652
 
658 653
             fc++;
659
-            if (ctx->engine->maxfiles && fc >= ctx->engine->maxfiles) {
654
+            if (ctx && ctx->engine->maxfiles && fc >= ctx->engine->maxfiles) {
660 655
                 cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
661 656
                 ret=CL_EMAXFILES;
662 657
             }
663 658
 #if HAVE_JSON
664
-            if (cli_json_timeout_cycle_check(ctx, (int *)(&toval)) != CL_SUCCESS) {
659
+            if (ctx && cli_json_timeout_cycle_check(ctx, (int *)(&toval)) != CL_SUCCESS) {
665 660
                 return CL_ETIMEOUT;
666 661
             }
667 662
 #endif
... ...
@@ -673,3 +689,26 @@ int unzip_search(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
673 673
     return ret;
674 674
 }
675 675
 
676
+int unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
677
+{
678
+    struct zip_requests requests;
679
+    int ret;
680
+
681
+    cli_dbgmsg("in unzip_search_single\n");
682
+    if (!ctx) {
683
+        return CL_ENULLARG;
684
+    }
685
+
686
+    memset(&requests, 0, sizeof(struct zip_requests));
687
+
688
+    if ((ret = unzip_search_add(&requests, name, nlen)) != CL_SUCCESS) {
689
+        return ret;
690
+    }
691
+
692
+    if ((ret = unzip_search(ctx, NULL, &requests)) == CL_VIRUS) {
693
+        *loff = requests.loff;
694
+    }
695
+
696
+    return ret;
697
+}
698
+
... ...
@@ -25,15 +25,30 @@
25 25
 #include "clamav-config.h"
26 26
 #endif
27 27
 
28
+#include "others.h"
29
+
28 30
 typedef int (*zip_cb)(int fd, cli_ctx *ctx);
29 31
 #define zip_scan_cb cli_magic_scandesc
30 32
 
31
-#include "others.h"
33
+#define MAX_ZIP_REQUESTS 10
34
+struct zip_requests {
35
+    const char *names[MAX_ZIP_REQUESTS];
36
+    size_t namelens[MAX_ZIP_REQUESTS];
37
+    int namecnt;
38
+
39
+    uint32_t loff;
40
+    int found, match;
41
+};
42
+
32 43
 int cli_unzip(cli_ctx *);
33 44
 int cli_unzip_single_internal(cli_ctx *, off_t, zip_cb);
34
-int unzip_single_internal(cli_ctx *ctx, off_t lhoffl, zip_cb zcb);
45
+int unzip_single_internal(cli_ctx *, off_t, zip_cb);
35 46
 int cli_unzip_single(cli_ctx *, off_t);
36
-int unzip_search(cli_ctx *, const char *, size_t, uint32_t *);
47
+
48
+int unzip_search_add(struct zip_requests *, const char *, size_t);
49
+int unzip_search(cli_ctx *, fmap_t *, struct zip_requests *);
50
+int unzip_search_single(cli_ctx *, const char *, size_t, uint32_t *);
51
+
37 52
 
38 53
 #ifdef UNZIP_PRIVATE
39 54
 #define F_ENCR  (1<<0)