Browse code

ooxml_hwp: add support for filetyping and preclassification

Kevin Lin authored on 2015/12/18 06:16:55
Showing 5 changed files
... ...
@@ -125,6 +125,7 @@ static const struct ftmap_s {
125 125
     { "CL_TYPE_HWP3",		CL_TYPE_HWP3		},
126 126
     { "CL_TYPE_XML_HWP",	CL_TYPE_XML_HWP		},
127 127
     { "CL_TYPE_HWPOLE2",	CL_TYPE_HWPOLE2		},
128
+    { "CL_TYPE_OOXML_HWP",	CL_TYPE_OOXML_HWP	},
128 129
     { NULL,			CL_TYPE_IGNORED		}
129 130
 };
130 131
 
... ...
@@ -223,6 +224,9 @@ int is_tar(const unsigned char *buf, unsigned int nbytes);
223 223
 #define OOXML_CONTENTTYPES "[ContentTypes].xml"
224 224
 #define OOXML_CONTENTTYPES_LEN (sizeof(OOXML_CONTENTTYPES)-1)
225 225
 
226
+#define OOXML_HWP_CONTENTS "Contents/content.hpf"
227
+#define OOXML_HWP_CONTENTS_LEN (sizeof(OOXML_HWP_CONTENTS)-1)
228
+
226 229
 cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t basetype)
227 230
 {
228 231
 	unsigned char buffer[MAGIC_BUFFER_SIZE];
... ...
@@ -328,20 +332,29 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
328 328
                         if (zlen >= OOXML_DOCPROPS_DIR_LEN) {
329 329
                             if (0 == memcmp(znamep, OOXML_DOCPROPS_DIR, OOXML_DOCPROPS_DIR_LEN)) {
330 330
                                 likely_ooxml = 1;
331
-                            } else { 
332
-                                if  (zlen >= OOXML_CONTENTTYPES_LEN) {
333
-                                    if (0 == memcmp(znamep, OOXML_CONTENTTYPES, OOXML_CONTENTTYPES_LEN)) {
334
-                                        likely_ooxml = 1;
335
-                                    }
336
-                                } else {
337
-                                    znamep = NULL;
338
-                                    break;
339
-                                }
340 331
                             }
341 332
                         } else {
342 333
                             znamep = NULL;
343 334
                             break;
344 335
                         }
336
+			if  (zlen >= OOXML_CONTENTTYPES_LEN) {
337
+			    if (0 == memcmp(znamep, OOXML_CONTENTTYPES, OOXML_CONTENTTYPES_LEN)) {
338
+				likely_ooxml = 1;
339
+			    }
340
+			} else {
341
+			    znamep = NULL;
342
+			    break;
343
+			}
344
+			if (zlen >= OOXML_HWP_CONTENTS_LEN) {
345
+                            if (0 == memcmp(znamep, OOXML_HWP_CONTENTS, OOXML_HWP_CONTENTS_LEN)) {
346
+                                cli_dbgmsg("Recognized OOXML HWP file\n");
347
+                                return CL_TYPE_OOXML_HWP;
348
+                            }
349
+                        } else {
350
+                            znamep = NULL;
351
+                            break;
352
+                        }
353
+
345 354
 
346 355
                         if (++lhc > 2) {
347 356
                             /* only check first three zip headers unless likely ooxml */
... ...
@@ -86,6 +86,7 @@ typedef enum {
86 86
     CL_TYPE_OOXML_XL,
87 87
     CL_TYPE_INTERNAL,
88 88
     CL_TYPE_HWP3,
89
+    CL_TYPE_OOXML_HWP,
89 90
 
90 91
     /* Section for partition types */
91 92
     CL_TYPE_PART_ANY, /* unknown partition type */
... ...
@@ -25,6 +25,7 @@
25 25
 
26 26
 #include "clamav.h"
27 27
 #include "cltypes.h"
28
+#include "filetypes.h"
28 29
 #include "others.h"
29 30
 #include "unzip.h"
30 31
 #if HAVE_JSON
... ...
@@ -47,6 +48,7 @@
47 47
 
48 48
 #if HAVE_LIBXML2 && HAVE_JSON
49 49
 
50
+/*** OOXML MSDOC ***/
50 51
 static const struct key_entry ooxml_keys[] = {
51 52
     { "coreproperties",     "CoreProperties",     MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
52 53
     { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
... ...
@@ -341,6 +343,48 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
341 341
     xmlFreeTextReader(reader);
342 342
     return ret;
343 343
 }
344
+
345
+/*** OOXML HWP: key table that ooxml_hwp_cb passes to cli_msxml_parse_document() ***/
346
+static const struct key_entry ooxml_hwp_keys[] = {
347
+    { "hcfversion",         "HCFVersion",         MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
348
+    /* NOTE(review): grouping suggests the entry above belongs to version.xml and the ones below to Contents/content.hpf -- confirm */
349
+    { "package",            "Properties",         MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
350
+    { "metadata",           "Metadata",           MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
351
+    { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
352
+    { "language",           "Language",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
353
+    { "meta",               "MetaFields",         MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_COUNT | MSXML_JSON_MULTI },
354
+    { "item",               "Contents",           MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_COUNT | MSXML_JSON_MULTI }
355
+};
356
+static size_t num_ooxml_hwp_keys = sizeof(ooxml_hwp_keys) / sizeof(struct key_entry); /* number of entries in ooxml_hwp_keys */
357
+/* Callback handed to unzip_single_internal() for the HWP package documents: runs ooxml_updatelimits() on fd, then parses the XML read from fd with the ooxml_hwp_keys table. Returns the ooxml_updatelimits() status when it is not CL_CLEAN, CL_SUCCESS when no XML reader could be created, otherwise the cli_msxml_parse_document() status. */
358
+static int ooxml_hwp_cb(int fd, cli_ctx *ctx)
359
+{
360
+    int ret = CL_SUCCESS;
361
+    xmlTextReaderPtr reader = NULL;
362
+
363
+    cli_dbgmsg("in ooxml_hwp_cb\n");
364
+
365
+    /* perform engine limit checks in temporary tracking session */
366
+    ret = ooxml_updatelimits(fd, ctx);
367
+    if (ret != CL_CLEAN)
368
+        return ret;
369
+
370
+    reader = xmlReaderForFd(fd, "ooxml_hwp.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
371
+    if (reader == NULL) {
372
+        cli_dbgmsg("ooxml_hwp_cb: xmlReaderForFd error\n");
373
+        return CL_SUCCESS; // internal error from libxml2
374
+    }
375
+
376
+    ret = cli_msxml_parse_document(ctx, reader, ooxml_hwp_keys, num_ooxml_hwp_keys, 1, NULL);
377
+    /* CL_ETIMEOUT and CL_BREAK are passed back without a warning; any other non-success status is logged */
378
+    if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
379
+        cli_warnmsg("ooxml_hwp_cb: encountered issue in parsing properties document\n");
380
+    /* the reader is closed and freed whatever the parse status was */
381
+    xmlTextReaderClose(reader);
382
+    xmlFreeTextReader(reader);
383
+    return ret;
384
+}
385
+
344 386
 #endif /* HAVE_LIBXML2 && HAVE_JSON */
345 387
 
346 388
 int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map)
... ...
@@ -376,41 +420,71 @@ int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map)
376 376
     return CL_SUCCESS;
377 377
 }
378 378
 
379
-int cli_process_ooxml(cli_ctx *ctx)
379
+int cli_process_ooxml(cli_ctx *ctx, int type)
380 380
 {
381 381
 #if HAVE_LIBXML2 && HAVE_JSON
382 382
     uint32_t loff = 0;
383
-    int tmp = CL_SUCCESS;
383
+    int ret = CL_SUCCESS;
384 384
 
385 385
     cli_dbgmsg("in cli_process_ooxml\n");
386 386
     if (!ctx) {
387 387
         return CL_ENULLARG;
388 388
     }
389 389
 
390
-    /* find "[Content Types].xml" */
391
-    tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff);
392
-    if (tmp == CL_ETIMEOUT) {
393
-        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
394
-        return CL_ETIMEOUT;
395
-    }
396
-    else if (tmp != CL_VIRUS) {
397
-        cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
398
-        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
399
-        return CL_EFORMAT;
390
+    if (type == CL_TYPE_OOXML_HWP) {
391
+        /* two files: version.xml and Contents/content.hpf */
392
+        ret = unzip_search_single(ctx, "version.xml", 11, &loff);
393
+        if (ret == CL_ETIMEOUT) {
394
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
395
+            return CL_ETIMEOUT;
396
+        }
397
+        else if (ret != CL_VIRUS) {
398
+            cli_dbgmsg("cli_process_ooxml: failed to find ""version.xml""!\n");
399
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_VERSION");
400
+            return CL_EFORMAT;
401
+        }
402
+        ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb);
403
+
404
+        if (ret == CL_SUCCESS) {
405
+            ret = unzip_search_single(ctx, "Contents/content.hpf", 20, &loff);
406
+            if (ret == CL_ETIMEOUT) {
407
+                cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
408
+                return CL_ETIMEOUT;
409
+            }
410
+            else if (ret != CL_VIRUS) {
411
+                cli_dbgmsg("cli_process_ooxml: failed to find ""Contents/content.hpf""!\n");
412
+                cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_CONTENT");
413
+                return CL_EFORMAT;
414
+            }
415
+            ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb);
416
+        }
417
+    } else {
418
+        /* find "[Content_Types].xml" */
419
+        ret = unzip_search_single(ctx, "[Content_Types].xml", 19, &loff);
420
+        if (ret == CL_ETIMEOUT) {
421
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
422
+            return CL_ETIMEOUT;
423
+        }
424
+        else if (ret != CL_VIRUS) {
425
+            cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
426
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
427
+            return CL_EFORMAT;
428
+        }
429
+        cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);
430
+
431
+        ret = unzip_single_internal(ctx, loff, ooxml_content_cb);
400 432
     }
401
-    cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);
402 433
 
403
-    tmp = unzip_single_internal(ctx, loff, ooxml_content_cb);
404
-    if (tmp == CL_ETIMEOUT)
434
+    if (ret == CL_ETIMEOUT)
405 435
         cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
406
-    else if (tmp == CL_EMEM)
436
+    else if (ret == CL_EMEM)
407 437
         cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
408
-    else if (tmp == CL_EMAXSIZE)
438
+    else if (ret == CL_EMAXSIZE)
409 439
         cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
410
-    else if (tmp == CL_EMAXFILES)
440
+    else if (ret == CL_EMAXFILES)
411 441
         cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
412 442
 
413
-    return tmp;
443
+    return ret;
414 444
 #else
415 445
     UNUSEDPARAM(ctx);
416 446
     cli_dbgmsg("in cli_process_ooxml\n");
... ...
@@ -26,7 +26,8 @@
26 26
 #endif
27 27
 
28 28
 #include "others.h"
29
+
29 30
 int cli_ooxml_filetype(cli_ctx *, fmap_t *);
30
-int cli_process_ooxml(cli_ctx *);
31
+int cli_process_ooxml(cli_ctx *, int);
31 32
 
32 33
 #endif
... ...
@@ -2686,7 +2686,8 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2686 2686
                 type == CL_TYPE_XML_XL ||
2687 2687
                 type == CL_TYPE_HWP3 ||
2688 2688
                 type == CL_TYPE_XML_HWP ||
2689
-                type == CL_TYPE_HWPOLE2) {
2689
+                type == CL_TYPE_HWPOLE2 ||
2690
+		type == CL_TYPE_OOXML_HWP) {
2690 2691
                 ctx->properties = json_object_new_object();
2691 2692
                 if (NULL == ctx->properties) {
2692 2693
                     cli_errmsg("magic_scandesc: no memory for json properties object\n");
... ...
@@ -2890,22 +2891,24 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2890 2890
 	    }
2891 2891
 	    break;
2892 2892
 
2893
-        case CL_TYPE_OOXML_WORD:
2894
-        case CL_TYPE_OOXML_PPT:
2895
-        case CL_TYPE_OOXML_XL:
2893
+	case CL_TYPE_OOXML_WORD:
2894
+	case CL_TYPE_OOXML_PPT:
2895
+	case CL_TYPE_OOXML_XL:
2896
+	case CL_TYPE_OOXML_HWP:
2896 2897
 #if HAVE_JSON
2897
-            if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) {
2898
-                ret = cli_process_ooxml(ctx);
2899
-                if (ret == CL_EMEM || ret == CL_ENULLARG) {
2900
-                    /* critical error */
2901
-                    break;
2902
-                }
2903
-                else if (ret != CL_SUCCESS) {
2904
-                    /* allow for the CL_TYPE_ZIP scan to occur; cli_process_ooxml other possible returns: */
2905
-                    /* CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE, CL_EFORMAT, CL_BREAK, CL_ESTAT  */
2906
-                    ret = CL_SUCCESS;
2907
-                }
2908
-            }
2898
+	    if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) {
2899
+		ret = cli_process_ooxml(ctx, type);
2900
+
2901
+		if (ret == CL_EMEM || ret == CL_ENULLARG) {
2902
+		    /* critical error */
2903
+		    break;
2904
+		}
2905
+		else if (ret != CL_SUCCESS) {
2906
+		    /* allow for the CL_TYPE_ZIP scan to occur; cli_process_ooxml other possible returns: */
2907
+		    /* CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE, CL_EFORMAT, CL_BREAK, CL_ESTAT  */
2908
+		    ret = CL_SUCCESS;
2909
+		}
2910
+	    }
2909 2911
 #endif
2910 2912
 	case CL_TYPE_ZIP:
2911 2913
 	    ctx->container_type = CL_TYPE_ZIP;