/* * OOXML JSON Internals * * Copyright (C) 2014-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * * Authors: Kevin Lin * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include "clamav.h" #include "filetypes.h" #include "others.h" #include "unzip.h" #if HAVE_JSON #include "json.h" #endif #include "json_api.h" #include "msxml_parser.h" #include "ooxml.h" #if HAVE_LIBXML2 #include #endif #if HAVE_LIBXML2 && HAVE_JSON /*** OOXML MSDOC ***/ static const struct key_entry ooxml_keys[] = { { "coreproperties", "CoreProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, { "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "subject", "Subject", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "creator", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "keywords", "Keywords", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "comments", "Comments", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "description", "Description", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "lastmodifiedby", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "modified", "Modified", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "category", "Category", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "contentstatus", "ContentStatus", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "properties", "ExtendedProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, { "application", "Application", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "appversion", "AppVersion", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "digsig", "DigSig", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "docsecurity", "DocSecurity", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, //{ "headingpairs", "HeadingPairs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "hiddenslides", "HiddenSlides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "hlinks", "HLinks", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "hyperlinkbase", "HyperlinkBase", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "hyperlinkschanged", "HyperlinksChanged", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "linksuptodate", "LinksUpToDate", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "manager", "Manager", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "mmclips", "MultimediaClips", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "notes", "Notes", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "presentationformat", "PresentationFormat", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, //{ "properties", "Properties", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "scalecrop", "ScaleCrop", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "shareddoc", "SharedDocs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "slides", "Slides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "template", "Template", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, //{ "titleofparts", "TitleOfParts", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, /* Should NOT Exist */ { "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT } }; static size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(struct key_entry); static int ooxml_updatelimits(int fd, cli_ctx *ctx) { STATBUF sb; if (FSTAT(fd, &sb) == -1) { cli_errmsg("ooxml_updatelimits: Can't fstat descriptor %d\n", fd); return CL_ESTAT; } return cli_updatelimits(ctx, sb.st_size); } static int ooxml_parse_document(int fd, cli_ctx *ctx) { int ret = CL_SUCCESS; xmlTextReaderPtr reader = NULL; cli_dbgmsg("in ooxml_parse_document\n"); /* perform engine limit checks in temporary tracking session */ ret = ooxml_updatelimits(fd, ctx); if (ret != CL_CLEAN) return ret; reader = xmlReaderForFd(fd, "properties.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS); if (reader == NULL) { cli_dbgmsg("ooxml_parse_document: xmlReaderForFd error\n"); return CL_SUCCESS; // internal error from libxml2 } ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, MSXML_FLAG_JSON, NULL); if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n"); xmlTextReaderClose(reader); xmlFreeTextReader(reader); return ret; } static int ooxml_core_cb(int fd, const char* filepath, cli_ctx *ctx) { int ret; cli_dbgmsg("in ooxml_core_cb\n"); ret = ooxml_parse_document(fd, ctx); if (ret == CL_EPARSE) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER"); else if (ret == CL_EFORMAT) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED"); return ret; } static int ooxml_extn_cb(int fd, const char* filepath, cli_ctx *ctx) { int ret; cli_dbgmsg("in ooxml_extn_cb\n"); ret = ooxml_parse_document(fd, ctx); if (ret == CL_EPARSE) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER"); else if (ret == CL_EFORMAT) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED"); return ret; } static int ooxml_content_cb(int fd, const char* filepath, cli_ctx *ctx) { int ret = CL_SUCCESS, tmp, toval = 0, state; int core=0, extn=0, cust=0, dsig=0; int mcore=0, mextn=0, mcust=0; const xmlChar *name, *value, *CT, *PN; xmlTextReaderPtr reader = NULL; uint32_t loff; unsigned long sav_scansize = ctx->scansize; unsigned int sav_scannedfiles = ctx->scannedfiles; cli_dbgmsg("in ooxml_content_cb\n"); /* perform engine limit checks in temporary tracking session */ ret = ooxml_updatelimits(fd, ctx); if (ret != CL_CLEAN) return ret; /* apply a reader to the document */ reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS); if (reader == NULL) { cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD"); ctx->scansize = sav_scansize; ctx->scannedfiles = sav_scannedfiles; return CL_SUCCESS; // libxml2 failed! } /* locate core-properties, extended-properties, and custom-properties (optional) */ while ((state = xmlTextReaderRead(reader)) == 1) { if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { ret = CL_ETIMEOUT; goto ooxml_content_exit; } name = xmlTextReaderConstLocalName(reader); if (name == NULL) continue; if (strcmp((const char *)name, "Override")) continue; if (xmlTextReaderHasAttributes(reader) != 1) continue; CT = PN = NULL; while (xmlTextReaderMoveToNextAttribute(reader) == 1) { name = xmlTextReaderConstLocalName(reader); value = xmlTextReaderConstValue(reader); if (name == NULL || value == NULL) continue; if (!xmlStrcmp(name, (const xmlChar *)"ContentType")) { CT = value; } else if (!xmlStrcmp(name, (const xmlChar *)"PartName")) { PN = value; } cli_dbgmsg("%s: %s\n", name, value); } if (!CT && !PN) continue; if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-package.core-properties+xml")) { /* default: /docProps/core.xml*/ tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff); if (tmp == CL_ETIMEOUT) { ret = tmp; } else if (tmp != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find core properties file \"%s\"!\n", PN); mcore++; } else { cli_dbgmsg("ooxml_content_cb: found core properties file \"%s\" @ %x\n", PN, loff); if (!core) { tmp = unzip_single_internal(ctx, loff, ooxml_core_cb); if (tmp == CL_ETIMEOUT || tmp == CL_EMEM) { ret = tmp; } } core++; } } else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.extended-properties+xml")) { /* default: /docProps/app.xml */ tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff); if (tmp == CL_ETIMEOUT) { ret = tmp; } else if (tmp != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find extended properties file \"%s\"!\n", PN); mextn++; } else { cli_dbgmsg("ooxml_content_cb: found extended properties file \"%s\" @ %x\n", PN, loff); if (!extn) { tmp = unzip_single_internal(ctx, loff, ooxml_extn_cb); if (tmp == CL_ETIMEOUT || tmp == CL_EMEM) { ret = tmp; } } extn++; } } else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.custom-properties+xml")) { /* default: /docProps/custom.xml */ tmp = unzip_search_single(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff); if (tmp == CL_ETIMEOUT) { ret = tmp; } else if (tmp != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find custom properties file \"%s\"!\n", PN); mcust++; } else { cli_dbgmsg("ooxml_content_cb: found custom properties file \"%s\" @ %x\n", PN, loff); /* custom properties are not parsed */ cust++; } } else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml")) { dsig++; } if (ret != CL_SUCCESS) goto ooxml_content_exit; } ooxml_content_exit: if (core) { cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core); if (core > 1) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES"); } else if (!mcore) cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n"); if (mcore) { cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES"); } if (extn) { cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn); if (extn > 1) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES"); } else if (!mextn) cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n"); if (mextn) { cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES"); } if (cust) { cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust); if (cust > 1) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES"); } else if (!mcust) cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n"); if (mcust) { cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES"); } if (dsig) { cli_jsonint(ctx->wrkproperty, "DigitalSignaturesCount", dsig); } /* restore the engine tracking limits; resets session limit tracking */ ctx->scansize = sav_scansize; ctx->scannedfiles = sav_scannedfiles; xmlTextReaderClose(reader); xmlFreeTextReader(reader); return ret; } /*** OOXML HWP ***/ static const struct key_entry ooxml_hwp_keys[] = { { "hcfversion", "HCFVersion", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, { "package", "Properties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, { "metadata", "Metadata", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB }, { "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "language", "Language", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "meta", "MetaFields", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_VALUE | MSXML_JSON_COUNT | MSXML_JSON_MULTI }, { "item", "Contents", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_COUNT | MSXML_JSON_MULTI } }; static size_t num_ooxml_hwp_keys = sizeof(ooxml_hwp_keys) / sizeof(struct key_entry); static int ooxml_hwp_cb(int fd, const char* filepath, cli_ctx *ctx) { int ret = CL_SUCCESS; xmlTextReaderPtr reader = NULL; cli_dbgmsg("in ooxml_hwp_cb\n"); /* perform engine limit checks in temporary tracking session */ ret = ooxml_updatelimits(fd, ctx); if (ret != CL_CLEAN) return ret; reader = xmlReaderForFd(fd, "ooxml_hwp.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS); if (reader == NULL) { cli_dbgmsg("ooxml_hwp_cb: xmlReaderForFd error\n"); return CL_SUCCESS; // internal error from libxml2 } ret = cli_msxml_parse_document(ctx, reader, ooxml_hwp_keys, num_ooxml_hwp_keys, MSXML_FLAG_JSON, NULL); if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) cli_warnmsg("ooxml_hwp_cb: encountered issue in parsing properties document\n"); xmlTextReaderClose(reader); xmlFreeTextReader(reader); return ret; } #endif /* HAVE_LIBXML2 && HAVE_JSON */ int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map) { struct zip_requests requests; int ret; memset(&requests, 0, sizeof(struct zip_requests)); if ((ret = unzip_search_add(&requests, "xl/", 3)) != CL_SUCCESS) { return CL_SUCCESS; } if ((ret = unzip_search_add(&requests, "ppt/", 4)) != CL_SUCCESS) { return CL_SUCCESS; } if ((ret = unzip_search_add(&requests, "word/", 5)) != CL_SUCCESS) { return CL_SUCCESS; } if ((ret = unzip_search_add(&requests, "Contents/content.hpf", 22)) != CL_SUCCESS) { return CL_SUCCESS; } if ((ret = unzip_search(ctx, map, &requests)) == CL_VIRUS) { switch (requests.found) { case 0: return CL_TYPE_OOXML_XL; case 1: return CL_TYPE_OOXML_PPT; case 2: return CL_TYPE_OOXML_WORD; case 3: return CL_TYPE_OOXML_HWP; default: return CL_SUCCESS; } } return CL_SUCCESS; } int cli_process_ooxml(cli_ctx *ctx, int type) { #if HAVE_LIBXML2 && HAVE_JSON uint32_t loff = 0; int ret = CL_SUCCESS; cli_dbgmsg("in cli_process_ooxml\n"); if (!ctx) { return CL_ENULLARG; } if (type == CL_TYPE_OOXML_HWP) { /* two files: version.xml and Contents/content.hpf */ ret = unzip_search_single(ctx, "version.xml", 11, &loff); if (ret == CL_ETIMEOUT) { cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); return CL_ETIMEOUT; } else if (ret != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find ""version.xml""!\n"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_VERSION"); return CL_EFORMAT; } ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb); if (ret == CL_SUCCESS) { ret = unzip_search_single(ctx, "Contents/content.hpf", 20, &loff); if (ret == CL_ETIMEOUT) { cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); return CL_ETIMEOUT; } else if (ret != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find ""Contents/content.hpf""!\n"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_CONTENT"); return CL_EFORMAT; } ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb); } } else { /* find "[Content Types].xml" */ ret = unzip_search_single(ctx, "[Content_Types].xml", 19, &loff); if (ret == CL_ETIMEOUT) { cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); return CL_ETIMEOUT; } else if (ret != CL_VIRUS) { cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES"); return CL_EFORMAT; } cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff); ret = unzip_single_internal(ctx, loff, ooxml_content_cb); } if (ret == CL_ETIMEOUT) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); else if (ret == CL_EMEM) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM"); else if (ret == CL_EMAXSIZE) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE"); else if (ret == CL_EMAXFILES) cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES"); return ret; #else UNUSEDPARAM(ctx); cli_dbgmsg("in cli_process_ooxml\n"); #if !HAVE_LIBXML2 cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!\n"); #endif #if !HAVE_JSON cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!\n"); #endif return CL_SUCCESS; #endif }