25556519 |
/*
* OOXML JSON Internals
*
* Copyright (C) 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
60d8d2c3 |
#include "clamav.h" |
25556519 |
#include "cltypes.h"
#include "others.h"
#include "unzip.h" |
e91e4383 |
#if HAVE_JSON |
51f8cc3c |
#include "json.h" |
e91e4383 |
#endif |
25556519 |
#include "json_api.h"
|
cd94be7a |
#include "ooxml.h"
|
25556519 |
#if HAVE_LIBXML2
#ifdef _WIN32
#ifndef LIBXML_WRITER_ENABLED
#define LIBXML_WRITER_ENABLED 1
#endif
#endif
#include <libxml/xmlreader.h>
#endif
|
871b862e |
#define OOXML_DEBUG 0
|
a41009bc |
#if HAVE_LIBXML2 && HAVE_JSON
|
25556519 |
#define OOXML_JSON_RECLEVEL 16 |
afd00a72 |
#define OOXML_JSON_RECLEVEL_MAX 5
#define OOXML_JSON_STRLEN_MAX 100 |
25556519 |
|
6c951663 |
/*
 * check_state: act on the integer status of an xmlTextReader call
 * (used in this file on the results of xmlTextReaderRead/xmlTextReaderNext).
 *
 *   -1          -> emit a warning and return CL_EPARSE from the caller
 *    0          -> emit a debug message and return CL_BREAK from the caller
 *    otherwise  -> fall through
 *
 * The argument is evaluated exactly once. Beware: the macro contains hidden
 * return statements, so it must only be used where leaving the enclosing
 * function immediately is acceptable.
 */
#define check_state(state)                                                \
    do {                                                                  \
        const int check_state_val_ = (state);                             \
        if (check_state_val_ == -1) {                                     \
            cli_warnmsg("check_state: CL_EPARSE @ ln%d\n", __LINE__);     \
            return CL_EPARSE;                                             \
        }                                                                 \
        else if (check_state_val_ == 0) {                                 \
            cli_dbgmsg("check_state: CL_BREAK @ ln%d\n", __LINE__);       \
            return CL_BREAK;                                              \
        }                                                                 \
    } while(0)
|
d48fd4bb |
/*
 * Try to interpret a string as a base-10 integer.
 *
 * value: NUL-terminated candidate string
 * len:   number of bytes of value the conversion must consume
 * val:   receives the converted number on success
 *
 * Returns 1 and stores the number in *val when exactly the first len bytes
 * form a decimal integer (as accepted by strtol) that is representable as an
 * int32_t. Returns 0 otherwise, leaving *val untouched; this includes NULL
 * arguments, an empty string, and trailing non-numeric characters.
 */
static int ooxml_is_int(const char *value, size_t len, int32_t *val)
{
    long parsed;
    char *endptr = NULL;

    if (value == NULL || val == NULL || len == 0) {
        return 0;
    }

    parsed = strtol(value, &endptr, 10);

    /* reject "no digits converted" as well as leftover characters */
    if (endptr == value || endptr != value + len) {
        return 0;
    }

    /* Refuse numbers that do not fit instead of silently truncating them.
     * Literal bounds are used so no extra limit macros are required; where
     * long is itself 32 bits wide strtol has already saturated the result. */
    if (parsed > 2147483647L || parsed < -2147483647L - 1L) {
        return 0;
    }

    *val = (int32_t)parsed;
    return 1;
}
|
2fd832e5 |
/*
 * Append one XML text value to the JSON array named arrname under wrkptr.
 *
 * The array is obtained through cli_jsonarray(). The text is stored as a JSON
 * integer when ooxml_is_int() accepts it, as a JSON boolean when it is exactly
 * "true" or "false", and as a JSON string otherwise.
 *
 * wrkptr:     JSON object that owns (or will own) the array
 * arrname:    name of the array inside wrkptr
 * node_value: text to record
 *
 * Returns CL_SUCCESS once the value has been appended, CL_EPARSE when there
 * is no text to record, and CL_EMEM when the array or the new JSON value
 * cannot be obtained.
 */
static int ooxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
{
    json_object *newobj, *arrobj;
    int32_t val;

    /* strtol() and json_object_new_string() must never be handed NULL */
    if (node_value == NULL) {
        cli_dbgmsg("ooxml_parse_value: no value to record for [%s]\n", arrname);
        return CL_EPARSE;
    }

    arrobj = cli_jsonarray(wrkptr, arrname);
    if (arrobj == NULL) {
        return CL_EMEM;
    }

    if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
        newobj = json_object_new_int(val);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
        newobj = json_object_new_boolean(1);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
        newobj = json_object_new_boolean(0);
    }
    else {
        newobj = json_object_new_string((const char *)node_value);
    }

    if (NULL == newobj) {
        cli_errmsg("ooxml_parse_value: no memory for json value for [%s]\n", arrname);
        return CL_EMEM;
    }

    json_object_array_add(arrobj, newobj);
    return CL_SUCCESS;
}
|
afd00a72 |
/*
 * XML element names (lower case) that the property parser records.
 * Entry i is reported in JSON under ooxml_json_keys[i], so both tables must
 * stay in the same order.
 *
 * "headingpairs" and "titlesofparts" are intentionally left out.
 * Note that "properties" is listed twice; ooxml_check_key() returns on the
 * first match, so the second occurrence is never selected.
 */
static const char *const ooxml_keys[] = {
    "coreproperties",
    "title",
    "subject",
    "creator",
    "keywords",
    "comments",
    "description",
    "lastmodifiedby",
    "revision",
    "created",
    "modified",
    "category",
    "contentstatus",

    "properties",
    "application",
    "appversion",
    "characters",
    "characterswithspaces",
    "company",
    "digsig",
    "docsecurity",
    /* "headingpairs", */
    "hiddenslides",
    "hlinks",
    "hyperlinkbase",
    "hyperlinkschanged",
    "lines",
    "linksuptodate",
    "manager",
    "mmclips",
    "notes",
    "pages",
    "paragraphs",
    "presentationformat",
    "properties",
    "scalecrop",
    "shareddoc",
    "slides",
    "template",
    /* "titlesofparts", */
    "totaltime",
    "words"
};

/* JSON property names, parallel to ooxml_keys[] */
static const char *const ooxml_json_keys[] = {
    "CoreProperties",
    "Title",
    "Subject",
    "Author",
    "Keywords",
    "Comments",
    "Description",
    "LastAuthor",
    "Revision",
    "Created",
    "Modified",
    "Category",
    "ContentStatus",

    "ExtendedProperties",
    "Application",
    "AppVersion",
    "Characters",
    "CharactersWithSpaces",
    "Company",
    "DigSig",
    "DocSecurity",
    /* "HeadingPairs", */
    "HiddenSlides",
    "HLinks",
    "HyperlinkBase",
    "HyperlinksChanged",
    "Lines",
    "LinksUpToDate",
    "Manager",
    "MultimediaClips",
    "Notes",
    "Pages",
    "Paragraphs",
    "PresentationFormat",
    "Properties",
    "ScaleCrop",
    "SharedDoc",
    "Slides",
    "Template",
    /* "TitlesOfParts", */
    "TotalTime",
    "Words"
};

/* Derived from the table so it cannot drift when entries are added/removed. */
static const size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(ooxml_keys[0]);

/* Compile-time guard: both tables must have the same number of entries. */
typedef char ooxml_key_tables_size_check[(sizeof(ooxml_keys) == sizeof(ooxml_json_keys)) ? 1 : -1];

/*
 * Map an XML element name onto its JSON property name.
 *
 * key:    element name; only the first keylen bytes are examined
 * keylen: length of key in bytes
 *
 * The comparison is case-insensitive and requires the full length to match.
 * Returns the matching ooxml_json_keys[] entry, or NULL when key is NULL,
 * longer than OOXML_JSON_STRLEN_MAX-1, or not in the table.
 */
static const char *ooxml_check_key(const char* key, size_t keylen)
{
    size_t i;

    if (key == NULL) {
        return NULL;
    }

    if (keylen > OOXML_JSON_STRLEN_MAX-1) {
        cli_dbgmsg("ooxml_check_key: key name too long\n");
        return NULL;
    }

    for (i = 0; i < num_ooxml_keys; ++i) {
        if (keylen == strlen(ooxml_keys[i]) && !strncasecmp(key, ooxml_keys[i], keylen)) {
            return ooxml_json_keys[i];
        }
    }

    return NULL;
}
a41009bc |
|
3827b2cc |
/*
 * Recursively convert the XML element under the reader into JSON.
 *
 * ctx:    scan context, used for the JSON timeout check
 * reader: libxml2 text reader positioned on an element start tag
 * wrkptr: JSON object the new element object is attached to
 * rlvl:   current nesting depth (0 for the document element)
 * root:   JSON object that receives the "HitRecursiveLimit" flag; replaced by
 *         this element's own object when rlvl is 0
 *
 * Elements whose name is not known to ooxml_check_key(), and elements nested
 * OOXML_JSON_RECLEVEL_MAX deep or more, are skipped together with their
 * subtree. For a known element a JSON object is created, its attributes are
 * stored under "Attributes", text nodes are appended to "Value" and child
 * elements are handled by recursion.
 *
 * Returns CL_SUCCESS on success; CL_EPARSE on reader failures or when the
 * JSON objects cannot be created; CL_BREAK when the reader reports 0 while
 * advancing (see check_state); CL_EFORMAT when the end tag is unknown or does
 * not match the start tag; CL_ETIMEOUT when the timeout check fails; or any
 * error returned by ooxml_parse_value().
 */
static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, json_object *root)
{
    const char *element_tag = NULL, *end_tag = NULL;
    const xmlChar *node_name = NULL, *node_value = NULL;
    json_object *thisjobj = NULL;
    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0, state = 1;

    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);

    /* check recursion level */
    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n");
        cli_jsonbool(root, "HitRecursiveLimit", 1);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* acquire element type */
    node_type = xmlTextReaderNodeType(reader);
    if (node_type == -1)
        return CL_EPARSE;

    if (node_type != XML_READER_TYPE_ELEMENT) {
        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
        return CL_EPARSE; /* first type is not an element */
    }

    node_name = xmlTextReaderConstLocalName(reader);
    if (!node_name) {
        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
        return CL_EPARSE; /* no name, nameless */
    }

    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
    if (!element_tag) {
        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* generate json object */
    thisjobj = cli_jsonobj(wrkptr, element_tag);
    if (!thisjobj) {
        return CL_EPARSE;
    }
    cli_dbgmsg("ooxml_parse_element: generated json object [%s]\n", element_tag);

    /* the document element becomes the holder of the recursion-limit flag */
    if (rlvl == 0)
        root = thisjobj;

    /* handle attributes */
    state = xmlTextReaderHasAttributes(reader);
    if (state == 1) {
        json_object *attributes;

        attributes = cli_jsonobj(thisjobj, "Attributes");
        if (!attributes) {
            return CL_EPARSE;
        }
        cli_dbgmsg("ooxml_parse_element: retrieved json object [Attributes]\n");

        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
            const xmlChar *name, *value;

            name = xmlTextReaderConstLocalName(reader);
            value = xmlTextReaderConstValue(reader);
            if (name == NULL || value == NULL) continue;

            cli_dbgmsg("%s: %s\n", name, value);

            cli_jsonstr(attributes, (const char *)name, (const char *)value);
        }
    }
    else if (state == -1)
        return CL_EPARSE;

    /* Walking the attributes leaves the reader positioned on an attribute
     * node. Step back onto the owning element, otherwise the empty-element
     * test below cannot recognise a self-closing tag that has attributes and
     * the nodes following it would be parsed as its content. */
    if (xmlTextReaderMoveToElement(reader) == -1)
        return CL_EPARSE;

    /* a self-closing element has no content to parse */
    state = xmlTextReaderIsEmptyElement(reader);
    if (state == 1) {
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }
    else if (state == -1)
        return CL_EPARSE;

    /* advance to first content node */
    state = xmlTextReaderRead(reader);
    check_state(state);

    /* parse until the end element tag */
    while (!endtag) {
        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
            return CL_ETIMEOUT;
        }

        node_type = xmlTextReaderNodeType(reader);
        if (node_type == -1)
            return CL_EPARSE;

        switch (node_type) {
        case XML_READER_TYPE_ELEMENT:
            /* child element: the recursive call consumes it entirely */
            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1, root);
            if (ret != CL_SUCCESS) {
                return ret;
            }
            break;

        case XML_READER_TYPE_END_ELEMENT:
            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
            node_name = xmlTextReaderConstLocalName(reader);
            if (!node_name) {
                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
                return CL_EPARSE; /* no name, nameless */
            }

            end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
            if (!end_tag) {
                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
                return CL_EFORMAT; /* unrecognized element tag */
            }
            /* both names come from the key table, so compare them in full
             * rather than by prefix */
            if (strcmp(element_tag, end_tag)) {
                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
                return CL_EFORMAT;
            }

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);

            endtag = 1;
            break;

        case XML_READER_TYPE_TEXT:
            node_value = xmlTextReaderConstValue(reader);

            ret = ooxml_parse_value(thisjobj, "Value", node_value);
            if (ret != CL_SUCCESS)
                return ret;

            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);
            break;

        default:
#if OOXML_DEBUG
            node_name = xmlTextReaderConstLocalName(reader);
            node_value = xmlTextReaderConstValue(reader);

            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name, node_type, node_value);
#endif
            state = xmlTextReaderNext(reader);
            check_state(state);
            /* NOTE(review): returning here ends this element before its end
             * tag has been seen; confirm this is intended for unhandled node
             * types rather than continuing the loop. */
            return CL_SUCCESS;
        }
    }

    return CL_SUCCESS;
}
/*
 * Parse one properties XML document from fd into ctx->wrkproperty.
 *
 * fd:  descriptor of the extracted XML part
 * ctx: scan context; its wrkproperty JSON object receives the parsed data
 *
 * A reader is created for fd, moved to the first node and handed to
 * ooxml_parse_element(). When parsing fails with anything other than
 * CL_ETIMEOUT or CL_BREAK, "ParseError" is set on ctx->wrkproperty.
 *
 * Returns the result of ooxml_parse_element(); CL_SUCCESS is returned when
 * the reader cannot be created or cannot read its first node.
 */
static int ooxml_parse_document(int fd, cli_ctx *ctx)
{
    int ret = CL_SUCCESS;
    xmlTextReaderPtr reader = NULL;

    cli_dbgmsg("in ooxml_parse_document\n");

    reader = xmlReaderForFd(fd, "properties.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (reader == NULL) {
        cli_dbgmsg("ooxml_parse_document: xmlReaderForFd error\n");
        return CL_SUCCESS; /* internal error from libxml2 */
    }

    /* move reader to first element; on failure (libxml2 failed) fall through
     * to the cleanup below so the reader is not leaked */
    if (xmlTextReaderRead(reader) == 1) {
        ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, NULL);

        if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) {
            cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n");
            cli_jsonbool(ctx->wrkproperty, "ParseError", 1);
        }
    }

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
}
25556519 |
/*
 * Callback given to unzip_single_internal() for the core properties part:
 * logs its invocation and parses the extracted document found at fd.
 * Returns whatever ooxml_parse_document() returns.
 */
static int ooxml_core_cb(int fd, cli_ctx *ctx)
{
    int status;

    cli_dbgmsg("in ooxml_core_cb\n");

    status = ooxml_parse_document(fd, ctx);
    return status;
}
/*
 * Callback given to unzip_single_internal() for the extended properties
 * part: logs its invocation and parses the extracted document found at fd.
 * Returns whatever ooxml_parse_document() returns.
 */
static int ooxml_extn_cb(int fd, cli_ctx *ctx)
{
    int status;

    cli_dbgmsg("in ooxml_extn_cb\n");

    status = ooxml_parse_document(fd, ctx);
    return status;
}
/*
 * Callback for the extracted "[Content_Types].xml" part.
 *
 * fd:  descriptor of the extracted XML part
 * ctx: scan context; counters are written to ctx->wrkproperty
 *
 * Every <Override> entry is inspected for its ContentType and PartName
 * attributes. The first core and extended properties parts that can be
 * located in the archive are parsed through ooxml_core_cb / ooxml_extn_cb;
 * custom properties parts and digital signatures are only counted.
 * unzip_search() signals a located part by returning CL_VIRUS, which is why
 * that value selects the "found" branch below.
 *
 * When the loop finishes without error the following counters are stored
 * (each only when non-zero): <Kind>PropertiesFileCount,
 * <Kind>PropertiesMissingFileCount and DigitalSignaturesCount.
 *
 * Returns CL_SUCCESS, CL_ETIMEOUT, or the first non-success result of parsing
 * a properties part. CL_SUCCESS is also returned when the reader cannot be
 * created.
 */
static int ooxml_content_cb(int fd, cli_ctx *ctx)
{
    int ret = CL_SUCCESS, tmp, toval = 0;
    int core=0, extn=0, cust=0, dsig=0;
    int mcore=0, mextn=0, mcust=0;
    int has_part;
    const xmlChar *name, *value, *CT, *PN;
    xmlTextReaderPtr reader = NULL;
    uint32_t loff = 0;

    cli_dbgmsg("in ooxml_content_cb\n");

    reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (reader == NULL) {
        cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n");
        return CL_SUCCESS; /* libxml2 failed! */
    }

    /* locate core-properties, extended-properties, and custom-properties (optional) */
    while (xmlTextReaderRead(reader) == 1) {
        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
            ret = CL_ETIMEOUT;
            goto ooxml_content_exit;
        }

        name = xmlTextReaderConstLocalName(reader);
        if (name == NULL) continue;

        if (strcmp((const char *)name, "Override")) continue;

        if (xmlTextReaderHasAttributes(reader) != 1) continue;

        CT = PN = NULL;
        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
            name = xmlTextReaderConstLocalName(reader);
            value = xmlTextReaderConstValue(reader);
            if (name == NULL || value == NULL) continue;

            if (!xmlStrcmp(name, (const xmlChar *)"ContentType")) {
                CT = value;
            }
            else if (!xmlStrcmp(name, (const xmlChar *)"PartName")) {
                PN = value;
            }

            cli_dbgmsg("%s: %s\n", name, value);
        }

        /* nothing can be classified without a content type */
        if (!CT) continue;

        /* The archive lookups below skip the first character of PartName
         * (PN+1), so they need a non-empty PartName. */
        has_part = (PN != NULL && PN[0] != '\0');

        if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-package.core-properties+xml")) {
            if (!has_part) {
                cli_dbgmsg("ooxml_content_cb: core properties entry has no PartName\n");
            }
            else if (!core) {
                /* default: /docProps/core.xml*/
                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
                if (tmp == CL_ETIMEOUT) {
                    ret = tmp;
                }
                else if (tmp != CL_VIRUS) {
                    cli_dbgmsg("cli_process_ooxml: failed to find core properties file \"%s\"!\n", PN);
                    mcore++;
                }
                else {
                    cli_dbgmsg("ooxml_content_cb: found core properties file \"%s\" @ %x\n", PN, loff);
                    ret = unzip_single_internal(ctx, loff, ooxml_core_cb);
                    core++;
                }
            }
        }
        else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.extended-properties+xml")) {
            if (!has_part) {
                cli_dbgmsg("ooxml_content_cb: extended properties entry has no PartName\n");
            }
            else if (!extn) {
                /* default: /docProps/app.xml */
                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
                if (tmp == CL_ETIMEOUT) {
                    ret = tmp;
                }
                else if (tmp != CL_VIRUS) {
                    cli_dbgmsg("cli_process_ooxml: failed to find extended properties file \"%s\"!\n", PN);
                    mextn++;
                }
                else {
                    cli_dbgmsg("ooxml_content_cb: found extended properties file \"%s\" @ %x\n", PN, loff);
                    ret = unzip_single_internal(ctx, loff, ooxml_extn_cb);
                    extn++;
                }
            }
        }
        else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-officedocument.custom-properties+xml")) {
            if (!has_part) {
                cli_dbgmsg("ooxml_content_cb: custom properties entry has no PartName\n");
            }
            else if (!cust) {
                /* default: /docProps/custom.xml */
                tmp = unzip_search(ctx, (const char *)(PN+1), xmlStrlen(PN)-1, &loff);
                if (tmp == CL_ETIMEOUT) {
                    ret = tmp;
                }
                else if (tmp != CL_VIRUS) {
                    cli_dbgmsg("cli_process_ooxml: failed to find custom properties file \"%s\"!\n", PN);
                    mcust++;
                }
                else {
                    /* custom properties are located and counted but not parsed */
                    cli_dbgmsg("ooxml_content_cb: found custom properties file \"%s\" @ %x\n", PN, loff);
                    cust++;
                }
            }
        }
        else if (!xmlStrcmp(CT, (const xmlChar *)"application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml")) {
            dsig++;
        }

        if (ret != CL_SUCCESS)
            goto ooxml_content_exit;
    }

    if (core)
        cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
    else if (!mcore)
        cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
    if (mcore)
        cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore);

    if (extn)
        cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
    else if (!mextn)
        cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
    if (mextn)
        cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn);

    if (cust)
        cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
    else if (!mcust)
        cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
    if (mcust)
        cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust);

    if (dsig) {
        cli_jsonint(ctx->wrkproperty, "DigitalSignaturesCount", dsig);
    }

ooxml_content_exit:
    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
}
20b45621 |
#endif /* HAVE_LIBXML2 && HAVE_JSON */ |
25556519 |
/*
 * Entry point: collect OOXML document properties for the file being scanned.
 *
 * ctx: scan context
 *
 * With libxml2 and JSON support compiled in, "[Content_Types].xml" is looked
 * up in the archive and handed to ooxml_content_cb(). Returns CL_ENULLARG for
 * a NULL ctx, CL_ETIMEOUT when the lookup times out, CL_EFORMAT when the part
 * cannot be found, and otherwise the result of unzip_single_internal().
 *
 * Without that support the function only logs what is missing and returns
 * CL_SUCCESS.
 */
int cli_process_ooxml(cli_ctx *ctx)
{
#if HAVE_LIBXML2 && HAVE_JSON
    /* sizeof - 1 passes the whole name; the former literal 18 was one short.
     * This matches ooxml_content_cb(), which passes the full part-name length. */
    static const char content_types[] = "[Content_Types].xml";
    uint32_t loff = 0;
    int tmp = CL_SUCCESS;

    cli_dbgmsg("in cli_processooxml\n");
    if (!ctx) {
        return CL_ENULLARG;
    }

    /* find "[Content Types].xml"; unzip_search() returns CL_VIRUS on a hit */
    tmp = unzip_search(ctx, content_types, sizeof(content_types) - 1, &loff);
    if (tmp == CL_ETIMEOUT) {
        return CL_ETIMEOUT;
    }
    else if (tmp != CL_VIRUS) {
        cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
        return CL_EFORMAT;
    }
    cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);

    return unzip_single_internal(ctx, loff, ooxml_content_cb);
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("in cli_processooxml\n");
#if !HAVE_LIBXML2
    cli_dbgmsg("cli_process_ooxml: libxml2 needs to be enabled!\n");
#endif
#if !HAVE_JSON
    cli_dbgmsg("cli_process_ooxml: libjson needs to be enabled!\n");
#endif
    return CL_SUCCESS;
#endif
}