libclamav/msxml_parser.c
5994bee6
 /*
c2df9f79
  * Extract component parts of various MS XML files (e.g. MS Office 2003 XML Documents)
5994bee6
  *
e1cbc270
  * Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5994bee6
  * Copyright (C) 2007-2013 Sourcefire, Inc.
  *
  * Authors: Kevin Lin
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  *
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 51
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdint.h>
 #include <stdlib.h>
 
 #include "clamav.h"
 #include "others.h"
 #include "conv.h"
d2efc60c
 #include "scanners.h"
5994bee6
 #include "json_api.h"
 #include "msxml_parser.h"
 
 #if HAVE_LIBXML2
 #include <libxml/xmlreader.h>
 
cb740321
 /*
  * Compile-time switch for the parser's verbose trace output.
  * When MSXML_VERBIOSE is non-zero, cli_msxmlmsg() forwards its arguments to
  * cli_dbgmsg(); when it is zero, cli_msxmlmsg() expands to nothing.
  */
 #define MSXML_VERBIOSE 0
5994bee6
 #if MSXML_VERBIOSE
 #define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
 #else
 #define cli_msxmlmsg(...)
 #endif
 
288057e9
 /*
  * Inspect a libxml2 reader status code and leave the ENCLOSING function when
  * it signals that processing cannot continue:
  *   -1 : emit a warning and return CL_EPARSE
  *    0 : emit a debug message and return CL_BREAK
  * Any other value falls through to the next statement.
  *
  * The argument is evaluated exactly once, so an expression with side effects
  * (e.g. a function call) may be passed safely. __LINE__ still reports the
  * line of the macro invocation.
  */
 #define check_state(state)                                                   \
     do {                                                                     \
         const int check_state_val_ = (state);                                \
         if (check_state_val_ == -1) {                                        \
             cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__); \
             return CL_EPARSE;                                                \
         } else if (check_state_val_ == 0) {                                  \
             cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__);   \
             return CL_BREAK;                                                 \
         }                                                                    \
     } while (0)
5994bee6
 
cb740321
 /*
  * Non-zero when JSON property collection is enabled for this parse, i.e. the
  * MSXML_FLAG_JSON bit is set in the internal context's flags.
  * The argument is parenthesized so any pointer expression may be passed.
  */
 #define track_json(mxctx) ((mxctx)->ictx->flags & MSXML_FLAG_JSON)
6732844a
 
cb740321
 /*
  * Internal, per-parse state. An instance lives on the stack of
  * cli_msxml_parse_document() and is reached by the element parser through
  * msxml_ctx::ictx.
  */
 struct msxml_ictx {
c2df9f79
     /* scan context supplied by the caller of cli_msxml_parse_document() */
     cli_ctx *ctx;
6732844a
     /* MSXML_FLAG_* bits; MSXML_FLAG_JSON is cleared when JSON cannot be used */
     uint32_t flags;
c2df9f79
     /* table of element names to act on, searched by msxml_check_key() */
     const struct key_entry *keys;
     /* number of entries in keys */
     size_t num_keys;
 
 #if HAVE_JSON
     /* JSON object that root-level entries are attached to (taken from ctx->wrkproperty) */
     json_object *root;
6732844a
     /* state passed to cli_json_timeout_cycle_check(); starts at 0 */
     int toval;
c2df9f79
 #endif
 };
5994bee6
 
288057e9
 /*
  * Sentinel returned by msxml_check_key() when an element name is too long or
  * is not present in the key table. Every member is NULL/0, so none of the
  * type flags tested by the element parser are set.
  */
 struct key_entry blank_key = {NULL, NULL, 0};
5994bee6
 
cb740321
 static const struct key_entry *msxml_check_key(struct msxml_ictx *ictx, const xmlChar *key, size_t keylen)
5994bee6
 {
     unsigned i;
 
288057e9
     if (keylen > MSXML_JSON_STRLEN_MAX - 1) {
5994bee6
         cli_dbgmsg("msxml_check_key: key name too long\n");
         return &blank_key;
     }
 
cb740321
     for (i = 0; i < ictx->num_keys; ++i) {
         if (keylen == strlen(ictx->keys[i].key) && !strncasecmp((char *)key, ictx->keys[i].key, keylen)) {
             return &ictx->keys[i];
5994bee6
         }
     }
 
     return &blank_key;
 }
 
288057e9
 /*
  * libxml2 text-reader error callback: forwards parser diagnostics to the
  * ClamAV debug log, prefixed with the document URI and line number.
  *
  * arg:      user data registered with the handler (unused)
  * msg:      message text produced by libxml2
  * severity: libxml2 severity class, used only to pick the log prefix
  * locator:  position information for the diagnostic
  */
 static void msxml_error_handler(void *arg, const char *msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
 {
     int line     = xmlTextReaderLocatorLineNumber(locator);
     xmlChar *URI = xmlTextReaderLocatorBaseURI(locator);
     /* the base URI may be unavailable; never hand NULL to a %s conversion */
     const char *where = URI ? (const char *)URI : "(null)";

     UNUSEDPARAM(arg);

     switch (severity) {
         case XML_PARSER_SEVERITY_WARNING:
         case XML_PARSER_SEVERITY_VALIDITY_WARNING:
             cli_dbgmsg("%s:%d: parser warning : %s", where, line, msg);
             break;
         case XML_PARSER_SEVERITY_ERROR:
         case XML_PARSER_SEVERITY_VALIDITY_ERROR:
             cli_dbgmsg("%s:%d: parser error : %s", where, line, msg);
             break;
         default:
             cli_dbgmsg("%s:%d: unknown severity : %s", where, line, msg);
             break;
     }

     /* the URI was allocated by libxml2, so it must be released with libxml2's deallocator */
     if (URI != NULL)
         xmlFree(URI);
 }
 
d7fa810a
 #if HAVE_JSON
 /*
  * Decide whether a text value is, in its entirety, a base-10 integer that
  * fits in an int32_t.
  *
  * value: NUL-terminated text to examine
  * len:   length of value in bytes; the whole length must be consumed
  * val:   receives the parsed number on success (untouched on failure)
  *
  * Returns 1 and stores the number when the text is a valid in-range integer;
  * returns 0 for NULL arguments, empty text, trailing non-numeric characters,
  * or values that do not fit in 32 bits (callers then treat it as a string).
  */
 static int msxml_is_int(const char *value, size_t len, int32_t *val)
 {
     long parsed;
     char *endptr = NULL;

     if (value == NULL || val == NULL || len == 0) {
         return 0;
     }

     errno  = 0;
     parsed = strtol(value, &endptr, 10);

     /* no digits consumed, or something other than the number is present */
     if (endptr == value || endptr != value + len) {
         return 0;
     }

     /* reject overflow instead of silently truncating the value */
     if (errno == ERANGE || parsed < INT32_MIN || parsed > INT32_MAX) {
         return 0;
     }

     *val = (int32_t)parsed;

     return 1;
 }
 
 /*
  * Append a text value to the JSON array named arrname inside wrkptr.
  *
  * The value is stored as an integer when msxml_is_int() accepts it, as a
  * boolean when it is exactly "true" or "false", and as a string otherwise.
  *
  * wrkptr:     JSON object that owns (or will own) the array
  * arrname:    name of the array within wrkptr
  * node_value: text to convert and append
  *
  * Returns CL_SUCCESS, CL_ENULLARG when wrkptr or node_value is NULL, or
  * CL_EMEM when the array or the new value cannot be created or attached.
  */
 static int msxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
 {
     json_object *newobj, *arrobj;
     int32_t val;

     if (!wrkptr || !node_value)
         return CL_ENULLARG;

     arrobj = cli_jsonarray(wrkptr, arrname);
     if (arrobj == NULL) {
         return CL_EMEM;
     }

     if (msxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
         newobj = json_object_new_int(val);
     } else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
         newobj = json_object_new_boolean(1);
     } else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
         newobj = json_object_new_boolean(0);
     } else {
         newobj = json_object_new_string((const char *)node_value);
     }

     if (NULL == newobj) {
         cli_errmsg("msxml_parse_value: no memory for json value for [%s]\n", arrname);
         return CL_EMEM;
     }

     /* the array takes ownership only on success; drop our reference otherwise */
     if (json_object_array_add(arrobj, newobj) != 0) {
         json_object_put(newobj);
         cli_errmsg("msxml_parse_value: no memory for json value for [%s]\n", arrname);
         return CL_EMEM;
     }

     return CL_SUCCESS;
 }
 #endif /* HAVE_JSON */
 
416456da
 /* Capacity of the per-element attribute array handed to the scan callback; further attributes are not collected. */
 #define MAX_ATTRIBS 20
1629a661
 /*
  * Recursively process the XML node the reader is currently positioned on.
  *
  * For an element node the element name is looked up in the key table and,
  * depending on the entry's type flags, the element is ignored, recorded in
  * the JSON tree (object, count, multi-entry array, attributes, text values)
  * and/or has its text and comment payloads handed to the scan callbacks.
  * The element's content is then walked until the matching end tag, recursing
  * into child elements. Other primary node types are only logged.
  *
  * mxctx:  parser context; mxctx->ictx must have been set up by the caller
  * reader: libxml2 text reader positioned on the node to process
  * rlvl:   current recursion depth; elements at MSXML_RECLEVEL_MAX or deeper
  *         are skipped
  * jptr:   JSON object used as parent for MSXML_JSON_WRKPTR entries (only
  *         used when built with HAVE_JSON)
  *
  * Returns CL_SUCCESS; CL_VIRUS when a scan reported a detection; CL_BREAK
  * when the reader reports no further nodes (via check_state); CL_EPARSE,
  * CL_EFORMAT, CL_EMEM, CL_ETIMEOUT or CL_EWRITE on failure; or any other
  * error code returned by a callback or scan.
  */
 static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl, void *jptr)
 {
     const xmlChar *element_name = NULL;
     const xmlChar *node_name = NULL, *node_value = NULL;
     const struct key_entry *keyinfo;
     struct attrib_entry attribs[MAX_ATTRIBS];
     int ret, virus = 0, state, node_type, endtag = 0, num_attribs = 0;
     cli_ctx *ctx = mxctx->ictx->ctx;
 #if HAVE_JSON
     json_object *root     = mxctx->ictx->root;
     json_object *parent   = (json_object *)jptr;
     json_object *thisjobj = NULL;
 #else
     void *parent   = NULL;
     void *thisjobj = NULL;
 #endif

     cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl);

     /* check recursion level */
     if (rlvl >= MSXML_RECLEVEL_MAX) {
         cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n");

 #if HAVE_JSON
         if (track_json(mxctx)) {
             int tmp = cli_json_parse_error(root, "MSXML_RECURSIVE_LIMIT");
             if (tmp != CL_SUCCESS)
                 return tmp;
         }
 #endif

         /* skip it */
         state = xmlTextReaderNext(reader);
         check_state(state);
         return CL_SUCCESS;
     }

     /* acquire element type */
     node_type = xmlTextReaderNodeType(reader);
     if (node_type == -1)
         return CL_EPARSE;

     node_name  = xmlTextReaderConstLocalName(reader);
     node_value = xmlTextReaderConstValue(reader);

     /* branch on node type */
     switch (node_type) {
         case XML_READER_TYPE_ELEMENT:
             cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);

             /* storing the element name for verification/collection */
             element_name = node_name;
             if (!element_name) {
                 cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
 #if HAVE_JSON
                 if (track_json(mxctx)) {
                     int tmp = cli_json_parse_error(root, "MSXML_NAMELESS_ELEMENT");
                     if (tmp != CL_SUCCESS)
                         return tmp;
                 }
 #endif
                 return CL_EPARSE; /* no name, nameless */
             }

             /* determine if the element is interesting */
             keyinfo = msxml_check_key(mxctx->ictx, element_name, xmlStrlen(element_name));

             cli_msxmlmsg("key:  %s\n", keyinfo->key);
             cli_msxmlmsg("name: %s\n", keyinfo->name);
             cli_msxmlmsg("type: 0x%x\n", keyinfo->type);

             /* element and contents are ignored */
             if (keyinfo->type & MSXML_IGNORE_ELEM) {
                 cli_msxmlmsg("msxml_parse_element: IGNORING ELEMENT %s\n", keyinfo->name);

                 state = xmlTextReaderNext(reader);
                 check_state(state);
                 return CL_SUCCESS;
             }

 #if HAVE_JSON
             if (track_json(mxctx) && (keyinfo->type & MSXML_JSON_TRACK)) {
                 if (keyinfo->type & MSXML_JSON_ROOT)
                     thisjobj = cli_jsonobj(root, keyinfo->name);
                 else if (keyinfo->type & MSXML_JSON_WRKPTR)
                     thisjobj = cli_jsonobj(parent, keyinfo->name);

                 if (!thisjobj) {
                     return CL_EMEM;
                 }
                 cli_msxmlmsg("msxml_parse_element: generated json object [%s]\n", keyinfo->name);

                 /* count this element */
                 if (thisjobj && (keyinfo->type & MSXML_JSON_COUNT)) {
                     json_object *counter = NULL;

                     if (!json_object_object_get_ex(thisjobj, "Count", &counter)) { /* object not found */
                         cli_jsonint(thisjobj, "Count", 1);
                     } else {
                         int value = json_object_get_int(counter);
                         cli_jsonint(thisjobj, "Count", value + 1);
                     }
                     cli_msxmlmsg("msxml_parse_element: retrieved json object [Count]\n");
                 }

                 /* check if multiple entries are allowed */
                 if (thisjobj && (keyinfo->type & MSXML_JSON_MULTI)) {
                     /* replace this object with an array entry object */
                     json_object *multi = cli_jsonarray(thisjobj, "Multi");
                     if (!multi) {
                         return CL_EMEM;
                     }
                     cli_msxmlmsg("msxml_parse_element: generated or retrieved json multi array\n");

                     thisjobj = cli_jsonobj(multi, NULL);
                     if (!thisjobj)
                         return CL_EMEM;
                     cli_msxmlmsg("msxml_parse_element: generated json multi entry object\n");
                 }

                 /* handle attributes */
                 if (thisjobj && (keyinfo->type & MSXML_JSON_ATTRIB)) {
                     state = xmlTextReaderHasAttributes(reader);
                     if (state == 1) {
                         json_object *attributes;
                         const xmlChar *name, *value;

                         attributes = cli_jsonobj(thisjobj, "Attributes");
                         if (!attributes) {
                             /* same failure class as the other JSON object creations above */
                             return CL_EMEM;
                         }
                         cli_msxmlmsg("msxml_parse_element: retrieved json object [Attributes]\n");

                         while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
                             name  = xmlTextReaderConstLocalName(reader);
                             value = xmlTextReaderConstValue(reader);

                             cli_msxmlmsg("\t%s: %s\n", name, value);
                             cli_jsonstr(attributes, (char *)name, (const char *)value);
                         }
                     } else if (state == -1)
                         return CL_EPARSE;
                 }
             }
 #endif
             /* populate attributes for the scanning callback */
             if ((keyinfo->type & MSXML_SCAN_CB) && mxctx->scan_cb) {
                 /*
                  * Always rewind to the first attribute: the JSON pass above may
                  * have left the reader on the element's last attribute, in which
                  * case stepping forward would find nothing.
                  */
                 state = xmlTextReaderMoveToFirstAttribute(reader);
                 if (state == -1)
                     return CL_EPARSE;

                 if (state == 1) {
                     cli_msxmlmsg("msxml_parse_element: adding attributes to scanning context\n");
                 }

                 /* collect at most MAX_ATTRIBS attributes; the rest are dropped */
                 while ((state == 1) && (num_attribs < MAX_ATTRIBS)) {
                     attribs[num_attribs].key   = (const char *)xmlTextReaderConstLocalName(reader);
                     attribs[num_attribs].value = (const char *)xmlTextReaderConstValue(reader);
                     num_attribs++;

                     state = xmlTextReaderMoveToNextAttribute(reader);
                 }
             }

             /* return to the element node (attribute walks moved the reader) and check self-containment */
             state = xmlTextReaderMoveToElement(reader);
             if (state == -1)
                 return CL_EPARSE;

             state = xmlTextReaderIsEmptyElement(reader);
             if (state == 1) {
                 cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n");

                 state = xmlTextReaderNext(reader);
                 check_state(state);
                 return CL_SUCCESS;
             } else if (state == -1)
                 return CL_EPARSE;

             /* advance to first content node */
             state = xmlTextReaderRead(reader);
             check_state(state);

             while (!endtag) {
 #if HAVE_JSON
                 if (track_json(mxctx) && (cli_json_timeout_cycle_check(ctx, &(mxctx->ictx->toval)) != CL_SUCCESS))
                     return CL_ETIMEOUT;
 #endif

                 node_type = xmlTextReaderNodeType(reader);
                 if (node_type == -1)
                     return CL_EPARSE;

                 switch (node_type) {
                     case XML_READER_TYPE_ELEMENT:
                         /* child element: recurse; the callee leaves the reader past the child */
                         ret = msxml_parse_element(mxctx, reader, rlvl + 1, thisjobj ? thisjobj : parent);
                         if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
                             /* all-match mode: remember the detection and keep walking */
                             virus = 1;
                         } else if (ret != CL_SUCCESS) {
                             return ret;
                         }
                         break;

                     case XML_READER_TYPE_TEXT:
                         node_value = xmlTextReaderConstValue(reader);

                         cli_msxmlmsg("TEXT: %s\n", node_value);

 #if HAVE_JSON
                         if (thisjobj && (keyinfo->type & MSXML_JSON_VALUE)) {

                             ret = msxml_parse_value(thisjobj, "Value", node_value);
                             if (ret != CL_SUCCESS)
                                 return ret;

                             cli_msxmlmsg("msxml_parse_element: added json value [%s: %s]\n", keyinfo->name, (const char *)node_value);
                         }
 #endif
                         /* callback-based scanning mechanism for embedded objects (used by HWPML) */
                         if ((keyinfo->type & MSXML_SCAN_CB) && mxctx->scan_cb) {
                             char name[1024];
                             char *tempfile = name;
                             int of;
                             size_t vlen = strlen((const char *)node_value);

                             cli_msxmlmsg("BINARY CALLBACK DATA!\n");

                             if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) {
                                 cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile);
                                 return ret;
                             }

                             /* dump the raw text to the temp file for the callback to read */
                             if (cli_writen(of, (char *)node_value, vlen) != vlen) {
                                 close(of);
                                 if (!(ctx->engine->keeptmp))
                                     cli_unlink(tempfile);
                                 free(tempfile);
                                 return CL_EWRITE;
                             }

                             cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile);

                             ret = mxctx->scan_cb(of, tempfile, ctx, num_attribs, attribs, mxctx->scan_data);
                             close(of);
                             if (!(ctx->engine->keeptmp))
                                 cli_unlink(tempfile);
                             free(tempfile);
                             if (ret != CL_SUCCESS && (ret != CL_VIRUS || (!SCAN_ALLMATCHES && ret == CL_VIRUS))) {
                                 return ret;
                             } else if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
                                 virus = 1;
                             }
                         }

                         /* scanning protocol for embedded objects encoded in base64 (used by MSXML) */
                         if (keyinfo->type & MSXML_SCAN_B64) {
                             char name[1024];
                             char *decoded, *tempfile = name;
                             size_t decodedlen;
                             int of;

                             cli_msxmlmsg("BINARY DATA!\n");

                             decoded = (char *)cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0);
                             if (!decoded) {
                                 /* undecodable payload is not fatal: skip this text node */
                                 cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n");
                                 state = xmlTextReaderRead(reader);
                                 check_state(state);
                                 break;
                             }

                             if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) {
                                 cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile);
                                 free(decoded);
                                 return ret;
                             }

                             if (cli_writen(of, decoded, decodedlen) != decodedlen) {
                                 free(decoded);
                                 close(of);
                                 if (!(ctx->engine->keeptmp))
                                     cli_unlink(tempfile);
                                 free(tempfile);
                                 return CL_EWRITE;
                             }
                             free(decoded);

                             cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile);

                             ret = cli_magic_scandesc(of, tempfile, ctx);
                             close(of);
                             if (!(ctx->engine->keeptmp))
                                 cli_unlink(tempfile);
                             free(tempfile);
                             if (ret != CL_SUCCESS && (ret != CL_VIRUS || (!SCAN_ALLMATCHES && ret == CL_VIRUS))) {
                                 return ret;
                             } else if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
                                 virus = 1;
                             }
                         }

                         /* advance to next node */
                         state = xmlTextReaderRead(reader);
                         check_state(state);
                         break;

                     case XML_READER_TYPE_COMMENT:
                         node_value = xmlTextReaderConstValue(reader);

                         cli_msxmlmsg("COMMENT: %s\n", node_value);

                         /* callback-based scanning mechanism for comments (used by MHTML) */
                         if ((keyinfo->type & MSXML_COMMENT_CB) && mxctx->comment_cb) {
 #if HAVE_JSON
                             ret = mxctx->comment_cb((const char *)node_value, ctx, thisjobj, mxctx->comment_data);
 #else
                             ret = mxctx->comment_cb((const char *)node_value, ctx, NULL, mxctx->comment_data);
 #endif
                             if (ret != CL_SUCCESS && (ret != CL_VIRUS || (!SCAN_ALLMATCHES && ret == CL_VIRUS))) {
                                 return ret;
                             } else if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
                                 virus = 1;
                             }
                         }

                         /* advance to next node */
                         state = xmlTextReaderRead(reader);
                         check_state(state);
                         break;

                     case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
                         /* advance to next node */
                         state = xmlTextReaderRead(reader);
                         check_state(state);
                         break;

                     case XML_READER_TYPE_END_ELEMENT:
                         cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl);
                         node_name = xmlTextReaderConstLocalName(reader);
                         if (!node_name) {
                             cli_dbgmsg("msxml_parse_element: element end tag node nameless\n");
                             return CL_EPARSE; /* no name, nameless */
                         }

                         /* the end tag must close the element this call opened */
                         if (xmlStrcmp(element_name, node_name)) {
                             cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name);
                             return CL_EFORMAT;
                         }

                         /* advance to next element tag */
                         state = xmlTextReaderRead(reader);
                         check_state(state);

                         endtag = 1;
                         break;

                     default:
                         node_name  = xmlTextReaderConstLocalName(reader);
                         node_value = xmlTextReaderConstValue(reader);

                         cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value);

                         state = xmlTextReaderRead(reader);
                         check_state(state);
                 }
             }

             break;
         case XML_READER_TYPE_PROCESSING_INSTRUCTION:
             cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value);
             break;
         case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
             cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value);
             break;
         case XML_READER_TYPE_END_ELEMENT:
             cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
             return (virus ? CL_VIRUS : CL_SUCCESS);
         default:
             cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value);
     }

     return (virus ? CL_VIRUS : CL_SUCCESS);
 }
 
22cb38ed
 /* reader initialization and closing handled by caller */
cb740321
 /*
  * Walk an XML document with an already-initialised libxml2 text reader and
  * process each top-level node with msxml_parse_element(). Creating and
  * freeing the reader is the caller's responsibility.
  *
  * ctx:      scan context (required)
  * reader:   libxml2 text reader positioned before the first node (required)
  * keys:     table describing which element names to act on
  * num_keys: number of entries in keys
  * flags:    MSXML_FLAG_* bits; MSXML_FLAG_JSON is dropped when JSON support
  *           is not compiled in or ctx has no working JSON object
  * mxctx:    optional callback context; when NULL a zeroed one is used
  *
  * Returns CL_VIRUS when a detection was recorded in all-match mode or
  * propagated from a scan, CL_ENULLARG for a NULL ctx/reader, CL_SUCCESS when
  * parsing finished or stopped on a suppressed condition (CL_BREAK,
  * CL_EPARSE, or a CL_ETIMEOUT reported by the element parser), or the error
  * code that stopped parsing.
  */
 int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, struct msxml_ctx *mxctx)
 {
     struct msxml_ctx reserve;
     struct msxml_ictx ictx;
     int state, virus = 0, ret = CL_SUCCESS;

     if (!ctx || !reader)
         return CL_ENULLARG;

     if (!mxctx) {
         memset(&reserve, 0, sizeof(reserve));
         mxctx = &reserve;
     }

     /* start from a fully defined state: the JSON members are read even when JSON is not requested */
     memset(&ictx, 0, sizeof(ictx));
     ictx.ctx      = ctx;
     ictx.flags    = flags;
     ictx.keys     = keys;
     ictx.num_keys = num_keys;
 #if HAVE_JSON
     if (flags & MSXML_FLAG_JSON) {
         ictx.root = ctx->wrkproperty;
         /* JSON Sanity Check */
         if (!ictx.root)
             ictx.flags &= ~MSXML_FLAG_JSON;
     }
 #else
     ictx.flags &= ~MSXML_FLAG_JSON;
 #endif
     mxctx->ictx = &ictx;

     /* Error Handler (setting handler on tree walker causes segfault) */
     if (!(flags & MSXML_FLAG_WALK)) {
         xmlTextReaderSetErrorHandler(reader, msxml_error_handler, NULL);
     }

     /* Main Processing Loop */
     while ((state = xmlTextReaderRead(reader)) == 1) {
 #if HAVE_JSON
         /* NOTE(review): this timeout is returned directly, bypassing the JSON error record and the suppression below - confirm intended */
         if ((ictx.flags & MSXML_FLAG_JSON) && (cli_json_timeout_cycle_check(ictx.ctx, &(ictx.toval)) != CL_SUCCESS)) {
             mxctx->ictx = NULL;
             return CL_ETIMEOUT;
         }

         ret = msxml_parse_element(mxctx, reader, 0, ictx.root);
 #else
         ret = msxml_parse_element(mxctx, reader, 0, NULL);
 #endif
         if (ret == CL_SUCCESS) {
             continue;
         } else if (SCAN_ALLMATCHES && ret == CL_VIRUS) {
             /* non-allmatch simply propagates it down to return through ret */
             virus = 1;
         } else if (ret == CL_VIRUS || ret == CL_ETIMEOUT || ret == CL_BREAK) {
             cli_dbgmsg("cli_msxml_parse_document: encountered halt event in parsing xml document\n");
             break;
         } else {
             cli_warnmsg("cli_msxml_parse_document: encountered issue in parsing xml document\n");
             break;
         }
     }

     /* ictx lives on this stack frame; do not leave the caller's context pointing at it */
     mxctx->ictx = NULL;

     if (state == -1)
         ret = CL_EPARSE;

 #if HAVE_JSON
     /* Parse General Error Handler */
     if (ictx.flags & MSXML_FLAG_JSON) {
         int tmp = CL_SUCCESS;

         switch (ret) {
             case CL_SUCCESS:
             case CL_BREAK: /* OK */
                 break;
             case CL_VIRUS:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_INTR_VIRUS");
                 break;
             case CL_ETIMEOUT:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_INTR_TIMEOUT");
                 break;
             case CL_EPARSE:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_XMLPARSER");
                 break;
             case CL_EMEM:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_OUTOFMEM");
                 break;
             case CL_EFORMAT:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_MALFORMED");
                 break;
             default:
                 tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_OTHER");
                 break;
         }

         if (tmp)
             return tmp;
     }
 #endif

     /* non-critical return suppression */
     if (ret == CL_ETIMEOUT || ret == CL_BREAK)
         ret = CL_SUCCESS;

     /* important but non-critical suppression */
     if (ret == CL_EPARSE) {
         cli_dbgmsg("cli_msxml_parse_document: suppressing parsing error to continue scan\n");
         ret = CL_SUCCESS;
     }

     return (virus ? CL_VIRUS : ret);
 }
 
 #endif /* HAVE_LIBXML2 */