libclamav/hwp.c
9a1232d2
 /*
  * HWP Stuff
  * 
78a06dae
  * Copyright (C) 2015, 2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
9a1232d2
  * 
  * Authors: Kevin Lin
  * 
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  * 
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  * 
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 51
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
d2eea44a
 #if HAVE_LIBXML2
 #ifdef _WIN32
 #ifndef LIBXML_WRITER_ENABLED
 #define LIBXML_WRITER_ENABLED 1
 #endif
 #endif
 #include <libxml/xmlreader.h>
5cab01bd
 #endif
 
9a1232d2
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <string.h>
 #include <ctype.h>
 #include <zlib.h>
 
21c40de6
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
9a1232d2
 #include "clamav.h"
 #include "fmap.h"
5b2a3e5a
 #include "str.h"
80ba6f2c
 #include "conv.h"
9a1232d2
 #include "others.h"
 #include "scanners.h"
d2eea44a
 #include "msxml_parser.h"
 #include "msxml.h"
9a1232d2
 #include "json_api.h"
 #include "hwp.h"
 #if HAVE_JSON
 #include "msdoc.h"
 #endif
 
8ce46496
 #define HWP5_DEBUG  0
209c142e
 #define HWP3_DEBUG  0
 #define HWP3_VERIFY 0
6cd5a9dc
 #define HWPML_DEBUG 0
0361351c
 #if HWP5_DEBUG
 #define hwp5_debug(...) cli_dbgmsg(__VA_ARGS__)
 #else
 #define hwp5_debug(...) ;
 #endif
5cab01bd
 #if HWP3_DEBUG
 #define hwp3_debug(...) cli_dbgmsg(__VA_ARGS__)
 #else
 #define hwp3_debug(...) ;
 #endif
d2eea44a
 #if HWPML_DEBUG
 #define hwpml_debug(...) cli_dbgmsg(__VA_ARGS__)
 #else
 #define hwpml_debug(...) ;
 #endif
5cab01bd
 
fad48cf5
 /* Callback run by decompress_and_callback() on the descriptor of the inflated temporary file. */
 typedef int (*hwp_cb )(void *cbdata, int fd, cli_ctx *ctx);

 /*
  * Inflate a raw deflate stream read from `input` into a temporary file and
  * hand that file to `cb`.
  *
  * ctx    - scan context; supplies the temp directory, the keeptmp setting and
  *          the limits checked through cli_checklimits()
  * input  - map holding the compressed data
  * at     - offset inside `input` at which the compressed data starts
  * len    - number of compressed bytes to consume; 0 means "keep reading until
  *          the map returns no more data"
  * parent - prefix used in every log message
  * cb     - invoked as cb(cbdata, fd, ctx) when inflation ended without error
  * cbdata - opaque pointer forwarded to `cb`
  *
  * Returns CL_ENULLARG when ctx, input or cb is missing, the cli_gentempfd()
  * error when no temp file could be created, CL_EUNPACK on zlib/read failures
  * that produced no usable data, CL_EWRITE when the temp file cannot be written
  * and CL_EUNLINK when the temp file cannot be removed. Otherwise the result of
  * `cb`, or of cli_magic_scandesc() on the partial output when a limit was hit
  * or zlib failed after some data had already been produced.
  */
 static int decompress_and_callback(cli_ctx *ctx, fmap_t *input, off_t at, size_t len, const char *parent, hwp_cb cb, void *cbdata)
 {
     int zret, ofd, in, ret = CL_SUCCESS;
     off_t off_in = at;
     /* remain stays at 1 for len == 0 so that it never terminates the loop */
     size_t count, remain = 1, outsize = 0;
     z_stream zstrm;
     char *tmpname;
     unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];

     if (!ctx || !input || !cb)
         return CL_ENULLARG;

     if (len)
         remain = len;

     /* reserve tempfile for output and callback */
     if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
         cli_errmsg("%s: Can't generate temporary file\n", parent);
         return ret;
     }

     /* initialize zlib inflation stream */
     memset(&zstrm, 0, sizeof(zstrm));
     zstrm.zalloc = Z_NULL;
     zstrm.zfree = Z_NULL;
     zstrm.opaque = Z_NULL;
     zstrm.next_in = inbuf;
     zstrm.next_out = outbuf;
     zstrm.avail_in = 0;
     zstrm.avail_out = FILEBUFF;

     /* negative windowBits: raw deflate data without a zlib header */
     zret = inflateInit2(&zstrm, -15);
     if (zret != Z_OK) {
         cli_errmsg("%s: Can't initialize zlib inflation stream\n", parent);
         ret = CL_EUNPACK;
         goto dc_end;
     }

     /* inflation loop */
     do {
         /* refill the input buffer only once zlib has consumed all of it */
         if (zstrm.avail_in == 0) {
             zstrm.next_in = inbuf;
             in = fmap_readn(input, inbuf, off_in, FILEBUFF);
             if (in < 0) {
                 cli_errmsg("%s: Error reading stream\n", parent);
                 ret = CL_EUNPACK;
                 goto dc_end;
             }
             if (!in)
                 break;

             /* never feed zlib more than the requested number of input bytes */
             if (len) {
                 if (remain < (size_t)in)
                     in = remain;
                 remain -= in;
             }
             zstrm.avail_in = in;
             off_in += in;
         }
         zret = inflate(&zstrm, Z_SYNC_FLUSH);
         count = FILEBUFF - zstrm.avail_out;
         if (count) {
             /* stop inflating (but still scan what we have) once a limit is hit */
             if ((ret = cli_checklimits("HWP", ctx, outsize + count, 0, 0)) != CL_SUCCESS)
                 break;

             if (cli_writen(ofd, outbuf, count) != count) {
                 cli_errmsg("%s: Can't write to file %s\n", parent, tmpname);
                 ret = CL_EWRITE;
                 goto dc_end;
             }
             outsize += count;
         }
         zstrm.next_out = outbuf;
         zstrm.avail_out = FILEBUFF;
         /*
          * Keep going while there is input left to read OR input already
          * buffered that zlib has not consumed yet. Testing `remain` alone
          * stopped right after the last chunk was read, dropping everything
          * that did not fit into the first output buffer for that chunk.
          */
     } while(zret == Z_OK && (remain || zstrm.avail_in));

     cli_dbgmsg("%s: Decompressed %llu bytes to %s\n", parent, (long long unsigned)outsize, tmpname);

     /* post inflation checks */
     if (zret != Z_STREAM_END && zret != Z_OK) {
         if (outsize == 0) {
             cli_infomsg(ctx, "%s: Error decompressing stream. No data decompressed.\n", parent);
             ret = CL_EUNPACK;
             goto dc_end;
         }

         cli_infomsg(ctx, "%s: Error decompressing stream. Scanning what was decompressed.\n", parent);
     }

     /* check for limits exceeded or zlib failure */
     if (ret == CL_SUCCESS && (zret == Z_STREAM_END || zret == Z_OK)) {
         if (len && remain > 0)
             cli_infomsg(ctx, "%s: Error decompressing stream. Not all requested input was converted\n", parent);

         /* scanning inflated stream */
         ret = cb(cbdata, ofd, ctx);
     } else {
         /* default to scanning what we got */
         ret = cli_magic_scandesc(ofd, ctx);
     }

     /* clean-up */
  dc_end:
     zret = inflateEnd(&zstrm);
     if (zret != Z_OK) {
         cli_errmsg("%s: Error closing zlib inflation stream\n", parent);
         if (ret == CL_SUCCESS)
             ret = CL_EUNPACK;
     }
     close(ofd);
     /* the temp file is removed unless the engine is configured to keep it */
     if (!ctx->engine->keeptmp)
         if (cli_unlink(tmpname))
             ret = CL_EUNLINK;
     free(tmpname);
     return ret;
 }
 
21c40de6
 /*
  * Convert a HWP string field to a printable, heap-allocated C string.
  *
  * When HANGUL_NUMERICAL and HAVE_ICONV are both enabled the bytes are run
  * through iconv ("UNICODE" -> "UTF-8"). If that path is compiled out, or it
  * yields nothing, the raw bytes are base64 encoded instead.
  *
  * begin  - first byte of the field
  * sz     - number of bytes in the field
  * parent - prefix used in log messages
  * ret    - receives the status: CL_SUCCESS when iconv produced the result,
  *          CL_VIRUS (used purely as a marker) when the result is base64,
  *          CL_EMEM on allocation failure
  *
  * Returns the converted string, which the caller must free(), or NULL on
  * failure.
  */
 #define HANGUL_NUMERICAL 0
 static char *convert_hstr_to_utf8(const char *begin, size_t sz, const char *parent, int *ret)
 {
     int rc = CL_SUCCESS;
     char *res=NULL;
 #if HANGUL_NUMERICAL && HAVE_ICONV
     char *p1, *p2, *inbuf = NULL, *outbuf = NULL;
     size_t inlen, outlen;
     iconv_t cd;

     do {
         p1 = inbuf = cli_calloc(1, sz+1);
         if (!inbuf) {
             cli_errmsg("%s: Failed to allocate memory for encoding conversion buffer\n", parent);
             rc = CL_EMEM;
             break;
         }
         memcpy(inbuf, begin, sz);
         p2 = outbuf = cli_calloc(1, sz+1);
         if (!outbuf) {
             cli_errmsg("%s: Failed to allocate memory for encoding conversion buffer\n", parent);
             rc = CL_EMEM;
             break;
         }
         inlen = outlen = sz;

         cd = iconv_open("UTF-8", "UNICODE");
         if (cd == (iconv_t)(-1)) {
             char errbuf[128];
             cli_strerror(errno, errbuf, sizeof(errbuf));
             /* report the encoding name; HANGUL_NUMERICAL is an integer and must not feed %s */
             cli_errmsg("%s: Failed to initialize iconv for encoding %s: %s\n", parent, "UNICODE", errbuf);
             break;
         }

         iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
         iconv_close(cd);

         /* no data was converted */
         if (outlen == sz)
             break;

         outbuf[sz - outlen] = '\0';

         if (!(res = strdup(outbuf))) {
             cli_errmsg("%s: Failed to allocate memory for encoding conversion buffer\n", parent);
             rc = CL_EMEM;
             break;
         }
     } while(0);

     if (inbuf)
         free(inbuf);
     if (outbuf)
         free(outbuf);
 #endif
     /* safety base64 encoding: used whenever no converted string exists and nothing failed so far */
     if (!res && (rc == CL_SUCCESS)) {
         char *tmpbuf;

         /* work on a private, writable copy of the (const) input bytes */
         tmpbuf = cli_calloc(1, sz+1);
         if (tmpbuf) {
             memcpy(tmpbuf, begin, sz);

             res = (char *)cl_base64_encode(tmpbuf, sz);
             if (res)
                 rc = CL_VIRUS; /* used as placeholder */
             else
                 rc = CL_EMEM;

             free(tmpbuf);
         } else {
             cli_errmsg("%s: Failed to allocate memory for temporary buffer\n", parent);
             rc = CL_EMEM;
         }
     }

     (*ret) = rc;
     return res;
 }
 
6cd5a9dc
 /*** HWPOLE2 ***/
 /*
  * Scan an HWP-embedded OLE2 object: a 4-byte size prefix followed by the
  * payload. The prefix is compared against the remaining map length for
  * logging only; the payload (everything from offset 4) is then scanned as
  * CL_TYPE_ANY rather than being forced to a specific type.
  */
 int cli_scanhwpole2(cli_ctx *ctx)
 {
     fmap_t *map = *ctx->fmap;
     uint32_t prefix_size, actual_size;

     if (fmap_readn(map, &prefix_size, 0, sizeof(prefix_size)) != sizeof(prefix_size)) {
         cli_errmsg("HWPOLE2: Failed to read uncompressed ole2 filesize\n");
         return CL_EREAD;
     }

     /* what is left in the map once the prefix itself is discounted */
     actual_size = (uint32_t)(map->len - sizeof(prefix_size));

     if (prefix_size == actual_size)
         cli_dbgmsg("HWPOLE2: Matched uncompressed prefix and size: %u == %u\n", prefix_size, actual_size);
     else
         cli_warnmsg("HWPOLE2: Mismatched uncompressed prefix and size: %u != %u\n", prefix_size, actual_size);

     return cli_map_scandesc(map, 4, 0, ctx, CL_TYPE_ANY);
 }
 
5cab01bd
 /*** HWP5 ***/
 
9103b7e9
 /*
  * Record the HWP5 file header in the JSON properties tree.
  *
  * When built with JSON support and CL_SCAN_FILE_PROPERTIES is set, an
  * "Hwp5Header" object is added under ctx->wrkproperty holding the magic
  * string, the raw version, the raw flags and a "Flags" array naming every
  * flag bit that is set. Otherwise nothing is recorded.
  *
  * Returns CL_ENULLARG for a missing argument, CL_EMEM when a JSON node cannot
  * be created, CL_SUCCESS otherwise.
  */
 int cli_hwp5header(cli_ctx *ctx, hwp5_header_t *hwp5)
 {
     if (ctx == NULL || hwp5 == NULL)
         return CL_ENULLARG;

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
         json_object *hdr_obj, *flag_arr;

         hdr_obj = cli_jsonobj(ctx->wrkproperty, "Hwp5Header");
         if (hdr_obj == NULL) {
             cli_errmsg("HWP5.x: No memory for Hwp5Header object\n");
             return CL_EMEM;
         }

         /* magic, version and raw flag word */
         cli_jsonstr(hdr_obj, "Magic", (char*)hwp5->signature);
         cli_jsonint(hdr_obj, "RawVersion", hwp5->version);
         cli_jsonint(hdr_obj, "RawFlags", hwp5->flags);

         flag_arr = cli_jsonarray(hdr_obj, "Flags");
         if (flag_arr == NULL) {
             cli_errmsg("HWP5.x: No memory for Hwp5Header/Flags array\n");
             return CL_EMEM;
         }

 /* append the flag's own identifier to the array when its bit is set */
 #define HWP5_REPORT_FLAG(bit)                      \
     do {                                           \
         if (hwp5->flags & (bit))                   \
             cli_jsonstr(flag_arr, NULL, #bit);     \
     } while (0)

         HWP5_REPORT_FLAG(HWP5_COMPRESSED);
         HWP5_REPORT_FLAG(HWP5_PASSWORD);
         HWP5_REPORT_FLAG(HWP5_DISTRIBUTABLE);
         HWP5_REPORT_FLAG(HWP5_SCRIPT);
         HWP5_REPORT_FLAG(HWP5_DRM);
         HWP5_REPORT_FLAG(HWP5_XMLTEMPLATE);
         HWP5_REPORT_FLAG(HWP5_HISTORY);
         HWP5_REPORT_FLAG(HWP5_CERT_SIGNED);
         HWP5_REPORT_FLAG(HWP5_CERT_ENCRYPTED);
         HWP5_REPORT_FLAG(HWP5_CERT_EXTRA);
         HWP5_REPORT_FLAG(HWP5_CERT_DRM);
         HWP5_REPORT_FLAG(HWP5_CCL);

 #undef HWP5_REPORT_FLAG
     }
 #endif
     return CL_SUCCESS;
 }
 
0361351c
 /*
  * hwp_cb used for HWP5 streams: passes the inflated temp file straight to
  * cli_magic_scandesc(). cbdata is not used.
  */
 static int hwp5_cb(void *cbdata, int fd, cli_ctx *ctx)
 {
     (void)cbdata;

     if (ctx == NULL || fd < 0)
         return CL_ENULLARG;

     return cli_magic_scandesc(fd, ctx);
 }
 
9103b7e9
 /*
  * Scan one stream extracted from an HWP5 container.
  *
  * ctx  - scan context
  * hwp5 - parsed HWP5 header; its flags decide how the stream is treated
  * name - stream name (may be NULL); compared by prefix against the names
  *        listed below
  * fd   - descriptor of the extracted stream
  *
  * Streams whose name starts with bin, jscriptversion, defaultjscript,
  * section, viewtext or docinfo are scanned as-is when the document is
  * password protected, or inflated first when it is flagged as compressed.
  * With JSON support, a "_5_hwpsummaryinformation" stream is additionally
  * handed to cli_ole2_summary_json(). Everything else (and every stream after
  * the JSON step) is passed to cli_magic_scandesc() unchanged.
  *
  * Returns CL_ENULLARG for a missing ctx/hwp5 or a negative fd, CL_ESTAT or
  * CL_EMAP when the stream cannot be mapped, CL_ETIMEOUT when the summary
  * parser timed out, otherwise the result of the scan.
  */
 int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd)
 {
     hwp5_debug("HWP5.x: NAME: %s\n", name ? name : "(NULL)");

     /* both pointers are dereferenced below; reject them like cli_hwp5header() does */
     if (!ctx || !hwp5) {
         cli_errmsg("HWP5.x: Invalid context or header argument\n");
         return CL_ENULLARG;
     }

     if (fd < 0) {
         cli_errmsg("HWP5.x: Invalid file descriptor argument\n");
         return CL_ENULLARG;
     }

     if (name) {
         /* encrypted and compressed streams */
         if (!strncmp(name, "bin", 3) || !strncmp(name, "jscriptversion", 14) ||
             !strncmp(name, "defaultjscript", 14) || !strncmp(name, "section", 7) ||
             !strncmp(name, "viewtext", 8) || !strncmp(name, "docinfo", 7)) {

             /* the password check comes first: such data is scanned without inflating */
             if (hwp5->flags & HWP5_PASSWORD) {
                 cli_dbgmsg("HWP5.x: Password encrypted stream, scanning as-is\n");
                 return cli_magic_scandesc(fd, ctx);
             }

             if (hwp5->flags & HWP5_COMPRESSED) {
                 /* inflate the whole stream and scan the result */
                 STATBUF statbuf;
                 fmap_t *input;
                 int ret;

                 hwp5_debug("HWP5.x: Sending %s for decompress and scan\n", name);

                 /* fmap the input file for easier manipulation */
                 if (FSTAT(fd, &statbuf) == -1) {
                     cli_errmsg("HWP5.x: Can't stat file descriptor\n");
                     return CL_ESTAT;
                 }

                 input = fmap(fd, 0, statbuf.st_size);
                 if (!input) {
                     cli_errmsg("HWP5.x: Failed to get fmap for input stream\n");
                     return CL_EMAP;
                 }
                 /* len == 0: consume the mapped stream until it is exhausted */
                 ret = decompress_and_callback(ctx, input, 0, 0, "HWP5.x", hwp5_cb, NULL);
                 funmap(input);
                 return ret;
             }
         }

 #if HAVE_JSON
         /* JSON Output Summary Information */
         if (ctx->options & CL_SCAN_FILE_PROPERTIES && ctx->properties != NULL) {
             if (!strncmp(name, "_5_hwpsummaryinformation", 24)) {
                 cli_dbgmsg("HWP5.x: Detected a '_5_hwpsummaryinformation' stream\n");
                 /* JSONOLE2 - what to do if something breaks? */
                 /* only a timeout aborts; any other summary error is ignored */
                 if (cli_ole2_summary_json(ctx, fd, 2) == CL_ETIMEOUT)
                     return CL_ETIMEOUT;
             }
         }

 #endif
     }

     /* normal streams */
     return cli_magic_scandesc(fd, ctx);
 }
9103b7e9
 
5cab01bd
 /*** HWP3 ***/
 
 /* all fields use little endian and unicode encoding, if applicable */
 
 //File Identification Information - (30 total bytes)
 #define HWP3_IDENTITY_INFO_SIZE 30
 
 //Document Information - (128 total bytes)
 #define HWP3_DOCINFO_SIZE 128
 
 #define DI_WRITEPROT   24  /* offset 24 (4 bytes) - write protection */
 #define DI_EXTERNAPP   28  /* offset 28 (2 bytes) - external application */
21c40de6
 #define DI_PNAME       32  /* offset 32 (40 x 1 bytes) - print name */
 #define DI_ANNOTE      72  /* offset 72 (24 x 1 bytes) - annotation */
5cab01bd
 #define DI_PASSWD      96  /* offset 96 (2 bytes) - password protected */
 #define DI_COMPRESSED  124 /* offset 124 (1 byte) - compression */
 #define DI_INFOBLKSIZE 126 /* offset 126 (2 bytes) - information block length */
21c40de6
 /*
  * Values extracted from the HWP3 document information block. Each member is
  * copied from the DI_* offset of the same name by parsehwp3_docinfo(), which
  * also converts the multi-byte members from little endian to host order.
  */
 struct hwp3_docinfo {
5cab01bd
     uint32_t di_writeprot;   /* write protection (DI_WRITEPROT) */
     uint16_t di_externapp;   /* external application (DI_EXTERNAPP) */
     uint16_t di_passwd;      /* password protected (DI_PASSWD) */
     uint8_t  di_compressed;  /* compression (DI_COMPRESSED) */
     uint16_t di_infoblksize; /* information block length (DI_INFOBLKSIZE) */
 };
 
 //Document Summary - (1008 total bytes)
 #define HWP3_DOCSUMMARY_SIZE 1008
 /*
  * Layout of the HWP3 document summary block: byte offset of each field
  * relative to the start of the block and the JSON key it is reported under.
  */
 struct hwp3_docsummary_entry {
     off_t offset;     /* byte offset of the field inside the summary block */
     const char *name; /* JSON key used for the field */
 } hwp3_docsummary_fields[] = {
     { 0,   "Title" },    /* offset 0 (56 x 2 bytes) - title */
     { 112, "Subject" },  /* offset 112 (56 x 2 bytes) - subject */
     { 224, "Author" },   /* offset 224 (56 x 2 bytes) - author */
     { 336, "Date" },     /* offset 336 (56 x 2 bytes) - date */
     { 448, "Keyword1" }, /* offset 448 (2 x 56 x 2 bytes) - keywords */
     { 560, "Keyword2" },

     { 672, "Etc0" },  /* offset 672 (3 x 56 x 2 bytes) - etc */
     { 784, "Etc1" },
     { 896, "Etc2" }
 };
 /* number of table entries; parenthesized so it is safe inside larger expressions */
 #define NUM_DOCSUMMARY_FIELDS (sizeof(hwp3_docsummary_fields)/sizeof(struct hwp3_docsummary_entry))
 
8f4f3666
 //Document Paragraph Information - (43 or 230 total bytes)
cb29b646
 #define HWP3_PARAINFO_SIZE_S  43
 #define HWP3_PARAINFO_SIZE_L  230
 #define HWP3_LINEINFO_SIZE    14
 #define HWP3_CHARSHPDATA_SIZE 31
 
94737d52
 #define HWP3_FIELD_LENGTH  512
cb29b646
 
66ed5fe5
 #define PI_PPFS      0  /* offset 0 (1 byte)  - prior paragraph format style */
 #define PI_NCHARS    1  /* offset 1 (2 bytes) - character count */
 #define PI_NLINES    3  /* offset 3 (2 bytes) - line count */
 #define PI_IFSC      5  /* offset 5 (1 byte)  - including font style of characters */
 #define PI_FLAGS     6  /* offset 6 (1 byte)  - other flags */
cb29b646
 #define PI_SPECIAL   7  /* offset 7 (4 bytes) - special characters markers */
66ed5fe5
 #define PI_ISTYLE    11 /* offset 11 (1 byte) - paragraph style index */
8f4f3666
 
cb29b646
 #define PLI_LOFF   0   /* offset 0 (2 bytes) - line starting offset */
 #define PLI_LCOR   2   /* offset 2 (2 bytes) - line blank correction */
 #define PLI_LHEI   4   /* offset 4 (2 bytes) - line max char height */
 #define PLI_LPAG   12  /* offset 12 (2 bytes) - line pagination*/
 
 #define PCSD_SIZE   0  /* offset 0 (2 bytes) - size of characters */
 #define PCSD_PROP   26 /* offset 26 (1 byte) - properties */
 
fad48cf5
 /*
  * Read the HWP3 document information block found at `offset` in the scanned
  * map and fill `docinfo` with the fields of interest, converted to host
  * byte order.
  *
  * With JSON support and CL_SCAN_FILE_PROPERTIES set, an "Hwp3Header" object
  * is also recorded: a "Flags" array naming the non-zero fields plus the
  * print name and annotation strings (each marked with a "<key>_base64"
  * boolean when convert_hstr_to_utf8() had to fall back to base64).
  *
  * Returns CL_EMAP when the block cannot be read, CL_EMEM on allocation
  * failure, CL_SUCCESS otherwise.
  */
 static inline int parsehwp3_docinfo(cli_ctx *ctx, off_t offset, struct hwp3_docinfo *docinfo)
 {
     const uint8_t *hwp3_ptr;

     //TODO: use fmap_readn?
     if (!(hwp3_ptr = fmap_need_off_once(*ctx->fmap, offset, HWP3_DOCINFO_SIZE))) {
         cli_errmsg("HWP3.x: Failed to read fmap for hwp docinfo\n");
         return CL_EMAP;
     }

     /* memcpy keeps the reads safe for fields that sit at unaligned offsets */
     memcpy(&(docinfo->di_writeprot), hwp3_ptr+DI_WRITEPROT, sizeof(docinfo->di_writeprot));
     memcpy(&(docinfo->di_externapp), hwp3_ptr+DI_EXTERNAPP, sizeof(docinfo->di_externapp));
     memcpy(&(docinfo->di_passwd), hwp3_ptr+DI_PASSWD, sizeof(docinfo->di_passwd));
     memcpy(&(docinfo->di_compressed), hwp3_ptr+DI_COMPRESSED, sizeof(docinfo->di_compressed));
     memcpy(&(docinfo->di_infoblksize), hwp3_ptr+DI_INFOBLKSIZE, sizeof(docinfo->di_infoblksize));

     docinfo->di_writeprot = le32_to_host(docinfo->di_writeprot);
     docinfo->di_externapp = le16_to_host(docinfo->di_externapp);
     docinfo->di_passwd = le16_to_host(docinfo->di_passwd);
     docinfo->di_infoblksize = le16_to_host(docinfo->di_infoblksize);

     hwp3_debug("HWP3.x: di_writeprot:   %u\n", docinfo->di_writeprot);
     hwp3_debug("HWP3.x: di_externapp:   %u\n", docinfo->di_externapp);
     hwp3_debug("HWP3.x: di_passwd:      %u\n", docinfo->di_passwd);
     hwp3_debug("HWP3.x: di_compressed:  %u\n", docinfo->di_compressed);
     hwp3_debug("HWP3.x: di_infoblksize: %u\n", docinfo->di_infoblksize);

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
         json_object *header, *flags;
         char *str;
         int iret;

         header = cli_jsonobj(ctx->wrkproperty, "Hwp3Header");
         if (!header) {
             cli_errmsg("HWP3.x: No memory for Hwp3Header object\n");
             return CL_EMEM;
         }

         flags = cli_jsonarray(header, "Flags");
         if (!flags) {
             cli_errmsg("HWP3.x: No memory for Hwp3Header/Flags array\n");
             return CL_EMEM;
         }

         if (docinfo->di_writeprot) {
             cli_jsonstr(flags, NULL, "HWP3_WRITEPROTECTED"); /* HWP3_DISTRIBUTABLE */
         }
         if (docinfo->di_externapp) {
             cli_jsonstr(flags, NULL, "HWP3_EXTERNALAPPLICATION");
         }
         if (docinfo->di_passwd) {
             cli_jsonstr(flags, NULL, "HWP3_PASSWORD");
         }
         if (docinfo->di_compressed) {
             cli_jsonstr(flags, NULL, "HWP3_COMPRESSED");
         }

         /* Printed File Name */
         str = convert_hstr_to_utf8((char*)(hwp3_ptr+DI_PNAME), 40, "HWP3.x", &iret);
         if (!str || (iret == CL_EMEM))
             return CL_EMEM;

         /* CL_VIRUS is only the "result is base64" marker of the converter */
         if (iret == CL_VIRUS)
             cli_jsonbool(header, "PrintName_base64", 1);

         hwp3_debug("HWP3.x: di_pname:   %s\n", str);
         cli_jsonstr(header, "PrintName", str);
         free(str);

         /* Annotation */
         str = convert_hstr_to_utf8((char*)(hwp3_ptr+DI_ANNOTE), 24, "HWP3.x", &iret);
         if (!str || (iret == CL_EMEM))
             return CL_EMEM;

         if (iret == CL_VIRUS)
             cli_jsonbool(header, "Annotation_base64", 1);

         hwp3_debug("HWP3.x: di_annote:  %s\n", str);
         cli_jsonstr(header, "Annotation", str);
         free(str);
     }
 #endif

     return CL_SUCCESS;
 }
 
fad48cf5
 /*
  * Record the HWP3 document summary block found at `offset` as an
  * "Hwp3SummaryInfo" JSON object.
  *
  * Every entry of hwp3_docsummary_fields is converted with
  * convert_hstr_to_utf8() (112 bytes per field) and stored under its name; a
  * "<name>_base64" boolean is added when the converter fell back to base64.
  * Nothing is done without JSON support or when CL_SCAN_FILE_PROPERTIES is
  * not set.
  *
  * Returns CL_EMAP when the block cannot be read, CL_EMEM on allocation
  * failure, the cli_jsonstr() error if storing a field fails, CL_SUCCESS
  * otherwise.
  */
 static inline int parsehwp3_docsummary(cli_ctx *ctx, off_t offset)
 {
 #if HAVE_JSON
     const uint8_t *hwp3_ptr;
     char *str;
     size_t i;
     int iret, ret;
     json_object *summary;

     if (!(ctx->options & CL_SCAN_FILE_PROPERTIES))
         return CL_SUCCESS;

     if (!(hwp3_ptr = fmap_need_off_once(*ctx->fmap, offset, HWP3_DOCSUMMARY_SIZE))) {
         cli_errmsg("HWP3.x: Failed to read fmap for hwp docsummary\n");
         return CL_EMAP;
     }

     summary = cli_jsonobj(ctx->wrkproperty, "Hwp3SummaryInfo");
     if (!summary) {
         cli_errmsg("HWP3.x: No memory for json object\n");
         return CL_EMEM;
     }

     for (i = 0; i < NUM_DOCSUMMARY_FIELDS; i++) {
         str = convert_hstr_to_utf8((char*)(hwp3_ptr + hwp3_docsummary_fields[i].offset), 112, "HWP3.x", &iret);
         if (!str || (iret == CL_EMEM))
             return CL_EMEM;

         /* CL_VIRUS is only the "result is base64" marker of the converter */
         if (iret == CL_VIRUS) {
             char *b64;
             /* room for the name, the "_base64" suffix and the terminator */
             size_t b64len = strlen(hwp3_docsummary_fields[i].name)+8;
             b64 = cli_calloc(1, b64len);
             if (!b64) {
                 cli_errmsg("HWP3.x: Failed to allocate memory for b64 boolean\n");
                 free(str); /* do not leak the converted field on this error path */
                 return CL_EMEM;
             }
             snprintf(b64, b64len, "%s_base64", hwp3_docsummary_fields[i].name);
             cli_jsonbool(summary, b64, 1);
             free(b64);
         }

         hwp3_debug("HWP3.x: %s, %s\n", hwp3_docsummary_fields[i].name, str);
         ret = cli_jsonstr(summary, hwp3_docsummary_fields[i].name, str);
         free(str);
         if (ret != CL_SUCCESS)
             return ret;
     }
 #else
     UNUSEDPARAM(ctx);
     UNUSEDPARAM(offset);
 #endif
     return CL_SUCCESS;
 }
5cab01bd
 
a9a669eb
 /*
  * HWP3_PSPECIAL_VERIFY(map, offset, second, id, match)
  *
  * Optional consistency check for special-character blocks. When HWP3_VERIFY
  * is enabled it reads sizeof(match) bytes at offset+second from `map` into
  * `match`, converts the value from little endian and compares it with `id`.
  * NOTE: the expansion returns from the *enclosing function* (CL_EREAD on a
  * short read, CL_EFORMAT on a mismatch). When HWP3_VERIFY is disabled the
  * macro expands to nothing.
  */
 #if HWP3_VERIFY
 #define HWP3_PSPECIAL_VERIFY(map, offset, second, id, match)            \
     do {                                                                \
     if (fmap_readn(map, &match, offset+second, sizeof(match)) != sizeof(match)) \
         return CL_EREAD;                                                \
                                                                         \
     match = le16_to_host(match);                                        \
                                                                         \
     if (id != match) {                                                  \
         cli_errmsg("HWP3.x: ID %u block fails verification\n", id);     \
         return CL_EFORMAT;                                              \
     }                                                                   \
     } while(0)
26d2bfa0
 
a9a669eb
 #else
 #define HWP3_PSPECIAL_VERIFY(map, offset, second, id, match)
 #endif
26d2bfa0
 
 static inline int parsehwp3_paragraph(cli_ctx *ctx, fmap_t *map, int p, int level, off_t *roffset, int *last)
cb29b646
 {
     off_t offset = *roffset;
73606e9f
     off_t new_offset;
26d2bfa0
     uint16_t nchars, nlines, content;
bfb56489
     uint8_t ppfs, ifsc, cfsb;
26d2bfa0
     int i, c, l, sp = 0, term = 0, ret = CL_SUCCESS;
02778844
 #if HWP3_VERIFY
     uint16_t match;
 #endif
cb29b646
 #if HWP3_DEBUG
     /* other paragraph info */
5383c49b
     uint8_t flags, istyle;
8658dbdc
     uint16_t fsize;
cb29b646
     uint32_t special;
 
     /* line info */
     uint16_t loff, lcor, lhei, lpag;
 
     /* char shape data */
     uint16_t pcsd_size;
     uint8_t pcsd_prop;
 #endif
 
26d2bfa0
     hwp3_debug("HWP3.x: recursion level: %d\n", level);
     hwp3_debug("HWP3.x: Paragraph[%d, %d] starts @ offset %llu\n", level, p, (long long unsigned)offset);
 
731c8e62
     if (level >= ctx->engine->maxrechwp3)
26d2bfa0
         return CL_EMAXREC;
cb29b646
 
     if (fmap_readn(map, &ppfs, offset+PI_PPFS, sizeof(ppfs)) != sizeof(ppfs))
         return CL_EREAD;
 
     if (fmap_readn(map, &nchars, offset+PI_NCHARS, sizeof(nchars)) != sizeof(nchars))
         return CL_EREAD;
 
     nchars = le16_to_host(nchars);
 
     if (fmap_readn(map, &nlines, offset+PI_NLINES, sizeof(nlines)) != sizeof(nlines))
         return CL_EREAD;
 
     nlines = le16_to_host(nlines);
 
5383c49b
     if (fmap_readn(map, &ifsc, offset+PI_IFSC, sizeof(ifsc)) != sizeof(ifsc))
         return CL_EREAD;
 
26d2bfa0
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: ppfs   %u\n", level, p, ppfs);
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: nchars %u\n", level, p, nchars);
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: nlines %u\n", level, p, nlines);
5383c49b
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: ifsc   %u\n", level, p, ifsc);
cb29b646
 
 #if HWP3_DEBUG
     if (fmap_readn(map, &flags, offset+PI_FLAGS, sizeof(flags)) != sizeof(flags))
         return CL_EREAD;
 
     if (fmap_readn(map, &special, offset+PI_SPECIAL, sizeof(special)) != sizeof(special))
         return CL_EREAD;
 
     if (fmap_readn(map, &istyle, offset+PI_ISTYLE, sizeof(istyle)) != sizeof(istyle))
         return CL_EREAD;
 
8658dbdc
     if (fmap_readn(map, &fsize, offset+12, sizeof(fsize)) != sizeof(fsize))
         return CL_EREAD;
 
26d2bfa0
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: flags  %x\n", level, p, flags);
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: spcl   %x\n", level, p, special);
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: istyle %u\n", level, p, istyle);
8658dbdc
     hwp3_debug("HWP3.x: Paragraph[%d, %d]: fsize  %u\n", level, p, fsize);
cb29b646
 #endif
 
     /* detected empty paragraph marker => end-of-paragraph list */
     if (nchars == 0) {
         hwp3_debug("HWP3.x: Detected end-of-paragraph list @ offset %llu\n", (long long unsigned)offset);
26d2bfa0
         hwp3_debug("HWP3.x: end recursion level: %d\n", level);
8e311097
         (*roffset) = offset + HWP3_PARAINFO_SIZE_S;
cb29b646
         (*last) = 1;
         return CL_SUCCESS;
     }
 
8e311097
     if (ppfs)
         offset += HWP3_PARAINFO_SIZE_S;
     else
         offset += HWP3_PARAINFO_SIZE_L;
 
5383c49b
     /* line information blocks */
cb29b646
 #if HWP3_DEBUG
5383c49b
     for (i = 0; (i < nlines) && (offset < map->len); i++) {
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: Line %d information starts @ offset %llu\n", level, p, i, (long long unsigned)offset);
         if (fmap_readn(map, &loff, offset+PLI_LOFF, sizeof(loff)) != sizeof(loff))
             return CL_EREAD;
cb29b646
 
5383c49b
         if (fmap_readn(map, &lcor, offset+PLI_LCOR, sizeof(lcor)) != sizeof(lcor))
             return CL_EREAD;
cb29b646
 
5383c49b
         if (fmap_readn(map, &lhei, offset+PLI_LHEI, sizeof(lhei)) != sizeof(lhei))
             return CL_EREAD;
cb29b646
 
5383c49b
         if (fmap_readn(map, &lpag, offset+PLI_LPAG, sizeof(lpag)) != sizeof(lpag))
             return CL_EREAD;
 
         loff = le16_to_host(loff);
         lcor = le16_to_host(lcor);
         lhei = le16_to_host(lhei);
         lpag = le16_to_host(lpag);
cb29b646
 
5383c49b
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: Line %d: loff %u\n", level, p, i, loff);
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: Line %d: lcor %x\n", level, p, i, lcor);
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: Line %d: lhei %u\n", level, p, i, lhei);
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: Line %d: lpag %u\n", level, p, i, lpag);
cb29b646
 
5383c49b
         offset += HWP3_LINEINFO_SIZE;
     }
 #else
73606e9f
     new_offset = offset + (nlines * HWP3_LINEINFO_SIZE);
     if ((new_offset <= offset) || (new_offset >= map->len)) {
         cli_errmsg("HWP3.x: Paragraph[%d, %d]: length value is too high, invalid. %u\n", level, p, nlines);
         return CL_EPARSE;
     }
     offset = new_offset;
cb29b646
 #endif
 
5383c49b
     if (offset >= map->len)
         return CL_EFORMAT;
cb29b646
 
5383c49b
     if (ifsc) {
85d4c20e
         for (i = 0, c = 0; i < nchars; i++) {
             /* examine byte for cs data type */
bfb56489
             if (fmap_readn(map, &cfsb, offset, sizeof(cfsb)) != sizeof(cfsb))
85d4c20e
                 return CL_EREAD;
cb29b646
 
bfb56489
             offset += sizeof(cfsb);
85d4c20e
 
bfb56489
             switch(cfsb) {
85d4c20e
             case 0: /* character shape block */
bfb56489
                 hwp3_debug("HWP3.x: Paragraph[%d, %d]: character font style data @ offset %llu\n", level, p, (long long unsigned)offset);
cb29b646
 
 #if HWP3_DEBUG
85d4c20e
                 if (fmap_readn(map, &pcsd_size, offset+PCSD_SIZE, sizeof(pcsd_size)) != sizeof(pcsd_size))
                     return CL_EREAD;
cb29b646
 
85d4c20e
                 if (fmap_readn(map, &pcsd_prop, offset+PCSD_PROP, sizeof(pcsd_prop)) != sizeof(pcsd_prop))
                     return CL_EREAD;
cb29b646
 
85d4c20e
                 pcsd_size = le16_to_host(pcsd_size);
cb29b646
 
bfb56489
                 hwp3_debug("HWP3.x: Paragraph[%d, %d]: CFS %u: pcsd_size %u\n", level, p, 0, pcsd_size);
                 hwp3_debug("HWP3.x: Paragraph[%d, %d]: CFS %u: pcsd_prop %x\n", level, p, 0, pcsd_prop);
cb29b646
 #endif
 
85d4c20e
                 c++;
                 offset += HWP3_CHARSHPDATA_SIZE;
                 break;
             case 1: /* normal character - as representation of another character for previous cs block */
                 break;
             default:
bfb56489
                 cli_errmsg("HWP3.x: Paragraph[%d, %d]: unknown CFS type 0x%x @ offset %llu\n", level, p, cfsb,
85d4c20e
                            (long long unsigned)offset);
                 cli_errmsg("HWP3.x: Paragraph parsing detected %d of %u characters\n", i, nchars);
                 return CL_EPARSE;
             }
26d2bfa0
         }
85d4c20e
 
bfb56489
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected %d CFS block(s) and %d characters\n", level, p, c, i);
85d4c20e
     } else {
bfb56489
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: separate character font style segment not stored\n", level, p);
26d2bfa0
     }
cb29b646
 
85d4c20e
     if (!term)
         hwp3_debug("HWP3.x: Paragraph[%d, %d]: content starts @ offset %llu\n", level, p, (long long unsigned)offset);
cb29b646
 
     /* scan for end-of-paragraph [0x0d00 on offset parity to current content] */
f2833bb0
     while ((!term) &&
            (offset >= 0) &&
73606e9f
            (offset < map->len))
     {
26d2bfa0
         if (fmap_readn(map, &content, offset, sizeof(content)) != sizeof(content))
cb29b646
             return CL_EREAD;
 
26d2bfa0
         content = le16_to_host(content);
 
         /* special character handling */
         if (content < 32) {
             hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected special character %u @ offset %llu\n", level, p, content,
                 (long long unsigned)offset);
 
             switch(content) {
             case 0:
             case 1:
             case 2:
             case 3:
             case 4:
             case 12:
             case 27:
8ce46496
                 {
e7027384
                     /* reserved */
8ce46496
                     uint32_t length;
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected special character as [reserved]\n", level, p);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - length of information = n
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (n bytes) - information
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
8ce46496
 
1525a42e
                     if (fmap_readn(map, &length, offset+2, sizeof(length)) != sizeof(length))
8ce46496
                         return CL_EREAD;
 
                     length = le32_to_host(length);
73606e9f
                     new_offset = offset + (8 + length);
                     if ((new_offset <= offset) || (new_offset >= map->len)) {
f2833bb0
                         cli_errmsg("HWP3.x: Paragraph[%d, %d]: length value is too high, invalid. %u\n", level, p, length);
                         return CL_EPARSE;
                     }
73606e9f
                     offset = new_offset;
f2833bb0
 
8ce46496
 #if HWP3_DEBUG
                     cli_errmsg("HWP3.x: Paragraph[%d, %d]: possible invalid usage of reserved special character %u\n", level, p, content);
                     return CL_EFORMAT;
 #endif
                     break;
                 }
e7027384
             case 5: /* field codes */
                 {
                     uint32_t length;
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected field code marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - length of information = n
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (n bytes) - field code details
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
 
                     if (fmap_readn(map, &length, offset+2, sizeof(length)) != sizeof(length))
                         return CL_EREAD;
 
                     length = le32_to_host(length);
73606e9f
                     new_offset = offset + (8 + length);
                     if ((new_offset <= offset) || (new_offset >= map->len)) {
f2833bb0
                         cli_errmsg("HWP3.x: Paragraph[%d, %d]: length value is too high, invalid. %u\n", level, p, length);
                         return CL_EPARSE;
                     }
73606e9f
                     offset = new_offset;
e7027384
                     break;
                 }
8ce46496
             case 6: /* bookmark */
                 {
 #if HWP3_VERIFY
                     uint32_t length;
 #endif
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected bookmark marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - length of information = 34
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (16 x 2 bytes) - bookmark name
                      * offset 40 (2 bytes) - bookmark type
                      * total is always 42 bytes
                      */
 
 #if HWP3_VERIFY
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
8ce46496
 
a9a669eb
                     /* length check - always 34 bytes */
                     if (fmap_readn(map, &length, offset+2, sizeof(length)) != sizeof(length))
8ce46496
                         return CL_EREAD;
 
                     length = le32_to_host(length);
 
                     if (length != 34) {
577b2524
                         cli_errmsg("HWP3.x: Bookmark has incorrect length: %u != 34)\n", length);
8ce46496
                         return CL_EFORMAT;
                     }
 #endif
                     offset += 42;
577b2524
                     break;
8ce46496
                 }
             case 7: /* date format */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected date format marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (40 x 2 bytes) - date format as user-defined dialog
                      * offset 82 (2 bytes) - special character ID
                      * total is always 84 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 82, content, match);
8ce46496
 
                     offset += 84;
577b2524
                     break;
8ce46496
                 }
             case 8: /* date code */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected date code marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (40 x 2 bytes) - date format string
                      * offset 82 (4 x 2 bytes) - date (year, month, day of week)
                      * offset 90 (2 x 2 bytes) - time (hour, minute)
                      * offset 94 (2 bytes) - special character ID
                      * total is always 96 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 94, content, match);
8ce46496
 
                     offset += 96;
577b2524
                     break;
8ce46496
                 }
             case 9: /* tab */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected tab marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - tab width
                      * offset 4 (2 bytes) - unknown(?)
                      * offset 6 (2 bytes) - special character ID
                      * total is always 8 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
8ce46496
 
                     offset += 8;
577b2524
                     break;
8ce46496
                 }
26d2bfa0
             case 10: /* table, test box, equation, button, hypertext */
                 {
                     uint16_t ncells;
8658dbdc
 #if HWP3_DEBUG
                     uint16_t type;
 #endif
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected box object marker @ offset %llu\n", level, p, (long long unsigned)offset);
26d2bfa0
 
a9a669eb
                     /* verification (only on HWP3_VERIFY) */
                     /* id block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
                     /* extra data block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 24, content, match);
26d2bfa0
 
                     /* ID block is 8 bytes */
                     offset += 8;
 
8658dbdc
                     /* box information (84 bytes) */
 #if HWP3_DEBUG
                     /* box type located at offset 78 of box information */
                     if (fmap_readn(map, &type, offset+78, sizeof(type)) != sizeof(type))
                         return CL_EREAD;
 
                     type = le16_to_host(type);
                     if (type == 0)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object detected as table\n", level, p);
                     else if (type == 1)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object detected as text box\n", level, p);
                     else if (type == 2)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object detected as equation\n", level, p);
                     else if (type == 3)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object detected as button\n", level, p);
                    else
e748791d
                        hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object detected as UNKNOWN(%u)\n", level, p, type);
8658dbdc
 #endif
 
                     /* ncells is located at offset 80 of box information */
26d2bfa0
                     if (fmap_readn(map, &ncells, offset+80, sizeof(ncells)) != sizeof(ncells))
                         return CL_EREAD;
 
                     ncells = le16_to_host(ncells);
                     offset += 84;
 
8658dbdc
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: box object contains %u cell(s)\n", level, p, ncells);
26d2bfa0
 
22cb38ed
                     /* cell information (27 bytes x ncells(offset 80 of table)) */
26d2bfa0
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: box cell info array starts @ %llu\n", level, p, (long long unsigned)offset);
73606e9f
 
                     new_offset = offset + (27 * ncells);
                     if ((new_offset <= offset) || (new_offset >= map->len)) {
f2833bb0
                         cli_errmsg("HWP3.x: Paragraph[%d, %d]: number of box cells is too high, invalid. %u\n", level, p, ncells);
                         return CL_EPARSE;
                     }
73606e9f
                     offset = new_offset;
26d2bfa0
 
                     /* cell paragraph list */
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: box cell paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
                     for (i = 0; i < ncells; i++) {
                         l = 0;
                         while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
e748791d
                         if (ret != CL_SUCCESS)
                             return ret;
26d2bfa0
                     }
 
                     /* box caption paragraph list */
8ce46496
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: box cell caption paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
26d2bfa0
                     l = 0;
                     while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
e748791d
                     if (ret != CL_SUCCESS)
                         return ret;
26d2bfa0
                     break;
                 }
             case 11: /* drawing */
                 {
                     uint32_t size;
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected drawing marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
a9a669eb
                     /* verification (only on HWP3_VERIFY) */
                     /* id block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
                     /* extra data block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 24, content, match);
26d2bfa0
 
                     /* ID block is 8 bytes */
                     offset += 8;
 
                     /* Drawing Info Block is 328+n bytes with n = size of image */
                     /* n is located at offset 0 of info block */
                     if (fmap_readn(map, &size, offset, sizeof(size)) != sizeof(size))
                         return CL_EREAD;
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: drawing is %u additional bytes\n", level, p, size);
 
                     size = le32_to_host(size);
73606e9f
                     new_offset = offset + (348 + size);
                     if ((new_offset <= offset) || (new_offset >= map->len)) {
f2833bb0
                         cli_errmsg("HWP3.x: Paragraph[%d, %d]: image size value is too high, invalid. %u\n", level, p, size);
                         return CL_EPARSE;
                     }
73606e9f
                     offset = new_offset;
26d2bfa0
 
                     /* caption paragraph list */
8ce46496
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: drawing caption paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
26d2bfa0
                     l = 0;
                     while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
e748791d
                     if (ret != CL_SUCCESS)
                         return ret;
26d2bfa0
                     break;
                 }
             case 13: /* end-of-paragraph marker - treated identically as character */
                 hwp3_debug("HWP3.x: Detected end-of-paragraph marker @ offset %llu\n", (long long unsigned)offset);
                 term = 1;
 
                 offset += sizeof(content);
cb29b646
                 break;
05f6a1b5
             case 14: /* line information */
02778844
                 {
                     hwp3_debug("HWP3.x: Detected line information marker @ offset %llu\n", (long long unsigned)offset);
05f6a1b5
 
a9a669eb
                     /* verification (only on HWP3_VERIFY) */
                     /* id block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
                     /* extra data block verify */
                     HWP3_PSPECIAL_VERIFY(map, offset, 24, content, match);
 
02778844
                     /* ID block is 8 bytes + line information is always 84 bytes */
                     offset += 92;
                     break;
                 }
05f6a1b5
             case 15: /* hidden description */
                 {
                     hwp3_debug("HWP3.x: Detected hidden description marker @ offset %llu\n", (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - reserved
                      * offset 6 (2 bytes) - special character ID
299e06b6
                      * offset 8 (8 bytes) - reserved
                      * total is always 16 bytes
05f6a1b5
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
05f6a1b5
 
299e06b6
                     offset += 16;
 
05f6a1b5
                     /* hidden description paragraph list */
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: hidden description paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
                     l = 0;
                     while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
                     if (ret != CL_SUCCESS)
                         return ret;
                     break;
             }
11e14c19
             case 16: /* header/footer */
                 {
 #if HWP3_DEBUG
                     uint8_t type;
 #endif
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected header/footer marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - reserved
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (8 x 1 byte) - reserved
                      * offset 16 (1 byte) - type (header/footer)
                      * offset 17 (1 byte) - kind
                      * total is always 18 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
11e14c19
 
 #if HWP3_DEBUG
                     if (fmap_readn(map, &type, offset+16, sizeof(type)) != sizeof(type))
                         return CL_EREAD;
 
                     if (type == 0)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected header/footer as header\n", level, p);
                     else if (type == 1)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected header/footer as footer\n", level, p);
                     else
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected header/footer as UNKNOWN(%u)\n", level, p, type);
 #endif
                     offset += 18;
 
                     /* content paragraph list */
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: header/footer paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
                     l = 0;
                     while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
                     if (ret != CL_SUCCESS)
                         return ret;
                     break;
                 }
74d9124c
             case 17: /* footnote/endnote */
05f6a1b5
                 {
74d9124c
                     hwp3_debug("HWP3.x: Detected footnote/endnote marker @ offset %llu\n", (long long unsigned)offset);
05f6a1b5
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - reserved
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (8 x 1 bytes) - reserved
                      * offset 16 (2 bytes) - number
                      * offset 18 (2 bytes) - type
                      * offset 20 (2 bytes) - alignment
                      * total is always 22 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
05f6a1b5
 
                     offset += 22;
74d9124c
 
                     /* content paragraph list */
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: footnote/endnote paragraph list starts @ %llu\n", level, p, (long long unsigned)offset);
                     l = 0;
                     while (!l && ((ret = parsehwp3_paragraph(ctx, map, sp++, level+1, &offset, &l)) == CL_SUCCESS));
                     if (ret != CL_SUCCESS)
                         return ret;
05f6a1b5
                     break;
                 }
11e14c19
             case 18: /* paste code number */
                 {
 #if HWP3_DEBUG
                     uint8_t type;
 #endif
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - type
                      * offset 4 (2 bytes) - number value
                      * offset 6 (2 bytes) - special character ID
                      * total is always 8 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
11e14c19
 
 #if HWP3_DEBUG
                     if (fmap_readn(map, &type, offset+2, sizeof(type)) != sizeof(type))
                         return CL_EREAD;
 
                     if (type == 0)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as side\n", level, p);
                     else if (type == 1)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as footnote\n", level, p);
                     else if (type == 2)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as North America???\n", level, p);
                     else if (type == 3)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as drawing\n", level, p);
                     else if (type == 4)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as table\n", level, p);
                     else if (type == 5)
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as equation\n", level, p);
                     else
                         hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected paste code number as UNKNOWN(%u)\n", level, p, type);
 #endif
                     offset += 8;
                     break;
                 }
9e100817
             case 19: /* code number change */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected code number change marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - type
                      * offset 4 (2 bytes) - new number value
                      * offset 6 (2 bytes) - special character ID
                      * total is always 8 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
9e100817
 
                     offset += 8;
                     break;
                 }
             case 20:
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected thread page number marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - location
                      * offset 4 (2 bytes) - shape
                      * offset 6 (2 bytes) - special character ID
                      * total is always 8 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
9e100817
 
                     offset += 8;
                     break;
                 }
             case 21: /* hide special */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected hide special marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - type
                      * offset 4 (2 bytes) - target
                      * offset 6 (2 bytes) - special character ID
                      * total is always 8 bytes
                      */
 
a9a669eb
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
9e100817
 
                     offset += 8;
                     break;
                 }
1525a42e
             case 22: /* mail merge display */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected mail merge display marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (20 x 1 bytes) - field name (in ASCII)
                      * offset 22 (2 bytes) - special character ID
                      * total is always 24 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 22, content, match);
 
                     offset += 24;
                     break;
                 }
             case 23: /* overlapping letters */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected overlapping marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (3 x 2 bytes) - overlapping letters
                      * offset 8 (2 bytes) - special character ID
                      * total is always 10 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 8, content, match);
 
                     offset += 10;
                     break;
                 }
             case 24: /* hyphen */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected hyphen marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - width of hyphen
                      * offset 4 (2 bytes) - special character ID
                      * total is always 6 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 4, content, match);
 
                     offset += 6;
                     break;
                 }
             case 25: /* title/table/picture show times */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected title/table/picture show times marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - type
                      * offset 4 (2 bytes) - special character ID
                      * total is always 6 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 4, content, match);
 
                     offset += 6;
                     break;
                 }
             case 26: /* browse displayed */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected browse displayed marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (60 x 2 bytes) - keyword 1
                      * offset 122 (60 x 2 bytes) - keyword 2
                      * offset 242 (2 bytes) - page number
                      * offset 244 (2 bytes) - special character ID
                      * total is always 246 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 244, content, match);
 
                     offset += 246;
                     break;
                 }
             case 28: /* overview shape/summary number */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected overview shape/summary number marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - type
                      * offset 4 (1 byte)  - form
                      * offset 5 (1 byte)  - step
                      * offset 6 (7 x 2 bytes)  - summary number
                      * offset 20 (7 x 2 bytes) - custom
                      * offset 34 (2 x 7 x 2 bytes) - decorative letters
                      * offset 62 (2 bytes) - special character ID
                      * total is always 64 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 62, content, match);
 
                     offset += 64;
                     break;
                 }
             case 29: /* cross-reference */
                 {
                     uint32_t length;
 
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected cross-reference marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (4 bytes) - length of information
                      * offset 6 (2 bytes) - special character ID
                      * offset 8 (n bytes) - ...
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 6, content, match);
 
                     if (fmap_readn(map, &length, offset+2, sizeof(length)) != sizeof(length))
                         return CL_EREAD;
 
                     length = le32_to_host(length);
73606e9f
                     new_offset = offset + (8 + length);
                     if ((new_offset <= offset) || (new_offset >= map->len)) {
f2833bb0
                         cli_errmsg("HWP3.x: Paragraph[%d, %d]: length value is too high, invalid. %u\n", level, p, length);
                         return CL_EPARSE;
                     }
73606e9f
                     offset = new_offset;
1525a42e
                     break;
                 }
             case 30: /* bundle of blanks (ON SALE for 2.99!) */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected title/table/picture show times marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - special character ID
                      * total is always 4 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 2, content, match);
 
                     offset += 4;
                     break;
                 }
             case 31: /* fixed-width space */
                 {
                     hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected title/table/picture show times marker @ offset %llu\n", level, p, (long long unsigned)offset);
 
                     /*
                      * offset 0 (2 bytes) - special character ID
                      * offset 2 (2 bytes) - special character ID
                      * total is always 4 bytes
                      */
 
                     /* id block verification (only on HWP3_VERIFY) */
                     HWP3_PSPECIAL_VERIFY(map, offset, 2, content, match);
 
                     offset += 4;
                     break;
                 }
26d2bfa0
             default:
                 hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected special character as [UNKNOWN]\n", level, p);
                 cli_errmsg("HWP3.x: Paragraph[%d, %d]: cannot understand special character %u\n", level, p, content);
                 return CL_EPARSE;
cb29b646
             }
26d2bfa0
         } else { /* normal characters */
             offset += sizeof(content);
cb29b646
         }
     }
 
26d2bfa0
     hwp3_debug("HWP3.x: end recursion level: %d\n", level);
 
cb29b646
     (*roffset) = offset;
     return CL_SUCCESS;
 }
 
5bda89f2
 /*
  * Placeholder for an HWP 3.x information block parser.
  *
  * No parsing is implemented: nothing is read from any map, and neither
  * (*offset) nor (*last) is modified.
  *
  * ctx    - scan context (currently unused)
  * dmap   - optional map to parse from (currently unused); the sibling
  *          parsehwp3_infoblk_1 uses *ctx->fmap when this is NULL
  * offset - in/out position of the information block (currently left as-is)
  * last   - out flag (currently never written)
  *          NOTE(review): a caller that loops until (*last) becomes non-zero
  *          would not terminate on this stub - confirm against the callers.
  *
  * Returns CL_SUCCESS unconditionally.
  */
 static inline int parsehwp3_infoblk_0(cli_ctx *ctx, fmap_t *dmap, off_t *offset, int *last)
 {
     /*
      * The former unused locals (infoid, infolen, map) were removed: they only
      * produced unused-variable warnings, and initialising map dereferenced
      * ctx->fmap for no purpose when dmap was NULL.
      */
     (void)ctx;
     (void)dmap;
     (void)offset;
     (void)last;

     return CL_SUCCESS;
 }
 
5bda89f2
 /*
  * Parse a single HWP3.x "additional information block #1" entry.
  *
  * The entry starts at *offset with a 4-byte little-endian ID. Every ID except
  * Booking Information (5) is followed by a 4-byte little-endian payload
  * length and the payload itself. Payloads that may carry embedded content
  * (image data, OLE2 data, hyperlink entries, background images and unknown
  * types) are handed to cli_map_scan().
  *
  * ctx    - scan context; ctx->options selects JSON property collection
  * dmap   - map to read from; when NULL, *ctx->fmap is used
  * offset - in: start of the entry; out: advanced past ID, length and payload
  *          on the normal path (past the ID only for Booking Information,
  *          past ID and length for the terminating entry)
  * last   - optional; set to 1 when the terminating entry (ID 0, length 0)
  *          is read
  *
  * Returns CL_SUCCESS, the result of the payload scan, CL_EREAD on a short
  * read or an out-of-bounds length, CL_EFORMAT on a malformed entry, or
  * CL_EMEM when a JSON object cannot be created.
  */
 static inline int parsehwp3_infoblk_1(cli_ctx *ctx, fmap_t *dmap, off_t *offset, int *last)
 {
     uint32_t infoid, infolen;
     fmap_t *map = (dmap ? dmap : *ctx->fmap);
     int i, count, ret = CL_SUCCESS;
     long long unsigned infoloc = (long long unsigned)(*offset);
 #if HWP3_DEBUG
     char field[HWP3_FIELD_LENGTH];
 #endif
 #if HAVE_JSON
     json_object *infoblk_1 = NULL, *contents = NULL, *counter = NULL, *entry = NULL;
 #endif

     hwp3_debug("HWP3.x: Information Block @ offset %llu\n", infoloc);

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
         infoblk_1 = cli_jsonobj(ctx->wrkproperty, "InfoBlk_1");
         if (!infoblk_1) {
             cli_errmsg("HWP3.x: No memory for information block object\n");
             return CL_EMEM;
         }

         contents = cli_jsonarray(infoblk_1, "Contents");
         if (!contents) {
             cli_errmsg("HWP3.x: No memory for information block contents array\n");
             return CL_EMEM;
         }

         /* keep a running count of the entries seen so far */
         if (!json_object_object_get_ex(infoblk_1, "Count", &counter)) { /* object not found */
             cli_jsonint(infoblk_1, "Count", 1);
         } else {
             int value = json_object_get_int(counter);
             cli_jsonint(infoblk_1, "Count", value+1);
         }
     }
 #endif

     if (fmap_readn(map, &infoid, (*offset), sizeof(infoid)) != sizeof(infoid)) {
         cli_errmsg("HWP3.x: Failed to read information block id @ %llu\n",
                    (long long unsigned)(*offset));
         return CL_EREAD;
     }
     (*offset) += sizeof(infoid);
     infoid = le32_to_host(infoid);

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
         entry = cli_jsonobj(contents, NULL);
         if (!entry) {
             cli_errmsg("HWP3.x: No memory for information block entry object\n");
             return CL_EMEM;
         }

         cli_jsonint(entry, "ID", infoid);
     }
 #endif
     hwp3_debug("HWP3.x: Information Block[%llu]: ID:  %u\n", infoloc, infoid);

     /* Booking Information(5) - no length field and no content */
     if (infoid == 5) {
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Booking Information\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Booking Information");
 #endif
         return CL_SUCCESS;
     }

     if (fmap_readn(map, &infolen, (*offset), sizeof(infolen)) != sizeof(infolen)) {
         cli_errmsg("HWP3.x: Failed to read information block len @ %llu\n",
                    (long long unsigned)(*offset));
         return CL_EREAD;
     }
     (*offset) += sizeof(infolen);
     infolen = le32_to_host(infolen);

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
         cli_jsonint64(entry, "Offset", infoloc);
         cli_jsonint(entry, "Length", infolen);
     }
 #endif
     hwp3_debug("HWP3.x: Information Block[%llu]: LEN: %u\n", infoloc, infolen);

     /* check information block bounds (done in 64-bit unsigned so the sum cannot wrap) */
     if (((*offset) < 0) ||
         ((long long unsigned)(*offset) + infolen > (long long unsigned)(map->len))) {
         cli_errmsg("HWP3.x: Information blocks length exceeds remaining map length, %llu > %llu\n",
                    (long long unsigned)((*offset)+infolen), (long long unsigned)(map->len));
         return CL_EREAD;
     }

     /* Information Blocks */
     switch(infoid) {
     case 0: /* Terminating */
         if (infolen == 0) {
             hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Terminating Entry\n", infoloc);
 #if HAVE_JSON
             if (ctx->options & CL_SCAN_FILE_PROPERTIES)
                 cli_jsonstr(entry, "Type", "Terminating Entry");
 #endif
             if (last) *last = 1;
             return CL_SUCCESS;
         } else {
             cli_errmsg("HWP3.x: Information Block[%llu]: TYPE: Invalid Terminating Entry\n", infoloc);
             return CL_EFORMAT;
         }
     case 1: /* Image Data */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Image Data\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Image Data");
 #endif
 #if HWP3_DEBUG /* additional fields can be added */
         memset(field, 0, HWP3_FIELD_LENGTH);
         if (fmap_readn(map, field, (*offset), 16) != 16) {
             cli_errmsg("HWP3.x: Failed to read information block field @ %llu\n",
                        (long long unsigned)(*offset));
             return CL_EREAD;
         }
         hwp3_debug("HWP3.x: Information Block[%llu]: NAME: %s\n", infoloc, field);

         memset(field, 0, HWP3_FIELD_LENGTH);
         if (fmap_readn(map, field, (*offset)+16, 16) != 16) {
             cli_errmsg("HWP3.x: Failed to read information block field @ %llu\n",
                        (long long unsigned)(*offset));
             return CL_EREAD;
         }
         hwp3_debug("HWP3.x: Information Block[%llu]: FORM: %s\n", infoloc, field);
 #endif
         /*
          * 32 bytes for extra data fields; only scan when image bytes follow
          * them, otherwise infolen-32 would wrap around (infolen is unsigned)
          */
         if (infolen > 32)
             ret = cli_map_scan(map, (*offset)+32, infolen-32, ctx, CL_TYPE_ANY);
         break;
     case 2: /* OLE2 Data */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: OLE2 Data\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "OLE2 Data");
 #endif
         if (infolen > 0)
             ret = cli_map_scan(map, (*offset), infolen, ctx, CL_TYPE_ANY);
         break;
     case 3: /* Hypertext/Hyperlink Information */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Hypertext/Hyperlink Information\n", infoloc);
         if (infolen % 617) {
             cli_errmsg("HWP3.x: Information Block[%llu]: Invalid multiple of 617 => %u\n", infoloc, infolen);
             return CL_EFORMAT;
         }

         count = (infolen / 617);
         hwp3_debug("HWP3.x: Information Block[%llu]: COUNT: %d entries\n", infoloc, count);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
             cli_jsonstr(entry, "Type", "Hypertext/Hyperlink Information");
             cli_jsonint(entry, "Count", count);
         }
 #endif

         for (i = 0; i < count; i++) {
             int subret;
 #if HWP3_DEBUG /* additional fields can be added */
             memset(field, 0, HWP3_FIELD_LENGTH);
             if (fmap_readn(map, field, (*offset)+(617*i), 256) != 256) {
                 cli_errmsg("HWP3.x: Failed to read information block field @ %llu\n",
                            (long long unsigned)((*offset)+(617*i)));
                 return CL_EREAD;
             }
             hwp3_debug("HWP3.x: Information Block[%llu]: %d: NAME: %s\n", infoloc, i, field);
 #endif
             /* scanning macros - TODO - check numbers */
             subret = cli_map_scan(map, (*offset)+(617*i)+288, 325, ctx, CL_TYPE_ANY);

             /*
              * Do not let a later clean entry hide an earlier result: a
              * detection is kept (and ends the loop unless all-match scanning
              * is on), any other failure ends the loop immediately.
              */
             if (subret == CL_VIRUS) {
                 ret = CL_VIRUS;
                 if (!(SCAN_ALL))
                     break;
             } else if (subret != CL_SUCCESS) {
                 if (ret != CL_VIRUS)
                     ret = subret;
                 break;
             }
         }
         break;
     case 4: /* Presentation Information */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Presentation Information\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Presentation Information");
 #endif
         /* contains nothing of interest to scan */
         break;
     case 5: /* Booking Information */
         /* should never run this as it is short-circuited above */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Booking Information\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Booking Information");
 #endif
         break;
     case 6: /* Background Image Data */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Background Image Data\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES) {
             cli_jsonstr(entry, "Type", "Background Image Data");
             /* report 0 rather than a wrapped value when the block is too short */
             cli_jsonint(entry, "ImageSize", (infolen > 324) ? (infolen-324) : 0);
         }
 #endif
 #if HWP3_DEBUG /* additional fields can be added */
         memset(field, 0, HWP3_FIELD_LENGTH);
         if (fmap_readn(map, field, (*offset)+24, 256) != 256) {
             cli_errmsg("HWP3.x: Failed to read information block field @ %llu\n",
                        (long long unsigned)(*offset));
             return CL_EREAD;
         }
         hwp3_debug("HWP3.x: Information Block[%llu]: NAME: %s\n", infoloc, field);
 #endif
         /*
          * 324 bytes for extra data fields; only scan when image bytes follow
          * them, otherwise infolen-324 would wrap around
          */
         if (infolen > 324)
             ret = cli_map_scan(map, (*offset)+324, infolen-324, ctx, CL_TYPE_ANY);
         break;
     case 0x100: /* Table Extension */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Table Extension\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Table Extension");
 #endif
         /* contains nothing of interest to scan */
         break;
     case 0x101: /* Press Frame Information Field Name */
         hwp3_debug("HWP3.x: Information Block[%llu]: TYPE: Press Frame Information Field Name\n", infoloc);
 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonstr(entry, "Type", "Press Frame Information Field Name");
 #endif
         /* contains nothing of interest to scan */
         break;
     default:
         /* unknown block types are scanned whole */
         cli_warnmsg("HWP3.x: Information Block[%llu]: TYPE: UNKNOWN(%u)\n", infoloc, infoid);
         if (infolen > 0)
             ret = cli_map_scan(map, (*offset), infolen, ctx, CL_TYPE_ANY);
         break;
     }

     /* step over the payload so the caller can parse the next entry */
     (*offset) += infolen;
     return ret;
 }
 
 /*
  * Walk an HWP3.x document content stream (fonts, styles, paragraphs and
  * additional information blocks #1) and then scan the walked region.
  *
  * cbdata - NULL, or a pointer to the off_t offset of the stream. When it is
  *          NULL or the offset is 0 the stream is read from fd through a
  *          freshly created fmap; otherwise it is read from *ctx->fmap
  *          starting at that offset.
  * fd     - descriptor holding the stream; only used in the offset-0 case
  * ctx    - scan context
  *
  * Returns CL_SUCCESS or the first parse/scan status that is not CL_SUCCESS.
  * The fmap created for fd (if any) is released on every exit path once it
  * has been created.
  */
 static int hwp3_cb(void *cbdata, int fd, cli_ctx *ctx)
 {
     fmap_t *map, *dmap;
     off_t offset, start, new_offset;
     int i, p = 0, last = 0, ret = CL_SUCCESS;
     uint16_t nstyles;
 #if HAVE_JSON
     json_object *fonts = NULL;
 #endif

     offset = start = cbdata ? *(off_t *)cbdata : 0;

     if (offset == 0) {
         STATBUF statbuf;

         if (fd < 0) {
             cli_errmsg("HWP3.x: Invalid file descriptor argument\n");
             return CL_ENULLARG;
         }

         if (FSTAT(fd, &statbuf) == -1) {
             cli_errmsg("HWP3.x: Can't stat file descriptor\n");
             return CL_ESTAT;
         }

         /* dmap marks the map as ours: it must be unmapped before returning */
         map = dmap = fmap(fd, 0, statbuf.st_size);
         if (!map) {
             cli_errmsg("HWP3.x: Failed to get fmap for uncompressed stream\n");
             return CL_EMAP;
         }
     } else {
         hwp3_debug("HWP3.x: Document Content Stream starts @ offset %llu\n", (long long unsigned)offset);

         map = *ctx->fmap;
         dmap = NULL;
     }

     /* Fonts - 7 entries of 2 + (n x 40) bytes where n is the first 2 bytes of the entry */
 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES)
         fonts = cli_jsonarray(ctx->wrkproperty, "FontCounts");
 #endif
     for (i = 0; i < 7; i++) {
         uint16_t nfonts;

         if (fmap_readn(map, &nfonts, offset, sizeof(nfonts)) != sizeof(nfonts)) {
             ret = CL_EREAD;
             goto done;
         }
         nfonts = le16_to_host(nfonts);

 #if HAVE_JSON
         if (ctx->options & CL_SCAN_FILE_PROPERTIES)
             cli_jsonint(fonts, NULL, nfonts);
 #endif
         hwp3_debug("HWP3.x: Font Entry %d with %u entries @ offset %llu\n", i+1, nfonts, (long long unsigned)offset);
         new_offset = offset + (2 + nfonts * 40);
         if ((new_offset <= offset) || (new_offset >= map->len)) {
             cli_errmsg("HWP3.x: Font Entry: number of fonts is too high, invalid. %u\n", nfonts);
             ret = CL_EPARSE;
             goto done;
         }
         offset = new_offset;
     }

     /* Styles - 2 + (n x 238) bytes where n is the first 2 bytes of the section */
     if (fmap_readn(map, &nstyles, offset, sizeof(nstyles)) != sizeof(nstyles)) {
         ret = CL_EREAD;
         goto done;
     }
     nstyles = le16_to_host(nstyles);

 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES)
         cli_jsonint(ctx->wrkproperty, "StyleCount", nstyles);
 #endif
     hwp3_debug("HWP3.x: %u Styles @ offset %llu\n", nstyles, (long long unsigned)offset);
     new_offset = offset + (2 + nstyles * 238);
     if ((new_offset <= offset) || (new_offset >= map->len)) {
         cli_errmsg("HWP3.x: Font Entry: number of font styles is too high, invalid. %u\n", nstyles);
         ret = CL_EPARSE;
         goto done;
     }
     offset = new_offset;

     last = 0;
     /* Paragraphs - variable */
     /* Paragraphs - are terminated with 0x0d00[13(CR) as hchar], empty paragraph marks end of section and do NOT end with 0x0d00 */
     while (!last && ((ret = parsehwp3_paragraph(ctx, map, p++, 0, &offset, &last)) == CL_SUCCESS));
     /* return is never a virus */
     if (ret != CL_SUCCESS)
         goto done;
 #if HAVE_JSON
     if (ctx->options & CL_SCAN_FILE_PROPERTIES)
         cli_jsonint(ctx->wrkproperty, "ParagraphCount", p);
 #endif

     last = 0;
     /* 'additional information block #1's - attachments and media */
     while (!last && ((ret = parsehwp3_infoblk_1(ctx, map, &offset, &last)) == CL_SUCCESS));

     /* scan the uncompressed stream - both compressed and uncompressed cases [ALLMATCH] */
     if ((ret == CL_SUCCESS) || ((SCAN_ALL) && (ret == CL_VIRUS))) {
         int subret = ret;
         size_t dlen = offset - start;

         ret = cli_map_scandesc(map, start, dlen, ctx, CL_TYPE_ANY);

         /* a clean stream scan must not hide a detection from the info blocks */
         if (ret == CL_SUCCESS)
             ret = subret;
     }

  done:
     /* single exit once the map exists, so the private fmap is never leaked */
     if (dmap)
         funmap(dmap);
     return ret;
 }
 
5cab01bd
 /*
  * Entry point for scanning an HWP3.x document held in *ctx->fmap.
  *
  * The fixed-size identity, document-info and document-summary sections are
  * processed in order, the optional information block #0 area is skipped,
  * and the remaining content stream is passed to hwp3_cb() - through
  * decompress_and_callback() when the document is flagged as compressed.
  * Password-protected documents are not parsed and report CL_SUCCESS.
  *
  * Returns CL_SUCCESS or the first status that is not CL_SUCCESS.
  */
 int cli_scanhwp3(cli_ctx *ctx)
 {
     struct hwp3_docinfo docinfo;
     fmap_t *map = *ctx->fmap;
     off_t offset = 0;
     int ret;

     /*
      * Not implemented: recording the magic and raw version in the JSON
      * header, e.g.
      *     cli_jsonstr(header, "Magic", hwp5->signature);
      *     cli_jsonint(header, "RawVersion", hwp5->version);
      */

     /* the document information follows the identity block */
     offset += HWP3_IDENTITY_INFO_SIZE;
     ret = parsehwp3_docinfo(ctx, offset, &docinfo);
     if (ret != CL_SUCCESS)
         return ret;

     /* then comes the document summary */
     offset += HWP3_DOCINFO_SIZE;
     ret = parsehwp3_docsummary(ctx, offset);
     if (ret != CL_SUCCESS)
         return ret;

     offset += HWP3_DOCSUMMARY_SIZE;

     /* password-protected document - cannot parse */
     if (docinfo.di_passwd) {
         cli_dbgmsg("HWP3.x: password-protected file, skip parsing\n");
         return CL_SUCCESS;
     }

     if (docinfo.di_infoblksize) {
         /* OPTIONAL TODO: HANDLE OPTIONAL INFORMATION BLOCK #0's FOR PRECLASS */
         off_t content_start = offset + docinfo.di_infoblksize;

         /* the skipped area must move forward and stay inside the map */
         if ((content_start <= offset) || (content_start >= map->len)) {
             cli_errmsg("HWP3.x: Doc info block size is too high, invalid. %u\n", docinfo.di_infoblksize);
             return CL_EPARSE;
         }
         offset = content_start;
     }

     if (docinfo.di_compressed) {
         ret = decompress_and_callback(ctx, *ctx->fmap, offset, 0, "HWP3.x", hwp3_cb, NULL);
     } else {
         ret = hwp3_cb(&offset, 0, ctx);
     }

     /* OPTIONAL TODO: HANDLE OPTIONAL ADDITIONAL INFORMATION BLOCK #2's FOR PRECLASS*/

     return ret;
 }
d2eea44a
 
 /*** HWPML (hijacking the msxml parser) ***/
7eff5e69
 #if HAVE_LIBXML2
d2eea44a
 static const struct key_entry hwpml_keys[] = {
     { "hwpml",              "HWPML",              MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
 
     /* HEAD - Document Properties */
1ab521a9
     //{ "head",               "Head",               MSXML_JSON_WRKPTR },
d2eea44a
     { "docsummary",         "DocumentProperties", MSXML_JSON_WRKPTR },
     { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
     { "author",             "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
     { "date",               "Date",               MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
     { "docsetting",         "DocumentSettings",   MSXML_JSON_WRKPTR },
     { "beginnumber",        "BeginNumber",        MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
     { "caretpos",           "CaretPos",           MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
1ab521a9
     //{ "bindatalist",        "BinDataList",        MSXML_JSON_WRKPTR },
     //{ "binitem",            "BinItem",            MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
d2eea44a
     { "facenamelist",       "FaceNameList",       MSXML_IGNORE_ELEM }, /* fonts list */
     { "borderfilllist",     "BorderFillList",     MSXML_IGNORE_ELEM }, /* borders list */
     { "charshapelist",      "CharShapeList",      MSXML_IGNORE_ELEM }, /* character shapes */
     { "tabdeflist",         "TableDefList",       MSXML_IGNORE_ELEM }, /* table defs */
     { "numberinglist",      "NumberingList",      MSXML_IGNORE_ELEM }, /* numbering list */
     { "parashapelist",      "ParagraphShapeList", MSXML_IGNORE_ELEM }, /* paragraph shapes */
     { "stylelist",          "StyleList",          MSXML_IGNORE_ELEM }, /* styles */
     { "compatibledocument", "WordCompatibility",  MSXML_IGNORE_ELEM }, /* word compatibility data */
 
     /* BODY - Document Contents */
     { "body",               "Body",               MSXML_IGNORE_ELEM }, /* document contents (we could build a document contents summary */
 
     /* TAIL - Document Attachments */
1ab521a9
     //{ "tail",               "Tail",               MSXML_JSON_WRKPTR },
d2eea44a
     { "bindatastorage",     "BinaryDataStorage",  MSXML_JSON_WRKPTR },
b8af0f70
     { "bindata",            "BinaryData",         MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
d2eea44a
     { "scriptcode",         "ScriptCodeStorage",  MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
b8af0f70
     { "scriptheader",       "ScriptHeader",       MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
     { "scriptsource",       "ScriptSource",       MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }
d2eea44a
 };
 static size_t num_hwpml_keys = sizeof(hwpml_keys) / sizeof(struct key_entry);
 
b8af0f70
 /* binary streams need to be base64-decoded and then decompressed when the corresponding attributes are set */
 /*
  * Scan callback for HWPML binary data: hands the descriptor to
  * cli_magic_scandesc().
  *
  * cbdata - not used
  * fd     - descriptor to scan; must not be negative
  * ctx    - scan context; must not be NULL
  *
  * Returns CL_ENULLARG for a negative fd or a NULL ctx, otherwise the result
  * of cli_magic_scandesc().
  */
 static int hwpml_scan_cb(void *cbdata, int fd, cli_ctx *ctx)
 {
     (void)cbdata;

     if ((fd >= 0) && (ctx != NULL))
         return cli_magic_scandesc(fd, ctx);

     return CL_ENULLARG;
 }
 
cd70b7ca
 /*
  * msxml scan callback for HWPML binary data elements.
  *
  * The "Encoding" and "Compress" attributes decide how the data in fd is
  * prepared before scanning:
  *   - Encoding="Base64": the content is base64-decoded into a temporary file
  *     which replaces fd for the following steps;
  *   - any other Encoding value: the data is scanned as-is;
  *   - Compress set to anything but "false": the data is passed through
  *     decompress_and_callback(); otherwise it is scanned directly.
  *
  * fd          - descriptor holding the element's raw content
  * ctx         - scan context
  * num_attribs - number of entries in attribs
  * attribs     - attribute key/value pairs of the element
  * cbdata      - not used
  *
  * Returns the scan result, or CL_ESTAT / CL_EMAP / CL_EWRITE (or the
  * cli_gentempfd() status) when the data cannot be prepared.
  */
 static int hwpml_binary_cb(int fd, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs, void *cbdata)
 {
     /* df stays -1 until a temporary file exists; 0 would be a valid descriptor */
     int i, ret, df = -1, com = 0, enc = 0;
     char *tempfile = NULL;

     UNUSEDPARAM(cbdata);

     /* check attributes for compression and encoding */
     for (i = 0; i < num_attribs; i++) {
         if (!strcmp(attribs[i].key, "Compress")) {
             if (!strcmp(attribs[i].value, "true"))
                 com = 1;
             else if (!strcmp(attribs[i].value, "false"))
                 com = 0;
             else
                 com = -1;
         }

         if (!strcmp(attribs[i].key, "Encoding")) {
             if (!strcmp(attribs[i].value, "Base64"))
                 enc = 1;
             else
                 enc = -1;
         }
     }

     hwpml_debug("HWPML: Checking attributes: com: %d, enc: %d\n", com, enc);

     /* decode the binary data if needed - base64 */
     if (enc < 0) {
         cli_errmsg("HWPML: Unrecognized encoding method\n");
         return cli_magic_scandesc(fd, ctx);
     } else if (enc == 1) {
         STATBUF statbuf;
         fmap_t *input;
         const char *instream;
         char *decoded;
         size_t decodedlen;

         hwpml_debug("HWPML: Decoding base64-encoded binary data\n");

         /* fmap the input file for easier manipulation */
         if (FSTAT(fd, &statbuf) == -1) {
             cli_errmsg("HWPML: Can't stat file descriptor\n");
             return CL_ESTAT;
         }

         if (!(input = fmap(fd, 0, statbuf.st_size))) {
             cli_errmsg("HWPML: Failed to get fmap for binary data\n");
             return CL_EMAP;
         }

         /* send data for base64 conversion - TODO: what happens with really big files? */
         if (!(instream = fmap_need_off_once(input, 0, input->len))) {
             cli_errmsg("HWPML: Failed to get input stream from binary data\n");
             funmap(input);
             return CL_EMAP;
         }

         decoded = (char *)cl_base64_decode((char *)instream, input->len, NULL, &decodedlen, 0);
         funmap(input);
         if (!decoded) {
             /* undecodable data is still scanned in its raw form */
             cli_errmsg("HWPML: Failed to get base64 decode binary data\n");
             return cli_magic_scandesc(fd, ctx);
         }

         /* open file for writing and scanning */
         if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &df)) != CL_SUCCESS) {
             cli_warnmsg("HWPML: Failed to create temporary file for decoded stream scanning\n");
             /* no temporary file to clean up, but the decoded buffer is ours */
             free(decoded);
             return ret;
         }

         if(cli_writen(df, decoded, decodedlen) != (int)decodedlen) {
             free(decoded);
             ret = CL_EWRITE;
             goto hwpml_end;
         }
         free(decoded);

         /* keeps the later logic simpler */
         fd = df;

         cli_dbgmsg("HWPML: Decoded binary data to %s\n", tempfile);
     }

     /* decompress the file if needed - zlib */
     if (com) {
         STATBUF statbuf;
         fmap_t *input;

         hwpml_debug("HWPML: Decompressing binary data\n");

         /* fmap the input file for easier manipulation */
         if (FSTAT(fd, &statbuf) == -1) {
             cli_errmsg("HWPML: Can't stat file descriptor\n");
             ret = CL_ESTAT;
             goto hwpml_end;
         }

         input = fmap(fd, 0, statbuf.st_size);
         if (!input) {
             cli_errmsg("HWPML: Failed to get fmap for binary data\n");
             ret = CL_EMAP;
             goto hwpml_end;
         }
         ret = decompress_and_callback(ctx, input, 0, 0, "HWPML", hwpml_scan_cb, NULL);
         funmap(input);
     } else {
         ret = hwpml_scan_cb(NULL, fd, ctx);
     }

     /* close decoded file descriptor if used */
  hwpml_end:
     if (df >= 0) {
         close(df);
         if (!(ctx->engine->keeptmp))
             cli_unlink(tempfile);
         free(tempfile);
     }
     return ret;
 }
7eff5e69
 #endif /* HAVE_LIBXML2 */
b8af0f70
 
d2eea44a
 /*
  * Entry point for scanning an HWPML (XML) document held in *ctx->fmap.
  *
  * With libxml2 available the document is streamed through an xmlTextReader
  * into cli_msxml_parse_document() using the hwpml_keys table, with
  * hwpml_binary_cb() as the scan callback. Without libxml2 nothing is scanned
  * and CL_SUCCESS is returned.
  *
  * Returns CL_ENULLARG when ctx is NULL (libxml2 builds only), otherwise the
  * parser's result. If the reader cannot be created, the result is that of
  * cli_json_parse_error() on JSON builds and CL_SUCCESS otherwise.
  */
 int cli_scanhwpml(cli_ctx *ctx)
 {
 #if HAVE_LIBXML2
     struct msxml_cbdata cbdata;
     struct msxml_ctx mxctx;
     xmlTextReaderPtr reader;
     int ret = CL_SUCCESS;

     cli_dbgmsg("in cli_scanhwpml()\n");

     if (ctx == NULL)
         return CL_ENULLARG;

     /* the read callback pulls its data from the current map */
     memset(&cbdata, 0, sizeof(cbdata));
     cbdata.map = *ctx->fmap;

     reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "hwpml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
     if (reader != NULL) {
         memset(&mxctx, 0, sizeof(mxctx));
         mxctx.scan_cb = hwpml_binary_cb;
         ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, MSXML_FLAG_JSON, &mxctx);

         xmlTextReaderClose(reader);
         xmlFreeTextReader(reader);
         return ret;
     }

     /* libxml2 failed to set up the reader */
     cli_dbgmsg("cli_scanhwpml: cannot initialize xmlReader\n");
 #if HAVE_JSON
     ret = cli_json_parse_error(ctx->wrkproperty, "HWPML_ERROR_XML_READER_IO");
 #endif
     return ret;
 #else
     UNUSEDPARAM(ctx);
     cli_dbgmsg("in cli_scanhwpml()\n");
     cli_dbgmsg("cli_scanhwpml: scanning hwpml documents requires libxml2!\n");

     return CL_SUCCESS;
 #endif
 }