libclamav/msdoc.c
f53bed0b
 /*
  * Extract component parts of OLE2 files (e.g. MS Office Documents)
02840644
  *
e1cbc270
  * Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
f53bed0b
  * Copyright (C) 2007-2013 Sourcefire, Inc.
02840644
  *
87c017bc
  * Authors: Kevin Lin
02840644
  *
f53bed0b
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
02840644
  *
f53bed0b
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
02840644
  *
f53bed0b
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 51
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <conv.h>
288057e9
 #ifdef HAVE_UNISTD_H
f53bed0b
 #include <unistd.h>
 #endif
 
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
 #include "clamav.h"
 #include "others.h"
 #include "msdoc.h"
 #include "scanners.h"
 #include "fmap.h"
 #include "json_api.h"
 
 #if HAVE_JSON
 static char *
 ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
 {
288057e9
     char *outbuf = NULL;
f53bed0b
 #if HAVE_ICONV
     char *buf, *p1, *p2;
     off_t offset;
     size_t inlen, outlen, nonrev, sz2;
27948a03
     int i, attempt;
f53bed0b
     iconv_t cd;
102cd430
 #else
     UNUSEDPARAM(encoding);
f53bed0b
 #endif
     /* applies in the both case */
     if (sctx->codepage == 20127 || sctx->codepage == 65001) {
750bd0c9
         char *track;
02840644
         size_t bcnt, scnt;
750bd0c9
 
288057e9
         outbuf = cli_calloc(1, sz + 1);
750bd0c9
         if (!(outbuf))
             return NULL;
         memcpy(outbuf, begin, sz);
 
288057e9
         track = outbuf + sz - 1;
750bd0c9
         if ((sctx->codepage == 65001) && (*track & 0x80)) { /* UTF-8 with a most significant bit */
             /* locate the start of the last character */
             for (bcnt = 1; (track != outbuf); track--, bcnt++) {
                 if (((uint8_t)*track & 0xC0) != 0x80)
                     break;
             }
 
             /* count number of set (1) significant bits */
288057e9
             for (scnt = 0; scnt < sizeof(uint8_t) * 8; scnt++) {
750bd0c9
                 if (((uint8_t)*track & (0x80 >> scnt)) == 0)
                     break;
             }
 
             if (bcnt != scnt) {
02840644
                 cli_dbgmsg("ole2_convert_utf: cleaning out %zu bytes from incomplete utf-8 character length %zu\n",
288057e9
                            bcnt, scnt);
750bd0c9
                 for (; bcnt > 0; bcnt--, track++)
                     *track = '\0';
             }
         }
f53bed0b
         return outbuf;
     }
 
 #if HAVE_ICONV
     p1 = buf = cli_calloc(1, sz);
     if (!(buf))
         return NULL;
 
     memcpy(buf, begin, sz);
     inlen = sz;
 
     /* encoding lookup if not specified */
     if (!encoding) {
         for (i = 0; i < NUMCODEPAGES; ++i) {
             if (sctx->codepage == codepage_entries[i].codepage)
                 encoding = codepage_entries[i].encoding;
             else if (sctx->codepage < codepage_entries[i].codepage) {
                 /* assuming sorted array */
                 break;
             }
         }
 
         if (!encoding) {
             cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
             sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
             free(buf);
             return NULL;
         }
     }
 
     cd = iconv_open("UTF-8", encoding);
     if (cd == (iconv_t)(-1)) {
c00baa37
         char errbuf[128];
288057e9
         cli_strerror(errno, errbuf, sizeof(errbuf));
c00baa37
         cli_errmsg("ole2_convert_utf: could not initialize iconv for encoding %s: %s\n", encoding, errbuf);
f53bed0b
         sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
288057e9
     } else {
f53bed0b
         offset = 0;
27948a03
         for (attempt = 1; attempt <= 3; ++attempt) {
f53bed0b
             /* charset to UTF-8 should never exceed sz*6 */
288057e9
             sz2 = (attempt * 2) * sz;
f53bed0b
             /* use cli_realloc, reuse the buffer that has already been translated */
288057e9
             outbuf = (char *)cli_realloc(outbuf, sz2 + 1);
f53bed0b
             if (!outbuf) {
                 free(buf);
50876732
                 iconv_close(cd);
f53bed0b
                 return NULL;
             }
 
             outlen = sz2 - offset;
288057e9
             p2     = outbuf + offset;
f53bed0b
 
             /* conversion */
             nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
 
             if (errno == EILSEQ) {
                 cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
                 sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
                 break;
288057e9
             } else if (errno == EINVAL && nonrev == (size_t)-1) {
f53bed0b
                 cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
                 sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
                 break;
288057e9
             } else if (inlen == 0) {
f53bed0b
                 //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
                 break;
             }
 
             //outbuf[sz2 - outlen] = '\0';
             //cli_dbgmsg("%u %s\n", inlen, outbuf);
 
             offset = sz2 - outlen;
27948a03
             if (attempt < 3)
f53bed0b
                 cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
288057e9
                            (long long unsigned)((attempt * 2) * sz), (long long unsigned)(((attempt + 1) * 2) * sz));
f53bed0b
         }
 
         if (errno == E2BIG && nonrev == (size_t)-1) {
             cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
             sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
         }
 
         outbuf[sz2 - outlen] = '\0';
     }
 
     iconv_close(cd);
     free(buf);
 #endif
     /* this should force base64 encoding if NULL */
     return outbuf;
 }
 
 static int
 ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
 {
     uint16_t proptype, padding;
     int ret = CL_SUCCESS;
 
     if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
         sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
         return CL_ETIMEOUT;
     }
 
288057e9
     if (offset + sizeof(proptype) + sizeof(padding) > sctx->pssize) {
f53bed0b
         sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
         return CL_EFORMAT;
     }
 
288057e9
     memcpy(&proptype, databuf + offset, sizeof(proptype));
     offset += sizeof(proptype);
     memcpy(&padding, databuf + offset, sizeof(padding));
     offset += sizeof(padding);
f53bed0b
     /* endian conversion */
     proptype = sum16_endian_convert(proptype);
 
     //cli_dbgmsg("proptype: 0x%04x\n", proptype);
     if (padding != 0) {
         cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
         sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
         return CL_EFORMAT;
     }
 
     switch (proptype) {
288057e9
         case PT_EMPTY:
         case PT_NULL:
             ret = cli_jsonnull(sctx->summary, sctx->propname);
             break;
         case PT_INT16: {
f53bed0b
             int16_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum16_endian_convert(dout);
 
             if (sctx->writecp) {
                 sctx->codepage = (uint16_t)dout;
288057e9
                 ret            = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
             } else
f53bed0b
                 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_INT32:
         case PT_INT32v1: {
f53bed0b
             int32_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_FLOAT32: /* review this please */
         {
f53bed0b
             float dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_DATE:
         case PT_DOUBLE64: /* review this please */
         {
f53bed0b
             double dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_BOOL: {
f53bed0b
             uint16_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* no need for endian conversion */
 
             ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_INT8v1: {
f53bed0b
             int8_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* no need for endian conversion */
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_UINT8: {
f53bed0b
             uint8_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* no need for endian conversion */
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_UINT16: {
f53bed0b
             uint16_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum16_endian_convert(dout);
 
             if (sctx->writecp)
                 sctx->codepage = dout;
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_UINT32:
         case PT_UINT32v1: {
f53bed0b
             uint32_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_INT64: {
f53bed0b
             int64_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
             break;
288057e9
         }
         case PT_UINT64: {
f53bed0b
             uint64_t dout;
288057e9
             if (offset + sizeof(dout) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&dout, databuf + offset, sizeof(dout));
             offset += sizeof(dout);
f53bed0b
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
             break;
         }
288057e9
         case PT_BSTR:
         case PT_LPSTR:
             if (sctx->codepage == 0) {
                 cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
                 sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
             } else {
                 uint32_t strsize;
                 char *outstr, *outstr2;
 
                 if (offset + sizeof(strsize) > sctx->pssize) {
                     sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                     return CL_EFORMAT;
                 }
f53bed0b
 
288057e9
                 memcpy(&strsize, databuf + offset, sizeof(strsize));
                 offset += sizeof(strsize);
                 /* endian conversion? */
                 strsize = sum32_endian_convert(strsize);
f53bed0b
 
288057e9
                 if (offset + strsize > sctx->pssize) {
                     sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                     return CL_EFORMAT;
                 }
f53bed0b
 
288057e9
                 /* limitation on string length */
                 if (strsize > PROPSTRLIMIT) {
                     cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
                                (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
                     sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
                     strsize = PROPSTRLIMIT;
                 }
f53bed0b
 
288057e9
                 outstr = cli_calloc(strsize + 1, 1); /* last char must be NULL */
                 if (!outstr) {
                     return CL_EMEM;
                 }
                 strncpy(outstr, (const char *)(databuf + offset), strsize);
f53bed0b
 
288057e9
                 /* conversion of various encodings to UTF-8 */
                 outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
f53bed0b
                 if (!outstr2) {
288057e9
                     /* use base64 encoding when all else fails! */
                     char b64jstr[PROPSTRLIMIT];
 
                     /* outstr2 should be 4/3 times the original (rounded up) */
                     outstr2 = cl_base64_encode(outstr, strsize);
                     if (!outstr2) {
                         cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
                         return CL_EMEM;
                     }
 
                     snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
                     ret = cli_jsonbool(sctx->summary, b64jstr, 1);
                     if (ret != CL_SUCCESS)
                         return ret;
f53bed0b
                 }
 
288057e9
                 ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
                 free(outstr);
                 free(outstr2);
f53bed0b
             }
288057e9
             break;
         case PT_LPWSTR: {
f53bed0b
             uint32_t strsize;
             char *outstr, *outstr2;
 
288057e9
             if (offset + sizeof(strsize) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&strsize, databuf + offset, sizeof(strsize));
             offset += sizeof(strsize);
f53bed0b
             /* endian conversion; wide strings are by length, not size (x2) */
288057e9
             strsize = sum32_endian_convert(strsize) * 2;
f53bed0b
 
             /* limitation on string length */
288057e9
             if (strsize > (2 * PROPSTRLIMIT)) {
f53bed0b
                 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
288057e9
                            (unsigned long)strsize, (unsigned long)(2 * PROPSTRLIMIT));
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
288057e9
                 strsize = (2 * PROPSTRLIMIT);
f53bed0b
             }
 
288057e9
             if (offset + strsize > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             outstr = cli_calloc(strsize + 2, 1); /* last two chars must be NULL */
f53bed0b
             if (!outstr) {
                 return CL_EMEM;
             }
288057e9
             memcpy(outstr, (const char *)(databuf + offset), strsize);
f53bed0b
             /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
             outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
             if (!outstr2) {
                 /* use base64 encoding when all else fails! */
                 char b64jstr[PROPSTRLIMIT];
 
                 outstr2 = cl_base64_encode(outstr, strsize);
                 if (!outstr2) {
                     free(outstr);
                     return CL_EMEM;
                 }
 
                 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
                 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
50876732
                 if (ret != CL_SUCCESS) {
                     free(outstr);
                     free(outstr2);
f53bed0b
                     return ret;
50876732
                 }
f53bed0b
             }
 
             ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
             free(outstr);
             free(outstr2);
             break;
288057e9
         }
         case PT_FILETIME: {
f53bed0b
             uint32_t ltime, htime;
288057e9
             uint64_t wtime = 0, utime = 0;
f53bed0b
 
288057e9
             if (offset + sizeof(ltime) + sizeof(htime) > sctx->pssize) {
f53bed0b
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
288057e9
             memcpy(&ltime, databuf + offset, sizeof(ltime));
             offset += sizeof(ltime);
             memcpy(&htime, databuf + offset, sizeof(htime));
             offset += sizeof(ltime);
f53bed0b
             ltime = sum32_endian_convert(ltime);
             htime = sum32_endian_convert(htime);
 
             /* UNIX timestamp formatting */
             wtime = htime;
             wtime <<= 32;
             wtime |= ltime;
 
             utime = wtime / 10000000;
             utime -= 11644473600LL;
 
             if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
                 cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
288057e9
             } else {
f53bed0b
                 ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
             }
             break;
288057e9
         }
         default:
             cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n",
                        proptype, sctx->propname);
             sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
f53bed0b
     }
 
     return ret;
 }
 
 static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
 {
288057e9
     switch (propid) {
         case DSPID_CODEPAGE:
             sctx->writecp  = 1; /* must be set ONLY for codepage */
             sctx->propname = "CodePage";
             break;
         case DSPID_CATEGORY:
             sctx->propname = "Category";
             break;
         case DSPID_PRESFORMAT:
             sctx->propname = "PresentationTarget";
             break;
         case DSPID_BYTECOUNT:
             sctx->propname = "Bytes";
             break;
         case DSPID_LINECOUNT:
             sctx->propname = "Lines";
             break;
         case DSPID_PARCOUNT:
             sctx->propname = "Paragraphs";
             break;
         case DSPID_SLIDECOUNT:
             sctx->propname = "Slides";
             break;
         case DSPID_NOTECOUNT:
             sctx->propname = "Notes";
             break;
         case DSPID_HIDDENCOUNT:
             sctx->propname = "HiddenSlides";
             break;
         case DSPID_MMCLIPCOUNT:
             sctx->propname = "MMClips";
             break;
         case DSPID_SCALE:
             sctx->propname = "Scale";
             break;
         case DSPID_HEADINGPAIR: /* VT_VARIANT | VT_VECTOR */
             sctx->propname = "HeadingPairs";
             break;
         case DSPID_DOCPARTS: /* VT_VECTOR | VT_LPSTR */
             sctx->propname = "DocPartTitles";
             break;
         case DSPID_MANAGER:
             sctx->propname = "Manager";
             break;
         case DSPID_COMPANY:
             sctx->propname = "Company";
             break;
         case DSPID_LINKSDIRTY:
             sctx->propname = "LinksDirty";
             break;
         case DSPID_CCHWITHSPACES:
             sctx->propname = "Char&WSCount";
             break;
         case DSPID_SHAREDDOC: /* SHOULD BE FALSE! */
             sctx->propname = "SharedDoc";
             break;
         case DSPID_LINKBASE: /* moved to user-defined */
             sctx->propname = "LinkBase";
             break;
         case DSPID_HLINKS: /* moved to user-defined */
             sctx->propname = "HyperLinks";
             break;
         case DSPID_HYPERLINKSCHANGED:
             sctx->propname = "HyperLinksChanged";
             break;
         case DSPID_VERSION:
             sctx->propname = "Version";
             break;
         case DSPID_DIGSIG:
             sctx->propname = "DigitalSig";
             break;
         case DSPID_CONTENTTYPE:
             sctx->propname = "ContentType";
             break;
         case DSPID_CONTENTSTATUS:
             sctx->propname = "ContentStatus";
             break;
         case DSPID_LANGUAGE:
             sctx->propname = "Language";
             break;
         case DSPID_DOCVERSION:
             sctx->propname = "DocVersion";
             break;
         default:
             cli_dbgmsg("ole2_docsum_propset_json: unrecognized propid!\n");
             sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
f53bed0b
     }
 }
 
 static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
 {
288057e9
     switch (propid) {
         case SPID_CODEPAGE:
             sctx->writecp  = 1; /* must be set ONLY for codepage */
             sctx->propname = "CodePage";
             break;
         case SPID_TITLE:
             sctx->propname = "Title";
             break;
         case SPID_SUBJECT:
             sctx->propname = "Subject";
             break;
         case SPID_AUTHOR:
             sctx->propname = "Author";
             break;
         case SPID_KEYWORDS:
             sctx->propname = "Keywords";
             break;
         case SPID_COMMENTS:
             sctx->propname = "Comments";
             break;
         case SPID_TEMPLATE:
             sctx->propname = "Template";
             break;
         case SPID_LASTAUTHOR:
             sctx->propname = "LastAuthor";
             break;
         case SPID_REVNUMBER:
             sctx->propname = "RevNumber";
             break;
         case SPID_EDITTIME:
             sctx->propname = "EditTime";
             break;
         case SPID_LASTPRINTED:
             sctx->propname = "LastPrinted";
             break;
         case SPID_CREATEDTIME:
             sctx->propname = "CreatedTime";
             break;
         case SPID_MODIFIEDTIME:
             sctx->propname = "ModifiedTime";
             break;
         case SPID_PAGECOUNT:
             sctx->propname = "PageCount";
             break;
         case SPID_WORDCOUNT:
             sctx->propname = "WordCount";
             break;
         case SPID_CHARCOUNT:
             sctx->propname = "CharCount";
             break;
         case SPID_THUMBNAIL:
             sctx->propname = "Thumbnail";
             break;
         case SPID_APPNAME:
             sctx->propname = "AppName";
             break;
         case SPID_SECURITY:
             sctx->propname = "Security";
             break;
         default:
             cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
             sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
f53bed0b
     }
 }
 
 static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
 {
     unsigned char *hdr, *ps;
     uint32_t numprops, limitprops;
     off_t foff = offset, psoff = 0;
     uint32_t poffset;
     int ret;
     unsigned int i;
 
     cli_dbgmsg("in ole2_summary_propset_json\n");
 
     /* summary ctx propset-specific setup*/
     sctx->codepage = 0;
288057e9
     sctx->writecp  = 0;
f53bed0b
     sctx->propname = NULL;
 
     /* examine property set metadata */
288057e9
     if ((foff + (2 * sizeof(uint32_t))) > sctx->maplen) {
f53bed0b
         sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return CL_EFORMAT;
     }
288057e9
     hdr = (unsigned char *)fmap_need_off_once(sctx->sfmap, foff, (2 * sizeof(uint32_t)));
f53bed0b
     if (!hdr) {
         sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return CL_EREAD;
     }
     //foff+=(2*sizeof(uint32_t)); // keep foff pointing to start of propset segment
288057e9
     psoff += (2 * sizeof(uint32_t));
f53bed0b
     memcpy(&(sctx->pssize), hdr, sizeof(sctx->pssize));
288057e9
     memcpy(&numprops, hdr + sizeof(sctx->pssize), sizeof(numprops));
f53bed0b
     /* endian conversion */
     sctx->pssize = sum32_endian_convert(sctx->pssize);
288057e9
     numprops     = sum32_endian_convert(numprops);
f53bed0b
     cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, numprops);
     if (numprops > PROPCNTLIMIT) {
         sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
         limitprops = PROPCNTLIMIT;
288057e9
     } else {
f53bed0b
         limitprops = numprops;
     }
7cd9337a
     cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) properties\n",
f53bed0b
                limitprops, numprops, PROPCNTLIMIT);
 
     /* extract remaining fragment of propset */
288057e9
     if ((size_t)(foff + (sctx->pssize)) > (size_t)(sctx->maplen)) {
f53bed0b
         sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return CL_EFORMAT;
     }
288057e9
     ps = (unsigned char *)fmap_need_off_once(sctx->sfmap, foff, sctx->pssize);
f53bed0b
     if (!ps) {
         sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return CL_EREAD;
     }
 
     /* iterate over the properties */
     for (i = 0; i < limitprops; ++i) {
         uint32_t propid, propoff;
 
288057e9
         if (psoff + sizeof(propid) + sizeof(poffset) > sctx->pssize) {
f53bed0b
             sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
             return CL_EFORMAT;
         }
288057e9
         memcpy(&propid, ps + psoff, sizeof(propid));
         psoff += sizeof(propid);
         memcpy(&propoff, ps + psoff, sizeof(propoff));
         psoff += sizeof(propoff);
f53bed0b
         /* endian conversion */
288057e9
         propid  = sum32_endian_convert(propid);
f53bed0b
         propoff = sum32_endian_convert(propoff);
         cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);
 
288057e9
         sctx->propname = NULL;
         sctx->writecp  = 0;
9a1232d2
         switch (sctx->mode) {
288057e9
             case 1:
                 ole2_translate_docsummary_propid(sctx, propid);
                 break;
             default:
                 ole2_translate_summary_propid(sctx, propid);
9a1232d2
         }
f53bed0b
 
         if (sctx->propname != NULL) {
             ret = ole2_process_property(sctx, ps, propoff);
             if (ret != CL_SUCCESS)
                 return ret;
288057e9
         } else {
f53bed0b
             /* add unknown propid flag */
         }
     }
 
     return CL_SUCCESS;
 }
 
 static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
 {
     json_object *jarr;
 
     cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);
 
     if (sctx->sfmap) {
         funmap(sctx->sfmap);
     }
 
     if (sctx->flags) {
         jarr = cli_jsonarray(sctx->summary, "ParseErrors");
 
         /* summary errors */
         if (sctx->flags & OLE2_SUMMARY_ERROR_TOOSMALL) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_TOOSMALL");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_OOB) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_OOB");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_DATABUF) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_DATABUF");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_INVALID_ENTRY) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_INVALID_ENTRY");
         }
         if (sctx->flags & OLE2_SUMMARY_LIMIT_PROPS) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_LIMIT_PROPS");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_TIMEOUT) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TIMEOUT");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_CODEPAGE) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_CODEPAGE");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_UNKNOWN_PROPID) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_TRUNC_STR) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TRUNC_STR");
         }
 
         /* codepage translation errors */
         if (sctx->flags & OLE2_CODEPAGE_ERROR_NOTFOUND) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_NOTFOUND");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_UNINITED) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_UNINITED");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_INVALID) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INVALID");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_INCOMPLETE) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INCOMPLETE");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL");
         }
     }
 
     return retcode;
 }
 
 int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
 {
     summary_ctx_t sctx;
     STATBUF statbuf;
     off_t foff = 0;
     unsigned char *databuf;
     summary_stub_t sumstub;
     propset_entry_t pentry;
     int ret = CL_SUCCESS;
 
     cli_dbgmsg("in cli_ole2_summary_json\n");
 
     /* preliminary sanity checks */
     if (ctx == NULL) {
         return CL_ENULLARG;
     }
 
     if (fd < 0) {
         cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
         return CL_ENULLARG; /* placeholder */
     }
 
b11c3e40
     if (mode < 0 || mode > 2) {
f53bed0b
         cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
         return CL_ENULLARG; /* placeholder */
     }
 
     /* summary ctx setup */
     memset(&sctx, 0, sizeof(sctx));
288057e9
     sctx.ctx  = ctx;
f53bed0b
     sctx.mode = mode;
 
     if (FSTAT(fd, &statbuf) == -1) {
         cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
         return CL_ESTAT;
     }
 
     sctx.sfmap = fmap(fd, 0, statbuf.st_size);
     if (!sctx.sfmap) {
         cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
         return CL_EMAP;
     }
     sctx.maplen = sctx.sfmap->len;
ce2dcb53
     cli_dbgmsg("ole2_summary_json: streamsize: %zu\n", sctx.maplen);
f53bed0b
 
9a1232d2
     switch (mode) {
288057e9
         case 1:
             sctx.summary = cli_jsonobj(ctx->wrkproperty, "DocSummaryInfo");
             break;
         case 2:
             sctx.summary = cli_jsonobj(ctx->wrkproperty, "Hwp5SummaryInfo");
             break;
         case 0:
         default:
             sctx.summary = cli_jsonobj(ctx->wrkproperty, "SummaryInfo");
             break;
9a1232d2
     }
 
f53bed0b
     if (!sctx.summary) {
         cli_errmsg("ole2_summary_json: no memory for json object.\n");
         return cli_ole2_summary_json_cleanup(&sctx, CL_EMEM);
     }
 
     sctx.codepage = 0;
288057e9
     sctx.writecp  = 0;
f53bed0b
 
     /* acquire property stream metadata */
     if (sctx.maplen < sizeof(summary_stub_t)) {
         sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
288057e9
     databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
f53bed0b
     if (!databuf) {
         sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
     }
     foff += sizeof(summary_stub_t);
     memcpy(&sumstub, databuf, sizeof(summary_stub_t));
 
     /* endian conversion and checks */
     sumstub.byte_order = le16_to_host(sumstub.byte_order);
     if (sumstub.byte_order != 0xfffe) {
         cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
         sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
27948a03
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
f53bed0b
     }
288057e9
     sumstub.version      = sum16_endian_convert(sumstub.version); /*unused*/
     sumstub.system       = sum32_endian_convert(sumstub.system);  /*unused*/
f53bed0b
     sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
     if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
         cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
         sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
 
     cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
     cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);
 
     /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
288057e9
     if ((sctx.maplen - foff) < sizeof(propset_entry_t)) {
f53bed0b
         sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
288057e9
     databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
f53bed0b
     if (!databuf) {
         sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
     }
     foff += sizeof(propset_entry_t);
     memcpy(&pentry, databuf, sizeof(propset_entry_t));
     /* endian conversion */
     pentry.offset = sum32_endian_convert(pentry.offset);
 
     if ((ret = ole2_summary_propset_json(&sctx, pentry.offset)) != CL_SUCCESS) {
         return cli_ole2_summary_json_cleanup(&sctx, ret);
     }
 
     /* second property set (index=1) is always a custom property set (if present) */
     if (sumstub.num_propsets == 2) {
         cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
     }
 
     return cli_ole2_summary_json_cleanup(&sctx, CL_SUCCESS);
 }
 #endif /* HAVE_JSON */