libclamav/msdoc.c
f53bed0b
 /*
  * Extract component parts of OLE2 files (e.g. MS Office Documents)
  * 
c442ca9c
  * Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
f53bed0b
  * Copyright (C) 2007-2013 Sourcefire, Inc.
  * 
87c017bc
  * Authors: Kevin Lin
f53bed0b
  * 
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  * 
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  * 
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 51
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <conv.h>
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
 #include "clamav.h"
 #include "others.h"
 #include "msdoc.h"
 #include "scanners.h"
 #include "fmap.h"
 #include "json_api.h"
 
 #if HAVE_JSON
 static char *
 ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
 {
     char *outbuf=NULL;
 #if HAVE_ICONV
     char *buf, *p1, *p2;
     off_t offset;
     size_t inlen, outlen, nonrev, sz2;
     int i, try;
     iconv_t cd;
 #endif
     /* applies in the both case */
     if (sctx->codepage == 20127 || sctx->codepage == 65001) {
750bd0c9
         char *track;
         int bcnt, scnt;
 
c4ddaa22
         outbuf = cli_calloc(1, sz+1);
750bd0c9
         if (!(outbuf))
             return NULL;
         memcpy(outbuf, begin, sz);
 
         track = outbuf+sz-1;
         if ((sctx->codepage == 65001) && (*track & 0x80)) { /* UTF-8 with a most significant bit */
             /* locate the start of the last character */
             for (bcnt = 1; (track != outbuf); track--, bcnt++) {
                 if (((uint8_t)*track & 0xC0) != 0x80)
                     break;
             }
 
             /* count number of set (1) significant bits */
             for (scnt = 0; scnt < sizeof(uint8_t)*8; scnt++) {
                 if (((uint8_t)*track & (0x80 >> scnt)) == 0)
                     break;
             }
 
             if (bcnt != scnt) {
                 cli_dbgmsg("ole2_convert_utf: cleaning out %d bytes from incomplete "
                            "utf-8 character length %d\n", bcnt, scnt);
                 for (; bcnt > 0; bcnt--, track++)
                     *track = '\0';
             }
         }
f53bed0b
         return outbuf;
     }
 
 #if HAVE_ICONV
     p1 = buf = cli_calloc(1, sz);
     if (!(buf))
         return NULL;
 
     memcpy(buf, begin, sz);
     inlen = sz;
 
     /* encoding lookup if not specified */
     if (!encoding) {
         for (i = 0; i < NUMCODEPAGES; ++i) {
             if (sctx->codepage == codepage_entries[i].codepage)
                 encoding = codepage_entries[i].encoding;
             else if (sctx->codepage < codepage_entries[i].codepage) {
                 /* assuming sorted array */
                 break;
             }
         }
 
         if (!encoding) {
             cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
             sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
             free(buf);
             return NULL;
         }
     }
 
     cd = iconv_open("UTF-8", encoding);
     if (cd == (iconv_t)(-1)) {
c00baa37
         char errbuf[128];
         cli_strerror(errno, errbuf, sizeof(errbuf)); 
         cli_errmsg("ole2_convert_utf: could not initialize iconv for encoding %s: %s\n", encoding, errbuf);
f53bed0b
         sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
     }
     else {
         offset = 0;
         for (try = 1; try <= 3; ++try) {
             /* charset to UTF-8 should never exceed sz*6 */
             sz2 = (try*2) * sz;
             /* use cli_realloc, reuse the buffer that has already been translated */
             outbuf = (char *)cli_realloc(outbuf, sz2+1);
             if (!outbuf) {
                 free(buf);
                 return NULL;
             }
 
             outlen = sz2 - offset;
             p2 = outbuf + offset;
 
             /* conversion */
             nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
 
             if (errno == EILSEQ) {
                 cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
                 sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
                 break;
             }
             else if (errno == EINVAL && nonrev == (size_t)-1) {
                 cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
                 sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
                 break;
             }
             else if (inlen == 0) {
                 //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
                 break;
             }
 
             //outbuf[sz2 - outlen] = '\0';
             //cli_dbgmsg("%u %s\n", inlen, outbuf);
 
             offset = sz2 - outlen;
             if (try < 3)
                 cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
                            (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
         }
 
         if (errno == E2BIG && nonrev == (size_t)-1) {
             cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
             sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
         }
 
         outbuf[sz2 - outlen] = '\0';
     }
 
     iconv_close(cd);
     free(buf);
 #endif
     /* this should force base64 encoding if NULL */
     return outbuf;
 }
 
 static int
 ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
 {
     uint16_t proptype, padding;
     int ret = CL_SUCCESS;
 
     if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
         sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
         return CL_ETIMEOUT;
     }
 
     if (offset+sizeof(proptype)+sizeof(padding) > sctx->pssize) {
         sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
         return CL_EFORMAT;
     }
 
     memcpy(&proptype, databuf+offset, sizeof(proptype));
     offset+=sizeof(proptype);
     memcpy(&padding, databuf+offset, sizeof(padding));
     offset+=sizeof(padding);
     /* endian conversion */
     proptype = sum16_endian_convert(proptype);
 
     //cli_dbgmsg("proptype: 0x%04x\n", proptype);
     if (padding != 0) {
         cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
         sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
         return CL_EFORMAT;
     }
 
     switch (proptype) {
     case PT_EMPTY:
     case PT_NULL:
         ret = cli_jsonnull(sctx->summary, sctx->propname);
         break;
     case PT_INT16:
 	{
             int16_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum16_endian_convert(dout);
 
             if (sctx->writecp) {
                 sctx->codepage = (uint16_t)dout;
                 ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
             }
             else
                 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_INT32:
     case PT_INT32v1:
 	{
             int32_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_FLOAT32: /* review this please */
 	{
             float dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_DATE:
     case PT_DOUBLE64: /* review this please */
 	{
             double dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_BOOL:
 	{
             uint16_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* no need for endian conversion */
 
             ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_INT8v1:
 	{
             int8_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* no need for endian conversion */
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_UINT8:
 	{
             uint8_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* no need for endian conversion */
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_UINT16:
 	{
             uint16_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum16_endian_convert(dout);
 
             if (sctx->writecp)
                 sctx->codepage = dout;
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_UINT32:
     case PT_UINT32v1:
 	{
             uint32_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum32_endian_convert(dout);
 
             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_INT64:
 	{
             int64_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_UINT64:
 	{
             uint64_t dout;
             if (offset+sizeof(dout) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&dout, databuf+offset, sizeof(dout));
             offset+=sizeof(dout);
             /* endian conversion */
             dout = sum64_endian_convert(dout);
 
             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
             break;
 	}
     case PT_BSTR:
     case PT_LPSTR:
         if (sctx->codepage == 0) {
             cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
             sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
         }
         else {
             uint32_t strsize;
             char *outstr, *outstr2;
 
             if (offset+sizeof(strsize) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
 
             memcpy(&strsize, databuf+offset, sizeof(strsize));
             offset+=sizeof(strsize);
             /* endian conversion? */
             strsize = sum32_endian_convert(strsize);
 
             if (offset+strsize > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
 
             /* limitation on string length */
             if (strsize > PROPSTRLIMIT) {
                 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
                            (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
                 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
                 strsize = PROPSTRLIMIT;
             }
 
             outstr = cli_calloc(strsize+1, 1); /* last char must be NULL */
             if (!outstr) {
                 return CL_EMEM;
             }
             strncpy(outstr, (const char *)(databuf+offset), strsize);
 
             /* conversion of various encodings to UTF-8 */
             outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
             if (!outstr2) {
                 /* use base64 encoding when all else fails! */
                 char b64jstr[PROPSTRLIMIT];
 
                 /* outstr2 should be 4/3 times the original (rounded up) */
                 outstr2 = cl_base64_encode(outstr, strsize);
                 if (!outstr2) {
                     cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
                     return CL_EMEM;
                 }
 
                 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
                 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
                 if (ret != CL_SUCCESS)
                     return ret;
             }
 
             ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
             free(outstr);
             free(outstr2);
         }
         break;
     case PT_LPWSTR:
 	{
             uint32_t strsize;
             char *outstr, *outstr2;
 
             if (offset+sizeof(strsize) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&strsize, databuf+offset, sizeof(strsize));
             offset+=sizeof(strsize);
             /* endian conversion; wide strings are by length, not size (x2) */
             strsize = sum32_endian_convert(strsize)*2;
 
             /* limitation on string length */
             if (strsize > (2*PROPSTRLIMIT)) {
                 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
                            (unsigned long)strsize, (unsigned long)(2*PROPSTRLIMIT));
                 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
                 strsize = (2*PROPSTRLIMIT);
             }
 
             if (offset+strsize > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             outstr = cli_calloc(strsize+2, 1); /* last two chars must be NULL */
             if (!outstr) {
                 return CL_EMEM;
             }
             memcpy(outstr, (const char *)(databuf+offset), strsize);
             /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
             outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
             if (!outstr2) {
                 /* use base64 encoding when all else fails! */
                 char b64jstr[PROPSTRLIMIT];
 
                 outstr2 = cl_base64_encode(outstr, strsize);
                 if (!outstr2) {
                     free(outstr);
                     return CL_EMEM;
                 }
 
                 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
                 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
                 if (ret != CL_SUCCESS)
                     return ret;
             }
 
             ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
             free(outstr);
             free(outstr2);
             break;
 	}
     case PT_FILETIME:
 	{
             uint32_t ltime, htime;
             uint64_t wtime = 0, utime =0;
 
             if (offset+sizeof(ltime)+sizeof(htime) > sctx->pssize) {
                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
                 return CL_EFORMAT;
             }
             memcpy(&ltime, databuf+offset, sizeof(ltime));
             offset+=sizeof(ltime);
             memcpy(&htime, databuf+offset, sizeof(htime));
             offset+=sizeof(ltime);
             ltime = sum32_endian_convert(ltime);
             htime = sum32_endian_convert(htime);
 
             /* UNIX timestamp formatting */
             wtime = htime;
             wtime <<= 32;
             wtime |= ltime;
 
             utime = wtime / 10000000;
             utime -= 11644473600LL;
 
             if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
                 cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
             }
             else {
                 ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
             }
             break;
 	}
     default:
         cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n", 
                    proptype, sctx->propname);
         sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
     }
 
     return ret;
 }
 
 static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
 {
     switch(propid) {
     case DSPID_CODEPAGE:
         sctx->writecp = 1; /* must be set ONLY for codepage */
         sctx->propname = "CodePage";
         break;
     case DSPID_CATEGORY:
         sctx->propname = "Category";
         break;
     case DSPID_PRESFORMAT:
         sctx->propname = "PresentationTarget";
         break;
     case DSPID_BYTECOUNT:
         sctx->propname = "Bytes";
         break;
     case DSPID_LINECOUNT:
         sctx->propname = "Lines";
         break;
     case DSPID_PARCOUNT:
         sctx->propname = "Paragraphs";
         break;
     case DSPID_SLIDECOUNT:
         sctx->propname = "Slides";
         break;
     case DSPID_NOTECOUNT:
         sctx->propname = "Notes";
         break;
     case DSPID_HIDDENCOUNT:
         sctx->propname = "HiddenSlides";
         break;
     case DSPID_MMCLIPCOUNT:
         sctx->propname = "MMClips";
         break;
     case DSPID_SCALE:
         sctx->propname = "Scale";
         break;
     case DSPID_HEADINGPAIR: /* VT_VARIANT | VT_VECTOR */
         sctx->propname = "HeadingPairs";
         break;
     case DSPID_DOCPARTS:    /* VT_VECTOR | VT_LPSTR */
         sctx->propname = "DocPartTitles";
         break;
     case DSPID_MANAGER:
         sctx->propname = "Manager";
         break;
     case DSPID_COMPANY:
         sctx->propname = "Company";
         break;
     case DSPID_LINKSDIRTY:
         sctx->propname = "LinksDirty";
         break;
     case DSPID_CCHWITHSPACES:
         sctx->propname = "Char&WSCount";
         break;
     case DSPID_SHAREDDOC:   /* SHOULD BE FALSE! */
         sctx->propname = "SharedDoc";
         break;
     case DSPID_LINKBASE:    /* moved to user-defined */
         sctx->propname = "LinkBase";
         break;
     case DSPID_HLINKS:      /* moved to user-defined */
         sctx->propname = "HyperLinks";
         break;
     case DSPID_HYPERLINKSCHANGED:
         sctx->propname = "HyperLinksChanged";
         break;
     case DSPID_VERSION:
         sctx->propname = "Version";
         break;
     case DSPID_DIGSIG:
         sctx->propname = "DigitalSig";
         break;
     case DSPID_CONTENTTYPE:
         sctx->propname = "ContentType";
         break;
     case DSPID_CONTENTSTATUS:
         sctx->propname = "ContentStatus";
         break;
     case DSPID_LANGUAGE:
         sctx->propname = "Language";
         break;
     case DSPID_DOCVERSION:
         sctx->propname = "DocVersion";
         break;
     default:
         cli_dbgmsg("ole2_docsum_propset_json: unrecognized propid!\n");
         sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
     }
 }
 
 static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
 {
     switch(propid) {
     case SPID_CODEPAGE:
         sctx->writecp = 1; /* must be set ONLY for codepage */
         sctx->propname = "CodePage";
         break;
     case SPID_TITLE:
         sctx->propname = "Title";
         break;
     case SPID_SUBJECT:
         sctx->propname = "Subject";
         break;
     case SPID_AUTHOR:
         sctx->propname = "Author";
         break;
     case SPID_KEYWORDS:
         sctx->propname = "Keywords";
         break;
     case SPID_COMMENTS:
         sctx->propname = "Comments";
         break;
     case SPID_TEMPLATE:
         sctx->propname = "Template";
         break;
     case SPID_LASTAUTHOR:
         sctx->propname = "LastAuthor";
         break;
     case SPID_REVNUMBER:
         sctx->propname = "RevNumber";
         break;
     case SPID_EDITTIME:
         sctx->propname = "EditTime";
         break;
     case SPID_LASTPRINTED:
         sctx->propname = "LastPrinted";
         break;
     case SPID_CREATEDTIME:
         sctx->propname = "CreatedTime";
         break;
     case SPID_MODIFIEDTIME:
         sctx->propname = "ModifiedTime";
         break;
     case SPID_PAGECOUNT:
         sctx->propname = "PageCount";
         break;
     case SPID_WORDCOUNT:
         sctx->propname = "WordCount";
         break;
     case SPID_CHARCOUNT:
         sctx->propname = "CharCount";
         break;
     case SPID_THUMBNAIL:
         sctx->propname = "Thumbnail";
         break;
     case SPID_APPNAME:
         sctx->propname = "AppName";
         break;
     case SPID_SECURITY:
         sctx->propname = "Security";
         break;
     default:
         cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
         sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
     }
 }
 
 static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
 {
     unsigned char *hdr, *ps;
     uint32_t numprops, limitprops;
     off_t foff = offset, psoff = 0;
     uint32_t poffset;
     int ret;
     unsigned int i;
 
     cli_dbgmsg("in ole2_summary_propset_json\n");
 
     /* summary ctx propset-specific setup*/
     sctx->codepage = 0;
     sctx->writecp = 0;
     sctx->propname = NULL;
 
     /* examine property set metadata */
     if ((foff+(2*sizeof(uint32_t))) > sctx->maplen) {
         sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return CL_EFORMAT;
     }
     hdr = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, (2*sizeof(uint32_t)));
     if (!hdr) {
         sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return CL_EREAD;
     }
     //foff+=(2*sizeof(uint32_t)); // keep foff pointing to start of propset segment
     psoff+=(2*sizeof(uint32_t));
     memcpy(&(sctx->pssize), hdr, sizeof(sctx->pssize));
     memcpy(&numprops, hdr+sizeof(sctx->pssize), sizeof(numprops));
     /* endian conversion */
     sctx->pssize = sum32_endian_convert(sctx->pssize);
     numprops = sum32_endian_convert(numprops);
     cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, numprops);
     if (numprops > PROPCNTLIMIT) {
         sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
         limitprops = PROPCNTLIMIT;
     }
     else {
         limitprops = numprops;
     }
7cd9337a
     cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) properties\n",
f53bed0b
                limitprops, numprops, PROPCNTLIMIT);
 
     /* extract remaining fragment of propset */
     if ((size_t)(foff+(sctx->pssize)) > (size_t)(sctx->maplen)) {
         sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return CL_EFORMAT;
     }
     ps = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, sctx->pssize);
     if (!ps) {
         sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return CL_EREAD;
     }
 
     /* iterate over the properties */
     for (i = 0; i < limitprops; ++i) {
         uint32_t propid, propoff;
 
         if (psoff+sizeof(propid)+sizeof(poffset) > sctx->pssize) {
             sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
             return CL_EFORMAT;
         }
         memcpy(&propid, ps+psoff, sizeof(propid));
         psoff+=sizeof(propid);
         memcpy(&propoff, ps+psoff, sizeof(propoff));
         psoff+=sizeof(propoff);
         /* endian conversion */
         propid = sum32_endian_convert(propid);
         propoff = sum32_endian_convert(propoff);
         cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);
 
         sctx->propname = NULL; sctx->writecp = 0;
9a1232d2
         switch (sctx->mode) {
         case 1:
f53bed0b
             ole2_translate_docsummary_propid(sctx, propid);
9a1232d2
             break;
         default:
             ole2_translate_summary_propid(sctx, propid);
         }
f53bed0b
 
         if (sctx->propname != NULL) {
             ret = ole2_process_property(sctx, ps, propoff);
             if (ret != CL_SUCCESS)
                 return ret;
         }
         else {
             /* add unknown propid flag */
         }
     }
 
     return CL_SUCCESS;
 }
 
 static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
 {
     json_object *jarr;
 
     cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);
 
     if (sctx->sfmap) {
         funmap(sctx->sfmap);
     }
 
     if (sctx->flags) {
         jarr = cli_jsonarray(sctx->summary, "ParseErrors");
 
         /* summary errors */
         if (sctx->flags & OLE2_SUMMARY_ERROR_TOOSMALL) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_TOOSMALL");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_OOB) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_OOB");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_DATABUF) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_DATABUF");
         }
         if (sctx->flags & OLE2_SUMMARY_ERROR_INVALID_ENTRY) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_INVALID_ENTRY");
         }
         if (sctx->flags & OLE2_SUMMARY_LIMIT_PROPS) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_LIMIT_PROPS");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_TIMEOUT) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TIMEOUT");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_CODEPAGE) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_CODEPAGE");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_UNKNOWN_PROPID) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE");
         }
         if (sctx->flags & OLE2_SUMMARY_FLAG_TRUNC_STR) {
             cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TRUNC_STR");
         }
 
         /* codepage translation errors */
         if (sctx->flags & OLE2_CODEPAGE_ERROR_NOTFOUND) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_NOTFOUND");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_UNINITED) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_UNINITED");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_INVALID) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INVALID");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_INCOMPLETE) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INCOMPLETE");
         }
         if (sctx->flags & OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL) {
             cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL");
         }
     }
 
     return retcode;
 }
 
 int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
 {
     summary_ctx_t sctx;
     STATBUF statbuf;
     off_t foff = 0;
     unsigned char *databuf;
     summary_stub_t sumstub;
     propset_entry_t pentry;
     int ret = CL_SUCCESS;
 
     cli_dbgmsg("in cli_ole2_summary_json\n");
 
     /* preliminary sanity checks */
     if (ctx == NULL) {
         return CL_ENULLARG;
     }
 
     if (fd < 0) {
         cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
         return CL_ENULLARG; /* placeholder */
     }
 
9a1232d2
     if (mode < 0 && mode > 2) {
f53bed0b
         cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
         return CL_ENULLARG; /* placeholder */
     }
 
     /* summary ctx setup */
     memset(&sctx, 0, sizeof(sctx));
     sctx.ctx = ctx;
     sctx.mode = mode;
 
     if (FSTAT(fd, &statbuf) == -1) {
         cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
         return CL_ESTAT;
     }
 
     sctx.sfmap = fmap(fd, 0, statbuf.st_size);
     if (!sctx.sfmap) {
         cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
         return CL_EMAP;
     }
     sctx.maplen = sctx.sfmap->len;
ce2dcb53
     cli_dbgmsg("ole2_summary_json: streamsize: %zu\n", sctx.maplen);
f53bed0b
 
9a1232d2
     switch (mode) {
     case 1:
f53bed0b
         sctx.summary = cli_jsonobj(ctx->wrkproperty, "DocSummaryInfo");
9a1232d2
         break;
     case 2:
404b1e3e
         sctx.summary = cli_jsonobj(ctx->wrkproperty, "Hwp5SummaryInfo");
9a1232d2
         break;
     case 0:
     default:
         sctx.summary = cli_jsonobj(ctx->wrkproperty, "SummaryInfo");
         break;
     }
 
 
f53bed0b
     if (!sctx.summary) {
         cli_errmsg("ole2_summary_json: no memory for json object.\n");
         return cli_ole2_summary_json_cleanup(&sctx, CL_EMEM);
     }
 
     sctx.codepage = 0;
     sctx.writecp = 0;
 
     /* acquire property stream metadata */
     if (sctx.maplen < sizeof(summary_stub_t)) {
         sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
     databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
     if (!databuf) {
         sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
     }
     foff += sizeof(summary_stub_t);
     memcpy(&sumstub, databuf, sizeof(summary_stub_t));
 
     /* endian conversion and checks */
     sumstub.byte_order = le16_to_host(sumstub.byte_order);
     if (sumstub.byte_order != 0xfffe) {
         cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
         sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);;
     }
     sumstub.version = sum16_endian_convert(sumstub.version); /*unused*/
     sumstub.system = sum32_endian_convert(sumstub.system); /*unused*/
     sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
     if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
         cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
         sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
 
     cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
     cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);
 
     /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
     if ((sctx.maplen-foff) < sizeof(propset_entry_t)) {
         sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
     }
     databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
     if (!databuf) {
         sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
         return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
     }
     foff += sizeof(propset_entry_t);
     memcpy(&pentry, databuf, sizeof(propset_entry_t));
     /* endian conversion */
     pentry.offset = sum32_endian_convert(pentry.offset);
 
     if ((ret = ole2_summary_propset_json(&sctx, pentry.offset)) != CL_SUCCESS) {
         return cli_ole2_summary_json_cleanup(&sctx, ret);
     }
 
     /* second property set (index=1) is always a custom property set (if present) */
     if (sumstub.num_propsets == 2) {
         cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
     }
 
     return cli_ole2_summary_json_cleanup(&sctx, CL_SUCCESS);
 }
 #endif /* HAVE_JSON */