/*
 * Extract component parts of OLE2 files (e.g. MS Office Documents)
 *
 * Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 * Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 * Authors: Kevin Lin
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <conv.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#if HAVE_ICONV
#include <iconv.h>
#endif
#include "clamav.h"
#include "others.h"
#include "msdoc.h"
#include "scanners.h"
#include "fmap.h"
#include "json_api.h"
#if HAVE_JSON
/**
 * Convert a raw property string into a newly allocated, NUL-terminated UTF-8
 * string.
 *
 * When no explicit encoding is given and the property set's codepage is
 * US-ASCII (20127) or UTF-8 (65001) the bytes are copied unchanged; for UTF-8
 * a truncated trailing multi-byte sequence is zeroed out. Otherwise, when
 * iconv is available, the bytes are converted from `encoding` (or from the
 * encoding looked up for sctx->codepage) to UTF-8.
 *
 * Conversion problems are recorded in sctx->flags using the
 * OLE2_CODEPAGE_ERROR_* bits; a partially converted string may still be
 * returned in that case.
 *
 * @param sctx      summary context (codepage is read, flags are updated).
 * @param begin     first byte of the input.
 * @param sz        number of input bytes.
 * @param encoding  iconv name of the source encoding, or NULL to derive it
 *                  from sctx->codepage.
 * @return heap buffer the caller must free(), or NULL when no conversion was
 *         possible (callers then fall back to base64).
 */
static char *
ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
{
    char *outbuf = NULL;
#if HAVE_ICONV
    char *buf, *p1, *p2;
    size_t offset, inlen, nonrev;
    size_t outlen = 0, sz2 = 0;
    int i, attempt;
    int conv_errno = 0;
    iconv_t cd;
#endif

    /*
     * Pass-through path, usable with or without iconv. It only applies when
     * the caller did not name an encoding: an explicit encoding (e.g. UTF-16
     * for wide strings) means the bytes are NOT in the codepage's encoding.
     */
    if (encoding == NULL && (sctx->codepage == 20127 || sctx->codepage == 65001)) {
        char *track;
        size_t bcnt, scnt;

        outbuf = cli_calloc(1, sz + 1);
        if (!outbuf)
            return NULL;

        /* an empty input has no "last byte" to inspect; return "" as is */
        if (sz == 0)
            return outbuf;

        memcpy(outbuf, begin, sz);

        track = outbuf + sz - 1;
        if ((sctx->codepage == 65001) && (*track & 0x80)) { /* UTF-8 with a most significant bit */
            /* walk back to the lead byte of the last character, counting its bytes */
            for (bcnt = 1; (track != outbuf); track--, bcnt++) {
                if (((uint8_t)*track & 0xC0) != 0x80)
                    break;
            }

            /* the count of leading 1 bits in the lead byte is the declared sequence length */
            for (scnt = 0; scnt < sizeof(uint8_t) * 8; scnt++) {
                if (((uint8_t)*track & (0x80 >> scnt)) == 0)
                    break;
            }

            /* a mismatch means the last character was cut short: blank it out */
            if (bcnt != scnt) {
                cli_dbgmsg("ole2_convert_utf: cleaning out %zu bytes from incomplete utf-8 character length %zu\n",
                           bcnt, scnt);
                for (; bcnt > 0; bcnt--, track++)
                    *track = '\0';
            }
        }
        return outbuf;
    }

#if HAVE_ICONV
    /* iconv advances its input pointer, so convert from a private copy */
    p1 = buf = cli_calloc(1, sz);
    if (!buf)
        return NULL;
    memcpy(buf, begin, sz);
    inlen = sz;

    /* encoding lookup if not specified */
    if (!encoding) {
        for (i = 0; i < NUMCODEPAGES; ++i) {
            if (sctx->codepage == codepage_entries[i].codepage)
                encoding = codepage_entries[i].encoding;
            else if (sctx->codepage < codepage_entries[i].codepage)
                break; /* assuming sorted array */
        }

        if (!encoding) {
            cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
            sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
            free(buf);
            return NULL;
        }
    }

    cd = iconv_open("UTF-8", encoding);
    if (cd == (iconv_t)(-1)) {
        char errbuf[128];

        cli_strerror(errno, errbuf, sizeof(errbuf));
        cli_errmsg("ole2_convert_utf: could not initialize iconv for encoding %s: %s\n", encoding, errbuf);
        sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
        /* there is no descriptor to close; just release the input copy */
        free(buf);
        return NULL;
    }

    offset = 0;
    for (attempt = 1; attempt <= 3; ++attempt) {
        char *grown;

        /* charset to UTF-8 should never exceed sz*6 */
        sz2 = (attempt * 2) * sz;

        /* grow in place so the part that is already translated is kept */
        grown = cli_realloc(outbuf, sz2 + 1);
        if (!grown) {
            /* NOTE(review): assumes cli_realloc leaves the old block allocated on failure, like realloc() */
            free(outbuf);
            free(buf);
            iconv_close(cd);
            return NULL;
        }
        outbuf = grown;

        outlen = sz2 - offset;
        p2     = outbuf + offset;

        /* conversion; errno is only meaningful when iconv() reports failure */
        errno      = 0;
        nonrev     = iconv(cd, &p1, &inlen, &p2, &outlen);
        conv_errno = (nonrev == (size_t)-1) ? errno : 0;

        if (conv_errno == EILSEQ) {
            cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
            sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
            break;
        } else if (conv_errno == EINVAL) {
            cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
            sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
            break;
        } else if (inlen == 0) {
            /* the whole input has been translated */
            break;
        }

        /* output buffer was too small: remember how far we got and retry bigger */
        offset = sz2 - outlen;
        if (attempt < 3)
            cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
                       (long long unsigned)((attempt * 2) * sz), (long long unsigned)(((attempt + 1) * 2) * sz));
    }

    if (conv_errno == E2BIG) {
        cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
        sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
    }

    /* terminate right after the last byte iconv produced */
    outbuf[sz2 - outlen] = '\0';

    iconv_close(cd);
    free(buf);
#endif

    /* this should force base64 encoding if NULL */
    return outbuf;
}
static int
ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
{
uint16_t proptype, padding;
int ret = CL_SUCCESS;
if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
return CL_ETIMEOUT;
}
|
288057e9 |
if (offset + sizeof(proptype) + sizeof(padding) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
}
|
288057e9 |
memcpy(&proptype, databuf + offset, sizeof(proptype));
offset += sizeof(proptype);
memcpy(&padding, databuf + offset, sizeof(padding));
offset += sizeof(padding); |
f53bed0b |
/* endian conversion */
proptype = sum16_endian_convert(proptype);
//cli_dbgmsg("proptype: 0x%04x\n", proptype);
if (padding != 0) {
cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
return CL_EFORMAT;
}
switch (proptype) { |
288057e9 |
case PT_EMPTY:
case PT_NULL:
ret = cli_jsonnull(sctx->summary, sctx->propname);
break;
case PT_INT16: { |
f53bed0b |
int16_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum16_endian_convert(dout);
if (sctx->writecp) {
sctx->codepage = (uint16_t)dout; |
288057e9 |
ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
} else |
f53bed0b |
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_INT32:
case PT_INT32v1: { |
f53bed0b |
int32_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum32_endian_convert(dout);
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_FLOAT32: /* review this please */
{ |
f53bed0b |
float dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum32_endian_convert(dout);
ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_DATE:
case PT_DOUBLE64: /* review this please */
{ |
f53bed0b |
double dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum64_endian_convert(dout);
ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_BOOL: { |
f53bed0b |
uint16_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* no need for endian conversion */
ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_INT8v1: { |
f53bed0b |
int8_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* no need for endian conversion */
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_UINT8: { |
f53bed0b |
uint8_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* no need for endian conversion */
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_UINT16: { |
f53bed0b |
uint16_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum16_endian_convert(dout);
if (sctx->writecp)
sctx->codepage = dout;
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_UINT32:
case PT_UINT32v1: { |
f53bed0b |
uint32_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum32_endian_convert(dout);
ret = cli_jsonint(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_INT64: { |
f53bed0b |
int64_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum64_endian_convert(dout);
ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
break; |
288057e9 |
}
case PT_UINT64: { |
f53bed0b |
uint64_t dout; |
288057e9 |
if (offset + sizeof(dout) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&dout, databuf + offset, sizeof(dout));
offset += sizeof(dout); |
f53bed0b |
/* endian conversion */
dout = sum64_endian_convert(dout);
ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
break;
} |
288057e9 |
case PT_BSTR:
case PT_LPSTR:
if (sctx->codepage == 0) {
cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
} else {
uint32_t strsize;
char *outstr, *outstr2;
if (offset + sizeof(strsize) > sctx->pssize) {
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
f53bed0b |
|
288057e9 |
memcpy(&strsize, databuf + offset, sizeof(strsize));
offset += sizeof(strsize);
/* endian conversion? */
strsize = sum32_endian_convert(strsize); |
f53bed0b |
|
288057e9 |
if (offset + strsize > sctx->pssize) {
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
f53bed0b |
|
288057e9 |
/* limitation on string length */
if (strsize > PROPSTRLIMIT) {
cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
(unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
strsize = PROPSTRLIMIT;
} |
f53bed0b |
|
288057e9 |
outstr = cli_calloc(strsize + 1, 1); /* last char must be NULL */
if (!outstr) {
return CL_EMEM;
}
strncpy(outstr, (const char *)(databuf + offset), strsize); |
f53bed0b |
|
288057e9 |
/* conversion of various encodings to UTF-8 */
outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL); |
f53bed0b |
if (!outstr2) { |
288057e9 |
/* use base64 encoding when all else fails! */
char b64jstr[PROPSTRLIMIT];
/* outstr2 should be 4/3 times the original (rounded up) */
outstr2 = cl_base64_encode(outstr, strsize);
if (!outstr2) {
cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
return CL_EMEM;
}
snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
ret = cli_jsonbool(sctx->summary, b64jstr, 1);
if (ret != CL_SUCCESS)
return ret; |
f53bed0b |
}
|
288057e9 |
ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
free(outstr);
free(outstr2); |
f53bed0b |
} |
288057e9 |
break;
case PT_LPWSTR: { |
f53bed0b |
uint32_t strsize;
char *outstr, *outstr2;
|
288057e9 |
if (offset + sizeof(strsize) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(&strsize, databuf + offset, sizeof(strsize));
offset += sizeof(strsize); |
f53bed0b |
/* endian conversion; wide strings are by length, not size (x2) */ |
288057e9 |
strsize = sum32_endian_convert(strsize) * 2; |
f53bed0b |
/* limitation on string length */ |
288057e9 |
if (strsize > (2 * PROPSTRLIMIT)) { |
f53bed0b |
cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n", |
288057e9 |
(unsigned long)strsize, (unsigned long)(2 * PROPSTRLIMIT)); |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR; |
288057e9 |
strsize = (2 * PROPSTRLIMIT); |
f53bed0b |
}
|
288057e9 |
if (offset + strsize > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
outstr = cli_calloc(strsize + 2, 1); /* last two chars must be NULL */ |
f53bed0b |
if (!outstr) {
return CL_EMEM;
} |
288057e9 |
memcpy(outstr, (const char *)(databuf + offset), strsize); |
f53bed0b |
/* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
if (!outstr2) {
/* use base64 encoding when all else fails! */
char b64jstr[PROPSTRLIMIT];
outstr2 = cl_base64_encode(outstr, strsize);
if (!outstr2) {
free(outstr);
return CL_EMEM;
}
snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
ret = cli_jsonbool(sctx->summary, b64jstr, 1); |
50876732 |
if (ret != CL_SUCCESS) {
free(outstr);
free(outstr2); |
f53bed0b |
return ret; |
50876732 |
} |
f53bed0b |
}
ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
free(outstr);
free(outstr2);
break; |
288057e9 |
}
case PT_FILETIME: { |
f53bed0b |
uint32_t ltime, htime; |
288057e9 |
uint64_t wtime = 0, utime = 0; |
f53bed0b |
|
288057e9 |
if (offset + sizeof(ltime) + sizeof(htime) > sctx->pssize) { |
f53bed0b |
sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
return CL_EFORMAT;
} |
288057e9 |
memcpy(<ime, databuf + offset, sizeof(ltime));
offset += sizeof(ltime);
memcpy(&htime, databuf + offset, sizeof(htime));
offset += sizeof(ltime); |
f53bed0b |
ltime = sum32_endian_convert(ltime);
htime = sum32_endian_convert(htime);
/* UNIX timestamp formatting */
wtime = htime;
wtime <<= 32;
wtime |= ltime;
utime = wtime / 10000000;
utime -= 11644473600LL;
if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n"); |
288057e9 |
} else { |
f53bed0b |
ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
}
break; |
288057e9 |
}
default:
cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n",
proptype, sctx->propname);
sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE; |
f53bed0b |
}
return ret;
}
/**
 * Map a DocumentSummaryInformation property id to the JSON key it is
 * reported under.
 *
 * On a match sctx->propname is set to the key; for the codepage property
 * sctx->writecp is additionally set so the decoded value is stored as the
 * active codepage. For an unrecognised id, propname is left untouched and
 * OLE2_SUMMARY_FLAG_UNKNOWN_PROPID is raised in sctx->flags.
 *
 * @param sctx    summary context to update.
 * @param propid  property identifier read from the property set.
 */
static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    switch (propid) {
        case DSPID_CODEPAGE:
            sctx->writecp  = 1; /* must be set ONLY for codepage */
            sctx->propname = "CodePage";
            break;
        case DSPID_CATEGORY:
            sctx->propname = "Category";
            break;
        case DSPID_PRESFORMAT:
            sctx->propname = "PresentationTarget";
            break;
        case DSPID_BYTECOUNT:
            sctx->propname = "Bytes";
            break;
        case DSPID_LINECOUNT:
            sctx->propname = "Lines";
            break;
        case DSPID_PARCOUNT:
            sctx->propname = "Paragraphs";
            break;
        case DSPID_SLIDECOUNT:
            sctx->propname = "Slides";
            break;
        case DSPID_NOTECOUNT:
            sctx->propname = "Notes";
            break;
        case DSPID_HIDDENCOUNT:
            sctx->propname = "HiddenSlides";
            break;
        case DSPID_MMCLIPCOUNT:
            sctx->propname = "MMClips";
            break;
        case DSPID_SCALE:
            sctx->propname = "Scale";
            break;
        case DSPID_HEADINGPAIR: /* VT_VARIANT | VT_VECTOR */
            sctx->propname = "HeadingPairs";
            break;
        case DSPID_DOCPARTS: /* VT_VECTOR | VT_LPSTR */
            sctx->propname = "DocPartTitles";
            break;
        case DSPID_MANAGER:
            sctx->propname = "Manager";
            break;
        case DSPID_COMPANY:
            sctx->propname = "Company";
            break;
        case DSPID_LINKSDIRTY:
            sctx->propname = "LinksDirty";
            break;
        case DSPID_CCHWITHSPACES:
            sctx->propname = "Char&WSCount";
            break;
        case DSPID_SHAREDDOC: /* SHOULD BE FALSE! */
            sctx->propname = "SharedDoc";
            break;
        case DSPID_LINKBASE: /* moved to user-defined */
            sctx->propname = "LinkBase";
            break;
        case DSPID_HLINKS: /* moved to user-defined */
            sctx->propname = "HyperLinks";
            break;
        case DSPID_HYPERLINKSCHANGED:
            sctx->propname = "HyperLinksChanged";
            break;
        case DSPID_VERSION:
            sctx->propname = "Version";
            break;
        case DSPID_DIGSIG:
            sctx->propname = "DigitalSig";
            break;
        case DSPID_CONTENTTYPE:
            sctx->propname = "ContentType";
            break;
        case DSPID_CONTENTSTATUS:
            sctx->propname = "ContentStatus";
            break;
        case DSPID_LANGUAGE:
            sctx->propname = "Language";
            break;
        case DSPID_DOCVERSION:
            sctx->propname = "DocVersion";
            break;
        default:
            cli_dbgmsg("ole2_translate_docsummary_propid: unrecognized propid!\n");
            sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
    }
}
/**
 * Map a SummaryInformation property id to the JSON key it is reported under.
 *
 * On a match sctx->propname is set to the key; for the codepage property
 * sctx->writecp is set as well. For an unrecognised id, propname is left
 * untouched and OLE2_SUMMARY_FLAG_UNKNOWN_PROPID is raised in sctx->flags.
 *
 * @param sctx    summary context to update.
 * @param propid  property identifier read from the property set.
 */
static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    /* property id -> JSON key */
    static const struct {
        uint32_t id;
        const char *label;
    } known[] = {
        {SPID_CODEPAGE, "CodePage"},
        {SPID_TITLE, "Title"},
        {SPID_SUBJECT, "Subject"},
        {SPID_AUTHOR, "Author"},
        {SPID_KEYWORDS, "Keywords"},
        {SPID_COMMENTS, "Comments"},
        {SPID_TEMPLATE, "Template"},
        {SPID_LASTAUTHOR, "LastAuthor"},
        {SPID_REVNUMBER, "RevNumber"},
        {SPID_EDITTIME, "EditTime"},
        {SPID_LASTPRINTED, "LastPrinted"},
        {SPID_CREATEDTIME, "CreatedTime"},
        {SPID_MODIFIEDTIME, "ModifiedTime"},
        {SPID_PAGECOUNT, "PageCount"},
        {SPID_WORDCOUNT, "WordCount"},
        {SPID_CHARCOUNT, "CharCount"},
        {SPID_THUMBNAIL, "Thumbnail"},
        {SPID_APPNAME, "AppName"},
        {SPID_SECURITY, "Security"},
    };
    size_t k;

    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (known[k].id != propid)
            continue;

        if (propid == SPID_CODEPAGE)
            sctx->writecp = 1; /* must be set ONLY for codepage */
        sctx->propname = known[k].label;
        return;
    }

    cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
    sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
}
/**
 * Parse one property set located at `offset` in the mapped stream and add
 * each recognised property to the summary JSON object.
 *
 * The set starts with two 32-bit fields (its size in bytes and the number of
 * properties), followed by one (id, offset) pair per property; each offset is
 * relative to the start of the set. At most PROPCNTLIMIT properties are
 * processed.
 *
 * @param sctx    summary context; codepage/writecp/propname are reset here,
 *                pssize is filled in, flags collect any problems.
 * @param offset  offset of the property set within the mapped stream.
 * @return CL_SUCCESS, CL_EFORMAT when the data does not fit the stream or the
 *         set, CL_EREAD when the map cannot supply the bytes, or the first
 *         error returned while processing a property.
 */
static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
{
    const size_t hdrlen = 2 * sizeof(uint32_t);
    unsigned char *header, *propset;
    uint32_t declared, capped;
    off_t base   = offset;
    off_t cursor = 0;
    unsigned int idx;
    int status;

    cli_dbgmsg("in ole2_summary_propset_json\n");

    /* every property set starts with no codepage and no pending property */
    sctx->codepage = 0;
    sctx->writecp  = 0;
    sctx->propname = NULL;

    /* examine property set metadata */
    if ((base + hdrlen) > sctx->maplen) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }

    header = (unsigned char *)fmap_need_off_once(sctx->sfmap, base, hdrlen);
    if (header == NULL) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }

    /* `base` keeps pointing at the start of the set; only the cursor moves */
    cursor += hdrlen;
    memcpy(&(sctx->pssize), header, sizeof(sctx->pssize));
    memcpy(&declared, header + sizeof(sctx->pssize), sizeof(declared));

    /* endian conversion */
    sctx->pssize = sum32_endian_convert(sctx->pssize);
    declared     = sum32_endian_convert(declared);
    cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, declared);

    capped = declared;
    if (declared > PROPCNTLIMIT) {
        sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
        capped = PROPCNTLIMIT;
    }
    cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) properties\n",
               capped, declared, PROPCNTLIMIT);

    /* extract remaining fragment of propset */
    if ((size_t)(base + (sctx->pssize)) > (size_t)(sctx->maplen)) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }

    propset = (unsigned char *)fmap_need_off_once(sctx->sfmap, base, sctx->pssize);
    if (propset == NULL) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }

    /* iterate over the (id, offset) table */
    for (idx = 0; idx < capped; ++idx) {
        uint32_t propid, propoff;

        if (cursor + sizeof(propid) + sizeof(propoff) > sctx->pssize) {
            sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
            return CL_EFORMAT;
        }

        memcpy(&propid, propset + cursor, sizeof(propid));
        cursor += sizeof(propid);
        memcpy(&propoff, propset + cursor, sizeof(propoff));
        cursor += sizeof(propoff);

        /* endian conversion */
        propid  = sum32_endian_convert(propid);
        propoff = sum32_endian_convert(propoff);
        cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);

        /* resolve the JSON key; mode 1 selects the DocSummary id table */
        sctx->propname = NULL;
        sctx->writecp  = 0;
        if (sctx->mode == 1)
            ole2_translate_docsummary_propid(sctx, propid);
        else
            ole2_translate_summary_propid(sctx, propid);

        /* ids without a known name are skipped (the translator flags them) */
        if (sctx->propname == NULL)
            continue;

        status = ole2_process_property(sctx, propset, propoff);
        if (status != CL_SUCCESS)
            return status;
    }

    return CL_SUCCESS;
}
/**
 * Release the stream mapping and record every raised flag in the JSON output.
 *
 * If any bit of sctx->flags is set, a "ParseErrors" array is added to the
 * summary object containing the name of each raised flag.
 *
 * @param sctx     summary context being torn down.
 * @param retcode  status to hand back to the caller.
 * @return `retcode`, unchanged.
 */
static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
{
    json_object *errlist;

/* append the flag's own identifier to the error list when the flag is raised */
#define OLE2_REPORT_FLAG(flag)                  \
    do {                                        \
        if (sctx->flags & (flag))               \
            cli_jsonstr(errlist, NULL, #flag);  \
    } while (0)

    cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);

    if (sctx->sfmap)
        funmap(sctx->sfmap);

    if (!sctx->flags)
        return retcode;

    errlist = cli_jsonarray(sctx->summary, "ParseErrors");

    /* summary errors */
    OLE2_REPORT_FLAG(OLE2_SUMMARY_ERROR_TOOSMALL);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_ERROR_OOB);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_ERROR_DATABUF);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_ERROR_INVALID_ENTRY);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_LIMIT_PROPS);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_FLAG_TIMEOUT);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_FLAG_CODEPAGE);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_FLAG_UNKNOWN_PROPID);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE);
    OLE2_REPORT_FLAG(OLE2_SUMMARY_FLAG_TRUNC_STR);

    /* codepage translation errors */
    OLE2_REPORT_FLAG(OLE2_CODEPAGE_ERROR_NOTFOUND);
    OLE2_REPORT_FLAG(OLE2_CODEPAGE_ERROR_UNINITED);
    OLE2_REPORT_FLAG(OLE2_CODEPAGE_ERROR_INVALID);
    OLE2_REPORT_FLAG(OLE2_CODEPAGE_ERROR_INCOMPLETE);
    OLE2_REPORT_FLAG(OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL);

#undef OLE2_REPORT_FLAG

    return retcode;
}
/**
 * Parse an OLE2 summary stream and add its properties to the scan's JSON
 * output.
 *
 * The stream behind `fd` is mapped, its header is validated (byte-order mark
 * 0xfffe, one or two property sets) and the first property set is decoded.
 * A second property set is only reported through a
 * "HasUserDefinedProperties" boolean.
 *
 * @param ctx   scan context; results are attached under ctx->wrkproperty.
 * @param fd    descriptor of the extracted summary stream.
 * @param mode  0 for "SummaryInfo", 1 for "DocSummaryInfo", 2 for
 *              "Hwp5SummaryInfo".
 * @return CL_SUCCESS on success; CL_ENULLARG for bad arguments, CL_ESTAT or
 *         CL_EMAP when the stream cannot be mapped, CL_EMEM, CL_EFORMAT,
 *         CL_EREAD, or an error from property-set parsing otherwise.
 */
int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
{
    summary_ctx_t sctx;
    STATBUF statbuf;
    off_t foff = 0;
    unsigned char *databuf;
    summary_stub_t sumstub;
    propset_entry_t pentry;
    const char *objname;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_ole2_summary_json\n");

    /* preliminary sanity checks */
    if (ctx == NULL)
        return CL_ENULLARG;

    if (fd < 0) {
        cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
        return CL_ENULLARG; /* placeholder */
    }

    if (mode < 0 || mode > 2) {
        cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
        return CL_ENULLARG; /* placeholder */
    }

    /* summary ctx setup */
    memset(&sctx, 0, sizeof(sctx));
    sctx.ctx  = ctx;
    sctx.mode = mode;

    if (FSTAT(fd, &statbuf) == -1) {
        cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
        return CL_ESTAT;
    }

    sctx.sfmap = fmap(fd, 0, statbuf.st_size);
    if (!sctx.sfmap) {
        cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
        return CL_EMAP;
    }
    sctx.maplen = sctx.sfmap->len;
    cli_dbgmsg("ole2_summary_json: streamsize: %zu\n", sctx.maplen);

    /* from here on every exit goes through `done` so the map gets released */

    /* the JSON object name depends on which summary stream this is */
    if (mode == 1)
        objname = "DocSummaryInfo";
    else if (mode == 2)
        objname = "Hwp5SummaryInfo";
    else
        objname = "SummaryInfo";

    sctx.summary = cli_jsonobj(ctx->wrkproperty, objname);
    if (!sctx.summary) {
        cli_errmsg("ole2_summary_json: no memory for json object.\n");
        ret = CL_EMEM;
        goto done;
    }

    sctx.codepage = 0;
    sctx.writecp  = 0;

    /* acquire property stream metadata */
    if (sctx.maplen < sizeof(summary_stub_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        ret = CL_EFORMAT;
        goto done;
    }

    databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
    if (!databuf) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        ret = CL_EREAD;
        goto done;
    }
    foff += sizeof(summary_stub_t);
    memcpy(&sumstub, databuf, sizeof(summary_stub_t));

    /* endian conversion and checks */
    sumstub.byte_order = le16_to_host(sumstub.byte_order);
    if (sumstub.byte_order != 0xfffe) {
        cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        ret = CL_EFORMAT;
        goto done;
    }

    sumstub.version      = sum16_endian_convert(sumstub.version); /*unused*/
    sumstub.system       = sum32_endian_convert(sumstub.system);  /*unused*/
    sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
    if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
        cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        ret = CL_EFORMAT;
        goto done;
    }

    cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
    cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);

    /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
    if ((sctx.maplen - foff) < sizeof(propset_entry_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        ret = CL_EFORMAT;
        goto done;
    }

    databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
    if (!databuf) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        ret = CL_EREAD;
        goto done;
    }
    foff += sizeof(propset_entry_t);
    memcpy(&pentry, databuf, sizeof(propset_entry_t));

    /* endian conversion */
    pentry.offset = sum32_endian_convert(pentry.offset);

    ret = ole2_summary_propset_json(&sctx, pentry.offset);
    if (ret != CL_SUCCESS)
        goto done;

    /* second property set (index=1) is always a custom property set (if present) */
    if (sumstub.num_propsets == 2)
        cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);

done:
    return cli_ole2_summary_json_cleanup(&sctx, ret);
}
#endif /* HAVE_JSON */