...
|
...
|
@@ -1323,7 +1323,8 @@ abort:
|
1323
|
1323
|
|
1324
|
1324
|
#define WINUNICODE 0x04B0
|
1325
|
1325
|
#define PROPCNTLIMIT 25
|
1326
|
|
-#define PROPSTRLIMIT 128 /* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
|
|
1326
|
+#define PROPSTRLIMIT 256 /* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
|
|
1327
|
+#define UTF16_MS "UTF-16LE"
|
1327
|
1328
|
|
1328
|
1329
|
#define sum16_endian_convert(v) le16_to_host((uint16_t)(v))
|
1329
|
1330
|
#define sum32_endian_convert(v) le32_to_host((uint32_t)(v))
|
...
|
...
|
@@ -1449,7 +1450,7 @@ typedef struct summary_ctx {
|
1449
|
1449
|
|
1450
|
1450
|
/* propset metadata */
|
1451
|
1451
|
uint32_t pssize; /* track from propset start, not tail start */
|
1452
|
|
- int16_t codepage;
|
|
1452
|
+ uint16_t codepage;
|
1453
|
1453
|
int writecp;
|
1454
|
1454
|
|
1455
|
1455
|
/* property metadata */
|
...
|
...
|
@@ -1465,8 +1466,9 @@ struct codepage_entry {
|
1465
|
1465
|
const char *encoding;
|
1466
|
1466
|
};
|
1467
|
1467
|
|
1468
|
|
-#define NUMCODEPAGES 152
|
1469
|
|
-static const struct codepage_entry codepage_entries[NUMCODEPAGES] = {
|
|
1468
|
+#define NUMCODEPAGES (sizeof(codepage_entries)/sizeof(codepage_entries[0])) /* element count of codepage_entries; parenthesized so it stays correct inside larger expressions */
|
|
1469
|
+/* MAINTAIN - keep the array sorted by codepage value */
|
|
1470
|
+static const struct codepage_entry codepage_entries[] = {
|
1470
|
1471
|
{ 37, "IBM037" }, /* IBM EBCDIC US-Canada */
|
1471
|
1472
|
{ 437, "IBM437" }, /* OEM United States */
|
1472
|
1473
|
{ 500, "IBM500" }, /* IBM EBCDIC International */
|
...
|
...
|
@@ -1624,20 +1626,26 @@ static const struct codepage_entry codepage_entries[NUMCODEPAGES] = {
|
1624
|
1624
|
static char *
|
1625
|
1625
|
ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
|
1626
|
1626
|
{
|
|
1627
|
+ char *outbuf=NULL;
|
1627
|
1628
|
#if HAVE_ICONV
|
1628
|
|
- char *res=NULL;
|
1629
|
|
- char *buf, *outbuf, *p1, *p2;
|
|
1629
|
+ char *buf, *p1, *p2;
|
|
1630
|
+ off_t offset;
|
1630
|
1631
|
size_t inlen, outlen, nonrev, sz2;
|
1631
|
1632
|
int i, try;
|
1632
|
1633
|
iconv_t cd;
|
|
1634
|
+#endif
|
|
1635
|
+ /* applies in both cases (with or without iconv) */
|
|
1636
|
+ if (sctx->codepage == 20127 || sctx->codepage == 65001) {
|
|
1637
|
+ outbuf = cli_strdup(begin);
|
|
1638
|
+ return outbuf;
|
|
1639
|
+ }
|
1633
|
1640
|
|
1634
|
|
- buf = cli_calloc(1, sz);
|
|
1641
|
+#if HAVE_ICONV
|
|
1642
|
+ p1 = buf = cli_calloc(1, sz);
|
1635
|
1643
|
if (!(buf))
|
1636
|
1644
|
return NULL;
|
1637
|
1645
|
|
1638
|
1646
|
memcpy(buf, begin, sz);
|
1639
|
|
-
|
1640
|
|
- outbuf = NULL;
|
1641
|
1647
|
inlen = sz;
|
1642
|
1648
|
|
1643
|
1649
|
/* encoding lookup if not specified */
|
...
|
...
|
@@ -1665,19 +1673,22 @@ ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encodi
|
1665
|
1665
|
sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
|
1666
|
1666
|
}
|
1667
|
1667
|
else {
|
|
1668
|
+ offset = 0;
|
1668
|
1669
|
for (try = 1; try <= 3; ++try) {
|
1669
|
|
- p1 = buf;
|
1670
|
|
-
|
1671
|
|
- if (outbuf)
|
1672
|
|
- free(outbuf);
|
1673
|
|
- outlen = sz2 = (try*2) * sz;
|
1674
|
|
- p2 = outbuf = cli_calloc(1, sz2);
|
|
1670
|
+ /* charset to UTF-8 should never exceed sz*6 */
|
|
1671
|
+ sz2 = (try*2) * sz;
|
|
1672
|
+ /* use cli_realloc, reuse the buffer that has already been translated */
|
|
1673
|
+ outbuf = (char *)cli_realloc(outbuf, sz2+1);
|
1675
|
1674
|
if (!outbuf) {
|
1676
|
1675
|
free(buf);
|
1677
|
1676
|
return NULL;
|
1678
|
1677
|
}
|
1679
|
1678
|
|
1680
|
|
- nonrev = iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
|
|
1679
|
+ outlen = sz2 - offset;
|
|
1680
|
+ p2 = outbuf + offset;
|
|
1681
|
+
|
|
1682
|
+ /* conversion */
|
|
1683
|
+ nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
|
1681
|
1684
|
|
1682
|
1685
|
if (errno == EILSEQ) {
|
1683
|
1686
|
cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
|
...
|
...
|
@@ -1694,27 +1705,28 @@ ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encodi
|
1694
|
1694
|
break;
|
1695
|
1695
|
}
|
1696
|
1696
|
|
1697
|
|
- cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
|
1698
|
|
- (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
|
|
1697
|
+ //outbuf[sz2 - outlen] = '\0';
|
|
1698
|
+ //cli_dbgmsg("%u %s\n", inlen, outbuf);
|
|
1699
|
+
|
|
1700
|
+ offset = sz2 - outlen;
|
|
1701
|
+ if (try < 3)
|
|
1702
|
+ cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
|
|
1703
|
+ (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
|
1699
|
1704
|
}
|
1700
|
1705
|
|
1701
|
|
- if (inlen != 0 || (errno == E2BIG && nonrev == (size_t)-1)) {
|
|
1706
|
+ if (errno == E2BIG && nonrev == (size_t)-1) {
|
1702
|
1707
|
cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
|
1703
|
1708
|
sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
|
1704
|
1709
|
}
|
1705
|
1710
|
|
1706
|
1711
|
outbuf[sz2 - outlen] = '\0';
|
1707
|
|
- res = strdup(outbuf);
|
1708
|
1712
|
}
|
1709
|
1713
|
|
1710
|
1714
|
iconv_close(cd);
|
1711
|
1715
|
free(buf);
|
1712
|
|
- free(outbuf);
|
1713
|
|
- return res;
|
1714
|
|
-#else
|
1715
|
|
- /* this should force base64 encoding */
|
1716
|
|
- return NULL;
|
1717
|
1716
|
#endif
|
|
1717
|
+ /* this should force base64 encoding if NULL */
|
|
1718
|
+ return outbuf;
|
1718
|
1719
|
}
|
1719
|
1720
|
|
1720
|
1721
|
static int
|
...
|
...
|
@@ -1764,10 +1776,12 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1764
|
1764
|
/* endian conversion */
|
1765
|
1765
|
dout = sum16_endian_convert(dout);
|
1766
|
1766
|
|
1767
|
|
- if (sctx->writecp)
|
1768
|
|
- sctx->codepage = dout;
|
1769
|
|
-
|
1770
|
|
- ret = cli_jsonint(sctx->summary, sctx->propname, dout);
|
|
1767
|
+ if (sctx->writecp) {
|
|
1768
|
+ sctx->codepage = (uint16_t)dout;
|
|
1769
|
+ ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
|
|
1770
|
+ }
|
|
1771
|
+ else
|
|
1772
|
+ ret = cli_jsonint(sctx->summary, sctx->propname, dout);
|
1771
|
1773
|
break;
|
1772
|
1774
|
}
|
1773
|
1775
|
case PT_INT32:
|
...
|
...
|
@@ -1928,9 +1942,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1928
|
1928
|
if (sctx->codepage == 0) {
|
1929
|
1929
|
cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
|
1930
|
1930
|
sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
|
1931
|
|
- break;
|
1932
|
1931
|
}
|
1933
|
|
- else if (sctx->codepage != WINUNICODE) {
|
|
1932
|
+ else {
|
1934
|
1933
|
uint32_t strsize;
|
1935
|
1934
|
char *outstr, *outstr2;
|
1936
|
1935
|
|
...
|
...
|
@@ -1941,7 +1954,7 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1941
|
1941
|
|
1942
|
1942
|
memcpy(&strsize, databuf+offset, sizeof(strsize));
|
1943
|
1943
|
offset+=sizeof(strsize);
|
1944
|
|
- /* endian conversion */
|
|
1944
|
+ /* endian conversion? */
|
1945
|
1945
|
strsize = sum32_endian_convert(strsize);
|
1946
|
1946
|
|
1947
|
1947
|
if (offset+strsize > sctx->pssize) {
|
...
|
...
|
@@ -1969,9 +1982,10 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1969
|
1969
|
/* use base64 encoding when all else fails! */
|
1970
|
1970
|
char b64jstr[PROPSTRLIMIT];
|
1971
|
1971
|
|
|
1972
|
+ /* outstr2 should be 4/3 times the original (rounded up) */
|
1972
|
1973
|
outstr2 = cl_base64_encode(outstr, strsize);
|
1973
|
1974
|
if (!outstr2) {
|
1974
|
|
- free(outstr);
|
|
1975
|
+ cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
|
1975
|
1976
|
return CL_EMEM;
|
1976
|
1977
|
}
|
1977
|
1978
|
|
...
|
...
|
@@ -1984,9 +1998,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1984
|
1984
|
ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
|
1985
|
1985
|
free(outstr);
|
1986
|
1986
|
free(outstr2);
|
1987
|
|
- break;
|
1988
|
1987
|
}
|
1989
|
|
- /* fall-through for unicode strings */
|
|
1988
|
+ break;
|
1990
|
1989
|
case PT_LPWSTR:
|
1991
|
1990
|
{
|
1992
|
1991
|
uint32_t strsize;
|
...
|
...
|
@@ -1998,19 +2011,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
1998
|
1998
|
}
|
1999
|
1999
|
memcpy(&strsize, databuf+offset, sizeof(strsize));
|
2000
|
2000
|
offset+=sizeof(strsize);
|
2001
|
|
- /* endian conversion */
|
2002
|
|
- strsize = sum32_endian_convert(strsize);
|
2003
|
|
-
|
2004
|
|
- if (proptype == PT_LPSTR) { /* fall-through specifics */
|
2005
|
|
- if (strsize % 2) {
|
2006
|
|
- cli_dbgmsg("ole2_process_property: LPSTR using wchar not sized a multiple of 2\n");
|
2007
|
|
- sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
|
2008
|
|
- return CL_EFORMAT;
|
2009
|
|
- }
|
2010
|
|
- }
|
2011
|
|
- else {
|
2012
|
|
- strsize*=2; /* Unicode strings are by length, not size */
|
2013
|
|
- }
|
|
2001
|
+ /* endian conversion; wide strings are by length, not size (x2) */
|
|
2002
|
+ strsize = sum32_endian_convert(strsize)*2;
|
2014
|
2003
|
|
2015
|
2004
|
/* limitation on string length */
|
2016
|
2005
|
if (strsize > (2*PROPSTRLIMIT)) {
|
...
|
...
|
@@ -2029,8 +2031,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
|
2029
|
2029
|
return CL_EMEM;
|
2030
|
2030
|
}
|
2031
|
2031
|
memcpy(outstr, (const char *)(databuf+offset), strsize);
|
2032
|
|
- /* conversion of 16-width char strings to UTF-8 */
|
2033
|
|
- outstr2 = ole2_convert_utf(sctx, outstr, strsize, "UTF-16");
|
|
2032
|
+ /* conversion of 16-bit-wide char strings (UTF-16 or UTF-16LE??) to UTF-8 */
|
|
2033
|
+ outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
|
2034
|
2034
|
if (!outstr2) {
|
2035
|
2035
|
/* use base64 encoding when all else fails! */
|
2036
|
2036
|
char b64jstr[PROPSTRLIMIT];
|