Browse code

ole2str sanitization code review changes: - ole2 string conversion now reuses the previously translated output (realloc) - ole2 codepages are now stored and reported as uint16_t instead of int16_t - added an ole2 string exception (no conversion) when the source is already UTF-8 or US-ASCII - various other code fixes

Kevin Lin authored on 2014/11/01 06:37:25
Showing 1 changed files
... ...
@@ -1323,7 +1323,8 @@ abort:
1323 1323
 
1324 1324
 #define WINUNICODE 0x04B0
1325 1325
 #define PROPCNTLIMIT 25
1326
-#define PROPSTRLIMIT 128 /* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
1326
+#define PROPSTRLIMIT 256 /* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
1327
+#define UTF16_MS "UTF-16LE"
1327 1328
 
1328 1329
 #define sum16_endian_convert(v) le16_to_host((uint16_t)(v))
1329 1330
 #define sum32_endian_convert(v) le32_to_host((uint32_t)(v))
... ...
@@ -1449,7 +1450,7 @@ typedef struct summary_ctx {
1449 1449
 
1450 1450
     /* propset metadata */
1451 1451
     uint32_t pssize; /* track from propset start, not tail start */
1452
-    int16_t codepage;
1452
+    uint16_t codepage;
1453 1453
     int writecp;
1454 1454
 
1455 1455
     /* property metadata */
... ...
@@ -1465,8 +1466,9 @@ struct codepage_entry {
1465 1465
     const char *encoding;
1466 1466
 };
1467 1467
 
1468
-#define NUMCODEPAGES 152
1469
-static const struct codepage_entry codepage_entries[NUMCODEPAGES] = {
1468
+#define NUMCODEPAGES sizeof(codepage_entries)/sizeof(struct codepage_entry)
1469
+/* MAINTAIN - keep the array sorted in ascending codepage value order */
1470
+static const struct codepage_entry codepage_entries[] = {
1470 1471
     { 37,    "IBM037" },      /* IBM EBCDIC US-Canada */
1471 1472
     { 437,   "IBM437" },      /* OEM United States */
1472 1473
     { 500,   "IBM500" },      /* IBM EBCDIC International */
... ...
@@ -1624,20 +1626,26 @@ static const struct codepage_entry codepage_entries[NUMCODEPAGES] = {
1624 1624
 static char *
1625 1625
 ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
1626 1626
 {
1627
+    char *outbuf=NULL;
1627 1628
 #if HAVE_ICONV
1628
-    char *res=NULL;
1629
-    char *buf, *outbuf, *p1, *p2;
1629
+    char *buf, *p1, *p2;
1630
+    off_t offset;
1630 1631
     size_t inlen, outlen, nonrev, sz2;
1631 1632
     int i, try;
1632 1633
     iconv_t cd;
1634
+#endif
1635
+    /* applies in both cases (with or without iconv) */
1636
+    if (sctx->codepage == 20127 || sctx->codepage == 65001) {
1637
+        outbuf = cli_strdup(begin);
1638
+        return outbuf;
1639
+    }
1633 1640
 
1634
-    buf = cli_calloc(1, sz);
1641
+#if HAVE_ICONV
1642
+    p1 = buf = cli_calloc(1, sz);
1635 1643
     if (!(buf))
1636 1644
         return NULL;
1637 1645
 
1638 1646
     memcpy(buf, begin, sz);
1639
-
1640
-    outbuf = NULL;
1641 1647
     inlen = sz;
1642 1648
 
1643 1649
     /* encoding lookup if not specified */
... ...
@@ -1665,19 +1673,22 @@ ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encodi
1665 1665
         sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
1666 1666
     }
1667 1667
     else {
1668
+        offset = 0;
1668 1669
         for (try = 1; try <= 3; ++try) {
1669
-            p1 = buf;
1670
-
1671
-            if (outbuf)
1672
-                free(outbuf);
1673
-            outlen = sz2 = (try*2) * sz;
1674
-            p2 = outbuf = cli_calloc(1, sz2);
1670
+            /* charset to UTF-8 should never exceed sz*6 */
1671
+            sz2 = (try*2) * sz;
1672
+            /* use cli_realloc, reuse the buffer that has already been translated */
1673
+            outbuf = (char *)cli_realloc(outbuf, sz2+1);
1675 1674
             if (!outbuf) {
1676 1675
                 free(buf);
1677 1676
                 return NULL;
1678 1677
             }
1679 1678
 
1680
-            nonrev = iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
1679
+            outlen = sz2 - offset;
1680
+            p2 = outbuf + offset;
1681
+
1682
+            /* conversion */
1683
+            nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
1681 1684
 
1682 1685
             if (errno == EILSEQ) {
1683 1686
                 cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
... ...
@@ -1694,27 +1705,28 @@ ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encodi
1694 1694
                 break;
1695 1695
             }
1696 1696
 
1697
-            cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
1698
-                       (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
1697
+            //outbuf[sz2 - outlen] = '\0';
1698
+            //cli_dbgmsg("%u %s\n", inlen, outbuf);
1699
+
1700
+            offset = sz2 - outlen;
1701
+            if (try < 3)
1702
+                cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
1703
+                           (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
1699 1704
         }
1700 1705
 
1701
-        if (inlen != 0 || (errno == E2BIG && nonrev == (size_t)-1)) {
1706
+        if (errno == E2BIG && nonrev == (size_t)-1) {
1702 1707
             cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
1703 1708
             sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
1704 1709
         }
1705 1710
 
1706 1711
         outbuf[sz2 - outlen] = '\0';
1707
-        res = strdup(outbuf);
1708 1712
     }
1709 1713
 
1710 1714
     iconv_close(cd);
1711 1715
     free(buf);
1712
-    free(outbuf);
1713
-    return res;
1714
-#else
1715
-    /* this should force base64 encoding */
1716
-    return NULL;
1717 1716
 #endif
1717
+    /* this should force base64 encoding if NULL */
1718
+    return outbuf;
1718 1719
 }
1719 1720
 
1720 1721
 static int
... ...
@@ -1764,10 +1776,12 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1764 1764
             /* endian conversion */
1765 1765
             dout = sum16_endian_convert(dout);
1766 1766
 
1767
-            if (sctx->writecp)
1768
-                sctx->codepage = dout;
1769
-
1770
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1767
+            if (sctx->writecp) {
1768
+                sctx->codepage = (uint16_t)dout;
1769
+                ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
1770
+            }
1771
+            else
1772
+                ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1771 1773
             break;
1772 1774
 	}
1773 1775
     case PT_INT32:
... ...
@@ -1928,9 +1942,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1928 1928
         if (sctx->codepage == 0) {
1929 1929
             cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
1930 1930
             sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
1931
-            break;
1932 1931
         }
1933
-        else if (sctx->codepage != WINUNICODE) {
1932
+        else {
1934 1933
             uint32_t strsize;
1935 1934
             char *outstr, *outstr2;
1936 1935
 
... ...
@@ -1941,7 +1954,7 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1941 1941
 
1942 1942
             memcpy(&strsize, databuf+offset, sizeof(strsize));
1943 1943
             offset+=sizeof(strsize);
1944
-            /* endian conversion */
1944
+            /* endian conversion? */
1945 1945
             strsize = sum32_endian_convert(strsize);
1946 1946
 
1947 1947
             if (offset+strsize > sctx->pssize) {
... ...
@@ -1969,9 +1982,10 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1969 1969
                 /* use base64 encoding when all else fails! */
1970 1970
                 char b64jstr[PROPSTRLIMIT];
1971 1971
 
1972
+                /* outstr2 should be 4/3 times the original (rounded up) */
1972 1973
                 outstr2 = cl_base64_encode(outstr, strsize);
1973 1974
                 if (!outstr2) {
1974
-                    free(outstr);
1975
+                    cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
1975 1976
                     return CL_EMEM;
1976 1977
                 }
1977 1978
 
... ...
@@ -1984,9 +1998,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1984 1984
             ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
1985 1985
             free(outstr);
1986 1986
             free(outstr2);
1987
-            break;
1988 1987
         }
1989
-        /* fall-through for unicode strings */
1988
+        break;
1990 1989
     case PT_LPWSTR:
1991 1990
 	{
1992 1991
             uint32_t strsize;
... ...
@@ -1998,19 +2011,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
1998 1998
             }
1999 1999
             memcpy(&strsize, databuf+offset, sizeof(strsize));
2000 2000
             offset+=sizeof(strsize);
2001
-            /* endian conversion */
2002
-            strsize = sum32_endian_convert(strsize);
2003
-            
2004
-            if (proptype == PT_LPSTR) { /* fall-through specifics */
2005
-                if (strsize % 2) {
2006
-                    cli_dbgmsg("ole2_process_property: LPSTR using wchar not sized a multiple of 2\n");
2007
-                    sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
2008
-                    return CL_EFORMAT;
2009
-                }
2010
-            }
2011
-            else {
2012
-                strsize*=2; /* Unicode strings are by length, not size */
2013
-            }
2001
+            /* endian conversion; wide strings are by length, not size (x2) */
2002
+            strsize = sum32_endian_convert(strsize)*2;
2014 2003
 
2015 2004
             /* limitation on string length */
2016 2005
             if (strsize > (2*PROPSTRLIMIT)) {
... ...
@@ -2029,8 +2031,8 @@ ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offs
2029 2029
                 return CL_EMEM;
2030 2030
             }
2031 2031
             memcpy(outstr, (const char *)(databuf+offset), strsize);
2032
-            /* conversion of 16-width char strings to UTF-8 */
2033
-            outstr2 = ole2_convert_utf(sctx, outstr, strsize, "UTF-16");
2032
+            /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
2033
+            outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
2034 2034
             if (!outstr2) {
2035 2035
                 /* use base64 encoding when all else fails! */
2036 2036
                 char b64jstr[PROPSTRLIMIT];