Browse code

separated ole2 and msdoc parsing into separate sources

Kevin Lin authored on 2014/11/04 07:16:53
Showing 4 changed files
... ...
@@ -441,7 +441,9 @@ libclamav_la_SOURCES = \
441 441
 	hostid.c \
442 442
 	hostid.h \
443 443
 	openioc.c \
444
-	openioc.h
444
+	openioc.h \
445
+	msdoc.c \
446
+	msdoc.h
445 447
 
446 448
 libclamav_la_SOURCES += bignum.h\
447 449
 	bignum_fast.h\
448 450
new file mode 100644
... ...
@@ -0,0 +1,954 @@
0
+/*
1
+ * Extract component parts of OLE2 files (e.g. MS Office Documents)
2
+ * 
3
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ * 
5
+ * Authors: Trog
6
+ * 
7
+ * This program is free software; you can redistribute it and/or modify it under
8
+ * the terms of the GNU General Public License version 2 as published by the
9
+ * Free Software Foundation.
10
+ * 
11
+ * This program is distributed in the hope that it will be useful, but WITHOUT
12
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
+ * more details.
15
+ * 
16
+ * You should have received a copy of the GNU General Public License along with
17
+ * this program; if not, write to the Free Software Foundation, Inc., 51
18
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
+ */
20
+
21
+#if HAVE_CONFIG_H
22
+#include "clamav-config.h"
23
+#endif
24
+
25
+#include <sys/types.h>
26
+#include <sys/stat.h>
27
+#include <fcntl.h>
28
+#include <stdio.h>
29
+#include <string.h>
30
+#include <ctype.h>
31
+#include <stdlib.h>
32
+#include <errno.h>
33
+#include <conv.h>
34
+#ifdef	HAVE_UNISTD_H
35
+#include <unistd.h>
36
+#endif
37
+
38
+#if HAVE_ICONV
39
+#include <iconv.h>
40
+#endif
41
+
42
+#include "clamav.h"
43
+#include "cltypes.h"
44
+#include "others.h"
45
+#include "msdoc.h"
46
+#include "scanners.h"
47
+#include "fmap.h"
48
+#include "json_api.h"
49
+
50
+#if HAVE_JSON
51
+static char *
52
+ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
53
+{
54
+    char *outbuf=NULL;
55
+#if HAVE_ICONV
56
+    char *buf, *p1, *p2;
57
+    off_t offset;
58
+    size_t inlen, outlen, nonrev, sz2;
59
+    int i, try;
60
+    iconv_t cd;
61
+#endif
62
+    /* applies in the both case */
63
+    if (sctx->codepage == 20127 || sctx->codepage == 65001) {
64
+        outbuf = cli_strdup(begin);
65
+        return outbuf;
66
+    }
67
+
68
+#if HAVE_ICONV
69
+    p1 = buf = cli_calloc(1, sz);
70
+    if (!(buf))
71
+        return NULL;
72
+
73
+    memcpy(buf, begin, sz);
74
+    inlen = sz;
75
+
76
+    /* encoding lookup if not specified */
77
+    if (!encoding) {
78
+        for (i = 0; i < NUMCODEPAGES; ++i) {
79
+            if (sctx->codepage == codepage_entries[i].codepage)
80
+                encoding = codepage_entries[i].encoding;
81
+            else if (sctx->codepage < codepage_entries[i].codepage) {
82
+                /* assuming sorted array */
83
+                break;
84
+            }
85
+        }
86
+
87
+        if (!encoding) {
88
+            cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
89
+            sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
90
+            free(buf);
91
+            return NULL;
92
+        }
93
+    }
94
+
95
+    cd = iconv_open("UTF-8", encoding);
96
+    if (cd == (iconv_t)(-1)) {
97
+        cli_errmsg("ole2_convert_utf: could not initialize iconv\n");
98
+        sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
99
+    }
100
+    else {
101
+        offset = 0;
102
+        for (try = 1; try <= 3; ++try) {
103
+            /* charset to UTF-8 should never exceed sz*6 */
104
+            sz2 = (try*2) * sz;
105
+            /* use cli_realloc, reuse the buffer that has already been translated */
106
+            outbuf = (char *)cli_realloc(outbuf, sz2+1);
107
+            if (!outbuf) {
108
+                free(buf);
109
+                return NULL;
110
+            }
111
+
112
+            outlen = sz2 - offset;
113
+            p2 = outbuf + offset;
114
+
115
+            /* conversion */
116
+            nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
117
+
118
+            if (errno == EILSEQ) {
119
+                cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
120
+                sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
121
+                break;
122
+            }
123
+            else if (errno == EINVAL && nonrev == (size_t)-1) {
124
+                cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
125
+                sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
126
+                break;
127
+            }
128
+            else if (inlen == 0) {
129
+                //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
130
+                break;
131
+            }
132
+
133
+            //outbuf[sz2 - outlen] = '\0';
134
+            //cli_dbgmsg("%u %s\n", inlen, outbuf);
135
+
136
+            offset = sz2 - outlen;
137
+            if (try < 3)
138
+                cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
139
+                           (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
140
+        }
141
+
142
+        if (errno == E2BIG && nonrev == (size_t)-1) {
143
+            cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
144
+            sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
145
+        }
146
+
147
+        outbuf[sz2 - outlen] = '\0';
148
+    }
149
+
150
+    iconv_close(cd);
151
+    free(buf);
152
+#endif
153
+    /* this should force base64 encoding if NULL */
154
+    return outbuf;
155
+}
156
+
157
+static int
158
+ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
159
+{
160
+    uint16_t proptype, padding;
161
+    int ret = CL_SUCCESS;
162
+
163
+    if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
164
+        sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
165
+        return CL_ETIMEOUT;
166
+    }
167
+
168
+    if (offset+sizeof(proptype)+sizeof(padding) > sctx->pssize) {
169
+        sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
170
+        return CL_EFORMAT;
171
+    }
172
+
173
+    memcpy(&proptype, databuf+offset, sizeof(proptype));
174
+    offset+=sizeof(proptype);
175
+    memcpy(&padding, databuf+offset, sizeof(padding));
176
+    offset+=sizeof(padding);
177
+    /* endian conversion */
178
+    proptype = sum16_endian_convert(proptype);
179
+
180
+    //cli_dbgmsg("proptype: 0x%04x\n", proptype);
181
+    if (padding != 0) {
182
+        cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
183
+        sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
184
+        return CL_EFORMAT;
185
+    }
186
+
187
+    switch (proptype) {
188
+    case PT_EMPTY:
189
+    case PT_NULL:
190
+        ret = cli_jsonnull(sctx->summary, sctx->propname);
191
+        break;
192
+    case PT_INT16:
193
+	{
194
+            int16_t dout;
195
+            if (offset+sizeof(dout) > sctx->pssize) {
196
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
197
+                return CL_EFORMAT;
198
+            }
199
+            memcpy(&dout, databuf+offset, sizeof(dout));
200
+            offset+=sizeof(dout);
201
+            /* endian conversion */
202
+            dout = sum16_endian_convert(dout);
203
+
204
+            if (sctx->writecp) {
205
+                sctx->codepage = (uint16_t)dout;
206
+                ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
207
+            }
208
+            else
209
+                ret = cli_jsonint(sctx->summary, sctx->propname, dout);
210
+            break;
211
+	}
212
+    case PT_INT32:
213
+    case PT_INT32v1:
214
+	{
215
+            int32_t dout;
216
+            if (offset+sizeof(dout) > sctx->pssize) {
217
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
218
+                return CL_EFORMAT;
219
+            }
220
+            memcpy(&dout, databuf+offset, sizeof(dout));
221
+            offset+=sizeof(dout);
222
+            /* endian conversion */
223
+            dout = sum32_endian_convert(dout);
224
+
225
+            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
226
+            break;
227
+	}
228
+    case PT_FLOAT32: /* review this please */
229
+	{
230
+            float dout;
231
+            if (offset+sizeof(dout) > sctx->pssize) {
232
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
233
+                return CL_EFORMAT;
234
+            }
235
+            memcpy(&dout, databuf+offset, sizeof(dout));
236
+            offset+=sizeof(dout);
237
+            /* endian conversion */
238
+            dout = sum32_endian_convert(dout);
239
+
240
+            ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
241
+            break;
242
+	}
243
+    case PT_DATE:
244
+    case PT_DOUBLE64: /* review this please */
245
+	{
246
+            double dout;
247
+            if (offset+sizeof(dout) > sctx->pssize) {
248
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
249
+                return CL_EFORMAT;
250
+            }
251
+            memcpy(&dout, databuf+offset, sizeof(dout));
252
+            offset+=sizeof(dout);
253
+            /* endian conversion */
254
+            dout = sum64_endian_convert(dout);
255
+
256
+            ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
257
+            break;
258
+	}
259
+    case PT_BOOL:
260
+	{
261
+            uint16_t dout;
262
+            if (offset+sizeof(dout) > sctx->pssize) {
263
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
264
+                return CL_EFORMAT;
265
+            }
266
+            memcpy(&dout, databuf+offset, sizeof(dout));
267
+            offset+=sizeof(dout);
268
+            /* no need for endian conversion */
269
+
270
+            ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
271
+            break;
272
+	}
273
+    case PT_INT8v1:
274
+	{
275
+            int8_t dout;
276
+            if (offset+sizeof(dout) > sctx->pssize) {
277
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
278
+                return CL_EFORMAT;
279
+            }
280
+            memcpy(&dout, databuf+offset, sizeof(dout));
281
+            offset+=sizeof(dout);
282
+            /* no need for endian conversion */
283
+
284
+            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
285
+            break;
286
+	}
287
+    case PT_UINT8:
288
+	{
289
+            uint8_t dout;
290
+            if (offset+sizeof(dout) > sctx->pssize) {
291
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
292
+                return CL_EFORMAT;
293
+            }
294
+            memcpy(&dout, databuf+offset, sizeof(dout));
295
+            offset+=sizeof(dout);
296
+            /* no need for endian conversion */
297
+
298
+            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
299
+            break;
300
+	}
301
+    case PT_UINT16:
302
+	{
303
+            uint16_t dout;
304
+            if (offset+sizeof(dout) > sctx->pssize) {
305
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
306
+                return CL_EFORMAT;
307
+            }
308
+            memcpy(&dout, databuf+offset, sizeof(dout));
309
+            offset+=sizeof(dout);
310
+            /* endian conversion */
311
+            dout = sum16_endian_convert(dout);
312
+
313
+            if (sctx->writecp)
314
+                sctx->codepage = dout;
315
+
316
+            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
317
+            break;
318
+	}
319
+    case PT_UINT32:
320
+    case PT_UINT32v1:
321
+	{
322
+            uint32_t dout;
323
+            if (offset+sizeof(dout) > sctx->pssize) {
324
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
325
+                return CL_EFORMAT;
326
+            }
327
+            memcpy(&dout, databuf+offset, sizeof(dout));
328
+            offset+=sizeof(dout);
329
+            /* endian conversion */
330
+            dout = sum32_endian_convert(dout);
331
+
332
+            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
333
+            break;
334
+	}
335
+    case PT_INT64:
336
+	{
337
+            int64_t dout;
338
+            if (offset+sizeof(dout) > sctx->pssize) {
339
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
340
+                return CL_EFORMAT;
341
+            }
342
+            memcpy(&dout, databuf+offset, sizeof(dout));
343
+            offset+=sizeof(dout);
344
+            /* endian conversion */
345
+            dout = sum64_endian_convert(dout);
346
+
347
+            ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
348
+            break;
349
+	}
350
+    case PT_UINT64:
351
+	{
352
+            uint64_t dout;
353
+            if (offset+sizeof(dout) > sctx->pssize) {
354
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
355
+                return CL_EFORMAT;
356
+            }
357
+            memcpy(&dout, databuf+offset, sizeof(dout));
358
+            offset+=sizeof(dout);
359
+            /* endian conversion */
360
+            dout = sum64_endian_convert(dout);
361
+
362
+            ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
363
+            break;
364
+	}
365
+    case PT_BSTR:
366
+    case PT_LPSTR:
367
+        if (sctx->codepage == 0) {
368
+            cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
369
+            sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
370
+        }
371
+        else {
372
+            uint32_t strsize;
373
+            char *outstr, *outstr2;
374
+
375
+            if (offset+sizeof(strsize) > sctx->pssize) {
376
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
377
+                return CL_EFORMAT;
378
+            }
379
+
380
+            memcpy(&strsize, databuf+offset, sizeof(strsize));
381
+            offset+=sizeof(strsize);
382
+            /* endian conversion? */
383
+            strsize = sum32_endian_convert(strsize);
384
+
385
+            if (offset+strsize > sctx->pssize) {
386
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
387
+                return CL_EFORMAT;
388
+            }
389
+
390
+            /* limitation on string length */
391
+            if (strsize > PROPSTRLIMIT) {
392
+                cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
393
+                           (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
394
+                sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
395
+                strsize = PROPSTRLIMIT;
396
+            }
397
+
398
+            outstr = cli_calloc(strsize+1, 1); /* last char must be NULL */
399
+            if (!outstr) {
400
+                return CL_EMEM;
401
+            }
402
+            strncpy(outstr, (const char *)(databuf+offset), strsize);
403
+
404
+            /* conversion of various encodings to UTF-8 */
405
+            outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
406
+            if (!outstr2) {
407
+                /* use base64 encoding when all else fails! */
408
+                char b64jstr[PROPSTRLIMIT];
409
+
410
+                /* outstr2 should be 4/3 times the original (rounded up) */
411
+                outstr2 = cl_base64_encode(outstr, strsize);
412
+                if (!outstr2) {
413
+                    cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
414
+                    return CL_EMEM;
415
+                }
416
+
417
+                snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
418
+                ret = cli_jsonbool(sctx->summary, b64jstr, 1);
419
+                if (ret != CL_SUCCESS)
420
+                    return ret;
421
+            }
422
+
423
+            ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
424
+            free(outstr);
425
+            free(outstr2);
426
+        }
427
+        break;
428
+    case PT_LPWSTR:
429
+	{
430
+            uint32_t strsize;
431
+            char *outstr, *outstr2;
432
+
433
+            if (offset+sizeof(strsize) > sctx->pssize) {
434
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
435
+                return CL_EFORMAT;
436
+            }
437
+            memcpy(&strsize, databuf+offset, sizeof(strsize));
438
+            offset+=sizeof(strsize);
439
+            /* endian conversion; wide strings are by length, not size (x2) */
440
+            strsize = sum32_endian_convert(strsize)*2;
441
+
442
+            /* limitation on string length */
443
+            if (strsize > (2*PROPSTRLIMIT)) {
444
+                cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
445
+                           (unsigned long)strsize, (unsigned long)(2*PROPSTRLIMIT));
446
+                sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
447
+                strsize = (2*PROPSTRLIMIT);
448
+            }
449
+
450
+            if (offset+strsize > sctx->pssize) {
451
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
452
+                return CL_EFORMAT;
453
+            }
454
+            outstr = cli_calloc(strsize+2, 1); /* last two chars must be NULL */
455
+            if (!outstr) {
456
+                return CL_EMEM;
457
+            }
458
+            memcpy(outstr, (const char *)(databuf+offset), strsize);
459
+            /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
460
+            outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
461
+            if (!outstr2) {
462
+                /* use base64 encoding when all else fails! */
463
+                char b64jstr[PROPSTRLIMIT];
464
+
465
+                outstr2 = cl_base64_encode(outstr, strsize);
466
+                if (!outstr2) {
467
+                    free(outstr);
468
+                    return CL_EMEM;
469
+                }
470
+
471
+                snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
472
+                ret = cli_jsonbool(sctx->summary, b64jstr, 1);
473
+                if (ret != CL_SUCCESS)
474
+                    return ret;
475
+            }
476
+
477
+            ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
478
+            free(outstr);
479
+            free(outstr2);
480
+            break;
481
+	}
482
+    case PT_FILETIME:
483
+	{
484
+            uint32_t ltime, htime;
485
+            uint64_t wtime = 0, utime =0;
486
+
487
+            if (offset+sizeof(ltime)+sizeof(htime) > sctx->pssize) {
488
+                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
489
+                return CL_EFORMAT;
490
+            }
491
+            memcpy(&ltime, databuf+offset, sizeof(ltime));
492
+            offset+=sizeof(ltime);
493
+            memcpy(&htime, databuf+offset, sizeof(htime));
494
+            offset+=sizeof(ltime);
495
+            ltime = sum32_endian_convert(ltime);
496
+            htime = sum32_endian_convert(htime);
497
+
498
+            /* UNIX timestamp formatting */
499
+            wtime = htime;
500
+            wtime <<= 32;
501
+            wtime |= ltime;
502
+
503
+            utime = wtime / 10000000;
504
+            utime -= 11644473600LL;
505
+
506
+            if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
507
+                cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
508
+            }
509
+            else {
510
+                ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
511
+            }
512
+            break;
513
+	}
514
+    default:
515
+        cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n", 
516
+                   proptype, sctx->propname);
517
+        sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
518
+    }
519
+
520
+    return ret;
521
+}
522
+
523
+static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
524
+{
525
+    switch(propid) {
526
+    case DSPID_CODEPAGE:
527
+        sctx->writecp = 1; /* must be set ONLY for codepage */
528
+        sctx->propname = "CodePage";
529
+        break;
530
+    case DSPID_CATEGORY:
531
+        sctx->propname = "Category";
532
+        break;
533
+    case DSPID_PRESFORMAT:
534
+        sctx->propname = "PresentationTarget";
535
+        break;
536
+    case DSPID_BYTECOUNT:
537
+        sctx->propname = "Bytes";
538
+        break;
539
+    case DSPID_LINECOUNT:
540
+        sctx->propname = "Lines";
541
+        break;
542
+    case DSPID_PARCOUNT:
543
+        sctx->propname = "Paragraphs";
544
+        break;
545
+    case DSPID_SLIDECOUNT:
546
+        sctx->propname = "Slides";
547
+        break;
548
+    case DSPID_NOTECOUNT:
549
+        sctx->propname = "Notes";
550
+        break;
551
+    case DSPID_HIDDENCOUNT:
552
+        sctx->propname = "HiddenSlides";
553
+        break;
554
+    case DSPID_MMCLIPCOUNT:
555
+        sctx->propname = "MMClips";
556
+        break;
557
+    case DSPID_SCALE:
558
+        sctx->propname = "Scale";
559
+        break;
560
+    case DSPID_HEADINGPAIR: /* VT_VARIANT | VT_VECTOR */
561
+        sctx->propname = "HeadingPairs";
562
+        break;
563
+    case DSPID_DOCPARTS:    /* VT_VECTOR | VT_LPSTR */
564
+        sctx->propname = "DocPartTitles";
565
+        break;
566
+    case DSPID_MANAGER:
567
+        sctx->propname = "Manager";
568
+        break;
569
+    case DSPID_COMPANY:
570
+        sctx->propname = "Company";
571
+        break;
572
+    case DSPID_LINKSDIRTY:
573
+        sctx->propname = "LinksDirty";
574
+        break;
575
+    case DSPID_CCHWITHSPACES:
576
+        sctx->propname = "Char&WSCount";
577
+        break;
578
+    case DSPID_SHAREDDOC:   /* SHOULD BE FALSE! */
579
+        sctx->propname = "SharedDoc";
580
+        break;
581
+    case DSPID_LINKBASE:    /* moved to user-defined */
582
+        sctx->propname = "LinkBase";
583
+        break;
584
+    case DSPID_HLINKS:      /* moved to user-defined */
585
+        sctx->propname = "HyperLinks";
586
+        break;
587
+    case DSPID_HYPERLINKSCHANGED:
588
+        sctx->propname = "HyperLinksChanged";
589
+        break;
590
+    case DSPID_VERSION:
591
+        sctx->propname = "Version";
592
+        break;
593
+    case DSPID_DIGSIG:
594
+        sctx->propname = "DigitalSig";
595
+        break;
596
+    case DSPID_CONTENTTYPE:
597
+        sctx->propname = "ContentType";
598
+        break;
599
+    case DSPID_CONTENTSTATUS:
600
+        sctx->propname = "ContentStatus";
601
+        break;
602
+    case DSPID_LANGUAGE:
603
+        sctx->propname = "Language";
604
+        break;
605
+    case DSPID_DOCVERSION:
606
+        sctx->propname = "DocVersion";
607
+        break;
608
+    default:
609
+        cli_dbgmsg("ole2_docsum_propset_json: unrecognized propid!\n");
610
+        sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
611
+    }
612
+}
613
+
614
+static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
615
+{
616
+    switch(propid) {
617
+    case SPID_CODEPAGE:
618
+        sctx->writecp = 1; /* must be set ONLY for codepage */
619
+        sctx->propname = "CodePage";
620
+        break;
621
+    case SPID_TITLE:
622
+        sctx->propname = "Title";
623
+        break;
624
+    case SPID_SUBJECT:
625
+        sctx->propname = "Subject";
626
+        break;
627
+    case SPID_AUTHOR:
628
+        sctx->propname = "Author";
629
+        break;
630
+    case SPID_KEYWORDS:
631
+        sctx->propname = "Keywords";
632
+        break;
633
+    case SPID_COMMENTS:
634
+        sctx->propname = "Comments";
635
+        break;
636
+    case SPID_TEMPLATE:
637
+        sctx->propname = "Template";
638
+        break;
639
+    case SPID_LASTAUTHOR:
640
+        sctx->propname = "LastAuthor";
641
+        break;
642
+    case SPID_REVNUMBER:
643
+        sctx->propname = "RevNumber";
644
+        break;
645
+    case SPID_EDITTIME:
646
+        sctx->propname = "EditTime";
647
+        break;
648
+    case SPID_LASTPRINTED:
649
+        sctx->propname = "LastPrinted";
650
+        break;
651
+    case SPID_CREATEDTIME:
652
+        sctx->propname = "CreatedTime";
653
+        break;
654
+    case SPID_MODIFIEDTIME:
655
+        sctx->propname = "ModifiedTime";
656
+        break;
657
+    case SPID_PAGECOUNT:
658
+        sctx->propname = "PageCount";
659
+        break;
660
+    case SPID_WORDCOUNT:
661
+        sctx->propname = "WordCount";
662
+        break;
663
+    case SPID_CHARCOUNT:
664
+        sctx->propname = "CharCount";
665
+        break;
666
+    case SPID_THUMBNAIL:
667
+        sctx->propname = "Thumbnail";
668
+        break;
669
+    case SPID_APPNAME:
670
+        sctx->propname = "AppName";
671
+        break;
672
+    case SPID_SECURITY:
673
+        sctx->propname = "Security";
674
+        break;
675
+    default:
676
+        cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
677
+        sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
678
+    }
679
+}
680
+
681
+static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
682
+{
683
+    unsigned char *hdr, *ps;
684
+    uint32_t numprops, limitprops;
685
+    off_t foff = offset, psoff = 0;
686
+    uint32_t poffset;
687
+    int ret;
688
+    unsigned int i;
689
+
690
+    cli_dbgmsg("in ole2_summary_propset_json\n");
691
+
692
+    /* summary ctx propset-specific setup*/
693
+    sctx->codepage = 0;
694
+    sctx->writecp = 0;
695
+    sctx->propname = NULL;
696
+
697
+    /* examine property set metadata */
698
+    if ((foff+(2*sizeof(uint32_t))) > sctx->maplen) {
699
+        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
700
+        return CL_EFORMAT;
701
+    }
702
+    hdr = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, (2*sizeof(uint32_t)));
703
+    if (!hdr) {
704
+        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
705
+        return CL_EREAD;
706
+    }
707
+    //foff+=(2*sizeof(uint32_t)); // keep foff pointing to start of propset segment
708
+    psoff+=(2*sizeof(uint32_t));
709
+    memcpy(&(sctx->pssize), hdr, sizeof(sctx->pssize));
710
+    memcpy(&numprops, hdr+sizeof(sctx->pssize), sizeof(numprops));
711
+    /* endian conversion */
712
+    sctx->pssize = sum32_endian_convert(sctx->pssize);
713
+    numprops = sum32_endian_convert(numprops);
714
+    cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, numprops);
715
+    if (numprops > PROPCNTLIMIT) {
716
+        sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
717
+        limitprops = PROPCNTLIMIT;
718
+    }
719
+    else {
720
+        limitprops = numprops;
721
+    }
722
+    cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) propeties\n",
723
+               limitprops, numprops, PROPCNTLIMIT);
724
+
725
+    /* extract remaining fragment of propset */
726
+    if ((size_t)(foff+(sctx->pssize)) > (size_t)(sctx->maplen)) {
727
+        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
728
+        return CL_EFORMAT;
729
+    }
730
+    ps = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, sctx->pssize);
731
+    if (!ps) {
732
+        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
733
+        return CL_EREAD;
734
+    }
735
+
736
+    /* iterate over the properties */
737
+    for (i = 0; i < limitprops; ++i) {
738
+        uint32_t propid, propoff;
739
+
740
+        if (psoff+sizeof(propid)+sizeof(poffset) > sctx->pssize) {
741
+            sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
742
+            return CL_EFORMAT;
743
+        }
744
+        memcpy(&propid, ps+psoff, sizeof(propid));
745
+        psoff+=sizeof(propid);
746
+        memcpy(&propoff, ps+psoff, sizeof(propoff));
747
+        psoff+=sizeof(propoff);
748
+        /* endian conversion */
749
+        propid = sum32_endian_convert(propid);
750
+        propoff = sum32_endian_convert(propoff);
751
+        cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);
752
+
753
+        sctx->propname = NULL; sctx->writecp = 0;
754
+        if (!sctx->mode)
755
+            ole2_translate_summary_propid(sctx, propid);
756
+        else
757
+            ole2_translate_docsummary_propid(sctx, propid);
758
+
759
+        if (sctx->propname != NULL) {
760
+            ret = ole2_process_property(sctx, ps, propoff);
761
+            if (ret != CL_SUCCESS)
762
+                return ret;
763
+        }
764
+        else {
765
+            /* add unknown propid flag */
766
+        }
767
+    }
768
+
769
+    return CL_SUCCESS;
770
+}
771
+
772
+static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
773
+{
774
+    json_object *jarr;
775
+
776
+    cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);
777
+
778
+    if (sctx->sfmap) {
779
+        funmap(sctx->sfmap);
780
+    }
781
+
782
+    if (sctx->flags) {
783
+        jarr = cli_jsonarray(sctx->summary, "ParseErrors");
784
+
785
+        /* summary errors */
786
+        if (sctx->flags & OLE2_SUMMARY_ERROR_TOOSMALL) {
787
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_TOOSMALL");
788
+        }
789
+        if (sctx->flags & OLE2_SUMMARY_ERROR_OOB) {
790
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_OOB");
791
+        }
792
+        if (sctx->flags & OLE2_SUMMARY_ERROR_DATABUF) {
793
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_DATABUF");
794
+        }
795
+        if (sctx->flags & OLE2_SUMMARY_ERROR_INVALID_ENTRY) {
796
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_INVALID_ENTRY");
797
+        }
798
+        if (sctx->flags & OLE2_SUMMARY_LIMIT_PROPS) {
799
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_LIMIT_PROPS");
800
+        }
801
+        if (sctx->flags & OLE2_SUMMARY_FLAG_TIMEOUT) {
802
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TIMEOUT");
803
+        }
804
+        if (sctx->flags & OLE2_SUMMARY_FLAG_CODEPAGE) {
805
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_CODEPAGE");
806
+        }
807
+        if (sctx->flags & OLE2_SUMMARY_FLAG_UNKNOWN_PROPID) {
808
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID");
809
+        }
810
+        if (sctx->flags & OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE) {
811
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE");
812
+        }
813
+        if (sctx->flags & OLE2_SUMMARY_FLAG_TRUNC_STR) {
814
+            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TRUNC_STR");
815
+        }
816
+
817
+        /* codepage translation errors */
818
+        if (sctx->flags & OLE2_CODEPAGE_ERROR_NOTFOUND) {
819
+            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_NOTFOUND");
820
+        }
821
+        if (sctx->flags & OLE2_CODEPAGE_ERROR_UNINITED) {
822
+            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_UNINITED");
823
+        }
824
+        if (sctx->flags & OLE2_CODEPAGE_ERROR_INVALID) {
825
+            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INVALID");
826
+        }
827
+        if (sctx->flags & OLE2_CODEPAGE_ERROR_INCOMPLETE) {
828
+            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INCOMPLETE");
829
+        }
830
+        if (sctx->flags & OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL) {
831
+            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL");
832
+        }
833
+    }
834
+
835
+    return retcode;
836
+}
837
+
838
+int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
839
+{
840
+    summary_ctx_t sctx;
841
+    STATBUF statbuf;
842
+    off_t foff = 0;
843
+    unsigned char *databuf;
844
+    summary_stub_t sumstub;
845
+    propset_entry_t pentry;
846
+    int ret = CL_SUCCESS;
847
+
848
+    cli_dbgmsg("in cli_ole2_summary_json\n");
849
+
850
+    /* preliminary sanity checks */
851
+    if (ctx == NULL) {
852
+        return CL_ENULLARG;
853
+    }
854
+
855
+    if (fd < 0) {
856
+        cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
857
+        return CL_ENULLARG; /* placeholder */
858
+    }
859
+
860
+    if (mode != 0 && mode != 1) {
861
+        cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
862
+        return CL_ENULLARG; /* placeholder */
863
+    }
864
+
865
+    /* summary ctx setup */
866
+    memset(&sctx, 0, sizeof(sctx));
867
+    sctx.ctx = ctx;
868
+    sctx.mode = mode;
869
+
870
+    if (FSTAT(fd, &statbuf) == -1) {
871
+        cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
872
+        return CL_ESTAT;
873
+    }
874
+
875
+    sctx.sfmap = fmap(fd, 0, statbuf.st_size);
876
+    if (!sctx.sfmap) {
877
+        cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
878
+        return CL_EMAP;
879
+    }
880
+    sctx.maplen = sctx.sfmap->len;
881
+    cli_dbgmsg("ole2_summary_json: streamsize: %u\n", sctx.maplen);
882
+
883
+    if (!mode)
884
+        sctx.summary = cli_jsonobj(ctx->wrkproperty, "SummaryInfo");
885
+    else
886
+        sctx.summary = cli_jsonobj(ctx->wrkproperty, "DocSummaryInfo");
887
+    if (!sctx.summary) {
888
+        cli_errmsg("ole2_summary_json: no memory for json object.\n");
889
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EMEM);
890
+    }
891
+
892
+    sctx.codepage = 0;
893
+    sctx.writecp = 0;
894
+
895
+    /* acquire property stream metadata */
896
+    if (sctx.maplen < sizeof(summary_stub_t)) {
897
+        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
898
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
899
+    }
900
+    databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
901
+    if (!databuf) {
902
+        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
903
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
904
+    }
905
+    foff += sizeof(summary_stub_t);
906
+    memcpy(&sumstub, databuf, sizeof(summary_stub_t));
907
+
908
+    /* endian conversion and checks */
909
+    sumstub.byte_order = le16_to_host(sumstub.byte_order);
910
+    if (sumstub.byte_order != 0xfffe) {
911
+        cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
912
+        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
913
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);;
914
+    }
915
+    sumstub.version = sum16_endian_convert(sumstub.version); /*unused*/
916
+    sumstub.system = sum32_endian_convert(sumstub.system); /*unused*/
917
+    sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
918
+    if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
919
+        cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
920
+        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
921
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
922
+    }
923
+
924
+    cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
925
+    cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);
926
+
927
+    /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
928
+    if ((sctx.maplen-foff) < sizeof(propset_entry_t)) {
929
+        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
930
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
931
+    }
932
+    databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
933
+    if (!databuf) {
934
+        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
935
+        return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
936
+    }
937
+    foff += sizeof(propset_entry_t);
938
+    memcpy(&pentry, databuf, sizeof(propset_entry_t));
939
+    /* endian conversion */
940
+    pentry.offset = sum32_endian_convert(pentry.offset);
941
+
942
+    if ((ret = ole2_summary_propset_json(&sctx, pentry.offset)) != CL_SUCCESS) {
943
+        return cli_ole2_summary_json_cleanup(&sctx, ret);
944
+    }
945
+
946
+    /* second property set (index=1) is always a custom property set (if present) */
947
+    if (sumstub.num_propsets == 2) {
948
+        cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
949
+    }
950
+
951
+    return cli_ole2_summary_json_cleanup(&sctx, CL_SUCCESS);
952
+}
953
+#endif /* HAVE_JSON */
0 954
new file mode 100644
... ...
@@ -0,0 +1,336 @@
0
+/*
1
+ *  Extract component parts of OLE2 files (e.g. MS Office Documents)
2
+ *
3
+ *  Copyright (C) 2007-2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Trog
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+
22
+#ifndef __MSDOC_H
23
+#define __MSDOC_H
24
+
25
+#include "others.h"
26
+#include "uniq.h"
27
+
28
+/* Summary and Document Information Parsing to JSON */
29
+#if HAVE_JSON
30
+
31
/* Parsing limits.
 * NOTE(review): PROPCNTLIMIT presumably caps the number of properties handled
 * per property set; its use site is outside this header - confirm. */
#define PROPCNTLIMIT 25
/* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
#define PROPSTRLIMIT 256
/* Encoding name used for the UTF-16 little-endian strings of MS documents. */
#define UTF16_MS "UTF-16LE"

/* Little-endian to host conversions for 16-, 32- and 64-bit stream fields. */
#define sum16_endian_convert(v) le16_to_host((uint16_t)(v))
#define sum32_endian_convert(v) le32_to_host((uint32_t)(v))
#define sum64_endian_convert(v) le64_to_host((uint64_t)(v))
38
+
39
/* Property identifiers 0x01-0x13 used with the summary property set
 * (NOTE(review): presumably the "SummaryInfo" mode - confirm at the use site). */
enum summary_pidsi {
    SPID_CODEPAGE     = 0x01,
    SPID_TITLE        = 0x02,
    SPID_SUBJECT      = 0x03,
    SPID_AUTHOR       = 0x04,
    SPID_KEYWORDS     = 0x05,
    SPID_COMMENTS     = 0x06,
    SPID_TEMPLATE     = 0x07,
    SPID_LASTAUTHOR   = 0x08,
    SPID_REVNUMBER    = 0x09,
    SPID_EDITTIME     = 0x0A,
    SPID_LASTPRINTED  = 0x0B,
    SPID_CREATEDTIME  = 0x0C,
    SPID_MODIFIEDTIME = 0x0D,
    SPID_PAGECOUNT    = 0x0E,
    SPID_WORDCOUNT    = 0x0F,
    SPID_CHARCOUNT    = 0x10,
    SPID_THUMBNAIL    = 0x11,
    SPID_APPNAME      = 0x12,
    SPID_SECURITY     = 0x13
};
60
+
61
/* Property identifiers used with the document-summary property set.
 * The values 0x12 and 0x19 are not assigned a name here. */
enum docsum_pidsi {
    DSPID_CODEPAGE          = 0x01,
    DSPID_CATEGORY          = 0x02,
    DSPID_PRESFORMAT        = 0x03,
    DSPID_BYTECOUNT         = 0x04,
    DSPID_LINECOUNT         = 0x05,
    DSPID_PARCOUNT          = 0x06,
    DSPID_SLIDECOUNT        = 0x07,
    DSPID_NOTECOUNT         = 0x08,
    DSPID_HIDDENCOUNT       = 0x09,
    DSPID_MMCLIPCOUNT       = 0x0A,
    DSPID_SCALE             = 0x0B,
    DSPID_HEADINGPAIR       = 0x0C, /* VT_VARIANT | VT_VECTOR */
    DSPID_DOCPARTS          = 0x0D, /* VT_VECTOR | VT_LPSTR */
    DSPID_MANAGER           = 0x0E,
    DSPID_COMPANY           = 0x0F,
    DSPID_LINKSDIRTY        = 0x10,
    DSPID_CCHWITHSPACES     = 0x11,
    DSPID_SHAREDDOC         = 0x13, /* must be false */
    DSPID_LINKBASE          = 0x14, /* moved to user-defined */
    DSPID_HLINKS            = 0x15, /* moved to user-defined */
    DSPID_HYPERLINKSCHANGED = 0x16,
    DSPID_VERSION           = 0x17,
    DSPID_DIGSIG            = 0x18,
    DSPID_CONTENTTYPE       = 0x1A,
    DSPID_CONTENTSTATUS     = 0x1B,
    DSPID_LANGUAGE          = 0x1C,
    DSPID_DOCVERSION        = 0x1D
};
90
+
91
/* Type tags of property values that this parser names; other type values
 * exist but are not currently handled. */
enum property_type {
    PT_EMPTY    = 0x00,
    PT_NULL     = 0x01,
    PT_INT16    = 0x02,
    PT_INT32    = 0x03,
    PT_FLOAT32  = 0x04,
    PT_DOUBLE64 = 0x05,
    PT_DATE     = 0x07,
    PT_BSTR     = 0x08,
    PT_BOOL     = 0x0B,
    PT_INT8v1   = 0x10,
    PT_UINT8    = 0x11,
    PT_UINT16   = 0x12,
    PT_UINT32   = 0x13,
    PT_INT64    = 0x14,
    PT_UINT64   = 0x15,
    PT_INT32v1  = 0x16,
    PT_UINT32v1 = 0x17,
    PT_LPSTR    = 0x1E,
    PT_LPWSTR   = 0x1F,
    PT_FILETIME = 0x40

    /* More Types not currently handled */
};
115
+
116
/* Fixed-size header at the start of a property-set stream.  The parser
 * copies it straight out of the mapped stream with memcpy(), so the field
 * order and widths must mirror the on-disk layout. */
typedef struct summary_stub {
    uint16_t byte_order;   /* the parser accepts only 0xfffe after LE conversion */
    uint16_t version;
    uint32_t system;       /* implementation-specific */
    uint8_t  CLSID[16];

    uint32_t num_propsets; /* 1 or 2 */
} summary_stub_t;
124
+
125
/* One entry of the property-set list that follows the stream header; like
 * the header it is memcpy()'d from the stream, so layout matters. */
typedef struct propset_summary_entry {
    uint8_t  FMTID[16];
    uint32_t offset;    /* passed to the property-set parser after LE conversion */
} propset_entry_t;
129
+
130
/* error codes
 *
 * Bit flags OR'ed into summary_ctx_t.flags.  Each one is tested and reported
 * individually, so every flag must own a distinct bit.
 */
#define OLE2_SUMMARY_ERROR_TOOSMALL      0x00000001
#define OLE2_SUMMARY_ERROR_OOB           0x00000002
#define OLE2_SUMMARY_ERROR_DATABUF       0x00000004
#define OLE2_SUMMARY_ERROR_INVALID_ENTRY 0x00000008
#define OLE2_SUMMARY_LIMIT_PROPS         0x00000010
#define OLE2_SUMMARY_FLAG_TIMEOUT        0x00000020
#define OLE2_SUMMARY_FLAG_CODEPAGE       0x00000040
#define OLE2_SUMMARY_FLAG_UNKNOWN_PROPID 0x00000080
#define OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE 0x00000100
#define OLE2_SUMMARY_FLAG_TRUNC_STR      0x00000200

/* codepage translation errors */
#define OLE2_CODEPAGE_ERROR_NOTFOUND     0x00000400
#define OLE2_CODEPAGE_ERROR_UNINITED     0x00000800
#define OLE2_CODEPAGE_ERROR_INVALID      0x00001000
#define OLE2_CODEPAGE_ERROR_INCOMPLETE   0x00002000
/* was 0x00002000, which made it indistinguishable from ..._INCOMPLETE */
#define OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL 0x00004000
147
+
148
+/* metadata structures */
149
+typedef struct summary_ctx {
150
+    cli_ctx *ctx;
151
+    int mode;
152
+    fmap_t *sfmap;
153
+    json_object *summary;
154
+    size_t maplen;
155
+    uint32_t flags;
156
+
157
+    /* propset metadata */
158
+    uint32_t pssize; /* track from propset start, not tail start */
159
+    uint16_t codepage;
160
+    int writecp;
161
+
162
+    /* property metadata */
163
+    const char *propname;
164
+
165
+    /* timeout meta */
166
+    int toval;
167
+} summary_ctx_t;
168
+
169
/* string conversion */

/* Maps a Windows code page identifier to an encoding name; encoding is NULL
 * when no name is known for the page.  Identifiers run up to 65001, so the
 * field must be an unsigned 16-bit value: stored as int16_t, every entry
 * from 38598 upward turned negative, which broke the sorted order and made
 * those entries unmatchable against the uint16_t codepage in summary_ctx_t. */
struct codepage_entry {
    uint16_t codepage;
    const char *encoding;
};

/* Number of table entries; parenthesized so it is safe inside larger expressions. */
#define NUMCODEPAGES (sizeof(codepage_entries) / sizeof(codepage_entries[0]))
/* MAINTAIN - the array in codepage value sorted order */
static const struct codepage_entry codepage_entries[] = {
    { 37,    "IBM037" },      /* IBM EBCDIC US-Canada */
    { 437,   "IBM437" },      /* OEM United States */
    { 500,   "IBM500" },      /* IBM EBCDIC International */
    { 708,   "ASMO-708" },    /* Arabic (ASMO 708) */
    { 709,   NULL },          /* Arabic (ASMO-449+, BCON V4) */
    { 710,   NULL },          /* Arabic - Transparent Arabic */
    { 720,   NULL },          /* Arabic (Transparent ASMO); Arabic (DOS) */
    { 737,   NULL },          /* OEM Greek (formerly 437G); Greek (DOS) */
    { 775,   "IBM775" },      /* OEM Baltic; Baltic (DOS) */
    { 850,   "IBM850" },      /* OEM Multilingual Latin 1; Western European (DOS) */
    { 852,   "IBM852" },      /* OEM Latin 2; Central European (DOS) */
    { 855,   "IBM855" },      /* OEM Cyrillic (primarily Russian) */
    { 857,   "IBM857" },      /* OEM Turkish; Turkish (DOS) */
    { 858,   NULL },          /* OEM Multilingual Latin 1 + Euro symbol */
    { 860,   "IBM860" },      /* OEM Portuguese; Portuguese (DOS) */
    { 861,   "IBM861" },      /* OEM Icelandic; Icelandic (DOS) */
    { 862,   NULL },          /* OEM Hebrew; Hebrew (DOS) */
    { 863,   "IBM863" },      /* OEM French Canadian; French Canadian (DOS) */
    { 864,   "IBM864" },      /* OEM Arabic; Arabic (864) */
    { 865,   "IBM865" },      /* OEM Nordic; Nordic (DOS) */
    { 866,   "CP866" },       /* OEM Russian; Cyrillic (DOS) */
    { 869,   "IBM869" },      /* OEM Modern Greek; Greek, Modern (DOS) */
    { 870,   "IBM870" },      /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
    { 874,   "WINDOWS-874" }, /* ANSI/OEM Thai (ISO 8859-11); Thai (Windows) */
    { 875,   "CP875" },       /* IBM EBCDIC Greek Modern */
    { 932,   "SHIFT_JIS" },   /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
    { 936,   "GB2312" },      /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
    { 949,   NULL },          /* ANSI/OEM Korean (Unified Hangul Code) */
    { 950,   "BIG5" },        /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
    { 1026,  "IBM1026" },     /* IBM EBCDIC Turkish (Latin 5) */
    { 1047,  NULL },          /* IBM EBCDIC Latin 1/Open System */
    { 1140,  NULL },          /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
    { 1141,  NULL },          /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
    { 1142,  NULL },          /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
    { 1143,  NULL },          /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
    { 1144,  NULL },          /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
    { 1145,  NULL },          /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
    { 1146,  NULL },          /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
    { 1147,  NULL },          /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
    { 1148,  NULL },          /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
    { 1149,  NULL },          /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
    { 1200,  "UTF-16LE" },    /* Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications */
    { 1201,  "UTF-16BE" },    /* Unicode UTF-16, big endian byte order; available only to managed applications */
    { 1250,  "WINDOWS-1250" }, /* ANSI Central European; Central European (Windows) */
    { 1251,  "WINDOWS-1251" }, /* ANSI Cyrillic; Cyrillic (Windows) */
    { 1252,  "WINDOWS-1252" }, /* ANSI Latin 1; Western European (Windows) */
    { 1253,  "WINDOWS-1253" }, /* ANSI Greek; Greek (Windows) */
    { 1254,  "WINDOWS-1254" }, /* ANSI Turkish; Turkish (Windows) */
    { 1255,  "WINDOWS-1255" }, /* ANSI Hebrew; Hebrew (Windows) */
    { 1256,  "WINDOWS-1256" }, /* ANSI Arabic; Arabic (Windows) */
    { 1257,  "WINDOWS-1257" }, /* ANSI Baltic; Baltic (Windows) */
    { 1258,  "WINDOWS-1258" }, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
    { 1361,  "JOHAB" },       /* Korean (Johab) */
    { 10000, "MACINTOSH" },   /* MAC Roman; Western European (Mac) */
    { 10001, NULL },          /* Japanese (Mac) */
    { 10002, NULL },          /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
    { 10003, NULL },          /* Korean (Mac) */
    { 10004, NULL },          /* Arabic (Mac) */
    { 10005, NULL },          /* Hebrew (Mac) */
    { 10006, NULL },          /* Greek (Mac) */
    { 10007, NULL },          /* Cyrillic (Mac) */
    { 10008, NULL },          /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
    { 10010, NULL },          /* Romanian (Mac) */
    { 10017, NULL },          /* Ukrainian (Mac) */
    { 10021, NULL },          /* Thai (Mac) */
    { 10029, NULL },          /* MAC Latin 2; Central European (Mac) */
    { 10079, NULL },          /* Icelandic (Mac) */
    { 10081, NULL },          /* Turkish (Mac) */
    { 10082, NULL },          /* Croatian (Mac) */
    { 12000, "UTF-32LE" },    /* Unicode UTF-32, little endian byte order; available only to managed applications */
    { 12001, "UTF-32BE" },    /* Unicode UTF-32, big endian byte order; available only to managed applications */
    { 20000, NULL },          /* CNS Taiwan; Chinese Traditional (CNS) */
    { 20001, NULL },          /* TCA Taiwan */
    { 20002, NULL },          /* Eten Taiwan; Chinese Traditional (Eten) */
    { 20003, NULL },          /* IBM5550 Taiwan */
    { 20004, NULL },          /* TeleText Taiwan */
    { 20005, NULL },          /* Wang Taiwan */
    { 20105, NULL },          /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
    { 20106, NULL },          /* IA5 German (7-bit) */
    { 20107, NULL },          /* IA5 Swedish (7-bit) */
    { 20108, NULL },          /* IA5 Norwegian (7-bit) */
    { 20127, "US-ASCII" },    /* US-ASCII (7-bit) */
    { 20261, NULL },          /* T.61 */
    { 20269, NULL },          /* ISO 6937 Non-Spacing Accent */
    { 20273, "IBM273" },      /* IBM EBCDIC Germany */
    { 20277, "IBM277" },      /* IBM EBCDIC Denmark-Norway */
    { 20278, "IBM278" },      /* IBM EBCDIC Finland-Sweden */
    { 20280, "IBM280" },      /* IBM EBCDIC Italy */
    { 20284, "IBM284" },      /* IBM EBCDIC Latin America-Spain */
    { 20285, "IBM285" },      /* IBM EBCDIC United Kingdom */
    { 20290, "IBM290" },      /* IBM EBCDIC Japanese Katakana Extended */
    { 20297, "IBM297" },      /* IBM EBCDIC France */
    { 20420, "IBM420" },      /* IBM EBCDIC Arabic */
    { 20423, "IBM423" },      /* IBM EBCDIC Greek */
    { 20424, "IBM424" },      /* IBM EBCDIC Hebrew */
    { 20833, NULL },          /* IBM EBCDIC Korean Extended */
    { 20838, NULL },          /* IBM EBCDIC Thai */
    { 20866, "KOI8-R" },      /* Russian (KOI8-R); Cyrillic (KOI8-R) */
    { 20871, "IBM871" },      /* IBM EBCDIC Icelandic */
    { 20880, "IBM880" },      /* IBM EBCDIC Cyrillic Russian */
    { 20905, "IBM905" },      /* IBM EBCDIC Turkish */
    { 20924, NULL },          /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
    { 20932, "EUC-JP" },      /* Japanese (JIS 0208-1990 and 0212-1990) */
    { 20936, NULL },          /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
    { 20949, NULL },          /* Korean Wansung */
    { 21025, "CP1025" },      /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
    { 21027, NULL },          /* (deprecated) */
    { 21866, "KOI8-U" },      /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
    { 28591, "ISO-8859-1" },  /* ISO 8859-1 Latin 1; Western European (ISO) */
    { 28592, "ISO-8859-2" },  /* ISO 8859-2 Central European; Central European (ISO) */
    { 28593, "ISO-8859-3" },  /* ISO 8859-3 Latin 3 */
    { 28594, "ISO-8859-4" },  /* ISO 8859-4 Baltic */
    { 28595, "ISO-8859-5" },  /* ISO 8859-5 Cyrillic */
    { 28596, "ISO-8859-6" },  /* ISO 8859-6 Arabic */
    { 28597, "ISO-8859-7" },  /* ISO 8859-7 Greek */
    { 28598, "ISO-8859-8" },  /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    { 28599, "ISO-8859-9" },  /* ISO 8859-9 Turkish */
    { 28603, "ISO-8859-13" }, /* ISO 8859-13 Estonian */
    { 28605, "ISO-8859-15" }, /* ISO 8859-15 Latin 9 */
    { 29001, NULL },          /* Europa 3 */
    { 38598, NULL },          /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    { 50220, "ISO-2022-JP" },   /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) (guess) */
    { 50221, "ISO-2022-JP-2" }, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) (guess) */
    { 50222, "ISO-2022-JP-3" }, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) (guess) */
    { 50225, "ISO-2022-KR" }, /* ISO 2022 Korean */
    { 50227, NULL },          /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
    { 50229, NULL },          /* ISO 2022 Traditional Chinese */
    { 50930, NULL },          /* EBCDIC Japanese (Katakana) Extended */
    { 50931, NULL },          /* EBCDIC US-Canada and Japanese */
    { 50933, NULL },          /* EBCDIC Korean Extended and Korean */
    { 50935, NULL },          /* EBCDIC Simplified Chinese Extended and Simplified Chinese */
    { 50936, NULL },          /* EBCDIC Simplified Chinese */
    { 50937, NULL },          /* EBCDIC US-Canada and Traditional Chinese */
    { 50939, NULL },          /* EBCDIC Japanese (Latin) Extended and Japanese */
    { 51932, "EUC-JP" },      /* EUC Japanese */
    { 51936, "EUC-CN" },      /* EUC Simplified Chinese; Chinese Simplified (EUC) */
    { 51949, "EUC-KR" },      /* EUC Korean */
    { 51950, NULL },          /* EUC Traditional Chinese */
    { 52936, NULL },          /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
    { 54936, "GB18030" },     /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
    { 57002, NULL },          /* ISCII Devanagari */
    { 57003, NULL },          /* ISCII Bengali */
    { 57004, NULL },          /* ISCII Tamil */
    { 57005, NULL },          /* ISCII Telugu */
    { 57006, NULL },          /* ISCII Assamese */
    { 57007, NULL },          /* ISCII Oriya */
    { 57008, NULL },          /* ISCII Kannada */
    { 57009, NULL },          /* ISCII Malayalam */
    { 57010, NULL },          /* ISCII Gujarati */
    { 57011, NULL },          /* ISCII Punjabi */
    { 65000, "UTF-7" },       /* Unicode (UTF-7) */
    { 65001, "UTF-8" }        /* Unicode (UTF-8) */
};
331
+
332
+int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode);
333
+#endif /* HAVE_JSON */
334
+
335
+#endif /* __MSDOC_H */
... ...
@@ -36,10 +36,6 @@
36 36
 #include <unistd.h>
37 37
 #endif
38 38
 
39
-#if HAVE_ICONV
40
-#include <iconv.h>
41
-#endif
42
-
43 39
 #include "clamav.h"
44 40
 #include "cltypes.h"
45 41
 #include "others.h"
... ...
@@ -1317,1216 +1313,3 @@ abort:
1317 1317
 
1318 1318
     return ret == CL_BREAK ? CL_CLEAN : ret;
1319 1319
 }
1320
-
1321
-/* Summary and Document Information Parsing to JSON */
1322
-#if HAVE_JSON
1323
-
1324
-#define WINUNICODE 0x04B0
1325
-#define PROPCNTLIMIT 25
1326
-#define PROPSTRLIMIT 256 /* affects property strs, NOT sanitized strs (may result in a buffer allocating PROPSTRLIMIT*6) */
1327
-#define UTF16_MS "UTF-16LE"
1328
-
1329
-#define sum16_endian_convert(v) le16_to_host((uint16_t)(v))
1330
-#define sum32_endian_convert(v) le32_to_host((uint32_t)(v))
1331
-#define sum64_endian_convert(v) le64_to_host((uint64_t)(v))
1332
-
1333
-enum summary_pidsi {
1334
-    SPID_CODEPAGE   = 0x00000001,
1335
-    SPID_TITLE      = 0x00000002,
1336
-    SPID_SUBJECT    = 0x00000003,
1337
-    SPID_AUTHOR     = 0x00000004,
1338
-    SPID_KEYWORDS   = 0x00000005,
1339
-    SPID_COMMENTS   = 0x00000006,
1340
-    SPID_TEMPLATE   = 0x00000007,
1341
-    SPID_LASTAUTHOR = 0x00000008,
1342
-    SPID_REVNUMBER  = 0x00000009,
1343
-    SPID_EDITTIME   = 0x0000000A,
1344
-    SPID_LASTPRINTED  = 0x0000000B,
1345
-    SPID_CREATEDTIME  = 0x0000000C,
1346
-    SPID_MODIFIEDTIME = 0x0000000D,
1347
-    SPID_PAGECOUNT = 0x0000000E,
1348
-    SPID_WORDCOUNT = 0x0000000F,
1349
-    SPID_CHARCOUNT = 0x00000010,
1350
-    SPID_THUMBNAIL = 0x00000011,
1351
-    SPID_APPNAME   = 0x00000012,
1352
-    SPID_SECURITY  = 0x00000013
1353
-};
1354
-
1355
-enum docsum_pidsi {
1356
-    DSPID_CODEPAGE    = 0x00000001,
1357
-    DSPID_CATEGORY    = 0x00000002,
1358
-    DSPID_PRESFORMAT  = 0x00000003,
1359
-    DSPID_BYTECOUNT   = 0x00000004,
1360
-    DSPID_LINECOUNT   = 0x00000005,
1361
-    DSPID_PARCOUNT    = 0x00000006,
1362
-    DSPID_SLIDECOUNT  = 0x00000007,
1363
-    DSPID_NOTECOUNT   = 0x00000008,
1364
-    DSPID_HIDDENCOUNT = 0x00000009,
1365
-    DSPID_MMCLIPCOUNT = 0x0000000A,
1366
-    DSPID_SCALE       = 0x0000000B,
1367
-    DSPID_HEADINGPAIR = 0x0000000C, /* VT_VARIANT | VT_VECTOR */
1368
-    DSPID_DOCPARTS    = 0x0000000D, /* VT_VECTOR | VT_LPSTR */
1369
-    DSPID_MANAGER     = 0x0000000E,
1370
-    DSPID_COMPANY     = 0x0000000F,
1371
-    DSPID_LINKSDIRTY  = 0x00000010,
1372
-    DSPID_CCHWITHSPACES = 0x00000011,
1373
-    DSPID_SHAREDDOC   = 0x00000013, /* must be false */
1374
-    DSPID_LINKBASE    = 0x00000014, /* moved to user-defined */
1375
-    DSPID_HLINKS      = 0x00000015, /* moved to user-defined */
1376
-    DSPID_HYPERLINKSCHANGED = 0x00000016,
1377
-    DSPID_VERSION     = 0x00000017,
1378
-    DSPID_DIGSIG      = 0x00000018,
1379
-    DSPID_CONTENTTYPE   = 0x0000001A,
1380
-    DSPID_CONTENTSTATUS = 0x0000001B,
1381
-    DSPID_LANGUAGE      = 0x0000001C,
1382
-    DSPID_DOCVERSION    = 0x0000001D
1383
-};
1384
-
1385
-enum property_type {
1386
-    PT_EMPTY    = 0x0000,
1387
-    PT_NULL     = 0x0001,
1388
-    PT_INT16    = 0x0002,
1389
-    PT_INT32    = 0x0003,
1390
-    PT_FLOAT32  = 0x0004,
1391
-    PT_DOUBLE64 = 0x0005,
1392
-    PT_DATE     = 0x0007,
1393
-    PT_BSTR     = 0x0008,
1394
-    PT_BOOL    = 0x000B,
1395
-    PT_INT8v1  = 0x0010,
1396
-    PT_UINT8   = 0x0011,
1397
-    PT_UINT16  = 0x0012,
1398
-    PT_UINT32  = 0x0013,
1399
-    PT_INT64   = 0x0014,
1400
-    PT_UINT64  = 0x0015,
1401
-    PT_INT32v1  = 0x0016,
1402
-    PT_UINT32v1 = 0x0017,
1403
-    PT_LPSTR  = 0x001E,
1404
-    PT_LPWSTR = 0x001F,
1405
-    PT_FILETIME = 0x0040,
1406
-	
1407
-    /* More Types not currently handled */
1408
-};
1409
-
1410
-typedef struct summary_stub {
1411
-    uint16_t byte_order;
1412
-    uint16_t version;
1413
-    uint32_t system; /* implementation-specific */
1414
-    uint8_t CLSID[16];
1415
-
1416
-    uint32_t num_propsets; /* 1 or 2 */
1417
-} summary_stub_t;
1418
-
1419
-typedef struct propset_summary_entry {
1420
-    uint8_t FMTID[16];
1421
-    uint32_t offset;
1422
-} propset_entry_t;
1423
-
1424
-/* error codes */
1425
-#define OLE2_SUMMARY_ERROR_TOOSMALL      0x00000001
1426
-#define OLE2_SUMMARY_ERROR_OOB           0x00000002
1427
-#define OLE2_SUMMARY_ERROR_DATABUF       0x00000004
1428
-#define OLE2_SUMMARY_ERROR_INVALID_ENTRY 0x00000008
1429
-#define OLE2_SUMMARY_LIMIT_PROPS         0x00000010
1430
-#define OLE2_SUMMARY_FLAG_TIMEOUT        0x00000020
1431
-#define OLE2_SUMMARY_FLAG_CODEPAGE       0x00000040
1432
-#define OLE2_SUMMARY_FLAG_UNKNOWN_PROPID 0x00000080
1433
-#define OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE 0x00000100
1434
-#define OLE2_SUMMARY_FLAG_TRUNC_STR      0x00000200
1435
-
1436
-#define OLE2_CODEPAGE_ERROR_NOTFOUND     0x00000400
1437
-#define OLE2_CODEPAGE_ERROR_UNINITED     0x00000800
1438
-#define OLE2_CODEPAGE_ERROR_INVALID      0x00001000
1439
-#define OLE2_CODEPAGE_ERROR_INCOMPLETE   0x00002000
1440
-#define OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL 0x00002000
1441
-
1442
-/* metadata structures */
1443
-typedef struct summary_ctx {
1444
-    cli_ctx *ctx;
1445
-    int mode;
1446
-    fmap_t *sfmap;
1447
-    json_object *summary;
1448
-    size_t maplen;
1449
-    uint32_t flags;
1450
-
1451
-    /* propset metadata */
1452
-    uint32_t pssize; /* track from propset start, not tail start */
1453
-    uint16_t codepage;
1454
-    int writecp;
1455
-
1456
-    /* property metadata */
1457
-    const char *propname;
1458
-
1459
-    /* timeout meta */
1460
-    int toval;
1461
-} summary_ctx_t;
1462
-
1463
-/* string conversion */
1464
-struct codepage_entry {
1465
-    int16_t codepage;
1466
-    const char *encoding;
1467
-};
1468
-
1469
-#define NUMCODEPAGES sizeof(codepage_entries)/sizeof(struct codepage_entry)
1470
-/* MAINTAIN - the array in codepage value sorted order */
1471
-static const struct codepage_entry codepage_entries[] = {
1472
-    { 37,    "IBM037" },      /* IBM EBCDIC US-Canada */
1473
-    { 437,   "IBM437" },      /* OEM United States */
1474
-    { 500,   "IBM500" },      /* IBM EBCDIC International */
1475
-    { 708,   "ASMO-708" },    /* Arabic (ASMO 708) */
1476
-    { 709,   NULL },          /* Arabic (ASMO-449+, BCON V4) */
1477
-    { 710,   NULL },          /* Arabic - Transparent Arabic */
1478
-    { 720,   NULL },          /* Arabic (Transparent ASMO); Arabic (DOS) */
1479
-    { 737,   NULL },          /* OEM Greek (formerly 437G); Greek (DOS) */
1480
-    { 775,   "IBM775" },      /* OEM Baltic; Baltic (DOS) */
1481
-    { 850,   "IBM850" },      /* OEM Multilingual Latin 1; Western European (DOS) */
1482
-    { 852,   "IBM852" },      /* OEM Latin 2; Central European (DOS) */
1483
-    { 855,   "IBM855" },      /* OEM Cyrillic (primarily Russian) */
1484
-    { 857,   "IBM857" },      /* OEM Turkish; Turkish (DOS) */
1485
-    { 858,   NULL },          /* OEM Multilingual Latin 1 + Euro symbol */
1486
-    { 860,   "IBM860" },      /* OEM Portuguese; Portuguese (DOS) */
1487
-    { 861,   "IBM861" },      /* OEM Icelandic; Icelandic (DOS) */
1488
-    { 862,   NULL },          /* OEM Hebrew; Hebrew (DOS) */
1489
-    { 863,   "IBM863" },      /* OEM French Canadian; French Canadian (DOS) */
1490
-    { 864,   "IBM864" },      /* OEM Arabic; Arabic (864) */
1491
-    { 865,   "IBM865" },      /* OEM Nordic; Nordic (DOS) */
1492
-    { 866,   "CP866" },       /* OEM Russian; Cyrillic (DOS) */
1493
-    { 869,   "IBM869" },      /* OEM Modern Greek; Greek, Modern (DOS) */
1494
-    { 870,   "IBM870" },      /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
1495
-    { 874,   "WINDOWS-874" }, /* ANSI/OEM Thai (ISO 8859-11); Thai (Windows) */
1496
-    { 875,   "CP875" },       /* IBM EBCDIC Greek Modern */
1497
-    { 932,   "SHIFT_JIS" },   /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
1498
-    { 936,   "GB2312" },      /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
1499
-    { 949,   NULL },          /* ANSI/OEM Korean (Unified Hangul Code) */
1500
-    { 950,   "BIG5" },        /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
1501
-    { 1026,  "IBM1026" },     /* IBM EBCDIC Turkish (Latin 5) */
1502
-    { 1047,  NULL },          /* IBM EBCDIC Latin 1/Open System */
1503
-    { 1140,  NULL },          /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
1504
-    { 1141,  NULL },          /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
1505
-    { 1142,  NULL },          /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
1506
-    { 1143,  NULL },          /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
1507
-    { 1144,  NULL },          /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
1508
-    { 1145,  NULL },          /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
1509
-    { 1146,  NULL },          /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
1510
-    { 1147,  NULL },          /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
1511
-    { 1148,  NULL },          /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
1512
-    { 1149,  NULL },          /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
1513
-    { 1200,  "UTF-16LE" },    /* Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications */
1514
-    { 1201,  "UTF-16BE" },    /* Unicode UTF-16, big endian byte order; available only to managed applications */
1515
-    { 1250,  "WINDOWS-1250" }, /* ANSI Central European; Central European (Windows) */
1516
-    { 1251,  "WINDOWS-1251" }, /* ANSI Cyrillic; Cyrillic (Windows) */
1517
-    { 1252,  "WINDOWS-1252" }, /* ANSI Latin 1; Western European (Windows) */
1518
-    { 1253,  "WINDOWS-1253" }, /* ANSI Greek; Greek (Windows) */
1519
-    { 1254,  "WINDOWS-1254" }, /* ANSI Turkish; Turkish (Windows) */
1520
-    { 1255,  "WINDOWS-1255" }, /* ANSI Hebrew; Hebrew (Windows) */
1521
-    { 1256,  "WINDOWS-1256" }, /* ANSI Arabic; Arabic (Windows) */
1522
-    { 1257,  "WINDOWS-1257" }, /* ANSI Baltic; Baltic (Windows) */
1523
-    { 1258,  "WINDOWS-1258" }, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
1524
-    { 1361,  "JOHAB" },       /* Korean (Johab) */
1525
-    { 10000, "MACINTOSH" },   /* MAC Roman; Western European (Mac) */
1526
-    { 10001, NULL },          /* Japanese (Mac) */
1527
-    { 10002, NULL },          /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
1528
-    { 10003, NULL },          /* Korean (Mac) */
1529
-    { 10004, NULL },          /* Arabic (Mac) */
1530
-    { 10005, NULL },          /* Hebrew (Mac) */
1531
-    { 10006, NULL },          /* Greek (Mac) */
1532
-    { 10007, NULL },          /* Cyrillic (Mac) */
1533
-    { 10008, NULL },          /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
1534
-    { 10010, NULL },          /* Romanian (Mac) */
1535
-    { 10017, NULL },          /* Ukrainian (Mac) */
1536
-    { 10021, NULL },          /* Thai (Mac) */
1537
-    { 10029, NULL },          /* MAC Latin 2; Central European (Mac) */
1538
-    { 10079, NULL },          /* Icelandic (Mac) */
1539
-    { 10081, NULL },          /* Turkish (Mac) */
1540
-    { 10082, NULL },          /* Croatian (Mac) */
1541
-    { 12000, "UTF-32LE" },    /* Unicode UTF-32, little endian byte order; available only to managed applications */
1542
-    { 12001, "UTF-32BE" },    /* Unicode UTF-32, big endian byte order; available only to managed applications */
1543
-    { 20000, NULL },          /* CNS Taiwan; Chinese Traditional (CNS) */
1544
-    { 20001, NULL },          /* TCA Taiwan */
1545
-    { 20002, NULL },          /* Eten Taiwan; Chinese Traditional (Eten) */
1546
-    { 20003, NULL },          /* IBM5550 Taiwan */
1547
-    { 20004, NULL },          /* TeleText Taiwan */
1548
-    { 20005, NULL },          /* Wang Taiwan */
1549
-    { 20105, NULL },          /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
1550
-    { 20106, NULL },          /* IA5 German (7-bit) */
1551
-    { 20107, NULL },          /* IA5 Swedish (7-bit) */
1552
-    { 20108, NULL },          /* IA5 Norwegian (7-bit) */
1553
-    { 20127, "US-ASCII" },    /* US-ASCII (7-bit) */
1554
-    { 20261, NULL },          /* T.61 */
1555
-    { 20269, NULL },          /* ISO 6937 Non-Spacing Accent */
1556
-    { 20273, "IBM273" },      /* IBM EBCDIC Germany */
1557
-    { 20277, "IBM277" },      /* IBM EBCDIC Denmark-Norway */
1558
-    { 20278, "IBM278" },      /* IBM EBCDIC Finland-Sweden */
1559
-    { 20280, "IBM280" },      /* IBM EBCDIC Italy */
1560
-    { 20284, "IBM284" },      /* IBM EBCDIC Latin America-Spain */
1561
-    { 20285, "IBM285" },      /* IBM EBCDIC United Kingdom */
1562
-    { 20290, "IBM290" },      /* IBM EBCDIC Japanese Katakana Extended */
1563
-    { 20297, "IBM297" },      /* IBM EBCDIC France */
1564
-    { 20420, "IBM420" },      /* IBM EBCDIC Arabic */
1565
-    { 20423, "IBM423" },      /* IBM EBCDIC Greek */
1566
-    { 20424, "IBM424" },      /* IBM EBCDIC Hebrew */
1567
-    { 20833, NULL },          /* IBM EBCDIC Korean Extended */
1568
-    { 20838, NULL },          /* IBM EBCDIC Thai */
1569
-    { 20866, "KOI8-R" },      /* Russian (KOI8-R); Cyrillic (KOI8-R) */
1570
-    { 20871, "IBM871" },      /* IBM EBCDIC Icelandic */
1571
-    { 20880, "IBM880" },      /* IBM EBCDIC Cyrillic Russian */
1572
-    { 20905, "IBM905" },      /* IBM EBCDIC Turkish */
1573
-    { 20924, NULL },          /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
1574
-    { 20932, "EUC-JP" },      /* Japanese (JIS 0208-1990 and 0212-1990) */
1575
-    { 20936, NULL },          /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
1576
-    { 20949, NULL },          /* Korean Wansung */
1577
-    { 21025, "CP1025" },      /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
1578
-    { 21027, NULL },          /* (deprecated) */
1579
-    { 21866, "KOI8-U" },      /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
1580
-    { 28591, "ISO-8859-1" },  /* ISO 8859-1 Latin 1; Western European (ISO) */
1581
-    { 28592, "ISO-8859-2" },  /* ISO 8859-2 Central European; Central European (ISO) */
1582
-    { 28593, "ISO-8859-3" },  /* ISO 8859-3 Latin 3 */
1583
-    { 28594, "ISO-8859-4" },  /* ISO 8859-4 Baltic */
1584
-    { 28595, "ISO-8859-5" },  /* ISO 8859-5 Cyrillic */
1585
-    { 28596, "ISO-8859-6" },  /* ISO 8859-6 Arabic */
1586
-    { 28597, "ISO-8859-7" },  /* ISO 8859-7 Greek */
1587
-    { 28598, "ISO-8859-8" },  /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
1588
-    { 28599, "ISO-8859-9" },  /* ISO 8859-9 Turkish */
1589
-    { 28603, "ISO-8859-13" }, /* ISO 8859-13 Estonian */
1590
-    { 28605, "ISO-8859-15" }, /* ISO 8859-15 Latin 9 */
1591
-    { 29001, NULL },          /* Europa 3 */
1592
-    { 38598, NULL },          /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
1593
-    { 50220, "ISO-2022-JP" },   /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) (guess) */
1594
-    { 50221, "ISO-2022-JP-2" }, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) (guess) */
1595
-    { 50222, "ISO-2022-JP-3" }, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) (guess) */
1596
-    { 50225, "ISO-2022-KR" }, /* ISO 2022 Korean */
1597
-    { 50227, NULL },          /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
1598
-    { 50229, NULL },          /* ISO 2022 Traditional Chinese */
1599
-    { 50930, NULL },          /* EBCDIC Japanese (Katakana) Extended */
1600
-    { 50931, NULL },          /* EBCDIC US-Canada and Japanese */
1601
-    { 50933, NULL },          /* EBCDIC Korean Extended and Korean */
1602
-    { 50935, NULL },          /* EBCDIC Simplified Chinese Extended and Simplified Chinese */
1603
-    { 50936, NULL },          /* EBCDIC Simplified Chinese */
1604
-    { 50937, NULL },          /* EBCDIC US-Canada and Traditional Chinese */
1605
-    { 50939, NULL },          /* EBCDIC Japanese (Latin) Extended and Japanese */
1606
-    { 51932, "EUC-JP" },      /* EUC Japanese */
1607
-    { 51936, "EUC-CN" },      /* EUC Simplified Chinese; Chinese Simplified (EUC) */
1608
-    { 51949, "EUC-KR" },      /* EUC Korean */
1609
-    { 51950, NULL },          /* EUC Traditional Chinese */
1610
-    { 52936, NULL },          /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
1611
-    { 54936, "GB18030" },     /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
1612
-    { 57002, NULL },          /* ISCII Devanagari */
1613
-    { 57003, NULL },          /* ISCII Bengali */
1614
-    { 57004, NULL },          /* ISCII Tamil */
1615
-    { 57005, NULL },          /* ISCII Telugu */
1616
-    { 57006, NULL },          /* ISCII Assamese */
1617
-    { 57007, NULL },          /* ISCII Oriya */
1618
-    { 57008, NULL },          /* ISCII Kannada */
1619
-    { 57009, NULL },          /* ISCII Malayalam */
1620
-    { 57010, NULL },          /* ISCII Gujarati */
1621
-    { 57011, NULL },          /* ISCII Punjabi */
1622
-    { 65000, "UTF-7" },       /* Unicode (UTF-7) */
1623
-    { 65001, "UTF-8" }        /* Unicode (UTF-8) */
1624
-};
1625
-
1626
-static char *
1627
-ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
1628
-{
1629
-    char *outbuf=NULL;
1630
-#if HAVE_ICONV
1631
-    char *buf, *p1, *p2;
1632
-    off_t offset;
1633
-    size_t inlen, outlen, nonrev, sz2;
1634
-    int i, try;
1635
-    iconv_t cd;
1636
-#endif
1637
-    /* applies in the both case */
1638
-    if (sctx->codepage == 20127 || sctx->codepage == 65001) {
1639
-        outbuf = cli_strdup(begin);
1640
-        return outbuf;
1641
-    }
1642
-
1643
-#if HAVE_ICONV
1644
-    p1 = buf = cli_calloc(1, sz);
1645
-    if (!(buf))
1646
-        return NULL;
1647
-
1648
-    memcpy(buf, begin, sz);
1649
-    inlen = sz;
1650
-
1651
-    /* encoding lookup if not specified */
1652
-    if (!encoding) {
1653
-        for (i = 0; i < NUMCODEPAGES; ++i) {
1654
-            if (sctx->codepage == codepage_entries[i].codepage)
1655
-                encoding = codepage_entries[i].encoding;
1656
-            else if (sctx->codepage < codepage_entries[i].codepage) {
1657
-                /* assuming sorted array */
1658
-                break;
1659
-            }
1660
-        }
1661
-
1662
-        if (!encoding) {
1663
-            cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
1664
-            sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
1665
-            free(buf);
1666
-            return NULL;
1667
-        }
1668
-    }
1669
-
1670
-    cd = iconv_open("UTF-8", encoding);
1671
-    if (cd == (iconv_t)(-1)) {
1672
-        cli_errmsg("ole2_convert_utf: could not initialize iconv\n");
1673
-        sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
1674
-    }
1675
-    else {
1676
-        offset = 0;
1677
-        for (try = 1; try <= 3; ++try) {
1678
-            /* charset to UTF-8 should never exceed sz*6 */
1679
-            sz2 = (try*2) * sz;
1680
-            /* use cli_realloc, reuse the buffer that has already been translated */
1681
-            outbuf = (char *)cli_realloc(outbuf, sz2+1);
1682
-            if (!outbuf) {
1683
-                free(buf);
1684
-                return NULL;
1685
-            }
1686
-
1687
-            outlen = sz2 - offset;
1688
-            p2 = outbuf + offset;
1689
-
1690
-            /* conversion */
1691
-            nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
1692
-
1693
-            if (errno == EILSEQ) {
1694
-                cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
1695
-                sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
1696
-                break;
1697
-            }
1698
-            else if (errno == EINVAL && nonrev == (size_t)-1) {
1699
-                cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
1700
-                sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
1701
-                break;
1702
-            }
1703
-            else if (inlen == 0) {
1704
-                //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
1705
-                break;
1706
-            }
1707
-
1708
-            //outbuf[sz2 - outlen] = '\0';
1709
-            //cli_dbgmsg("%u %s\n", inlen, outbuf);
1710
-
1711
-            offset = sz2 - outlen;
1712
-            if (try < 3)
1713
-                cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
1714
-                           (long long unsigned)((try*2) * sz), (long long unsigned)(((try+1)*2) * sz));
1715
-        }
1716
-
1717
-        if (errno == E2BIG && nonrev == (size_t)-1) {
1718
-            cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
1719
-            sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
1720
-        }
1721
-
1722
-        outbuf[sz2 - outlen] = '\0';
1723
-    }
1724
-
1725
-    iconv_close(cd);
1726
-    free(buf);
1727
-#endif
1728
-    /* this should force base64 encoding if NULL */
1729
-    return outbuf;
1730
-}
1731
-
1732
-static int
1733
-ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
1734
-{
1735
-    uint16_t proptype, padding;
1736
-    int ret = CL_SUCCESS;
1737
-
1738
-    if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
1739
-        sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
1740
-        return CL_ETIMEOUT;
1741
-    }
1742
-
1743
-    if (offset+sizeof(proptype)+sizeof(padding) > sctx->pssize) {
1744
-        sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1745
-        return CL_EFORMAT;
1746
-    }
1747
-
1748
-    memcpy(&proptype, databuf+offset, sizeof(proptype));
1749
-    offset+=sizeof(proptype);
1750
-    memcpy(&padding, databuf+offset, sizeof(padding));
1751
-    offset+=sizeof(padding);
1752
-    /* endian conversion */
1753
-    proptype = sum16_endian_convert(proptype);
1754
-
1755
-    //cli_dbgmsg("proptype: 0x%04x\n", proptype);
1756
-    if (padding != 0) {
1757
-        cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
1758
-        sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
1759
-        return CL_EFORMAT;
1760
-    }
1761
-
1762
-    switch (proptype) {
1763
-    case PT_EMPTY:
1764
-    case PT_NULL:
1765
-        ret = cli_jsonnull(sctx->summary, sctx->propname);
1766
-        break;
1767
-    case PT_INT16:
1768
-	{
1769
-            int16_t dout;
1770
-            if (offset+sizeof(dout) > sctx->pssize) {
1771
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1772
-                return CL_EFORMAT;
1773
-            }
1774
-            memcpy(&dout, databuf+offset, sizeof(dout));
1775
-            offset+=sizeof(dout);
1776
-            /* endian conversion */
1777
-            dout = sum16_endian_convert(dout);
1778
-
1779
-            if (sctx->writecp) {
1780
-                sctx->codepage = (uint16_t)dout;
1781
-                ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
1782
-            }
1783
-            else
1784
-                ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1785
-            break;
1786
-	}
1787
-    case PT_INT32:
1788
-    case PT_INT32v1:
1789
-	{
1790
-            int32_t dout;
1791
-            if (offset+sizeof(dout) > sctx->pssize) {
1792
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1793
-                return CL_EFORMAT;
1794
-            }
1795
-            memcpy(&dout, databuf+offset, sizeof(dout));
1796
-            offset+=sizeof(dout);
1797
-            /* endian conversion */
1798
-            dout = sum32_endian_convert(dout);
1799
-
1800
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1801
-            break;
1802
-	}
1803
-    case PT_FLOAT32: /* review this please */
1804
-	{
1805
-            float dout;
1806
-            if (offset+sizeof(dout) > sctx->pssize) {
1807
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1808
-                return CL_EFORMAT;
1809
-            }
1810
-            memcpy(&dout, databuf+offset, sizeof(dout));
1811
-            offset+=sizeof(dout);
1812
-            /* endian conversion */
1813
-            dout = sum32_endian_convert(dout);
1814
-
1815
-            ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
1816
-            break;
1817
-	}
1818
-    case PT_DATE:
1819
-    case PT_DOUBLE64: /* review this please */
1820
-	{
1821
-            double dout;
1822
-            if (offset+sizeof(dout) > sctx->pssize) {
1823
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1824
-                return CL_EFORMAT;
1825
-            }
1826
-            memcpy(&dout, databuf+offset, sizeof(dout));
1827
-            offset+=sizeof(dout);
1828
-            /* endian conversion */
1829
-            dout = sum64_endian_convert(dout);
1830
-
1831
-            ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
1832
-            break;
1833
-	}
1834
-    case PT_BOOL:
1835
-	{
1836
-            uint16_t dout;
1837
-            if (offset+sizeof(dout) > sctx->pssize) {
1838
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1839
-                return CL_EFORMAT;
1840
-            }
1841
-            memcpy(&dout, databuf+offset, sizeof(dout));
1842
-            offset+=sizeof(dout);
1843
-            /* no need for endian conversion */
1844
-
1845
-            ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
1846
-            break;
1847
-	}
1848
-    case PT_INT8v1:
1849
-	{
1850
-            int8_t dout;
1851
-            if (offset+sizeof(dout) > sctx->pssize) {
1852
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1853
-                return CL_EFORMAT;
1854
-            }
1855
-            memcpy(&dout, databuf+offset, sizeof(dout));
1856
-            offset+=sizeof(dout);
1857
-            /* no need for endian conversion */
1858
-
1859
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1860
-            break;
1861
-	}
1862
-    case PT_UINT8:
1863
-	{
1864
-            uint8_t dout;
1865
-            if (offset+sizeof(dout) > sctx->pssize) {
1866
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1867
-                return CL_EFORMAT;
1868
-            }
1869
-            memcpy(&dout, databuf+offset, sizeof(dout));
1870
-            offset+=sizeof(dout);
1871
-            /* no need for endian conversion */
1872
-
1873
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1874
-            break;
1875
-	}
1876
-    case PT_UINT16:
1877
-	{
1878
-            uint16_t dout;
1879
-            if (offset+sizeof(dout) > sctx->pssize) {
1880
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1881
-                return CL_EFORMAT;
1882
-            }
1883
-            memcpy(&dout, databuf+offset, sizeof(dout));
1884
-            offset+=sizeof(dout);
1885
-            /* endian conversion */
1886
-            dout = sum16_endian_convert(dout);
1887
-
1888
-            if (sctx->writecp)
1889
-                sctx->codepage = dout;
1890
-
1891
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1892
-            break;
1893
-	}
1894
-    case PT_UINT32:
1895
-    case PT_UINT32v1:
1896
-	{
1897
-            uint32_t dout;
1898
-            if (offset+sizeof(dout) > sctx->pssize) {
1899
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1900
-                return CL_EFORMAT;
1901
-            }
1902
-            memcpy(&dout, databuf+offset, sizeof(dout));
1903
-            offset+=sizeof(dout);
1904
-            /* endian conversion */
1905
-            dout = sum32_endian_convert(dout);
1906
-
1907
-            ret = cli_jsonint(sctx->summary, sctx->propname, dout);
1908
-            break;
1909
-	}
1910
-    case PT_INT64:
1911
-	{
1912
-            int64_t dout;
1913
-            if (offset+sizeof(dout) > sctx->pssize) {
1914
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1915
-                return CL_EFORMAT;
1916
-            }
1917
-            memcpy(&dout, databuf+offset, sizeof(dout));
1918
-            offset+=sizeof(dout);
1919
-            /* endian conversion */
1920
-            dout = sum64_endian_convert(dout);
1921
-
1922
-            ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
1923
-            break;
1924
-	}
1925
-    case PT_UINT64:
1926
-	{
1927
-            uint64_t dout;
1928
-            if (offset+sizeof(dout) > sctx->pssize) {
1929
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1930
-                return CL_EFORMAT;
1931
-            }
1932
-            memcpy(&dout, databuf+offset, sizeof(dout));
1933
-            offset+=sizeof(dout);
1934
-            /* endian conversion */
1935
-            dout = sum64_endian_convert(dout);
1936
-
1937
-            ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
1938
-            break;
1939
-	}
1940
-    case PT_BSTR:
1941
-    case PT_LPSTR:
1942
-        if (sctx->codepage == 0) {
1943
-            cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
1944
-            sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
1945
-        }
1946
-        else {
1947
-            uint32_t strsize;
1948
-            char *outstr, *outstr2;
1949
-
1950
-            if (offset+sizeof(strsize) > sctx->pssize) {
1951
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1952
-                return CL_EFORMAT;
1953
-            }
1954
-
1955
-            memcpy(&strsize, databuf+offset, sizeof(strsize));
1956
-            offset+=sizeof(strsize);
1957
-            /* endian conversion? */
1958
-            strsize = sum32_endian_convert(strsize);
1959
-
1960
-            if (offset+strsize > sctx->pssize) {
1961
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
1962
-                return CL_EFORMAT;
1963
-            }
1964
-
1965
-            /* limitation on string length */
1966
-            if (strsize > PROPSTRLIMIT) {
1967
-                cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
1968
-                           (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
1969
-                sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
1970
-                strsize = PROPSTRLIMIT;
1971
-            }
1972
-
1973
-            outstr = cli_calloc(strsize+1, 1); /* last char must be NULL */
1974
-            if (!outstr) {
1975
-                return CL_EMEM;
1976
-            }
1977
-            strncpy(outstr, (const char *)(databuf+offset), strsize);
1978
-
1979
-            /* conversion of various encodings to UTF-8 */
1980
-            outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
1981
-            if (!outstr2) {
1982
-                /* use base64 encoding when all else fails! */
1983
-                char b64jstr[PROPSTRLIMIT];
1984
-
1985
-                /* outstr2 should be 4/3 times the original (rounded up) */
1986
-                outstr2 = cl_base64_encode(outstr, strsize);
1987
-                if (!outstr2) {
1988
-                    cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
1989
-                    return CL_EMEM;
1990
-                }
1991
-
1992
-                snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
1993
-                ret = cli_jsonbool(sctx->summary, b64jstr, 1);
1994
-                if (ret != CL_SUCCESS)
1995
-                    return ret;
1996
-            }
1997
-
1998
-            ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
1999
-            free(outstr);
2000
-            free(outstr2);
2001
-        }
2002
-        break;
2003
-    case PT_LPWSTR:
2004
-	{
2005
-            uint32_t strsize;
2006
-            char *outstr, *outstr2;
2007
-
2008
-            if (offset+sizeof(strsize) > sctx->pssize) {
2009
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
2010
-                return CL_EFORMAT;
2011
-            }
2012
-            memcpy(&strsize, databuf+offset, sizeof(strsize));
2013
-            offset+=sizeof(strsize);
2014
-            /* endian conversion; wide strings are by length, not size (x2) */
2015
-            strsize = sum32_endian_convert(strsize)*2;
2016
-
2017
-            /* limitation on string length */
2018
-            if (strsize > (2*PROPSTRLIMIT)) {
2019
-                cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
2020
-                           (unsigned long)strsize, (unsigned long)(2*PROPSTRLIMIT));
2021
-                sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
2022
-                strsize = (2*PROPSTRLIMIT);
2023
-            }
2024
-
2025
-            if (offset+strsize > sctx->pssize) {
2026
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
2027
-                return CL_EFORMAT;
2028
-            }
2029
-            outstr = cli_calloc(strsize+2, 1); /* last two chars must be NULL */
2030
-            if (!outstr) {
2031
-                return CL_EMEM;
2032
-            }
2033
-            memcpy(outstr, (const char *)(databuf+offset), strsize);
2034
-            /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
2035
-            outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
2036
-            if (!outstr2) {
2037
-                /* use base64 encoding when all else fails! */
2038
-                char b64jstr[PROPSTRLIMIT];
2039
-
2040
-                outstr2 = cl_base64_encode(outstr, strsize);
2041
-                if (!outstr2) {
2042
-                    free(outstr);
2043
-                    return CL_EMEM;
2044
-                }
2045
-
2046
-                snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
2047
-                ret = cli_jsonbool(sctx->summary, b64jstr, 1);
2048
-                if (ret != CL_SUCCESS)
2049
-                    return ret;
2050
-            }
2051
-
2052
-            ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
2053
-            free(outstr);
2054
-            free(outstr2);
2055
-            break;
2056
-	}
2057
-    case PT_FILETIME:
2058
-	{
2059
-            uint32_t ltime, htime;
2060
-            uint64_t wtime = 0, utime =0;
2061
-
2062
-            if (offset+sizeof(ltime)+sizeof(htime) > sctx->pssize) {
2063
-                sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
2064
-                return CL_EFORMAT;
2065
-            }
2066
-            memcpy(&ltime, databuf+offset, sizeof(ltime));
2067
-            offset+=sizeof(ltime);
2068
-            memcpy(&htime, databuf+offset, sizeof(htime));
2069
-            offset+=sizeof(ltime);
2070
-            ltime = sum32_endian_convert(ltime);
2071
-            htime = sum32_endian_convert(htime);
2072
-
2073
-            /* UNIX timestamp formatting */
2074
-            wtime = htime;
2075
-            wtime <<= 32;
2076
-            wtime |= ltime;
2077
-
2078
-            utime = wtime / 10000000;
2079
-            utime -= 11644473600LL;
2080
-
2081
-            if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
2082
-                cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
2083
-            }
2084
-            else {
2085
-                ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
2086
-            }
2087
-            break;
2088
-	}
2089
-    default:
2090
-        cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n", 
2091
-                   proptype, sctx->propname);
2092
-        sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
2093
-    }
2094
-
2095
-    return ret;
2096
-}
2097
-
2098
-static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
2099
-{
2100
-    switch(propid) {
2101
-    case DSPID_CODEPAGE:
2102
-        sctx->writecp = 1; /* must be set ONLY for codepage */
2103
-        sctx->propname = "CodePage";
2104
-        break;
2105
-    case DSPID_CATEGORY:
2106
-        sctx->propname = "Category";
2107
-        break;
2108
-    case DSPID_PRESFORMAT:
2109
-        sctx->propname = "PresentationTarget";
2110
-        break;
2111
-    case DSPID_BYTECOUNT:
2112
-        sctx->propname = "Bytes";
2113
-        break;
2114
-    case DSPID_LINECOUNT:
2115
-        sctx->propname = "Lines";
2116
-        break;
2117
-    case DSPID_PARCOUNT:
2118
-        sctx->propname = "Paragraphs";
2119
-        break;
2120
-    case DSPID_SLIDECOUNT:
2121
-        sctx->propname = "Slides";
2122
-        break;
2123
-    case DSPID_NOTECOUNT:
2124
-        sctx->propname = "Notes";
2125
-        break;
2126
-    case DSPID_HIDDENCOUNT:
2127
-        sctx->propname = "HiddenSlides";
2128
-        break;
2129
-    case DSPID_MMCLIPCOUNT:
2130
-        sctx->propname = "MMClips";
2131
-        break;
2132
-    case DSPID_SCALE:
2133
-        sctx->propname = "Scale";
2134
-        break;
2135
-    case DSPID_HEADINGPAIR: /* VT_VARIANT | VT_VECTOR */
2136
-        sctx->propname = "HeadingPairs";
2137
-        break;
2138
-    case DSPID_DOCPARTS:    /* VT_VECTOR | VT_LPSTR */
2139
-        sctx->propname = "DocPartTitles";
2140
-        break;
2141
-    case DSPID_MANAGER:
2142
-        sctx->propname = "Manager";
2143
-        break;
2144
-    case DSPID_COMPANY:
2145
-        sctx->propname = "Company";
2146
-        break;
2147
-    case DSPID_LINKSDIRTY:
2148
-        sctx->propname = "LinksDirty";
2149
-        break;
2150
-    case DSPID_CCHWITHSPACES:
2151
-        sctx->propname = "Char&WSCount";
2152
-        break;
2153
-    case DSPID_SHAREDDOC:   /* SHOULD BE FALSE! */
2154
-        sctx->propname = "SharedDoc";
2155
-        break;
2156
-    case DSPID_LINKBASE:    /* moved to user-defined */
2157
-        sctx->propname = "LinkBase";
2158
-        break;
2159
-    case DSPID_HLINKS:      /* moved to user-defined */
2160
-        sctx->propname = "HyperLinks";
2161
-        break;
2162
-    case DSPID_HYPERLINKSCHANGED:
2163
-        sctx->propname = "HyperLinksChanged";
2164
-        break;
2165
-    case DSPID_VERSION:
2166
-        sctx->propname = "Version";
2167
-        break;
2168
-    case DSPID_DIGSIG:
2169
-        sctx->propname = "DigitalSig";
2170
-        break;
2171
-    case DSPID_CONTENTTYPE:
2172
-        sctx->propname = "ContentType";
2173
-        break;
2174
-    case DSPID_CONTENTSTATUS:
2175
-        sctx->propname = "ContentStatus";
2176
-        break;
2177
-    case DSPID_LANGUAGE:
2178
-        sctx->propname = "Language";
2179
-        break;
2180
-    case DSPID_DOCVERSION:
2181
-        sctx->propname = "DocVersion";
2182
-        break;
2183
-    default:
2184
-        cli_dbgmsg("ole2_docsum_propset_json: unrecognized propid!\n");
2185
-        sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
2186
-    }
2187
-}
2188
-
2189
-static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
2190
-{
2191
-    switch(propid) {
2192
-    case SPID_CODEPAGE:
2193
-        sctx->writecp = 1; /* must be set ONLY for codepage */
2194
-        sctx->propname = "CodePage";
2195
-        break;
2196
-    case SPID_TITLE:
2197
-        sctx->propname = "Title";
2198
-        break;
2199
-    case SPID_SUBJECT:
2200
-        sctx->propname = "Subject";
2201
-        break;
2202
-    case SPID_AUTHOR:
2203
-        sctx->propname = "Author";
2204
-        break;
2205
-    case SPID_KEYWORDS:
2206
-        sctx->propname = "Keywords";
2207
-        break;
2208
-    case SPID_COMMENTS:
2209
-        sctx->propname = "Comments";
2210
-        break;
2211
-    case SPID_TEMPLATE:
2212
-        sctx->propname = "Template";
2213
-        break;
2214
-    case SPID_LASTAUTHOR:
2215
-        sctx->propname = "LastAuthor";
2216
-        break;
2217
-    case SPID_REVNUMBER:
2218
-        sctx->propname = "RevNumber";
2219
-        break;
2220
-    case SPID_EDITTIME:
2221
-        sctx->propname = "EditTime";
2222
-        break;
2223
-    case SPID_LASTPRINTED:
2224
-        sctx->propname = "LastPrinted";
2225
-        break;
2226
-    case SPID_CREATEDTIME:
2227
-        sctx->propname = "CreatedTime";
2228
-        break;
2229
-    case SPID_MODIFIEDTIME:
2230
-        sctx->propname = "ModifiedTime";
2231
-        break;
2232
-    case SPID_PAGECOUNT:
2233
-        sctx->propname = "PageCount";
2234
-        break;
2235
-    case SPID_WORDCOUNT:
2236
-        sctx->propname = "WordCount";
2237
-        break;
2238
-    case SPID_CHARCOUNT:
2239
-        sctx->propname = "CharCount";
2240
-        break;
2241
-    case SPID_THUMBNAIL:
2242
-        sctx->propname = "Thumbnail";
2243
-        break;
2244
-    case SPID_APPNAME:
2245
-        sctx->propname = "AppName";
2246
-        break;
2247
-    case SPID_SECURITY:
2248
-        sctx->propname = "Security";
2249
-        break;
2250
-    default:
2251
-        cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
2252
-        sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
2253
-    }
2254
-}
2255
-
2256
-static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
2257
-{
2258
-    unsigned char *hdr, *ps;
2259
-    uint32_t numprops, limitprops;
2260
-    off_t foff = offset, psoff = 0;
2261
-    uint32_t poffset;
2262
-    int ret;
2263
-    unsigned int i;
2264
-
2265
-    cli_dbgmsg("in ole2_summary_propset_json\n");
2266
-
2267
-    /* summary ctx propset-specific setup*/
2268
-    sctx->codepage = 0;
2269
-    sctx->writecp = 0;
2270
-    sctx->propname = NULL;
2271
-
2272
-    /* examine property set metadata */
2273
-    if ((foff+(2*sizeof(uint32_t))) > sctx->maplen) {
2274
-        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
2275
-        return CL_EFORMAT;
2276
-    }
2277
-    hdr = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, (2*sizeof(uint32_t)));
2278
-    if (!hdr) {
2279
-        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
2280
-        return CL_EREAD;
2281
-    }
2282
-    //foff+=(2*sizeof(uint32_t)); // keep foff pointing to start of propset segment
2283
-    psoff+=(2*sizeof(uint32_t));
2284
-    memcpy(&(sctx->pssize), hdr, sizeof(sctx->pssize));
2285
-    memcpy(&numprops, hdr+sizeof(sctx->pssize), sizeof(numprops));
2286
-    /* endian conversion */
2287
-    sctx->pssize = sum32_endian_convert(sctx->pssize);
2288
-    numprops = sum32_endian_convert(numprops);
2289
-    cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, numprops);
2290
-    if (numprops > PROPCNTLIMIT) {
2291
-        sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
2292
-        limitprops = PROPCNTLIMIT;
2293
-    }
2294
-    else {
2295
-        limitprops = numprops;
2296
-    }
2297
-    cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) propeties\n",
2298
-               limitprops, numprops, PROPCNTLIMIT);
2299
-
2300
-    /* extract remaining fragment of propset */
2301
-    if ((size_t)(foff+(sctx->pssize)) > (size_t)(sctx->maplen)) {
2302
-        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
2303
-        return CL_EFORMAT;
2304
-    }
2305
-    ps = (unsigned char*)fmap_need_off_once(sctx->sfmap, foff, sctx->pssize);
2306
-    if (!ps) {
2307
-        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
2308
-        return CL_EREAD;
2309
-    }
2310
-
2311
-    /* iterate over the properties */
2312
-    for (i = 0; i < limitprops; ++i) {
2313
-        uint32_t propid, propoff;
2314
-
2315
-        if (psoff+sizeof(propid)+sizeof(poffset) > sctx->pssize) {
2316
-            sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
2317
-            return CL_EFORMAT;
2318
-        }
2319
-        memcpy(&propid, ps+psoff, sizeof(propid));
2320
-        psoff+=sizeof(propid);
2321
-        memcpy(&propoff, ps+psoff, sizeof(propoff));
2322
-        psoff+=sizeof(propoff);
2323
-        /* endian conversion */
2324
-        propid = sum32_endian_convert(propid);
2325
-        propoff = sum32_endian_convert(propoff);
2326
-        cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);
2327
-
2328
-        sctx->propname = NULL; sctx->writecp = 0;
2329
-        if (!sctx->mode)
2330
-            ole2_translate_summary_propid(sctx, propid);
2331
-        else
2332
-            ole2_translate_docsummary_propid(sctx, propid);
2333
-
2334
-        if (sctx->propname != NULL) {
2335
-            ret = ole2_process_property(sctx, ps, propoff);
2336
-            if (ret != CL_SUCCESS)
2337
-                return ret;
2338
-        }
2339
-        else {
2340
-            /* add unknown propid flag */
2341
-        }
2342
-    }
2343
-
2344
-    return CL_SUCCESS;
2345
-}
2346
-
2347
-static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
2348
-{
2349
-    json_object *jarr;
2350
-
2351
-    cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);
2352
-
2353
-    if (sctx->sfmap) {
2354
-        funmap(sctx->sfmap);
2355
-    }
2356
-
2357
-    if (sctx->flags) {
2358
-        jarr = cli_jsonarray(sctx->summary, "ParseErrors");
2359
-
2360
-        /* summary errors */
2361
-        if (sctx->flags & OLE2_SUMMARY_ERROR_TOOSMALL) {
2362
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_TOOSMALL");
2363
-        }
2364
-        if (sctx->flags & OLE2_SUMMARY_ERROR_OOB) {
2365
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_OOB");
2366
-        }
2367
-        if (sctx->flags & OLE2_SUMMARY_ERROR_DATABUF) {
2368
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_DATABUF");
2369
-        }
2370
-        if (sctx->flags & OLE2_SUMMARY_ERROR_INVALID_ENTRY) {
2371
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_ERROR_INVALID_ENTRY");
2372
-        }
2373
-        if (sctx->flags & OLE2_SUMMARY_LIMIT_PROPS) {
2374
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_LIMIT_PROPS");
2375
-        }
2376
-        if (sctx->flags & OLE2_SUMMARY_FLAG_TIMEOUT) {
2377
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TIMEOUT");
2378
-        }
2379
-        if (sctx->flags & OLE2_SUMMARY_FLAG_CODEPAGE) {
2380
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_CODEPAGE");
2381
-        }
2382
-        if (sctx->flags & OLE2_SUMMARY_FLAG_UNKNOWN_PROPID) {
2383
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID");
2384
-        }
2385
-        if (sctx->flags & OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE) {
2386
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE");
2387
-        }
2388
-        if (sctx->flags & OLE2_SUMMARY_FLAG_TRUNC_STR) {
2389
-            cli_jsonstr(jarr, NULL, "OLE2_SUMMARY_FLAG_TRUNC_STR");
2390
-        }
2391
-
2392
-        /* codepage translation errors */
2393
-        if (sctx->flags & OLE2_CODEPAGE_ERROR_NOTFOUND) {
2394
-            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_NOTFOUND");
2395
-        }
2396
-        if (sctx->flags & OLE2_CODEPAGE_ERROR_UNINITED) {
2397
-            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_UNINITED");
2398
-        }
2399
-        if (sctx->flags & OLE2_CODEPAGE_ERROR_INVALID) {
2400
-            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INVALID");
2401
-        }
2402
-        if (sctx->flags & OLE2_CODEPAGE_ERROR_INCOMPLETE) {
2403
-            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_INCOMPLETE");
2404
-        }
2405
-        if (sctx->flags & OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL) {
2406
-            cli_jsonstr(jarr, NULL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL");
2407
-        }
2408
-    }
2409
-
2410
-    return retcode;
2411
-}
2412
-
2413
-
2414
-#endif /* HAVE_JSON */
2415
-
2416
-#if HAVE_JSON
2417
-int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
2418
-{
2419
-    summary_ctx_t sctx;
2420
-    STATBUF statbuf;
2421
-    off_t foff = 0;
2422
-    unsigned char *databuf;
2423
-    summary_stub_t sumstub;
2424
-    propset_entry_t pentry;
2425
-    int ret = CL_SUCCESS;
2426
-
2427
-    cli_dbgmsg("in cli_ole2_summary_json\n");
2428
-
2429
-    /* preliminary sanity checks */
2430
-    if (ctx == NULL) {
2431
-        return CL_ENULLARG;
2432
-    }
2433
-
2434
-    if (fd < 0) {
2435
-        cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
2436
-        return CL_ENULLARG; /* placeholder */
2437
-    }
2438
-
2439
-    if (mode != 0 && mode != 1) {
2440
-        cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
2441
-        return CL_ENULLARG; /* placeholder */
2442
-    }
2443
-
2444
-    /* summary ctx setup */
2445
-    memset(&sctx, 0, sizeof(sctx));
2446
-    sctx.ctx = ctx;
2447
-    sctx.mode = mode;
2448
-
2449
-    if (FSTAT(fd, &statbuf) == -1) {
2450
-        cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
2451
-        return CL_ESTAT;
2452
-    }
2453
-
2454
-    sctx.sfmap = fmap(fd, 0, statbuf.st_size);
2455
-    if (!sctx.sfmap) {
2456
-        cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
2457
-        return CL_EMAP;
2458
-    }
2459
-    sctx.maplen = sctx.sfmap->len;
2460
-    cli_dbgmsg("ole2_summary_json: streamsize: %u\n", sctx.maplen);
2461
-
2462
-    if (!mode)
2463
-        sctx.summary = cli_jsonobj(ctx->wrkproperty, "SummaryInfo");
2464
-    else
2465
-        sctx.summary = cli_jsonobj(ctx->wrkproperty, "DocSummaryInfo");
2466
-    if (!sctx.summary) {
2467
-        cli_errmsg("ole2_summary_json: no memory for json object.\n");
2468
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EMEM);
2469
-    }
2470
-
2471
-    sctx.codepage = 0;
2472
-    sctx.writecp = 0;
2473
-
2474
-    /* acquire property stream metadata */
2475
-    if (sctx.maplen < sizeof(summary_stub_t)) {
2476
-        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
2477
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
2478
-    }
2479
-    databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
2480
-    if (!databuf) {
2481
-        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
2482
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
2483
-    }
2484
-    foff += sizeof(summary_stub_t);
2485
-    memcpy(&sumstub, databuf, sizeof(summary_stub_t));
2486
-
2487
-    /* endian conversion and checks */
2488
-    sumstub.byte_order = le16_to_host(sumstub.byte_order);
2489
-    if (sumstub.byte_order != 0xfffe) {
2490
-        cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
2491
-        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
2492
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);;
2493
-    }
2494
-    sumstub.version = sum16_endian_convert(sumstub.version); /*unused*/
2495
-    sumstub.system = sum32_endian_convert(sumstub.system); /*unused*/
2496
-    sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
2497
-    if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
2498
-        cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
2499
-        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
2500
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
2501
-    }
2502
-
2503
-    cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
2504
-    cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);
2505
-
2506
-    /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
2507
-    if ((sctx.maplen-foff) < sizeof(propset_entry_t)) {
2508
-        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
2509
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EFORMAT);
2510
-    }
2511
-    databuf = (unsigned char*)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
2512
-    if (!databuf) {
2513
-        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
2514
-        return cli_ole2_summary_json_cleanup(&sctx, CL_EREAD);
2515
-    }
2516
-    foff += sizeof(propset_entry_t);
2517
-    memcpy(&pentry, databuf, sizeof(propset_entry_t));
2518
-    /* endian conversion */
2519
-    pentry.offset = sum32_endian_convert(pentry.offset);
2520
-
2521
-    if ((ret = ole2_summary_propset_json(&sctx, pentry.offset)) != CL_SUCCESS) {
2522
-        return cli_ole2_summary_json_cleanup(&sctx, ret);
2523
-    }
2524
-
2525
-    /* second property set (index=1) is always a custom property set (if present) */
2526
-    if (sumstub.num_propsets == 2) {
2527
-        cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
2528
-    }
2529
-
2530
-    return cli_ole2_summary_json_cleanup(&sctx, CL_SUCCESS);
2531
-}
2532
-#endif /* HAVE_JSON */