Browse code

Merge master to features/yara.

Steven Morgan authored on 2015/05/02 07:36:48
Showing 66 changed files
1 1
deleted file mode 100644
... ...
@@ -1,23 +0,0 @@
1
- Copyright (c) 2001-2003 Allan Saddi <allan@saddi.com>
2
- All rights reserved.
3
-
4
- Redistribution and use in source and binary forms, with or without
5
- modification, are permitted provided that the following conditions
6
- are met:
7
- 1. Redistributions of source code must retain the above copyright
8
-    notice, this list of conditions and the following disclaimer.
9
- 2. Redistributions in binary form must reproduce the above copyright
10
-    notice, this list of conditions and the following disclaimer in the
11
-    documentation and/or other materials provided with the distribution.
12
-
13
- THIS SOFTWARE IS PROVIDED BY ALLAN SADDI AND HIS CONTRIBUTORS ``AS IS''
14
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16
- ARE DISCLAIMED.  IN NO EVENT SHALL ALLAN SADDI OR HIS CONTRIBUTORS BE
17
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23
- POSSIBILITY OF SUCH DAMAGE.
... ...
@@ -1,3 +1,206 @@
1
+Mon, 27 Apr 2015 12:00:00 EDT
2
+-----------------------------------
3
+ * 0.98.7 Release.
4
+
5
+Tue, 14 Apr 2015 15:53:17 EDT (klin)
6
+-----------------------------------
7
+ * bb#11296 - various fixes to pdf string base64 string conversion 
8
+
9
+Mon, 13 Apr 2015 12:14:41 EDT (smorgan)
10
+-----------------------------------
11
+ * bb11298 - look for TOC element name <unarchived-checksum> 
12
+   (as a synonym for <extracted-checksum>). Continue processing rather
13
+    than exit in the event of missing or error in TOC checksum specification.
14
+
15
+Wed, 8 Apr 2015 15:51:04 EDT (smorgan)
16
+-----------------------------------
17
+ * iso9660: remove unnecessary parameter on iso_parse_dir() and reset return
18
+   code when scanall is in effect. 
19
+
20
+Wed, 1 Apr 2015 17:41:59 EDT (klin)
21
+-----------------------------------
22
+ * pdf: correctly handle decoding, decryption, character set conversions,
23
+   and file properties collection(base64 encoded as needed).
24
+
25
+Fri, 27 Mar 2015 13:21:49 EDT (klin)
26
+-----------------------------------
27
+ * converted cb_file_props from using engine-based ctx to file-based ctx 
28
+
29
+Thu, 26 Mar 2015 12:24:02 EDT (smorgan)
30
+-----------------------------------
31
+ * bb11281 - Reworked reverted upack.c crash patch to fix regression
32
+   false negatives. 
33
+
34
+Tue, 24 Mar 2015 12:06:57 EDT (klin)
35
+-----------------------------------
36
+ * make check: added env check 'T' to set timeout 
37
+
38
+Mon, 23 Mar 2015 17:58:35 EDT (klin)
39
+-----------------------------------
40
+ * bb#11282 - patch for code clean up in rebuildpe. Patch
41
+   supplied by Sebastian Andrzej Siewior.
42
+
43
+Mon, 23 Mar 2015 13:04:54 EDT (klin)
44
+-----------------------------------
45
+ * bb#11284 - fixed integer underflow in detecting W32.Polipos.A method.
46
+   Patch supplied by Sebastian Andrzej Siewior.
47
+
48
+Mon, 16 Mar 2015 18:35:14 EDT (klin)
49
+-----------------------------------
50
+ * updated documentation on document property collection 
51
+
52
+Mon, 16 Mar 2015 18:26:07 EDT (klin)
53
+-----------------------------------
54
+ * added support for MS Office 2003 XML(msxml) document types and msxml
55
+   file properties collection. 
56
+
57
+Mon, 16 Mar 2015 13:11:56 EDT (klin)
58
+-----------------------------------
59
+ * fixed coverity issue ID 12109 buffer was not freed on rare error case 
60
+
61
+Mon, 16 Mar 2015 13:08:03 EDT (klin)
62
+-----------------------------------
63
+ * fixed coverity ID 12110 12111 changed the type of a value from unsigned
64
+  to signed due to possible negative values 
65
+
66
+Thu, 12 Mar 2015 19:06:23 EDT (smorgan)
67
+-----------------------------------
68
+ * Fix for infinite loop on crafted xz file. 
69
+
70
+Wed, 11 Mar 2015 15:03:43 EDT (smorgan)
71
+-----------------------------------
72
+ * bb11278 - was not detecting viruses on files inside iso9660.
73
+   Also fix up all-match logic. 
74
+
75
+Mon, 9 Mar 2015 13:02:25 EDT (smorgan)
76
+-----------------------------------
77
+ * bb11274 - adds out of bounds check for petite packed files.
78
+   Patch from Sebastian Andrzej Siewior. 
79
+
80
+Wed, 4 Mar 2015 14:04:24 EDT (klin)
81
+-----------------------------------
82
+ * updated example fileprop analysis bytecodes moved old example bytecodes
83
+   to examples/fileprop_analysis/old/ 
84
+
85
+Wed, 4 Mar 2015 12:08:34 EDT (klin)
86
+-----------------------------------
87
+ * backwards compatibility for target type 13 json scanning 
88
+
89
+Tue, 3 Mar 2015 17:47:55 EDT (klin)
90
+-----------------------------------
91
+ * generates fmap from desc if map is NULL 
92
+
93
+Tue, 3 Mar 2015 16:37:08 EDT (smorgan)
94
+-----------------------------------
95
+ * Apply y0da cryptor patch sent in by Sebastian Andrzej Siewior. 
96
+
97
+Tue, 3 Mar 2015 16:12:48 EDT (klin)
98
+-----------------------------------
99
+ * flevel updated to 80 (new bytecode hook type) 
100
+
101
+Tue, 3 Mar 2015 16:12:22 EDT (klin)
102
+-----------------------------------
103
+ * clambc info option updated for new hook type 
104
+
105
+Tue, 3 Mar 2015 15:00:41 EDT (klin)
106
+-----------------------------------
107
+ * added BC_PRECLASS hook support; replaces target type 13 
108
+
109
+Mon, 2 Mar 2015 19:06:23 EDT (klin)
110
+-----------------------------------
111
+ * pdf string UTF-16 conversion no longer solely depends on ICONV reason:
112
+   no ICONV meant no conversion even though conversion function existed 
113
+
114
+Fri, 27 Feb 2015 15:23:51 EDT (klin)
115
+-----------------------------------
116
+ * bb#11269 - bm matcher no longer sets scanning window offset reason:
117
+   certain segments could be hashed multiple times 
118
+
119
+Wed, 25 Feb 2015 14:55:21 EDT (klin)
120
+-----------------------------------
121
+ * bb#11269 - hash does not compute on segments smaller than the maxpatlen 
122
+
123
+Tue, 24 Feb 2015 16:21:09 EDT (klin)
124
+-----------------------------------
125
+ * bb#11267 - libclamav upx cover against hand crafted section ove patch
126
+   supplied by Sebastian Andrzej Siewior.
127
+
128
+Fri, 27 Feb 2015 16:57:19 EDT (smorgan)
129
+-----------------------------------
130
+ * Patch for integer overflow checks for petite unpack code supplied by
131
+   Sebastian Andrzej Siewior. 
132
+
133
+Fri, 27 Feb 2015 16:54:55 EDT (smorgan)
134
+-----------------------------------
135
+ * remove obsolete parameters from the clamd.conf man page: MailMaxRecursion,
136
+   ArchiveMaxFileSize, ArchiveMaxRecursion, ArchiveMaxFiles,
137
+   ArchiveMaxCompressionRatio, ArchiveBlockMax, ArchiveLimitMemoryUsage, Clamuko*. 
138
+
139
+Wed, 18 Feb 2015 15:23:54 EDT (klin)
140
+-----------------------------------
141
+ * bb#11212 - fix MEW unpacker 
142
+
143
+Mon, 16 Feb 2015 11:46:21 EDT (smorgan)
144
+-----------------------------------
145
+ * bb11264 - patch for 'possible' heap overflow submitted by the Debian team. 
146
+
147
+Tue, 10 Feb 2015 15:16:48 EDT (smorgan)
148
+-----------------------------------
149
+ * bb11260: fix compile error when './configure --disable-pthreads' is specified. 
150
+
151
+Fri, 6 Feb 2015 14:59:43 EDT (klin)
152
+-----------------------------------
153
+ * bb#11254 - removed built-in llvm configure check and added
154
+   --with-llvm-linking option to specify system-llvm linking method 
155
+
156
+Fri, 6 Feb 2015 13:22:35 EDT (klin)
157
+-----------------------------------
158
+ * improved documentation on macro subsignatures 
159
+
160
+Wed, 4 Feb 2015 18:52:01 EDT (smorgan)
161
+-----------------------------------
162
+ * fix documentation errors in example logical signature. 
163
+
164
+Fri, 30 Jan 2015 12:15:07 EDT (klin)
165
+-----------------------------------
166
+ * bb#12887 - fixed an issue regarding (fd==-1) in WinAPI 
167
+
168
+Wed, 28 Jan 2015 11:20:35 EDT (klin)
169
+-----------------------------------
170
+ * fixed Windows API SetOption/GetOption CLAM_LIMIT_RECURSION 
171
+
172
+Wed, 21 Jan 2015 11:41:07 EDT (klin)
173
+-----------------------------------
174
+ * added ICONV to clamconf optional features report 
175
+
176
+Thu, 15 Jan 2015 15:15:01 EDT (klin)
177
+-----------------------------------
178
+ * fixed an incorrect return value for magic_scandesc 
179
+
180
+Wed, 14 Jan 2015 09:25:47 EDT (klin)
181
+-----------------------------------
182
+ * cleaned up configure help strings by using AS_HELP_STRING 
183
+
184
+Mon, 12 Jan 2015 13:45:36 EDT (klin)
185
+-----------------------------------
186
+ * bb#11238 - added missing PDF preclass operations
187
+   > added whitespace fix for indirect references strings
188
+   > added PDF escape sequence handling (including octal) 
189
+
190
+Thu, 8 Jan 2015 09:48:20 EDT (klin)
191
+-----------------------------------
192
+ * bb#11237 - fixed bug in building CUD file 
193
+
194
+Wed, 7 Jan 2015 04:46:15 EDT (smorgan)
195
+-----------------------------------
196
+ * bb11233 - fix a strange bus error on Mac OS X PPC when using debug mode. 
197
+
198
+Mon, 22 Dec 2014 12:13:38 EDT (klin)
199
+-----------------------------------
200
+ * bb#11226 - fixed gpt GUID debugging message 
201
+
202
+ *** End of 0.98.6, Start of 0.98.7
203
+
1 204
 
2 205
 Tue Dec 16 16:21:40 2014 EDT (swebb)
3 206
 -------------------------------------
... ...
@@ -1,36 +1,45 @@
1
-0.98.6
1
+0.98.7
2 2
 ------
3 3
 
4
-ClamAV 0.98.6 is a bug fix release correcting the following:
4
+ClamAV 0.98.7 is here! This release contains new scanning features
5
+and bug fixes. 
5 6
 
6
-    - library shared object revisions.
7
-    - installation issues on some Mac OS X and FreeBSD platforms.
8
-    - includes a patch from Sebastian Andrzej Siewior making
9
-      ClamAV pid files compatible with systemd.
10
-    - Fix a heap out of bounds condition with crafted Yoda's
11
-      crypter files. This issue was discovered by Felix Groebert
12
-      of the Google Security Team.
13
-    - Fix a heap out of bounds condition with crafted mew packer
14
-      files. This issue was discovered by Felix Groebert of the
15
-      Google Security Team.
16
-    - Fix a heap out of bounds condition with crafted upx packer
17
-      files. This issue was discovered by Kevin Szkudlapski of
18
-      Quarkslab.
19
-    - Fix a heap out of bounds condition with crafted upack packer
20
-      files. This issue was discovered by Sebastian Andrzej Siewior.
21
-      CVE-2014-9328.
22
-    - Compensate a crash due to incorrect compiler optimization when
23
-      handling crafted petite packer files. This issue was discovered
24
-      by Sebastian Andrzej Siewior.
25
-      
26
-Thanks to the following ClamAV community members for code submissions
27
-and bug reporting included in ClamAV 0.98.6:
7
+    - Improvements to PDF processing: decryption, escape sequence
8
+      handling, and file property collection.
9
+    - Scanning/analysis of additional Microsoft Office 2003 XML format.
10
+    - Fix infinite loop condition on crafted y0da cryptor file. Identified
11
+      and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
12
+    - Fix crash on crafted petite packed file. Reported and patch
13
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
14
+    - Fix false negatives on files within iso9660 containers. This issue
15
+      was reported by Minzhuan Gong.
16
+    - Fix a couple crashes on crafted upack packed file. Identified and
17
+      patches supplied by Sebastian Andrzej Siewior.
18
+    - Fix a crash during algorithmic detection on crafted PE file.
19
+      Identified and patch supplied by Sebastian Andrzej Siewior.
20
+    - Fix an infinite loop condition on a crafted "xz" archive file.
21
+      This was reported by Dimitri Kirchner and Goulven Guiheux.
22
+      CVE-2015-2668.
23
+    - Fix compilation error after ./configure --disable-pthreads.
24
+      Reported and fix suggested by John E. Krokes.
25
+    - Apply upstream patch for possible heap overflow in Henry Spencer's 
26
+      regex library. CVE-2015-2305.
27
+    - Fix crash in upx decoder with crafted file. Discovered and patch
28
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
29
+    - Fix segfault scanning certain HTML files. Reported with sample by
30
+      Kai Risku.
31
+    - Improve detections within xar/pkg files.
32
+
33
+As always, we appreciate contributions of bug reports, code fixes,
34
+and sample submission from the ClamAV community members:
28 35
 
29 36
 Sebastian Andrzej Siewior
30
-Felix Groebert
31
-Kevin Szkudlapski
32
-Mark Pizzolato
33
-Daniel J. Luke
37
+Minzhuan Gong
38
+Dimitri Kirchner
39
+Goulven Guiheux
40
+John E. Krokes
41
+Kai Risku
42
+
34 43
 
35 44
 --
36 45
 The ClamAV team (http://www.clamav.net/about.html#credits)
... ...
@@ -2,6 +2,48 @@ Note: This README/NEWS file refers to the source tarball. Some things described
2 2
 here may not be available in binary packages.
3 3
 --
4 4
 
5
+0.98.7
6
+------
7
+
8
+ClamAV 0.98.7 is here! This release contains new scanning features
9
+and bug fixes. 
10
+
11
+    - Improvements to PDF processing: decryption, escape sequence
12
+      handling, and file property collection.
13
+    - Scanning/analysis of additional Microsoft Office 2003 XML format.
14
+    - Fix infinite loop condition on crafted y0da cryptor file. Identified
15
+      and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
16
+    - Fix crash on crafted petite packed file. Reported and patch
17
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
18
+    - Fix false negatives on files within iso9660 containers. This issue
19
+      was reported by Minzhuan Gong.
20
+    - Fix a couple crashes on crafted upack packed file. Identified and
21
+      patches supplied by Sebastian Andrzej Siewior.
22
+    - Fix a crash during algorithmic detection on crafted PE file.
23
+      Identified and patch supplied by Sebastian Andrzej Siewior.
24
+    - Fix an infinite loop condition on a crafted "xz" archive file.
25
+      This was reported by Dimitri Kirchner and Goulven Guiheux.
26
+      CVE-2015-2668.
27
+    - Fix compilation error after ./configure --disable-pthreads.
28
+      Reported and fix suggested by John E. Krokes.
29
+    - Apply upstream patch for possible heap overflow in Henry Spencer's 
30
+      regex library. CVE-2015-2305.
31
+    - Fix crash in upx decoder with crafted file. Discovered and patch
32
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
33
+    - Fix segfault scanning certain HTML files. Reported with sample by
34
+      Kai Risku.
35
+    - Improve detections within xar/pkg files.
36
+
37
+As always, we appreciate contributions of bug reports, code fixes,
38
+and sample submission from the ClamAV community members:
39
+
40
+Sebastian Andrzej Siewior
41
+Minzhuan Gong
42
+Dimitri Kirchner
43
+Goulven Guiheux
44
+John E. Krokes
45
+Kai Risku
46
+
5 47
 0.98.6
6 48
 ------
7 49
 
... ...
@@ -28291,7 +28291,7 @@ fi
28291 28291
 if test "x$XML_LIBS" = "x"; then
28292 28292
 
28293 28293
 
28294
-   $as_echo_n "              dmg and xar : "
28294
+   $as_echo_n "              libxml2     : "
28295 28295
    if test "x" = "xno"; then :
28296 28296
   $as_echo "no (disabled)"
28297 28297
 elif test "x" = "xyes"; then :
... ...
@@ -28305,7 +28305,7 @@ fi
28305 28305
 else
28306 28306
 
28307 28307
 
28308
-   $as_echo_n "              dmg and xar : "
28308
+   $as_echo_n "              libxml2     : "
28309 28309
    if test "x" = "xno"; then :
28310 28310
   $as_echo "yes, from $XML_HOME (disabled)"
28311 28311
 elif test "x" = "xyes"; then :
... ...
@@ -241,9 +241,9 @@ else
241 241
     CL_MSG_STATUS([pcre        ],[$PCRE_HOME],[$have_pcre])
242 242
 fi
243 243
 if test "x$XML_LIBS" = "x"; then 
244
-    CL_MSG_STATUS([dmg and xar ],[no],[])
244
+    CL_MSG_STATUS([libxml2     ],[no],[])
245 245
 else
246
-    CL_MSG_STATUS([dmg and xar ],[yes, from $XML_HOME],[])
246
+    CL_MSG_STATUS([libxml2     ],[yes, from $XML_HOME],[])
247 247
 fi
248 248
 
249 249
 # Yep, downgrading the compiler avoids the bug too:
250 250
Binary files a/docs/ClamAV_Document_Properties.xlsx and b/docs/ClamAV_Document_Properties.xlsx differ
... ...
@@ -379,9 +379,6 @@ Scan RFC1341 messages split over many emails. You will need to periodically clea
379 379
 .br
380 380
 Default: no
381 381
 .TP
382
-\fBMailMaxRecursion (OBSOLETE)\fR
383
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
384
-.TP 
385 382
 \fBPhishingSignatures BOOL\fR
386 383
 With this option enabled ClamAV will try to detect phishing attempts by using signatures.
387 384
 .br 
... ...
@@ -488,24 +485,6 @@ This option causes memory or nested map scans to dump the content to disk.
488 488
 If you turn on this option, more data is written to disk and is available when the leave-temps option is enabled at the cost of more disk writes.
489 489
 .br
490 490
 Default: no
491
-.TP 
492
-\fBArchiveMaxFileSize (OBSOLETE)\fR
493
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxFileSize\fR and \fBMaxScanSize\fR.
494
-.TP 
495
-\fBArchiveMaxRecursion (OBSOLETE)\fR
496
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
497
-.TP 
498
-\fBArchiveMaxFiles (OBSOLETE)\fR
499
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxFiles\fR.
500
-.TP 
501
-\fBArchiveMaxCompressionRatio (OBSOLETE)\fR
502
-\fBWARNING:\fR This option is no longer accepted.
503
-.TP 
504
-\fBArchiveBlockMax (OBSOLETE)\fR
505
-\fBWARNING:\fR This option is no longer accepted.
506
-.TP 
507
-\fBArchiveLimitMemoryUsage (OBSOLETE)\fR
508
-\fBWARNING:\fR This option is no longer accepted.
509 491
 .br 
510 492
 Default: no
511 493
 .TP 
... ...
@@ -637,33 +616,6 @@ WARNING: setting this limit too high or disabling it may severely impact perform
637 637
 .br
638 638
 Default: 25M
639 639
 .TP
640
-\fBClamukoScanOnAccess (OBSOLETE)\fR
641
-\fBWARNING:\fR This option is no longer accepted. See \fBScanOnAccess\fR.
642
-.TP 
643
-\fBClamukoScannerCount (OBSOLETE)\fR
644
-\fBWARNING:\fR This option is no longer accepted.
645
-.TP 
646
-\fBClamukoScanOnOpen (OBSOLETE)\fR
647
-\fBWARNING:\fR This option is no longer accepted.
648
-.TP 
649
-\fBClamukoScanOnClose (OBSOLETE)\fR
650
-\fBWARNING:\fR This option is no longer accepted.
651
-.TP 
652
-\fBClamukoScanOnExec (OBSOLETE)\fR
653
-\fBWARNING:\fR This option is no longer accepted.
654
-.TP 
655
-\fBClamukoIncludePath (OBSOLETE)\fR
656
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessIncludePath\fR.
657
-.TP 
658
-\fBClamukoExcludePath (OBSOLETE)\fR
659
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludePath\fR.
660
-.TP
661
-\fBClamukoExcludeUID (OBSOLETE)\fR
662
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludeUID\fR.
663
-.TP 
664
-\fBClamukoMaxFileSize (OBSOLETE)\fR
665
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessMaxFileSize\fR.
666
-.TP
667 640
 \fBScanOnAccess BOOL\fR
668 641
 This option enables on-access scanning (Linux only)
669 642
 .br
670 643
Binary files a/examples/fileprop_analysis/analysis.cud and b/examples/fileprop_analysis/analysis.cud differ
... ...
@@ -1,26 +1,15 @@
1 1
 VIRUSNAME_PREFIX("SUBMIT.contains")
2 2
 VIRUSNAMES("EmbedPE")
3 3
 
4
-/* Target type is 13, internal JSON properties */
5
-TARGET(13)
4
+/* Target type is 0, all relevant files */
5
+TARGET(0)
6 6
 
7
-/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
8
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
9
-
10
-SIGNATURES_DECL_BEGIN
11
-DECLARE_SIGNATURE(sig1)
12
-SIGNATURES_DECL_END
13
-
14
-SIGNATURES_DEF_BEGIN
15
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
16
-/* this can be readjusted for specific filetypes */
17
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
18
-SIGNATURES_END
7
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
8
+PRECLASS_HOOK_DECLARE
19 9
 
20
-bool logical_trigger(void)
21
-{
22
-    return matches(Signatures.sig1);
23
-}
10
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
11
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
12
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
24 13
 
25 14
 #define STR_MAXLEN 256
26 15
 
... ...
@@ -3,26 +3,15 @@ VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
3 3
            "CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
4 4
            "CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")
5 5
 
6
-/* Target type is 13, internal JSON properties */
7
-TARGET(13)
6
+/* Target type is 0, all relevant files */
7
+TARGET(0)
8 8
 
9
-/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
10
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
11
-
12
-SIGNATURES_DECL_BEGIN
13
-DECLARE_SIGNATURE(sig1)
14
-SIGNATURES_DECL_END
15
-
16
-SIGNATURES_DEF_BEGIN
17
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
18
-/* this can be readjusted for specific filetypes */
19
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
20
-SIGNATURES_END
9
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
10
+PRECLASS_HOOK_DECLARE
21 11
 
22
-bool logical_trigger(void)
23
-{
24
-    return matches(Signatures.sig1);
25
-}
12
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
13
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
14
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
26 15
 
27 16
 #define STR_MAXLEN 256
28 17
 
... ...
@@ -1,34 +1,51 @@
1 1
 VIRUSNAME_PREFIX("SUBMIT.NotPDF")
2 2
 VIRUSNAMES("InActive", "Submit")
3 3
 
4
-/* Target type is 13, internal JSON properties */
5
-TARGET(13)
4
+/* Target type is 0, all relevant files */
5
+TARGET(0)
6
+
7
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
8
+PRECLASS_HOOK_DECLARE
6 9
 
7 10
 /* JSON API call will require FUNC_LEVEL_098_5 = 78 */
8
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
9
-
10
-SIGNATURES_DECL_BEGIN
11
-DECLARE_SIGNATURE(sig1)
12
-DECLARE_SIGNATURE(sig2)
13
-SIGNATURES_DECL_END
14
-
15
-SIGNATURES_DEF_BEGIN
16
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
17
-/* this can be readjusted for specific filetypes */
18
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
19
-/* search '"RootFileType": "CL_TYPE_PDF"' */
20
-DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
21
-SIGNATURES_END
22
-
23
-bool logical_trigger(void)
24
-{
25
-    return matches(Signatures.sig1) && !matches(Signatures.sig2);
26
-}
11
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
12
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
27 13
 
28 14
 #define STR_MAXLEN 256
29 15
 
30 16
 int entrypoint ()
31 17
 {
32
-    foundVirus("Submit");
18
+    int32_t type, obj, strlen;
19
+    char str[STR_MAXLEN];
20
+
21
+    /* check is json is available, alerts on inactive (optional) */
22
+    if (!json_is_active()) {
23
+        return -1;
24
+    }
25
+
26
+    /* acquire array of internal contained objects */
27
+    obj = json_get_object("FileType", 8, 0);
28
+    if (obj <= 0) return -1;
29
+
30
+    /* acquire and check type */
31
+    type = json_get_type(obj);
32
+    if (type == JSON_TYPE_STRING) {
33
+        /* acquire string length, note +1 is for the NULL terminator */
34
+        strlen = json_get_string_length(obj)+1;
35
+        /* prevent buffer overflow */
36
+        if (strlen > STR_MAXLEN)
37
+            strlen = STR_MAXLEN;
38
+        /* acquire string data, note strlen includes NULL terminator */
39
+        if (json_get_string(str, strlen, obj)) {
40
+            /* debug print str (with '\n' and prepended message */
41
+            debug_print_str(str,strlen);
42
+
43
+            /* check the contained object's type */
44
+            if (!(strlen == 12) || !memcmp(str, "CL_TYPE_PDF", 12)) {
45
+                foundVirus("Submit");
46
+            }
47
+        }
48
+    }
49
+
33 50
     return 0;
34 51
 }
35 52
new file mode 100644
36 53
Binary files /dev/null and b/examples/fileprop_analysis/old/analysis.cud differ
37 54
new file mode 100644
... ...
@@ -0,0 +1,84 @@
0
+VIRUSNAME_PREFIX("SUBMIT.contains")
1
+VIRUSNAMES("EmbedPE")
2
+
3
+/* Target type is 13, internal JSON properties */
4
+TARGET(13)
5
+
6
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
7
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
8
+
9
+SIGNATURES_DECL_BEGIN
10
+DECLARE_SIGNATURE(sig1)
11
+SIGNATURES_DECL_END
12
+
13
+SIGNATURES_DEF_BEGIN
14
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
15
+/* this can be readjusted for specific filetypes */
16
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
17
+SIGNATURES_END
18
+
19
+bool logical_trigger(void)
20
+{
21
+    return matches(Signatures.sig1);
22
+}
23
+
24
+#define STR_MAXLEN 256
25
+
26
+int entrypoint ()
27
+{
28
+    int i;
29
+    int32_t type, obj, objarr, objit, arrlen, strlen;
30
+    char str[STR_MAXLEN];
31
+
32
+    /* check is json is available, alerts on inactive (optional) */
33
+    if (!json_is_active()) {
34
+        return -1;
35
+    }
36
+
37
+    /* acquire array of internal contained objects */
38
+    objarr = json_get_object("ContainedObjects", 16, 0);
39
+    type = json_get_type(objarr);
40
+    /* debug print uint (no '\n' or prepended message */
41
+    debug_print_uint(type);
42
+
43
+    if (type != JSON_TYPE_ARRAY) {
44
+        return -1;
45
+    }
46
+
47
+    /* check array length for iteration over elements */
48
+    arrlen = json_get_array_length(objarr);
49
+    for (i = 0; i < arrlen; ++i) {
50
+        /* acquire json object @ idx i */
51
+        objit = json_get_array_idx(i, objarr);
52
+        if (objit <= 0) continue;
53
+
54
+        /* acquire FileType object of the array element @ idx i */
55
+        obj = json_get_object("FileType", 8, objit);
56
+        if (obj <= 0) continue;
57
+
58
+        /* acquire and check type */
59
+        type = json_get_type(obj);
60
+        if (type == JSON_TYPE_STRING) {
61
+            /* acquire string length, note +1 is for the NULL terminator */
62
+            strlen = json_get_string_length(obj)+1;
63
+            /* prevent buffer overflow */
64
+            if (strlen > STR_MAXLEN)
65
+                strlen = STR_MAXLEN;
66
+            /* acquire string data, note strlen includes NULL terminator */
67
+            if (json_get_string(str, strlen, obj)) {
68
+                /* debug print str (with '\n' and prepended message */
69
+                debug_print_str(str,strlen);
70
+
71
+                /* check the contained object's type */
72
+                if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
73
+                //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
74
+                    /* alert for submission */
75
+                    foundVirus("EmbedPE");
76
+                    return 0;
77
+                }
78
+            }
79
+        }
80
+    }
81
+
82
+    return 0;
83
+}
0 84
new file mode 100644
... ...
@@ -0,0 +1,104 @@
0
+VIRUSNAME_PREFIX("SUBMIT.filetype")
1
+VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
2
+           "CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
3
+           "CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")
4
+
5
+/* Target type is 13, internal JSON properties */
6
+TARGET(13)
7
+
8
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
9
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
10
+
11
+SIGNATURES_DECL_BEGIN
12
+DECLARE_SIGNATURE(sig1)
13
+SIGNATURES_DECL_END
14
+
15
+SIGNATURES_DEF_BEGIN
16
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
17
+/* this can be readjusted for specific filetypes */
18
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
19
+SIGNATURES_END
20
+
21
+bool logical_trigger(void)
22
+{
23
+    return matches(Signatures.sig1);
24
+}
25
+
26
+#define STR_MAXLEN 256
27
+
28
+int entrypoint ()
29
+{
30
+    int32_t objid, type, strlen;
31
+    char str[STR_MAXLEN];
32
+
33
+    /* check is json is available, alerts on inactive (optional) */
34
+    if (!json_is_active())
35
+        foundVirus("InActive");
36
+
37
+    /* acquire the filetype object */
38
+    objid = json_get_object("FileType", 8, 0);
39
+    if (objid <= 0) {
40
+        debug_print_str("json object has no filetype!", 28);
41
+        return 1;
42
+    }
43
+    type = json_get_type(objid);
44
+    if (type != JSON_TYPE_STRING) {
45
+        debug_print_str("json object filetype property is not string!", 44);
46
+        return 1;
47
+    }
48
+
49
+    /* acquire string length, note +1 is for the NULL terminator */
50
+    strlen = json_get_string_length(objid)+1;
51
+    /* prevent buffer overflow */
52
+    if (strlen > STR_MAXLEN)
53
+        strlen = STR_MAXLEN;
54
+    
55
+    /* acquire string data, note strlen includes NULL terminator */
56
+    if (json_get_string(str, strlen, objid)) {
57
+        /* debug print str (with '\n' and prepended message */
58
+        debug_print_str(str,strlen);
59
+
60
+        /* check the contained object's filetype */
61
+        if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
62
+            foundVirus("CL_TYPE_MSEXE");
63
+            return 0;
64
+        }
65
+        if (strlen == 12 && !memcmp(str, "CL_TYPE_PDF", 12)) {
66
+            foundVirus("CL_TYPE_PDF");
67
+            return 0;
68
+        }
69
+        if (strlen == 19 && !memcmp(str, "CL_TYPE_OOXML_WORD", 19)) {
70
+            foundVirus("CL_TYPE_OOXML_WORD");
71
+            return 0;
72
+        }
73
+        if (strlen == 18 && !memcmp(str, "CL_TYPE_OOXML_PPT", 18)) {
74
+            foundVirus("CL_TYPE_OOXML_PPT");
75
+            return 0;
76
+        }
77
+        if (strlen == 17 && !memcmp(str, "CL_TYPE_OOXML_XL", 17)) {
78
+            foundVirus("CL_TYPE_OOXML_XL");
79
+            return 0;
80
+        }
81
+        if (strlen == 15 && !memcmp(str, "CL_TYPE_MSWORD", 15)) {
82
+            foundVirus("CL_TYPE_MSWORD");
83
+            return 0;
84
+        }
85
+        if (strlen == 14 && !memcmp(str, "CL_TYPE_MSPPT", 14)) {
86
+            foundVirus("CL_TYPE_MSPPT");
87
+            return 0;
88
+        }
89
+        if (strlen == 13 && !memcmp(str, "CL_TYPE_MSXL", 13)) {
90
+            foundVirus("CL_TYPE_MSXL");
91
+            return 0;
92
+        }
93
+        if (strlen == 15 && !memcmp(str, "CL_TYPE_MSOLE2", 15)) {
94
+            foundVirus("CL_TYPE_MSOLE2");
95
+            return 0;
96
+        }
97
+
98
+        foundVirus("CL_TYPE_UNKNOWN");
99
+        return 0;
100
+    }
101
+
102
+    return 0;
103
+}
0 104
new file mode 100644
... ...
@@ -0,0 +1,34 @@
0
+VIRUSNAME_PREFIX("SUBMIT.NotPDF")
1
+VIRUSNAMES("InActive", "Submit")
2
+
3
+/* Target type is 13, internal JSON properties */
4
+TARGET(13)
5
+
6
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
7
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
8
+
9
+SIGNATURES_DECL_BEGIN
10
+DECLARE_SIGNATURE(sig1)
11
+DECLARE_SIGNATURE(sig2)
12
+SIGNATURES_DECL_END
13
+
14
+SIGNATURES_DEF_BEGIN
15
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
16
+/* this can be readjusted for specific filetypes */
17
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
18
+/* search '"RootFileType": "CL_TYPE_PDF"' */
19
+DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
20
+SIGNATURES_END
21
+
22
+bool logical_trigger(void)
23
+{
24
+    return matches(Signatures.sig1) && !matches(Signatures.sig2);
25
+}
26
+
27
+#define STR_MAXLEN 256
28
+
29
+int entrypoint ()
30
+{
31
+    foundVirus("Submit");
32
+    return 0;
33
+}
0 34
new file mode 100644
... ...
@@ -0,0 +1,134 @@
0
+VIRUSNAME_PREFIX("SUBMIT.PE")
1
+VIRUSNAMES("Root", "Embedded", "RootEmbedded")
2
+
3
+/* Target type is 13, internal JSON properties */
4
+TARGET(13)
5
+
6
+/* JSON API call will require FUNC_LEVEL_098_5 = 79 */
7
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
8
+
9
+SIGNATURES_DECL_BEGIN
10
+DECLARE_SIGNATURE(sig1)
11
+DECLARE_SIGNATURE(sig2)
12
+SIGNATURES_DECL_END
13
+
14
+SIGNATURES_DEF_BEGIN
15
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
16
+/* this can be readjusted for specific filetypes */
17
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
18
+/* search '"FileType": "CL_TYPE_MSEXE"' */
19
+DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
20
+SIGNATURES_END
21
+
22
+bool logical_trigger(void)
23
+{
24
+    return matches(Signatures.sig1) && matches(Signatures.sig2);
25
+}
26
+
27
+#define STR_MAXLEN 256
28
+
29
+int entrypoint ()
30
+{
31
+    int32_t i, root = 0, embedded = 0;
32
+    int32_t type, obj, strlen, objarr, objit, arrlen;
33
+    char str[STR_MAXLEN];
34
+
35
+    /* check if json is available, alerts on inactive (optional) */
36
+    if (!json_is_active()) {
37
+        return -1;
38
+    }
39
+
40
+    /* acquire FileType object of the root object */
41
+    obj = json_get_object("FileType", 8, 0);
42
+    if (obj <= 0) return -1;
43
+
44
+    /* acquire and check type */
45
+    type = json_get_type(obj);
46
+    if (type == JSON_TYPE_STRING) {
47
+        /* acquire string length, note +1 is for the NULL terminator */
48
+        strlen = json_get_string_length(obj)+1;
49
+        /* prevent buffer overflow */
50
+        if (strlen > STR_MAXLEN)
51
+            strlen = STR_MAXLEN;
52
+        /* acquire string data, note strlen includes NULL terminator */
53
+        if (json_get_string(str, strlen, obj)) {
54
+            /* debug print str (with '\n' and prepended message) */
55
+            debug_print_str(str,strlen);
56
+
57
+            /* check the contained object's type */
58
+            if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
59
+                //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
60
+                /* alert for submission */
61
+                root = 1;
62
+            }
63
+        }
64
+    }
65
+
66
+    debug_print_uint(root);
67
+
68
+    /* acquire array of internal contained objects */
69
+    objarr = json_get_object("ContainedObjects", 16, 0);
70
+    if (objarr <= 0) {
71
+        if (root)
72
+            foundVirus("Root");
73
+        return 0;
74
+    }
75
+
76
+    type = json_get_type(objarr);
77
+    /* debug print uint (no '\n' or prepended message) */
78
+    debug_print_uint(type);
79
+
80
+    if (type != JSON_TYPE_ARRAY) {
81
+        return -1;
82
+    }
83
+
84
+    /* check array length for iteration over elements */
85
+    arrlen = json_get_array_length(objarr);
86
+    for (i = 0; i < arrlen; ++i) {
87
+        /* acquire json object @ idx i */
88
+        objit = json_get_array_idx(i, objarr);
89
+        if (objit <= 0) continue;
90
+
91
+        /* acquire FileType object of the array element @ idx i */
92
+        obj = json_get_object("FileType", 8, objit);
93
+        if (obj <= 0) continue;
94
+
95
+        /* acquire and check type */
96
+        type = json_get_type(obj);
97
+        if (type == JSON_TYPE_STRING) {
98
+            /* acquire string length, note +1 is for the NULL terminator */
99
+            strlen = json_get_string_length(obj)+1;
100
+            /* prevent buffer overflow */
101
+            if (strlen > STR_MAXLEN)
102
+                strlen = STR_MAXLEN;
103
+            /* acquire string data, note strlen includes NULL terminator */
104
+            if (json_get_string(str, strlen, obj)) {
105
+                /* debug print str (with '\n' and prepended message) */
106
+                debug_print_str(str,strlen);
107
+
108
+                /* check the contained object's type */
109
+                if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
110
+                    //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
111
+                    /* alert for submission */
112
+                    embedded = 1;
113
+                    break;
114
+                }
115
+            }
116
+        }
117
+    }
118
+
119
+    debug_print_uint(root);
120
+    debug_print_uint(embedded);
121
+
122
+    if (root && embedded) {
123
+        foundVirus("RootEmbedded");
124
+    }
125
+    else if (root) {
126
+        foundVirus("Root");
127
+    }
128
+    else if (embedded) {
129
+        foundVirus("Embedded");
130
+    }
131
+
132
+    return 0;
133
+}
0 134
new file mode 100644
... ...
@@ -0,0 +1,28 @@
0
+VIRUSNAME_PREFIX("SUBMIT")
1
+VIRUSNAMES("Sandbox")
2
+
3
+/* Target type is 13, internal JSON properties */
4
+TARGET(13)
5
+
6
+/* JSON API call will require FUNC_LEVEL_098_5 = 79 */
7
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
8
+
9
+SIGNATURES_DECL_BEGIN
10
+DECLARE_SIGNATURE(sig1)
11
+SIGNATURES_DECL_END
12
+
13
+SIGNATURES_DEF_BEGIN
14
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
15
+/* this can be readjusted for specific filetypes */
16
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
17
+SIGNATURES_END
18
+
19
+bool logical_trigger(void)
20
+{
21
+    return matches(Signatures.sig1);
22
+}
23
+
24
+int entrypoint ()
25
+{
26
+    return 0;
27
+}
... ...
@@ -1,29 +1,15 @@
1 1
 VIRUSNAME_PREFIX("SUBMIT.PE")
2 2
 VIRUSNAMES("Root", "Embedded", "RootEmbedded")
3 3
 
4
-/* Target type is 13, internal JSON properties */
5
-TARGET(13)
4
+/* Target type is 0, all relevant files */
5
+TARGET(0)
6
+
7
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
8
+PRECLASS_HOOK_DECLARE
6 9
 
7 10
 /* JSON API call will require FUNC_LEVEL_098_5 = 78 */
8
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
9
-
10
-SIGNATURES_DECL_BEGIN
11
-DECLARE_SIGNATURE(sig1)
12
-DECLARE_SIGNATURE(sig2)
13
-SIGNATURES_DECL_END
14
-
15
-SIGNATURES_DEF_BEGIN
16
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
17
-/* this can be readjusted for specific filetypes */
18
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
19
-/* search '"FileType": "CL_TYPE_MSEXE"' */
20
-DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
21
-SIGNATURES_END
22
-
23
-bool logical_trigger(void)
24
-{
25
-    return matches(Signatures.sig1) && matches(Signatures.sig2);
26
-}
11
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
12
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
27 13
 
28 14
 #define STR_MAXLEN 256
29 15
 
... ...
@@ -1,26 +1,15 @@
1 1
 VIRUSNAME_PREFIX("SUBMIT")
2 2
 VIRUSNAMES("Sandbox")
3 3
 
4
-/* Target type is 13, internal JSON properties */
5
-TARGET(13)
4
+/* Target type is 0, all relevant files */
5
+TARGET(0)
6 6
 
7
-/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
8
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
9
-
10
-SIGNATURES_DECL_BEGIN
11
-DECLARE_SIGNATURE(sig1)
12
-SIGNATURES_DECL_END
13
-
14
-SIGNATURES_DEF_BEGIN
15
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
16
-/* this can be readjusted for specific filetypes */
17
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
18
-SIGNATURES_END
7
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
8
+PRECLASS_HOOK_DECLARE
19 9
 
20
-bool logical_trigger(void)
21
-{
22
-    return matches(Signatures.sig1);
23
-}
10
+/* JSON API call will require FUNC_LEVEL_098_5 = 79 */
11
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
12
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
24 13
 
25 14
 int entrypoint ()
26 15
 {
... ...
@@ -462,7 +462,11 @@ libclamav_la_SOURCES = \
462 462
 	matcher-pcre.c \
463 463
 	matcher-pcre.h \
464 464
 	regex_pcre.c \
465
-	regex_pcre.h
465
+	regex_pcre.h \
466
+	msxml.c \
467
+	msxml.h \
468
+	msxml_parser.c \
469
+	msxml_parser.h
466 470
 
467 471
 libclamav_la_SOURCES += bignum.h\
468 472
 	bignum_fast.h\
... ...
@@ -243,7 +243,8 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
243 243
 	libclamav_la-yara_hash.lo libclamav_la-yara_grammar.lo \
244 244
 	libclamav_la-yara_lexer.lo libclamav_la-yara_parser.lo \
245 245
 	libclamav_la-msdoc.lo libclamav_la-matcher-pcre.lo \
246
-	libclamav_la-regex_pcre.lo libclamav_la-fp_add.lo \
246
+	libclamav_la-regex_pcre.lo libclamav_la-msxml.lo \
247
+	libclamav_la-msxml_parser.lo libclamav_la-fp_add.lo \
247 248
 	libclamav_la-fp_add_d.lo libclamav_la-fp_addmod.lo \
248 249
 	libclamav_la-fp_cmp.lo libclamav_la-fp_cmp_d.lo \
249 250
 	libclamav_la-fp_cmp_mag.lo libclamav_la-fp_sub.lo \
... ...
@@ -839,10 +840,10 @@ libclamav_la_SOURCES = matcher-ac.c matcher-ac.h matcher-bm.c \
839 839
 	yara_hash.c yara_hash.h yara_grammar.y yara_lexer.l \
840 840
 	yara_lexer.h yara_parser.c yara_parser.h yara_clam.h msdoc.c \
841 841
 	msdoc.h matcher-pcre.c matcher-pcre.h regex_pcre.c \
842
-	regex_pcre.h bignum.h bignum_fast.h \
843
-	tomsfastmath/addsub/fp_add.c tomsfastmath/addsub/fp_add_d.c \
844
-	tomsfastmath/addsub/fp_addmod.c tomsfastmath/addsub/fp_cmp.c \
845
-	tomsfastmath/addsub/fp_cmp_d.c \
842
+	regex_pcre.h msxml.c msxml.h msxml_parser.c msxml_parser.h \
843
+	bignum.h bignum_fast.h tomsfastmath/addsub/fp_add.c \
844
+	tomsfastmath/addsub/fp_add_d.c tomsfastmath/addsub/fp_addmod.c \
845
+	tomsfastmath/addsub/fp_cmp.c tomsfastmath/addsub/fp_cmp_d.c \
846 846
 	tomsfastmath/addsub/fp_cmp_mag.c tomsfastmath/addsub/fp_sub.c \
847 847
 	tomsfastmath/addsub/fp_sub_d.c tomsfastmath/addsub/fp_submod.c \
848 848
 	tomsfastmath/addsub/s_fp_add.c tomsfastmath/addsub/s_fp_sub.c \
... ...
@@ -1196,6 +1197,8 @@ distclean-compile:
1196 1196
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msdoc.Plo@am__quote@
1197 1197
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msexpand.Plo@am__quote@
1198 1198
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-mspack.Plo@am__quote@
1199
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml.Plo@am__quote@
1200
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml_parser.Plo@am__quote@
1199 1201
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-nulsft.Plo@am__quote@
1200 1202
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ole2_extract.Plo@am__quote@
1201 1203
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ooxml.Plo@am__quote@
... ...
@@ -2311,6 +2314,20 @@ libclamav_la-regex_pcre.lo: regex_pcre.c
2311 2311
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2312 2312
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-regex_pcre.lo `test -f 'regex_pcre.c' || echo '$(srcdir)/'`regex_pcre.c
2313 2313
 
2314
+libclamav_la-msxml.lo: msxml.c
2315
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml.Tpo -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
2316
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml.Tpo $(DEPDIR)/libclamav_la-msxml.Plo
2317
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='msxml.c' object='libclamav_la-msxml.lo' libtool=yes @AMDEPBACKSLASH@
2318
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2319
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
2320
+
2321
+libclamav_la-msxml_parser.lo: msxml_parser.c
2322
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml_parser.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml_parser.Tpo -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
2323
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml_parser.Tpo $(DEPDIR)/libclamav_la-msxml_parser.Plo
2324
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='msxml_parser.c' object='libclamav_la-msxml_parser.lo' libtool=yes @AMDEPBACKSLASH@
2325
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2326
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
2327
+
2314 2328
 libclamav_la-fp_add.lo: tomsfastmath/addsub/fp_add.c
2315 2329
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-fp_add.lo -MD -MP -MF $(DEPDIR)/libclamav_la-fp_add.Tpo -c -o libclamav_la-fp_add.lo `test -f 'tomsfastmath/addsub/fp_add.c' || echo '$(srcdir)/'`tomsfastmath/addsub/fp_add.c
2316 2330
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-fp_add.Tpo $(DEPDIR)/libclamav_la-fp_add.Plo
... ...
@@ -2970,7 +2970,13 @@ void cli_bytecode_describe(const struct cli_bc *bc)
2970 2970
 	    puts("logical only");
2971 2971
 	    break;
2972 2972
 	case BC_PE_UNPACKER:
2973
-	    puts("PE hook");
2973
+	    puts("PE unpacker hook");
2974
+	    break;
2975
+    case BC_PE_ALL:
2976
+        puts("all PE hook");
2977
+        break;
2978
+    case BC_PRECLASS:
2979
+        puts("preclass hook");
2974 2980
 	    break;
2975 2981
 	default:
2976 2982
 	    printf("Unknown (type %u)", bc->kind);
... ...
@@ -3007,6 +3013,12 @@ void cli_bytecode_describe(const struct cli_bc *bc)
3007 3007
 	    else
3008 3008
 		puts("all PE files!");
3009 3009
 	    break;
3010
+	case BC_PRECLASS:
3011
+	    if (bc->lsig)
3012
+		puts("PRECLASS files matching logical signature");
3013
+	    else
3014
+		puts("all PRECLASS files!");
3015
+	    break;
3010 3016
 	default:
3011 3017
 	    puts("N/A (unknown type)\n");
3012 3018
 	    break;
... ...
@@ -61,6 +61,9 @@ enum BytecodeKind {
61 61
     /** specifies a PE hook, executes at a predetermined point in PE parsing for PE files,
62 62
       * both packed and unpacked files */
63 63
     BC_PE_ALL,
64
+    /** specifies a PRECLASS hook, executes at the end of file property collection and
65
+      * operates on the original file targeted for property collection */
66
+    BC_PRECLASS,
64 67
     _BC_LAST_HOOK
65 68
 };
66 69
 
... ...
@@ -97,12 +100,13 @@ enum FunctionalityLevels {
97 97
     FUNC_LEVEL_097_6     = 67, /**< LibClamAV release 0.97.6 */
98 98
     FUNC_LEVEL_097_7     = 68, /**< LibClamAV release 0.97.7 */
99 99
     FUNC_LEVEL_097_8     = 69, /**< LibClamAV release 0.97.8 */
100
-    FUNC_LEVEL_098_1     = 76, /**< LibClamAV release 0.98.2 */ /*last syncing to clamav*/
100
+    FUNC_LEVEL_098_1     = 76, /**< LibClamAV release 0.98.1 */ /*last syncing to clamav*/
101 101
     FUNC_LEVEL_098_2     = 77, /**< LibClamAV release 0.98.2 */
102 102
     FUNC_LEVEL_098_3     = 77, /**< LibClamAV release 0.98.3 */
103 103
     FUNC_LEVEL_098_4     = 77, /**< LibClamAV release 0.98.4 */
104 104
     FUNC_LEVEL_098_5     = 79, /**< LibClamAV release 0.98.5: JSON reading API requires this minimum level */
105 105
     FUNC_LEVEL_098_6     = 79, /**< LibClamAV release 0.98.6 */
106
+    FUNC_LEVEL_098_7     = 80, /**< LibClamAV release 0.98.7: BC_PRECLASS bytecodes require minimum level */
106 107
     FUNC_LEVEL_100       = 100 /*future release candidate*/
107 108
 };
108 109
 
... ...
@@ -111,7 +115,7 @@ enum FunctionalityLevels {
111 111
  * Phase of PDF parsing used for PDF Hooks
112 112
  */
113 113
 enum pdf_phase {
114
-    PDF_PHASE_NONE,     /* not a PDF */
114
+    PDF_PHASE_NONE,     /**< not a PDF */
115 115
     PDF_PHASE_PARSED,   /**< after parsing a PDF, object flags can be set etc. */
116 116
     PDF_PHASE_POSTDUMP, /**< after an obj was dumped and scanned */
117 117
     PDF_PHASE_END,      /**< after the pdf scan finished */
... ...
@@ -1123,14 +1127,14 @@ int32_t get_file_reliability(void);
1123 1123
 /* ----------------- END 0.96.4 APIs ---------------------------------- */
1124 1124
 /* ----------------- BEGIN 0.98.4 APIs -------------------------------- */
1125 1125
 /* ----------------- JSON Parsing APIs -------------------------------- */
1126
-/*
1126
+/**
1127 1127
 \group_json
1128 1128
  * @return 0 - json is disabled or option not specified
1129 1129
  * @return 1 - json is active and properties are available
1130 1130
  */
1131 1131
 int32_t json_is_active(void);
1132 1132
 
1133
-/*
1133
+/**
1134 1134
 \group_json
1135 1135
  * @return objid of json object with specified name
1136 1136
  * @return 0 if json object of specified name cannot be found
... ...
@@ -1142,7 +1146,7 @@ int32_t json_is_active(void);
1142 1142
  */
1143 1143
 int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);
1144 1144
 
1145
-/*
1145
+/**
1146 1146
 \group_json
1147 1147
  * @return type (json_type) of json object specified
1148 1148
  * @return -1 if type unknown or invalid id
... ...
@@ -1150,7 +1154,7 @@ int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);
1150 1150
  */
1151 1151
 int32_t json_get_type(int32_t objid);
1152 1152
 
1153
-/*
1153
+/**
1154 1154
 \group_json
1155 1155
  * @return number of elements in the json array of objid
1156 1156
  * @return -1 if an error has occurred
... ...
@@ -1159,7 +1163,7 @@ int32_t json_get_type(int32_t objid);
1159 1159
  */
1160 1160
 int32_t json_get_array_length(int32_t objid);
1161 1161
 
1162
-/*
1162
+/**
1163 1163
 \group_json
1164 1164
  * @return objid of json object at idx of json array of objid
1165 1165
  * @return 0 if invalid idx
... ...
@@ -1170,7 +1174,7 @@ int32_t json_get_array_length(int32_t objid);
1170 1170
  */
1171 1171
 int32_t json_get_array_idx(int32_t idx, int32_t objid);
1172 1172
 
1173
-/*
1173
+/**
1174 1174
 \group_json
1175 1175
  * @return length of json string of objid, not including terminating null-character
1176 1176
  * @return -1 if an error has occurred
... ...
@@ -1179,7 +1183,7 @@ int32_t json_get_array_idx(int32_t idx, int32_t objid);
1179 1179
  */
1180 1180
 int32_t json_get_string_length(int32_t objid);
1181 1181
 
1182
-/*
1182
+/**
1183 1183
 \group_json
1184 1184
  * @return number of characters transferred (capped by str_len), 
1185 1185
  *         including terminating null-character
... ...
@@ -1192,20 +1196,21 @@ int32_t json_get_string_length(int32_t objid);
1192 1192
  */
1193 1193
 int32_t json_get_string(int8_t* str, int32_t str_len, int32_t objid);
1194 1194
 
1195
-/*
1195
+/**
1196 1196
 \group_json
1197 1197
  * @return boolean value of queried objid; will force other types to boolean
1198 1198
  * @param[in] objid - id value of json object to query
1199 1199
  */
1200 1200
 int32_t json_get_boolean(int32_t objid);
1201 1201
 
1202
-/*
1202
+/**
1203 1203
 \group_json
1204 1204
  * @return integer value of queried objid; will force other types to integer
1205 1205
  * @param[in] objid - id value of json object to query
1206 1206
  */
1207 1207
 int32_t json_get_int(int32_t objid);
1208 1208
 
1209
+//int64_t json_get_int64(int32_t objid);
1209 1210
 /* bytecode does not support double type */
1210 1211
 //double json_get_double(int32_t objid);
1211 1212
 
... ...
@@ -402,7 +402,7 @@ extern void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback
402 402
 
403 403
 /* File properties callback */
404 404
 typedef int (*clcb_file_props)(const char *j_propstr, int rc, void *cbdata);
405
-extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata);
405
+extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback);
406 406
 
407 407
 /* Statistics/intelligence gathering callbacks */
408 408
 extern void cl_engine_set_stats_set_cbdata(struct cl_engine *engine, void *cbdata);
... ...
@@ -115,7 +115,13 @@ char *cl_base64_encode(void *data, size_t len)
115 115
     size_t elen;
116 116
 
117 117
     b64 = BIO_new(BIO_f_base64());
118
+    if (!(b64))
119
+        return NULL;
118 120
     bio = BIO_new(BIO_s_mem());
121
+    if (!(bio)) {
122
+        BIO_free(b64);
123
+        return NULL;
124
+    }
119 125
 
120 126
     bio = BIO_push(b64, bio);
121 127
     BIO_write(bio, data, len);
... ...
@@ -119,6 +119,8 @@ static const struct ftmap_s {
119 119
     { "CL_TYPE_OOXML_XL",	CL_TYPE_OOXML_XL     	},
120 120
     { "CL_TYPE_INTERNAL",	CL_TYPE_INTERNAL     	},
121 121
     { "CL_TYPE_XDP",        CL_TYPE_XDP             },
122
+    { "CL_TYPE_XML_WORD",   CL_TYPE_XML_WORD        },
123
+    { "CL_TYPE_XML_XL",     CL_TYPE_XML_XL          },
122 124
     { NULL,			CL_TYPE_IGNORED		}
123 125
 };
124 126
 
... ...
@@ -108,6 +108,8 @@ typedef enum {
108 108
     CL_TYPE_GPT,
109 109
     CL_TYPE_APM,
110 110
     CL_TYPE_XDP,
111
+    CL_TYPE_XML_WORD,
112
+    CL_TYPE_XML_XL,
111 113
     CL_TYPE_IGNORED /* please don't add anything below */
112 114
 } cli_file_t;
113 115
 
... ...
@@ -160,7 +160,8 @@ static const char *ftypes_int[] = {
160 160
   "0:0:377f0683002de218:SQLite WAL:CL_TYPE_ANY:CL_TYPE_IGNORED",
161 161
   "0:0:53514c69746520666f726d6174203300:SQLite database:CL_TYPE_ANY:CL_TYPE_IGNORED",
162 162
   "0:0:d9d505f920a163d7:SQLite journal:CL_TYPE_ANY:CL_TYPE_IGNORED",
163
-  "0:0:435753:SWF (compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
163
+  "0:0:5a5753:SWF (LZMA compressed):CL_TYPE_ANY:CL_TYPE_SWF:81",
164
+  "0:0:435753:SWF (zlib compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
164 165
   "0:0:465753:SWF (uncompressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
165 166
   "0:0:4d53434600000000:MS CAB:CL_TYPE_ANY:CL_TYPE_MSCAB",
166 167
   "1:*:4d53434600000000:CAB-SFX:CL_TYPE_ANY:CL_TYPE_CABSFX",
... ...
@@ -182,6 +183,10 @@ static const char *ftypes_int[] = {
182 182
   "1:0:4552{510}504D0000:Disk Image - Apple Partition Map:CL_TYPE_ANY:CL_TYPE_APM:77",
183 183
   "0:0:7b20224d61676963223a2022434c414d4a534f4e763022:Internal properties:CL_TYPE_ANY:CL_TYPE_INTERNAL:78",
184 184
   "1:*:3c7864703a786470:Adobe XDP - Embedded PDF:CL_TYPE_ANY:CL_TYPE_XDP:79",
185
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
186
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
187
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
188
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
185 189
   NULL
186 190
 };
187 191
 
... ...
@@ -47,7 +47,7 @@
47 47
 #include <ifaddrs.h>
48 48
 #endif
49 49
 
50
-#if defined(SIOCGIFHWADDR)
50
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
51 51
 #if defined(_AIX)
52 52
 #include <sys/ndd_var.h>
53 53
 #include <sys/kinfo.h>
... ...
@@ -116,7 +116,7 @@ struct device *get_devices(void)
116 116
     uint8_t *mac;
117 117
     int sock;
118 118
 
119
-#if defined(SIOCGIFHWADDR)
119
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
120 120
     struct ifreq ifr;
121 121
 #else
122 122
     struct sockaddr_dl *sdl;
... ...
@@ -155,7 +155,7 @@ struct device *get_devices(void)
155 155
          * Instead, Linux uses its own ioctl. This code only runs if we're not Linux,
156 156
          * Windows, or FreeBSD.
157 157
          */
158
-#if !defined(SIOCGIFHWADDR)
158
+#if !defined(SIOCGIFHWADDR) || defined(__GNU__)
159 159
         for (i=0; i < ndevices; i++) {
160 160
             if (!(strcmp(devices[i].name, addr->ifa_name))) {
161 161
                 sdl = (struct sockaddr_dl *)(addr->ifa_addr);
... ...
@@ -180,7 +180,7 @@ struct device *get_devices(void)
180 180
     }
181 181
 
182 182
     /* This is the Linux version of getting the MAC addresses */
183
-#if defined(SIOCGIFHWADDR)
183
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
184 184
     for (i=0; i < ndevices; i++) {
185 185
         if (!(devices[i].name))
186 186
             continue;
... ...
@@ -118,6 +118,7 @@ static char *iso_string(iso9660_t *iso, const void *src, unsigned int len) {
118 118
 static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
119 119
     cli_ctx *ctx = iso->ctx;
120 120
     int ret = CL_CLEAN;
121
+    int viruses_found = 0;
121 122
 
122 123
     if(len < 34) {
123 124
 	cli_dbgmsg("iso_parse_dir: Directory too small, skipping\n");
... ...
@@ -182,10 +183,13 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
182 182
 	    filesz = cli_readint32(dir+10);
183 183
 
184 184
 	    cli_dbgmsg("iso_parse_dir: %s '%s': off %x - size %x - flags %x - unit size %x - gap size %x - volume %u\n", (dir[25] & 2) ? "Directory" : "File", iso->buf, fileoff, filesz, dir[25], dir[26], dir[27], cli_readint32(&dir[28]) & 0xffff);
185
-	    if(cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL) == CL_VIRUS) {
186
-		ret = CL_VIRUS;
187
-		break;
188
-	    }
185
+            ret = cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL);
186
+            if (ret == CL_VIRUS) {
187
+                viruses_found = 1;
188
+                if (!SCAN_ALL)
189
+                    break;
190
+                ret = CL_CLEAN;
191
+            }
189 192
 
190 193
 	    if(dir[26] || dir[27])
191 194
 		cli_dbgmsg("iso_parse_dir: Skipping interleaved file\n");
... ...
@@ -199,6 +203,12 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
199 199
 		    else
200 200
 			ret = iso_scan_file(iso, fileoff, filesz);
201 201
 		}
202
+                if (ret == CL_VIRUS) {
203
+                    viruses_found = 1;
204
+                    if (!SCAN_ALL)
205
+                        break;
206
+                    ret = CL_CLEAN;
207
+                }
202 208
 	    }
203 209
 	    dirsz -= entrysz;
204 210
 	    dir += entrysz;
... ...
@@ -206,6 +216,8 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
206 206
 
207 207
 	fmap_unneed_ptr(*ctx->fmap, dir_orig, iso->blocksz);
208 208
     }
209
+    if (viruses_found == 1)
210
+        return CL_VIRUS;
209 211
     return ret;
210 212
 }
211 213
 
... ...
@@ -46,6 +46,21 @@ int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval)
46 46
     return CL_SUCCESS;
47 47
 }
48 48
 
49
+int cli_json_parse_error(json_object *root, const char *errstr)
50
+{
51
+    json_object *perr;
52
+
53
+    if (!root)
54
+        return CL_SUCCESS; /* CL_ENULLARG? */
55
+
56
+    perr = cli_jsonarray(root, "ParseErrors");
57
+    if (perr == NULL) {
58
+        return CL_EMEM;
59
+    }
60
+
61
+    return cli_jsonstr(perr, NULL, errstr);
62
+}
63
+
49 64
 int cli_jsonnull(json_object *obj, const char* key)
50 65
 {
51 66
     json_type objty;
... ...
@@ -37,6 +37,7 @@
37 37
 #define JSON_TIMEOUT_SKIP_CYCLES 3
38 38
 
39 39
 int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval);
40
+int cli_json_parse_error(json_object *root, const char *errstr);
40 41
 
41 42
 int cli_jsonnull(json_object *obj, const char* key);
42 43
 int cli_jsonstr(json_object *obj, const char* key, const char* s);
... ...
@@ -245,7 +245,7 @@ void cli_bm_free(struct cli_matcher *root)
245 245
     }
246 246
 }
247 247
 
248
-int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset)
248
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx)
249 249
 {
250 250
 	uint32_t i, j, off, off_min, off_max;
251 251
 	uint8_t found, pchain, shift;
... ...
@@ -253,7 +253,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
253 253
 	struct cli_bm_patt *p;
254 254
 	const unsigned char *bp, *pt;
255 255
 	unsigned char prefix;
256
-        int ret;
256
+        int ret, viruses_found = 0;
257 257
 
258 258
     if(!root || !root->bm_shift)
259 259
 	return CL_CLEAN;
... ...
@@ -285,8 +285,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
285 285
 		if(offdata) {
286 286
 		    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
287 287
 		    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
288
-		    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
288
+		    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
289
+			if (viruses_found)
290
+			    return CL_VIRUS;
289 291
 			return CL_CLEAN;
292
+		    }
290 293
 		    i += offdata->offtab[offdata->pos] - off;
291 294
 		} else {
292 295
 		    i++;
... ...
@@ -377,12 +380,18 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
377 377
 		    }
378 378
 		    if(virname) {
379 379
 			*virname = p->virname;
380
-			if(viroffset)
381
-			    *viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
380
+			if(ctx != NULL && SCAN_ALL) {
381
+			    cli_append_virus(ctx, *virname);
382
+			    //*viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
383
+			}
382 384
 		    }
383 385
 		    if(patt)
384 386
 			*patt = p;
385
-		    return CL_VIRUS;
387
+
388
+		    viruses_found = 1;
389
+
390
+		    if(ctx != NULL && !SCAN_ALL)
391
+			return CL_VIRUS;
386 392
 		}
387 393
 		p = p->next;
388 394
 	    }
... ...
@@ -392,8 +401,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
392 392
 	if(offdata) {
393 393
 	    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
394 394
 	    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
395
-	    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
395
+	    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
396
+		if (viruses_found)
397
+		    return CL_VIRUS;
396 398
 		return CL_CLEAN;
399
+	    }
397 400
 	    i += offdata->offtab[offdata->pos] - off;
398 401
 	} else {
399 402
 	    i += shift;
... ...
@@ -401,5 +413,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
401 401
 
402 402
     }
403 403
 
404
+    if (viruses_found)
405
+	return CL_VIRUS;
404 406
     return CL_CLEAN;
405 407
 }
... ...
@@ -25,6 +25,7 @@
25 25
 #include "filetypes.h"
26 26
 #include "cltypes.h"
27 27
 #include "fmap.h"
28
+#include "others.h"
28 29
 
29 30
 #define BM_BOUNDARY_EOL	1
30 31
 
... ...
@@ -47,7 +48,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const
47 47
 int cli_bm_init(struct cli_matcher *root);
48 48
 int cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *data, const struct cli_target_info *info);
49 49
 void cli_bm_freeoff(struct cli_bm_off *data);
50
-int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset);
50
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx);
51 51
 void cli_bm_free(struct cli_matcher *root);
52 52
 
53 53
 #endif
... ...
@@ -104,7 +104,6 @@ static inline int matcher_run(const struct cli_matcher *root,
104 104
 			      fmap_t *map,
105 105
 			      struct cli_bm_off *offdata,
106 106
 			      struct cli_pcre_off *poffdata,
107
-			      uint32_t *viroffset,
108 107
 			      cli_ctx *ctx)
109 108
 {
110 109
     int ret, tmp;
... ...
@@ -143,17 +142,20 @@ static inline int matcher_run(const struct cli_matcher *root,
143 143
 	    /* Don't use prefiltering for BM offset mode, since BM keeps tracks
144 144
 	     * of offsets itself, and doesn't work if we skip chunks of input
145 145
 	     * data */
146
-	    ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, viroffset);
146
+	    ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, ctx);
147 147
 	} else {
148
-	    ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, viroffset);
148
+	    ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, ctx);
149 149
 	}
150
-	if (ret == CL_VIRUS) {
151
-	    if (ctx) {
150
+	if (ret != CL_CLEAN) {
151
+	    if (ret != CL_VIRUS)
152
+		return ret;
153
+
154
+	    /* else (ret == CL_VIRUS) */
155
+	    if (SCAN_ALL)
156
+		viruses_found = 1;
157
+	    else {
152 158
 		cli_append_virus(ctx, *virname);
153
-		if (SCAN_ALL)
154
-		    viruses_found++;
155
-		else
156
-		    return ret;
159
+		return ret;
157 160
 	    }
158 161
 	}
159 162
     }
... ...
@@ -253,7 +255,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
253 253
 	if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
254 254
 	    return ret;
255 255
 
256
-	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
256
+	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);
257 257
 
258 258
 	if(!acdata)
259 259
 	    cli_ac_freedata(&mdata);
... ...
@@ -273,7 +275,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
273 273
     if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
274 274
 	return ret;
275 275
 
276
-    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
276
+    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);
277 277
 
278 278
     if(!acdata)
279 279
 	cli_ac_freedata(&mdata);
... ...
@@ -813,7 +815,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
813 813
     fmap_t *map = *ctx->fmap;
814 814
     struct cli_matcher *hdb, *fp;
815 815
     const char *virname = NULL;
816
-    uint32_t viroffset = 0;
817 816
     uint32_t viruses_found = 0;
818 817
     void *md5ctx, *sha1ctx, *sha256ctx;
819 818
 
... ...
@@ -993,8 +994,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
993 993
 
994 994
         if(troot) {
995 995
                 virname = NULL;
996
-                viroffset = 0;
997
-                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, &viroffset, ctx);
996
+                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, ctx);
998 997
 
999 998
             if (virname) {
1000 999
                 /* virname already appended by matcher_run */
... ...
@@ -1024,8 +1024,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
1024 1024
 
1025 1025
         if(!ftonly) {
1026 1026
             virname = NULL;
1027
-            viroffset = 0;
1028
-            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, &viroffset, ctx);
1027
+            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, ctx);
1029 1028
 
1030 1029
             if (virname) {
1031 1030
                 /* virname already appended by matcher_run */
... ...
@@ -1054,7 +1053,9 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
1054 1054
                     type = ret;
1055 1055
             }
1056 1056
 
1057
-            if(hdb) {
1057
+            /* if (bytes <= (maxpatlen * (offset!=0))), it means the last window finished the file hashing *
1058
+             *   since the last window is responsible for adding intersection between windows (maxpatlen)  */
1059
+            if(hdb && (bytes > (maxpatlen * (offset!=0)))) {
1058 1060
                 const void *data = buff + maxpatlen * (offset!=0);
1059 1061
                 uint32_t data_len = bytes - maxpatlen * (offset!=0);
1060 1062
 
... ...
@@ -1067,11 +1068,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
1067 1067
             }
1068 1068
         }
1069 1069
 
1070
-        if(SCAN_ALL && viroffset) {
1071
-            offset = viroffset;
1072
-            continue;
1073
-        }
1074
-
1075 1070
         if(bytes < SCANBUFF)
1076 1071
             break;
1077 1072
 
... ...
@@ -3,7 +3,7 @@
3 3
  * 
4 4
  * Copyright (C) 2007-2013 Sourcefire, Inc.
5 5
  * 
6
- * Authors: Trog
6
+ * Authors: Kevin Lin
7 7
  * 
8 8
  * This program is free software; you can redistribute it and/or modify it under
9 9
  * the terms of the GNU General Public License version 2 as published by the
... ...
@@ -3,7 +3,7 @@
3 3
  *
4 4
  *  Copyright (C) 2007-2008 Sourcefire, Inc.
5 5
  *
6
- *  Authors: Trog
6
+ *  Authors: Kevin Lin
7 7
  *
8 8
  *  This program is free software; you can redistribute it and/or modify
9 9
  *  it under the terms of the GNU General Public License version 2 as
10 10
new file mode 100644
... ...
@@ -0,0 +1,285 @@
0
+/*
1
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
2
+ * 
3
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ * 
5
+ * Authors: Kevin Lin
6
+ * 
7
+ * This program is free software; you can redistribute it and/or modify it under
8
+ * the terms of the GNU General Public License version 2 as published by the
9
+ * Free Software Foundation.
10
+ * 
11
+ * This program is distributed in the hope that it will be useful, but WITHOUT
12
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
+ * more details.
15
+ * 
16
+ * You should have received a copy of the GNU General Public License along with
17
+ * this program; if not, write to the Free Software Foundation, Inc., 51
18
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
+ */
20
+
21
+#if HAVE_CONFIG_H
22
+#include "clamav-config.h"
23
+#endif
24
+
25
+#include <sys/types.h>
26
+#include <sys/stat.h>
27
+#include <fcntl.h>
28
+
29
+#include "clamav.h"
30
+#include "others.h"
31
+#include "conv.h"
32
+#include "json_api.h"
33
+#include "msxml.h"
34
+#include "msxml_parser.h"
35
+
36
+#if HAVE_LIBXML2
37
+#ifdef _WIN32
38
+#ifndef LIBXML_WRITER_ENABLED
39
+#define LIBXML_WRITER_ENABLED 1
40
+#endif
41
+#endif
42
+#include <libxml/xmlreader.h>
43
+
44
+#define MSXML_VERBIOSE 0
45
+#if MSXML_VERBIOSE
46
+#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
47
+#else
48
+#define cli_msxmlmsg(...)
49
+#endif
50
+
51
+#define MSXML_READBUFF SCANBUFF
52
+
53
+static const struct key_entry msxml_keys[] = {
54
+    { "worddocument",       "WordDocument",       MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
55
+    { "workbook",           "Workbook",           MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
56
+
57
+    { "bindata",            "BinaryData",         MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT },
58
+    { "documentproperties", "DocumentProperties", MSXML_JSON_ROOT },
59
+    { "author",             "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
60
+    { "lastauthor",         "LastAuthor",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
61
+    { "revision",           "Revision",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
62
+    { "totaltime",          "TotalTime",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
63
+    { "created",            "Created",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
64
+    { "lastsaved",          "LastSaved",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
65
+    { "pages",              "Pages",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
66
+    { "words",              "Words",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
67
+    { "characters",         "Characters",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
68
+    { "lines",              "Lines",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
69
+    { "paragraph",          "Paragraph",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
70
+    { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
71
+    { "version",            "Version",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
72
+
73
+    { "allowpng",           "AllowPNG",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
74
+
75
+    { "fonts",              "Fonts",              MSXML_IGNORE_ELEM },
76
+    { "styles",             "Styles",             MSXML_IGNORE_ELEM }
77
+};
78
+static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry);
79
+
80
/*
 * States of the numeric character-entity scanner run by msxml_read_cb
 * over the bytes it hands to libxml2.
 */
enum msxml_state {
    MSXML_STATE_NORMAL         = 0, /* outside any entity candidate */
    MSXML_STATE_ENTITY_START_1 = 1, /* '&' seen */
    MSXML_STATE_ENTITY_START_2 = 2, /* "&#" seen */
    MSXML_STATE_ENTITY_HEX     = 3, /* inside "&#x..." hex digits */
    MSXML_STATE_ENTITY_DEC     = 4, /* inside "&#..." decimal digits */
    MSXML_STATE_ENTITY_CLOSE   = 5, /* first non-digit after the digits */
    MSXML_STATE_ENTITY_NONE    = 6  /* not entered by the scanner in this file */
};
89
+
90
+struct msxml_cbdata {
91
+    enum msxml_state state;
92
+    fmap_t *map;
93
+    const unsigned char *window;
94
+    off_t winpos, mappos;
95
+    size_t winsize;
96
+};
97
+
98
+static inline size_t msxml_read_cb_new_window(struct msxml_cbdata *cbdata)
99
+{
100
+    const unsigned char *new_window = NULL;
101
+    off_t new_mappos;
102
+    size_t bytes;
103
+
104
+    if (cbdata->mappos == cbdata->map->len) {
105
+        cli_msxmlmsg("msxml_read_cb: fmap REALLY EOF\n");
106
+        return 0;
107
+    }
108
+
109
+    new_mappos = cbdata->mappos + cbdata->winsize;
110
+    bytes = MIN(cbdata->map->len - new_mappos, MSXML_READBUFF);
111
+    if (!bytes) {
112
+        cbdata->window = NULL;
113
+        cbdata->winpos = 0;
114
+        cbdata->mappos = cbdata->map->len;
115
+        cbdata->winsize = 0;
116
+
117
+        cli_msxmlmsg("msxml_read_cb: fmap EOF\n");
118
+        return 0;
119
+    }
120
+
121
+    new_window = fmap_need_off_once(cbdata->map, new_mappos, bytes);
122
+    if (!new_window) {
123
+        cli_errmsg("msxml_read_cb: cannot acquire new window for fmap\n");
124
+        return -1;
125
+    }
126
+
127
+    cbdata->window = new_window;
128
+    cbdata->winpos = 0;
129
+    cbdata->mappos = new_mappos;
130
+    cbdata->winsize = bytes;
131
+
132
+    cli_msxmlmsg("msxml_read_cb: acquired new window @ [%llu(+%llu)(max:%llu)]\n",
133
+                 (long long unsigned)cbdata->mappos, (long long unsigned)(cbdata->mappos + cbdata->winsize),
134
+                 (long long unsigned)cbdata->map->len);
135
+
136
+    return bytes;
137
+}
138
+
139
+int msxml_read_cb(void *ctx, char *buffer, int len)
140
+{
141
+    struct msxml_cbdata *cbdata = (struct msxml_cbdata *)ctx;
142
+    size_t wbytes, rbytes;
143
+    int winret;
144
+
145
+    cli_msxmlmsg("msxml_read_cb called\n");
146
+
147
+    /* initial iteration */
148
+    if (!cbdata->window) {
149
+        if ((winret = msxml_read_cb_new_window(cbdata)) <= 0)
150
+            return winret;
151
+    }
152
+
153
+    cli_msxmlmsg("msxml_read_cb: requested %d bytes from offset %llu\n", len, (long long unsigned)(cbdata->mappos+cbdata->winpos));
154
+
155
+    wbytes = 0;
156
+    rbytes = cbdata->winsize - cbdata->winpos;
157
+
158
+    /* copying loop with preprocessing */
159
+    while (wbytes < len) {
160
+        const unsigned char *read_from;
161
+        char *write_to = buffer + wbytes;
162
+        enum msxml_state *state;
163
+#if MSXML_VERBIOSE
164
+        size_t written;
165
+#endif
166
+
167
+        if (!rbytes) {
168
+            if ((winret = msxml_read_cb_new_window(cbdata)) < 0)
169
+                return winret;
170
+            if (winret == 0) {
171
+                cli_msxmlmsg("msxml_read_cb: propagating fmap EOF [%llu]\n", (long long unsigned)wbytes);
172
+                return (int)wbytes;
173
+            }
174
+
175
+            rbytes = cbdata->winsize;
176
+        }
177
+
178
+#if MSXML_VERBIOSE
179
+        written = MIN(rbytes, len - wbytes);
180
+        cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n",
181
+                     (long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize,
182
+                     (long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written));
183
+#endif
184
+
185
+        read_from = cbdata->window + cbdata->winpos;
186
+        state = &(cbdata->state);
187
+
188
+        while (rbytes > 0 && wbytes < len) {
189
+            switch (*state) {
190
+            case MSXML_STATE_NORMAL:
191
+                if ((*read_from) == '&')
192
+                    *state = MSXML_STATE_ENTITY_START_1;
193
+                break;
194
+            case MSXML_STATE_ENTITY_START_1:
195
+                if ((*read_from) == '#')
196
+                    *state = MSXML_STATE_ENTITY_START_2;
197
+                else
198
+                    *state = MSXML_STATE_NORMAL;
199
+                break;
200
+            case MSXML_STATE_ENTITY_START_2:
201
+                if ((*read_from) == 'x')
202
+                    *state = MSXML_STATE_ENTITY_HEX;
203
+                else if (((*read_from) >= '0') && ((*read_from) <= '9'))
204
+                    *state = MSXML_STATE_ENTITY_DEC;
205
+                else
206
+                    *state = MSXML_STATE_NORMAL;
207
+                break;
208
+            case MSXML_STATE_ENTITY_HEX:
209
+                if ((((*read_from) >= '0') && ((*read_from) <= '9')) ||
210
+                    (((*read_from) >= 'a') && ((*read_from) <= 'f')) ||
211
+                    (((*read_from) >= 'A') && ((*read_from) <= 'F'))) {}
212
+                else
213
+                    *state = MSXML_STATE_ENTITY_CLOSE;
214
+                break;
215
+            case MSXML_STATE_ENTITY_DEC:
216
+                if (((*read_from) >= '0') && ((*read_from) <= '9')) {}
217
+                else
218
+                    *state = MSXML_STATE_ENTITY_CLOSE;
219
+                break;
220
+            default:
221
+                cli_errmsg("unknown *state: %d\n", *state);
222
+            }
223
+
224
+            if (*state == MSXML_STATE_ENTITY_CLOSE) {
225
+                if ((*read_from) != ';') {
226
+                    cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n",
227
+                                 (int)(read_from - cbdata->window));
228
+                    (*write_to++) = ';';
229
+                    wbytes++;
230
+                }
231
+                *state = MSXML_STATE_NORMAL;
232
+                if (wbytes >= len)
233
+                    break;
234
+            }
235
+
236
+            *(write_to++) = *(read_from++);
237
+            rbytes--;
238
+            wbytes++;
239
+        }
240
+    }
241
+
242
+    cbdata->winpos = cbdata->winsize - rbytes;
243
+    return (int)wbytes;
244
+}
245
+#endif
246
+
247
+int cli_scanmsxml(cli_ctx *ctx)
248
+{
249
+#if HAVE_LIBXML2
250
+    struct msxml_cbdata cbdata;
251
+    xmlTextReaderPtr reader = NULL;
252
+    int state, ret = CL_SUCCESS;
253
+
254
+    cli_dbgmsg("in cli_scanmsxml()\n");
255
+
256
+    if (!ctx)
257
+        return CL_ENULLARG;
258
+
259
+    memset(&cbdata, 0, sizeof(cbdata));
260
+    cbdata.map = *ctx->fmap;
261
+
262
+    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "msxml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
263
+    if (!reader) {
264
+        cli_dbgmsg("cli_scanmsxml: cannot intialize xmlReader\n");
265
+
266
+#if HAVE_JSON
267
+        ret = cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_IO");
268
+#endif
269
+        return ret; // libxml2 failed!
270
+    }
271
+
272
+    ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1);
273
+
274
+    xmlTextReaderClose(reader);
275
+    xmlFreeTextReader(reader);
276
+    return ret;
277
+#else
278
+    UNUSEDPARAM(ctx);
279
+    cli_dbgmsg("in cli_scanmsxml()\n");
280
+    cli_dbgmsg("cli_scanmsxml: scanning msxml documents requires libxml2!\n");
281
+
282
+    return CL_SUCCESS;
283
+#endif
284
+}
0 285
new file mode 100644
... ...
@@ -0,0 +1,34 @@
0
+/*
1
+ *  Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
2
+ *
3
+ *  Copyright (C) 2007-2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Kevin Lin
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+
22
+#ifndef __MSXML_H
23
+#define __MSXML_H
24
+
25
+#if HAVE_CONFIG_H
26
+#include "clamav-config.h"
27
+#endif
28
+
29
+#include "others.h"
30
+
31
+int cli_scanmsxml(cli_ctx *ctx);
32
+
33
+#endif /* __MSXML_H */
0 34
new file mode 100644
... ...
@@ -0,0 +1,543 @@
0
+/*
1
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
2
+ *
3
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ *
5
+ * Authors: Kevin Lin
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify it under
8
+ * the terms of the GNU General Public License version 2 as published by the
9
+ * Free Software Foundation.
10
+ *
11
+ * This program is distributed in the hope that it will be useful, but WITHOUT
12
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
+ * more details.
15
+ *
16
+ * You should have received a copy of the GNU General Public License along with
17
+ * this program; if not, write to the Free Software Foundation, Inc., 51
18
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
+ */
20
+
21
+#if HAVE_CONFIG_H
22
+#include "clamav-config.h"
23
+#endif
24
+
25
+#include <sys/types.h>
26
+#include <sys/stat.h>
27
+#include <fcntl.h>
28
+
29
+#include "clamav.h"
30
+#include "others.h"
31
+#include "conv.h"
32
+#include "scanners.h"
33
+#include "json_api.h"
34
+#include "msxml_parser.h"
35
+
36
+#if HAVE_LIBXML2
37
+#ifdef _WIN32
38
+#ifndef LIBXML_WRITER_ENABLED
39
+#define LIBXML_WRITER_ENABLED 1
40
+#endif
41
+#endif
42
+#include <libxml/xmlreader.h>
43
+
44
+#define MSXML_VERBIOSE 0
45
+#if MSXML_VERBIOSE
46
+#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
47
+#else
48
+#define cli_msxmlmsg(...)
49
+#endif
50
+
51
/*
 * Bail out of the enclosing function based on an xmlTextReader status code:
 * -1 (reader error) returns CL_EPARSE, 0 (no more nodes) returns CL_BREAK,
 * anything else falls through. The argument is evaluated up to twice, so
 * pass a plain variable.
 */
#define check_state(state)                                              \
    do {                                                                \
        if ((state) == -1) {                                            \
            cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__); \
            return CL_EPARSE;                                           \
        }                                                               \
        else if ((state) == 0) {                                        \
            cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__); \
            return CL_BREAK;                                            \
        }                                                               \
    } while(0)
62
+
63
+
64
+struct key_entry blank_key = { NULL, NULL, 0 };
65
+
66
+static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const xmlChar *key, size_t keylen)
67
+{
68
+    unsigned i;
69
+
70
+    if (keylen > MSXML_JSON_STRLEN_MAX-1) {
71
+        cli_dbgmsg("msxml_check_key: key name too long\n");
72
+        return &blank_key;
73
+    }
74
+
75
+    for (i = 0; i < mxctx->num_keys; ++i) {
76
+        if (keylen == strlen(mxctx->keys[i].key) && !strncasecmp((char *)key, mxctx->keys[i].key, keylen)) {
77
+            return &mxctx->keys[i];
78
+        }
79
+    }
80
+
81
+    return &blank_key;
82
+}
83
+
84
+static void msxml_error_handler(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
85
+{
86
+    int line = xmlTextReaderLocatorLineNumber(locator);
87
+    xmlChar *URI = xmlTextReaderLocatorBaseURI(locator);
88
+
89
+    switch (severity) {
90
+    case XML_PARSER_SEVERITY_WARNING:
91
+    case XML_PARSER_SEVERITY_VALIDITY_WARNING:
92
+        cli_warnmsg("%s:%d: parser warning : %s", (char*)URI, line, msg);
93
+        break;
94
+    case XML_PARSER_SEVERITY_ERROR:
95
+    case XML_PARSER_SEVERITY_VALIDITY_ERROR:
96
+        cli_warnmsg("%s:%d: parser error : %s", (char*)URI, line, msg);
97
+        break;
98
+    default:
99
+        cli_dbgmsg("%s:%d: unknown severity : %s", (char*)URI, line, msg);
100
+        break;
101
+    }
102
+    free(URI);
103
+}
104
+
105
+#if HAVE_JSON
106
/*
 * Decide whether the first len bytes of value form a base-10 integer that
 * fits into an int32_t.
 *
 * Returns 1 and stores the number in *val when strtol consumes exactly len
 * characters and the result lies within the int32_t range. Returns 0 (and
 * leaves *val untouched) for NULL or empty input, trailing garbage, or an
 * out-of-range number, so that the caller can fall back to storing the text
 * as a string instead of a truncated number.
 *
 * Note: where long is only 32 bits wide, strtol saturates on overflow, and
 * such input is reported as the saturated value.
 */
static int msxml_is_int(const char *value, size_t len, int32_t *val)
{
    long parsed;
    char *endptr = NULL;

    if (value == NULL || val == NULL || len == 0)
        return 0;

    parsed = strtol(value, &endptr, 10);
    if (endptr != value + len)
        return 0;

    /* keep the full value; reject what an int32_t cannot represent */
    if (parsed > 2147483647L || parsed < (-2147483647L - 1L))
        return 0;

    *val = (int32_t)parsed;

    return 1;
}
120
+
121
+static int msxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
122
+{
123
+    json_object *newobj, *arrobj;
124
+    int val;
125
+
126
+    if (!wrkptr)
127
+        return CL_ENULLARG;
128
+
129
+    arrobj = cli_jsonarray(wrkptr, arrname);
130
+    if (arrobj == NULL) {
131
+        return CL_EMEM;
132
+    }
133
+
134
+    if (msxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
135
+        newobj = json_object_new_int(val);
136
+    }
137
+    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
138
+        newobj = json_object_new_boolean(1);
139
+    }
140
+    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
141
+        newobj = json_object_new_boolean(0);
142
+    }
143
+    else {
144
+        newobj = json_object_new_string((const char *)node_value);
145
+    }
146
+
147
+    if (NULL == newobj) {
148
+        cli_errmsg("msxml_parse_value: no memory for json value for [%s]\n", arrname);
149
+        return CL_EMEM;
150
+    }
151
+
152
+    json_object_array_add(arrobj, newobj);
153
+    return CL_SUCCESS;
154
+}
155
+#endif /* HAVE_JSON */
156
+
157
+static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl, void *jptr)
158
+{
159
+    const xmlChar *element_name = NULL;
160
+    const xmlChar *node_name = NULL, *node_value = NULL;
161
+    const struct key_entry *keyinfo;
162
+    int ret, virus = 0, state, node_type, endtag = 0;
163
+    cli_ctx *ctx = mxctx->ctx;
164
+#if HAVE_JSON
165
+    json_object *parent = (json_object *)jptr;
166
+    json_object *thisjobj = NULL;
167
+#else
168
+    void *thisjobj = NULL;
169
+#endif
170
+
171
+    cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl);
172
+
173
+    /* check recursion level */
174
+    if (rlvl >= MSXML_RECLEVEL_MAX) {
175
+        cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n");
176
+
177
+#if HAVE_JSON
178
+        if (mxctx->mode) {
179
+            int tmp = cli_json_parse_error(mxctx->root, "MSXML_RECURSIVE_LIMIT");
180
+            if (tmp != CL_SUCCESS)
181
+                return tmp;
182
+        }
183
+#endif
184
+
185
+        /* skip it */
186
+        state = xmlTextReaderNext(reader);
187
+        check_state(state);
188
+        return CL_SUCCESS;
189
+    }
190
+
191
+    /* acquire element type */
192
+    node_type = xmlTextReaderNodeType(reader);
193
+    if (node_type == -1)
194
+        return CL_EPARSE;
195
+
196
+    node_name = xmlTextReaderConstLocalName(reader);
197
+    node_value = xmlTextReaderConstValue(reader);
198
+
199
+    /* branch on node type */
200
+    switch (node_type) {
201
+    case XML_READER_TYPE_ELEMENT:
202
+        cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
203
+
204
+        /* storing the element name for verification/collection */
205
+        element_name = node_name;
206
+        if (!element_name) {
207
+            cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
208
+#if HAVE_JSON
209
+            if (mxctx->mode) {
210
+                int tmp = cli_json_parse_error(mxctx->root, "MSXML_NAMELESS_ELEMENT");
211
+                if (tmp != CL_SUCCESS)
212
+                    return tmp;
213
+            }
214
+#endif
215
+            return CL_EPARSE; /* no name, nameless */
216
+        }
217
+
218
+        /* determine if the element is interesting */
219
+        keyinfo = msxml_check_key(mxctx, element_name, xmlStrlen(element_name));
220
+
221
+        cli_msxmlmsg("key:  %s\n", keyinfo->key);
222
+        cli_msxmlmsg("name: %s\n", keyinfo->name);
223
+        cli_msxmlmsg("type: 0x%x\n", keyinfo->type);
224
+
225
+        /* element and contents are ignored */
226
+        if (keyinfo->type & MSXML_IGNORE_ELEM) {
227
+            cli_msxmlmsg("msxml_parse_element: IGNORING ELEMENT %s\n", keyinfo->name);
228
+
229
+            state = xmlTextReaderNext(reader);
230
+            check_state(state);
231
+            return CL_SUCCESS;
232
+        }
233
+
234
+#if HAVE_JSON
235
+        if (mxctx->mode && (keyinfo->type & MSXML_JSON_TRACK)) {
236
+            if (keyinfo->type & MSXML_JSON_ROOT)
237
+                thisjobj = cli_jsonobj(mxctx->root, keyinfo->name);
238
+            else if (keyinfo->type & MSXML_JSON_WRKPTR)
239
+                thisjobj = cli_jsonobj(parent, keyinfo->name);
240
+
241
+            if (!thisjobj) {
242
+                return CL_EMEM;
243
+            }
244
+            cli_msxmlmsg("msxml_parse_element: generated json object [%s]\n", keyinfo->name);
245
+
246
+            /* count this element */
247
+            if (thisjobj && (keyinfo->type & MSXML_JSON_COUNT)) {
248
+                json_object *counter = NULL;
249
+
250
+                if (!json_object_object_get_ex(thisjobj, "Count", &counter)) { /* object not found */
251
+                    cli_jsonint(thisjobj, "Count", 1);
252
+                } else {
253
+                    int value = json_object_get_int(counter);
254
+                    cli_jsonint(thisjobj, "Count", value+1);
255
+                }
256
+                cli_msxmlmsg("msxml_parse_element: retrieved json object [Count]\n");
257
+            }
258
+
259
+            /* handle attributes */
260
+            if (thisjobj && (keyinfo->type & MSXML_JSON_ATTRIB)) {
261
+                state = xmlTextReaderHasAttributes(reader);
262
+                if (state == 1) {
263
+                    json_object *attributes;
264
+                    const xmlChar *name, *value;
265
+
266
+                    attributes = cli_jsonobj(thisjobj, "Attributes");
267
+                    if (!attributes) {
268
+                        return CL_EPARSE;
269
+                    }
270
+                    cli_msxmlmsg("msxml_parse_element: retrieved json object [Attributes]\n");
271
+
272
+                    while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
273
+                        name = xmlTextReaderConstLocalName(reader);
274
+                        value = xmlTextReaderConstValue(reader);
275
+
276
+                        cli_msxmlmsg("\t%s: %s\n", name, value);
277
+                        cli_jsonstr(attributes, name, (const char *)value);
278
+                    }
279
+                }
280
+                else if (state == -1)
281
+                    return CL_EPARSE;
282
+            }
283
+        }
284
+#endif
285
+
286
+        /* check self-containment */
287
+        state = xmlTextReaderMoveToElement(reader);
288
+        if (state == -1)
289
+            return CL_EPARSE;
290
+
291
+        state = xmlTextReaderIsEmptyElement(reader);
292
+        if (state == 1) {
293
+            cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n");
294
+
295
+            state = xmlTextReaderNext(reader);
296
+            check_state(state);
297
+            return CL_SUCCESS;
298
+        } else if (state == -1)
299
+            return CL_EPARSE;
300
+
301
+        /* advance to first content node */
302
+        state = xmlTextReaderRead(reader);
303
+        check_state(state);
304
+
305
+        while (!endtag) {
306
+#if HAVE_JSON
307
+            if (mxctx->mode && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
308
+                return CL_ETIMEOUT;
309
+#endif
310
+
311
+            node_type = xmlTextReaderNodeType(reader);
312
+            if (node_type == -1)
313
+                return CL_EPARSE;
314
+
315
+            switch (node_type) {
316
+            case XML_READER_TYPE_ELEMENT:
317
+                ret = msxml_parse_element(mxctx, reader, rlvl+1, thisjobj);
318
+                if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
319
+                    return ret;
320
+                } else if (SCAN_ALL && ret == CL_VIRUS) {
321
+                    virus = 1;
322
+                }
323
+                break;
324
+
325
+            case XML_READER_TYPE_TEXT:
326
+                node_value = xmlTextReaderConstValue(reader);
327
+
328
+                cli_msxmlmsg("TEXT: %s\n", node_value);
329
+
330
+#if HAVE_JSON
331
+                if (thisjobj && (keyinfo->type & MSXML_JSON_VALUE)) {
332
+
333
+                    ret = msxml_parse_value(thisjobj, "Value", node_value);
334
+                    if (ret != CL_SUCCESS)
335
+                        return ret;
336
+
337
+                    cli_msxmlmsg("msxml_parse_element: added json value [%s: %s]\n", keyinfo->name, (const char *)node_value);
338
+                }
339
+#endif
340
+
341
+                /* scanning protocol for embedded objects encoded in base64 */
342
+                if (keyinfo->type & MSXML_SCAN_B64) {
343
+                    char name[1024];
344
+                    char *decoded, *tempfile = name;
345
+                    size_t decodedlen;
346
+                    int of;
347
+
348
+                    cli_msxmlmsg("BINARY DATA!\n");
349
+
350
+                    decoded = (char *)cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0);
351
+                    if (!decoded) {
352
+                        cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n");
353
+                        state = xmlTextReaderRead(reader);
354
+                        check_state(state);
355
+                        break;
356
+                    }
357
+
358
+                    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) {
359
+                        cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile);
360
+                        free(decoded);
361
+                        return ret;
362
+                    }
363
+
364
+                    if(cli_writen(of, decoded, decodedlen) != (int)decodedlen) {
365
+                        free(decoded);
366
+                        close(of);
367
+                        return CL_EWRITE;
368
+                    }
369
+                    free(decoded);
370
+
371
+                    cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile);
372
+
373
+                    ret = cli_magic_scandesc(of, ctx);
374
+                    close(of);
375
+                    if (ctx && !(ctx->engine->keeptmp))
376
+                        cli_unlink(tempfile);
377
+                    free(tempfile);
378
+                    if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
379
+                        return ret;
380
+                    } else if (SCAN_ALL && ret == CL_VIRUS) {
381
+                        virus = 1;
382
+                    }
383
+                }
384
+
385
+                /* advance to next node */
386
+                state = xmlTextReaderRead(reader);
387
+                check_state(state);
388
+                break;
389
+
390
+            case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
391
+                /* advance to next node */
392
+                state = xmlTextReaderRead(reader);
393
+                check_state(state);
394
+                break;
395
+
396
+            case XML_READER_TYPE_END_ELEMENT:
397
+                cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl);
398
+                node_name = xmlTextReaderConstLocalName(reader);
399
+                if (!node_name) {
400
+                    cli_dbgmsg("msxml_parse_element: element end tag node nameless\n");
401
+                    return CL_EPARSE; /* no name, nameless */
402
+                }
403
+
404
+                if (xmlStrcmp(element_name, node_name)) {
405
+                    cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name);
406
+                    return CL_EFORMAT;
407
+                }
408
+
409
+                /* advance to next element tag */
410
+                state = xmlTextReaderRead(reader);
411
+                check_state(state);
412
+
413
+                endtag = 1;
414
+                break;
415
+
416
+            default:
417
+                node_name = xmlTextReaderConstLocalName(reader);
418
+                node_value = xmlTextReaderConstValue(reader);
419
+
420
+                cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value);
421
+
422
+                state = xmlTextReaderNext(reader);
423
+                check_state(state);
424
+                return (virus ? CL_VIRUS : CL_SUCCESS);
425
+            }
426
+        }
427
+
428
+        break;
429
+    case XML_READER_TYPE_PROCESSING_INSTRUCTION:
430
+        cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value);
431
+        break;
432
+    case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
433
+        cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value);
434
+        break;
435
+    case XML_READER_TYPE_END_ELEMENT:
436
+        cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
437
+        return (virus ? CL_VIRUS : CL_SUCCESS);
438
+    default:
439
+        cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value);
440
+    }
441
+
442
+    return (virus ? CL_VIRUS : CL_SUCCESS);
443
+}
444
+
445
+/* reader initialization and closing handled by caller */
446
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode)
447
+{
448
+    struct msxml_ctx mxctx;
449
+    int state, virus = 0, ret = CL_SUCCESS;
450
+
451
+    mxctx.ctx = ctx;
452
+    mxctx.keys = keys;
453
+    mxctx.num_keys = num_keys;
454
+#if HAVE_JSON
455
+    mxctx.mode = mode;
456
+    if (mode) {
457
+        mxctx.root = ctx->wrkproperty;
458
+        /* JSON Sanity Check */
459
+        if (!mxctx.root)
460
+            mxctx.mode = 0;
461
+        mxctx.toval = 0;
462
+    }
463
+#endif
464
+
465
+    /* Error Handler */
466
+    xmlTextReaderSetErrorHandler(reader, NULL, NULL); /* xml default handler */
467
+    //xmlTextReaderSetErrorHandler(reader, msxml_error_handler, NULL);
468
+
469
+    /* Main Processing Loop */
470
+    while ((state = xmlTextReaderRead(reader)) == 1) {
471
+#if HAVE_JSON
472
+        if (mxctx.mode && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
473
+            return CL_ETIMEOUT;
474
+
475
+        ret = msxml_parse_element(&mxctx, reader, 0, mxctx.root);
476
+#else
477
+        ret = msxml_parse_element(&mxctx, reader, 0, NULL);
478
+#endif
479
+        if (ret == CL_SUCCESS);
480
+        else if (SCAN_ALL && ret == CL_VIRUS) {
481
+            /* non-allmatch simply propagates it down to return through ret */
482
+            virus = 1;
483
+        } else if (ret == CL_VIRUS || ret == CL_ETIMEOUT || ret == CL_BREAK) {
484
+            cli_dbgmsg("cli_msxml_parse_document: encountered halt event in parsing xml document\n");
485
+            break;
486
+        } else {
487
+            cli_warnmsg("cli_msxml_parse_document: encountered issue in parsing xml document\n");
488
+            break;
489
+        }
490
+    }
491
+
492
+    if (state == -1)
493
+        ret = CL_EPARSE;
494
+
495
+#if HAVE_JSON
496
+    /* Parse General Error Handler */
497
+    if (mxctx.mode) {
498
+        int tmp = CL_SUCCESS;
499
+
500
+        switch(ret) {
501
+        case CL_SUCCESS:
502
+        case CL_BREAK: /* OK */
503
+            break;
504
+        case CL_VIRUS:
505
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_VIRUS");
506
+            break;
507
+        case CL_ETIMEOUT:
508
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_TIMEOUT");
509
+            break;
510
+        case CL_EPARSE:
511
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_XMLPARSER");
512
+            break;
513
+        case CL_EMEM:
514
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OUTOFMEM");
515
+            break;
516
+        case CL_EFORMAT:
517
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_MALFORMED");
518
+            break;
519
+        default:
520
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OTHER");
521
+            break;
522
+        }
523
+
524
+        if (tmp)
525
+            return tmp;
526
+    }
527
+#endif
528
+
529
+    /* non-critical return suppression */
530
+    if (ret == CL_ETIMEOUT || ret == CL_BREAK)
531
+        ret = CL_SUCCESS;
532
+
533
+    /* important but non-critical suppression */
534
+    if (ret == CL_EPARSE) {
535
+        cli_dbgmsg("cli_msxml_parse_document: suppressing parsing error to continue scan\n");
536
+        ret = CL_SUCCESS;
537
+    }
538
+
539
+    return (virus ? CL_VIRUS : ret);
540
+}
541
+
542
+#endif /* HAVE_LIBXML2 */
0 543
new file mode 100644
... ...
@@ -0,0 +1,80 @@
0
+/*
1
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
2
+ * 
3
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ * 
5
+ * Authors: Kevin Lin
6
+ * 
7
+ * This program is free software; you can redistribute it and/or modify it under
8
+ * the terms of the GNU General Public License version 2 as published by the
9
+ * Free Software Foundation.
10
+ * 
11
+ * This program is distributed in the hope that it will be useful, but WITHOUT
12
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
+ * more details.
15
+ * 
16
+ * You should have received a copy of the GNU General Public License along with
17
+ * this program; if not, write to the Free Software Foundation, Inc., 51
18
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
+ */
20
+
21
+#ifndef __MSXML_PARSER_H
22
+#define __MSXML_PARSER_H
23
+
24
+#if HAVE_LIBXML2
25
+
26
+#if HAVE_CONFIG_H
27
+#include "clamav-config.h"
28
+#endif
29
+
30
+#include "others.h"
31
+#include "json_api.h"
32
+
33
+#ifdef _WIN32
34
+#ifndef LIBXML_WRITER_ENABLED
35
+#define LIBXML_WRITER_ENABLED 1
36
+#endif
37
+#endif
38
+#include <libxml/xmlreader.h>
39
+
40
+
41
+#define MSXML_RECLEVEL_MAX 20
42
+#define MSXML_JSON_STRLEN_MAX 128
43
+
44
+struct key_entry {
45
+/* how */
46
+#define MSXML_IGNORE       0x00
47
+#define MSXML_IGNORE_ELEM  0x01
48
+#define MSXML_SCAN_B64     0x02
49
+/* where */
50
+#define MSXML_JSON_ROOT    0x04
51
+#define MSXML_JSON_WRKPTR  0x08
52
+
53
+#define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)
54
+/* what */
55
+#define MSXML_JSON_COUNT   0x10
56
+#define MSXML_JSON_VALUE   0x20
57
+#define MSXML_JSON_ATTRIB  0x40
58
+
59
+    const char *key;
60
+    const char *name;
61
+    int type;
62
+};
63
+
64
+struct msxml_ctx {
65
+    cli_ctx *ctx;
66
+    const struct key_entry *keys;
67
+    size_t num_keys;
68
+
69
+#if HAVE_JSON
70
+    json_object *root;
71
+    int mode, toval;
72
+#endif
73
+};
74
+
75
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode);
76
+
77
+#endif /* HAVE_LIBXML2 */
78
+
79
+#endif /* __MSXML_PARSER_H */
... ...
@@ -32,6 +32,7 @@
32 32
 #include <stdlib.h>
33 33
 #include <errno.h>
34 34
 #include <conv.h>
35
+#include <zlib.h>
35 36
 #ifdef	HAVE_UNISTD_H
36 37
 #include <unistd.h>
37 38
 #endif
... ...
@@ -955,6 +956,168 @@ handler_enum(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx *
955 955
     return CL_SUCCESS;
956 956
 }
957 957
 
958
+static int
959
+likely_mso_stream(int fd)
960
+{
961
+    off_t fsize;
962
+    unsigned char check[2];
963
+
964
+    fsize = lseek(fd, 0, SEEK_END);
965
+    if (fsize == -1) {
966
+        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
967
+        return 0;
968
+    } else if (fsize < 6) {
969
+        return 0;
970
+    }
971
+
972
+    if (lseek(fd, 4, SEEK_SET) == -1) {
973
+        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
974
+        return 0;
975
+    }
976
+
977
+    if (cli_readn(fd, check, 2) != 2) {
978
+        cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
979
+        return 0;
980
+    }
981
+
982
+    if (check[0] == 0x78 && check[1] == 0x9C)
983
+        return 1;
984
+
985
+    return 0;
986
+}
987
+
988
+static int
989
+scan_mso_stream(int fd, cli_ctx *ctx)
990
+{
991
+    int zret, ofd, ret = CL_SUCCESS;
992
+    fmap_t *input;
993
+    off_t off_in = 0;
994
+    size_t count, outsize = 0;
995
+    z_stream zstrm;
996
+    char *tmpname;
997
+    uint32_t prefix;
998
+    unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];
999
+
1000
+    /* fmap the input file for easier manipulation */
1001
+    if (fd < 0) {
1002
+        cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
1003
+        return CL_ENULLARG;
1004
+    } else {
1005
+        STATBUF statbuf;
1006
+
1007
+        if (FSTAT(fd, &statbuf) == -1) {
1008
+            cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
1009
+            return CL_ESTAT;
1010
+        }
1011
+
1012
+        input = fmap(fd, 0, statbuf.st_size);
1013
+        if (!input) {
1014
+            cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
1015
+            return CL_EMAP;
1016
+        }
1017
+    }
1018
+
1019
+    /* reserve tempfile for output and scanning */
1020
+    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
1021
+        cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
1022
+        funmap(input);
1023
+        return ret;
1024
+    }
1025
+
1026
+    /* initialize zlib inflation stream */
1027
+    memset(&zstrm, 0, sizeof(zstrm));
1028
+    zstrm.zalloc = Z_NULL;
1029
+    zstrm.zfree = Z_NULL;
1030
+    zstrm.opaque = Z_NULL;
1031
+    zstrm.next_in = inbuf;
1032
+    zstrm.next_out = outbuf;
1033
+    zstrm.avail_in = 0;
1034
+    zstrm.avail_out = FILEBUFF;
1035
+
1036
+    zret = inflateInit(&zstrm);
1037
+    if (zret != Z_OK) {
1038
+        cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
1039
+        ret = CL_EUNPACK;
1040
+        goto mso_end;
1041
+    }
1042
+
1043
+    /* extract 32-bit prefix */
1044
+    if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
1045
+        cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
1046
+        ret = CL_EREAD;
1047
+        goto mso_end;
1048
+    }
1049
+    off_in += sizeof(uint32_t);
1050
+    cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%d)\n", prefix, prefix);
1051
+
1052
+    /* inflation loop */
1053
+    do {
1054
+        if (zstrm.avail_in == 0) {
1055
+            zstrm.next_in = inbuf;
1056
+            ret = fmap_readn(input, inbuf, off_in, FILEBUFF);
1057
+            if (ret < 0) {
1058
+                cli_errmsg("scan_mso_stream: Error reading MSO file\n");
1059
+                ret = CL_EUNPACK;
1060
+                goto mso_end;
1061
+            }
1062
+            if (!ret)
1063
+                break;
1064
+
1065
+            zstrm.avail_in = ret;
1066
+            off_in += ret;
1067
+        }
1068
+        zret = inflate(&zstrm, Z_SYNC_FLUSH);
1069
+        count = FILEBUFF - zstrm.avail_out;
1070
+        if (count) {
1071
+            if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
1072
+                break;
1073
+            if (cli_writen(ofd, outbuf, count) != count) {
1074
+                cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
1075
+                ret = CL_EWRITE;
1076
+                goto mso_end;
1077
+            }
1078
+            outsize += count;
1079
+        }
1080
+        zstrm.next_out = outbuf;
1081
+        zstrm.avail_out = FILEBUFF;
1082
+    } while(zret == Z_OK);
1083
+
1084
+    /* post inflation checks */
1085
+    if (zret != Z_STREAM_END && zret != Z_OK) {
1086
+        if (outsize == 0) {
1087
+            cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
1088
+            ret = CL_EUNPACK;
1089
+            goto mso_end;
1090
+        }
1091
+
1092
+        cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
1093
+    }
1094
+    cli_dbgmsg("scan_mso_stream: Decompressed to %s, size %d\n", tmpname, outsize);
1095
+
1096
+    if (outsize != prefix) {
1097
+        cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
1098
+                    (long long unsigned)prefix, (long long unsigned)outsize);
1099
+    } else {
1100
+        cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
1101
+                   (long long unsigned)prefix, (long long unsigned)outsize);
1102
+    }
1103
+
1104
+    /* scanning inflated stream */
1105
+    ret = cli_magic_scandesc(ofd, ctx);
1106
+
1107
+    /* clean-up */
1108
+ mso_end:
1109
+    zret = inflateEnd(&zstrm);
1110
+    if (zret != Z_OK)
1111
+        ret = CL_EUNPACK;
1112
+    close(ofd);
1113
+    if(ctx && !ctx->engine->keeptmp)
1114
+        if (cli_unlink(tmpname))
1115
+            ret = CL_EUNLINK;
1116
+    free(tmpname);
1117
+    funmap(input);
1118
+    return ret;
1119
+}
958 1120
 
959 1121
 static int
960 1122
 handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * ctx)
... ...
@@ -962,7 +1125,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
962 962
     char           *tempfile;
963 963
     unsigned char  *buff;
964 964
     int32_t         current_block, len, offset;
965
-    int             ofd, ret;
965
+    int             ofd, is_mso, ret;
966 966
     bitset_t       *blk_bitset;
967 967
 
968 968
     UNUSEDPARAM(dir);
... ...
@@ -1061,6 +1224,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
1061 1061
         }
1062 1062
     }
1063 1063
 
1064
+    is_mso = likely_mso_stream(ofd);
1064 1065
     if (lseek(ofd, 0, SEEK_SET) == -1) {
1065 1066
         close(ofd);
1066 1067
         if (ctx && !(ctx->engine->keeptmp))
... ...
@@ -1112,8 +1276,18 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
1112 1112
     }
1113 1113
 #endif
1114 1114
 
1115
-    /* Normal File Scan */
1116
-    ret = cli_magic_scandesc(ofd, ctx);
1115
+    if (is_mso < 0) {
1116
+        ret = CL_ESEEK;
1117
+    } else if (is_mso) {
1118
+        /* MSO Stream Scan */
1119
+        ret = scan_mso_stream(ofd, ctx);
1120
+        /* CONSIDER: running cli_magic_scandesc in the chance of MSO fp? */
1121
+        //if (ret != CL_SUCCESS || ret != CL_VIRUS)
1122
+        //ret = cli_magic_scandesc(ofd, ctx);
1123
+    } else {
1124
+        /* Normal File Scan */
1125
+        ret = cli_magic_scandesc(ofd, ctx);
1126
+    }
1117 1127
     close(ofd);
1118 1128
     free(buff);
1119 1129
     cli_bitset_free(blk_bitset);
... ...
@@ -31,7 +31,7 @@
31 31
 #include "json.h"
32 32
 #endif
33 33
 #include "json_api.h"
34
-
34
+#include "msxml_parser.h"
35 35
 #include "ooxml.h"
36 36
 
37 37
 #if HAVE_LIBXML2
... ...
@@ -43,366 +43,59 @@
43 43
 #include <libxml/xmlreader.h>
44 44
 #endif
45 45
 
46
-#define OOXML_DEBUG 0
47
-
48
-#if HAVE_LIBXML2 && HAVE_JSON
49
-
50
-#define OOXML_JSON_RECLEVEL 16
51
-#define OOXML_JSON_RECLEVEL_MAX 5
52
-#define OOXML_JSON_STRLEN_MAX 100
53
-
54
-#define check_state(state)                                              \
55
-    do {                                                                \
56
-        if (state == -1) {                                              \
57
-            cli_warnmsg("check_state: CL_EPARSE @ ln%d\n", __LINE__);   \
58
-            return CL_EPARSE;                                           \
59
-        }                                                               \
60
-        else if (state == 0) {                                          \
61
-            cli_dbgmsg("check_state: CL_BREAK @ ln%d\n", __LINE__);     \
62
-            return CL_BREAK;                                            \
63
-        }                                                               \
64
-    } while(0)
65
-
66
-static int ooxml_is_int(const char *value, size_t len, int32_t *val)
67
-{
68
-    long val2;
69
-    char *endptr = NULL;
70
-
71
-    val2 = strtol(value, &endptr, 10);
72
-    if (endptr != value+len) {
73
-        return 0;
74
-    }
75
-
76
-    *val = (int32_t)(val2 & 0x0000ffff);
77
-
78
-    return 1;
79
-}
80
-
81
-static int ooxml_add_parse_error(json_object *wrkptr, const xmlChar *errstr)
82
-{
83
-    json_object *perr;
84
-
85
-    if (!wrkptr)
86
-        return CL_ENULLARG;
87
-
88
-    perr = cli_jsonarray(wrkptr, "ParseErrors");
89
-    if (perr == NULL) {
90
-        return CL_EMEM;
91
-    }
92
-
93
-    return cli_jsonstr(perr, NULL, errstr);
94
-}
95
-
96
-static int ooxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
97
-{
98
-    json_object *newobj, *arrobj;
99
-    int val;
100
-
101
-    if (!wrkptr)
102
-        return CL_ENULLARG;
103
-
104
-    arrobj = cli_jsonarray(wrkptr, arrname);
105
-    if (arrobj == NULL) {
106
-        return CL_EMEM;
107
-    }
108
-
109
-    if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
110
-        newobj = json_object_new_int(val);
111
-    }
112
-    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
113
-        newobj = json_object_new_boolean(1);
114
-    }
115
-    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
116
-        newobj = json_object_new_boolean(0);
117
-    }
118
-    else {
119
-        newobj = json_object_new_string((const char *)node_value);
120
-    }
121 46
 
122
-    if (NULL == newobj) {
123
-        cli_errmsg("ooxml_parse_value: no memory for json value for [%s]\n", arrname);
124
-        return CL_EMEM;
125
-    }
126 47
 
127
-    json_object_array_add(arrobj, newobj);
128
-    return CL_SUCCESS;
129
-}
48
+#if HAVE_LIBXML2 && HAVE_JSON
130 49
 
131
-static const char *ooxml_keys[] = {
132
-    "coreproperties",
133
-    "title",
134
-    "subject",
135
-    "creator",
136
-    "keywords",
137
-    "comments",
138
-    "description",
139
-    "lastmodifiedby",
140
-    "revision",
141
-    "created",
142
-    "modified",
143
-    "category",
144
-    "contentstatus",
145
-
146
-    "properties",
147
-    "application",
148
-    "appversion",
149
-    "characters",
150
-    "characterswithspaces",
151
-    "company",
152
-    "digsig",
153
-    "docsecurity",
154
-    //"headingpairs",
155
-    "hiddenslides",
156
-    "hlinks",
157
-    "hyperlinkbase",
158
-    "hyperlinkschanged",
159
-    "lines",
160
-    "linksuptodate",
161
-    "manager",
162
-    "mmclips",
163
-    "notes",
164
-    "pages",
165
-    "paragraphs",
166
-    "presentationformat",
167
-    "properties",
168
-    "scalecrop",
169
-    "shareddoc",
170
-    "slides",
171
-    "template",
172
-    //"titlesofparts",
173
-    "totaltime",
174
-    "words"
50
+static const struct key_entry ooxml_keys[] = {
51
+    { "coreproperties",     "CoreProperties",     MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
52
+    { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
53
+    { "subject",            "Subject",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
54
+    { "creator",            "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
55
+    { "keywords",           "Keywords",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
56
+    { "comments",           "Comments",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
57
+    { "description",        "Description",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
58
+    { "lastmodifiedby",     "LastAuthor",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
59
+    { "revision",           "Revision",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
60
+    { "created",            "Created",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
61
+    { "modified",           "Modified",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
62
+    { "category",           "Category",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
63
+    { "contentstatus",      "ContentStatus",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
64
+
65
+    { "properties",         "ExtendedProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
66
+    { "application",        "Application",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
67
+    { "appversion",         "AppVersion",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
68
+    { "characters",         "Characters",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
69
+    { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
70
+    { "company",            "Company",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
71
+    { "digsig",             "DigSig",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
72
+    { "docsecurity",        "DocSecurity",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
73
+    //{ "headingpairs",       "HeadingPairs",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
74
+    { "hiddenslides",       "HiddenSlides",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
75
+    { "hlinks",             "HLinks",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
76
+    { "hyperlinkbase",      "HyperlinkBase",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
77
+    { "hyperlinkschanged",  "HyperlinksChanged",  MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
78
+    { "lines",              "Lines",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
79
+    { "linksuptodate",      "LinksUpToDate",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
80
+    { "manager",            "Manager",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
81
+    { "mmclips",            "MultimediaClips",    MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
82
+    { "notes",              "Notes",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
83
+    { "pages",              "Pages",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
84
+    { "paragraphs",         "Paragraphs",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
85
+    { "presentationformat", "PresentationFormat", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
86
+    //{ "properties",         "Properties",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
87
+    { "scalecrop",          "ScaleCrop",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
88
+    { "shareddoc",          "SharedDocs",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
89
+    { "slides",             "Slides",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
90
+    { "template",           "Template",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
91
+    //{ "titleofparts",       "TitleOfParts",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
92
+    { "totaltime",          "TotalTime",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
93
+    { "words",              "Words",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
94
+
95
+    /* Should NOT Exist */
96
+    { "bindata",            "BinaryData",         MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT }
175 97
 };
176
-static const char *ooxml_json_keys[] = {
177
-    "CoreProperties",
178
-    "Title",
179
-    "Subject",
180
-    "Author",
181
-    "Keywords",
182
-    "Comments",
183
-    "Description",
184
-    "LastAuthor",
185
-    "Revision",
186
-    "Created",
187
-    "Modified",
188
-    "Category",
189
-    "ContentStatus",
190
-
191
-    "ExtendedProperties",
192
-    "Application",
193
-    "AppVersion",
194
-    "Characters",
195
-    "CharactersWithSpaces",
196
-    "Company",
197
-    "DigSig",
198
-    "DocSecurity",
199
-    //"HeadingPairs",
200
-    "HiddenSlides",
201
-    "HLinks",
202
-    "HyperlinkBase",
203
-    "HyperlinksChanged",
204
-    "Lines",
205
-    "LinksUpToDate",
206
-    "Manager",
207
-    "MultimediaClips",
208
-    "Notes",
209
-    "Pages",
210
-    "Paragraphs",
211
-    "PresentationFormat",
212
-    "Properties",
213
-    "ScaleCrop",
214
-    "SharedDoc",
215
-    "Slides",
216
-    "Template",
217
-    //"TitlesOfParts",
218
-    "TotalTime",
219
-    "Words"
220
-};
221
-static size_t num_ooxml_keys = 40; //42
222
-
223
-static const char *ooxml_check_key(const char* key, size_t keylen)
224
-{
225
-    unsigned i;
226
-
227
-    if (keylen > OOXML_JSON_STRLEN_MAX-1) {
228
-        cli_dbgmsg("ooxml_check_key: key name too long\n");
229
-        return NULL;
230
-    }
231
-
232
-    for (i = 0; i < num_ooxml_keys; ++i) {
233
-        //cli_dbgmsg("%d %d %s %s %s %s\n", keylen, strlen(ooxml_keys[i]), key, keycmp, ooxml_keys[i], ooxml_json_keys[i]);
234
-        if (keylen == strlen(ooxml_keys[i]) && !strncasecmp(key, ooxml_keys[i], keylen)) {
235
-            return ooxml_json_keys[i];
236
-        }
237
-    }
238
-
239
-    return NULL;
240
-}
241
-
242
-static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, json_object *root)
243
-{
244
-    const char *element_tag = NULL, *end_tag = NULL;
245
-    const xmlChar *node_name = NULL, *node_value = NULL;
246
-    json_object *thisjobj = NULL;
247
-    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0, state = 1;
248
-
249
-    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);
250
-
251
-    /* check recursion level */
252
-    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
253
-        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n");
254
-        cli_jsonbool(root, "HitRecursiveLimit", 1);
255
-        /* skip it */
256
-        state = xmlTextReaderNext(reader);
257
-        check_state(state);
258
-        return CL_SUCCESS;
259
-    }
260
-
261
-    /* acquire element type */
262
-    node_type = xmlTextReaderNodeType(reader);
263
-    if (node_type == -1)
264
-        return CL_EPARSE;
265
-
266
-    if (node_type != XML_READER_TYPE_ELEMENT) {
267
-        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
268
-        return CL_EFORMAT; /* first type is not an element */
269
-    }
270
-
271
-    node_name = xmlTextReaderConstLocalName(reader);
272
-    if (!node_name) {
273
-        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
274
-        return CL_EPARSE; /* no name, nameless */
275
-    }
276
-    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
277
-    if (!element_tag) {
278
-        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
279
-        /* skip it */
280
-        state = xmlTextReaderNext(reader);
281
-        check_state(state);
282
-        return CL_SUCCESS;
283
-    }
284
-
285
-    /* generate json object */
286
-    thisjobj = cli_jsonobj(wrkptr, element_tag);
287
-    if (!thisjobj) {
288
-        return CL_EMEM;
289
-    }
290
-    cli_dbgmsg("ooxml_parse_element: generated json object [%s]\n", element_tag);
291
-
292
-    if (rlvl == 0)
293
-        root = thisjobj;
294
-
295
-    /* handle attributes */
296
-    state = xmlTextReaderHasAttributes(reader);
297
-    if (state == 1) {
298
-        json_object *attributes;
299
-
300
-        attributes = cli_jsonobj(thisjobj, "Attributes");
301
-        if (!attributes) {
302
-            return CL_EPARSE;
303
-        }
304
-        cli_dbgmsg("ooxml_parse_element: retrieved json object [Attributes]\n");
305
-
306
-        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
307
-            const xmlChar *name, *value;
308
-            name = xmlTextReaderConstLocalName(reader);
309
-            value = xmlTextReaderConstValue(reader);
310
-            if (name == NULL || value == NULL) continue;
311
-
312
-            cli_dbgmsg("%s: %s\n", name, value);
313
-
314
-            cli_jsonstr(attributes, name, (const char *)value);
315
-        }
316
-    }
317
-    else if (state == -1)
318
-        return CL_EPARSE;
319
-
320
-    state = xmlTextReaderIsEmptyElement(reader);
321
-    if (state == 1) {
322
-        state = xmlTextReaderNext(reader);
323
-        check_state(state);
324
-        return CL_SUCCESS;
325
-    }
326
-    else if (state == -1)
327
-        return CL_EPARSE;
328
-
329
-    /* advance to first content node */
330
-    state = xmlTextReaderRead(reader);
331
-    check_state(state);
332
-
333
-    /* parse until the end element tag */
334
-    while (!endtag) {
335
-        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
336
-            return CL_ETIMEOUT;
337
-        }
338
-
339
-        node_type = xmlTextReaderNodeType(reader);
340
-        if (node_type == -1)
341
-            return CL_EPARSE;
342
-
343
-        switch (node_type) {
344
-        case XML_READER_TYPE_ELEMENT:
345
-            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1, root);
346
-            if (ret != CL_SUCCESS) {
347
-                return ret;
348
-            }
349
-            break;
350
-
351
-        case XML_READER_TYPE_END_ELEMENT:
352
-            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
353
-            node_name = xmlTextReaderConstLocalName(reader);
354
-            if (!node_name) {
355
-                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
356
-                return CL_EPARSE; /* no name, nameless */
357
-            }
358
-
359
-            end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
360
-            if (!end_tag) {
361
-                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
362
-                return CL_EFORMAT; /* unrecognized element tag */
363
-            }
364
-            if (strncmp(element_tag, end_tag, strlen(element_tag))) {
365
-                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
366
-                return CL_EFORMAT;
367
-            }
368
-
369
-            /* advance to next element tag */
370
-            state = xmlTextReaderRead(reader);
371
-            check_state(state);
372
-
373
-            endtag = 1;
374
-            break;
375
-
376
-        case XML_READER_TYPE_TEXT:
377
-            node_value = xmlTextReaderConstValue(reader);
378
-
379
-            ret = ooxml_parse_value(thisjobj, "Value", node_value);
380
-            if (ret != CL_SUCCESS)
381
-                return ret;
382
-
383
-            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);
384
-
385
-            /* advance to next element tag */
386
-            state = xmlTextReaderRead(reader);
387
-            check_state(state);
388
-
389
-            break;
390
-
391
-        default:
392
-#if OOXML_DEBUG
393
-            node_name = xmlTextReaderConstLocalName(reader);
394
-            node_value = xmlTextReaderConstValue(reader);
395
-
396
-            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name, node_type, node_value);
397
-#endif
398
-            state = xmlTextReaderNext(reader);
399
-            check_state(state);
400
-            return CL_SUCCESS;
401
-        }
402
-    }
403
-
404
-    return CL_SUCCESS;
405
-}
98
+static size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(struct key_entry);
406 99
 
407 100
 static int ooxml_updatelimits(int fd, cli_ctx *ctx)
408 101
 {
... ...
@@ -433,12 +126,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
433 433
         return CL_SUCCESS; // internal error from libxml2
434 434
     }
435 435
 
436
-    /* move reader to first element */
437
-    if (xmlTextReaderRead(reader) != 1) {
438
-        return CL_SUCCESS; /* libxml2 failed */
439
-    }
440
-
441
-    ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, NULL);
436
+    ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1);
442 437
 
443 438
     if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
444 439
         cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n");
... ...
@@ -455,9 +143,9 @@ static int ooxml_core_cb(int fd, cli_ctx *ctx)
455 455
     cli_dbgmsg("in ooxml_core_cb\n");
456 456
     ret = ooxml_parse_document(fd, ctx);
457 457
     if (ret == CL_EPARSE)
458
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
458
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
459 459
     else if (ret == CL_EFORMAT)
460
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");
460
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");
461 461
 
462 462
     return ret;
463 463
 }
... ...
@@ -469,9 +157,9 @@ static int ooxml_extn_cb(int fd, cli_ctx *ctx)
469 469
     cli_dbgmsg("in ooxml_extn_cb\n");
470 470
     ret = ooxml_parse_document(fd, ctx);
471 471
     if (ret == CL_EPARSE)
472
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
472
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
473 473
     else if (ret == CL_EFORMAT)
474
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");
474
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");
475 475
 
476 476
     return ret;
477 477
 }
... ...
@@ -499,7 +187,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
499 499
     reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
500 500
     if (reader == NULL) {
501 501
         cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n");
502
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");
502
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");
503 503
 
504 504
         ctx->scansize = sav_scansize;
505 505
         ctx->scannedfiles = sav_scannedfiles;
... ...
@@ -608,37 +296,37 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
608 608
     if (core) {
609 609
         cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
610 610
         if (core > 1)
611
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
611
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
612 612
     }
613 613
     else if (!mcore)
614 614
         cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
615 615
     if (mcore) {
616 616
         cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore);
617
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
617
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
618 618
     }
619 619
 
620 620
     if (extn) {
621 621
         cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
622 622
         if (extn > 1)
623
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
623
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
624 624
     }
625 625
     else if (!mextn)
626 626
         cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
627 627
     if (mextn) {
628 628
         cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn);
629
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
629
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
630 630
     }
631 631
 
632 632
     if (cust) {
633 633
         cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
634 634
         if (cust > 1)
635
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
635
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
636 636
     }
637 637
     else if (!mcust)
638 638
         cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
639 639
     if (mcust) {
640 640
         cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust);
641
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
641
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
642 642
     }
643 643
 
644 644
     if (dsig) {
... ...
@@ -694,7 +382,7 @@ int cli_process_ooxml(cli_ctx *ctx)
694 694
     uint32_t loff = 0;
695 695
     int tmp = CL_SUCCESS;
696 696
 
697
-    cli_dbgmsg("in cli_processooxml\n");
697
+    cli_dbgmsg("in cli_process_ooxml\n");
698 698
     if (!ctx) {
699 699
         return CL_ENULLARG;
700 700
     }
... ...
@@ -702,35 +390,35 @@ int cli_process_ooxml(cli_ctx *ctx)
702 702
     /* find "[Content Types].xml" */
703 703
     tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff);
704 704
     if (tmp == CL_ETIMEOUT) {
705
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
705
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
706 706
         return CL_ETIMEOUT;
707 707
     }
708 708
     else if (tmp != CL_VIRUS) {
709 709
         cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
710
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
710
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
711 711
         return CL_EFORMAT;
712 712
     }
713 713
     cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);
714 714
 
715 715
     tmp = unzip_single_internal(ctx, loff, ooxml_content_cb);
716 716
     if (tmp == CL_ETIMEOUT)
717
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
717
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
718 718
     else if (tmp == CL_EMEM)
719
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
719
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
720 720
     else if (tmp == CL_EMAXSIZE)
721
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
721
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
722 722
     else if (tmp == CL_EMAXFILES)
723
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
723
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
724 724
 
725 725
     return tmp;
726 726
 #else
727 727
     UNUSEDPARAM(ctx);
728
-    cli_dbgmsg("in cli_processooxml\n");
728
+    cli_dbgmsg("in cli_process_ooxml\n");
729 729
 #if !HAVE_LIBXML2
730
-    cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!");
730
+    cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!\n");
731 731
 #endif
732 732
 #if !HAVE_JSON
733
-    cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!");
733
+    cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!\n");
734 734
 #endif
735 735
     return CL_SUCCESS;
736 736
 #endif
... ...
@@ -770,7 +770,6 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
770 770
     settings->cb_hash = engine->cb_hash;
771 771
     settings->cb_meta = engine->cb_meta;
772 772
     settings->cb_file_props = engine->cb_file_props;
773
-    settings->cb_file_props_data = engine->cb_file_props_data;
774 773
     settings->engine_options = engine->engine_options;
775 774
 
776 775
     settings->cb_stats_add_sample = engine->cb_stats_add_sample;
... ...
@@ -843,7 +842,6 @@ int cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings
843 843
     engine->cb_hash = settings->cb_hash;
844 844
     engine->cb_meta = settings->cb_meta;
845 845
     engine->cb_file_props = settings->cb_file_props;
846
-    engine->cb_file_props_data = settings->cb_file_props_data;
847 846
 
848 847
     engine->cb_stats_add_sample = settings->cb_stats_add_sample;
849 848
     engine->cb_stats_remove_sample = settings->cb_stats_remove_sample;
... ...
@@ -1363,8 +1361,7 @@ void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback)
1363 1363
     engine->cb_meta = callback;
1364 1364
 }
1365 1365
 
1366
- void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata)
1366
+void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback)
1367 1367
 {
1368 1368
     engine->cb_file_props = callback;
1369
-    engine->cb_file_props_data = cbdata;
1370 1369
 }
... ...
@@ -67,7 +67,7 @@
67 67
  * in re-enabling affected modules.
68 68
  */
69 69
 
70
-#define CL_FLEVEL 80
70
+#define CL_FLEVEL 81
71 71
 #define CL_FLEVEL_DCONF	CL_FLEVEL
72 72
 #define CL_FLEVEL_SIGTOOL CL_FLEVEL
73 73
 
... ...
@@ -315,7 +315,6 @@ struct cl_engine {
315 315
     clcb_hash cb_hash;
316 316
     clcb_meta cb_meta;
317 317
     clcb_file_props cb_file_props;
318
-    void *cb_file_props_data;
319 318
 
320 319
     /* Used for bytecode */
321 320
     struct cli_all_bc bcs;
... ...
@@ -391,7 +390,6 @@ struct cl_settings {
391 391
     clcb_hash cb_hash;
392 392
     clcb_meta cb_meta;
393 393
     clcb_file_props cb_file_props;
394
-    void *cb_file_props_data;
395 394
 
396 395
     /* Engine max settings */
397 396
     uint64_t maxembeddedpe;  /* max size to scan MSEXE for PE */
... ...
@@ -59,6 +59,7 @@
59 59
 #include "arc4.h"
60 60
 #include "rijndael.h"
61 61
 #include "textnorm.h"
62
+#include "conv.h"
62 63
 #include "json_api.h"
63 64
 
64 65
 #ifdef	CL_DEBUG
... ...
@@ -746,7 +747,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
746 746
 }
747 747
 
748 748
 
749
-static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
749
+char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
750 750
 {
751 751
     unsigned char *key, *q, result[16];
752 752
     unsigned n;
... ...
@@ -846,7 +847,7 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
846 846
     return (char *)q;
847 847
 }
848 848
 
849
-static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
849
+enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
850 850
 {
851 851
     if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
852 852
         return pdf->enc_method_embeddedfile;
... ...
@@ -2244,7 +2245,7 @@ static enum enc_method parse_enc_method(const char *dict, unsigned len, const ch
2244 2244
     return ret;
2245 2245
 }
2246 2246
 
2247
-static void pdf_handle_enc(struct pdf_struct *pdf)
2247
+void pdf_handle_enc(struct pdf_struct *pdf)
2248 2248
 {
2249 2249
     struct pdf_obj *obj;
2250 2250
     uint32_t len, n, R, P, length, EM = 1, i, oulen;
... ...
@@ -3214,8 +3215,12 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
3214 3214
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3215 3215
         return;
3216 3216
 
3217
-    if (!(pdf->stats.author))
3218
-        pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
3217
+    if (!(pdf->stats.author)) {
3218
+        pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
3219
+        if (!(pdf->stats.author))
3220
+            return;
3221
+        pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
3222
+    }
3219 3223
 }
3220 3224
 #endif
3221 3225
 
... ...
@@ -3230,8 +3235,12 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
3230 3230
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3231 3231
         return;
3232 3232
 
3233
-    if (!(pdf->stats.creator))
3234
-        pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
3233
+    if (!(pdf->stats.creator)) {
3234
+        pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
3235
+        if (!(pdf->stats.creator))
3236
+            return;
3237
+        pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
3238
+    }
3235 3239
 }
3236 3240
 #endif
3237 3241
 
... ...
@@ -3246,8 +3255,12 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
3246 3246
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3247 3247
         return;
3248 3248
 
3249
-    if (!(pdf->stats.modificationdate))
3250
-        pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
3249
+    if (!(pdf->stats.modificationdate)) {
3250
+        pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
3251
+        if (!(pdf->stats.modificationdate))
3252
+            return;
3253
+        pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
3254
+    }
3251 3255
 }
3252 3256
 #endif
3253 3257
 
... ...
@@ -3262,8 +3275,12 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
3262 3262
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3263 3263
         return;
3264 3264
 
3265
-    if (!(pdf->stats.creationdate))
3266
-        pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
3265
+    if (!(pdf->stats.creationdate)) {
3266
+        pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
3267
+        if (!(pdf->stats.creationdate))
3268
+            return;
3269
+        pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
3270
+    }
3267 3271
 }
3268 3272
 #endif
3269 3273
 
... ...
@@ -3278,8 +3295,12 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
3278 3278
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3279 3279
         return;
3280 3280
 
3281
-    if (!(pdf->stats.producer))
3282
-        pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
3281
+    if (!(pdf->stats.producer)) {
3282
+        pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
3283
+        if (!(pdf->stats.producer))
3284
+            return;
3285
+        pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
3286
+    }
3283 3287
 }
3284 3288
 #endif
3285 3289
 
... ...
@@ -3294,8 +3315,12 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
3294 3294
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3295 3295
         return;
3296 3296
 
3297
-    if (!(pdf->stats.title))
3298
-        pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
3297
+    if (!(pdf->stats.title)) {
3298
+        pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
3299
+        if (!(pdf->stats.title))
3300
+            return;
3301
+        pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
3302
+    }
3299 3303
 }
3300 3304
 #endif
3301 3305
 
... ...
@@ -3310,8 +3335,12 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
3310 3310
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3311 3311
         return;
3312 3312
 
3313
-    if (!(pdf->stats.keywords))
3314
-        pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
3313
+    if (!(pdf->stats.keywords)) {
3314
+        pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
3315
+        if (!(pdf->stats.keywords))
3316
+            return;
3317
+        pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
3318
+    }
3315 3319
 }
3316 3320
 #endif
3317 3321
 
... ...
@@ -3326,8 +3355,12 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
3326 3326
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3327 3327
         return;
3328 3328
 
3329
-    if (!(pdf->stats.subject))
3330
-        pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
3329
+    if (!(pdf->stats.subject)) {
3330
+        pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
3331
+        if (!(pdf->stats.subject))
3332
+            return;
3333
+        pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
3334
+    }
3331 3335
 }
3332 3336
 #endif
3333 3337
 
... ...
@@ -3511,22 +3544,182 @@ static void pdf_export_json(struct pdf_struct *pdf)
3511 3511
         goto cleanup;
3512 3512
     }
3513 3513
 
3514
-    if (pdf->stats.author)
3515
-        cli_jsonstr(pdfobj, "Author", pdf->stats.author);
3516
-    if (pdf->stats.creator)
3517
-        cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
3518
-    if (pdf->stats.producer)
3519
-        cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
3520
-    if (pdf->stats.modificationdate)
3521
-        cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
3522
-    if (pdf->stats.creationdate)
3523
-        cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
3524
-    if (pdf->stats.title)
3525
-        cli_jsonstr(pdfobj, "Title", pdf->stats.title);
3526
-    if (pdf->stats.subject)
3527
-        cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
3528
-    if (pdf->stats.keywords)
3529
-        cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
3514
+    if (pdf->stats.author) {
3515
+        if (!pdf->stats.author->meta.success) {
3516
+            char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);
3517
+            if (out) {
3518
+                free(pdf->stats.author->data);
3519
+                pdf->stats.author->data = out;
3520
+                pdf->stats.author->meta.length = strlen(out);
3521
+                pdf->stats.author->meta.success = 1;
3522
+            }
3523
+        }
3524
+
3525
+        if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) {
3526
+            cli_jsonstr(pdfobj, "Author", pdf->stats.author->data);
3527
+        } else if (pdf->stats.author->data && pdf->stats.author->meta.length) {
3528
+            char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length);
3529
+            cli_jsonstr(pdfobj, "Author", b64);
3530
+            cli_jsonbool(pdfobj, "Author_base64", 1);
3531
+            free(b64);
3532
+        } else {
3533
+            cli_jsonstr(pdfobj, "Author", "");
3534
+        }
3535
+    }
3536
+    if (pdf->stats.creator) {
3537
+        if (!pdf->stats.creator->meta.success) {
3538
+            char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);
3539
+            if (out) {
3540
+                free(pdf->stats.creator->data);
3541
+                pdf->stats.creator->data = out;
3542
+                pdf->stats.creator->meta.length = strlen(out);
3543
+                pdf->stats.creator->meta.success = 1;
3544
+            }
3545
+        }
3546
+
3547
+        if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) {
3548
+            cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data);
3549
+        } else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) {
3550
+            char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length);
3551
+            cli_jsonstr(pdfobj, "Creator", b64);
3552
+            cli_jsonbool(pdfobj, "Creator_base64", 1);
3553
+            free(b64);
3554
+        } else {
3555
+            cli_jsonstr(pdfobj, "Creator", "");
3556
+        }
3557
+    }
3558
+    if (pdf->stats.producer) {
3559
+        if (!pdf->stats.producer->meta.success) {
3560
+            char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);
3561
+            if (out) {
3562
+                free(pdf->stats.producer->data);
3563
+                pdf->stats.producer->data = out;
3564
+                pdf->stats.producer->meta.length = strlen(out);
3565
+                pdf->stats.producer->meta.success = 1;
3566
+            }
3567
+        }
3568
+
3569
+        if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) {
3570
+            cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data);
3571
+        } else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) {
3572
+            char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length);
3573
+            cli_jsonstr(pdfobj, "Producer", b64);
3574
+            cli_jsonbool(pdfobj, "Producer_base64", 1);
3575
+            free(b64);
3576
+        } else {
3577
+            cli_jsonstr(pdfobj, "Producer", "");
3578
+        }
3579
+    }
3580
+    if (pdf->stats.modificationdate) {
3581
+        if (!pdf->stats.modificationdate->meta.success) {
3582
+            char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
3583
+            if (out) {
3584
+                free(pdf->stats.modificationdate->data);
3585
+                pdf->stats.modificationdate->data = out;
3586
+                pdf->stats.modificationdate->meta.length = strlen(out);
3587
+                pdf->stats.modificationdate->meta.success = 1;
3588
+            }
3589
+        }
3590
+
3591
+        if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) {
3592
+            cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data);
3593
+        } else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) {
3594
+            char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
3595
+            cli_jsonstr(pdfobj, "ModificationDate", b64);
3596
+            cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
3597
+            free(b64);
3598
+        } else {
3599
+            cli_jsonstr(pdfobj, "ModificationDate", "");
3600
+        }
3601
+    }
3602
+    if (pdf->stats.creationdate) {
3603
+        if (!pdf->stats.creationdate->meta.success) {
3604
+            char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
3605
+            if (out) {
3606
+                free(pdf->stats.creationdate->data);
3607
+                pdf->stats.creationdate->data = out;
3608
+                pdf->stats.creationdate->meta.length = strlen(out);
3609
+                pdf->stats.creationdate->meta.success = 1;
3610
+            }
3611
+        }
3612
+
3613
+        if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) {
3614
+            cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data);
3615
+        } else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) {
3616
+            char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
3617
+            cli_jsonstr(pdfobj, "CreationDate", b64);
3618
+            cli_jsonbool(pdfobj, "CreationDate_base64", 1);
3619
+            free(b64);
3620
+        } else {
3621
+            cli_jsonstr(pdfobj, "CreationDate", "");
3622
+        }
3623
+    }
3624
+    if (pdf->stats.title) {
3625
+        if (!pdf->stats.title->meta.success) {
3626
+            char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);
3627
+            if (out) {
3628
+                free(pdf->stats.title->data);
3629
+                pdf->stats.title->data = out;
3630
+                pdf->stats.title->meta.length = strlen(out);
3631
+                pdf->stats.title->meta.success = 1;
3632
+            }
3633
+        }
3634
+
3635
+        if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) {
3636
+            cli_jsonstr(pdfobj, "Title", pdf->stats.title->data);
3637
+        } else if (pdf->stats.title->data && pdf->stats.title->meta.length) {
3638
+            char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length);
3639
+            cli_jsonstr(pdfobj, "Title", b64);
3640
+            cli_jsonbool(pdfobj, "Title_base64", 1);
3641
+            free(b64);
3642
+        } else {
3643
+            cli_jsonstr(pdfobj, "Title", "");
3644
+        }
3645
+    }
3646
+    if (pdf->stats.subject) {
3647
+        if (!pdf->stats.subject->meta.success) {
3648
+            char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);
3649
+            if (out) {
3650
+                free(pdf->stats.subject->data);
3651
+                pdf->stats.subject->data = out;
3652
+                pdf->stats.subject->meta.length = strlen(out);
3653
+                pdf->stats.subject->meta.success = 1;
3654
+            }
3655
+        }
3656
+
3657
+        if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) {
3658
+            cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data);
3659
+        } else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) {
3660
+            char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length);
3661
+            cli_jsonstr(pdfobj, "Subject", b64);
3662
+            cli_jsonbool(pdfobj, "Subject_base64", 1);
3663
+            free(b64);
3664
+        } else {
3665
+            cli_jsonstr(pdfobj, "Subject", "");
3666
+        }
3667
+    }
3668
+    if (pdf->stats.keywords) {
3669
+        if (!pdf->stats.keywords->meta.success) {
3670
+            char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
3671
+            if (out) {
3672
+                free(pdf->stats.keywords->data);
3673
+                pdf->stats.keywords->data = out;
3674
+                pdf->stats.keywords->meta.length = strlen(out);
3675
+                pdf->stats.keywords->meta.success = 1;
3676
+            }
3677
+        }
3678
+
3679
+        if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) {
3680
+            cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data);
3681
+        } else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) {
3682
+            char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
3683
+            cli_jsonstr(pdfobj, "Keywords", b64);
3684
+            cli_jsonbool(pdfobj, "Keywords_base64", 1);
3685
+            free(b64);
3686
+        } else {
3687
+            cli_jsonstr(pdfobj, "Keywords", "");
3688
+        }
3689
+    }
3530 3690
     if (pdf->stats.ninvalidobjs)
3531 3691
         cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
3532 3692
     if (pdf->stats.njs)
... ...
@@ -3589,6 +3782,8 @@ static void pdf_export_json(struct pdf_struct *pdf)
3589 3589
         cli_jsonbool(pdfobj, "Encrypted", 1);
3590 3590
         if (pdf->flags & (1 << DECRYPTABLE_PDF))
3591 3591
             cli_jsonbool(pdfobj, "Decryptable", 1);
3592
+        else
3593
+            cli_jsonbool(pdfobj, "Decryptable", 0);
3592 3594
     }
3593 3595
 
3594 3596
     for (i=0; i < pdf->nobjs; i++) {
... ...
@@ -3605,41 +3800,57 @@ static void pdf_export_json(struct pdf_struct *pdf)
3605 3605
 
3606 3606
 cleanup:
3607 3607
     if ((pdf->stats.author)) {
3608
+        if (pdf->stats.author->data)
3609
+            free(pdf->stats.author->data);
3608 3610
         free(pdf->stats.author);
3609 3611
         pdf->stats.author = NULL;
3610 3612
     }
3611 3613
 
3612 3614
     if (pdf->stats.creator) {
3615
+        if (pdf->stats.creator->data)
3616
+            free(pdf->stats.creator->data);
3613 3617
         free(pdf->stats.creator);
3614 3618
         pdf->stats.creator = NULL;
3615 3619
     }
3616 3620
 
3617 3621
     if (pdf->stats.producer) {
3622
+        if (pdf->stats.producer->data)
3623
+            free(pdf->stats.producer->data);
3618 3624
         free(pdf->stats.producer);
3619 3625
         pdf->stats.producer = NULL;
3620 3626
     }
3621 3627
 
3622 3628
     if (pdf->stats.modificationdate) {
3629
+        if (pdf->stats.modificationdate->data)
3630
+            free(pdf->stats.modificationdate->data);
3623 3631
         free(pdf->stats.modificationdate);
3624 3632
         pdf->stats.modificationdate = NULL;
3625 3633
     }
3626 3634
 
3627 3635
     if (pdf->stats.creationdate) {
3636
+        if (pdf->stats.creationdate->data)
3637
+            free(pdf->stats.creationdate->data);
3628 3638
         free(pdf->stats.creationdate);
3629 3639
         pdf->stats.creationdate = NULL;
3630 3640
     }
3631 3641
 
3632 3642
     if (pdf->stats.title) {
3643
+        if (pdf->stats.title->data)
3644
+            free(pdf->stats.title->data);
3633 3645
         free(pdf->stats.title);
3634 3646
         pdf->stats.title = NULL;
3635 3647
     }
3636 3648
 
3637 3649
     if (pdf->stats.subject) {
3650
+        if (pdf->stats.subject->data)
3651
+            free(pdf->stats.subject->data);
3638 3652
         free(pdf->stats.subject);
3639 3653
         pdf->stats.subject = NULL;
3640 3654
     }
3641 3655
 
3642 3656
     if (pdf->stats.keywords) {
3657
+        if (pdf->stats.keywords->data)
3658
+            free(pdf->stats.keywords->data);
3643 3659
         free(pdf->stats.keywords);
3644 3660
         pdf->stats.keywords = NULL;
3645 3661
     }
... ...
@@ -62,6 +62,17 @@ struct pdf_dict {
62 62
     struct pdf_dict_node *tail;
63 63
 };
64 64
 
65
+struct pdf_stats_entry {
66
+    char *data;
67
+
68
+    /* populated by pdf_parse_string */
69
+    struct pdf_stats_metadata {
70
+        int length;
71
+        struct pdf_obj *obj;
72
+        int success; /* if finalize succeeds */
73
+    } meta;
74
+};
75
+
65 76
 struct pdf_stats {
66 77
     int32_t ninvalidobjs;     /* Number of invalid objects */
67 78
     int32_t njs;              /* Number of javascript objects */
... ...
@@ -88,14 +99,14 @@ struct pdf_stats {
88 88
     int32_t nrichmedia;       /* Number of RichMedia objects */
89 89
     int32_t nacroform;        /* Number of AcroForm objects */
90 90
     int32_t nxfa;             /* Number of XFA objects */
91
-    char *author;             /* Author of the PDF */
92
-    char *creator;            /* Application used to create the PDF */
93
-    char *producer;           /* Application used to produce the PDF */
94
-    char *creationdate;       /* Date the PDF was created */
95
-    char *modificationdate;   /* Date the PDF was modified */
96
-    char *title;              /* Title of the PDF */
97
-    char *subject;            /* Subject of the PDF */
98
-    char *keywords;           /* Keywords of the PDF */
91
+    struct pdf_stats_entry *author;             /* Author of the PDF */
92
+    struct pdf_stats_entry *creator;            /* Application used to create the PDF */
93
+    struct pdf_stats_entry *producer;           /* Application used to produce the PDF */
94
+    struct pdf_stats_entry *creationdate;       /* Date the PDF was created */
95
+    struct pdf_stats_entry *modificationdate;   /* Date the PDF was modified */
96
+    struct pdf_stats_entry *title;              /* Title of the PDF */
97
+    struct pdf_stats_entry *subject;            /* Subject of the PDF */
98
+    struct pdf_stats_entry *keywords;           /* Keywords of the PDF */
99 99
 };
100 100
 
101 101
 
... ...
@@ -144,7 +155,12 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
144 144
 int pdf_findobj(struct pdf_struct *pdf);
145 145
 struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
146 146
 
147
-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar);
147
+void pdf_handle_enc(struct pdf_struct *pdf);
148
+char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method);
149
+enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj);
150
+
151
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);
152
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats);
148 153
 struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
149 154
 struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
150 155
 int is_object_reference(char *begin, char **endchar, uint32_t *id);
... ...
@@ -68,82 +68,33 @@
68 68
 #include "rijndael.h"
69 69
 #include "textnorm.h"
70 70
 #include "json_api.h"
71
+#include "conv.h"
71 72
 
72 73
 char *pdf_convert_utf(char *begin, size_t sz);
73 74
 
74 75
 char *pdf_convert_utf(char *begin, size_t sz)
75 76
 {
76 77
     char *res=NULL;
78
+    char *buf, *outbuf;
77 79
 #if HAVE_ICONV
78
-    char *buf, *outbuf, *p1, *p2;
79
-    size_t sz2, inlen, outlen, i;
80
+    char *p1, *p2;
81
+    size_t inlen, outlen, i;
80 82
     char *encodings[] = {
81 83
         "UTF-16",
82 84
         NULL
83 85
     };
84 86
     iconv_t cd;
87
+#endif
85 88
 
86
-    buf = cli_calloc(1, sz);
89
+    buf = cli_calloc(1, sz+1);
87 90
     if (!(buf))
88 91
         return NULL;
92
+    memcpy(buf, begin, sz);
89 93
 
90
-    /* convert PDF specific escape sequences, like octal sequences */
91
-    sz2 = 0;
92
-    for (i = 0; i < sz; ++i) {
93
-        if ((i+1 < sz) && begin[i] == '\\') {
94
-            if ((i+3 < sz) &&
95
-                (isdigit(begin[i+1]) && isdigit(begin[i+2]) && isdigit(begin[i+3]))) {
96
-                /* octal sequence */
97
-                char octal[4], *check;
98
-                unsigned long value;
99
-
100
-                memcpy(octal, &begin[i+1], 3);
101
-                octal[3] = '\0';
102
-
103
-                value = (char)strtoul(octal, &check, 8);
104
-                /* check if all characters were converted */
105
-                if (check == &octal[3])
106
-                    buf[sz2++] = value;
107
-                i += 3;
108
-            } else {
109
-                /* other sequences */
110
-                switch(begin[i+1]) {
111
-                case 'n':
112
-                    buf[sz2++] = 0x0a;
113
-                    break;
114
-                case 'r':
115
-                    buf[sz2++] = 0x0d;
116
-                    break;
117
-                case 't':
118
-                    buf[sz2++] = 0x09;
119
-                    break;
120
-                case 'b':
121
-                    buf[sz2++] = 0x08;
122
-                    break;
123
-                case 'f':
124
-                    buf[sz2++] = 0x0c;
125
-                    break;
126
-                case '(':
127
-                    buf[sz2++] = 0x28;
128
-                    break;
129
-                case ')':
130
-                    buf[sz2++] = 0x29;
131
-                    break;
132
-                case '\\':
133
-                    buf[sz2++] = 0x5c;
134
-                    break;
135
-                default:
136
-                    /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
137
-                    break;
138
-                }
139
-            }
140
-        } else
141
-            buf[sz2++] = begin[i]; 
142
-    }
143
-    //memcpy(buf, begin, sz);
94
+#if HAVE_ICONV
144 95
     p1 = buf;
145 96
 
146
-    p2 = outbuf = cli_calloc(1, sz2+1);
97
+    p2 = outbuf = cli_calloc(1, sz+1);
147 98
     if (!(outbuf)) {
148 99
         free(buf);
149 100
         return NULL;
... ...
@@ -152,7 +103,7 @@ char *pdf_convert_utf(char *begin, size_t sz)
152 152
     for (i=0; encodings[i] != NULL; i++) {
153 153
         p1 = buf;
154 154
         p2 = outbuf;
155
-        inlen = outlen = sz2;
155
+        inlen = outlen = sz;
156 156
 
157 157
         cd = iconv_open("UTF-8", encodings[i]);
158 158
         if (cd == (iconv_t)(-1)) {
... ...
@@ -162,32 +113,31 @@ char *pdf_convert_utf(char *begin, size_t sz)
162 162
 
163 163
         iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
164 164
 
165
-        if (outlen == sz2) {
165
+        if (outlen == sz) {
166 166
             /* Decoding unsuccessful right from the start */
167 167
             iconv_close(cd);
168 168
             continue;
169 169
         }
170 170
 
171
-        outbuf[sz2 - outlen] = '\0';
171
+        outbuf[sz - outlen] = '\0';
172 172
 
173 173
         res = strdup(outbuf);
174 174
         iconv_close(cd);
175 175
         break;
176 176
     }
177
+#else
178
+    outbuf = cli_utf16_to_utf8(buf, sz, UTF16_BOM);
179
+    if (!outbuf) {
180
+        free(buf);
181
+        return NULL;
182
+    }
177 183
 
184
+    res = strdup(outbuf);
185
+#endif
178 186
     free(buf);
179 187
     free(outbuf);
180 188
 
181 189
     return res;
182
-#else
183
-    res = cli_calloc(sz+1, 1);
184
-    if ((res)) {
185
-        memcpy(res, begin, sz);
186
-        res[sz] = '\0';
187
-    }
188
-
189
-    return res;
190
-#endif
191 190
 }
192 191
 
193 192
 int is_object_reference(char *begin, char **endchar, uint32_t *id)
... ...
@@ -274,13 +224,154 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
274 274
     return 0;
275 275
 }
276 276
 
277
-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar)
277
+static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, off_t *length)
278
+{
279
+    enum enc_method enc;
280
+
281
+    /* handled only once in cli_pdf() */
282
+    //pdf_handle_enc(pdf);
283
+    if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
284
+        enc = get_enc_method(pdf, obj);
285
+        return decrypt_any(pdf, obj->id, in, length, enc);
286
+    }
287
+    return NULL;
288
+}
289
+
290
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
291
+{
292
+    char *wrkstr, *output = NULL;
293
+    size_t wrklen = len, outlen;
294
+    unsigned int i, likelyutf = 0;
295
+
296
+    if (!in)
297
+        return NULL;
298
+
299
+    /* get a working copy */
300
+    wrkstr = cli_calloc(len+1, sizeof(char));
301
+    if (!wrkstr)
302
+        return NULL;
303
+    memcpy(wrkstr, in, len);
304
+
305
+    //cli_errmsg("pdf_final: start(%d):   %s\n", wrklen, wrkstr);
306
+
307
+    /* convert PDF specific escape sequences, like octal sequences */
308
+    /* TODO: replace the escape sequences directly in the wrkstr   */
309
+    if (strchr(wrkstr, '\\')) {
310
+        output = cli_calloc(wrklen+1, sizeof(char));
311
+        if (!output)
312
+            return NULL;
313
+
314
+        outlen = 0;
315
+        for (i = 0; i < wrklen; ++i) {
316
+            if ((i+1 < wrklen) && wrkstr[i] == '\\') {
317
+                if ((i+3 < wrklen) &&
318
+                    (isdigit(wrkstr[i+1]) && isdigit(wrkstr[i+2]) && isdigit(wrkstr[i+3]))) {
319
+                    /* octal sequence */
320
+                    char octal[4], *check;
321
+                    unsigned long value;
322
+
323
+                    memcpy(octal, &wrkstr[i+1], 3);
324
+                    octal[3] = '\0';
325
+
326
+                    value = (char)strtoul(octal, &check, 8);
327
+                    /* check if all characters were converted */
328
+                    if (check == &octal[3])
329
+                        output[outlen++] = value;
330
+                    i += 3; /* 4 with for loop [\ddd] */
331
+                } else {
332
+                    /* other sequences */
333
+                    switch(wrkstr[i+1]) {
334
+                    case 'n':
335
+                        output[outlen++] = 0x0a;
336
+                        break;
337
+                    case 'r':
338
+                        output[outlen++] = 0x0d;
339
+                        break;
340
+                    case 't':
341
+                        output[outlen++] = 0x09;
342
+                        break;
343
+                    case 'b':
344
+                        output[outlen++] = 0x08;
345
+                        break;
346
+                    case 'f':
347
+                        output[outlen++] = 0x0c;
348
+                        break;
349
+                    case '(':
350
+                        output[outlen++] = 0x28;
351
+                        break;
352
+                    case ')':
353
+                        output[outlen++] = 0x29;
354
+                        break;
355
+                    case '\\':
356
+                        output[outlen++] = 0x5c;
357
+                        break;
358
+                    default:
359
+                        /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
360
+                        break;
361
+                    }
362
+                    i += 1; /* 2 with for loop [\c] */
363
+                }
364
+            } else {
365
+                output[outlen++] = wrkstr[i];
366
+            }
367
+        }
368
+
369
+        free(wrkstr);
370
+        wrkstr = cli_strdup(output);
371
+        free(output);
372
+        wrklen = outlen;
373
+    }
374
+
375
+    //cli_errmsg("pdf_final: escaped(%d): %s\n", wrklen, wrkstr);
376
+
377
+    /* check for encryption and decrypt */
378
+    if (pdf->flags & (1 << ENCRYPTED_PDF))
379
+    {
380
+        off_t tmpsz = (off_t)wrklen;
381
+        output = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
382
+        outlen = (size_t)tmpsz;
383
+        free(wrkstr);
384
+        if (output) {
385
+            wrkstr = cli_calloc(outlen+1, sizeof(char));
386
+            if (!wrkstr) {
387
+                free(output);
388
+                return NULL;
389
+            }
390
+            memcpy(wrkstr, output, outlen);
391
+            free(output);
392
+            wrklen = outlen;
393
+        } else {
394
+            return NULL;
395
+        }
396
+    }
397
+
398
+    //cli_errmsg("pdf_final: decrypt(%d): %s\n", wrklen, wrkstr);
399
+
400
+    /* check for UTF-* and convert to UTF-8 */
401
+    for (i = 0; i < wrklen; ++i) {
402
+        if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
403
+            likelyutf = 1;
404
+            break;
405
+        }
406
+    }
407
+
408
+    if (likelyutf) {
409
+        output = pdf_convert_utf(wrkstr, wrklen);
410
+        free(wrkstr);
411
+        wrkstr = output;
412
+    }
413
+
414
+    //cli_errmsg("pdf_final: postutf(%d): %s\n", wrklen, wrkstr);
415
+
416
+    return wrkstr;
417
+}
418
+
419
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
278 420
 {
279 421
     const char *q = objstart;
280 422
     char *p1, *p2;
281 423
     size_t len, checklen;
282
-    char *res;
283
-    int likelyutf = 0;
424
+    char *res = NULL;
284 425
     uint32_t objid;
285 426
     size_t i;
286 427
 
... ...
@@ -294,8 +385,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
294 294
      * Fourth, Attempt to decode from UTF-* to UTF-8
295 295
      */
296 296
 
297
-    res = NULL;
298
-
299 297
     if (str) {
300 298
         checklen = strlen(str);
301 299
 
... ...
@@ -407,26 +496,29 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
407 407
             switch (*p3) {
408 408
                 case '(':
409 409
                 case '<':
410
-                    res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL);
411
-                    free(begin);
410
+                    res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
412 411
                     break;
413 412
                 default:
414
-                    for (i=0; i < objsize2; i++) {
415
-                        if (p3[i] >= 0x7f) {
416
-                            likelyutf=1;
417
-                            break;
418
-                        }
419
-                    }
420
-
421
-                    res = likelyutf ? pdf_convert_utf(p3, objsize2) : NULL;
422
-
423
-                    if (!(res)) {
424
-                        res = begin;
413
+                    res = pdf_finalize_string(pdf, obj, begin, objsize2);
414
+                    if (!res) {
415
+                        res = cli_calloc(1, objsize2+1);
416
+                        if (!(res))
417
+                            return NULL;
418
+                        memcpy(res, begin, objsize2);
425 419
                         res[objsize2] = '\0';
426
-                    } else {
427
-                        free(begin);
420
+
421
+                        if (meta) {
422
+                            meta->length = objsize2;
423
+                            meta->obj = obj;
424
+                            meta->success = 0;
425
+                        }
426
+                    } else if (meta) {
427
+                        meta->length = strlen(res);
428
+                        meta->obj = obj;
429
+                        meta->success = 1;
428 430
                     }
429 431
             }
432
+            free(begin);
430 433
         }
431 434
 
432 435
         close(fd);
... ...
@@ -471,9 +563,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
471 471
     while (p2 < objstart + objsize) {
472 472
         int shouldbreak=0;
473 473
 
474
-        if (!likelyutf && (*((unsigned char *)p2) > (unsigned char)0x7f || *p2 == '\0'))
475
-            likelyutf = 1;
476
-
477 474
         switch (*p2) {
478 475
             case '\\':
479 476
                 p2++;
... ...
@@ -496,22 +585,25 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
496 496
 
497 497
     len = (size_t)(p2 - p1) + 1;
498 498
 
499
-    if (likelyutf == 0) {
500
-        /* We're not UTF-*, so just make a copy of the string and return that */
499
+    res = pdf_finalize_string(pdf, obj, p1, len);
500
+    if (!res) {
501 501
         res = cli_calloc(1, len+1);
502 502
         if (!(res))
503 503
             return NULL;
504
-
505 504
         memcpy(res, p1, len);
506 505
         res[len] = '\0';
507
-        if (endchar)
508
-            *endchar = p2;
509 506
 
510
-        return res;
507
+        if (meta) {
508
+            meta->length = len;
509
+            meta->obj = obj;
510
+            meta->success = 0;
511
+        }
512
+    } else if (meta) {
513
+        meta->length = strlen(res);
514
+        meta->obj = obj;
515
+        meta->success = 1;
511 516
     }
512 517
 
513
-    res = pdf_convert_utf(p1, len);
514
-
515 518
     if (res && endchar)
516 519
         *endchar = p2;
517 520
 
... ...
@@ -672,7 +764,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
672 672
 
673 673
         switch (begin[0]) {
674 674
             case '(':
675
-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
675
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
676 676
                 begin = p1+2;
677 677
                 break;
678 678
             case '[':
... ...
@@ -688,7 +780,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
688 688
                     }
689 689
                 }
690 690
 
691
-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
691
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
692 692
                 begin = p1+2;
693 693
                 break;
694 694
             default:
... ...
@@ -870,7 +962,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
870 870
 
871 871
                 /* Not a dictionary. Intentially fall through. */
872 872
             case '(':
873
-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin);
873
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin, NULL);
874 874
                 begin += 2;
875 875
                 break;
876 876
             case '[':
... ...
@@ -1642,7 +1642,7 @@ int cli_scanpe(cli_ctx *ctx)
1642 1642
 
1643 1643
         if(exe_sections[0].rsz > CLI_MAX_ALLOCATION)
1644 1644
             break;
1645
-        if(!exe_sections[0].rsz)
1645
+        if(exe_sections[0].rsz < 5)
1646 1646
             break;
1647 1647
         if(!(code=fmap_need_off_once(map, exe_sections[0].raw, exe_sections[0].rsz)))
1648 1648
             break;
... ...
@@ -2457,7 +2457,21 @@ int cli_scanpe(cli_ctx *ctx)
2457 2457
 
2458 2458
             for(i = 0 ; i < nsections; i++) {
2459 2459
                 if(exe_sections[i].raw) {
2460
-                    if(!exe_sections[i].rsz || (unsigned int)fmap_readn(map, dest + exe_sections[i].rva - min, exe_sections[i].raw, exe_sections[i].ursz) != exe_sections[i].ursz) {
2460
+			unsigned int r_ret;
2461
+
2462
+			if (!exe_sections[i].rsz)
2463
+				goto out_no_petite;
2464
+
2465
+			if (!CLI_ISCONTAINED(dest, dsize,
2466
+					     dest + exe_sections[i].rva - min,
2467
+					     exe_sections[i].ursz))
2468
+				goto out_no_petite;
2469
+
2470
+			r_ret = fmap_readn(map, dest + exe_sections[i].rva - min,
2471
+					exe_sections[i].raw,
2472
+					exe_sections[i].ursz);
2473
+		    if (r_ret != exe_sections[i].ursz) {
2474
+out_no_petite:
2461 2475
                         free(exe_sections);
2462 2476
                         free(dest);
2463 2477
                         return CL_CLEAN;
... ...
@@ -393,6 +393,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
393 393
 	      free(usects);
394 394
 	      return 1;
395 395
 	    }
396
+	    if (backbytes >= INT_MAX / 2) {
397
+		    free(usects);
398
+		    cli_dbgmsg("Petite: probably invalid file\n");
399
+		    return 1;
400
+	    }
396 401
 	    backbytes = backbytes*2 + oob;
397 402
 	    if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
398 403
 	      free(usects);
... ...
@@ -409,6 +414,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
409 409
 		free(usects);
410 410
 		return 1;
411 411
 	      }
412
+	      if (backbytes >= INT_MAX / 2) {
413
+		      free(usects);
414
+		      cli_dbgmsg("Petite: probably invalid file\n");
415
+		      return 1;
416
+	      }
412 417
 	      backbytes = backbytes*2 + oob;
413 418
 	      backsize--;
414 419
 	    } while (backsize);
... ...
@@ -146,63 +146,63 @@ int cli_rebuildpe_align(char *buffer, struct cli_exe_section *sections, int sect
146 146
   if(datasize > CLI_MAX_ALLOCATION)
147 147
     return 0;
148 148
 
149
-  if((pefile = (char *) cli_calloc(rawbase+datasize, 1))) {
150
-    memcpy(pefile, HEADERS, 0x148);
151
-
152
-    datasize = PESALIGN(rawbase, 0x1000);
153
-
154
-    fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
155
-    fakepe->NumberOfSections = EC16(sects+gotghost);
156
-    fakepe->AddressOfEntryPoint = EC32(ep);
157
-    fakepe->ImageBase = EC32(base);
158
-    fakepe->SizeOfHeaders = EC32(rawbase);
159
-    memset(pefile+0x148, 0, 0x80);
160
-    cli_writeint32(pefile+0x148+0x10, ResRva);
161
-    cli_writeint32(pefile+0x148+0x14, ResSize);
162
-    curpe = pefile+0x148+0x80;
163
-
164
-    if (gotghost) {
149
+  pefile = (char *) cli_calloc(rawbase+datasize, 1);
150
+  if(!pefile)
151
+      return 0;
152
+
153
+  memcpy(pefile, HEADERS, 0x148);
154
+
155
+  datasize = PESALIGN(rawbase, 0x1000);
156
+
157
+  fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
158
+  fakepe->NumberOfSections = EC16(sects+gotghost);
159
+  fakepe->AddressOfEntryPoint = EC32(ep);
160
+  fakepe->ImageBase = EC32(base);
161
+  fakepe->SizeOfHeaders = EC32(rawbase);
162
+  memset(pefile+0x148, 0, 0x80);
163
+  cli_writeint32(pefile+0x148+0x10, ResRva);
164
+  cli_writeint32(pefile+0x148+0x14, ResSize);
165
+  curpe = pefile+0x148+0x80;
166
+
167
+  if (gotghost) {
165 168
       snprintf(curpe, 8, "empty");
166 169
       cli_writeint32(curpe+8, sections[0].rva-datasize); /* vsize */
167 170
       cli_writeint32(curpe+12, datasize); /* rva */
168 171
       cli_writeint32(curpe+0x24, 0xffffffff);
169 172
       curpe+=40;
170 173
       datasize+=PESALIGN(sections[0].rva-datasize, 0x1000);
171
-    }
174
+  }
172 175
 
173
-    for (i=0; i < sects; i++) {
176
+  for (i=0; i < sects; i++) {
174 177
       snprintf(curpe, 8, ".clam%.2d", i+1);
175 178
       if (!align) {
176
-        cli_writeint32(curpe+8, sections[i].vsz);
177
-        cli_writeint32(curpe+12, sections[i].rva);
178
-        cli_writeint32(curpe+16, sections[i].rsz);
179
-        cli_writeint32(curpe+20, rawbase);
179
+          cli_writeint32(curpe+8, sections[i].vsz);
180
+          cli_writeint32(curpe+12, sections[i].rva);
181
+          cli_writeint32(curpe+16, sections[i].rsz);
182
+          cli_writeint32(curpe+20, rawbase);
180 183
       } else {
181
-        cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
182
-        cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
183
-        cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
184
-        cli_writeint32(curpe+20, rawbase);
184
+          cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
185
+          cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
186
+          cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
187
+          cli_writeint32(curpe+20, rawbase);
185 188
       }
186 189
       /* already zeroed
187
-      cli_writeint32(curpe+24, 0);
188
-      cli_writeint32(curpe+28, 0);
189
-      cli_writeint32(curpe+32, 0);
190
+         cli_writeint32(curpe+24, 0);
191
+         cli_writeint32(curpe+28, 0);
192
+         cli_writeint32(curpe+32, 0);
190 193
       */
191 194
       cli_writeint32(curpe+0x24, 0xffffffff);
192 195
       memcpy(pefile+rawbase, buffer+sections[i].raw, sections[i].rsz);
193 196
       curpe+=40;
194 197
       if (!align) {
195
-        rawbase+=PESALIGN(sections[i].rsz, 0x200);
196
-        datasize+=PESALIGN(sections[i].vsz, 0x1000);
198
+          rawbase+=PESALIGN(sections[i].rsz, 0x200);
199
+          datasize+=PESALIGN(sections[i].vsz, 0x1000);
197 200
       } else {
198
-        rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
199
-        datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
201
+          rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
202
+          datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
200 203
       }
201
-    }
202
-    fakepe->SizeOfImage = EC32(datasize);
203
-  } else {
204
-    return 0;
205 204
   }
205
+  fakepe->SizeOfImage = EC32(datasize);
206 206
 
207 207
   i = (cli_writen(file, pefile, rawbase)!=-1);
208 208
   free(pefile);
... ...
@@ -105,6 +105,7 @@
105 105
 #include "ooxml.h"
106 106
 #include "xdp.h"
107 107
 #include "json_api.h"
108
+#include "msxml.h"
108 109
 
109 110
 #ifdef HAVE_BZLIB_H
110 111
 #include <bzlib.h>
... ...
@@ -2212,6 +2213,12 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
2212 2212
                 case CL_TYPE_XDP:
2213 2213
                     ret = cli_scanxdp(ctx);
2214 2214
                     break;
2215
+                case CL_TYPE_XML_WORD:
2216
+                    ret = cli_scanmsxml(ctx);
2217
+                    break;
2218
+                case CL_TYPE_XML_XL:
2219
+                    ret = cli_scanmsxml(ctx);
2220
+                    break;
2215 2221
                 case CL_TYPE_RARSFX:
2216 2222
                     if(type != CL_TYPE_RAR && have_rar && SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
2217 2223
                         char *tmpname = NULL;
... ...
@@ -2602,7 +2609,9 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2602 2602
                 //type == CL_TYPE_ZIP ||
2603 2603
                 type == CL_TYPE_OOXML_WORD ||
2604 2604
                 type == CL_TYPE_OOXML_PPT ||
2605
-                type == CL_TYPE_OOXML_XL) { 
2605
+                type == CL_TYPE_OOXML_XL ||
2606
+                type == CL_TYPE_XML_WORD ||
2607
+                type == CL_TYPE_XML_XL) {
2606 2608
                 ctx->properties = json_object_new_object();
2607 2609
                 if (NULL == ctx->properties) {
2608 2610
                     cli_errmsg("magic_scandesc: no memory for json properties object\n");
... ...
@@ -2750,6 +2759,14 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2750 2750
 	case CL_TYPE_IGNORED:
2751 2751
 	    break;
2752 2752
 
2753
+    case CL_TYPE_XML_WORD:
2754
+        ret = cli_scanmsxml(ctx);
2755
+        break;
2756
+
2757
+    case CL_TYPE_XML_XL:
2758
+        ret = cli_scanmsxml(ctx);
2759
+        break;
2760
+
2753 2761
     case CL_TYPE_XDP:
2754 2762
         ret = cli_scanxdp(ctx);
2755 2763
         break;
... ...
@@ -3467,15 +3484,46 @@ static int scan_common(int desc, cl_fmap_t *map, const char **virname, unsigned
3467 3467
             int ret = CL_SUCCESS;
3468 3468
             cli_dbgmsg("%s\n", jstring);
3469 3469
 
3470
-           /* Scan the json string unless a virus was detected */
3471 3470
             if (rc != CL_VIRUS) {
3472
-                ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
3473
-                rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
3471
+                /* run bytecode preclass hook; generate fmap if needed for running hook */
3472
+                struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
3473
+                if (!bc_ctx) {
3474
+                    cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
3475
+                    rc = CL_EMEM;
3476
+                }
3477
+                else {
3478
+                    fmap_t *pc_map = map;
3479
+
3480
+                    if (!pc_map) {
3481
+                        perf_start(&ctx, PERFT_MAP);
3482
+                        if(!(pc_map = fmap(desc, 0, sb.st_size))) {
3483
+                            perf_stop(&ctx, PERFT_MAP);
3484
+                            rc = CL_EMEM;
3485
+                        }
3486
+                        perf_stop(&ctx, PERFT_MAP);
3487
+                    }
3488
+
3489
+                    if (pc_map) {
3490
+                        cli_bytecode_context_setctx(bc_ctx, &ctx);
3491
+                        rc = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, pc_map);
3492
+                        cli_bytecode_context_destroy(bc_ctx);
3493
+
3494
+                        if (!map)
3495
+                            funmap(pc_map);
3496
+                    }
3497
+                }
3498
+
3499
+                /* backwards compatibility: scan the json string unless a virus was detected */
3500
+                if (rc != CL_VIRUS && ctx.engine->root[13]->ac_lsigs) {
3501
+                    cli_warnmsg("scan_common: running depeciated preclass bytecodes for target type 13\n");
3502
+                    ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
3503
+                    rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
3504
+                }
3474 3505
             }
3475 3506
 
3476 3507
             /* Invoke file props callback */
3477 3508
             if (ctx.engine->cb_file_props != NULL) {
3478
-                ret = ctx.engine->cb_file_props(jstring, rc, ctx.engine->cb_file_props_data);
3509
+                ret = ctx.engine->cb_file_props(jstring, rc, ctx.cb_ctx);
3479 3510
                 if (ret != CL_SUCCESS)
3480 3511
                     rc = ret;
3481 3512
             }
... ...
@@ -690,3 +690,49 @@ char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
690 690
     s2[j] = '\0';
691 691
     return s2;
692 692
 }
693
+
694
+int cli_isutf8(const char *buf, unsigned int len)
695
+{
696
+	unsigned int i, j;
697
+
698
+    for(i = 0; i < len; i++) {
699
+        if((buf[i] & 0x80) == 0) {  /* 0xxxxxxx is plain ASCII */
700
+            continue;
701
+        } else if((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
702
+            return 0;
703
+        } else {
704
+            unsigned int following;
705
+
706
+            if((buf[i] & 0x20) == 0) {		/* 110xxxxx */
707
+                /* c = buf[i] & 0x1f; */
708
+                following = 1;
709
+            } else if((buf[i] & 0x10) == 0) {	/* 1110xxxx */
710
+                /* c = buf[i] & 0x0f; */
711
+                following = 2;
712
+            } else if((buf[i] & 0x08) == 0) {	/* 11110xxx */
713
+                /* c = buf[i] & 0x07; */
714
+                following = 3;
715
+            } else if((buf[i] & 0x04) == 0) {	/* 111110xx */
716
+                /* c = buf[i] & 0x03; */
717
+                following = 4;
718
+            } else if((buf[i] & 0x02) == 0) {	/* 1111110x */
719
+                /* c = buf[i] & 0x01; */
720
+                following = 5;
721
+            } else {
722
+                return 0;
723
+            }
724
+
725
+            for(j = 0; j < following; j++) {
726
+                if(++i >= len)
727
+                    return 0;
728
+
729
+                if((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
730
+                    return 0;
731
+
732
+                /* c = (c << 6) + (buf[i] & 0x3f); */
733
+            }
734
+        }
735
+    }
736
+
737
+    return 1;
738
+}
... ...
@@ -64,5 +64,7 @@ typedef enum {
64 64
 } utf16_type;
65 65
 char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type);
66 66
 
67
+int cli_isutf8(const char *buf, unsigned int len);
68
+
67 69
 size_t cli_strlcat(char *dst, const char *src, size_t sz); /* libclamav/strlcat.c */
68 70
 #endif
... ...
@@ -39,7 +39,7 @@
39 39
 #include <sys/stat.h>
40 40
 #include <fcntl.h>
41 41
 #include <sys/stat.h>
42
-#ifdef	HAVE_UNISTD_H
42
+#ifdef        HAVE_UNISTD_H
43 43
 #include <unistd.h>
44 44
 #endif
45 45
 #include <time.h>
... ...
@@ -49,68 +49,69 @@
49 49
 #include "swf.h"
50 50
 #include "clamav.h"
51 51
 #include "scanners.h"
52
-
53
-#define EC16(v)	le16_to_host(v)
54
-#define EC32(v)	le32_to_host(v)
55
-
56
-#define INITBITS								\
57
-{										\
58
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
59
-	bitpos = 8;								\
60
-	bitbuf = (unsigned int) get_c;						\
61
-	offset += sizeof(get_c);						\
62
-    } else {									\
63
-	cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n");	\
64
-	return CL_EFORMAT;							\
65
-    }										\
52
+#include "lzma_iface.h"
53
+
54
+#define EC16(v)        le16_to_host(v)
55
+#define EC32(v)        le32_to_host(v)
56
+
57
+#define INITBITS                                                                \
58
+{                                                                               \
59
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
60
+        bitpos = 8;                                                             \
61
+        bitbuf = (unsigned int) get_c;                                          \
62
+        offset += sizeof(get_c);                                                \
63
+    } else {                                                                    \
64
+        cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n"); \
65
+        return CL_EFORMAT;                                                      \
66
+    }                                                                           \
66 67
 }
67 68
 
68
-#define GETBITS(v, n)								\
69
-{										\
70
-    getbits_n = n;								\
71
-    bits = 0;									\
72
-    while(getbits_n > bitpos) {							\
73
-	getbits_n -= bitpos;							\
74
-	bits |= bitbuf << getbits_n;						\
75
-	if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
76
-	    bitbuf = (unsigned int) get_c;					\
77
-	    bitpos = 8;								\
78
-	    offset += sizeof(get_c);						\
79
-	} else {								\
80
-	    cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n");	\
81
-	    return CL_EFORMAT;							\
82
-	}									\
83
-    }										\
84
-    bitpos -= getbits_n;							\
85
-    bits |= bitbuf >> bitpos;							\
86
-    bitbuf &= 0xff >> (8 - bitpos);						\
87
-    v = bits & 0xffff;								\
69
+#define GETBITS(v, n)                                                           \
70
+{                                                                               \
71
+    getbits_n = n;                                                              \
72
+    bits = 0;                                                                   \
73
+    while(getbits_n > bitpos) {                                                 \
74
+        getbits_n -= bitpos;                                                    \
75
+        bits |= bitbuf << getbits_n;                                            \
76
+        if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {   \
77
+            bitbuf = (unsigned int) get_c;                                      \
78
+            bitpos = 8;                                                         \
79
+            offset += sizeof(get_c);                                            \
80
+        } else {                                                                \
81
+            cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n"); \
82
+            return CL_EFORMAT;                                                  \
83
+        }                                                                       \
84
+    }                                                                           \
85
+    bitpos -= getbits_n;                                                        \
86
+    bits |= bitbuf >> bitpos;                                                   \
87
+    bitbuf &= 0xff >> (8 - bitpos);                                             \
88
+    v = bits & 0xffff;                                                          \
88 89
 }
89 90
 
90
-#define GETWORD(v)								\
91
-{										\
92
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
93
-	getword_1 = (unsigned int) get_c;					\
94
-	offset += sizeof(get_c);						\
95
-    } else {									\
96
-	cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n");	\
97
-	return CL_EFORMAT;							\
98
-    }										\
99
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
100
-	getword_2 = (unsigned int) get_c;					\
101
-	offset += sizeof(get_c);						\
102
-    } else {									\
103
-	cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n");	\
104
-	return CL_EFORMAT;							\
105
-    }										\
106
-    v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8);		\
91
+#define GETWORD(v)                                                              \
92
+{                                                                               \
93
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
94
+        getword_1 = (unsigned int) get_c;                                       \
95
+        offset += sizeof(get_c);                                                \
96
+    } else {                                                                    \
97
+        cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
98
+        return CL_EFORMAT;                                                      \
99
+    }                                                                           \
100
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
101
+        getword_2 = (unsigned int) get_c;                                       \
102
+        offset += sizeof(get_c);                                                \
103
+    } else {                                                                    \
104
+        cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
105
+        return CL_EFORMAT;                                                      \
106
+    }                                                                           \
107
+    v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8);               \
107 108
 }
108 109
 
109
-#define GETDWORD(v)								\
110
-{										\
111
-    GETWORD(getdword_1);							\
112
-    GETWORD(getdword_2);							\
113
-    v = (uint32_t)(getdword_1 | (getdword_2 << 16));				\
110
+#define GETDWORD(v)                                                             \
111
+{                                                                               \
112
+    GETWORD(getdword_1);                                                        \
113
+    GETWORD(getdword_2);                                                        \
114
+    v = (uint32_t)(getdword_1 | (getdword_2 << 16));                            \
114 115
 }
115 116
 
116 117
 struct swf_file_hdr {
... ...
@@ -119,30 +120,200 @@ struct swf_file_hdr {
119 119
     uint32_t filesize;
120 120
 };
121 121
 
122
+static int scanzws(cli_ctx *ctx, struct swf_file_hdr *hdr)
123
+{
124
+        struct CLI_LZMA lz;
125
+        unsigned char inbuff[FILEBUFF], outbuff[FILEBUFF];
126
+        fmap_t *map = *ctx->fmap;
127
+        /* strip off header */
128
+        off_t offset = 8;
129
+        uint32_t d_insize;
130
+        size_t outsize = 8;
131
+        int ret, lret, count;
132
+        char *tmpname;
133
+        int fd;
134
+
135
+    if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
136
+        cli_errmsg("scanzws: Can't generate temporary file\n");
137
+        return ret;
138
+    }
139
+
140
+    hdr->signature[0] = 'F';
141
+    if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
142
+        cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
143
+        close(fd);
144
+        if(cli_unlink(tmpname)) {
145
+            free(tmpname);
146
+            return CL_EUNLINK;
147
+        }
148
+        free(tmpname);
149
+        return CL_EWRITE;
150
+    }
151
+
152
+    /* read 4 bytes (for compressed 32-bit filesize) [not used for LZMA] */
153
+    if (fmap_readn(map, &d_insize, offset, sizeof(d_insize)) != sizeof(d_insize)) {
154
+        cli_errmsg("scanzws: Error reading SWF file\n");
155
+        close(fd);
156
+        if (cli_unlink(tmpname)) {
157
+            free(tmpname);
158
+            return CL_EUNLINK;
159
+        }
160
+        free(tmpname);
161
+        return CL_EREAD;
162
+    }
163
+    offset += sizeof(d_insize);
164
+
165
+    /* check if declared input size matches actual output size */
166
+    /* map->len = header (8 bytes) + d_insize (4 bytes) + flags (5 bytes) + compressed stream */
167
+    if (d_insize != (map->len - 17)) {
168
+        cli_warnmsg("SWF: declared input length != compressed stream size, %u != %llu\n",
169
+                    d_insize, (long long unsigned)(map->len - 17));
170
+    } else {
171
+        cli_dbgmsg("SWF: declared input length == compressed stream size, %u == %llu\n",
172
+                    d_insize, (long long unsigned)(map->len - 17));
173
+    }
174
+
175
+    /* first buffer required for initializing LZMA */
176
+    ret = fmap_readn(map, inbuff, offset, FILEBUFF);
177
+    if (ret < 0) {
178
+        cli_errmsg("scanzws: Error reading SWF file\n");
179
+        close(fd);
180
+        if (cli_unlink(tmpname)) {
181
+            free(tmpname);
182
+            return CL_EUNLINK;
183
+        }
184
+        free(tmpname);
185
+        return CL_EUNPACK;
186
+    }
187
+    if (!ret)
188
+        return CL_EFORMAT; /* likely truncated */
189
+    offset += ret;
190
+
191
+    memset(&lz, 0, sizeof(lz));
192
+    lz.next_in = inbuff;
193
+    lz.next_out = outbuff;
194
+    lz.avail_in = ret;
195
+    lz.avail_out = FILEBUFF;
196
+
197
+    lret = cli_LzmaInit(&lz, hdr->filesize);
198
+    if (lret != LZMA_RESULT_OK) {
199
+        cli_errmsg("scanzws: LzmaInit() failed\n");
200
+        close(fd);
201
+        if (cli_unlink(tmpname)) {
202
+            free(tmpname);
203
+            return CL_EUNLINK;
204
+        }
205
+        free(tmpname);
206
+        return CL_EUNPACK;
207
+    }
208
+
209
+    while (lret == LZMA_RESULT_OK) {
210
+        if (lz.avail_in == 0) {
211
+            lz.next_in = inbuff;
212
+
213
+            ret = fmap_readn(map, inbuff, offset, FILEBUFF);
214
+            if (ret < 0) {
215
+                cli_errmsg("scanzws: Error reading SWF file\n");
216
+                cli_LzmaShutdown(&lz);
217
+                close(fd);
218
+                if (cli_unlink(tmpname)) {
219
+                    free(tmpname);
220
+                    return CL_EUNLINK;
221
+                }
222
+                free(tmpname);
223
+                return CL_EUNPACK;
224
+            }
225
+            if (!ret)
226
+                break;
227
+            lz.avail_in = ret;
228
+            offset += ret;
229
+        }
230
+        lret = cli_LzmaDecode(&lz);
231
+        count = FILEBUFF - lz.avail_out;
232
+        if (count) {
233
+            if (cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
234
+                break;
235
+            if (cli_writen(fd, outbuff, count) != count) {
236
+                cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
237
+                cli_LzmaShutdown(&lz);
238
+                close(fd);
239
+                if (cli_unlink(tmpname)) {
240
+                    free(tmpname);
241
+                    return CL_EUNLINK;
242
+                }
243
+                free(tmpname);
244
+                return CL_EWRITE;
245
+            }
246
+            outsize += count;
247
+        }
248
+        lz.next_out = outbuff;
249
+        lz.avail_out = FILEBUFF;
250
+    }
251
+
252
+    cli_LzmaShutdown(&lz);
253
+
254
+    if (lret != LZMA_STREAM_END && lret != LZMA_RESULT_OK) {
255
+        /* outsize starts at 8, therefore, if its still 8, nothing was decompressed */
256
+        if (outsize == 8) {
257
+            cli_infomsg(ctx, "scanzws: Error decompressing SWF file. No data decompressed.\n");
258
+            close(fd);
259
+            if (cli_unlink(tmpname)) {
260
+                free(tmpname);
261
+                return CL_EUNLINK;
262
+            }
263
+            free(tmpname);
264
+            return CL_EUNPACK;
265
+        }
266
+        cli_infomsg(ctx, "scanzws: Error decompressing SWF file. Scanning what was decompressed.\n");
267
+    }
268
+    cli_dbgmsg("SWF: Decompressed[LZMA] to %s, size %d\n", tmpname, outsize);
269
+
270
+    /* check if declared output size matches actual output size */
271
+    if (hdr->filesize != outsize) {
272
+        cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
273
+                    hdr->filesize, (long long unsigned)outsize);
274
+    } else {
275
+        cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
276
+                   hdr->filesize, (long long unsigned)outsize);
277
+    }
278
+
279
+    ret = cli_magic_scandesc(fd, ctx);
280
+
281
+    close(fd);
282
+    if (!(ctx->engine->keeptmp)) {
283
+        if (cli_unlink(tmpname)) {
284
+            free(tmpname);
285
+            return CL_EUNLINK;
286
+        }
287
+    }
288
+    free(tmpname);
289
+    return ret;
290
+}
291
+
122 292
 static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
123 293
 {
124
-	z_stream stream;
125
-	char inbuff[FILEBUFF], outbuff[FILEBUFF];
126
-	fmap_t *map = *ctx->fmap;
127
-	int offset = 8, ret, zret, outsize = 8, count, zend;
128
-	char *tmpname;
129
-	int fd;
294
+        z_stream stream;
295
+        char inbuff[FILEBUFF], outbuff[FILEBUFF];
296
+        fmap_t *map = *ctx->fmap;
297
+        int offset = 8, ret, zret, outsize = 8, count, zend;
298
+        char *tmpname;
299
+        int fd;
130 300
 
131 301
     if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
132
-	cli_errmsg("scancws: Can't generate temporary file\n");
133
-	return ret;
302
+        cli_errmsg("scancws: Can't generate temporary file\n");
303
+        return ret;
134 304
     }
135 305
 
136 306
     hdr->signature[0] = 'F';
137 307
     if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
138
-	cli_errmsg("scancws: Can't write to file %s\n", tmpname);
308
+        cli_errmsg("scancws: Can't write to file %s\n", tmpname);
139 309
         close(fd);
140
-	if(cli_unlink(tmpname)) {
141
-	    free(tmpname);
142
-	    return CL_EUNLINK;
143
-	}
144
-	free(tmpname);
145
-	return CL_EWRITE;
310
+        if(cli_unlink(tmpname)) {
311
+            free(tmpname);
312
+            return CL_EUNLINK;
313
+        }
314
+        free(tmpname);
315
+        return CL_EWRITE;
146 316
     }
147 317
 
148 318
     stream.avail_in = 0;
... ...
@@ -155,56 +326,56 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
155 155
 
156 156
     zret = inflateInit(&stream);
157 157
     if(zret != Z_OK) {
158
-	cli_errmsg("scancws: inflateInit() failed\n");
158
+        cli_errmsg("scancws: inflateInit() failed\n");
159 159
         close(fd);
160
-	if(cli_unlink(tmpname)) {
161
-	    free(tmpname);
162
-	    return CL_EUNLINK;
163
-	}
164
-	free(tmpname);
165
-	return CL_EUNPACK;
160
+        if(cli_unlink(tmpname)) {
161
+            free(tmpname);
162
+            return CL_EUNLINK;
163
+        }
164
+        free(tmpname);
165
+        return CL_EUNPACK;
166 166
     }
167 167
 
168 168
     do {
169
-	if(stream.avail_in == 0) {
170
-	    stream.next_in = (Bytef *)inbuff;
171
-	    ret = fmap_readn(map, inbuff, offset, FILEBUFF);
172
-	    if(ret < 0) {
173
-		cli_errmsg("scancws: Error reading SWF file\n");
174
-		close(fd);
175
-		if(cli_unlink(tmpname)) {
176
-		    free(tmpname);
177
-            inflateEnd(&stream);
178
-		    return CL_EUNLINK;
179
-		}
180
-		free(tmpname);
181
-        inflateEnd(&stream);
182
-		return CL_EUNPACK;
183
-	    }
184
-	    if(!ret)
185
-		break;
186
-	    stream.avail_in = ret;
187
-	    offset += ret;
188
-	}
189
-	zret = inflate(&stream, Z_SYNC_FLUSH);
190
-	count = FILEBUFF - stream.avail_out;
191
-	if(count) {
192
-	    if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
193
-		break;
194
-	    if(cli_writen(fd, outbuff, count) != count) {
195
-		cli_errmsg("scancws: Can't write to file %s\n", tmpname);
196
-		close(fd);
197
-		if(cli_unlink(tmpname)) {
198
-		    free(tmpname);
199
-		    return CL_EUNLINK;
200
-		}
201
-		free(tmpname);
202
-		return CL_EWRITE;
203
-	    }
204
-	    outsize += count;
205
-	}
206
-	stream.next_out = (Bytef *)outbuff;
207
-	stream.avail_out = FILEBUFF;
169
+        if(stream.avail_in == 0) {
170
+            stream.next_in = (Bytef *)inbuff;
171
+            ret = fmap_readn(map, inbuff, offset, FILEBUFF);
172
+            if(ret < 0) {
173
+                cli_errmsg("scancws: Error reading SWF file\n");
174
+                close(fd);
175
+                inflateEnd(&stream);
176
+                if(cli_unlink(tmpname)) {
177
+                    free(tmpname);
178
+                    return CL_EUNLINK;
179
+                }
180
+                free(tmpname);
181
+                return CL_EUNPACK;
182
+            }
183
+            if(!ret)
184
+                break;
185
+            stream.avail_in = ret;
186
+            offset += ret;
187
+        }
188
+        zret = inflate(&stream, Z_SYNC_FLUSH);
189
+        count = FILEBUFF - stream.avail_out;
190
+        if(count) {
191
+            if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
192
+                break;
193
+            if(cli_writen(fd, outbuff, count) != count) {
194
+                cli_errmsg("scancws: Can't write to file %s\n", tmpname);
195
+                inflateEnd(&stream);
196
+                close(fd);
197
+                if(cli_unlink(tmpname)) {
198
+                    free(tmpname);
199
+                    return CL_EUNLINK;
200
+                }
201
+                free(tmpname);
202
+                return CL_EWRITE;
203
+            }
204
+            outsize += count;
205
+        }
206
+        stream.next_out = (Bytef *)outbuff;
207
+        stream.avail_out = FILEBUFF;
208 208
     } while(zret == Z_OK);
209 209
 
210 210
     zend = inflateEnd(&stream);
... ...
@@ -226,16 +397,25 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
226 226
         }
227 227
         cli_infomsg(ctx, "scancws: Error decompressing SWF file. Scanning what was decompressed.\n");
228 228
     }
229
-    cli_dbgmsg("SWF: Decompressed to %s, size %d\n", tmpname, outsize);
229
+    cli_dbgmsg("SWF: Decompressed[zlib] to %s, size %d\n", tmpname, outsize);
230
+
231
+    /* check if declared output size matches actual output size */
232
+    if (hdr->filesize != outsize) {
233
+        cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
234
+                    hdr->filesize, (long long unsigned)outsize);
235
+    } else {
236
+        cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
237
+                   hdr->filesize, (long long unsigned)outsize);
238
+    }
230 239
 
231 240
     ret = cli_magic_scandesc(fd, ctx);
232 241
 
233 242
     close(fd);
234 243
     if(!ctx->engine->keeptmp) {
235
-	if(cli_unlink(tmpname)) {
236
-	    free(tmpname);
237
-	    return CL_EUNLINK;
238
-	}
244
+        if(cli_unlink(tmpname)) {
245
+            free(tmpname);
246
+            return CL_EUNLINK;
247
+        }
239 248
     }
240 249
     free(tmpname);
241 250
     return ret;
... ...
@@ -243,11 +423,11 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
243 243
 
244 244
 static const char *tagname(tag_id id)
245 245
 {
246
-	unsigned int i;
246
+        unsigned int i;
247 247
 
248 248
     for(i = 0; tag_names[i].name; i++)
249
-	if(tag_names[i].id == id)
250
-	    return tag_names[i].name;
249
+        if(tag_names[i].id == id)
250
+            return tag_names[i].name;
251 251
     return NULL;
252 252
 }
253 253
 
... ...
@@ -265,19 +445,22 @@ int cli_scanswf(cli_ctx *ctx)
265 265
     cli_dbgmsg("in cli_scanswf()\n");
266 266
 
267 267
     if(fmap_readn(map, &file_hdr, offset, sizeof(file_hdr)) != sizeof(file_hdr)) {
268
-	cli_dbgmsg("SWF: Can't read file header\n");
269
-	return CL_CLEAN;
268
+        cli_dbgmsg("SWF: Can't read file header\n");
269
+        return CL_CLEAN;
270 270
     }
271 271
     offset += sizeof(file_hdr);
272 272
 
273 273
     if(!strncmp(file_hdr.signature, "CWS", 3)) {
274
-	cli_dbgmsg("SWF: Compressed file\n");
275
-	return scancws(ctx, &file_hdr);
274
+        cli_dbgmsg("SWF: zlib compressed file\n");
275
+        return scancws(ctx, &file_hdr);
276
+    } else if(!strncmp(file_hdr.signature, "ZWS", 3)) {
277
+        cli_dbgmsg("SWF: LZMA compressed file\n");
278
+        return scanzws(ctx, &file_hdr);
276 279
     } else if(!strncmp(file_hdr.signature, "FWS", 3)) {
277
-	cli_dbgmsg("SWF: Uncompressed file\n");
280
+        cli_dbgmsg("SWF: Uncompressed file\n");
278 281
     } else {
279
-	cli_dbgmsg("SWF: Not a SWF file\n");
280
-	return CL_CLEAN;
282
+        cli_dbgmsg("SWF: Not a SWF file\n");
283
+        return CL_CLEAN;
281 284
     }
282 285
 
283 286
     cli_dbgmsg("SWF: Version: %u\n", file_hdr.version);
... ...
@@ -306,62 +489,62 @@ int cli_scanswf(cli_ctx *ctx)
306 306
     }
307 307
 
308 308
     while(offset < map->len) {
309
-	GETWORD(tag_hdr);
310
-	tag_type = tag_hdr >> 6;
311
-	if(tag_type == 0)
312
-	    break;
313
-	tag_len = tag_hdr & 0x3f;
314
-	if(tag_len == 0x3f)
315
-	    GETDWORD(tag_len);
316
-
317
-	pt = tagname(tag_type);
318
-	cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
319
-	cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
320
-	if (tag_len > map->len) {
321
-	    cli_dbgmsg("SWF: Invalid tag length.\n");
322
-	    return CL_EFORMAT;
323
-	}
324
-	if ((offset + tag_len) < offset) {
325
-	    cli_warnmsg("SWF: Tag length too large.\n");
326
-	    break;
327
-	}
328
-	if(!pt) {
329
-	    offset += tag_len;
330
-	    continue;
331
-	}
332
-
333
-	switch(tag_type) {
334
-	    case TAG_SCRIPTLIMITS: {
335
-		unsigned int recursion, timeout;
336
-		GETWORD(recursion);
337
-		GETWORD(timeout);
338
-		cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
339
-		break;
340
-	    }
341
-
342
-	    case TAG_FILEATTRIBUTES:
343
-		GETDWORD(val);
344
-		cli_dbgmsg("SWF: File attributes:\n");
345
-		if(val & SWF_ATTR_USENETWORK)
346
-		    cli_dbgmsg("    * Use network\n");
347
-		if(val & SWF_ATTR_RELATIVEURLS)
348
-		    cli_dbgmsg("    * Relative URLs\n");
349
-		if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
350
-		    cli_dbgmsg("    * Suppress cross domain cache\n");
351
-		if(val & SWF_ATTR_ACTIONSCRIPT3)
352
-		    cli_dbgmsg("    * ActionScript 3.0\n");
353
-		if(val & SWF_ATTR_HASMETADATA)
354
-		    cli_dbgmsg("    * Has metadata\n");
355
-		if(val & SWF_ATTR_USEDIRECTBLIT)
356
-		    cli_dbgmsg("    * Use hardware acceleration\n");
357
-		if(val & SWF_ATTR_USEGPU)
358
-		    cli_dbgmsg("    * Use GPU\n");
359
-		break;
360
-
361
-	    default:
362
-		offset += tag_len;
363
-		continue;
364
-	}
309
+        GETWORD(tag_hdr);
310
+        tag_type = tag_hdr >> 6;
311
+        if(tag_type == 0)
312
+            break;
313
+        tag_len = tag_hdr & 0x3f;
314
+        if(tag_len == 0x3f)
315
+            GETDWORD(tag_len);
316
+
317
+        pt = tagname(tag_type);
318
+        cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
319
+        cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
320
+        if (tag_len > map->len) {
321
+            cli_dbgmsg("SWF: Invalid tag length.\n");
322
+            return CL_EFORMAT;
323
+        }
324
+        if ((offset + tag_len) < offset) {
325
+            cli_warnmsg("SWF: Tag length too large.\n");
326
+            break;
327
+        }
328
+        if(!pt) {
329
+            offset += tag_len;
330
+            continue;
331
+        }
332
+
333
+        switch(tag_type) {
334
+            case TAG_SCRIPTLIMITS: {
335
+                unsigned int recursion, timeout;
336
+                GETWORD(recursion);
337
+                GETWORD(timeout);
338
+                cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
339
+                break;
340
+            }
341
+
342
+            case TAG_FILEATTRIBUTES:
343
+                GETDWORD(val);
344
+                cli_dbgmsg("SWF: File attributes:\n");
345
+                if(val & SWF_ATTR_USENETWORK)
346
+                    cli_dbgmsg("    * Use network\n");
347
+                if(val & SWF_ATTR_RELATIVEURLS)
348
+                    cli_dbgmsg("    * Relative URLs\n");
349
+                if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
350
+                    cli_dbgmsg("    * Suppress cross domain cache\n");
351
+                if(val & SWF_ATTR_ACTIONSCRIPT3)
352
+                    cli_dbgmsg("    * ActionScript 3.0\n");
353
+                if(val & SWF_ATTR_HASMETADATA)
354
+                    cli_dbgmsg("    * Has metadata\n");
355
+                if(val & SWF_ATTR_USEDIRECTBLIT)
356
+                    cli_dbgmsg("    * Use hardware acceleration\n");
357
+                if(val & SWF_ATTR_USEGPU)
358
+                    cli_dbgmsg("    * Use GPU\n");
359
+                break;
360
+
361
+            default:
362
+                offset += tag_len;
363
+                continue;
364
+        }
365 365
     }
366 366
 
367 367
     return CL_CLEAN;
... ...
@@ -302,6 +302,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
302 302
 			loc_esi += 4;
303 303
 			cli_dbgmsg("Upack: ecx counter: %08x\n", j);
304 304
 
305
+			if (((uint64_t)count+j) * 4 > UINT_MAX)
306
+				return -1;
305 307
 			if (!CLI_ISCONTAINED(dest, dsize, loc_esi, (j*4)) || !CLI_ISCONTAINED(dest, dsize, loc_edi, ((j+count)*4)))
306 308
 				return -1;
307 309
 			for (;j--; loc_edi+=4, loc_esi+=4)
... ...
@@ -359,6 +361,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
359 359
 			loc_edi += 4;
360 360
 			loc_ebx = loc_edi;
361 361
 		
362
+			if (((uint64_t)count+6) * 4 > UINT_MAX)
363
+				return -1;
362 364
 			if (!CLI_ISCONTAINED(dest, dsize, loc_edi, ((6+count)*4)))
363 365
 				return -1;
364 366
 			cli_writeint32(loc_edi, 0xffffffff);
... ...
@@ -432,6 +436,13 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
432 432
 	section.rsz = end_edi-loc_edi;
433 433
 	section.vsz = end_edi-loc_edi;
434 434
 
435
+	/* bb#11282 - prevent dest+va/dest from passing an invalid dereference to cli_rebuildpe */
436
+	/* check should trigger on broken PE files where the section exists outside of the file */
437
+	if ((!upack && ((va + section.rsz) > dsize)) || (upack && (section.rsz > dsize))) {
438
+		cli_dbgmsg("Upack: Rebuilt section exceeds allocated buffer; breaks cli_rebuildpe() bb#11282\n");
439
+		return 0;
440
+	}
441
+
435 442
 	if (!cli_rebuildpe(dest + (upack?0:va), &section, 1, base, original_ep, 0, 0, file)) {
436 443
 		cli_dbgmsg("Upack: Rebuilding failed\n");
437 444
 		return 0;
... ...
@@ -128,13 +128,13 @@ static int pefromupx (const char *src, uint32_t ssize, char *dst, uint32_t *dsiz
128 128
     return 0;
129 129
 
130 130
   while ((valign=magic[sectcnt++])) {
131
-    if ( ep - upx1 + valign <= ssize-5  &&    /* Wondering how we got so far?! */
131
+    if (CLI_ISCONTAINED(src, ssize - 5, src + ep - upx1 + valign - 2, 2) &&
132 132
 	 src[ep - upx1 + valign - 2] == '\x8d' && /* lea edi, ...                  */
133 133
 	 src[ep - upx1 + valign - 1] == '\xbe' )  /* ... [esi + offset]          */
134 134
       break;
135 135
   }
136 136
 
137
-  if (!valign && ep - upx1 + 0x80 < ssize-8) {
137
+  if (!valign && CLI_ISCONTAINED(src, ssize - 8, src + ep - upx1 + 0x80, 8)) {
138 138
     const char *pt = &src[ep - upx1 + 0x80];
139 139
     cli_dbgmsg("UPX: bad magic - scanning for imports\n");
140 140
     
... ...
@@ -186,7 +186,8 @@ static int xar_get_toc_data_values(xmlTextReaderPtr reader, long *length, long *
186 186
                 cli_dbgmsg("cli_scanxar: <archived-checksum>:\n");
187 187
                 xar_get_checksum_values(reader, a_cksum, a_hash);
188 188
                 
189
-            } else if (xmlStrEqual(name, (const xmlChar *)"extracted-checksum") &&
189
+            } else if ((xmlStrEqual(name, (const xmlChar *)"extracted-checksum") ||
190
+                        xmlStrEqual(name, (const xmlChar *)"unarchived-checksum")) &&
190 191
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
191 192
                 cli_dbgmsg("cli_scanxar: <extracted-checksum>:\n");
192 193
                 xar_get_checksum_values(reader, e_cksum, e_hash);
... ...
@@ -561,11 +562,7 @@ int cli_scanxar(cli_ctx *ctx)
561 561
 
562 562
 
563 563
         a_hash_ctx = xar_hash_init(a_hash, &a_sc, &a_mc);
564
-        if (a_hash_ctx == NULL)
565
-            goto exit_tmpfile;
566 564
         e_hash_ctx = xar_hash_init(e_hash, &e_sc, &e_mc);
567
-        if (e_hash_ctx == NULL)
568
-            goto exit_tmpfile;
569 565
 
570 566
         switch (encoding) {
571 567
         case CL_TYPE_GZ:
... ...
@@ -606,7 +603,8 @@ int cli_scanxar(cli_ctx *ctx)
606 606
 
607 607
                     bytes = sizeof(buff) - strm.avail_out;
608 608
 
609
-                    xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
609
+                    if (e_hash_ctx != NULL)
610
+                        xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
610 611
                    
611 612
                     if (cli_writen(fd, buff, bytes) < 0) {
612 613
                         cli_dbgmsg("cli_scanxar: cli_writen error file %s.\n", tmpname);
... ...
@@ -627,7 +625,8 @@ int cli_scanxar(cli_ctx *ctx)
627 627
                     break;
628 628
 
629 629
                 avail_in -= strm.avail_in;
630
-                xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
630
+                if (a_hash_ctx != NULL)
631
+                    xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
631 632
             }
632 633
 
633 634
             inflateEnd(&strm);
... ...
@@ -665,7 +664,8 @@ int cli_scanxar(cli_ctx *ctx)
665 665
                 lz.next_in = blockp;
666 666
                 lz.avail_in = CLI_LZMA_HDR_SIZE;
667 667
 
668
-                xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
668
+                if (a_hash_ctx != NULL)
669
+                    xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
669 670
 
670 671
                 lret = cli_LzmaInit(&lz, 0);
671 672
                 if (lret != LZMA_RESULT_OK) {
... ...
@@ -716,8 +716,10 @@ int cli_scanxar(cli_ctx *ctx)
716 716
                         cli_dbgmsg("cli_scanxar: cli_LzmaDecode() produces no output for "
717 717
                                    "avail_in %lu, avail_out %lu.\n", avail_in, avail_out);
718 718
 
719
-                    xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);                    
720
-                    xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
719
+                    if (a_hash_ctx != NULL)
720
+                        xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);                    
721
+                    if (e_hash_ctx != NULL)
722
+                        xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
721 723
 
722 724
                     /* Write a decompressed block. */
723 725
                     /* cli_dbgmsg("Writing %li bytes to LZMA decompress temp file, " */
... ...
@@ -770,7 +772,8 @@ int cli_scanxar(cli_ctx *ctx)
770 770
                     goto exit_tmpfile;
771 771
                 }
772 772
                 
773
-                xar_hash_update(a_hash_ctx, blockp, length, a_hash);
773
+                if (a_hash_ctx != NULL)
774
+                    xar_hash_update(a_hash_ctx, blockp, length, a_hash);
774 775
                 
775 776
                 if (cli_writen(fd, blockp, write_len) < 0) {
776 777
                     cli_dbgmsg("cli_scanxar: cli_writen error %li bytes @ %li.\n", length, at);
... ...
@@ -782,25 +785,36 @@ int cli_scanxar(cli_ctx *ctx)
782 782
         }
783 783
 
784 784
         if (rc == CL_SUCCESS) {
785
-            xar_hash_final(a_hash_ctx, result, a_hash);
786
-            a_hash_ctx = NULL;
785
+            if (a_hash_ctx != NULL) {
786
+                xar_hash_final(a_hash_ctx, result, a_hash);
787
+                a_hash_ctx = NULL;
788
+            } else {
789
+                cli_dbgmsg("cli_scanxar: archived-checksum missing.\n");
790
+                cksum_fails++;
791
+            }
787 792
             if (a_cksum != NULL) {
788 793
                 expected = cli_hex2str((char *)a_cksum);
789 794
                 if (xar_hash_check(a_hash, result, expected) != 0) {
790
-                    cli_dbgmsg("cli_scanxar: archived-checksum missing or mismatch.\n");
795
+                    cli_dbgmsg("cli_scanxar: archived-checksum mismatch.\n");
791 796
                     cksum_fails++;
792 797
                 } else {
793 798
                     cli_dbgmsg("cli_scanxar: archived-checksum matched.\n");                
794 799
                 }
795 800
                 free(expected);
796 801
             }
797
-            xar_hash_final(e_hash_ctx, result, e_hash);
798
-            e_hash_ctx = NULL;
802
+
803
+            if (e_hash_ctx != NULL) {
804
+                xar_hash_final(e_hash_ctx, result, e_hash);
805
+                e_hash_ctx = NULL;
806
+            } else {
807
+                cli_dbgmsg("cli_scanxar: extracted-checksum(unarchived-checksum) missing.\n");
808
+                cksum_fails++;
809
+            }
799 810
             if (e_cksum != NULL) {
800 811
                 if (do_extract_cksum) {
801 812
                     expected = cli_hex2str((char *)e_cksum);
802 813
                     if (xar_hash_check(e_hash, result, expected) != 0) {
803
-                        cli_dbgmsg("cli_scanxar: extracted-checksum missing or mismatch.\n");
814
+                        cli_dbgmsg("cli_scanxar: extracted-checksum mismatch.\n");
804 815
                         cksum_fails++;
805 816
                     } else {
806 817
                         cli_dbgmsg("cli_scanxar: extracted-checksum matched.\n");                
... ...
@@ -75,7 +75,7 @@ int cli_XzDecode(struct CLI_XZ *XZ) {
75 75
         return XZ_STREAM_END;
76 76
     if (XZ->status == CODER_STATUS_NOT_FINISHED && XZ->avail_out == 0)
77 77
         return XZ_RESULT_OK;
78
-    if (res != SZ_OK)
78
+    if (((inbytes == 0) && (outbytes == 0)) || res != SZ_OK)
79 79
 	return XZ_RESULT_DATA_ERROR;
80 80
     return XZ_RESULT_OK;
81 81
 }
... ...
@@ -81,6 +81,7 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
81 81
   unsigned char al;
82 82
   unsigned char cl = ecx & 0xff;
83 83
   unsigned int j,i;
84
+  unsigned int max_jmp_loop = 100000000;
84 85
 
85 86
   for(i=0;i<ecx&&i<max_emu;i++) /* Byte looper - Decrypts every byte and write it back */
86 87
     {
... ...
@@ -103,6 +104,9 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
103 103
             if (yc_bounds_check(ctx, base, filesize, decryptor_offset, j)) {
104 104
                 return 2;
105 105
             }
106
+	      if (!max_jmp_loop)
107
+	          return 2;
108
+	      max_jmp_loop--;
106 109
 	      j = j + decryptor_offset[j];
107 110
 	      break;
108 111
 
... ...
@@ -3,7 +3,7 @@ VERSION="devel-`date +%Y%m%d`"
3 3
 dnl VERSION="1.0rc1"
4 4
 
5 5
 LC_CURRENT=7
6
-LC_REVISION=24
6
+LC_REVISION=26
7 7
 LC_AGE=1
8 8
 LIBCLAMAV_VERSION="$LC_CURRENT":"$LC_REVISION":"$LC_AGE"
9 9
 AC_SUBST([LIBCLAMAV_VERSION])
... ...
@@ -623,6 +623,7 @@ static Suite *test_cl_suite(void)
623 623
     Suite *s = suite_create("cl_api");
624 624
     TCase *tc_cl = tcase_create("cl_dup");
625 625
     TCase *tc_cl_scan = tcase_create("cl_scan");
626
+    char *user_timeout = NULL;
626 627
     int expect = expected_testfiles;
627 628
     suite_add_tcase (s, tc_cl);
628 629
     tcase_add_test(tc_cl, test_cl_free);
... ...
@@ -661,6 +662,12 @@ static Suite *test_cl_suite(void)
661 661
     tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_handle_allscan, 0, expect);
662 662
     tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem, 0, expect);
663 663
     tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem_allscan, 0, expect);
664
+
665
+    user_timeout = getenv("T");
666
+    if (user_timeout) {
667
+        int timeout = atoi(user_timeout);
668
+        tcase_set_timeout(tc_cl_scan, timeout);
669
+    }
664 670
 #endif
665 671
     return s;
666 672
 }
... ...
@@ -357,6 +357,8 @@
357 357
     <ClCompile Include="..\libclamav\mpool.c" />
358 358
     <ClCompile Include="..\libclamav\msexpand.c" />
359 359
     <ClCompile Include="..\libclamav\mspack.c" />
360
+    <ClCompile Include="..\libclamav\msxml.c" />
361
+    <ClCompile Include="..\libclamav\msxml_parser.c" />
360 362
     <ClCompile Include="..\libclamav\nsis\bzlib.c">
361 363
       <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)\nsis_bzlib</ObjectFileName>
362 364
       <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)\nsis_bzlib</ObjectFileName>
... ...
@@ -201,6 +201,12 @@
201 201
     <ClCompile Include="..\libclamav\mspack.c">
202 202
       <Filter>Source Files</Filter>
203 203
     </ClCompile>
204
+    <ClCompile Include="..\libclamav\msxml.c">
205
+      <Filter>Source Files</Filter>
206
+    </ClCompile>
207
+    <ClCompile Include="..\libclamav\msxml_parser.c">
208
+      <Filter>Source Files</Filter>
209
+    </ClCompile>
204 210
     <ClCompile Include="..\libclamav\ole2_extract.c">
205 211
       <Filter>Source Files</Filter>
206 212
     </ClCompile>
... ...
@@ -6,8 +6,8 @@
6 6
 #define REPO_VERSION VERSION
7 7
 #endif
8 8
 
9
-#define RES_VER_Q 0,98,0,0
10
-#define RES_VER_S "ClamAV 0.98"
9
+#define RES_VER_Q 0,98,7,0
10
+#define RES_VER_S "ClamAV 0.98.7"
11 11
 
12 12
 VS_VERSION_INFO VERSIONINFO
13 13
     FILEVERSION RES_VER_Q