... | ... |
@@ -32,6 +32,7 @@ |
32 | 32 |
#include "conv.h" |
33 | 33 |
#include "json_api.h" |
34 | 34 |
#include "msxml.h" |
35 |
+#include "msxml_parser.h" |
|
35 | 36 |
|
36 | 37 |
#if HAVE_LIBXML2 |
37 | 38 |
#ifdef _WIN32 |
... | ... |
@@ -48,24 +49,14 @@ |
48 | 48 |
#define cli_msxmlmsg(...) |
49 | 49 |
#endif |
50 | 50 |
|
51 |
-//#define MSXML_RECLEVEL 16 |
|
52 |
-#define MSXML_RECLEVEL_MAX 20 |
|
53 |
-#define MSXML_JSON_STRLEN_MAX 100 |
|
54 |
- |
|
55 | 51 |
#define MSXML_READBUFF SCANBUFF |
56 | 52 |
|
57 |
-#define check_state(state) \ |
|
58 |
- do { \ |
|
59 |
- if (state == -1) { \ |
|
60 |
- cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__); \ |
|
61 |
- return CL_EPARSE; \ |
|
62 |
- } \ |
|
63 |
- else if (state == 0) { \ |
|
64 |
- cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__); \ |
|
65 |
- return CL_BREAK; \ |
|
66 |
- } \ |
|
67 |
- } while(0) |
|
68 |
- |
|
53 |
+static const struct key_entry msxml_keys[] = { |
|
54 |
+ { "documentproperties", "DocumentProperties", MSXML_JSON_ROOT }, |
|
55 |
+ { "worddocument", "WordDocument", MSXML_JSON_ROOT }, |
|
56 |
+ { "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT } |
|
57 |
+}; |
|
58 |
+static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry); |
|
69 | 59 |
|
70 | 60 |
struct msxml_cbdata { |
71 | 61 |
fmap_t *map; |
... | ... |
@@ -162,213 +153,6 @@ int msxml_read_cb(void *ctx, char *buffer, int len) |
162 | 162 |
cbdata->winpos = cbdata->winsize - rbytes; |
163 | 163 |
return (int)wbytes; |
164 | 164 |
} |
165 |
- |
|
166 |
-static int msxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, int rlvl) |
|
167 |
-{ |
|
168 |
- const xmlChar *element_name = NULL; |
|
169 |
- const xmlChar *node_name = NULL, *node_value = NULL; |
|
170 |
- int ret, state, node_type, endtag = 0; |
|
171 |
- |
|
172 |
- cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl); |
|
173 |
- |
|
174 |
- /* check recursion level */ |
|
175 |
- if (rlvl >= MSXML_RECLEVEL_MAX) { |
|
176 |
- cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n"); |
|
177 |
- //cli_jsonbool(root, "HitRecursiveLimit", 1); |
|
178 |
- /* skip it */ |
|
179 |
- state = xmlTextReaderNext(reader); |
|
180 |
- check_state(state); |
|
181 |
- return CL_SUCCESS; |
|
182 |
- } |
|
183 |
- |
|
184 |
- /* acquire element type */ |
|
185 |
- node_type = xmlTextReaderNodeType(reader); |
|
186 |
- if (node_type == -1) |
|
187 |
- return CL_EPARSE; |
|
188 |
- |
|
189 |
- node_name = xmlTextReaderConstLocalName(reader); |
|
190 |
- node_value = xmlTextReaderConstValue(reader); |
|
191 |
- |
|
192 |
- /* branch on node type */ |
|
193 |
- switch (node_type) { |
|
194 |
- case XML_READER_TYPE_ELEMENT: |
|
195 |
- cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value); |
|
196 |
- |
|
197 |
- /* storing the element name for verification/collection */ |
|
198 |
- element_name = xmlTextReaderConstLocalName(reader); |
|
199 |
- if (!node_name) { |
|
200 |
- cli_dbgmsg("msxml_parse_element: element tag node nameless\n"); |
|
201 |
- return CL_EPARSE; /* no name, nameless */ |
|
202 |
- } |
|
203 |
- |
|
204 |
- /* handle attributes */ |
|
205 |
- state = xmlTextReaderHasAttributes(reader); |
|
206 |
- if (state == 1) { |
|
207 |
- while (xmlTextReaderMoveToNextAttribute(reader) == 1) { |
|
208 |
- const xmlChar *name, *value; |
|
209 |
- name = xmlTextReaderConstLocalName(reader); |
|
210 |
- value = xmlTextReaderConstValue(reader); |
|
211 |
- |
|
212 |
- cli_msxmlmsg("\t%s: %s\n", name, value); |
|
213 |
- } |
|
214 |
- } |
|
215 |
- else if (state == -1) |
|
216 |
- return CL_EPARSE; |
|
217 |
- |
|
218 |
- /* check self-containment */ |
|
219 |
- state = xmlTextReaderMoveToElement(reader); |
|
220 |
- if (state == -1) |
|
221 |
- return CL_EPARSE; |
|
222 |
- |
|
223 |
- state = xmlTextReaderIsEmptyElement(reader); |
|
224 |
- if (state == 1) { |
|
225 |
- cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n"); |
|
226 |
- |
|
227 |
- state = xmlTextReaderNext(reader); |
|
228 |
- check_state(state); |
|
229 |
- return CL_SUCCESS; |
|
230 |
- } else if (state == -1) |
|
231 |
- return CL_EPARSE; |
|
232 |
- |
|
233 |
- /* advance to first content node */ |
|
234 |
- state = xmlTextReaderRead(reader); |
|
235 |
- check_state(state); |
|
236 |
- |
|
237 |
- while (!endtag) { |
|
238 |
- node_type = xmlTextReaderNodeType(reader); |
|
239 |
- if (node_type == -1) |
|
240 |
- return CL_EPARSE; |
|
241 |
- |
|
242 |
- switch (node_type) { |
|
243 |
- case XML_READER_TYPE_ELEMENT: |
|
244 |
- ret = msxml_parse_element(ctx, reader, rlvl+1); |
|
245 |
- if (ret != CL_SUCCESS) { |
|
246 |
- return ret; |
|
247 |
- } |
|
248 |
- break; |
|
249 |
- |
|
250 |
- case XML_READER_TYPE_TEXT: |
|
251 |
- node_value = xmlTextReaderConstValue(reader); |
|
252 |
- |
|
253 |
- cli_msxmlmsg("TEXT: %s\n", node_value); |
|
254 |
- |
|
255 |
- if (!strncmp(element_name, "binData", strlen(element_name))) { |
|
256 |
- char name[1024]; |
|
257 |
- char *decoded, *tempfile = name; |
|
258 |
- size_t decodedlen; |
|
259 |
- int of; |
|
260 |
- |
|
261 |
- cli_msxmlmsg("BINARY DATA!\n"); |
|
262 |
- |
|
263 |
- decoded = cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0); |
|
264 |
- if (!decoded) { |
|
265 |
- cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n"); |
|
266 |
- state = xmlTextReaderRead(reader); |
|
267 |
- check_state(state); |
|
268 |
- break; |
|
269 |
- } |
|
270 |
- |
|
271 |
- if(!(tempfile = cli_gentemp(ctx->engine->tmpdir))) { |
|
272 |
- free(decoded); |
|
273 |
- return CL_EMEM; |
|
274 |
- } |
|
275 |
- |
|
276 |
- if((of = open(tempfile, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IRUSR|S_IWUSR))==-1) { |
|
277 |
- cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile); |
|
278 |
- free(decoded); |
|
279 |
- return CL_ECREAT; |
|
280 |
- } |
|
281 |
- |
|
282 |
- if(cli_writen(of, decoded, decodedlen) != (int)decodedlen) { |
|
283 |
- free(decoded); |
|
284 |
- close(of); |
|
285 |
- return CL_EWRITE; |
|
286 |
- } |
|
287 |
- free(decoded); |
|
288 |
- |
|
289 |
- cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile); |
|
290 |
- |
|
291 |
- ret = cli_magic_scandesc(of, ctx); |
|
292 |
- close(of); |
|
293 |
- if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) { |
|
294 |
- return ret; |
|
295 |
- } |
|
296 |
- |
|
297 |
- /* |
|
298 |
- ret = cli_mem_scandesc(decoded, decodedlen, ctx); |
|
299 |
- free(decoded); |
|
300 |
- if (ret != CL_SUCCESS) { |
|
301 |
- return ret; |
|
302 |
- }*/ |
|
303 |
- } |
|
304 |
- |
|
305 |
- /* |
|
306 |
- ret = ooxml_parse_value(thisjobj, "Value", node_value); |
|
307 |
- if (ret != CL_SUCCESS) |
|
308 |
- return ret; |
|
309 |
- |
|
310 |
- cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value); |
|
311 |
- */ |
|
312 |
- |
|
313 |
- /* advance to next node */ |
|
314 |
- state = xmlTextReaderRead(reader); |
|
315 |
- check_state(state); |
|
316 |
- break; |
|
317 |
- |
|
318 |
- case XML_READER_TYPE_SIGNIFICANT_WHITESPACE: |
|
319 |
- /* advance to next node */ |
|
320 |
- state = xmlTextReaderRead(reader); |
|
321 |
- check_state(state); |
|
322 |
- break; |
|
323 |
- |
|
324 |
- case XML_READER_TYPE_END_ELEMENT: |
|
325 |
- cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl); |
|
326 |
- node_name = xmlTextReaderConstLocalName(reader); |
|
327 |
- if (!node_name) { |
|
328 |
- cli_dbgmsg("msxml_parse_element: element end tag node nameless\n"); |
|
329 |
- return CL_EPARSE; /* no name, nameless */ |
|
330 |
- } |
|
331 |
- |
|
332 |
- if (strncmp(element_name, node_name, strlen(element_name))) { |
|
333 |
- cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name); |
|
334 |
- return CL_EFORMAT; |
|
335 |
- } |
|
336 |
- |
|
337 |
- /* advance to next element tag */ |
|
338 |
- state = xmlTextReaderRead(reader); |
|
339 |
- check_state(state); |
|
340 |
- |
|
341 |
- endtag = 1; |
|
342 |
- break; |
|
343 |
- |
|
344 |
- default: |
|
345 |
- node_name = xmlTextReaderConstLocalName(reader); |
|
346 |
- node_value = xmlTextReaderConstValue(reader); |
|
347 |
- |
|
348 |
- cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value); |
|
349 |
- |
|
350 |
- state = xmlTextReaderNext(reader); |
|
351 |
- check_state(state); |
|
352 |
- return CL_SUCCESS; |
|
353 |
- } |
|
354 |
- } |
|
355 |
- |
|
356 |
- break; |
|
357 |
- case XML_READER_TYPE_PROCESSING_INSTRUCTION: |
|
358 |
- cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value); |
|
359 |
- break; |
|
360 |
- case XML_READER_TYPE_SIGNIFICANT_WHITESPACE: |
|
361 |
- cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value); |
|
362 |
- break; |
|
363 |
- case XML_READER_TYPE_END_ELEMENT: |
|
364 |
- cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value); |
|
365 |
- return CL_SUCCESS; |
|
366 |
- default: |
|
367 |
- cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value); |
|
368 |
- } |
|
369 |
- |
|
370 |
- return CL_SUCCESS; |
|
371 |
-} |
|
372 | 165 |
#endif |
373 | 166 |
|
374 | 167 |
int cli_scanmsxml(cli_ctx *ctx) |
... | ... |
@@ -394,22 +178,7 @@ int cli_scanmsxml(cli_ctx *ctx) |
394 | 394 |
return CL_SUCCESS; // libxml2 failed! |
395 | 395 |
} |
396 | 396 |
|
397 |
- /* Main Processing Loop */ |
|
398 |
- while ((state = xmlTextReaderRead(reader)) == 1) { |
|
399 |
- ret = msxml_parse_element(ctx, reader, 0); |
|
400 |
- |
|
401 |
- if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) { |
|
402 |
- cli_warnmsg("cli_scanmsxml: encountered issue in parsing properties document\n"); |
|
403 |
- break; |
|
404 |
- } |
|
405 |
- } |
|
406 |
- |
|
407 |
- /* non-critical return supression */ |
|
408 |
- if (ret == CL_ETIMEOUT || ret == CL_BREAK) |
|
409 |
- ret = CL_SUCCESS; |
|
410 |
- |
|
411 |
- if (state == -1) |
|
412 |
- ret = CL_EPARSE; |
|
397 |
+ ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1); |
|
413 | 398 |
|
414 | 399 |
xmlTextReaderClose(reader); |
415 | 400 |
xmlFreeTextReader(reader); |
416 | 401 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,381 @@ |
0 |
+/* |
|
1 |
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents) |
|
2 |
+ * |
|
3 |
+ * Copyright (C) 2007-2013 Sourcefire, Inc. |
|
4 |
+ * |
|
5 |
+ * Authors: Kevin Lin |
|
6 |
+ * |
|
7 |
+ * This program is free software; you can redistribute it and/or modify it under |
|
8 |
+ * the terms of the GNU General Public License version 2 as published by the |
|
9 |
+ * Free Software Foundation. |
|
10 |
+ * |
|
11 |
+ * This program is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
14 |
+ * more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU General Public License along with |
|
17 |
+ * this program; if not, write to the Free Software Foundation, Inc., 51 |
|
18 |
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#if HAVE_CONFIG_H |
|
22 |
+#include "clamav-config.h" |
|
23 |
+#endif |
|
24 |
+ |
|
25 |
+#include <sys/types.h> |
|
26 |
+#include <sys/stat.h> |
|
27 |
+#include <fcntl.h> |
|
28 |
+ |
|
29 |
+#include "clamav.h" |
|
30 |
+#include "others.h" |
|
31 |
+#include "conv.h" |
|
32 |
+#include "json_api.h" |
|
33 |
+#include "msxml_parser.h" |
|
34 |
+ |
|
35 |
+#if HAVE_LIBXML2 |
|
36 |
+#ifdef _WIN32 |
|
37 |
+#ifndef LIBXML_WRITER_ENABLED |
|
38 |
+#define LIBXML_WRITER_ENABLED 1 |
|
39 |
+#endif |
|
40 |
+#endif |
|
41 |
+#include <libxml/xmlreader.h> |
|
42 |
+ |
|
43 |
+#define MSXML_VERBIOSE 1 |
|
44 |
+#if MSXML_VERBIOSE |
|
45 |
+#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__) |
|
46 |
+#else |
|
47 |
+#define cli_msxmlmsg(...) |
|
48 |
+#endif |
|
49 |
+ |
|
50 |
/*
 * Inspect a libxml2 text-reader return code and leave the ENCLOSING function
 * when the reader cannot continue:
 *   -1 -> logs a warning and returns CL_EPARSE
 *    0 -> logs a debug message and returns CL_BREAK
 * Any other value falls through.  Note the hidden `return`: only use this in
 * functions returning an int status code.  The argument is evaluated more than
 * once, so pass a plain variable.
 */
#define check_state(state)                                                      \
    do {                                                                        \
        if ((state) == -1) {                                                    \
            cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__);    \
            return CL_EPARSE;                                                   \
        }                                                                       \
        else if ((state) == 0) {                                                \
            cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__);      \
            return CL_BREAK;                                                    \
        }                                                                       \
    } while (0)
|
61 |
+ |
|
62 |
+ |
|
63 |
+struct key_entry blank_key = { NULL, NULL, 0 }; |
|
64 |
+ |
|
65 |
+static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const char *key, size_t keylen) |
|
66 |
+{ |
|
67 |
+ unsigned i; |
|
68 |
+ |
|
69 |
+ if (keylen > MSXML_JSON_STRLEN_MAX-1) { |
|
70 |
+ cli_dbgmsg("msxml_check_key: key name too long\n"); |
|
71 |
+ return &blank_key; |
|
72 |
+ } |
|
73 |
+ |
|
74 |
+ for (i = 0; i < mxctx->num_keys; ++i) { |
|
75 |
+ //cli_dbgmsg("%d %d %s %s %s %s\n", keylen, strlen(ooxml_keys[i]), key, keycmp, ooxml_keys[i], ooxml_json_keys[i]); |
|
76 |
+ if (keylen == strlen(mxctx->keys[i].key) && !strncasecmp(key, mxctx->keys[i].key, keylen)) { |
|
77 |
+ return &mxctx->keys[i]; |
|
78 |
+ } |
|
79 |
+ } |
|
80 |
+ |
|
81 |
+ return &blank_key; |
|
82 |
+} |
|
83 |
+ |
|
84 |
+static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl) |
|
85 |
+{ |
|
86 |
+ const xmlChar *element_name = NULL; |
|
87 |
+ const xmlChar *node_name = NULL, *node_value = NULL; |
|
88 |
+ const struct key_entry *keyinfo; |
|
89 |
+ int ret, state, node_type, endtag = 0; |
|
90 |
+ cli_ctx *ctx = mxctx->ctx; |
|
91 |
+#if HAVE_JSON |
|
92 |
+ json_object *parent = mxctx->wrkptr; |
|
93 |
+ json_object *thisjobj = NULL; |
|
94 |
+#endif |
|
95 |
+ |
|
96 |
+ cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl); |
|
97 |
+ |
|
98 |
+ /* check recursion level */ |
|
99 |
+ if (rlvl >= MSXML_RECLEVEL_MAX) { |
|
100 |
+ cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n"); |
|
101 |
+ //cli_jsonbool(root, "HitRecursiveLimit", 1); |
|
102 |
+ /* skip it */ |
|
103 |
+ state = xmlTextReaderNext(reader); |
|
104 |
+ check_state(state); |
|
105 |
+ return CL_SUCCESS; |
|
106 |
+ } |
|
107 |
+ |
|
108 |
+ /* acquire element type */ |
|
109 |
+ node_type = xmlTextReaderNodeType(reader); |
|
110 |
+ if (node_type == -1) |
|
111 |
+ return CL_EPARSE; |
|
112 |
+ |
|
113 |
+ node_name = xmlTextReaderConstLocalName(reader); |
|
114 |
+ node_value = xmlTextReaderConstValue(reader); |
|
115 |
+ |
|
116 |
+ /* branch on node type */ |
|
117 |
+ switch (node_type) { |
|
118 |
+ case XML_READER_TYPE_ELEMENT: |
|
119 |
+ cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value); |
|
120 |
+ |
|
121 |
+ /* storing the element name for verification/collection */ |
|
122 |
+ element_name = xmlTextReaderConstLocalName(reader); |
|
123 |
+ if (!node_name) { |
|
124 |
+ cli_dbgmsg("msxml_parse_element: element tag node nameless\n"); |
|
125 |
+ return CL_EPARSE; /* no name, nameless */ |
|
126 |
+ } |
|
127 |
+ |
|
128 |
+ /* determine if the element is interesting */ |
|
129 |
+ keyinfo = msxml_check_key(mxctx, element_name, strlen(element_name)); |
|
130 |
+ |
|
131 |
+ cli_msxmlmsg("key: %s\n", keyinfo->key); |
|
132 |
+ cli_msxmlmsg("name: %s\n", keyinfo->name); |
|
133 |
+ cli_msxmlmsg("type: %d\n", keyinfo->type); |
|
134 |
+ |
|
135 |
+#if HAVE_JSON |
|
136 |
+ if (keyinfo->type & MSXML_JSON_TRACK) { |
|
137 |
+ if (MSXML_JSON_ROOT) |
|
138 |
+ thisjobj = cli_jsonobj(mxctx->root, keyinfo->name); |
|
139 |
+ else if (MSXML_JSON_WRKPTR) |
|
140 |
+ thisjobj = cli_jsonobj(parent, keyinfo->name); |
|
141 |
+ |
|
142 |
+ if (!thisjobj) { |
|
143 |
+ return CL_EMEM; |
|
144 |
+ } |
|
145 |
+ cli_dbgmsg("msxml_parse_element: generated json object [%s]\n", keyinfo->name); |
|
146 |
+ |
|
147 |
+ /* count this element */ |
|
148 |
+ if (thisjobj && keyinfo->type & MSXML_JSON_COUNT) { |
|
149 |
+ json_object *counter; |
|
150 |
+ |
|
151 |
+ if (!json_object_object_get_ex(thisjobj, "Count", &counter)) { /* object not found */ |
|
152 |
+ cli_jsonint(thisjobj, "Count", 1); |
|
153 |
+ if (!counter) { |
|
154 |
+ return CL_EPARSE; |
|
155 |
+ } |
|
156 |
+ } else { |
|
157 |
+ int value = json_object_get_int(counter); |
|
158 |
+ cli_jsonint(thisjobj, "Count", value+1); |
|
159 |
+ } |
|
160 |
+ cli_dbgmsg("msxml_parse_element: retrieved json object [Count]\n"); |
|
161 |
+ } |
|
162 |
+ |
|
163 |
+ /* handle attributes */ |
|
164 |
+ state = xmlTextReaderHasAttributes(reader); |
|
165 |
+ if (state == 1) { |
|
166 |
+ json_object *attributes; |
|
167 |
+ |
|
168 |
+ attributes = cli_jsonobj(thisjobj, "Attributes"); |
|
169 |
+ if (!attributes) { |
|
170 |
+ return CL_EPARSE; |
|
171 |
+ } |
|
172 |
+ cli_dbgmsg("msxml_parse_element: retrieved json object [Attributes]\n"); |
|
173 |
+ |
|
174 |
+ while (xmlTextReaderMoveToNextAttribute(reader) == 1) { |
|
175 |
+ const xmlChar *name, *value; |
|
176 |
+ name = xmlTextReaderConstLocalName(reader); |
|
177 |
+ value = xmlTextReaderConstValue(reader); |
|
178 |
+ |
|
179 |
+ cli_dbgmsg("\t%s: %s\n", name, value); |
|
180 |
+ cli_jsonstr(attributes, name, (const char *)value); |
|
181 |
+ } |
|
182 |
+ } |
|
183 |
+ else if (state == -1) |
|
184 |
+ return CL_EPARSE; |
|
185 |
+ } |
|
186 |
+#endif |
|
187 |
+ |
|
188 |
+ /* check self-containment */ |
|
189 |
+ state = xmlTextReaderMoveToElement(reader); |
|
190 |
+ if (state == -1) |
|
191 |
+ return CL_EPARSE; |
|
192 |
+ |
|
193 |
+ state = xmlTextReaderIsEmptyElement(reader); |
|
194 |
+ if (state == 1) { |
|
195 |
+ cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n"); |
|
196 |
+ |
|
197 |
+ state = xmlTextReaderNext(reader); |
|
198 |
+ check_state(state); |
|
199 |
+ return CL_SUCCESS; |
|
200 |
+ } else if (state == -1) |
|
201 |
+ return CL_EPARSE; |
|
202 |
+ |
|
203 |
+ /* advance to first content node */ |
|
204 |
+ state = xmlTextReaderRead(reader); |
|
205 |
+ check_state(state); |
|
206 |
+ |
|
207 |
+ while (!endtag) { |
|
208 |
+ node_type = xmlTextReaderNodeType(reader); |
|
209 |
+ if (node_type == -1) |
|
210 |
+ return CL_EPARSE; |
|
211 |
+ |
|
212 |
+ switch (node_type) { |
|
213 |
+ case XML_READER_TYPE_ELEMENT: |
|
214 |
+ ret = msxml_parse_element(mxctx, reader, rlvl+1); |
|
215 |
+ if (ret != CL_SUCCESS) { |
|
216 |
+ return ret; |
|
217 |
+ } |
|
218 |
+ break; |
|
219 |
+ |
|
220 |
+ case XML_READER_TYPE_TEXT: |
|
221 |
+ node_value = xmlTextReaderConstValue(reader); |
|
222 |
+ |
|
223 |
+ cli_msxmlmsg("TEXT: %s\n", node_value); |
|
224 |
+ |
|
225 |
+ /* |
|
226 |
+ ret = ooxml_parse_value(thisjobj, "Value", node_value); |
|
227 |
+ if (ret != CL_SUCCESS) |
|
228 |
+ return ret; |
|
229 |
+ |
|
230 |
+ cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value); |
|
231 |
+ */ |
|
232 |
+ |
|
233 |
+ |
|
234 |
+ /* scanning protocol for embedded objects encoded in base64 */ |
|
235 |
+ if (keyinfo->type & MSXML_SCAN_B64) { |
|
236 |
+ char name[1024]; |
|
237 |
+ char *decoded, *tempfile = name; |
|
238 |
+ size_t decodedlen; |
|
239 |
+ int of; |
|
240 |
+ |
|
241 |
+ cli_msxmlmsg("BINARY DATA!\n"); |
|
242 |
+ |
|
243 |
+ decoded = cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0); |
|
244 |
+ if (!decoded) { |
|
245 |
+ cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n"); |
|
246 |
+ state = xmlTextReaderRead(reader); |
|
247 |
+ check_state(state); |
|
248 |
+ break; |
|
249 |
+ } |
|
250 |
+ |
|
251 |
+ if(!(tempfile = cli_gentemp(ctx->engine->tmpdir))) { |
|
252 |
+ free(decoded); |
|
253 |
+ return CL_EMEM; |
|
254 |
+ } |
|
255 |
+ |
|
256 |
+ if((of = open(tempfile, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IRUSR|S_IWUSR))==-1) { |
|
257 |
+ cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile); |
|
258 |
+ free(decoded); |
|
259 |
+ return CL_ECREAT; |
|
260 |
+ } |
|
261 |
+ |
|
262 |
+ if(cli_writen(of, decoded, decodedlen) != (int)decodedlen) { |
|
263 |
+ free(decoded); |
|
264 |
+ close(of); |
|
265 |
+ return CL_EWRITE; |
|
266 |
+ } |
|
267 |
+ free(decoded); |
|
268 |
+ |
|
269 |
+ cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile); |
|
270 |
+ |
|
271 |
+ ret = cli_magic_scandesc(of, ctx); |
|
272 |
+ close(of); |
|
273 |
+ if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) { |
|
274 |
+ return ret; |
|
275 |
+ } |
|
276 |
+ |
|
277 |
+ /* |
|
278 |
+ ret = cli_mem_scandesc(decoded, decodedlen, ctx); |
|
279 |
+ free(decoded); |
|
280 |
+ if (ret != CL_SUCCESS) { |
|
281 |
+ return ret; |
|
282 |
+ }*/ |
|
283 |
+ } |
|
284 |
+ |
|
285 |
+ /* advance to next node */ |
|
286 |
+ state = xmlTextReaderRead(reader); |
|
287 |
+ check_state(state); |
|
288 |
+ break; |
|
289 |
+ |
|
290 |
+ case XML_READER_TYPE_SIGNIFICANT_WHITESPACE: |
|
291 |
+ /* advance to next node */ |
|
292 |
+ state = xmlTextReaderRead(reader); |
|
293 |
+ check_state(state); |
|
294 |
+ break; |
|
295 |
+ |
|
296 |
+ case XML_READER_TYPE_END_ELEMENT: |
|
297 |
+ cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl); |
|
298 |
+ node_name = xmlTextReaderConstLocalName(reader); |
|
299 |
+ if (!node_name) { |
|
300 |
+ cli_dbgmsg("msxml_parse_element: element end tag node nameless\n"); |
|
301 |
+ return CL_EPARSE; /* no name, nameless */ |
|
302 |
+ } |
|
303 |
+ |
|
304 |
+ if (strncmp(element_name, node_name, strlen(element_name))) { |
|
305 |
+ cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name); |
|
306 |
+ return CL_EFORMAT; |
|
307 |
+ } |
|
308 |
+ |
|
309 |
+ /* advance to next element tag */ |
|
310 |
+ state = xmlTextReaderRead(reader); |
|
311 |
+ check_state(state); |
|
312 |
+ |
|
313 |
+ endtag = 1; |
|
314 |
+ break; |
|
315 |
+ |
|
316 |
+ default: |
|
317 |
+ node_name = xmlTextReaderConstLocalName(reader); |
|
318 |
+ node_value = xmlTextReaderConstValue(reader); |
|
319 |
+ |
|
320 |
+ cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value); |
|
321 |
+ |
|
322 |
+ state = xmlTextReaderNext(reader); |
|
323 |
+ check_state(state); |
|
324 |
+ return CL_SUCCESS; |
|
325 |
+ } |
|
326 |
+ } |
|
327 |
+ |
|
328 |
+ break; |
|
329 |
+ case XML_READER_TYPE_PROCESSING_INSTRUCTION: |
|
330 |
+ cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value); |
|
331 |
+ break; |
|
332 |
+ case XML_READER_TYPE_SIGNIFICANT_WHITESPACE: |
|
333 |
+ cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value); |
|
334 |
+ break; |
|
335 |
+ case XML_READER_TYPE_END_ELEMENT: |
|
336 |
+ cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value); |
|
337 |
+ return CL_SUCCESS; |
|
338 |
+ default: |
|
339 |
+ cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value); |
|
340 |
+ } |
|
341 |
+ |
|
342 |
+ return CL_SUCCESS; |
|
343 |
+} |
|
344 |
+ |
|
345 |
+/* reader intialization and closing handled by caller */ |
|
346 |
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode) |
|
347 |
+{ |
|
348 |
+ struct msxml_ctx mxctx; |
|
349 |
+ int state, ret = CL_SUCCESS; |
|
350 |
+ |
|
351 |
+ mxctx.ctx = ctx; |
|
352 |
+ mxctx.keys = keys; |
|
353 |
+ mxctx.num_keys = num_keys; |
|
354 |
+#if HAVE_JSON |
|
355 |
+ if (mode) { |
|
356 |
+ mxctx.root = ctx->wrkproperty; |
|
357 |
+ mxctx.wrkptr = ctx->wrkproperty; |
|
358 |
+ } |
|
359 |
+#endif |
|
360 |
+ |
|
361 |
+ /* Main Processing Loop */ |
|
362 |
+ while ((state = xmlTextReaderRead(reader)) == 1) { |
|
363 |
+ msxml_parse_element(&mxctx, reader, 0); |
|
364 |
+ if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) { |
|
365 |
+ cli_warnmsg("cli_msxml_parse_document: encountered issue in parsing xml document\n"); |
|
366 |
+ break; |
|
367 |
+ } |
|
368 |
+ } |
|
369 |
+ |
|
370 |
+ if (state == -1) |
|
371 |
+ return CL_EPARSE; |
|
372 |
+ |
|
373 |
+ /* non-critical return supression */ |
|
374 |
+ if (ret == CL_ETIMEOUT || ret == CL_BREAK) |
|
375 |
+ return CL_SUCCESS; |
|
376 |
+ |
|
377 |
+ return ret; |
|
378 |
+} |
|
379 |
+ |
|
380 |
+#endif /* HAVE_LIBXML2 */ |
0 | 381 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,76 @@ |
0 |
+/* |
|
1 |
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents) |
|
2 |
+ * |
|
3 |
+ * Copyright (C) 2007-2013 Sourcefire, Inc. |
|
4 |
+ * |
|
5 |
+ * Authors: Kevin Lin |
|
6 |
+ * |
|
7 |
+ * This program is free software; you can redistribute it and/or modify it under |
|
8 |
+ * the terms of the GNU General Public License version 2 as published by the |
|
9 |
+ * Free Software Foundation. |
|
10 |
+ * |
|
11 |
+ * This program is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
14 |
+ * more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU General Public License along with |
|
17 |
+ * this program; if not, write to the Free Software Foundation, Inc., 51 |
|
18 |
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#ifndef __MSXML_PARSER_H |
|
22 |
+#define __MSXML_PARSER_H |
|
23 |
+ |
|
24 |
+#if HAVE_LIBXML2 |
|
25 |
+ |
|
26 |
+#if HAVE_CONFIG_H |
|
27 |
+#include "clamav-config.h" |
|
28 |
+#endif |
|
29 |
+ |
|
30 |
+#include "others.h" |
|
31 |
+#include "json_api.h" |
|
32 |
+ |
|
33 |
+#ifdef _WIN32 |
|
34 |
+#ifndef LIBXML_WRITER_ENABLED |
|
35 |
+#define LIBXML_WRITER_ENABLED 1 |
|
36 |
+#endif |
|
37 |
+#endif |
|
38 |
+#include <libxml/xmlreader.h> |
|
39 |
+ |
|
40 |
+ |
|
41 |
+#define MSXML_RECLEVEL_MAX 20 |
|
42 |
+#define MSXML_JSON_STRLEN_MAX 128 |
|
43 |
+ |
|
44 |
/*
 * Handling flags for key_entry.type (bit mask).
 *
 * MSXML_SCAN_B64    - text content is base64; it is decoded and scanned
 * MSXML_JSON_ROOT   - record the element as a JSON object under the root
 * MSXML_JSON_WRKPTR - record the element under the working pointer
 *                     (presumably the current parent object - confirm)
 * MSXML_JSON_COUNT  - maintain a "Count" member on the element's JSON object
 * MSXML_JSON_TRACK  - either of the two "record in JSON" flags
 *
 * MSXML_IGNORE, MSXML_IGNORE_ELEM and MSXML_JSON_VALUE are defined here but
 * not consulted by the parser code in view; semantics to be confirmed.
 */
#define MSXML_IGNORE        0x00
#define MSXML_IGNORE_ELEM   0x01
#define MSXML_SCAN_B64      0x02
#define MSXML_JSON_ROOT     0x04
#define MSXML_JSON_WRKPTR   0x08
#define MSXML_JSON_COUNT    0x10
#define MSXML_JSON_VALUE    0x20

#define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)

/* One element of interest: how to recognise it and what to do with it. */
struct key_entry {
    const char *key;   /* element local name to match */
    const char *name;  /* name of the JSON object created for the element */
    int type;          /* bitwise OR of MSXML_* flags */
};
|
59 |
+ |
|
60 |
+struct msxml_ctx { |
|
61 |
+ cli_ctx *ctx; |
|
62 |
+ const struct key_entry *keys; |
|
63 |
+ size_t num_keys; |
|
64 |
+ |
|
65 |
+#if HAVE_JSON |
|
66 |
+ json_object *root; |
|
67 |
+ json_object *wrkptr; |
|
68 |
+#endif |
|
69 |
+}; |
|
70 |
+ |
|
71 |
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode); |
|
72 |
+ |
|
73 |
+#endif /* HAVE_LIBXML2 */ |
|
74 |
+ |
|
75 |
+#endif /* __MSXML_PARSER_H */ |