... | ... |
@@ -714,7 +714,7 @@ int cli_scanhwpml(cli_ctx *ctx) |
714 | 714 |
return ret; // libxml2 failed! |
715 | 715 |
} |
716 | 716 |
|
717 |
- ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, 1); |
|
717 |
+ ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, 1, hwpml_binary_cb); |
|
718 | 718 |
|
719 | 719 |
xmlTextReaderClose(reader); |
720 | 720 |
xmlFreeTextReader(reader); |
... | ... |
@@ -253,7 +253,7 @@ int cli_scanmsxml(cli_ctx *ctx) |
253 | 253 |
return ret; // libxml2 failed! |
254 | 254 |
} |
255 | 255 |
|
256 |
- ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1); |
|
256 |
+ ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1, NULL); |
|
257 | 257 |
|
258 | 258 |
xmlTextReaderClose(reader); |
259 | 259 |
xmlFreeTextReader(reader); |
... | ... |
@@ -156,12 +156,14 @@ static int msxml_parse_value(json_object *wrkptr, const char *arrname, const xml |
156 | 156 |
} |
157 | 157 |
#endif /* HAVE_JSON */ |
158 | 158 |
|
159 |
+#define MAX_ATTRIBS 20 |
|
159 | 160 |
static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl, void *jptr) |
160 | 161 |
{ |
161 | 162 |
const xmlChar *element_name = NULL; |
162 | 163 |
const xmlChar *node_name = NULL, *node_value = NULL; |
163 | 164 |
const struct key_entry *keyinfo; |
164 |
- int ret, virus = 0, state, node_type, endtag = 0; |
|
165 |
+ struct attrib_entry attribs[MAX_ATTRIBS]; |
|
166 |
+ int ret, virus = 0, state, node_type, endtag = 0, num_attribs = 0; |
|
165 | 167 |
cli_ctx *ctx = mxctx->ctx; |
166 | 168 |
#if HAVE_JSON |
167 | 169 |
json_object *parent = (json_object *)jptr; |
... | ... |
@@ -284,6 +286,35 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, |
284 | 284 |
} |
285 | 285 |
} |
286 | 286 |
#endif |
287 |
+ /* populate attributes for scanning callback - BROKEN, probably because the reader is still positioned on an attribute left over from the earlier attribute parsing */ |
|
288 |
+ if ((keyinfo->type & MSXML_SCAN_CB) && mxctx->scan_cb) { |
|
289 |
+ state = xmlTextReaderHasAttributes(reader); |
|
290 |
+ if (state == 0) { |
|
291 |
+ state = xmlTextReaderMoveToFirstAttribute(reader); |
|
292 |
+ if (state == 1) { |
|
293 |
+ /* read first attribute (current head) */ |
|
294 |
+ attribs[num_attribs].key = xmlTextReaderConstLocalName(reader); |
|
295 |
+ attribs[num_attribs].value = xmlTextReaderConstValue(reader); |
|
296 |
+ num_attribs++; |
|
297 |
+ } else if (state == -1) { |
|
298 |
+ return CL_EPARSE; |
|
299 |
+ } |
|
300 |
+ } |
|
301 |
+ |
|
302 |
+ /* start reading attributes or read remainder of attributes */ |
|
303 |
+ if (state == 1) { |
|
304 |
+ cli_msxmlmsg("msxml_parse_element: adding attributes to scanning context\n"); |
|
305 |
+ |
|
306 |
+ while ((num_attribs < MAX_ATTRIBS) && (xmlTextReaderMoveToNextAttribute(reader) == 1)) { |
|
307 |
+ attribs[num_attribs].key = xmlTextReaderConstLocalName(reader); |
|
308 |
+ attribs[num_attribs].value = xmlTextReaderConstValue(reader); |
|
309 |
+ num_attribs++; |
|
310 |
+ } |
|
311 |
+ } |
|
312 |
+ else if (state == -1) { |
|
313 |
+ return CL_EPARSE; |
|
314 |
+ } |
|
315 |
+ } |
|
287 | 316 |
|
288 | 317 |
/* check self-containment */ |
289 | 318 |
state = xmlTextReaderMoveToElement(reader); |
... | ... |
@@ -339,9 +370,40 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, |
339 | 339 |
cli_msxmlmsg("msxml_parse_element: added json value [%s: %s]\n", keyinfo->name, (const char *)node_value); |
340 | 340 |
} |
341 | 341 |
#endif |
342 |
+ /* callback-based scanning mechanism for embedded objects (used by HWPML) */ |
|
343 |
+ if ((keyinfo->type & MSXML_SCAN_CB) && mxctx->scan_cb) { |
|
344 |
+ char name[1024]; |
|
345 |
+ char *tempfile = name; |
|
346 |
+ int of; |
|
347 |
+ size_t vlen = strlen((const char *)node_value); |
|
348 |
+ |
|
349 |
+ cli_msxmlmsg("BINARY CALLBACK DATA!\n"); |
|
350 |
+ |
|
351 |
+ if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) { |
|
352 |
+ cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile); |
|
353 |
+ return ret; |
|
354 |
+ } |
|
342 | 355 |
|
343 |
- /* scanning protocol for embedded objects encoded in base64 */ |
|
344 |
- if ((keyinfo->type & MSXML_SCAN_B64) || (keyinfo->type & MSXML_SCAN_B64_TRIM4)) { |
|
356 |
+ if (cli_writen(of, (char *)node_value, vlen) != vlen) { |
|
357 |
+ close(of); |
|
358 |
+ return CL_EWRITE; |
|
359 |
+ } |
|
360 |
+ |
|
361 |
+ cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile); |
|
362 |
+ |
|
363 |
+ ret = mxctx->scan_cb(of, ctx, num_attribs, attribs); |
|
364 |
+ if (!(ctx->engine->keeptmp)) |
|
365 |
+ cli_unlink(tempfile); |
|
366 |
+ free(tempfile); |
|
367 |
+ if (ret != CL_SUCCESS && (ret != CL_VIRUS || (!SCAN_ALL && ret == CL_VIRUS))) { |
|
368 |
+ return ret; |
|
369 |
+ } else if (SCAN_ALL && ret == CL_VIRUS) { |
|
370 |
+ virus = 1; |
|
371 |
+ } |
|
372 |
+ } |
|
373 |
+ |
|
374 |
+ /* scanning protocol for embedded objects encoded in base64 (used by MSXML) */ |
|
375 |
+ if (keyinfo->type & MSXML_SCAN_B64) { |
|
345 | 376 |
char name[1024]; |
346 | 377 |
char *decoded, *tempfile = name; |
347 | 378 |
size_t decodedlen; |
... | ... |
@@ -372,31 +434,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, |
372 | 372 |
|
373 | 373 |
cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile); |
374 | 374 |
|
375 |
- if (keyinfo->type & MSXML_SCAN_B64_TRIM4) { |
|
376 |
- STATBUF statbuf; |
|
377 |
- fmap_t *map; |
|
378 |
- |
|
379 |
- cli_dbgmsg("msxml_parse_element: trimming 4-byte prefix from binary stream\n"); |
|
380 |
- |
|
381 |
- if (FSTAT(of, &statbuf) == -1) { |
|
382 |
- cli_errmsg("msxml_parse_element: cannot stat file descriptor\n"); |
|
383 |
- close(of); |
|
384 |
- return CL_ESTAT; |
|
385 |
- } |
|
386 |
- |
|
387 |
- map = fmap(of, 0, statbuf.st_size); |
|
388 |
- if (!map) { |
|
389 |
- cli_errmsg("msxml_parse_element: failed to fmap binary data\n"); |
|
390 |
- close(of); |
|
391 |
- return CL_EMAP; |
|
392 |
- } |
|
393 |
- |
|
394 |
- ret = cli_map_scandesc(map, 4, 0, ctx, CL_TYPE_ANY); |
|
395 |
- funmap(map); |
|
396 |
- } else { |
|
397 |
- ret = cli_magic_scandesc(of, ctx); |
|
398 |
- } |
|
399 |
- |
|
375 |
+ ret = cli_magic_scandesc(of, ctx); |
|
400 | 376 |
close(of); |
401 | 377 |
if (!(ctx->engine->keeptmp)) |
402 | 378 |
cli_unlink(tempfile); |
... | ... |
@@ -469,7 +507,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, |
469 | 469 |
} |
470 | 470 |
|
471 | 471 |
/* reader initialization and closing handled by caller */ |
472 |
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode) |
|
472 |
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode, msxml_scan_cb scan_cb) |
|
473 | 473 |
{ |
474 | 474 |
struct msxml_ctx mxctx; |
475 | 475 |
int state, virus = 0, ret = CL_SUCCESS; |
... | ... |
@@ -478,6 +516,7 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct |
478 | 478 |
return CL_ENULLARG; |
479 | 479 |
|
480 | 480 |
mxctx.ctx = ctx; |
481 |
+ mxctx.scan_cb = scan_cb; |
|
481 | 482 |
mxctx.keys = keys; |
482 | 483 |
mxctx.num_keys = num_keys; |
483 | 484 |
#if HAVE_JSON |
... | ... |
@@ -43,12 +43,17 @@ |
43 | 43 |
#define MSXML_RECLEVEL_MAX 20 |
44 | 44 |
#define MSXML_JSON_STRLEN_MAX 128 |
45 | 45 |
|
46 |
+struct attrib_entry { |
|
47 |
+ const char *key; |
|
48 |
+ const char *value; |
|
49 |
+}; |
|
50 |
+ |
|
46 | 51 |
struct key_entry { |
47 | 52 |
/* how */ |
48 | 53 |
#define MSXML_IGNORE 0x00 |
49 | 54 |
#define MSXML_IGNORE_ELEM 0x01 |
50 |
-#define MSXML_SCAN_B64 0x02 |
|
51 |
-#define MSXML_SCAN_B64_TRIM4 0x04 |
|
55 |
+#define MSXML_SCAN_CB 0x02 |
|
56 |
+#define MSXML_SCAN_B64 0x04 |
|
52 | 57 |
/* where */ |
53 | 58 |
#define MSXML_JSON_ROOT 0x08 |
54 | 59 |
#define MSXML_JSON_WRKPTR 0x10 |
... | ... |
@@ -64,8 +69,11 @@ struct key_entry { |
64 | 64 |
uint32_t type; |
65 | 65 |
}; |
66 | 66 |
|
67 |
+typedef int (*msxml_scan_cb)(int fd, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs); |
|
68 |
+ |
|
67 | 69 |
struct msxml_ctx { |
68 | 70 |
cli_ctx *ctx; |
71 |
+ msxml_scan_cb scan_cb; |
|
69 | 72 |
const struct key_entry *keys; |
70 | 73 |
size_t num_keys; |
71 | 74 |
|
... | ... |
@@ -75,7 +83,7 @@ struct msxml_ctx { |
75 | 75 |
#endif |
76 | 76 |
}; |
77 | 77 |
|
78 |
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode); |
|
78 |
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode, msxml_scan_cb scan_cb); |
|
79 | 79 |
|
80 | 80 |
#endif /* HAVE_LIBXML2 */ |
81 | 81 |
|
... | ... |
@@ -126,7 +126,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx) |
126 | 126 |
return CL_SUCCESS; // internal error from libxml2 |
127 | 127 |
} |
128 | 128 |
|
129 |
- ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1); |
|
129 |
+ ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1, NULL); |
|
130 | 130 |
|
131 | 131 |
if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) |
132 | 132 |
cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n"); |