Browse code

msxml_parser: flags for modifying reader usage (json, walk)

Kevin Lin authored on 2016/05/20 06:54:56
Showing 5 changed files
... ...
@@ -2079,7 +2079,7 @@ int cli_scanhwpml(cli_ctx *ctx)
2079 2079
         return ret; // libxml2 failed!
2080 2080
     }
2081 2081
 
2082
-    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, 1, hwpml_binary_cb);
2082
+    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, MSXML_FLAG_JSON, hwpml_binary_cb);
2083 2083
 
2084 2084
     xmlTextReaderClose(reader);
2085 2085
     xmlFreeTextReader(reader);
... ...
@@ -1175,7 +1175,7 @@ parseRootMHTML(mbox_ctx *mctx, message *m, text *t)
1175 1175
 	return rc;
1176 1176
     }
1177 1177
 
1178
-    ret = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, 1, NULL);
1178
+    ret = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, MSXML_FLAG_JSON | MSXML_FLAG_WALK, NULL);
1179 1179
     switch (ret) {
1180 1180
     case CL_SUCCESS:
1181 1181
     case CL_ETIMEOUT:
... ...
@@ -43,7 +43,7 @@
43 43
 #endif
44 44
 #include <libxml/xmlreader.h>
45 45
 
46
-#define MSXML_VERBIOSE 0
46
+#define MSXML_VERBIOSE 1
47 47
 #if MSXML_VERBIOSE
48 48
 #define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
49 49
 #else
... ...
@@ -62,15 +62,18 @@
62 62
         }                                                               \
63 63
     } while(0)
64 64
 
65
+#define track_json(mxctx) (mxctx->flags & MSXML_FLAG_JSON)
66
+
65 67
 struct msxml_ctx {
66 68
     cli_ctx *ctx;
67
-    msxml_scan_cb scan_cb;
69
+    uint32_t flags;
68 70
     const struct key_entry *keys;
69 71
     size_t num_keys;
70 72
 
73
+    msxml_scan_cb scan_cb;
71 74
 #if HAVE_JSON
72 75
     json_object *root;
73
-    int mode, toval;
76
+    int toval;
74 77
 #endif
75 78
 };
76 79
 
... ...
@@ -191,7 +194,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
191 191
         cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n");
192 192
 
193 193
 #if HAVE_JSON
194
-        if (mxctx->mode) {
194
+        if (track_json(mxctx)) {
195 195
             int tmp = cli_json_parse_error(mxctx->root, "MSXML_RECURSIVE_LIMIT");
196 196
             if (tmp != CL_SUCCESS)
197 197
                 return tmp;
... ...
@@ -222,7 +225,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
222 222
         if (!element_name) {
223 223
             cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
224 224
 #if HAVE_JSON
225
-            if (mxctx->mode) {
225
+            if (track_json(mxctx)) {
226 226
                 int tmp = cli_json_parse_error(mxctx->root, "MSXML_NAMELESS_ELEMENT");
227 227
                 if (tmp != CL_SUCCESS)
228 228
                     return tmp;
... ...
@@ -248,7 +251,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
248 248
         }
249 249
 
250 250
 #if HAVE_JSON
251
-        if (mxctx->mode && (keyinfo->type & MSXML_JSON_TRACK)) {
251
+        if (track_json(mxctx) && (keyinfo->type & MSXML_JSON_TRACK)) {
252 252
             if (keyinfo->type & MSXML_JSON_ROOT)
253 253
                 thisjobj = cli_jsonobj(mxctx->root, keyinfo->name);
254 254
             else if (keyinfo->type & MSXML_JSON_WRKPTR)
... ...
@@ -364,7 +367,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
364 364
 
365 365
         while (!endtag) {
366 366
 #if HAVE_JSON
367
-            if (mxctx->mode && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
367
+            if (track_json(mxctx) && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
368 368
                 return CL_ETIMEOUT;
369 369
 #endif
370 370
 
... ...
@@ -534,7 +537,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
534 534
 }
535 535
 
536 536
 /* reader initialization and closing handled by caller */
537
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode, msxml_scan_cb scan_cb)
537
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, msxml_scan_cb scan_cb)
538 538
 {
539 539
     struct msxml_ctx mxctx;
540 540
     int state, virus = 0, ret = CL_SUCCESS;
... ...
@@ -543,28 +546,31 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct
543 543
         return CL_ENULLARG;
544 544
 
545 545
     mxctx.ctx = ctx;
546
-    mxctx.scan_cb = scan_cb;
546
+    mxctx.flags = flags;
547 547
     mxctx.keys = keys;
548 548
     mxctx.num_keys = num_keys;
549
+    mxctx.scan_cb = scan_cb;
549 550
 #if HAVE_JSON
550
-    mxctx.mode = mode;
551
-    if (mode) {
551
+    if (flags & MSXML_FLAG_JSON) {
552 552
         mxctx.root = ctx->wrkproperty;
553 553
         /* JSON Sanity Check */
554 554
         if (!mxctx.root)
555
-            mxctx.mode = 0;
555
+            mxctx.flags &= ~MSXML_FLAG_JSON;
556 556
         mxctx.toval = 0;
557 557
     }
558
+#else
559
+    mxctx.flags &= ~MSXML_FLAG_JSON;
558 560
 #endif
559 561
 
560
-    /* Error Handler */
561
-    xmlTextReaderSetErrorHandler(reader, NULL, NULL); /* xml default handler */
562
-    //xmlTextReaderSetErrorHandler(reader, msxml_error_handler, NULL);
562
+    /* Error Handler (setting handler on tree walker causes segfault) */
563
+    if (!(flags & MSXML_FLAG_WALK))
564
+        xmlTextReaderSetErrorHandler(reader, NULL, NULL); /* xml default handler */
565
+        //xmlTextReaderSetErrorHandler(reader, msxml_error_handler, NULL);
563 566
 
564 567
     /* Main Processing Loop */
565 568
     while ((state = xmlTextReaderRead(reader)) == 1) {
566 569
 #if HAVE_JSON
567
-        if (mxctx.mode && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
570
+        if ((mxctx.flags & MSXML_FLAG_JSON) && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
568 571
             return CL_ETIMEOUT;
569 572
 
570 573
         ret = msxml_parse_element(&mxctx, reader, 0, mxctx.root);
... ...
@@ -589,7 +595,7 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct
589 589
 
590 590
 #if HAVE_JSON
591 591
     /* Parse General Error Handler */
592
-    if (mxctx.mode) {
592
+    if (mxctx.flags & MSXML_FLAG_JSON) {
593 593
         int tmp = CL_SUCCESS;
594 594
 
595 595
         switch(ret) {
... ...
@@ -43,6 +43,10 @@
43 43
 #define MSXML_RECLEVEL_MAX 20
44 44
 #define MSXML_JSON_STRLEN_MAX 128
45 45
 
46
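+/* reader usage flags: MSXML_FLAG_JSON enables JSON property tracking; MSXML_FLAG_WALK marks a tree-walker reader (no error handler is set on it) */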
47
+#define MSXML_FLAG_JSON  0x1
48
+#define MSXML_FLAG_WALK  0x2
49
+
46 50
 struct attrib_entry {
47 51
     const char *key;
48 52
     const char *value;
... ...
@@ -72,7 +76,7 @@ struct key_entry {
72 72
 
73 73
 typedef int (*msxml_scan_cb)(int fd, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs);
74 74
 
75
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode, msxml_scan_cb scan_cb);
75
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, msxml_scan_cb scan_cb);
76 76
 
77 77
 #endif /* HAVE_LIBXML2 */
78 78
 
... ...
@@ -128,7 +128,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
128 128
         return CL_SUCCESS; // internal error from libxml2
129 129
     }
130 130
 
131
-    ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1, NULL);
131
+    ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, MSXML_FLAG_JSON, NULL);
132 132
 
133 133
     if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
134 134
         cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n");
... ...
@@ -375,7 +375,7 @@ static int ooxml_hwp_cb(int fd, cli_ctx *ctx)
375 375
         return CL_SUCCESS; // internal error from libxml2
376 376
     }
377 377
 
378
-    ret = cli_msxml_parse_document(ctx, reader, ooxml_hwp_keys, num_ooxml_hwp_keys, 1, NULL);
378
+    ret = cli_msxml_parse_document(ctx, reader, ooxml_hwp_keys, num_ooxml_hwp_keys, MSXML_FLAG_JSON, NULL);
379 379
 
380 380
     if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
381 381
         cli_warnmsg("ooxml_hwp_cb: encountered issue in parsing properties document\n");