Browse code

msxml_parser: change method of setting callback system; add comment_cb

Kevin Lin authored on 2016/05/21 02:47:35
Showing 3 changed files
... ...
@@ -2058,6 +2058,7 @@ int cli_scanhwpml(cli_ctx *ctx)
2058 2058
 {
2059 2059
 #if HAVE_LIBXML2
2060 2060
     struct msxml_cbdata cbdata;
2061
+    struct msxml_ctx mxctx;
2061 2062
     xmlTextReaderPtr reader = NULL;
2062 2063
     int state, ret = CL_SUCCESS;
2063 2064
 
... ...
@@ -2079,7 +2080,9 @@ int cli_scanhwpml(cli_ctx *ctx)
2079 2079
         return ret; // libxml2 failed!
2080 2080
     }
2081 2081
 
2082
-    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, MSXML_FLAG_JSON, hwpml_binary_cb);
2082
+    memset(&mxctx, 0, sizeof(mxctx));
2083
+    mxctx.scan_cb = hwpml_binary_cb;
2084
+    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, MSXML_FLAG_JSON, &mxctx);
2083 2085
 
2084 2086
     xmlTextReaderClose(reader);
2085 2087
     xmlFreeTextReader(reader);
... ...
@@ -43,7 +43,7 @@
43 43
 #endif
44 44
 #include <libxml/xmlreader.h>
45 45
 
46
-#define MSXML_VERBIOSE 1
46
+#define MSXML_VERBIOSE 0
47 47
 #if MSXML_VERBIOSE
48 48
 #define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
49 49
 #else
... ...
@@ -62,15 +62,14 @@
62 62
         }                                                               \
63 63
     } while(0)
64 64
 
65
-#define track_json(mxctx) (mxctx->flags & MSXML_FLAG_JSON)
65
+#define track_json(mxctx) (mxctx->ictx->flags & MSXML_FLAG_JSON)
66 66
 
67
-struct msxml_ctx {
67
+struct msxml_ictx {
68 68
     cli_ctx *ctx;
69 69
     uint32_t flags;
70 70
     const struct key_entry *keys;
71 71
     size_t num_keys;
72 72
 
73
-    msxml_scan_cb scan_cb;
74 73
 #if HAVE_JSON
75 74
     json_object *root;
76 75
     int toval;
... ...
@@ -79,7 +78,7 @@ struct msxml_ctx {
79 79
 
80 80
 struct key_entry blank_key = { NULL, NULL, 0 };
81 81
 
82
-static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const xmlChar *key, size_t keylen)
82
+static const struct key_entry *msxml_check_key(struct msxml_ictx *ictx, const xmlChar *key, size_t keylen)
83 83
 {
84 84
     unsigned i;
85 85
 
... ...
@@ -88,9 +87,9 @@ static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const xm
88 88
         return &blank_key;
89 89
     }
90 90
 
91
-    for (i = 0; i < mxctx->num_keys; ++i) {
92
-        if (keylen == strlen(mxctx->keys[i].key) && !strncasecmp((char *)key, mxctx->keys[i].key, keylen)) {
93
-            return &mxctx->keys[i];
91
+    for (i = 0; i < ictx->num_keys; ++i) {
92
+        if (keylen == strlen(ictx->keys[i].key) && !strncasecmp((char *)key, ictx->keys[i].key, keylen)) {
93
+            return &ictx->keys[i];
94 94
         }
95 95
     }
96 96
 
... ...
@@ -178,8 +177,9 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
178 178
     const struct key_entry *keyinfo;
179 179
     struct attrib_entry attribs[MAX_ATTRIBS];
180 180
     int ret, virus = 0, state, node_type, endtag = 0, num_attribs = 0;
181
-    cli_ctx *ctx = mxctx->ctx;
181
+    cli_ctx *ctx = mxctx->ictx->ctx;
182 182
 #if HAVE_JSON
183
+    json_object *root = mxctx->ictx->root;
183 184
     json_object *parent = (json_object *)jptr;
184 185
     json_object *thisjobj = NULL;
185 186
 #else
... ...
@@ -195,7 +195,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
195 195
 
196 196
 #if HAVE_JSON
197 197
         if (track_json(mxctx)) {
198
-            int tmp = cli_json_parse_error(mxctx->root, "MSXML_RECURSIVE_LIMIT");
198
+            int tmp = cli_json_parse_error(root, "MSXML_RECURSIVE_LIMIT");
199 199
             if (tmp != CL_SUCCESS)
200 200
                 return tmp;
201 201
         }
... ...
@@ -226,7 +226,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
226 226
             cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
227 227
 #if HAVE_JSON
228 228
             if (track_json(mxctx)) {
229
-                int tmp = cli_json_parse_error(mxctx->root, "MSXML_NAMELESS_ELEMENT");
229
+                int tmp = cli_json_parse_error(root, "MSXML_NAMELESS_ELEMENT");
230 230
                 if (tmp != CL_SUCCESS)
231 231
                     return tmp;
232 232
             }
... ...
@@ -235,7 +235,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
235 235
         }
236 236
 
237 237
         /* determine if the element is interesting */
238
-        keyinfo = msxml_check_key(mxctx, element_name, xmlStrlen(element_name));
238
+        keyinfo = msxml_check_key(mxctx->ictx, element_name, xmlStrlen(element_name));
239 239
 
240 240
         cli_msxmlmsg("key:  %s\n", keyinfo->key);
241 241
         cli_msxmlmsg("name: %s\n", keyinfo->name);
... ...
@@ -253,7 +253,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
253 253
 #if HAVE_JSON
254 254
         if (track_json(mxctx) && (keyinfo->type & MSXML_JSON_TRACK)) {
255 255
             if (keyinfo->type & MSXML_JSON_ROOT)
256
-                thisjobj = cli_jsonobj(mxctx->root, keyinfo->name);
256
+                thisjobj = cli_jsonobj(root, keyinfo->name);
257 257
             else if (keyinfo->type & MSXML_JSON_WRKPTR)
258 258
                 thisjobj = cli_jsonobj(parent, keyinfo->name);
259 259
 
... ...
@@ -367,7 +367,7 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
367 367
 
368 368
         while (!endtag) {
369 369
 #if HAVE_JSON
370
-            if (track_json(mxctx) && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
370
+            if (track_json(mxctx) && (cli_json_timeout_cycle_check(ctx, &(mxctx->ictx->toval)) != CL_SUCCESS))
371 371
                 return CL_ETIMEOUT;
372 372
 #endif
373 373
 
... ...
@@ -481,6 +481,31 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
481 481
                 check_state(state);
482 482
                 break;
483 483
 
484
+            case XML_READER_TYPE_COMMENT:
485
+                node_value = xmlTextReaderConstValue(reader);
486
+
487
+                cli_msxmlmsg("COMMENT: %s\n", node_value);
488
+
489
+                /* callback-based scanning mechanism for comments (used by MHTML) */
490
+                if ((keyinfo->type & MSXML_COMMENT_CB) && mxctx->comment_cb) {
491
+#if HAVE_JSON
492
+                    ret = mxctx->comment_cb((const char *)node_value, ctx, thisjobj);
493
+#else
494
+                    ret = mxctx->comment_cb((const char *)node_value, ctx, NULL);
495
+#endif
496
+                    if (ret != CL_SUCCESS && (ret != CL_VIRUS || (!SCAN_ALL && ret == CL_VIRUS))) {
497
+                        return ret;
498
+                    } else if (SCAN_ALL && ret == CL_VIRUS) {
499
+                        virus = 1;
500
+                    }
501
+
502
+                }
503
+
504
+                /* advance to next node */
505
+                state = xmlTextReaderRead(reader);
506
+                check_state(state);
507
+                break;
508
+
484 509
             case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
485 510
                 /* advance to next node */
486 511
                 state = xmlTextReaderRead(reader);
... ...
@@ -513,9 +538,8 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
513 513
 
514 514
                 cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value);
515 515
 
516
-                state = xmlTextReaderNext(reader);
516
+                state = xmlTextReaderRead(reader);
517 517
                 check_state(state);
518
-                return (virus ? CL_VIRUS : CL_SUCCESS);
519 518
             }
520 519
         }
521 520
 
... ...
@@ -537,30 +561,36 @@ static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader,
537 537
 }
538 538
 
539 539
 /* reader initialization and closing handled by caller */
540
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, msxml_scan_cb scan_cb)
540
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, struct msxml_ctx *mxctx)
541 541
 {
542
-    struct msxml_ctx mxctx;
542
+    struct msxml_ctx reserve;
543
+    struct msxml_ictx ictx;
543 544
     int state, virus = 0, ret = CL_SUCCESS;
544 545
 
545 546
     if (!ctx)
546 547
         return CL_ENULLARG;
547 548
 
548
-    mxctx.ctx = ctx;
549
-    mxctx.flags = flags;
550
-    mxctx.keys = keys;
551
-    mxctx.num_keys = num_keys;
552
-    mxctx.scan_cb = scan_cb;
549
+    if (!mxctx) {
550
+        memset(&reserve, 0, sizeof(reserve));
551
+        mxctx = &reserve;
552
+    }
553
+
554
+    ictx.ctx = ctx;
555
+    ictx.flags = flags;
556
+    ictx.keys = keys;
557
+    ictx.num_keys = num_keys;
553 558
 #if HAVE_JSON
554 559
     if (flags & MSXML_FLAG_JSON) {
555
-        mxctx.root = ctx->wrkproperty;
560
+        ictx.root = ctx->wrkproperty;
556 561
         /* JSON Sanity Check */
557
-        if (!mxctx.root)
558
-            mxctx.flags &= ~MSXML_FLAG_JSON;
559
-        mxctx.toval = 0;
562
+        if (!ictx.root)
563
+            ictx.flags &= ~MSXML_FLAG_JSON;
564
+        ictx.toval = 0;
560 565
     }
561 566
 #else
562
-    mxctx.flags &= ~MSXML_FLAG_JSON;
567
+    ictx.flags &= ~MSXML_FLAG_JSON;
563 568
 #endif
569
+    mxctx->ictx = &ictx;
564 570
 
565 571
     /* Error Handler (setting handler on tree walker causes segfault) */
566 572
     if (!(flags & MSXML_FLAG_WALK))
... ...
@@ -570,12 +600,12 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct
570 570
     /* Main Processing Loop */
571 571
     while ((state = xmlTextReaderRead(reader)) == 1) {
572 572
 #if HAVE_JSON
573
-        if ((mxctx.flags & MSXML_FLAG_JSON) && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
573
+        if ((ictx.flags & MSXML_FLAG_JSON) && (cli_json_timeout_cycle_check(ictx.ctx, &(ictx.toval)) != CL_SUCCESS))
574 574
             return CL_ETIMEOUT;
575 575
 
576
-        ret = msxml_parse_element(&mxctx, reader, 0, mxctx.root);
576
+        ret = msxml_parse_element(mxctx, reader, 0, ictx.root);
577 577
 #else
578
-        ret = msxml_parse_element(&mxctx, reader, 0, NULL);
578
+        ret = msxml_parse_element(mxctx, reader, 0, NULL);
579 579
 #endif
580 580
         if (ret == CL_SUCCESS);
581 581
         else if (SCAN_ALL && ret == CL_VIRUS) {
... ...
@@ -595,7 +625,7 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct
595 595
 
596 596
 #if HAVE_JSON
597 597
     /* Parse General Error Handler */
598
-    if (mxctx.flags & MSXML_FLAG_JSON) {
598
+    if (ictx.flags & MSXML_FLAG_JSON) {
599 599
         int tmp = CL_SUCCESS;
600 600
 
601 601
         switch(ret) {
... ...
@@ -603,22 +633,22 @@ int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct
603 603
         case CL_BREAK: /* OK */
604 604
             break;
605 605
         case CL_VIRUS:
606
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_VIRUS");
606
+            tmp = cli_json_parse_error(ictx.root, "MSXML_INTR_VIRUS");
607 607
             break;
608 608
         case CL_ETIMEOUT:
609
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_TIMEOUT");
609
+            tmp = cli_json_parse_error(ictx.root, "MSXML_INTR_TIMEOUT");
610 610
             break;
611 611
         case CL_EPARSE:
612
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_XMLPARSER");
612
+            tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_XMLPARSER");
613 613
             break;
614 614
         case CL_EMEM:
615
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OUTOFMEM");
615
+            tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_OUTOFMEM");
616 616
             break;
617 617
         case CL_EFORMAT:
618
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_MALFORMED");
618
+            tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_MALFORMED");
619 619
             break;
620 620
         default:
621
-            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OTHER");
621
+            tmp = cli_json_parse_error(ictx.root, "MSXML_ERROR_OTHER");
622 622
             break;
623 623
         }
624 624
 
... ...
@@ -30,7 +30,6 @@
30 30
 #endif
31 31
 
32 32
 #include "others.h"
33
-#include "json_api.h"
34 33
 
35 34
 #ifdef _WIN32
36 35
 #ifndef LIBXML_WRITER_ENABLED
... ...
@@ -47,6 +46,8 @@
47 47
 #define MSXML_FLAG_JSON  0x1
48 48
 #define MSXML_FLAG_WALK  0x2
49 49
 
50
+struct msxml_ictx;
51
+
50 52
 struct attrib_entry {
51 53
     const char *key;
52 54
     const char *value;
... ...
@@ -58,16 +59,17 @@ struct key_entry {
58 58
 #define MSXML_IGNORE_ELEM     0x1
59 59
 #define MSXML_SCAN_CB         0x2
60 60
 #define MSXML_SCAN_B64        0x4
61
+#define MSXML_COMMENT_CB      0x8
61 62
 /* where */
62
-#define MSXML_JSON_ROOT       0x8
63
-#define MSXML_JSON_WRKPTR     0x10
64
-#define MSXML_JSON_MULTI      0x20
63
+#define MSXML_JSON_ROOT       0x10
64
+#define MSXML_JSON_WRKPTR     0x20
65
+#define MSXML_JSON_MULTI      0x40
65 66
 
66 67
 #define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)
67 68
 /* what */
68
-#define MSXML_JSON_COUNT      0x40
69
-#define MSXML_JSON_VALUE      0x80
70
-#define MSXML_JSON_ATTRIB     0x100
69
+#define MSXML_JSON_COUNT      0x100
70
+#define MSXML_JSON_VALUE      0x200
71
+#define MSXML_JSON_ATTRIB     0x400
71 72
 
72 73
     const char *key;
73 74
     const char *name;
... ...
@@ -75,8 +77,15 @@ struct key_entry {
75 75
 };
76 76
 
77 77
 typedef int (*msxml_scan_cb)(int fd, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs);
78
+typedef int (*msxml_comment_cb)(const char *comment, cli_ctx *ctx, void *wrkjobj);
79
+
80
+struct msxml_ctx {
81
+    msxml_scan_cb scan_cb;
82
+    msxml_comment_cb comment_cb;
83
+    struct msxml_ictx *ictx;
84
+};
78 85
 
79
-int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, msxml_scan_cb scan_cb);
86
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, struct msxml_ctx *mxctx);
80 87
 
81 88
 #endif /* HAVE_LIBXML2 */
82 89