Browse code

ooxml: moved value parsing into separate function; ooxml: cleaned up comments; ooxml: restructured code for continued parsing after exceeding rlvl limit

Kevin Lin authored on 2014/07/01 06:26:48
Showing 1 changed file
... ...
@@ -52,21 +52,41 @@
52 52
 #define OOXML_JSON_RECLEVEL_MAX 5
53 53
 #define OOXML_JSON_STRLEN_MAX 100
54 54
 
55
-static int ooxml_is_int(const char *value, size_t len, int32_t *val2)
55
+static int ooxml_is_int(const char *value, size_t len, int32_t *val)
56 56
 {
57
-    long val3;
57
+    long val2;
58 58
     char *endptr = NULL;
59 59
 
60
-    val3 = strtol(value, &endptr, 10);
60
+    val2 = strtol(value, &endptr, 10);
61 61
     if (endptr != value+len) {
62 62
         return 0;
63 63
     }
64 64
 
65
-    *val2 = (int32_t)(val3 & 0x0000ffff);
65
+    *val = (int32_t)(val2 & 0x0000ffff);
66 66
 
67 67
     return 1;
68 68
 }
69 69
 
70
+static int ooxml_parse_value(json_object *wrkptr, const char *element_tag, const xmlChar *node_value)
71
+{
72
+    int ret = CL_SUCCESS;
73
+    int val;
74
+
75
+    if (ooxml_is_int(node_value, xmlStrlen(node_value), &val)) {
76
+        ret = cli_jsonint(wrkptr, element_tag, val);
77
+    }
78
+    else if (!xmlStrcmp(node_value, "true")) {
79
+        ret = cli_jsonbool(wrkptr, element_tag, 1);
80
+    }
81
+    else if (!xmlStrcmp(node_value, "false")) {
82
+        ret = cli_jsonbool(wrkptr, element_tag, 0);
83
+    }
84
+    else {
85
+        ret = cli_jsonstr(wrkptr, element_tag, node_value);
86
+    }
87
+    return ret;
88
+}
89
+
70 90
 static const char *ooxml_keys[] = {
71 91
     "coreproperties",
72 92
     "title",
... ...
@@ -178,23 +198,22 @@ static const char *ooxml_check_key(const char* key, size_t keylen)
178 178
     return NULL;
179 179
 }
180 180
 
181
-static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, int skip)
181
+static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl)
182 182
 {
183 183
     const char *element_tag = NULL, *end_tag = NULL;
184 184
     const xmlChar *node_name = NULL, *node_value = NULL;
185
-    json_object *njptr;
186
-    int node_type, ret = CL_SUCCESS, toval = 0;;
187
-    int32_t val2;
185
+    json_object *thisjobj;
186
+    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0;
188 187
 
189 188
     cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);
190 189
 
191 190
     /* check recursion level */
192 191
     if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
193
-        return CL_EMAXREC;
194
-    }
195
-
196
-    if (wrkptr == NULL) {
197
-        skip = 1;
192
+        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n", node_name);
193
+        /* skip it */
194
+        xmlTextReaderNext(reader);
195
+        //return CL_EMAXREC;
196
+        return CL_SUCCESS;
198 197
     }
199 198
 
200 199
     /* acquire element type */
... ...
@@ -204,7 +223,6 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
204 204
         return CL_EPARSE; /* first type is not an element */
205 205
     }
206 206
 
207
-    /* acquire element tag */
208 207
     node_name = xmlTextReaderConstLocalName(reader);
209 208
     if (!node_name) {
210 209
         cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
... ...
@@ -213,13 +231,19 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
213 213
     element_tag = ooxml_check_key(node_name, xmlStrlen(node_name));
214 214
     if (!element_tag) {
215 215
         cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
216
-        skip = 1; /* skipping element */
216
+        /* skip it */
217
+        xmlTextReaderNext(reader);
218
+        return CL_SUCCESS;
217 219
     }
218 220
 
219 221
     /* handle attributes if you want */
220 222
 
221
-    /* loop across all element contents */
222
-    while (xmlTextReaderRead(reader) == 1) {
223
+    /* advance to first content node */
224
+    if (xmlTextReaderRead(reader) != 1)
225
+        return CL_EPARSE;
226
+
227
+    /* parse until the end element tag */
228
+    while (!endtag) {
223 229
         if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
224 230
             return CL_ETIMEOUT;
225 231
         }
... ...
@@ -227,23 +251,19 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
227 227
         node_type = xmlTextReaderNodeType(reader);
228 228
         switch (node_type) {
229 229
         case XML_READER_TYPE_ELEMENT:
230
-            if (!skip) {
231
-                njptr = cli_jsonobj(wrkptr, element_tag);
232
-                if (!njptr) {
233
-                    cli_errmsg("ooxml_parse_element: failed to retrieve node for json object [%s]\n", element_tag);
234
-                    return CL_EFORMAT;
235
-                }
236
-                cli_dbgmsg("ooxml_parse_element: added json object [%s]\n", element_tag);
230
+            /* generate json object node */
231
+            thisjobj = cli_jsonobj(wrkptr, element_tag);
232
+            if (!thisjobj) {
233
+                return CL_EPARSE;
237 234
             }
238
-            else {
239
-                njptr = NULL;
240
-            } 
235
+            cli_dbgmsg("ooxml_parse_element: retrieved json object [%s]\n", element_tag);
241 236
 
242
-            ret = ooxml_parse_element(ctx, reader, njptr, rlvl+1, skip);
237
+            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1);
243 238
             if (ret != CL_SUCCESS) {
244 239
                 return ret;
245 240
             }
246 241
             break;
242
+
247 243
         case XML_READER_TYPE_END_ELEMENT:
248 244
             cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
249 245
             node_name = xmlTextReaderConstLocalName(reader);
... ...
@@ -251,49 +271,39 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
251 251
                 cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
252 252
                 return CL_EPARSE; /* no name, nameless */
253 253
             }
254
-            if (!skip) {
255
-                end_tag = ooxml_check_key(node_name, xmlStrlen(node_name));
256
-                if (!end_tag) {
257
-                    cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
258
-                    return CL_EFORMAT; /* unrecognized element tag */
259
-                }
260
-                if (strncmp(element_tag, end_tag, strlen(element_tag))) {
261
-                    cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
262
-                    return CL_EFORMAT;
263
-                }
254
+
255
+            end_tag = ooxml_check_key(node_name, xmlStrlen(node_name));
256
+            if (!end_tag) {
257
+                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
258
+                return CL_EFORMAT; /* unrecognized element tag */
259
+            }
260
+            if (strncmp(element_tag, end_tag, strlen(element_tag))) {
261
+                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
262
+                return CL_EFORMAT;
264 263
             }
265
-            return CL_SUCCESS;
264
+
265
+            /* advance to next element tag */
266
+            if (xmlTextReaderRead(reader) != 1)
267
+                return CL_EPARSE;
268
+
269
+            endtag = 1;
270
+            break;
271
+
266 272
         case XML_READER_TYPE_TEXT:
267
-            if (!skip) {
268
-                node_value = xmlTextReaderConstValue(reader);
273
+            node_value = xmlTextReaderConstValue(reader);
269 274
 
270
-                if (ooxml_is_int(node_value, xmlStrlen(node_value), &val2)) {
271
-                    ret = cli_jsonint(wrkptr, element_tag, val2);
272
-                }
273
-                else if (!xmlStrcmp(node_value, "true")) {
274
-                    ret = cli_jsonbool(wrkptr, element_tag, 1);
275
-                }
276
-                else if (!xmlStrcmp(node_value, "false")) {
277
-                    ret = cli_jsonbool(wrkptr, element_tag, 0);
278
-                }
279
-                else {
280
-                    ret = cli_jsonstr(wrkptr, element_tag, node_value);
281
-                }
275
+            ret = ooxml_parse_value(wrkptr, element_tag, node_value);
276
+            if (ret != CL_SUCCESS)
277
+                return ret;
282 278
 
283
-                if (ret != CL_SUCCESS)
284
-                    return ret;
279
+            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);
285 280
 
286
-                cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);
287
-            }
288
-#if OOXML_DEBUG
289
-            else {
290
-                node_name = xmlTextReaderConstLocalName(reader);
291
-                node_value = xmlTextReaderConstValue(reader);
281
+            /* advance to next element tag */
282
+            if (xmlTextReaderRead(reader) != 1)
283
+                return CL_EPARSE;
292 284
 
293
-                cli_dbgmsg("ooxml_parse_element: not adding xml node %s [%d]: %s\n", node_name, node_type, node_value);
294
-            }
295
-#endif
296 285
             break;
286
+
297 287
         default:
298 288
 #if OOXML_DEBUG
299 289
             node_name = xmlTextReaderConstLocalName(reader);
... ...
@@ -326,7 +336,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
326 326
         return CL_SUCCESS; /* libxml2 failed */
327 327
     }
328 328
 
329
-    ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, 0);
329
+    ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0);
330 330
 
331 331
     xmlTextReaderClose(reader);
332 332
     xmlFreeTextReader(reader);
... ...
@@ -351,6 +361,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
351 351
 {
352 352
     int ret = CL_SUCCESS, tmp, toval = 0;
353 353
     int core=0, extn=0, cust=0, dsig=0;
354
+    int mcore=0, mextn=0, mcust=0;
354 355
     const xmlChar *name, *value, *CT, *PN;
355 356
     xmlTextReaderPtr reader = NULL;
356 357
     uint32_t loff;
... ...
@@ -404,13 +415,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
404 404
                 }
405 405
                 else if (tmp != CL_VIRUS) {
406 406
                     cli_dbgmsg("cli_process_ooxml: failed to find core properties file \"%s\"!\n", PN);
407
+                    mcore++;
407 408
                 }
408 409
                 else {
409 410
                     cli_dbgmsg("ooxml_content_cb: found core properties file \"%s\" @ %x\n", PN, loff);
410 411
                     ret = unzip_single_internal(ctx, loff, ooxml_core_cb);
412
+                    core++;
411 413
                 }
412 414
             }
413
-            core++;
414 415
         }
415 416
         else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.extended-properties+xml")) {
416 417
             if (!extn) {
... ...
@@ -421,13 +433,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
421 421
                 }
422 422
                 else if (tmp != CL_VIRUS) {
423 423
                     cli_dbgmsg("cli_process_ooxml: failed to find extended properties file \"%s\"!\n", PN);
424
+                    mextn++;
424 425
                 }
425 426
                 else {
426 427
                     cli_dbgmsg("ooxml_content_cb: found extended properties file \"%s\" @ %x\n", PN, loff);
427 428
                     ret = unzip_single_internal(ctx, loff, ooxml_extn_cb);
429
+                    extn++;
428 430
                 }
429 431
             }
430
-            extn++;
431 432
         }
432 433
         else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.custom-properties+xml")) {
433 434
             if (!cust) {
... ...
@@ -438,13 +451,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
438 438
                 }
439 439
                 else if (tmp != CL_VIRUS) {
440 440
                     cli_dbgmsg("cli_process_ooxml: failed to find custom properties file \"%s\"!\n", PN);
441
+                    mcust++;
441 442
                 }
442 443
                 else {
443 444
                     cli_dbgmsg("ooxml_content_cb: found custom properties file \"%s\" @ %x\n", PN, loff);
445
+                    cust++;
444 446
                     //ret = unzip_single_internal(ctx, loff, ooxml_cust_cb);
445 447
                 }
446 448
             }
447
-            cust++;
448 449
         }
449 450
         else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml")) {
450 451
             dsig++;
... ...
@@ -454,24 +468,27 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
454 454
             goto ooxml_content_exit;
455 455
     }
456 456
 
457
-    if (core) {
457
+    if (core)
458 458
         cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
459
-    }
460
-    else {
459
+    else if (!mcore)
461 460
         cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
462
-    }
463
-    if (extn) {
461
+    if (mcore)
462
+        cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", core);
463
+
464
+    if (extn)
464 465
         cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
465
-    }
466
-    else {
466
+    else if (!mextn)
467 467
         cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
468
-    }
469
-    if (cust) {
468
+    if (mextn)
469
+        cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", extn);
470
+
471
+    if (cust)
470 472
         cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
471
-    }
472
-    else {
473
+    else if (!mcust)
473 474
         cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
474
-    }
475
+    if (mcust)
476
+        cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", cust);
477
+
475 478
     if (dsig) {
476 479
         cli_jsonint(ctx->wrkproperty, "DigitalSignaturesCount", dsig);
477 480
     }