Browse code

bb12133: Wrapping cli_strntol to provide easy error detection. Applying cli_strntol_wrap with error checking. Adding logic to identify when a parsing error is in fact a new revision of the PDF.

Micah Snyder authored on 2018/06/03 09:58:35
Showing 3 changed files
... ...
@@ -207,7 +207,7 @@ int pdf_findobj(struct pdf_struct *pdf)
207 207
     const char *start, *q, *q2, *q3, *eof;
208 208
     struct pdf_obj *obj;
209 209
     off_t bytesleft;
210
-    unsigned genid, objid;
210
+    unsigned long genid, objid;
211 211
 
212 212
     pdf->nobjs++;
213 213
     pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
... ...
@@ -243,12 +243,43 @@ int pdf_findobj(struct pdf_struct *pdf)
243 243
     while (q > start && isdigit(*q))
244 244
         q--;
245 245
 
246
-    genid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10);
246
+    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&genid)) {
247
+        cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs);
248
+        return -1;
249
+    }
247 250
     q = findNextNonWSBack(q-1,start);
248 251
     while (q > start && isdigit(*q))
249 252
         q--;
250 253
 
251
-    objid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10);
254
+    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
255
+        /*
256
+         * PDFs with multiple revisions will have %%EOF before the end of the file, 
257
+         * followed by the next revision of the PDF.  If this is the case, we can 
258
+         * detect it and continue parsing after the %%EOF.
259
+         */
260
+        if (q - 4 > start) {
261
+            const char* lastfile = q - 4;
262
+            if (0 != strncmp(lastfile, "\%\%EOF", 5)) {
263
+                /* Nope, wasn't %%EOF, I guess just fail out. */
264
+                cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
265
+                return -1;
266
+            }
267
+            /* Yup, looks like the file continues after %%EOF.
268
+             * Probably another revision.  Keep parsing... */
269
+            q++;
270
+            cli_dbgmsg("cli_pdf: \%\%EOF detected before end of file, at %zu\n", (size_t)q);
271
+        } else {
272
+            /* Failed parsing at the very beginning */
273
+            cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
274
+            return -1;
275
+        }
276
+        /* Try again, with offset slightly adjusted */
277
+        if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
278
+            cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
279
+            return -1;
280
+        }
281
+        cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n");
282
+    }
252 283
     obj->id = (objid << 8) | (genid&0xff);
253 284
     obj->start = q2+4 - pdf->map;
254 285
     obj->flags = 0;
... ...
@@ -406,7 +437,7 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o
406 406
 
407 407
 static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len)
408 408
 {
409
-    int length;
409
+    unsigned long length;
410 410
     const char *q;
411 411
 
412 412
     q = cli_memstr(start, len, "/Length", 7);
... ...
@@ -421,17 +452,24 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
421 421
 
422 422
     /* len -= start - q; */
423 423
     q = start;
424
-    length = (int)cli_strntol(q, (size_t)len, NULL, 10);
424
+    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
425
+        cli_dbgmsg("cli_pdf: failed to parse object length\n");
426
+        return 0;
427
+    }
428
+
425 429
     while (isdigit(*q) && len > 0) {
426 430
         q++;
427 431
         len--;
428 432
     }
429 433
 
430 434
     if (*q == ' ' && len > 0) {
431
-        int genid;
435
+        unsigned long genid;
432 436
         q++;
433 437
         len--;
434
-        genid = (int)cli_strntol(q, (size_t)len, NULL, 10);
438
+        if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&genid)) {
439
+            cli_dbgmsg("cli_pdf: failed to parse object genid\n");
440
+            return 0;
441
+        }
435 442
 
436 443
         while(isdigit(*q) && len > 0) {
437 444
             q++;
... ...
@@ -439,7 +477,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
439 439
         }
440 440
 
441 441
         if (q[0] == ' ' && q[1] == 'R') {
442
-            cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
442
+            cli_dbgmsg("cli_pdf: length is in indirect object %lu %lu\n", length, genid);
443 443
 
444 444
             obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
445 445
             if (!obj) {
... ...
@@ -453,7 +491,10 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
453 453
                 return 0;
454 454
             }
455 455
 
456
-            length = (int)cli_strntol(q, (size_t)len, NULL, 10);
456
+            if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
457
+                cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n");
458
+                return 0;
459
+            }
457 460
         }
458 461
     }
459 462
 
... ...
@@ -1283,7 +1324,8 @@ static int pdf_readint(const char *q0, int len, const char *key);
1283 1283
 static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1284 1284
 {
1285 1285
     const char *q, *q2;
1286
-    uint32_t objid;
1286
+    unsigned long objid;
1287
+    unsigned long genid;
1287 1288
 
1288 1289
     if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
1289 1290
         q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
... ...
@@ -1302,19 +1344,27 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1302 1302
     len -= q2 - q;
1303 1303
     q = q2;
1304 1304
 
1305
-    objid = (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) << 8;
1305
+    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&objid)) {
1306
+        cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n");
1307
+        return;
1308
+    }
1309
+    objid = objid << 8;
1306 1310
     q2 = pdf_nextobject(q, len);
1307 1311
     if (!q2 || !isdigit(*q2))
1308 1312
         return;
1309 1313
     len -= q2 - q;
1310 1314
     q = q2;
1311 1315
 
1312
-    objid |= (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) & 0xff;
1316
+    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&genid)) {
1317
+        cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n");
1318
+        return;
1319
+    }
1320
+    objid |= genid & 0xff; 
1313 1321
     q2 = pdf_nextobject(q, len);
1314 1322
     if (!q2 || *q2 != 'R')
1315 1323
         return;
1316 1324
 
1317
-    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff);
1325
+    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff);
1318 1326
 
1319 1327
     pdf->enc_objid = objid;
1320 1328
 }
... ...
@@ -1569,9 +1619,17 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1569 1569
             q2 = pdf_nextobject(q, dict_remaining);
1570 1570
             if (q2 && isdigit(*q2)) {
1571 1571
                 const char * q2_old = NULL;
1572
+                unsigned long objid;
1573
+                unsigned long genid;
1574
+
1572 1575
                 dict_remaining -= (off_t)(q2 - q);
1573 1576
 
1574
-                uint32_t objid = (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) << 8;
1577
+                if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&objid)) {
1578
+                    cli_dbgmsg("cli_pdf: failed to parse object objid\n");
1579
+                    return;
1580
+                }
1581
+                objid = objid << 8;
1582
+
1575 1583
                 while (isdigit(*q2))
1576 1584
                     q2++;
1577 1585
 
... ...
@@ -1579,13 +1637,17 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1579 1579
                 q2 = pdf_nextobject(q2, dict_remaining);
1580 1580
                 if (q2 && isdigit(*q2)) {
1581 1581
                     dict_remaining -= (off_t)(q2 - q2_old);
1582
-                    objid |= (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) & 0xff;
1582
+                    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&genid)) {
1583
+                        cli_dbgmsg("cli_pdf: failed to parse object genid\n");
1584
+                        return;
1585
+                    }
1586
+                    objid |= genid & 0xff;
1583 1587
 
1584 1588
                     q2 = pdf_nextobject(q2, dict_remaining);
1585 1589
                     if (q2 && *q2 == 'R') {
1586 1590
                         struct pdf_obj *obj2;
1587 1591
 
1588
-                        cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n", pdfname, objid >> 8, objid&0xff);
1592
+                        cli_dbgmsg("cli_pdf: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff);
1589 1593
                         obj2 = find_obj(pdf, obj, objid);
1590 1594
                         if (obj2) {
1591 1595
                             enum pdf_objflags flag =
... ...
@@ -1879,9 +1941,16 @@ static char *pdf_readval(const char *q, int len, const char *key)
1879 1879
 
1880 1880
 static int pdf_readint(const char *q0, int len, const char *key)
1881 1881
 {
1882
+    long value = 0;
1882 1883
     const char *q  = pdf_getdict(q0, &len, key);
1883 1884
 
1884
-    return (q != NULL) ? (int)cli_strntol(q, (size_t)len, NULL, 10) : -1;
1885
+    if (q == NULL) {
1886
+        value = -1;
1887
+    }
1888
+    else if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, &value)) {
1889
+        value = -1;
1890
+    }
1891
+    return value;
1885 1892
 }
1886 1893
 
1887 1894
 static int pdf_readbool(const char *q0, int len, const char *key, int Default)
... ...
@@ -2425,16 +2494,21 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2425 2425
 
2426 2426
             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
2427 2427
 
2428
-            xref = cli_strntol(q, q - eofmap + map_off, NULL, 10);
2429
-            bytesleft = map->len - offset - xref;
2430
-            if (bytesleft > 4096)
2431
-                bytesleft = 4096;
2432
-
2433
-            q = fmap_need_off_once(map, offset + xref, bytesleft);
2434
-            if (!q || xrefCheck(q, q+bytesleft) == -1) {
2435
-                cli_dbgmsg("cli_pdf: did not find valid xref\n");
2428
+            if (CL_SUCCESS != cli_strntol_wrap(q, q - eofmap + map_off, 0, 10, &xref)) {
2429
+                cli_dbgmsg("cli_pdf: failed to parse PDF trailer xref\n");
2436 2430
                 pdf.flags |= 1 << BAD_PDF_TRAILER;
2437 2431
             }
2432
+            else {
2433
+                bytesleft = map->len - offset - xref;
2434
+                if (bytesleft > 4096)
2435
+                    bytesleft = 4096;
2436
+
2437
+                q = fmap_need_off_once(map, offset + xref, bytesleft);
2438
+                if (!q || xrefCheck(q, q+bytesleft) == -1) {
2439
+                    cli_dbgmsg("cli_pdf: did not find valid xref\n");
2440
+                    pdf.flags |= 1 << BAD_PDF_TRAILER;
2441
+                }
2442
+            }
2438 2443
         }
2439 2444
     }
2440 2445
 
... ...
@@ -3221,9 +3295,10 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
3221 3221
         goto cleanup;
3222 3222
     }
3223 3223
 
3224
-    count = cli_strntol(begin, (size_t)(obj->start + pdf->map + objsz - begin), NULL, 10);
3225
-    if (count != npages)
3224
+    if ((CL_SUCCESS != cli_strntol_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, (long*)&count)) ||
3225
+        (count != npages)) {
3226 3226
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3227
+    }
3227 3228
 
3228 3229
 cleanup:
3229 3230
     pdf_free_array(array);
... ...
@@ -3266,7 +3341,8 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
3266 3266
     if ((size_t)(p1 - start) == objsz)
3267 3267
         return;
3268 3268
 
3269
-    ncolors = cli_strntol(p1, (size_t)((p1 - start) - objsz), NULL, 10);
3269
+    if (CL_SUCCESS != cli_strntol_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, (long*)&ncolors))
3270
+        return;
3270 3271
 
3271 3272
     /* We only care if the number of colors > 2**24 */
3272 3273
     if (ncolors < 1<<24)
... ...
@@ -31,6 +31,7 @@
31 31
 #include <stdio.h>
32 32
 #include <stdlib.h>
33 33
 #include <string.h>
34
+#include <limits.h>
34 35
 #ifdef HAVE_STRINGS_H
35 36
 #include <strings.h>
36 37
 #endif
... ...
@@ -523,16 +524,21 @@ size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
523 523
  * Copyright (c) 1990 The Regents of the University of California.
524 524
  * All rights reserved.
525 525
  *
526
- * @param nptr 		Pointer to start of string.
527
- * @param n 		Max length of buffer in bytes.
528
- * @param endptr 	[OUT optional] If endptr is not NULL, strtol() stores the address
529
- * 					of the first invalid character in *endptr. If there were no digits
530
- * 					at all, however, strtol() stores the
531
- * 					original value of str in *endptr.
532
- * @param int 		The conversion is done according to the given base, which must be
533
- * 					between 2 and 36 inclusive, or be the special value 0.
534
- * @return long 	The signed long value.
526
+ * @param nptr          Pointer to start of string.
527
+ * @param n             Max length of buffer in bytes.
528
+ * @param[out] endptr   [optional] If endptr is not NULL, strtol() stores the address
529
+ *                      of the first invalid character in *endptr. If there were no digits
530
+ *                      at all, however, strtol() stores the
531
+ *                      original value of nptr in *endptr.
532
+ *                      Nota bene: if the buffer is not null-terminated and the number
533
+ *                       comprises the entire buffer, endptr will point past the end of
534
+ *                       the buffer, and the caller should check if endptr >= nptr + n.
535
+ *                      
536
+ * @param base          The conversion is done according to the given base, which must be
537
+ *                      between 2 and 36 inclusive, or be the special value 0.
538
+ * @return long         The signed long value.
535 539
  */
540
+static
536 541
 long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
537 542
 {
538 543
 	register const char *s = nptr;
... ...
@@ -627,10 +633,54 @@ long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
627 627
 	} else if (neg)
628 628
 		acc = -acc;
629 629
 	if (endptr != 0)
630
-		*endptr = (char *) (any ? s - 1 : nptr);
630
+		*endptr = (char *) (any ? s : nptr);
631 631
 	return (acc);
632 632
 }
633 633
 
634
+/**
635
+ * @brief   Converts the string in buf to a long value, reporting failure through the return code.
636
+ * 
637
+ * Wrapper for cli_strntol() that provides incentive to check for failure.
638
+ * 
639
+ * @param buf               Pointer to start of string. 
640
+ * @param buf_size 			Max length of buffer to convert to integer.
641
+ * @param fail_at_nondigit  If 1, fail if a non-digit character is found before the end of the buffer.
642
+ *                          If 0, non-digit character represents end of number and is not a failure.
643
+ * @param base              The conversion is done according to the given base, which must be
644
+ *                          between 2 and 36 inclusive, or be the special value 0.
645
+ * @param[out] result 	    Long integer value of ascii number.
646
+ * @return CL_SUCCESS       Success
647
+ * @return CL_EPARSE        Failure
648
+ */
649
+int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result)
650
+{
651
+    char *endptr = NULL;
652
+    long num;
653
+
654
+    if (buf_size == 0 || !buf || !result) {
655
+        /* invalid parameter */
656
+        return CL_EPARSE;
657
+    }
658
+    errno = 0;
659
+    num = cli_strntol(buf, buf_size, &endptr, base);
660
+    if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) {
661
+        /* under- or overflow */
662
+        return CL_EPARSE;
663
+    }
664
+    if (endptr == buf) {
665
+        /* no digits */
666
+        return CL_EPARSE;
667
+    }
668
+    if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
669
+        /* non-digit encountered */
670
+        return CL_EPARSE;
671
+    }
672
+    /* success */
673
+    *result = num;
674
+    return CL_SUCCESS;
675
+}
676
+
677
+
634 678
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip)
635 679
 {
636 680
     size_t tokens_found, i;
... ...
@@ -68,7 +68,7 @@ const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle
68 68
 char *cli_strrcpy(char *dest, const char *source);
69 69
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
70 70
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip);
71
-long cli_strntol(const char *nptr, size_t n, char **endptr, register int base);
71
+int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result);
72 72
 int cli_isnumber(const char *str);
73 73
 char *cli_unescape(const char *str);
74 74
 struct text_buffer;