Browse code

Replacing strntol with strntoul to ensure proper (un)signedness when parsing numbers from PDFs.

Micah Snyder authored on 2018/06/13 09:47:21
Showing 3 changed files
... ...
@@ -251,7 +251,7 @@ int pdf_findobj(struct pdf_struct *pdf)
251 251
     while (q > start && isdigit(*q))
252 252
         q--;
253 253
 
254
-    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&genid)) {
254
+    if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &genid)) {
255 255
         cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs);
256 256
         /* Failed to parse, probably not a real object.  Skip past the "obj" thing, and continue. */
257 257
         pdf->offset = q2 + 4 - pdf->map;
... ...
@@ -261,7 +261,7 @@ int pdf_findobj(struct pdf_struct *pdf)
261 261
     while (q > start && isdigit(*q))
262 262
         q--;
263 263
 
264
-    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
264
+    if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &objid)) {
265 265
         /*
266 266
          * PDFs with multiple revisions will have %%EOF before the end of the file, 
267 267
          * followed by the next revision of the PDF.  If this is the case, we can 
... ...
@@ -288,7 +288,7 @@ int pdf_findobj(struct pdf_struct *pdf)
288 288
             return 2;
289 289
         }
290 290
         /* Try again, with offset slightly adjusted */
291
-        if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
291
+        if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &objid)) {
292 292
             cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
293 293
             /* Still failed... Probably not a real object.  Skip past the "obj" thing, and continue. */
294 294
             pdf->offset = q2 + 4 - pdf->map;
... ...
@@ -468,7 +468,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
468 468
 
469 469
     len -= start - q;
470 470
     q = start;
471
-    if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
471
+    if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) {
472 472
         cli_dbgmsg("cli_pdf: failed to parse object length\n");
473 473
         return 0;
474 474
     }
... ...
@@ -482,7 +482,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
482 482
         unsigned long genid;
483 483
         q++;
484 484
         len--;
485
-        if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&genid)) {
485
+        if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &genid)) {
486 486
             cli_dbgmsg("cli_pdf: failed to parse object genid\n");
487 487
             return 0;
488 488
         }
... ...
@@ -507,7 +507,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
507 507
                 return 0;
508 508
             }
509 509
 
510
-            if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
510
+            if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) {
511 511
                 cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n");
512 512
                 return 0;
513 513
             }
... ...
@@ -1360,7 +1360,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1360 1360
     len -= q2 - q;
1361 1361
     q = q2;
1362 1362
 
1363
-    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&objid)) {
1363
+    if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &objid)) {
1364 1364
         cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n");
1365 1365
         return;
1366 1366
     }
... ...
@@ -1371,7 +1371,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1371 1371
     len -= q2 - q;
1372 1372
     q = q2;
1373 1373
 
1374
-    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&genid)) {
1374
+    if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &genid)) {
1375 1375
         cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n");
1376 1376
         return;
1377 1377
     }
... ...
@@ -1640,7 +1640,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1640 1640
 
1641 1641
                 dict_remaining -= (off_t)(q2 - q);
1642 1642
 
1643
-                if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&objid)) {
1643
+                if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &objid)) {
1644 1644
                     cli_dbgmsg("cli_pdf: failed to parse object objid\n");
1645 1645
                     return;
1646 1646
                 }
... ...
@@ -1653,7 +1653,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1653 1653
                 q2 = pdf_nextobject(q2, dict_remaining);
1654 1654
                 if (q2 && isdigit(*q2)) {
1655 1655
                     dict_remaining -= (off_t)(q2 - q2_old);
1656
-                    if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&genid)) {
1656
+                    if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &genid)) {
1657 1657
                         cli_dbgmsg("cli_pdf: failed to parse object genid\n");
1658 1658
                         return;
1659 1659
                     }
... ...
@@ -2370,7 +2370,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2370 2370
     size_t size = map->len - offset;
2371 2371
     off_t versize = size > 1032 ? 1032 : size;
2372 2372
     off_t map_off, bytesleft;
2373
-    long xref;
2373
+    unsigned long xref;
2374 2374
     const char *pdfver, *tmp, *start, *eofmap, *q, *eof;
2375 2375
     int rc, badobjects = 0;
2376 2376
     unsigned i, alerts = 0;
... ...
@@ -2510,7 +2510,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2510 2510
 
2511 2511
             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
2512 2512
 
2513
-            if (CL_SUCCESS != cli_strntol_wrap(q, q - eofmap + map_off, 0, 10, &xref)) {
2513
+            if (CL_SUCCESS != cli_strntoul_wrap(q, q - eofmap + map_off, 0, 10, &xref)) {
2514 2514
                 cli_dbgmsg("cli_pdf: failed to parse PDF trailer xref\n");
2515 2515
                 pdf.flags |= 1 << BAD_PDF_TRAILER;
2516 2516
             }
... ...
@@ -3321,7 +3321,7 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
3321 3321
         goto cleanup;
3322 3322
     }
3323 3323
 
3324
-    if ((CL_SUCCESS != cli_strntol_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, (long*)&count)) ||
3324
+    if ((CL_SUCCESS != cli_strntoul_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, &count)) ||
3325 3325
         (count != npages)) {
3326 3326
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3327 3327
     }
... ...
@@ -3367,7 +3367,7 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
3367 3367
     if ((size_t)(p1 - start) == objsz)
3368 3368
         return;
3369 3369
 
3370
-    if (CL_SUCCESS != cli_strntol_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, (long*)&ncolors))
3370
+    if (CL_SUCCESS != cli_strntoul_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, &ncolors))
3371 3371
         return;
3372 3372
 
3373 3373
     /* We only care if the number of colors > 2**24 */
... ...
@@ -538,57 +538,58 @@ size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
538 538
  *                      between 2 and 36 inclusive, or be the special value 0.
539 539
  * @return long         The signed long value.
540 540
  */
541
-static
542
-long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
541
+static long cli_strntol(const char* nptr, size_t n, char** endptr, register int base)
543 542
 {
544
-	register const char *s = nptr;
545
-	register unsigned long acc;
546
-	register int c;
547
-	register unsigned long cutoff;
548
-	register int neg = 0, any, cutlim;
543
+    register const char* s = nptr;
544
+    register unsigned long acc = 0;
545
+    register int c;
546
+    register unsigned long cutoff;
547
+    register int neg = 0, any = 0, cutlim;
549 548
 
550 549
     if (0 == n) {
551
-        return 0;
550
+        goto done;
552 551
     }
553
-	/*
552
+    /*
554 553
 	 * Skip white space and pick up leading +/- sign if any.
555 554
 	 * If base is 0, allow 0x for hex and 0 for octal, else
556 555
 	 * assume decimal; if base is already 16, allow 0x.
557 556
 	 */
558
-	do {
559
-		c = *s;
560
-	} while (isspace(c) && (++s < nptr + n));
557
+    do {
558
+        c = *s;
559
+    } while (isspace(c) && (++s < nptr + n));
561 560
 
562 561
     if (s >= nptr + n) {
563
-        return 0;
562
+        goto done;
564 563
     }
565 564
 
566
-	if (c == '-') {
567
-		neg = 1;
568
-		c = *s++;
569
-        if (s >= nptr + n)
570
-            return 0;
571
-	} else if (c == '+') {
572
-		c = *s++;
573
-        if (s >= nptr + n)
574
-            return 0;
565
+    if (c == '-') {
566
+        neg = 1;
567
+        c = *s++;
568
+        if (s >= nptr + n) {
569
+            goto done;
570
+        }
571
+    } else if (c == '+') {
572
+        c = *s++;
573
+        if (s >= nptr + n) {
574
+            goto done;
575
+        }
575 576
     }
576 577
 
577
-	if (base == 0 || base == 16) {
578
+    if (base == 0 || base == 16) {
578 579
         if (c == '0' && (*s == 'x' || *s == 'X')) {
579 580
             if (s + 2 >= nptr + n) {
580
-                return 0;
581
+                goto done;
581 582
             }
582 583
             c = s[1];
583 584
             s += 2;
584 585
             base = 16;
585 586
         }
586
-	}
587
+    }
587 588
 
588
-	if (base == 0)
589
-		base = c == '0' ? 8 : 10;
589
+    if (base == 0)
590
+        base = c == '0' ? 8 : 10;
590 591
 
591
-	/*
592
+    /*
592 593
 	 * Compute the cutoff value between legal numbers and illegal
593 594
 	 * numbers.  That is the largest legal value, divided by the
594 595
 	 * base.  An input number that is greater than this value, if
... ...
@@ -605,40 +606,142 @@ long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
605 605
 	 * Set any if any `digits' consumed; make it negative to indicate
606 606
 	 * overflow.
607 607
 	 */
608
-	cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
609
-	cutlim = cutoff % (unsigned long)base;
610
-	cutoff /= (unsigned long)base;
611
-	for (acc = 0, any = 0; s < nptr + n; s++) {
608
+    cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
609
+    cutlim = cutoff % (unsigned long)base;
610
+    cutoff /= (unsigned long)base;
611
+    for (acc = 0, any = 0; s < nptr + n; s++) {
612 612
         c = *s;
613 613
 
614
-		if (isdigit(c))
615
-			c -= '0';
616
-		else if (isalpha(c))
617
-			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
618
-		else
619
-			break;
620
-		if (c >= base)
621
-			break;
622
-		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
623
-			any = -1;
624
-		else {
625
-			any = 1;
626
-			acc *= base;
627
-			acc += c;
628
-		}
629
-	}
630
-	if (any < 0) {
631
-		acc = neg ? LONG_MIN : LONG_MAX;
632
-		errno = ERANGE;
633
-	} else if (neg)
634
-		acc = -acc;
635
-	if (endptr != 0)
636
-		*endptr = (char *) (any ? s : nptr);
637
-	return (acc);
614
+        if (isdigit(c))
615
+            c -= '0';
616
+        else if (isalpha(c))
617
+            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
618
+        else
619
+            break;
620
+        if (c >= base)
621
+            break;
622
+        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
623
+            any = -1;
624
+        else {
625
+            any = 1;
626
+            acc *= base;
627
+            acc += c;
628
+        }
629
+    }
630
+    if (any < 0) {
631
+        acc = neg ? LONG_MIN : LONG_MAX;
632
+        errno = ERANGE;
633
+    } else if (neg)
634
+        acc = -acc;
635
+
636
+done:
637
+    if (endptr != 0)
638
+        *endptr = (char*)(any ? s : nptr);
639
+    return (acc);
638 640
 }
639 641
 
640 642
 /**
641
- * @brief 	The strntol() function converts the string in str to a long value.
643
+ * @brief The cli_strntoul() function converts the string in nptr to an unsigned long value.
644
+ * Modified from the BSD strtoul() to bound the conversion by a buffer length, so that non-null-terminated strings can be parsed.
645
+ *
646
+ * Copyright (c) 1990 The Regents of the University of California.
647
+ * All rights reserved.
648
+ *
649
+ * @param nptr          Pointer to start of string.
650
+ * @param n             Max length of buffer in bytes.
651
+ * @param[out] endptr   [optional] If endptr is not NULL, cli_strntoul() stores the address
652
+ *                      of the first invalid character in *endptr. If there were no digits
653
+ *                      at all, however, cli_strntoul() stores the
654
+ *                      original value of nptr in *endptr.
655
+ * 	                     Nota Bene:  If the buffer is non-null terminated and the number
656
+ *                       comprises the entire buffer, endptr will point past the end of
657
+ *                       the buffer, and the caller should check if endptr >= nptr + n.
658
+ *                      
659
+ * @param base          The conversion is done according to the given base, which must be
660
+ *                      between 2 and 36 inclusive, or be the special value 0.
661
+ * @return unsigned long The unsigned long value.
662
+ */
663
/*
 * Implementation notes for cli_strntoul():
 *
 *  - No byte at or beyond nptr + n is ever read; a NULL nptr or n == 0
 *    yields 0 with *endptr set to nptr.
 *  - `s` always points at the next unread character.
 *  - Characters are converted to unsigned char before being handed to the
 *    <ctype.h> classifiers, because passing a negative char is undefined.
 *  - A leading "0x"/"0X" is consumed for base 0 or 16 only when a hex digit
 *    follows inside the buffer; otherwise the leading "0" is the number.
 *  - For base 0, a leading '0' selects octal, anything else decimal.
 *  - On overflow the result is ULONG_MAX and errno is set to ERANGE.
 *  - A leading '-' negates the result in unsigned arithmetic.
 */
static unsigned long
cli_strntoul(const char *nptr, size_t n, char **endptr, int base)
{
    const char *s = nptr;
    const char *end;
    unsigned long acc = 0;
    unsigned long cutoff;
    int cutlim;
    int neg = 0;
    int any = 0; /* 0: no digits yet, 1: digits consumed, -1: overflow */
    int c;

    if (NULL == nptr || 0 == n) {
        goto done;
    }
    end = nptr + n;

    /* Skip leading white space without running off the buffer. */
    while (s < end && isspace((unsigned char)*s)) {
        s++;
    }
    if (s >= end) {
        goto done;
    }

    /* Optional sign; a sign that ends the buffer means "no digits". */
    if (*s == '-' || *s == '+') {
        neg = (*s == '-');
        s++;
        if (s >= end) {
            goto done;
        }
    }

    /* Optional hex prefix: needs "0", "x"/"X" and one hex digit in range. */
    if ((base == 0 || base == 16) && *s == '0' && (end - s) > 2 &&
        (s[1] == 'x' || s[1] == 'X') && isxdigit((unsigned char)s[2])) {
        s += 2;
        base = 16;
    }
    if (base == 0) {
        base = (*s == '0') ? 8 : 10;
    }

    /*
     * cutoff is the largest value that can still be multiplied by base;
     * cutlim is the largest digit that may follow when acc == cutoff.
     */
    cutoff = ULONG_MAX / (unsigned long)base;
    cutlim = (int)(ULONG_MAX % (unsigned long)base);

    for (; s < end; s++) {
        c = (unsigned char)*s;

        if (isdigit(c)) {
            c -= '0';
        } else if (isalpha(c)) {
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }
        if (c >= base) {
            break;
        }
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {
            /* Keep consuming digits so endptr lands after the number. */
            any = -1;
        } else {
            any = 1;
            acc *= (unsigned long)base;
            acc += (unsigned long)c;
        }
    }

    if (any < 0) {
        acc = ULONG_MAX;
        errno = ERANGE;
    } else if (neg) {
        acc = -acc;
    }

done:
    if (endptr != NULL) {
        *endptr = (char *)(any ? s : nptr);
    }
    return acc;
}
741
+
742
+/**
743
+ * @brief 	cli_strntol_wrap() converts the string in str to a long value.
642 744
  * 
643 745
  * Wrapper for cli_strntol() that provides incentive to check for failure.
644 746
  * 
... ...
@@ -680,6 +783,48 @@ int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int
680 680
     return CL_SUCCESS;
681 681
 }
682 682
 
683
+/**
684
+ * @brief 	cli_strntoul_wrap() converts the string in str to a long value.
685
+ * 
686
+ * Wrapper for cli_strntoul() that provides incentive to check for failure.
687
+ * 
688
+ * @param buf               Pointer to start of string. 
689
+ * @param buf_size 			Max length of buffer to convert to integer.
690
+ * @param fail_at_nondigit  If 1, fail out if the a non-digit character is found before the end of the buffer.
691
+ *                          If 0, non-digit character represents end of number and is not a failure.
692
+ * @param base              The conversion is done according to the given base, which must be
693
+ *                          between 2 and 36 inclusive, or be the special value 0.
694
+ * @param[out] result 	    Unsigned long integer value of ascii number.
695
+ * @return CL_SUCCESS       Success
696
+ * @return CL_EPARSE        Failure
697
+ */
698
+int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result)
699
+{
700
+    char *endptr = NULL;
701
+    long num;
702
+
703
+    if (buf_size == 0 || !buf || !result) {
704
+        /* invalid parameter */
705
+        return CL_EPARSE;
706
+    }
707
+    errno = 0;
708
+    num = cli_strntoul(buf, buf_size, &endptr, base);
709
+    if (num == ULONG_MAX && errno == ERANGE) {
710
+        /* under- or overflow */
711
+        return CL_EPARSE;
712
+    }
713
+    if (endptr == buf) {
714
+        /* no digits */
715
+        return CL_EPARSE;
716
+    }
717
+    if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
718
+        /* non-digit encountered */
719
+        return CL_EPARSE;
720
+    }
721
+    /* success */
722
+    *result = num;
723
+    return CL_SUCCESS;
724
+}
683 725
 
684 726
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip)
685 727
 {
... ...
@@ -69,6 +69,7 @@ char *cli_strrcpy(char *dest, const char *source);
69 69
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
70 70
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip);
71 71
 int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result);
72
+int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result);
72 73
 int cli_isnumber(const char *str);
73 74
 char *cli_unescape(const char *str);
74 75
 struct text_buffer;