Browse code

bb12133: Implementing cli_strntol based on gnu gcc's strtol implementation with modifications to limit string buffer length for non-null terminated strings. Using cli_strntol in pdf.c for added safety.

Micah Snyder authored on 2018/06/02 03:23:25
Showing 3 changed files
... ...
@@ -243,12 +243,12 @@ int pdf_findobj(struct pdf_struct *pdf)
243 243
     while (q > start && isdigit(*q))
244 244
         q--;
245 245
 
246
-    genid = atoi(q);
246
+    genid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10);
247 247
     q = findNextNonWSBack(q-1,start);
248 248
     while (q > start && isdigit(*q))
249 249
         q--;
250 250
 
251
-    objid = atoi(q);
251
+    objid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10);
252 252
     obj->id = (objid << 8) | (genid&0xff);
253 253
     obj->start = q2+4 - pdf->map;
254 254
     obj->flags = 0;
... ...
@@ -421,17 +421,22 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
421 421
 
422 422
     /* len -= start - q; */
423 423
     q = start;
424
-    length = atoi(q);
425
-    while (isdigit(*q))
424
+    length = (int)cli_strntol(q, (size_t)len, NULL, 10);
425
+    while (isdigit(*q) && len > 0) {
426 426
         q++;
427
+        len--;
428
+    }
427 429
 
428
-    if (*q == ' ') {
430
+    if (*q == ' ' && len > 0) {
429 431
         int genid;
430 432
         q++;
431
-        genid = atoi(q);
433
+        len--;
434
+        genid = (int)cli_strntol(q, (size_t)len, NULL, 10);
432 435
 
433
-        while(isdigit(*q))
436
+        while(isdigit(*q) && len > 0) {
434 437
             q++;
438
+            len--;
439
+        }
435 440
 
436 441
         if (q[0] == ' ' && q[1] == 'R') {
437 442
             cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
... ...
@@ -448,7 +453,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
448 448
                 return 0;
449 449
             }
450 450
 
451
-            length = atoi(q);
451
+            length = (int)cli_strntol(q, (size_t)len, NULL, 10);
452 452
         }
453 453
     }
454 454
 
... ...
@@ -564,8 +569,8 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
564 564
     cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
565 565
     nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n*8);
566 566
     if (!nrounds) {
567
-	cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n");
568
-	return;
567
+    cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n");
568
+    return;
569 569
     }
570 570
     cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");
571 571
 
... ...
@@ -1294,17 +1299,17 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1294 1294
     q2 = pdf_nextobject(q, len);
1295 1295
     if (!q2 || !isdigit(*q2))
1296 1296
         return;
1297
-
1298
-    objid = atoi(q2) << 8;
1299 1297
     len -= q2 - q;
1300 1298
     q = q2;
1299
+
1300
+    objid = (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) << 8;
1301 1301
     q2 = pdf_nextobject(q, len);
1302 1302
     if (!q2 || !isdigit(*q2))
1303 1303
         return;
1304
-
1305
-    objid |= atoi(q2) & 0xff;
1306 1304
     len -= q2 - q;
1307 1305
     q = q2;
1306
+
1307
+    objid |= (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) & 0xff;
1308 1308
     q2 = pdf_nextobject(q, len);
1309 1309
     if (!q2 || *q2 != 'R')
1310 1310
         return;
... ...
@@ -1566,7 +1571,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1566 1566
                 const char * q2_old = NULL;
1567 1567
                 dict_remaining -= (off_t)(q2 - q);
1568 1568
 
1569
-                uint32_t objid = atoi(q2) << 8;
1569
+                uint32_t objid = (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) << 8;
1570 1570
                 while (isdigit(*q2))
1571 1571
                     q2++;
1572 1572
 
... ...
@@ -1574,7 +1579,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1574 1574
                 q2 = pdf_nextobject(q2, dict_remaining);
1575 1575
                 if (q2 && isdigit(*q2)) {
1576 1576
                     dict_remaining -= (off_t)(q2 - q2_old);
1577
-                    objid |= atoi(q2) & 0xff;
1577
+                    objid |= (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) & 0xff;
1578 1578
 
1579 1579
                     q2 = pdf_nextobject(q2, dict_remaining);
1580 1580
                     if (q2 && *q2 == 'R') {
... ...
@@ -1876,7 +1881,7 @@ static int pdf_readint(const char *q0, int len, const char *key)
1876 1876
 {
1877 1877
     const char *q  = pdf_getdict(q0, &len, key);
1878 1878
 
1879
-    return (q != NULL) ? atoi(q) : -1;
1879
+    return (q != NULL) ? (int)cli_strntol(q, (size_t)len, NULL, 10) : -1;
1880 1880
 }
1881 1881
 
1882 1882
 static int pdf_readbool(const char *q0, int len, const char *key, int Default)
... ...
@@ -1913,9 +1918,9 @@ static void dbg_printhex(const char *msg, const char *hex, unsigned len)
1913 1913
 }
1914 1914
 
1915 1915
 static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1916
-				const char *U, int32_t P, int EM,
1917
-				const char *UE,
1918
-				unsigned length, unsigned oulen)
1916
+                const char *U, int32_t P, int EM,
1917
+                const char *UE,
1918
+                unsigned length, unsigned oulen)
1919 1919
 {
1920 1920
     unsigned i;
1921 1921
     uint8_t result[16];
... ...
@@ -2420,7 +2425,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2420 2420
 
2421 2421
             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
2422 2422
 
2423
-            xref = atol(q);
2423
+            xref = cli_strntol(q, q - eofmap + map_off, NULL, 10);
2424 2424
             bytesleft = map->len - offset - xref;
2425 2425
             if (bytesleft > 4096)
2426 2426
                 bytesleft = 4096;
... ...
@@ -2580,17 +2585,17 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2580 2580
             }
2581 2581
         }
2582 2582
 #if 0
2583
-	/* TODO: find both trailers, and /Encrypt settings */
2584
-	if (pdf.flags & (1 << LINEARIZED_PDF))
2585
-	    pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2586
-	if (pdf.flags & (1 << MANY_FILTERS))
2587
-	    pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2588
-	if (!rc && (pdf.flags &
2589
-	    ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
2590
-	     (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
2591
-    	     (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
2592
-	    rc = CL_EUNPACK;
2593
-	}
2583
+    /* TODO: find both trailers, and /Encrypt settings */
2584
+    if (pdf.flags & (1 << LINEARIZED_PDF))
2585
+        pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2586
+    if (pdf.flags & (1 << MANY_FILTERS))
2587
+        pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2588
+    if (!rc && (pdf.flags &
2589
+        ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
2590
+         (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
2591
+             (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
2592
+        rc = CL_EUNPACK;
2593
+    }
2594 2594
 #endif
2595 2595
     }
2596 2596
 
... ...
@@ -3216,7 +3221,7 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
3216 3216
         goto cleanup;
3217 3217
     }
3218 3218
 
3219
-    count = strtoul(begin, NULL, 10);
3219
+    count = cli_strntol(begin, (size_t)(obj->start + pdf->map + objsz - begin), NULL, 10);
3220 3220
     if (count != npages)
3221 3221
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3222 3222
 
... ...
@@ -3261,7 +3266,7 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
3261 3261
     if ((size_t)(p1 - start) == objsz)
3262 3262
         return;
3263 3263
 
3264
-    ncolors = strtoul(p1, NULL, 10);
3264
+    ncolors = cli_strntol(p1, (size_t)((p1 - start) - objsz), NULL, 10);
3265 3265
 
3266 3266
     /* We only care if the number of colors > 2**24 */
3267 3267
     if (ncolors < 1<<24)
... ...
@@ -516,6 +516,121 @@ size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
516 516
     return tokens_found;
517 517
 }
518 518
 
519
/**
 * @brief Length-bounded strtol(): convert the text in a buffer to a long.
 *
 * Behaves like strtol(), except that at most @p n bytes starting at @p nptr
 * are ever read, so the input does not have to be NUL-terminated.
 *
 * Derived from the BSD strtol() implementation.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * @param nptr      Pointer to the start of the text.
 * @param n         Maximum number of bytes that may be read from nptr.
 * @param endptr    [OUT optional] If not NULL, receives the address of the
 *                  first byte that was not converted. If no digits were
 *                  converted at all, it receives nptr.
 * @param base      Conversion base: 2 to 36 inclusive, or 0 to auto-detect
 *                  ("0x"/"0X" prefix means 16, leading "0" means 8,
 *                  otherwise 10). Base 16 also accepts a "0x"/"0X" prefix.
 * @return long     The converted value. 0 if no digits were found or the
 *                  base is invalid (errno is set to EINVAL for a bad base).
 *                  LONG_MAX or LONG_MIN with errno set to ERANGE if the value
 *                  does not fit in a long.
 */
long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
{
    const char *s = nptr;
    const char *end;
    unsigned long acc = 0;
    unsigned long cutoff;
    int cutlim;
    int neg     = 0;
    int any     = 0; /* 0: no digits yet, 1: digits consumed, -1: overflow */
    long result = 0;

    /* Default for every "nothing converted" exit below. */
    if (endptr != NULL) {
        *endptr = (char *)nptr;
    }

    if (0 == n) {
        return 0;
    }

    if (base != 0 && (base < 2 || base > 36)) {
        errno = EINVAL;
        return 0;
    }

    end = nptr + n;

    /*
     * Skip white space and pick up a leading +/- sign if any.
     * The <ctype.h> classifiers require an unsigned char value.
     */
    while (s < end && isspace((unsigned char)*s)) {
        s++;
    }
    if (s == end) {
        return 0;
    }

    if (*s == '-' || *s == '+') {
        neg = (*s == '-');
        s++;
        if (s == end) {
            return 0;
        }
    }

    /*
     * If base is 0 or 16, allow a 0x/0X prefix. The prefix is only consumed
     * when a hex digit follows it inside the buffer; otherwise the leading
     * "0" is converted on its own, as strtol() does.
     */
    if ((base == 0 || base == 16) && *s == '0' && (end - s) > 2 &&
        (s[1] == 'x' || s[1] == 'X') && isxdigit((unsigned char)s[2])) {
        s += 2;
        base = 16;
    }

    if (base == 0) {
        base = (*s == '0') ? 8 : 10;
    }

    /*
     * Compute the cutoff value between legal numbers and illegal
     * numbers.  That is the largest legal value, divided by the
     * base.  An input number that is greater than this value, if
     * followed by a legal input character, is too big.  One that
     * is equal to this value may be valid or not; the limit
     * between valid and invalid numbers is then based on the last
     * digit.  For instance, if the range for longs is
     * [-2147483648..2147483647] and the input base is 10,
     * cutoff will be set to 214748364 and cutlim to either
     * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
     * a value > 214748364, or equal but the next digit is > 7 (or 8),
     * the number is too big, and we will return a range error.
     */
    cutoff = neg ? -(unsigned long)LONG_MIN : (unsigned long)LONG_MAX;
    cutlim = (int)(cutoff % (unsigned long)base);
    cutoff /= (unsigned long)base;

    for (; s < end; s++) {
        int c = (unsigned char)*s;

        if (isdigit(c)) {
            c -= '0';
        } else if (isalpha(c)) {
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }
        if (c >= base) {
            break;
        }

        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {
            /* Keep consuming digits so endptr lands after the whole number. */
            any = -1;
        } else {
            any = 1;
            acc = acc * (unsigned long)base + (unsigned long)c;
        }
    }

    if (any < 0) {
        result = neg ? LONG_MIN : LONG_MAX;
        errno  = ERANGE;
    } else if (neg) {
        /* acc is at most |LONG_MIN| here; only that value exceeds LONG_MAX. */
        result = (acc > (unsigned long)LONG_MAX) ? LONG_MIN : -(long)acc;
    } else {
        result = (long)acc;
    }

    /* s already points at the first byte that was not converted. */
    if (endptr != NULL && any != 0) {
        *endptr = (char *)s;
    }

    return result;
}
633
+
519 634
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip)
520 635
 {
521 636
     size_t tokens_found, i;
... ...
@@ -68,6 +68,7 @@ const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle
68 68
 char *cli_strrcpy(char *dest, const char *source);
69 69
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
70 70
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip);
71
+long cli_strntol(const char *nptr, size_t n, char **endptr, register int base);
71 72
 int cli_isnumber(const char *str);
72 73
 char *cli_unescape(const char *str);
73 74
 struct text_buffer;