... | ... |
@@ -207,7 +207,7 @@ int pdf_findobj(struct pdf_struct *pdf) |
207 | 207 |
const char *start, *q, *q2, *q3, *eof; |
208 | 208 |
struct pdf_obj *obj; |
209 | 209 |
off_t bytesleft; |
210 |
- unsigned genid, objid; |
|
210 |
+ unsigned long genid, objid; |
|
211 | 211 |
|
212 | 212 |
pdf->nobjs++; |
213 | 213 |
pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs); |
... | ... |
@@ -243,12 +243,43 @@ int pdf_findobj(struct pdf_struct *pdf) |
243 | 243 |
while (q > start && isdigit(*q)) |
244 | 244 |
q--; |
245 | 245 |
|
246 |
- genid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10); |
|
246 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&genid)) { |
|
247 |
+ cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs); |
|
248 |
+ return -1; |
|
249 |
+ } |
|
247 | 250 |
q = findNextNonWSBack(q-1,start); |
248 | 251 |
while (q > start && isdigit(*q)) |
249 | 252 |
q--; |
250 | 253 |
|
251 |
- objid = (unsigned int)cli_strntol(q, (size_t)bytesleft, NULL, 10); |
|
254 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) { |
|
255 |
+ /* |
|
256 |
+ * PDFs with multiple revisions will have %%EOF before the end of the file, |
|
257 |
+ * followed by the next revision of the PDF. If this is the case, we can |
|
258 |
+ * detect it and continue parsing after the %%EOF. |
|
259 |
+ */ |
|
260 |
+ if (q - 4 > start) { |
|
261 |
+ const char* lastfile = q - 4; |
|
262 |
+ if (0 != strncmp(lastfile, "\%\%EOF", 5)) { |
|
263 |
+ /* Nope, wasn't %%EOF, I guess just fail out. */ |
|
264 |
+ cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
265 |
+ return -1; |
|
266 |
+ } |
|
267 |
+ /* Yup, Looks, like the file continues after %%EOF. |
|
268 |
+ * Probably another revision. Keep parsing... */ |
|
269 |
+ q++; |
|
270 |
+ cli_dbgmsg("cli_pdf: \%\%EOF detected before end of file, at %zu\n", (size_t)q); |
|
271 |
+ } else { |
|
272 |
+ /* Failed parsing at the very beginning */ |
|
273 |
+ cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
274 |
+ return -1; |
|
275 |
+ } |
|
276 |
+ /* Try again, with offset slightly adjusted */ |
|
277 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) { |
|
278 |
+ cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
279 |
+ return -1; |
|
280 |
+ } |
|
281 |
+ cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n"); |
|
282 |
+ } |
|
252 | 283 |
obj->id = (objid << 8) | (genid&0xff); |
253 | 284 |
obj->start = q2+4 - pdf->map; |
254 | 285 |
obj->flags = 0; |
... | ... |
@@ -406,7 +437,7 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o |
406 | 406 |
|
407 | 407 |
static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len) |
408 | 408 |
{ |
409 |
- int length; |
|
409 |
+ unsigned long length; |
|
410 | 410 |
const char *q; |
411 | 411 |
|
412 | 412 |
q = cli_memstr(start, len, "/Length", 7); |
... | ... |
@@ -421,17 +452,24 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
421 | 421 |
|
422 | 422 |
/* len -= start - q; */ |
423 | 423 |
q = start; |
424 |
- length = (int)cli_strntol(q, (size_t)len, NULL, 10); |
|
424 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) { |
|
425 |
+ cli_dbgmsg("cli_pdf: failed to parse object length\n"); |
|
426 |
+ return 0; |
|
427 |
+ } |
|
428 |
+ |
|
425 | 429 |
while (isdigit(*q) && len > 0) { |
426 | 430 |
q++; |
427 | 431 |
len--; |
428 | 432 |
} |
429 | 433 |
|
430 | 434 |
if (*q == ' ' && len > 0) { |
431 |
- int genid; |
|
435 |
+ unsigned long genid; |
|
432 | 436 |
q++; |
433 | 437 |
len--; |
434 |
- genid = (int)cli_strntol(q, (size_t)len, NULL, 10); |
|
438 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&genid)) { |
|
439 |
+ cli_dbgmsg("cli_pdf: failed to parse object genid\n"); |
|
440 |
+ return 0; |
|
441 |
+ } |
|
435 | 442 |
|
436 | 443 |
while(isdigit(*q) && len > 0) { |
437 | 444 |
q++; |
... | ... |
@@ -439,7 +477,7 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
439 | 439 |
} |
440 | 440 |
|
441 | 441 |
if (q[0] == ' ' && q[1] == 'R') { |
442 |
- cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid); |
|
442 |
+ cli_dbgmsg("cli_pdf: length is in indirect object %lu %lu\n", length, genid); |
|
443 | 443 |
|
444 | 444 |
obj = find_obj(pdf, obj, (length << 8) | (genid&0xff)); |
445 | 445 |
if (!obj) { |
... | ... |
@@ -453,7 +491,10 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
453 | 453 |
return 0; |
454 | 454 |
} |
455 | 455 |
|
456 |
- length = (int)cli_strntol(q, (size_t)len, NULL, 10); |
|
456 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) { |
|
457 |
+ cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n"); |
|
458 |
+ return 0; |
|
459 |
+ } |
|
457 | 460 |
} |
458 | 461 |
} |
459 | 462 |
|
... | ... |
@@ -1283,7 +1324,8 @@ static int pdf_readint(const char *q0, int len, const char *key); |
1283 | 1283 |
static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1284 | 1284 |
{ |
1285 | 1285 |
const char *q, *q2; |
1286 |
- uint32_t objid; |
|
1286 |
+ unsigned long objid; |
|
1287 |
+ unsigned long genid; |
|
1287 | 1288 |
|
1288 | 1289 |
if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) { |
1289 | 1290 |
q = cli_memstr(enc+16, len-16, "/Encrypt", 8); |
... | ... |
@@ -1302,19 +1344,27 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1302 | 1302 |
len -= q2 - q; |
1303 | 1303 |
q = q2; |
1304 | 1304 |
|
1305 |
- objid = (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) << 8; |
|
1305 |
+ if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&objid)) { |
|
1306 |
+ cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n"); |
|
1307 |
+ return; |
|
1308 |
+ } |
|
1309 |
+ objid = objid << 8; |
|
1306 | 1310 |
q2 = pdf_nextobject(q, len); |
1307 | 1311 |
if (!q2 || !isdigit(*q2)) |
1308 | 1312 |
return; |
1309 | 1313 |
len -= q2 - q; |
1310 | 1314 |
q = q2; |
1311 | 1315 |
|
1312 |
- objid |= (uint32_t)cli_strntol(q2, (size_t)len, NULL, 10) & 0xff; |
|
1316 |
+ if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&genid)) { |
|
1317 |
+ cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n"); |
|
1318 |
+ return; |
|
1319 |
+ } |
|
1320 |
+ objid |= genid & 0xff; |
|
1313 | 1321 |
q2 = pdf_nextobject(q, len); |
1314 | 1322 |
if (!q2 || *q2 != 'R') |
1315 | 1323 |
return; |
1316 | 1324 |
|
1317 |
- cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff); |
|
1325 |
+ cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff); |
|
1318 | 1326 |
|
1319 | 1327 |
pdf->enc_objid = objid; |
1320 | 1328 |
} |
... | ... |
@@ -1569,9 +1619,17 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1569 | 1569 |
q2 = pdf_nextobject(q, dict_remaining); |
1570 | 1570 |
if (q2 && isdigit(*q2)) { |
1571 | 1571 |
const char * q2_old = NULL; |
1572 |
+ unsigned long objid; |
|
1573 |
+ unsigned long genid; |
|
1574 |
+ |
|
1572 | 1575 |
dict_remaining -= (off_t)(q2 - q); |
1573 | 1576 |
|
1574 |
- uint32_t objid = (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) << 8; |
|
1577 |
+ if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&objid)) { |
|
1578 |
+ cli_dbgmsg("cli_pdf: failed to parse object objid\n"); |
|
1579 |
+ return; |
|
1580 |
+ } |
|
1581 |
+ objid = objid << 8; |
|
1582 |
+ |
|
1575 | 1583 |
while (isdigit(*q2)) |
1576 | 1584 |
q2++; |
1577 | 1585 |
|
... | ... |
@@ -1579,13 +1637,17 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1579 | 1579 |
q2 = pdf_nextobject(q2, dict_remaining); |
1580 | 1580 |
if (q2 && isdigit(*q2)) { |
1581 | 1581 |
dict_remaining -= (off_t)(q2 - q2_old); |
1582 |
- objid |= (uint32_t)cli_strntol(q2, (size_t)dict_remaining, NULL, 10) & 0xff; |
|
1582 |
+ if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&genid)) { |
|
1583 |
+ cli_dbgmsg("cli_pdf: failed to parse object genid\n"); |
|
1584 |
+ return; |
|
1585 |
+ } |
|
1586 |
+ objid |= genid & 0xff; |
|
1583 | 1587 |
|
1584 | 1588 |
q2 = pdf_nextobject(q2, dict_remaining); |
1585 | 1589 |
if (q2 && *q2 == 'R') { |
1586 | 1590 |
struct pdf_obj *obj2; |
1587 | 1591 |
|
1588 |
- cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n", pdfname, objid >> 8, objid&0xff); |
|
1592 |
+ cli_dbgmsg("cli_pdf: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff); |
|
1589 | 1593 |
obj2 = find_obj(pdf, obj, objid); |
1590 | 1594 |
if (obj2) { |
1591 | 1595 |
enum pdf_objflags flag = |
... | ... |
@@ -1879,9 +1941,16 @@ static char *pdf_readval(const char *q, int len, const char *key) |
1879 | 1879 |
|
1880 | 1880 |
static int pdf_readint(const char *q0, int len, const char *key) |
1881 | 1881 |
{ |
1882 |
+ long value = 0; |
|
1882 | 1883 |
const char *q = pdf_getdict(q0, &len, key); |
1883 | 1884 |
|
1884 |
- return (q != NULL) ? (int)cli_strntol(q, (size_t)len, NULL, 10) : -1; |
|
1885 |
+ if (q == NULL) { |
|
1886 |
+ value = -1; |
|
1887 |
+ } |
|
1888 |
+ else if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, &value)) { |
|
1889 |
+ value = -1; |
|
1890 |
+ } |
|
1891 |
+ return value; |
|
1885 | 1892 |
} |
1886 | 1893 |
|
1887 | 1894 |
static int pdf_readbool(const char *q0, int len, const char *key, int Default) |
... | ... |
@@ -2425,16 +2494,21 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2425 | 2425 |
|
2426 | 2426 |
while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; } |
2427 | 2427 |
|
2428 |
- xref = cli_strntol(q, q - eofmap + map_off, NULL, 10); |
|
2429 |
- bytesleft = map->len - offset - xref; |
|
2430 |
- if (bytesleft > 4096) |
|
2431 |
- bytesleft = 4096; |
|
2432 |
- |
|
2433 |
- q = fmap_need_off_once(map, offset + xref, bytesleft); |
|
2434 |
- if (!q || xrefCheck(q, q+bytesleft) == -1) { |
|
2435 |
- cli_dbgmsg("cli_pdf: did not find valid xref\n"); |
|
2428 |
+ if (CL_SUCCESS != cli_strntol_wrap(q, q - eofmap + map_off, 0, 10, &xref)) { |
|
2429 |
+ cli_dbgmsg("cli_pdf: failed to parse PDF trailer xref\n"); |
|
2436 | 2430 |
pdf.flags |= 1 << BAD_PDF_TRAILER; |
2437 | 2431 |
} |
2432 |
+ else { |
|
2433 |
+ bytesleft = map->len - offset - xref; |
|
2434 |
+ if (bytesleft > 4096) |
|
2435 |
+ bytesleft = 4096; |
|
2436 |
+ |
|
2437 |
+ q = fmap_need_off_once(map, offset + xref, bytesleft); |
|
2438 |
+ if (!q || xrefCheck(q, q+bytesleft) == -1) { |
|
2439 |
+ cli_dbgmsg("cli_pdf: did not find valid xref\n"); |
|
2440 |
+ pdf.flags |= 1 << BAD_PDF_TRAILER; |
|
2441 |
+ } |
|
2442 |
+ } |
|
2438 | 2443 |
} |
2439 | 2444 |
} |
2440 | 2445 |
|
... | ... |
@@ -3221,9 +3295,10 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname |
3221 | 3221 |
goto cleanup; |
3222 | 3222 |
} |
3223 | 3223 |
|
3224 |
- count = cli_strntol(begin, (size_t)(obj->start + pdf->map + objsz - begin), NULL, 10); |
|
3225 |
- if (count != npages) |
|
3224 |
+ if ((CL_SUCCESS != cli_strntol_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, (long*)&count)) || |
|
3225 |
+ (count != npages)) { |
|
3226 | 3226 |
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
3227 |
+ } |
|
3227 | 3228 |
|
3228 | 3229 |
cleanup: |
3229 | 3230 |
pdf_free_array(array); |
... | ... |
@@ -3266,7 +3341,8 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam |
3266 | 3266 |
if ((size_t)(p1 - start) == objsz) |
3267 | 3267 |
return; |
3268 | 3268 |
|
3269 |
- ncolors = cli_strntol(p1, (size_t)((p1 - start) - objsz), NULL, 10); |
|
3269 |
+ if (CL_SUCCESS != cli_strntol_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, (long*)&ncolors)) |
|
3270 |
+ return; |
|
3270 | 3271 |
|
3271 | 3272 |
/* We only care if the number of colors > 2**24 */ |
3272 | 3273 |
if (ncolors < 1<<24) |
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include <stdio.h> |
32 | 32 |
#include <stdlib.h> |
33 | 33 |
#include <string.h> |
34 |
+#include <limits.h> |
|
34 | 35 |
#ifdef HAVE_STRINGS_H |
35 | 36 |
#include <strings.h> |
36 | 37 |
#endif |
... | ... |
@@ -523,16 +524,21 @@ size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, |
523 | 523 |
* Copyright (c) 1990 The Regents of the University of California. |
524 | 524 |
* All rights reserved. |
525 | 525 |
* |
526 |
- * @param nptr Pointer to start of string. |
|
527 |
- * @param n Max length of buffer in bytes. |
|
528 |
- * @param endptr [OUT optional] If endptr is not NULL, strtol() stores the address |
|
529 |
- * of the first invalid character in *endptr. If there were no digits |
|
530 |
- * at all, however, strtol() stores the |
|
531 |
- * original value of str in *endptr. |
|
532 |
- * @param int The conversion is done according to the given base, which must be |
|
533 |
- * between 2 and 36 inclusive, or be the special value 0. |
|
534 |
- * @return long The signed long value. |
|
526 |
+ * @param nptr Pointer to start of string. |
|
527 |
+ * @param n Max length of buffer in bytes. |
|
528 |
+ * @param[out] endptr [optional] If endptr is not NULL, strtol() stores the address |
|
529 |
+ * of the first invalid character in *endptr. If there were no digits |
|
530 |
+ * at all, however, strtol() stores the |
|
531 |
+ * original value of nptr in *endptr. |
|
532 |
+ * Nota Bene: If the buffer is non-null terminated and the number |
|
533 |
+ * comprises the entire buffer, endptr will point past the end of |
|
534 |
+ * the buffer, and the caller should check if endptr >= nptr + n. |
|
535 |
+ * |
|
536 |
+ * @param base The conversion is done according to the given base, which must be |
|
537 |
+ * between 2 and 36 inclusive, or be the special value 0. |
|
538 |
+ * @return long The signed long value. |
|
535 | 539 |
*/ |
540 |
+static |
|
536 | 541 |
long cli_strntol(const char *nptr, size_t n, char **endptr, register int base) |
537 | 542 |
{ |
538 | 543 |
register const char *s = nptr; |
... | ... |
@@ -627,10 +633,54 @@ long cli_strntol(const char *nptr, size_t n, char **endptr, register int base) |
627 | 627 |
} else if (neg) |
628 | 628 |
acc = -acc; |
629 | 629 |
if (endptr != 0) |
630 |
- *endptr = (char *) (any ? s - 1 : nptr); |
|
630 |
+ *endptr = (char *) (any ? s : nptr); |
|
631 | 631 |
return (acc); |
632 | 632 |
} |
633 | 633 |
|
634 |
+/** |
|
635 |
+ * @brief The strntol() function converts the string in str to a long value. |
|
636 |
+ * |
|
637 |
+ * Wrapper for cli_strntol() that provides incentive to check for failure. |
|
638 |
+ * |
|
639 |
+ * @param buf Pointer to start of string. |
|
640 |
+ * @param buf_size Max length of buffer to convert to integer. |
|
641 |
+ * @param fail_at_nondigit If 1, fail out if the a non-digit character is found before the end of the buffer. |
|
642 |
+ * If 0, non-digit character represents end of number and is not a failure. |
|
643 |
+ * @param base The conversion is done according to the given base, which must be |
|
644 |
+ * between 2 and 36 inclusive, or be the special value 0. |
|
645 |
+ * @param[out] result Long integer value of ascii number. |
|
646 |
+ * @return CL_SUCCESS Success |
|
647 |
+ * @return CL_EPARSE Failure |
|
648 |
+ */ |
|
649 |
+int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result) |
|
650 |
+{ |
|
651 |
+ char *endptr = NULL; |
|
652 |
+ long num; |
|
653 |
+ |
|
654 |
+ if (buf_size == 0 || !buf || !result) { |
|
655 |
+ /* invalid parameter */ |
|
656 |
+ return CL_EPARSE; |
|
657 |
+ } |
|
658 |
+ errno = 0; |
|
659 |
+ num = cli_strntol(buf, buf_size, &endptr, base); |
|
660 |
+ if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) { |
|
661 |
+ /* under- or overflow */ |
|
662 |
+ return CL_EPARSE; |
|
663 |
+ } |
|
664 |
+ if (endptr == buf) { |
|
665 |
+ /* no digits */ |
|
666 |
+ return CL_EPARSE; |
|
667 |
+ } |
|
668 |
+ if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) { |
|
669 |
+ /* non-digit encountered */ |
|
670 |
+ return CL_EPARSE; |
|
671 |
+ } |
|
672 |
+ /* success */ |
|
673 |
+ *result = num; |
|
674 |
+ return CL_SUCCESS; |
|
675 |
+} |
|
676 |
+ |
|
677 |
+ |
|
634 | 678 |
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip) |
635 | 679 |
{ |
636 | 680 |
size_t tokens_found, i; |
... | ... |
@@ -68,7 +68,7 @@ const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle |
68 | 68 |
char *cli_strrcpy(char *dest, const char *source); |
69 | 69 |
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens); |
70 | 70 |
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip); |
71 |
-long cli_strntol(const char *nptr, size_t n, char **endptr, register int base); |
|
71 |
+int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result); |
|
72 | 72 |
int cli_isnumber(const char *str); |
73 | 73 |
char *cli_unescape(const char *str); |
74 | 74 |
struct text_buffer; |