Also dump streams that fail to decompress, since encrypted streams are not decrypted.
Török Edvin authored on 2010/07/30 22:54:53
... | ... |
@@ -69,6 +69,7 @@ enum pdf_flag { |
69 | 69 |
BAD_PDF_TOOMANYOBJS, |
70 | 70 |
BAD_STREAM_FILTERS, |
71 | 71 |
BAD_FLATE, |
72 |
+ BAD_FLATESTART, |
|
72 | 73 |
BAD_STREAMSTART, |
73 | 74 |
BAD_ASCIIDECODE, |
74 | 75 |
BAD_INDOBJ, |
... | ... |
@@ -77,7 +78,8 @@ enum pdf_flag { |
77 | 77 |
HEX_JAVASCRIPT, |
78 | 78 |
UNKNOWN_FILTER, |
79 | 79 |
HAS_OPENACTION, |
80 |
- BAD_STREAMLEN |
|
80 |
+ BAD_STREAMLEN, |
|
81 |
+ ENCRYPTED_PDF |
|
81 | 82 |
}; |
82 | 83 |
|
83 | 84 |
static int xrefCheck(const char *xref, const char *eof) |
... | ... |
@@ -117,7 +119,8 @@ enum objflags { |
117 | 117 |
OBJ_FILTER_CRYPT, |
118 | 118 |
OBJ_JAVASCRIPT, |
119 | 119 |
OBJ_OPENACTION, |
120 |
- OBJ_HASFILTERS |
|
120 |
+ OBJ_HASFILTERS, |
|
121 |
+ OBJ_SIGNED |
|
121 | 122 |
}; |
122 | 123 |
|
123 | 124 |
struct pdf_obj { |
... | ... |
@@ -264,6 +267,9 @@ static void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_fl |
264 | 264 |
case BAD_FLATE: |
265 | 265 |
s = "bad deflate stream"; |
266 | 266 |
break; |
267 |
+ case BAD_FLATESTART: |
|
268 |
+ s = "bad deflate stream start"; |
|
269 |
+ break; |
|
267 | 270 |
case BAD_STREAMSTART: |
268 | 271 |
s = "bad stream start"; |
269 | 272 |
break; |
... | ... |
@@ -285,8 +291,11 @@ static void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_fl |
285 | 285 |
case BAD_STREAMLEN: |
286 | 286 |
s = "bad /Length, too small"; |
287 | 287 |
break; |
288 |
+ case ENCRYPTED_PDF: |
|
289 |
+ s = "PDF is encrypted"; |
|
290 |
+ break; |
|
288 | 291 |
} |
289 |
- cli_dbgmsg("cli_pdf: %s in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
|
292 |
+ cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
|
290 | 293 |
} |
291 | 294 |
|
292 | 295 |
static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, |
... | ... |
@@ -351,8 +360,18 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, |
351 | 351 |
else |
352 | 352 |
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n", |
353 | 353 |
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff); |
354 |
- pdfobj_flag(pdf, obj, BAD_FLATE); |
|
354 |
+ /* mark stream as bad only if not encrypted */ |
|
355 | 355 |
inflateEnd(&stream); |
356 |
+ if (!nbytes) { |
|
357 |
+ cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n"); |
|
358 |
+ if (filter_writen(pdf, obj, fout, buf, len, sum) != len) { |
|
359 |
+ cli_errmsg("cli_pdf: failed to write output file\n"); |
|
360 |
+ return CL_EWRITE; |
|
361 |
+ } |
|
362 |
+ pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
363 |
+ } else { |
|
364 |
+ pdfobj_flag(pdf, obj, BAD_FLATE); |
|
365 |
+ } |
|
356 | 366 |
return CL_CLEAN; |
357 | 367 |
} |
358 | 368 |
break; |
... | ... |
@@ -662,6 +681,8 @@ static struct pdfname_action pdfname_actions[] = { |
662 | 662 |
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER}, |
663 | 663 |
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER}, |
664 | 664 |
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE}, |
665 |
+ {"Standard", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_FILTER}, |
|
666 |
+ {"Sig", OBJ_SIGNED, STATE_NONE, STATE_NONE}, |
|
665 | 667 |
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER}, |
666 | 668 |
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT}, |
667 | 669 |
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE}, |
... | ... |
@@ -686,6 +707,9 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, |
686 | 686 |
} |
687 | 687 |
if (!act) { |
688 | 688 |
if (*state == STATE_FILTER && |
689 |
+ !(obj->flags & (1 << OBJ_SIGNED)) && |
|
690 |
+ /* these are digital signature objects, filter doesn't matter, |
|
691 |
+ * we don't need them anyway */ |
|
689 | 692 |
!(obj->flags & KNOWN_FILTERS)) { |
690 | 693 |
cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname); |
691 | 694 |
pdfobj_flag(pdf, obj, UNKNOWN_FILTER); |
... | ... |
@@ -784,7 +808,7 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
784 | 784 |
escapes = 1; |
785 | 785 |
continue; |
786 | 786 |
} |
787 |
- if (*q == ' ' || *q == '\r' || *q == '\n' || *q == '/') |
|
787 |
+ if (*q == ' ' || *q == '\r' || *q == '\n' || *q == '/' || *q == '>' || *q == ']') |
|
788 | 788 |
break; |
789 | 789 |
pdfname[i] = *q; |
790 | 790 |
} |
... | ... |
@@ -888,6 +912,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
888 | 888 |
pdf.flags |= 1 << BAD_PDF_TRAILER; |
889 | 889 |
cli_dbgmsg("cli_pdf: %%%%EOF not found\n"); |
890 | 890 |
} else { |
891 |
+ const char *t; |
|
891 | 892 |
size = q - eofmap + map_off; |
892 | 893 |
for (;q > eofmap;q--) { |
893 | 894 |
if (memcmp(q, "startxref", 9) == 0) |
... | ... |
@@ -896,17 +921,28 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
896 | 896 |
if (q <= eofmap) { |
897 | 897 |
pdf.flags |= 1 << BAD_PDF_TRAILER; |
898 | 898 |
cli_dbgmsg("cli_pdf: startxref not found\n"); |
899 |
- } |
|
900 |
- q += 9; |
|
901 |
- while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; } |
|
902 |
- xref = atol(q); |
|
903 |
- bytesleft = map->len - offset - xref; |
|
904 |
- if (bytesleft > 4096) |
|
905 |
- bytesleft = 4096; |
|
906 |
- q = fmap_need_off_once(map, offset + xref, bytesleft); |
|
907 |
- if (!q || xrefCheck(q, q+bytesleft) == -1) { |
|
908 |
- cli_dbgmsg("cli_pdf: did not find valid xref\n"); |
|
909 |
- pdf.flags |= 1 << BAD_PDF_TRAILER; |
|
899 |
+ } else { |
|
900 |
+ for (t=q;t > eofmap; t--) { |
|
901 |
+ if (memcmp(t,"trailer",7) == 0) |
|
902 |
+ break; |
|
903 |
+ } |
|
904 |
+ if (t > eofmap) { |
|
905 |
+ if (cli_memstr(t, q-t, "/Encrypt", 8)) { |
|
906 |
+ pdf.flags |= 1 << ENCRYPTED_PDF; |
|
907 |
+ cli_dbgmsg("cli_pdf: encrypted pdf found, stream will probably fail to decompress!\n"); |
|
908 |
+ } |
|
909 |
+ } |
|
910 |
+ q += 9; |
|
911 |
+ while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; } |
|
912 |
+ xref = atol(q); |
|
913 |
+ bytesleft = map->len - offset - xref; |
|
914 |
+ if (bytesleft > 4096) |
|
915 |
+ bytesleft = 4096; |
|
916 |
+ q = fmap_need_off_once(map, offset + xref, bytesleft); |
|
917 |
+ if (!q || xrefCheck(q, q+bytesleft) == -1) { |
|
918 |
+ cli_dbgmsg("cli_pdf: did not find valid xref\n"); |
|
919 |
+ pdf.flags |= 1 << BAD_PDF_TRAILER; |
|
920 |
+ } |
|
910 | 921 |
} |
911 | 922 |
} |
912 | 923 |
size -= offset; |
... | ... |
@@ -941,6 +977,9 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
941 | 941 |
break; |
942 | 942 |
} |
943 | 943 |
|
944 |
+ if (pdf.flags & (1 << ENCRYPTED_PDF)) |
|
945 |
+ pdf.flags &= ~ (1 << BAD_FLATESTART); |
|
946 |
+ |
|
944 | 947 |
if (pdf.flags) { |
945 | 948 |
cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags); |
946 | 949 |
if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) { |