... | ... |
@@ -871,6 +871,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
871 | 871 |
int len = p_stream; |
872 | 872 |
const char *pstr; |
873 | 873 |
struct pdf_dict *dparams = NULL; |
874 |
+ int xref = 0; |
|
874 | 875 |
|
875 | 876 |
length = find_length(pdf, obj, start, p_stream); |
876 | 877 |
if (length < 0) |
... | ... |
@@ -926,6 +927,9 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
926 | 926 |
} |
927 | 927 |
} |
928 | 928 |
|
929 |
+ if (cli_memstr(start, p_stream, "/XRef", 5)) |
|
930 |
+ xref = 1; |
|
931 |
+ |
|
929 | 932 |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
930 | 933 |
|
931 | 934 |
pstr = pdf_getdict(start, &len, "/DecodeParms"); |
... | ... |
@@ -953,7 +957,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
953 | 953 |
cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n"); |
954 | 954 |
} |
955 | 955 |
|
956 |
- sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, length, fout, &rc); |
|
956 |
+ sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, length, xref, fout, &rc); |
|
957 | 957 |
if (dparams) |
958 | 958 |
pdf_free_dict(dparams); |
959 | 959 |
|
... | ... |
@@ -963,14 +967,6 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
963 | 963 |
} |
964 | 964 |
|
965 | 965 |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
966 |
- |
|
967 |
-#if 0 |
|
968 |
- if (pdf->flags & (1 << DECRYPTABLE_PDF)) { |
|
969 |
- if (cli_memstr(start, p_stream, "/XRef", 5)) { |
|
970 |
- cli_dbgmsg("cli_pdf: cross reference stream, skipping\n"); |
|
971 |
- } |
|
972 |
- } |
|
973 |
-#endif |
|
974 | 966 |
} else { |
975 | 967 |
noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff); |
976 | 968 |
} |
... | ... |
@@ -65,9 +65,13 @@ |
65 | 65 |
#include "bytecode_api.h" |
66 | 66 |
#include "lzw/lzwdec.h" |
67 | 67 |
|
68 |
+#define PDFTOKEN_FLAG_XREF 0x1 |
|
69 |
+ |
|
68 | 70 |
/* Working state for one stream being decoded; passed to each filter_* routine. */
struct pdf_token { |
69 |
- uint32_t length; |
|
70 |
- uint8_t *content; |
|
71 |
+ uint32_t flags; /* tracking flags: bitmask of PDFTOKEN_FLAG_* values (PDFTOKEN_FLAG_XREF is set when the caller found "/XRef" in the object dictionary) */ |
|
72 |
+ |
|
73 |
+ uint32_t length; /* length of current content, in bytes */ |
|
74 |
+ uint8_t *content; /* content stream buffer; allocated in pdf_decodestream */ |
|
71 | 75 |
}; |
72 | 76 |
|
73 | 77 |
static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
... | ... |
@@ -80,7 +84,7 @@ static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, s |
80 | 80 |
static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode); |
81 | 81 |
static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
82 | 82 |
|
83 |
-off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout, int *rc) |
|
83 |
+off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc) |
|
84 | 84 |
{ |
85 | 85 |
struct pdf_token *token; |
86 | 86 |
off_t rv; |
... | ... |
@@ -104,6 +108,10 @@ off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_d |
104 | 104 |
return -1; |
105 | 105 |
} |
106 | 106 |
|
107 |
+ token->flags = 0; |
|
108 |
+ if (xref) |
|
109 |
+ token->flags |= PDFTOKEN_FLAG_XREF; |
|
110 |
+ |
|
107 | 111 |
token->content = cli_malloc(streamlen); |
108 | 112 |
if (!token->content) { |
109 | 113 |
free(token); |
... | ... |
@@ -150,9 +158,13 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
150 | 150 |
* if none, force a DECRYPT filter application |
151 | 151 |
*/ |
152 | 152 |
if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) { |
153 |
- cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n"); |
|
154 |
- if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) { |
|
155 |
- return rc; |
|
153 |
+ if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */ |
|
154 |
+ cli_dbgmsg("cli_pdf: skipping decoding => non-filter CRYPT (reason: xref)\n"); |
|
155 |
+ else { |
|
156 |
+ cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n"); |
|
157 |
+ if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) { |
|
158 |
+ return rc; |
|
159 |
+ } |
|
156 | 160 |
} |
157 | 161 |
} |
158 | 162 |
|
... | ... |
@@ -224,15 +236,15 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
224 | 224 |
reason = "detection"; |
225 | 225 |
break; |
226 | 226 |
case CL_BREAK: |
227 |
- reason = "break decoding"; |
|
227 |
+ reason = "decoding break"; |
|
228 | 228 |
break; |
229 | 229 |
default: |
230 |
- reason = "error decoding"; |
|
230 |
+ reason = "decoding error"; |
|
231 | 231 |
break; |
232 | 232 |
} |
233 | 233 |
|
234 |
- cli_dbgmsg("cli_pdf: %s, stopping after %d (of %lu) filters\n", |
|
235 |
- reason, i, (long unsigned)(obj->numfilters)); |
|
234 |
+ cli_dbgmsg("cli_pdf: stopping after %d (of %lu) filters (reason: %s)\n", |
|
235 |
+ i, (long unsigned)(obj->numfilters), reason); |
|
236 | 236 |
break; |
237 | 237 |
} |
238 | 238 |
} |
... | ... |
@@ -36,6 +36,6 @@ |
36 | 36 |
|
37 | 37 |
#include "pdf.h" |
38 | 38 |
|
39 |
-off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout, int *rc); |
|
39 |
+off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc); |
|
40 | 40 |
|
41 | 41 |
#endif /* __PDFDECODE_H__ */ |