... | ... |
@@ -59,6 +59,7 @@ |
59 | 59 |
#include "clamav.h" |
60 | 60 |
#include "others.h" |
61 | 61 |
#include "pdf.h" |
62 |
+#include "pdfdecode.h" |
|
62 | 63 |
#include "str.h" |
63 | 64 |
#include "bytecode.h" |
64 | 65 |
#include "bytecode_api.h" |
... | ... |
@@ -70,12 +71,13 @@ struct pdf_token { |
70 | 70 |
|
71 | 71 |
static int filter_ascii85decode(struct pdf_token *token); |
72 | 72 |
static int filter_rldecode(struct pdf_token *token); |
73 |
-static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
73 |
+static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
74 | 74 |
static int filter_asciihexdecode(struct pdf_token *token); |
75 |
+static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode); |
|
75 | 76 |
|
76 |
-static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
77 |
+static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
77 | 78 |
|
78 |
-int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *stream, uint32_t streamlen, int fout) |
|
79 |
+int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout) |
|
79 | 80 |
{ |
80 | 81 |
struct pdf_token *token; |
81 | 82 |
int rc; |
... | ... |
@@ -85,6 +87,11 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st |
85 | 85 |
return CL_ENULLARG; |
86 | 86 |
} |
87 | 87 |
|
88 |
+#if 0 |
|
89 |
+ if (params) |
|
90 |
+ pdf_print_dict(params, 0); |
|
91 |
+#endif |
|
92 |
+ |
|
88 | 93 |
token = cli_malloc(sizeof(struct pdf_token)); |
89 | 94 |
if (!token) |
90 | 95 |
return CL_EMEM; |
... | ... |
@@ -97,13 +104,13 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st |
97 | 97 |
memcpy(token->content, stream, streamlen); |
98 | 98 |
token->length = streamlen; |
99 | 99 |
|
100 |
- rc = pdf_decodestream_internal(pdf, obj, token); |
|
101 |
- |
|
102 |
- if (rc == CL_SUCCESS) { |
|
103 |
- cli_dbgmsg("cli_pdf: decoding SUCCESS!\n"); |
|
100 |
+ rc = pdf_decodestream_internal(pdf, obj, params, token); |
|
104 | 101 |
|
105 |
- if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) |
|
106 |
- rc = cli_writen(fout, token->content, token->length); |
|
102 |
+ if ((rc == CL_SUCCESS) && !cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) { |
|
103 |
+ if (cli_writen(fout, token->content, token->length) != token->length) { |
|
104 |
+ cli_errmsg("cli_pdf: failed to write output file\n"); |
|
105 |
+ rc = CL_EWRITE; |
|
106 |
+ } |
|
107 | 107 |
} |
108 | 108 |
|
109 | 109 |
free(token->content); |
... | ... |
@@ -111,12 +118,24 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st |
111 | 111 |
return rc; |
112 | 112 |
} |
113 | 113 |
|
114 |
-static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
114 |
+static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
115 | 115 |
{ |
116 |
+ const char *filter = NULL; |
|
116 | 117 |
int i, rc; |
117 | 118 |
|
118 | 119 |
cli_dbgmsg("cli_pdf: detected %lu filters applied\n", (long unsigned)(obj->numfilters)); |
119 | 120 |
|
121 |
+ /* |
|
122 |
+ * if pdf is decryptable, scan for CRYPT filter |
|
123 |
+ * if none, force a DECRYPT filter application |
|
124 |
+ */ |
|
125 |
+ if (!(obj->flags & (1 << OBJ_FILTER_CRYPT))) { |
|
126 |
+ cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n"); |
|
127 |
+ if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) |
|
128 |
+ return rc; |
|
129 |
+ |
|
130 |
+ } |
|
131 |
+ |
|
120 | 132 |
/* TODO - MAY BE SUBJECT TO CHANGE */ |
121 | 133 |
for (i = 0; i < obj->numfilters; i++) { |
122 | 134 |
switch(obj->filterlist[i]) { |
... | ... |
@@ -132,7 +151,7 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
132 | 132 |
|
133 | 133 |
case OBJ_FILTER_FLATE: |
134 | 134 |
cli_dbgmsg("cli_pdf: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]); |
135 |
- rc = filter_flatedecode(pdf, obj, token); |
|
135 |
+ rc = filter_flatedecode(pdf, obj, params, token); |
|
136 | 136 |
break; |
137 | 137 |
|
138 | 138 |
case OBJ_FILTER_AH: |
... | ... |
@@ -140,14 +159,30 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
140 | 140 |
rc = filter_asciihexdecode(token); |
141 | 141 |
break; |
142 | 142 |
|
143 |
+ case OBJ_FILTER_CRYPT: |
|
144 |
+ cli_dbgmsg("cli_pdf: decoding [%d] => CRYPT\n", obj->filterlist[i]); |
|
145 |
+ rc = filter_decrypt(pdf, obj, params, token, 0); |
|
146 |
+ break; |
|
147 |
+ |
|
143 | 148 |
case OBJ_FILTER_JPX: |
144 |
- case OBJ_FILTER_DCT: //OBJ_FILTER_JBIG2 |
|
149 |
+ if (!filter) filter = "JPXDECODE"; |
|
150 |
+ case OBJ_FILTER_DCT: |
|
151 |
+ if (!filter) filter = "DCTDECODE"; |
|
145 | 152 |
case OBJ_FILTER_LZW: |
153 |
+ if (!filter) filter = "LZWDECODE"; |
|
146 | 154 |
case OBJ_FILTER_FAX: |
147 |
- case OBJ_FILTER_CRYPT: |
|
155 |
+ if (!filter) filter = "FAXDECODE"; |
|
156 |
+ case OBJ_FILTER_JBIG2: |
|
157 |
+ if (!filter) filter = "JBIG2DECODE"; |
|
158 |
+ |
|
159 |
+ cli_warnmsg("cli_pdf: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter); |
|
160 |
+ filter = NULL; |
|
161 |
+ rc = CL_BREAK; |
|
162 |
+ break; |
|
148 | 163 |
|
149 | 164 |
default: |
150 |
- cli_warnmsg("cli_pdf: unknown filter type [%d].\n", obj->filterlist[i]); |
|
165 |
+ cli_warnmsg("cli_pdf: unknown filter type [%d]\n", obj->filterlist[i]); |
|
166 |
+ rc = CL_BREAK; |
|
151 | 167 |
break; |
152 | 168 |
} |
153 | 169 |
|
... | ... |
@@ -160,6 +195,10 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
160 | 160 |
return CL_SUCCESS; |
161 | 161 |
} |
162 | 162 |
|
163 |
+/* |
|
164 |
+ * ascii85 decoding |
|
165 |
+ * See http://www.piclist.com/techref/method/encode.htm (look for base85) |
|
166 |
+ */ |
|
163 | 167 |
static int filter_ascii85decode(struct pdf_token *token) |
164 | 168 |
{ |
165 | 169 |
uint8_t *decoded; |
... | ... |
@@ -363,7 +402,7 @@ static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length) |
363 | 363 |
return pt; |
364 | 364 |
} |
365 | 365 |
|
366 |
-static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
366 |
+static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
367 | 367 |
{ |
368 | 368 |
uint8_t *decoded, *temp; |
369 | 369 |
uint32_t declen = 0, capacity = 0; |
... | ... |
@@ -373,13 +412,15 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc |
373 | 373 |
z_stream stream; |
374 | 374 |
int zstat, skip = 0, rc = CL_SUCCESS; |
375 | 375 |
|
376 |
+ UNUSEDPARAM(params); |
|
377 |
+ |
|
376 | 378 |
if (length == 0) |
377 | 379 |
return CL_CLEAN; |
378 | 380 |
|
379 | 381 |
if (*content == '\r') { |
380 | 382 |
content++; |
381 | 383 |
length--; |
382 |
- //pdfobj_flag(pdf, obj, BAD_STREAMSTART); |
|
384 |
+ pdfobj_flag(pdf, obj, BAD_STREAMSTART); |
|
383 | 385 |
/* PDF spec says stream is followed by \r\n or \n, but not \r alone. |
384 | 386 |
* Sample 0015315109, it has \r followed by zlib header. |
385 | 387 |
* Flag pdf as suspicious, and attempt to extract by skipping the \r. |
... | ... |
@@ -431,7 +472,7 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc |
431 | 431 |
return CL_EMEM; |
432 | 432 |
} |
433 | 433 |
|
434 |
- //pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
434 |
+ pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
435 | 435 |
} |
436 | 436 |
|
437 | 437 |
zstat = inflate(&stream, Z_NO_FLUSH); |
... | ... |
@@ -485,12 +526,12 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc |
485 | 485 |
(unsigned long)declen, zstat, obj->id>>8, obj->id&0xff); |
486 | 486 |
|
487 | 487 |
if (declen == 0) { |
488 |
- //pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
488 |
+ pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
489 | 489 |
cli_dbgmsg("cli_pdf: no bytes were inflated.\n"); |
490 | 490 |
|
491 | 491 |
rc = CL_EFORMAT; |
492 | 492 |
} else { |
493 |
- //pdfobj_flag(pdf, obj, BAD_FLATE); |
|
493 |
+ pdfobj_flag(pdf, obj, BAD_FLATE); |
|
494 | 494 |
} |
495 | 495 |
break; |
496 | 496 |
} |
... | ... |
@@ -559,3 +600,46 @@ static int filter_asciihexdecode(struct pdf_token *token) |
559 | 559 |
} |
560 | 560 |
return rc; |
561 | 561 |
} |
562 |
+ |
|
563 |
+/* modes: 0 = use default/DecodeParms, 1 = use document setting */ |
|
564 |
+static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode) |
|
565 |
+{ |
|
566 |
+ char *decrypted; |
|
567 |
+ off_t length = token->length; |
|
568 |
+ enum enc_method enc = ENC_IDENTITY; |
|
569 |
+ |
|
570 |
+ if (mode) |
|
571 |
+ enc = get_enc_method(pdf, obj); |
|
572 |
+ else if (params) { |
|
573 |
+ struct pdf_dict_node *node = params->nodes; |
|
574 |
+ |
|
575 |
+ while (node) { |
|
576 |
+ if (node->type == PDF_DICT_STRING) { |
|
577 |
+ if (!strncmp(node->key, "/Type", 6)) { /* optional field - Type */ |
|
578 |
+ /* MUST be "CryptFilterDecodeParms" */ |
|
579 |
+ cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value)); |
|
580 |
+ } else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name */ |
|
581 |
+ /* overrides document and default encryption method */ |
|
582 |
+ cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value)); |
|
583 |
+ enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc); |
|
584 |
+ } |
|
585 |
+ } |
|
586 |
+ node = node->next; |
|
587 |
+ } |
|
588 |
+ } |
|
589 |
+ |
|
590 |
+ decrypted = decrypt_any(pdf, obj->id, token->content, &length, enc); |
|
591 |
+ if (!decrypted) { |
|
592 |
+ cli_dbgmsg("cli_pdf: failed to decrypt stream\n"); |
|
593 |
+ return CL_EPARSE; /* TODO: what should this value be? */ |
|
594 |
+ } |
|
595 |
+ |
|
596 |
+ cli_dbgmsg("cli_pdf: decrypted %lld bytes from %lu total bytes\n", |
|
597 |
+ (long long int)length, (long unsigned)token->length); |
|
598 |
+ |
|
599 |
+ |
|
600 |
+ free(token->content); |
|
601 |
+ token->content = (uint8_t *)decrypted; |
|
602 |
+ token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */ |
|
603 |
+ return CL_SUCCESS; |
|
604 |
+} |
562 | 605 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,41 @@ |
0 |
+/* |
|
1 |
+ * Copyright (C) 2016 Cisco and/or its affiliates. All rights reserved. |
|
2 |
+ * |
|
3 |
+ * Author: Kevin Lin |
|
4 |
+ * |
|
5 |
+ * This program is free software; you can redistribute it and/or modify |
|
6 |
+ * it under the terms of the GNU General Public License version 2 as |
|
7 |
+ * published by the Free Software Foundation. |
|
8 |
+ * |
|
9 |
+ * This program is distributed in the hope that it will be useful, |
|
10 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
+ * GNU General Public License for more details. |
|
13 |
+ * |
|
14 |
+ * You should have received a copy of the GNU General Public License |
|
15 |
+ * along with this program; if not, write to the Free Software |
|
16 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
|
17 |
+ * MA 02110-1301, USA. |
|
18 |
+ * |
|
19 |
+ * In addition, as a special exception, the copyright holders give |
|
20 |
+ * permission to link the code of portions of this program with the |
|
21 |
+ * OpenSSL library under certain conditions as described in each |
|
22 |
+ * individual source file, and distribute linked combinations |
|
23 |
+ * including the two. |
|
24 |
+ * |
|
25 |
+ * You must obey the GNU General Public License in all respects |
|
26 |
+ * for all of the code used other than OpenSSL. If you modify |
|
27 |
+ * file(s) with this exception, you may extend this exception to your |
|
28 |
+ * version of the file(s), but you are not obligated to do so. If you |
|
29 |
+ * do not wish to do so, delete this exception statement from your |
|
30 |
+ * version. If you delete this exception statement from all source |
|
31 |
+ * files in the program, then also delete it here. |
|
32 |
+ */ |
|
33 |
+#ifndef __PDFDECODE_H__ |
|
34 |
+#define __PDFDECODE_H__ |
|
35 |
+ |
|
36 |
+#include "pdf.h" |
|
37 |
+ |
|
38 |
+int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout); |
|
39 |
+ |
|
40 |
+#endif /* __PDFDECODE_H__ */ |