Browse code

pdfdecode: implement crypt filter handler

Kevin Lin authored on 2016/03/31 05:39:37
Showing 2 changed files
... ...
@@ -59,6 +59,7 @@
59 59
 #include "clamav.h"
60 60
 #include "others.h"
61 61
 #include "pdf.h"
62
+#include "pdfdecode.h"
62 63
 #include "str.h"
63 64
 #include "bytecode.h"
64 65
 #include "bytecode_api.h"
... ...
@@ -70,12 +71,13 @@ struct pdf_token {
70 70
 
71 71
 static  int filter_ascii85decode(struct pdf_token *token);
72 72
 static  int filter_rldecode(struct pdf_token *token);
73
-static  int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
73
+static  int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
74 74
 static  int filter_asciihexdecode(struct pdf_token *token);
75
+static  int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
75 76
 
76
-static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
77
+static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
77 78
 
78
-int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *stream, uint32_t streamlen, int fout)
79
+int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout)
79 80
 {
80 81
     struct pdf_token *token;
81 82
     int rc;
... ...
@@ -85,6 +87,11 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st
85 85
         return CL_ENULLARG;
86 86
     }
87 87
 
88
+#if 0
89
+    if (params)
90
+        pdf_print_dict(params, 0);
91
+#endif
92
+
88 93
     token = cli_malloc(sizeof(struct pdf_token));
89 94
     if (!token)
90 95
         return CL_EMEM;
... ...
@@ -97,13 +104,13 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st
97 97
     memcpy(token->content, stream, streamlen);
98 98
     token->length = streamlen;
99 99
 
100
-    rc = pdf_decodestream_internal(pdf, obj, token);
101
-
102
-    if (rc == CL_SUCCESS) {
103
-        cli_dbgmsg("cli_pdf: decoding SUCCESS!\n");
100
+    rc = pdf_decodestream_internal(pdf, obj, params, token);
104 101
 
105
-        if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0))
106
-            rc = cli_writen(fout, token->content, token->length);
102
+    if ((rc == CL_SUCCESS) && !cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
103
+        if (cli_writen(fout, token->content, token->length) != token->length) {
104
+            cli_errmsg("cli_pdf: failed to write output file\n");
105
+            rc = CL_EWRITE;
106
+        }
107 107
     }
108 108
 
109 109
     free(token->content);
... ...
@@ -111,12 +118,24 @@ int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, const char *st
111 111
     return rc;
112 112
 }
113 113
 
114
-static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
114
+static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
115 115
 {
116
+    const char *filter = NULL;
116 117
     int i, rc;
117 118
 
118 119
     cli_dbgmsg("cli_pdf: detected %lu filters applied\n", (long unsigned)(obj->numfilters));
119 120
 
121
+    /*
122
+     * Intended for decryptable PDFs: if this object has no CRYPT filter flag,
123
+     * force a document-level decrypt (mode 1) before the listed filters run.
124
+     */
125
+    if (!(obj->flags & (1 << OBJ_FILTER_CRYPT))) {
126
+        cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n");
127
+        if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS)
128
+            return rc;
129
+
130
+    }
131
+
120 132
     /* TODO - MAY BE SUBJECT TO CHANGE */
121 133
     for (i = 0; i < obj->numfilters; i++) {
122 134
         switch(obj->filterlist[i]) {
... ...
@@ -132,7 +151,7 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
132 132
 
133 133
         case OBJ_FILTER_FLATE:
134 134
             cli_dbgmsg("cli_pdf: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]);
135
-            rc = filter_flatedecode(pdf, obj, token);
135
+            rc = filter_flatedecode(pdf, obj, params, token);
136 136
             break;
137 137
 
138 138
         case OBJ_FILTER_AH:
... ...
@@ -140,14 +159,30 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
140 140
             rc = filter_asciihexdecode(token);
141 141
             break;
142 142
 
143
+        case OBJ_FILTER_CRYPT:
144
+            cli_dbgmsg("cli_pdf: decoding [%d] => CRYPT\n", obj->filterlist[i]);
145
+            rc = filter_decrypt(pdf, obj, params, token, 0);
146
+            break;
147
+
143 148
         case OBJ_FILTER_JPX:
144
-        case OBJ_FILTER_DCT: //OBJ_FILTER_JBIG2
149
+            if (!filter) filter = "JPXDECODE";
150
+        case OBJ_FILTER_DCT:
151
+            if (!filter) filter = "DCTDECODE";
145 152
         case OBJ_FILTER_LZW:
153
+            if (!filter) filter = "LZWDECODE";
146 154
         case OBJ_FILTER_FAX:
147
-        case OBJ_FILTER_CRYPT:
155
+            if (!filter) filter = "FAXDECODE";
156
+        case OBJ_FILTER_JBIG2:
157
+            if (!filter) filter = "JBIG2DECODE";
158
+
159
+            cli_warnmsg("cli_pdf: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter);
160
+            filter = NULL;
161
+            rc = CL_BREAK;
162
+            break;
148 163
 
149 164
         default:
150
-            cli_warnmsg("cli_pdf: unknown filter type [%d].\n", obj->filterlist[i]);
165
+            cli_warnmsg("cli_pdf: unknown filter type [%d]\n", obj->filterlist[i]);
166
+            rc = CL_BREAK;
151 167
             break;
152 168
         }
153 169
 
... ...
@@ -160,6 +195,10 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
160 160
     return CL_SUCCESS;
161 161
 }
162 162
 
163
+/*
164
+ * ascii85 decoding
165
+ * See http://www.piclist.com/techref/method/encode.htm (look for base85)
166
+ */
163 167
 static int filter_ascii85decode(struct pdf_token *token)
164 168
 {
165 169
     uint8_t *decoded;
... ...
@@ -363,7 +402,7 @@ static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length)
363 363
     return pt;
364 364
 }
365 365
 
366
-static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
366
+static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
367 367
 {
368 368
     uint8_t *decoded, *temp;
369 369
     uint32_t declen = 0, capacity = 0;
... ...
@@ -373,13 +412,15 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc
373 373
     z_stream stream;
374 374
     int zstat, skip = 0, rc = CL_SUCCESS;
375 375
 
376
+    UNUSEDPARAM(params);
377
+
376 378
     if (length == 0)
377 379
         return CL_CLEAN;
378 380
 
379 381
     if (*content == '\r') {
380 382
         content++;
381 383
         length--;
382
-        //pdfobj_flag(pdf, obj, BAD_STREAMSTART);
384
+        pdfobj_flag(pdf, obj, BAD_STREAMSTART);
383 385
         /* PDF spec says stream is followed by \r\n or \n, but not \r alone.
384 386
          * Sample 0015315109, it has \r followed by zlib header.
385 387
          * Flag pdf as suspicious, and attempt to extract by skipping the \r.
... ...
@@ -431,7 +472,7 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc
431 431
                 return CL_EMEM;
432 432
             }
433 433
 
434
-            //pdfobj_flag(pdf, obj, BAD_FLATESTART);
434
+            pdfobj_flag(pdf, obj, BAD_FLATESTART);
435 435
         }
436 436
 
437 437
         zstat = inflate(&stream, Z_NO_FLUSH);
... ...
@@ -485,12 +526,12 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc
485 485
                        (unsigned long)declen, zstat, obj->id>>8, obj->id&0xff);
486 486
 
487 487
         if (declen == 0) {
488
-            //pdfobj_flag(pdf, obj, BAD_FLATESTART);
488
+            pdfobj_flag(pdf, obj, BAD_FLATESTART);
489 489
             cli_dbgmsg("cli_pdf: no bytes were inflated.\n");
490 490
 
491 491
             rc = CL_EFORMAT;
492 492
         } else {
493
-            //pdfobj_flag(pdf, obj, BAD_FLATE);
493
+            pdfobj_flag(pdf, obj, BAD_FLATE);
494 494
         }
495 495
         break;
496 496
     }
... ...
@@ -559,3 +600,46 @@ static int filter_asciihexdecode(struct pdf_token *token)
559 559
     }
560 560
     return rc;
561 561
 }
562
+
563
+/*
+ * filter_decrypt - run the stream held in `token` through decrypt_any() and
+ * swap the token's buffer for the result.
+ *
+ * modes: 0 = use default/DecodeParms, 1 = use document setting
+ *
+ *   mode != 0: the method is whatever get_enc_method(pdf, obj) returns.
+ *   mode == 0: the method starts as ENC_IDENTITY. When `params` is non-NULL,
+ *              its node list is walked; only PDF_DICT_STRING nodes are
+ *              examined. A "/Type" node is logged and otherwise ignored; a
+ *              "/Name" node is logged and handed to parse_enc_method(), whose
+ *              return value becomes the method. With a NULL `params` the
+ *              method stays ENC_IDENTITY.
+ *
+ * Returns CL_SUCCESS after freeing the old token->content and storing the
+ * buffer returned by decrypt_any() together with the length it reported
+ * (narrowed to uint32_t). Returns CL_EPARSE, without replacing the buffer,
+ * when decrypt_any() returns NULL.
+ *
+ * NOTE(review): the current `enc` is passed as the last argument of
+ * parse_enc_method(); presumably it is the fallback when the name is not
+ * found in pdf->CF - confirm against parse_enc_method().
+ */
564
+static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode)
565
+{
566
+    char *decrypted;
567
+    off_t length = token->length; /* separate off_t copy: decrypt_any() receives the length by pointer */
568
+    enum enc_method enc = ENC_IDENTITY;
569
+
570
+    if (mode)
571
+        enc = get_enc_method(pdf, obj);
572
+    else if (params) {
573
+        struct pdf_dict_node *node = params->nodes;
574
+
575
+        while (node) {
576
+            if (node->type == PDF_DICT_STRING) {
577
+                if (!strncmp(node->key, "/Type", 6)) { /* optional field - Type; n = 6 covers the terminating NUL, so this is an exact match */
578
+                    /* MUST be "CryptFilterDecodeParms" (only logged here, not enforced) */
579
+                    cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value));
580
+                } else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name; exact match, as above */
581
+                    /* overrides document and default encryption method */
582
+                    cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value));
583
+                    enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc);
584
+                }
585
+            }
586
+            node = node->next;
587
+        }
588
+    }
589
+
590
+    decrypted = decrypt_any(pdf, obj->id, token->content, &length, enc);
591
+    if (!decrypted) {
592
+        cli_dbgmsg("cli_pdf: failed to decrypt stream\n");
593
+        return CL_EPARSE; /* TODO: what should this value be? */
594
+    }
595
+
596
+    cli_dbgmsg("cli_pdf: decrypted %lld bytes from %lu total bytes\n",
597
+               (long long int)length, (long unsigned)token->length);
598
+
599
+
600
+    free(token->content); /* the token takes over the decrypted buffer below */
601
+    token->content = (uint8_t *)decrypted;
602
+    token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */
603
+    return CL_SUCCESS;
604
+}
562 605
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+/*
1
+ *  Copyright (C) 2016 Cisco and/or its affiliates. All rights reserved.
2
+ *
3
+ *  Author: Kevin Lin
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ *
19
+ *  In addition, as a special exception, the copyright holders give
20
+ *  permission to link the code of portions of this program with the
21
+ *  OpenSSL library under certain conditions as described in each
22
+ *  individual source file, and distribute linked combinations
23
+ *  including the two.
24
+ *  
25
+ *  You must obey the GNU General Public License in all respects
26
+ *  for all of the code used other than OpenSSL.  If you modify
27
+ *  file(s) with this exception, you may extend this exception to your
28
+ *  version of the file(s), but you are not obligated to do so.  If you
29
+ *  do not wish to do so, delete this exception statement from your
30
+ *  version.  If you delete this exception statement from all source
31
+ *  files in the program, then also delete it here.
32
+ */
33
+#ifndef __PDFDECODE_H__
34
+#define __PDFDECODE_H__
35
+
36
+#include "pdf.h"
37
+/*
+ * pdf_decodestream - decode one PDF stream and write the result to `fout`.
+ *
+ * The `streamlen` bytes at `stream` are copied into a private working
+ * buffer, which is then passed with `pdf`, `obj` and `params` to the
+ * internal filter chain. `params` may be NULL; the Crypt filter handler
+ * is the only visible consumer and checks it before use.
+ *
+ * When the chain reports CL_SUCCESS and cli_checklimits() accepts the
+ * decoded length, the decoded bytes are written to `fout` with
+ * cli_writen().
+ *
+ * Returns the filter chain's result code, or CL_EWRITE when the write to
+ * `fout` does not cover the full decoded length. CL_ENULLARG and CL_EMEM
+ * are returned early for argument and allocation failures.
+ * NOTE(review): the exact argument checks are outside the visible hunks -
+ * confirm which arguments must be non-NULL.
+ */
38
+int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout);
39
+
40
+#endif /* __PDFDECODE_H__ */