Browse code

pdfdecode: do not apply forced decryption to /XRef streams

Kevin Lin authored on 2016/04/19 06:11:12
Showing 3 changed files
... ...
@@ -871,6 +871,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
871 871
                 int len = p_stream;
872 872
                 const char *pstr;
873 873
                 struct pdf_dict *dparams = NULL;
874
+                int xref = 0;
874 875
 
875 876
                 length = find_length(pdf, obj, start, p_stream);
876 877
                 if (length < 0)
... ...
@@ -926,6 +927,9 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
926 926
                     }
927 927
                 }
928 928
 
929
+                if (cli_memstr(start, p_stream, "/XRef", 5))
930
+                    xref = 1;
931
+
929 932
                 cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
930 933
 
931 934
                 pstr = pdf_getdict(start, &len, "/DecodeParms");
... ...
@@ -953,7 +957,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
953 953
                         cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n");
954 954
                 }
955 955
 
956
-                sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, length, fout, &rc);
956
+                sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, length, xref, fout, &rc);
957 957
                 if (dparams)
958 958
                     pdf_free_dict(dparams);
959 959
 
... ...
@@ -963,14 +967,6 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
963 963
                 }
964 964
 
965 965
                 cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
966
-
967
-#if 0
968
-                if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
969
-                    if (cli_memstr(start, p_stream, "/XRef", 5)) {
970
-                        cli_dbgmsg("cli_pdf: cross reference stream, skipping\n");
971
-                    }
972
-                }
973
-#endif
974 966
             } else {
975 967
                 noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
976 968
             }
... ...
@@ -65,9 +65,13 @@
65 65
 #include "bytecode_api.h"
66 66
 #include "lzw/lzwdec.h"
67 67
 
68
+#define PDFTOKEN_FLAG_XREF 0x1
69
+
68 70
 struct pdf_token {
69
-    uint32_t length;
70
-    uint8_t *content;
71
+    uint32_t flags;    /* tracking flags */
72
+
73
+    uint32_t length;   /* length of current content */
74
+    uint8_t *content;  /* content stream */
71 75
 };
72 76
 
73 77
 static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
... ...
@@ -80,7 +84,7 @@ static  int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, s
80 80
 static  int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
81 81
 static  int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
82 82
 
83
-off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout, int *rc)
83
+off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc)
84 84
 {
85 85
     struct pdf_token *token;
86 86
     off_t rv;
... ...
@@ -104,6 +108,10 @@ off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_d
104 104
         return -1;
105 105
     }
106 106
 
107
+    token->flags = 0;
108
+    if (xref)
109
+        token->flags |= PDFTOKEN_FLAG_XREF;
110
+
107 111
     token->content = cli_malloc(streamlen);
108 112
     if (!token->content) {
109 113
         free(token);
... ...
@@ -150,9 +158,13 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
150 150
      * if none, force a DECRYPT filter application
151 151
      */
152 152
     if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) {
153
-        cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n");
154
-        if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) {
155
-            return rc;
153
+        if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */
154
+            cli_dbgmsg("cli_pdf: skipping decoding => non-filter CRYPT (reason: xref)\n");
155
+        else {
156
+            cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n");
157
+            if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) {
158
+                return rc;
159
+            }
156 160
         }
157 161
     }
158 162
 
... ...
@@ -224,15 +236,15 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
224 224
                     reason = "detection";
225 225
                     break;
226 226
                 case CL_BREAK:
227
-                    reason = "break decoding";
227
+                    reason = "decoding break";
228 228
                     break;
229 229
                 default:
230
-                    reason = "error decoding";
230
+                    reason = "decoding error";
231 231
                     break;
232 232
                 }
233 233
 
234
-                cli_dbgmsg("cli_pdf: %s, stopping after %d (of %lu) filters\n",
235
-                           reason, i, (long unsigned)(obj->numfilters));
234
+                cli_dbgmsg("cli_pdf: stopping after %d (of %lu) filters (reason: %s)\n",
235
+                           i, (long unsigned)(obj->numfilters), reason);
236 236
                 break;
237 237
             }
238 238
         }
... ...
@@ -36,6 +36,6 @@
36 36
 
37 37
 #include "pdf.h"
38 38
 
39
-off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout, int *rc);
39
+off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc);
40 40
 
41 41
 #endif /* __PDFDECODE_H__ */