Browse code

pdfdecode: add dumping intermediate filter buffers

Kevin Lin authored on 2016/03/31 06:30:06
Showing 1 changed file
... ...
@@ -64,18 +64,22 @@
64 64
 #include "bytecode.h"
65 65
 #include "bytecode_api.h"
66 66
 
67
+#define PDF_FILTER_DUMP_INTERMEDIATE 1
68
+
67 69
 struct pdf_token {
68 70
     uint32_t length;
69 71
     uint8_t *content;
70 72
 };
71 73
 
74
+static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
75
+static  int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);
76
+
72 77
 static  int filter_ascii85decode(struct pdf_token *token);
73 78
 static  int filter_rldecode(struct pdf_token *token);
74 79
 static  int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
75 80
 static  int filter_asciihexdecode(struct pdf_token *token);
76 81
 static  int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
77 82
 
78
-static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
79 83
 
80 84
 int pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout)
81 85
 {
... ...
@@ -134,6 +138,12 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
134 134
         if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS)
135 135
             return rc;
136 136
 
137
+#if PDF_FILTER_DUMP_INTERMEDIATE
138
+        if (pdf->ctx->engine->keeptmp) {
139
+            if ((rc = pdf_decode_dump(pdf, obj, token, 0)) != CL_SUCCESS)
140
+                return rc;
141
+        }
142
+#endif
137 143
     }
138 144
 
139 145
     /* TODO - MAY BE SUBJECT TO CHANGE */
... ...
@@ -186,12 +196,45 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
186 186
             break;
187 187
         }
188 188
 
189
-        /*TODO: check rc value*/
190 189
         /*TODO: check content field*/
191 190
         /*TODO: check length value*/
192
-        /*IF INTERMEDIATE DUMPING - PUT HERE*/
191
+        /*TODO: check rc value*/
192
+#if PDF_FILTER_DUMP_INTERMEDIATE
193
+        if (pdf->ctx->engine->keeptmp) {
194
+            if ((rc = pdf_decode_dump(pdf, obj, token, i+1)) != CL_SUCCESS)
195
+                return rc;
196
+        }
197
+#endif
198
+    }
199
+
200
+    return CL_SUCCESS;
201
+}
202
+
203
+/* used only for intermediate dumping */
204
+    static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl)
205
+{
206
+    char fname[1024];
207
+    int ifd;
208
+
209
+    snprintf(fname, sizeof(fname), "%s"PATHSEP"pdf%02u_%02ui", pdf->dir, (pdf->files-1), lvl);
210
+    ifd = open(fname, O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
211
+    if (ifd < 0) {
212
+        char err[128];
213
+
214
+        cli_errmsg("cli_pdf: can't create intermediate temporary file %s: %s\n", fname, cli_strerror(errno, err, sizeof(err)));
215
+        return CL_ETMPFILE;
216
+    }
217
+
218
+    cli_dbgmsg("cli_pdf: decoded filter %d obj %u %u\n", lvl, obj->id>>8, obj->id&0xff);
219
+    cli_dbgmsg("         ... to %s\n", fname);
220
+
221
+    if (cli_writen(ifd, token->content, token->length) != token->length) {
222
+        cli_errmsg("cli_pdf: failed to write output file\n");
223
+        close(ifd);
224
+        return CL_EWRITE;
193 225
     }
194 226
 
227
+    close(ifd);
195 228
     return CL_SUCCESS;
196 229
 }
197 230
 
... ...
@@ -621,7 +664,7 @@ static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pd
621 621
                 } else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name */
622 622
                     /* overrides document and default encryption method */
623 623
                     cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value));
624
-                    enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc);
624
+                    //enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc);
625 625
                 }
626 626
             }
627 627
             node = node->next;