Browse code

Add more properties to the PDF preclass logic

Shawn Webb authored on 2014/06/26 05:26:33
Showing 2 changed files
... ...
@@ -2408,6 +2408,10 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2408 2408
     if (q <= eofmap) {
2409 2409
         pdf.flags |= 1 << BAD_PDF_TRAILER;
2410 2410
         cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
2411
+#if HAVE_JSON
2412
+        if (pdfobj)
2413
+            cli_jsonbool(pdfobj, "NoEOF", 1);
2414
+#endif
2411 2415
     } else {
2412 2416
         const char *t;
2413 2417
 
... ...
@@ -2421,6 +2425,10 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2421 2421
         if (q <= eofmap) {
2422 2422
             pdf.flags |= 1 << BAD_PDF_TRAILER;
2423 2423
             cli_dbgmsg("cli_pdf: startxref not found\n");
2424
+#if HAVE_JSON
2425
+            if (pdfobj)
2426
+                cli_jsonbool(pdfobj, "NoXREF", 1);
2427
+#endif
2424 2428
         } else {
2425 2429
             for (t=q;t > eofmap; t--) {
2426 2430
                 if (memcmp(t,"trailer",7) == 0)
... ...
@@ -3067,8 +3075,6 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
3067 3067
 
3068 3068
     dict = pdf_parse_dict(pdf, obj, objsz, begin, NULL);
3069 3069
     if (dict) {
3070
-        cli_errmsg("==== ==== ==== ====\n");
3071
-        pdf_print_dict(dict, 0);
3072 3070
         pdf_free_dict(dict);
3073 3071
     }
3074 3072
 
... ...
@@ -3193,6 +3199,7 @@ static void pdf_export_json(struct pdf_struct *pdf)
3193 3193
 {
3194 3194
 #if HAVE_JSON
3195 3195
     json_object *pdfobj;
3196
+    unsigned long i;
3196 3197
 
3197 3198
     if (!(pdf))
3198 3199
         return;
... ...
@@ -3284,6 +3291,18 @@ static void pdf_export_json(struct pdf_struct *pdf)
3284 3284
             cli_jsonbool(pdfobj, "Decryptable", 1);
3285 3285
     }
3286 3286
 
3287
+    for (i=0; i < pdf->nobjs; i++) {
3288
+        if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) {
3289
+            json_object *truncobj;
3290
+
3291
+            truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");
3292
+            if (!(truncobj))
3293
+                continue;
3294
+
3295
+            cli_jsonint_array(truncobj, pdf->objs[i].id>>8);
3296
+        }
3297
+    }
3298
+
3287 3299
 cleanup:
3288 3300
     if ((pdf->stats.author)) {
3289 3301
         free(pdf->stats.author);
... ...
@@ -204,6 +204,7 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
204 204
 
205 205
     t |= (n&0xff);
206 206
 
207
+    /* Resume at p2 and skip any further whitespace */
207 208
     p1 = p2;
208 209
     while (p1 < end && isspace(p1[0]))
209 210
         p1++;
... ...
@@ -268,7 +269,12 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
268 268
     if ((p1 - q) == objsize)
269 269
         return NULL;
270 270
 
271
-    /* We should be at the start of the string, minus 1 */
271
+    /*
272
+     * If str is non-null:
273
+     *     We should be at the start of the string, minus 1
274
+     * Else:
275
+     *     We should be at the start of the string
276
+     */
272 277
 
273 278
     p2 = q + objsize;
274 279
     if (is_object_reference(p1, &p2, &objid)) {