... | ... |
@@ -2612,9 +2612,6 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2612 | 2612 |
if (rc == -1) |
2613 | 2613 |
pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS; |
2614 | 2614 |
|
2615 |
- /* needs to be here for JSON output decryption */ |
|
2616 |
- pdf_handle_enc(&pdf); |
|
2617 |
- |
|
2618 | 2615 |
/* must parse after finding all objs, so we can flag indirect objects */ |
2619 | 2616 |
for (i=0;i<pdf.nobjs;i++) { |
2620 | 2617 |
struct pdf_obj *obj = &pdf.objs[i]; |
... | ... |
@@ -2635,6 +2632,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2635 | 2635 |
pdf_parseobj(&pdf, obj); |
2636 | 2636 |
} |
2637 | 2637 |
|
2638 |
+ pdf_handle_enc(&pdf); |
|
2638 | 2639 |
if (pdf.flags & (1 << ENCRYPTED_PDF)) |
2639 | 2640 |
cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n", |
2640 | 2641 |
(pdf.flags & (1 << DECRYPTABLE_PDF)) ? |
... | ... |
@@ -3216,8 +3214,12 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam |
3216 | 3216 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3217 | 3217 |
return; |
3218 | 3218 |
|
3219 |
- if (!(pdf->stats.author)) |
|
3220 |
- pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author_b64)); |
|
3219 |
+ if (!(pdf->stats.author)) { |
|
3220 |
+ pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3221 |
+ if (!(pdf->stats.author)) |
|
3222 |
+ return; |
|
3223 |
+ pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta)); |
|
3224 |
+ } |
|
3221 | 3225 |
} |
3222 | 3226 |
#endif |
3223 | 3227 |
|
... | ... |
@@ -3232,8 +3234,12 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna |
3232 | 3232 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3233 | 3233 |
return; |
3234 | 3234 |
|
3235 |
- if (!(pdf->stats.creator)) |
|
3236 |
- pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator_b64)); |
|
3235 |
+ if (!(pdf->stats.creator)) { |
|
3236 |
+ pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3237 |
+ if (!(pdf->stats.creator)) |
|
3238 |
+ return; |
|
3239 |
+ pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta)); |
|
3240 |
+ } |
|
3237 | 3241 |
} |
3238 | 3242 |
#endif |
3239 | 3243 |
|
... | ... |
@@ -3248,8 +3254,12 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str |
3248 | 3248 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3249 | 3249 |
return; |
3250 | 3250 |
|
3251 |
- if (!(pdf->stats.modificationdate)) |
|
3252 |
- pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate_b64)); |
|
3251 |
+ if (!(pdf->stats.modificationdate)) { |
|
3252 |
+ pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3253 |
+ if (!(pdf->stats.modificationdate)) |
|
3254 |
+ return; |
|
3255 |
+ pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta)); |
|
3256 |
+ } |
|
3253 | 3257 |
} |
3254 | 3258 |
#endif |
3255 | 3259 |
|
... | ... |
@@ -3264,8 +3274,12 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
3264 | 3264 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3265 | 3265 |
return; |
3266 | 3266 |
|
3267 |
- if (!(pdf->stats.creationdate)) |
|
3268 |
- pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate_b64)); |
|
3267 |
+ if (!(pdf->stats.creationdate)) { |
|
3268 |
+ pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3269 |
+ if (!(pdf->stats.creationdate)) |
|
3270 |
+ return; |
|
3271 |
+ pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta)); |
|
3272 |
+ } |
|
3269 | 3273 |
} |
3270 | 3274 |
#endif |
3271 | 3275 |
|
... | ... |
@@ -3280,8 +3294,12 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn |
3280 | 3280 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3281 | 3281 |
return; |
3282 | 3282 |
|
3283 |
- if (!(pdf->stats.producer)) |
|
3284 |
- pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer_b64)); |
|
3283 |
+ if (!(pdf->stats.producer)) { |
|
3284 |
+ pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3285 |
+ if (!(pdf->stats.producer)) |
|
3286 |
+ return; |
|
3287 |
+ pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta)); |
|
3288 |
+ } |
|
3285 | 3289 |
} |
3286 | 3290 |
#endif |
3287 | 3291 |
|
... | ... |
@@ -3296,8 +3314,12 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname |
3296 | 3296 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3297 | 3297 |
return; |
3298 | 3298 |
|
3299 |
- if (!(pdf->stats.title)) |
|
3300 |
- pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title_b64)); |
|
3299 |
+ if (!(pdf->stats.title)) { |
|
3300 |
+ pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3301 |
+ if (!(pdf->stats.title)) |
|
3302 |
+ return; |
|
3303 |
+ pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta)); |
|
3304 |
+ } |
|
3301 | 3305 |
} |
3302 | 3306 |
#endif |
3303 | 3307 |
|
... | ... |
@@ -3312,8 +3334,12 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn |
3312 | 3312 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3313 | 3313 |
return; |
3314 | 3314 |
|
3315 |
- if (!(pdf->stats.keywords)) |
|
3316 |
- pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords_b64)); |
|
3315 |
+ if (!(pdf->stats.keywords)) { |
|
3316 |
+ pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3317 |
+ if (!(pdf->stats.keywords)) |
|
3318 |
+ return; |
|
3319 |
+ pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta)); |
|
3320 |
+ } |
|
3317 | 3321 |
} |
3318 | 3322 |
#endif |
3319 | 3323 |
|
... | ... |
@@ -3328,8 +3354,12 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna |
3328 | 3328 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3329 | 3329 |
return; |
3330 | 3330 |
|
3331 |
- if (!(pdf->stats.subject)) |
|
3332 |
- pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject_b64)); |
|
3331 |
+ if (!(pdf->stats.subject)) { |
|
3332 |
+ pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
|
3333 |
+ if (!(pdf->stats.subject)) |
|
3334 |
+ return; |
|
3335 |
+ pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta)); |
|
3336 |
+ } |
|
3333 | 3337 |
} |
3334 | 3338 |
#endif |
3335 | 3339 |
|
... | ... |
@@ -3514,124 +3544,164 @@ static void pdf_export_json(struct pdf_struct *pdf) |
3514 | 3514 |
} |
3515 | 3515 |
|
3516 | 3516 |
if (pdf->stats.author) { |
3517 |
- if (pdf->stats.author_b64) { |
|
3518 |
- cli_jsonstr(pdfobj, "Author", pdf->stats.author); |
|
3519 |
- cli_jsonbool(pdfobj, "Author_base64", 1); |
|
3520 |
- } else { |
|
3521 |
- if (cli_isutf8(pdf->stats.author, strlen(pdf->stats.author))) |
|
3522 |
- cli_jsonstr(pdfobj, "Author", pdf->stats.author); |
|
3523 |
- else { |
|
3524 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.author, strlen(pdf->stats.author)); |
|
3525 |
- cli_jsonstr(pdfobj, "Author", b64); |
|
3526 |
- cli_jsonbool(pdfobj, "Author_base64", 1); |
|
3527 |
- free(b64); |
|
3517 |
+ if (!pdf->stats.author->meta.success) { |
|
3518 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length); |
|
3519 |
+ if (out) { |
|
3520 |
+ free(pdf->stats.author->data); |
|
3521 |
+ pdf->stats.author->data = out; |
|
3522 |
+ pdf->stats.author->meta.length = strlen(out); |
|
3523 |
+ pdf->stats.author->meta.success = 1; |
|
3528 | 3524 |
} |
3529 | 3525 |
} |
3526 |
+ |
|
3527 |
+ if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) { |
|
3528 |
+ cli_jsonstr(pdfobj, "Author", pdf->stats.author->data); |
|
3529 |
+ } else { |
|
3530 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length); |
|
3531 |
+ cli_jsonstr(pdfobj, "Author", b64); |
|
3532 |
+ cli_jsonbool(pdfobj, "Author_base64", 1); |
|
3533 |
+ free(b64); |
|
3534 |
+ } |
|
3530 | 3535 |
} |
3531 | 3536 |
if (pdf->stats.creator) { |
3532 |
- if (pdf->stats.creator_b64) { |
|
3533 |
- cli_jsonstr(pdfobj, "Creator", pdf->stats.creator); |
|
3534 |
- cli_jsonbool(pdfobj, "Creator_base64", 1); |
|
3535 |
- } else { |
|
3536 |
- if (cli_isutf8(pdf->stats.creator, strlen(pdf->stats.creator))) |
|
3537 |
- cli_jsonstr(pdfobj, "Creator", pdf->stats.creator); |
|
3538 |
- else { |
|
3539 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.creator, strlen(pdf->stats.creator)); |
|
3540 |
- cli_jsonstr(pdfobj, "Creator", b64); |
|
3541 |
- cli_jsonbool(pdfobj, "Creator_base64", 1); |
|
3542 |
- free(b64); |
|
3537 |
+ if (!pdf->stats.creator->meta.success) { |
|
3538 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length); |
|
3539 |
+ if (out) { |
|
3540 |
+ free(pdf->stats.creator->data); |
|
3541 |
+ pdf->stats.creator->data = out; |
|
3542 |
+ pdf->stats.creator->meta.length = strlen(out); |
|
3543 |
+ pdf->stats.creator->meta.success = 1; |
|
3543 | 3544 |
} |
3544 | 3545 |
} |
3546 |
+ |
|
3547 |
+ if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) { |
|
3548 |
+ cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data); |
|
3549 |
+ } else { |
|
3550 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length); |
|
3551 |
+ cli_jsonstr(pdfobj, "Creator", b64); |
|
3552 |
+ cli_jsonbool(pdfobj, "Creator_base64", 1); |
|
3553 |
+ free(b64); |
|
3554 |
+ } |
|
3545 | 3555 |
} |
3546 | 3556 |
if (pdf->stats.producer) { |
3547 |
- if (pdf->stats.producer_b64) { |
|
3548 |
- cli_jsonstr(pdfobj, "Producer", pdf->stats.producer); |
|
3549 |
- cli_jsonbool(pdfobj, "Producer_base64", 1); |
|
3550 |
- } else { |
|
3551 |
- if (cli_isutf8(pdf->stats.producer, strlen(pdf->stats.producer))) |
|
3552 |
- cli_jsonstr(pdfobj, "Producer", pdf->stats.producer); |
|
3553 |
- else { |
|
3554 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.producer, strlen(pdf->stats.producer)); |
|
3555 |
- cli_jsonstr(pdfobj, "Producer", b64); |
|
3556 |
- cli_jsonbool(pdfobj, "Producer_base64", 1); |
|
3557 |
- free(b64); |
|
3557 |
+ if (!pdf->stats.producer->meta.success) { |
|
3558 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length); |
|
3559 |
+ if (out) { |
|
3560 |
+ free(pdf->stats.producer->data); |
|
3561 |
+ pdf->stats.producer->data = out; |
|
3562 |
+ pdf->stats.producer->meta.length = strlen(out); |
|
3563 |
+ pdf->stats.producer->meta.success = 1; |
|
3558 | 3564 |
} |
3559 | 3565 |
} |
3566 |
+ |
|
3567 |
+ if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) { |
|
3568 |
+ cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data); |
|
3569 |
+ } else { |
|
3570 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length); |
|
3571 |
+ cli_jsonstr(pdfobj, "Producer", b64); |
|
3572 |
+ cli_jsonbool(pdfobj, "Producer_base64", 1); |
|
3573 |
+ free(b64); |
|
3574 |
+ } |
|
3560 | 3575 |
} |
3561 | 3576 |
if (pdf->stats.modificationdate) { |
3562 |
- if (pdf->stats.modificationdate_b64) { |
|
3563 |
- cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate); |
|
3564 |
- cli_jsonbool(pdfobj, "ModificationDate_base64", 1); |
|
3565 |
- } else { |
|
3566 |
- if (cli_isutf8(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate))) |
|
3567 |
- cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate); |
|
3568 |
- else { |
|
3569 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate)); |
|
3570 |
- cli_jsonstr(pdfobj, "ModificationDate", b64); |
|
3571 |
- cli_jsonbool(pdfobj, "ModificationDate_base64", 1); |
|
3572 |
- free(b64); |
|
3577 |
+ if (!pdf->stats.modificationdate->meta.success) { |
|
3578 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length); |
|
3579 |
+ if (out) { |
|
3580 |
+ free(pdf->stats.modificationdate->data); |
|
3581 |
+ pdf->stats.modificationdate->data = out; |
|
3582 |
+ pdf->stats.modificationdate->meta.length = strlen(out); |
|
3583 |
+ pdf->stats.modificationdate->meta.success = 1; |
|
3573 | 3584 |
} |
3574 | 3585 |
} |
3586 |
+ |
|
3587 |
+ if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) { |
|
3588 |
+ cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data); |
|
3589 |
+ } else { |
|
3590 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length); |
|
3591 |
+ cli_jsonstr(pdfobj, "ModificationDate", b64); |
|
3592 |
+ cli_jsonbool(pdfobj, "ModificationDate_base64", 1); |
|
3593 |
+ free(b64); |
|
3594 |
+ } |
|
3575 | 3595 |
} |
3576 | 3596 |
if (pdf->stats.creationdate) { |
3577 |
- if (pdf->stats.creationdate_b64) { |
|
3578 |
- cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate); |
|
3579 |
- cli_jsonbool(pdfobj, "CreationDate_base64", 1); |
|
3580 |
- } else { |
|
3581 |
- if (cli_isutf8(pdf->stats.creationdate, strlen(pdf->stats.creationdate))) |
|
3582 |
- cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate); |
|
3583 |
- else { |
|
3584 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.creationdate, strlen(pdf->stats.creationdate)); |
|
3585 |
- cli_jsonstr(pdfobj, "CreationDate", b64); |
|
3586 |
- cli_jsonbool(pdfobj, "CreationDate_base64", 1); |
|
3587 |
- free(b64); |
|
3597 |
+ if (!pdf->stats.creationdate->meta.success) { |
|
3598 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length); |
|
3599 |
+ if (out) { |
|
3600 |
+ free(pdf->stats.creationdate->data); |
|
3601 |
+ pdf->stats.creationdate->data = out; |
|
3602 |
+ pdf->stats.creationdate->meta.length = strlen(out); |
|
3603 |
+ pdf->stats.creationdate->meta.success = 1; |
|
3588 | 3604 |
} |
3589 | 3605 |
} |
3606 |
+ |
|
3607 |
+ if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) { |
|
3608 |
+ cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data); |
|
3609 |
+ } else { |
|
3610 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length); |
|
3611 |
+ cli_jsonstr(pdfobj, "CreationDate", b64); |
|
3612 |
+ cli_jsonbool(pdfobj, "CreationDate_base64", 1); |
|
3613 |
+ free(b64); |
|
3614 |
+ } |
|
3590 | 3615 |
} |
3591 | 3616 |
if (pdf->stats.title) { |
3592 |
- if (pdf->stats.title_b64) { |
|
3593 |
- cli_jsonstr(pdfobj, "Title", pdf->stats.title); |
|
3594 |
- cli_jsonbool(pdfobj, "Title_base64", 1); |
|
3595 |
- } else { |
|
3596 |
- if (cli_isutf8(pdf->stats.title, strlen(pdf->stats.title))) |
|
3597 |
- cli_jsonstr(pdfobj, "Title", pdf->stats.title); |
|
3598 |
- else { |
|
3599 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.title, strlen(pdf->stats.title)); |
|
3600 |
- cli_jsonstr(pdfobj, "Title", b64); |
|
3601 |
- cli_jsonbool(pdfobj, "Title_base64", 1); |
|
3602 |
- free(b64); |
|
3617 |
+ if (!pdf->stats.title->meta.success) { |
|
3618 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length); |
|
3619 |
+ if (out) { |
|
3620 |
+ free(pdf->stats.title->data); |
|
3621 |
+ pdf->stats.title->data = out; |
|
3622 |
+ pdf->stats.title->meta.length = strlen(out); |
|
3623 |
+ pdf->stats.title->meta.success = 1; |
|
3603 | 3624 |
} |
3604 | 3625 |
} |
3626 |
+ |
|
3627 |
+ if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) { |
|
3628 |
+ cli_jsonstr(pdfobj, "Title", pdf->stats.title->data); |
|
3629 |
+ } else { |
|
3630 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length); |
|
3631 |
+ cli_jsonstr(pdfobj, "Title", b64); |
|
3632 |
+ cli_jsonbool(pdfobj, "Title_base64", 1); |
|
3633 |
+ free(b64); |
|
3634 |
+ } |
|
3605 | 3635 |
} |
3606 | 3636 |
if (pdf->stats.subject) { |
3607 |
- if (pdf->stats.subject_b64) { |
|
3608 |
- cli_jsonstr(pdfobj, "Subject", pdf->stats.subject); |
|
3609 |
- cli_jsonbool(pdfobj, "Subject_base64", 1); |
|
3610 |
- } else { |
|
3611 |
- if (cli_isutf8(pdf->stats.subject, strlen(pdf->stats.subject))) |
|
3612 |
- cli_jsonstr(pdfobj, "Subject", pdf->stats.subject); |
|
3613 |
- else { |
|
3614 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.subject, strlen(pdf->stats.subject)); |
|
3615 |
- cli_jsonstr(pdfobj, "Subject", b64); |
|
3616 |
- cli_jsonbool(pdfobj, "Subject_base64", 1); |
|
3617 |
- free(b64); |
|
3637 |
+ if (!pdf->stats.subject->meta.success) { |
|
3638 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length); |
|
3639 |
+ if (out) { |
|
3640 |
+ free(pdf->stats.subject->data); |
|
3641 |
+ pdf->stats.subject->data = out; |
|
3642 |
+ pdf->stats.subject->meta.length = strlen(out); |
|
3643 |
+ pdf->stats.subject->meta.success = 1; |
|
3618 | 3644 |
} |
3619 | 3645 |
} |
3646 |
+ |
|
3647 |
+ if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) { |
|
3648 |
+ cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data); |
|
3649 |
+ } else { |
|
3650 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length); |
|
3651 |
+ cli_jsonstr(pdfobj, "Subject", b64); |
|
3652 |
+ cli_jsonbool(pdfobj, "Subject_base64", 1); |
|
3653 |
+ free(b64); |
|
3654 |
+ } |
|
3620 | 3655 |
} |
3621 | 3656 |
if (pdf->stats.keywords) { |
3622 |
- if (pdf->stats.keywords_b64) { |
|
3623 |
- cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords); |
|
3624 |
- cli_jsonbool(pdfobj, "Keywords_base64", 1); |
|
3625 |
- } else { |
|
3626 |
- if (cli_isutf8(pdf->stats.keywords, strlen(pdf->stats.keywords))) |
|
3627 |
- cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords); |
|
3628 |
- else { |
|
3629 |
- char *b64 = (char *)cl_base64_encode(pdf->stats.keywords, strlen(pdf->stats.keywords)); |
|
3630 |
- cli_jsonstr(pdfobj, "Keywords", b64); |
|
3631 |
- cli_jsonbool(pdfobj, "Keywords_base64", 1); |
|
3632 |
- free(b64); |
|
3657 |
+ if (!pdf->stats.keywords->meta.success) { |
|
3658 |
+ char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length); |
|
3659 |
+ if (out) { |
|
3660 |
+ free(pdf->stats.keywords->data); |
|
3661 |
+ pdf->stats.keywords->data = out; |
|
3662 |
+ pdf->stats.keywords->meta.length = strlen(out); |
|
3663 |
+ pdf->stats.keywords->meta.success = 1; |
|
3633 | 3664 |
} |
3634 | 3665 |
} |
3666 |
+ |
|
3667 |
+ if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) { |
|
3668 |
+ cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data); |
|
3669 |
+ } else { |
|
3670 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length); |
|
3671 |
+ cli_jsonstr(pdfobj, "Keywords", b64); |
|
3672 |
+ cli_jsonbool(pdfobj, "Keywords_base64", 1); |
|
3673 |
+ free(b64); |
|
3674 |
+ } |
|
3635 | 3675 |
} |
3636 | 3676 |
if (pdf->stats.ninvalidobjs) |
3637 | 3677 |
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs); |
... | ... |
@@ -3713,41 +3783,57 @@ static void pdf_export_json(struct pdf_struct *pdf) |
3713 | 3713 |
|
3714 | 3714 |
cleanup: |
3715 | 3715 |
if ((pdf->stats.author)) { |
3716 |
+ if (pdf->stats.author->data) |
|
3717 |
+ free(pdf->stats.author->data); |
|
3716 | 3718 |
free(pdf->stats.author); |
3717 | 3719 |
pdf->stats.author = NULL; |
3718 | 3720 |
} |
3719 | 3721 |
|
3720 | 3722 |
if (pdf->stats.creator) { |
3723 |
+ if (pdf->stats.creator->data) |
|
3724 |
+ free(pdf->stats.creator->data); |
|
3721 | 3725 |
free(pdf->stats.creator); |
3722 | 3726 |
pdf->stats.creator = NULL; |
3723 | 3727 |
} |
3724 | 3728 |
|
3725 | 3729 |
if (pdf->stats.producer) { |
3730 |
+ if (pdf->stats.producer->data) |
|
3731 |
+ free(pdf->stats.producer->data); |
|
3726 | 3732 |
free(pdf->stats.producer); |
3727 | 3733 |
pdf->stats.producer = NULL; |
3728 | 3734 |
} |
3729 | 3735 |
|
3730 | 3736 |
if (pdf->stats.modificationdate) { |
3737 |
+ if (pdf->stats.modificationdate->data) |
|
3738 |
+ free(pdf->stats.modificationdate->data); |
|
3731 | 3739 |
free(pdf->stats.modificationdate); |
3732 | 3740 |
pdf->stats.modificationdate = NULL; |
3733 | 3741 |
} |
3734 | 3742 |
|
3735 | 3743 |
if (pdf->stats.creationdate) { |
3744 |
+ if (pdf->stats.creationdate->data) |
|
3745 |
+ free(pdf->stats.creationdate->data); |
|
3736 | 3746 |
free(pdf->stats.creationdate); |
3737 | 3747 |
pdf->stats.creationdate = NULL; |
3738 | 3748 |
} |
3739 | 3749 |
|
3740 | 3750 |
if (pdf->stats.title) { |
3751 |
+ if (pdf->stats.title->data) |
|
3752 |
+ free(pdf->stats.title->data); |
|
3741 | 3753 |
free(pdf->stats.title); |
3742 | 3754 |
pdf->stats.title = NULL; |
3743 | 3755 |
} |
3744 | 3756 |
|
3745 | 3757 |
if (pdf->stats.subject) { |
3758 |
+ if (pdf->stats.subject->data) |
|
3759 |
+ free(pdf->stats.subject->data); |
|
3746 | 3760 |
free(pdf->stats.subject); |
3747 | 3761 |
pdf->stats.subject = NULL; |
3748 | 3762 |
} |
3749 | 3763 |
|
3750 | 3764 |
if (pdf->stats.keywords) { |
3765 |
+ if (pdf->stats.keywords->data) |
|
3766 |
+ free(pdf->stats.keywords->data); |
|
3751 | 3767 |
free(pdf->stats.keywords); |
3752 | 3768 |
pdf->stats.keywords = NULL; |
3753 | 3769 |
} |
... | ... |
@@ -62,6 +62,17 @@ struct pdf_dict { |
62 | 62 |
struct pdf_dict_node *tail; |
63 | 63 |
}; |
64 | 64 |
|
65 |
+struct pdf_stats_entry { |
|
66 |
+ char *data; |
|
67 |
+ |
|
68 |
+ /* populated by pdf_parse_string */ |
|
69 |
+ struct pdf_stats_metadata { |
|
70 |
+ int length; |
|
71 |
+ struct pdf_obj *obj; |
|
72 |
+ int success; /* if finalize succeeds */ |
|
73 |
+ } meta; |
|
74 |
+}; |
|
75 |
+ |
|
65 | 76 |
struct pdf_stats { |
66 | 77 |
int32_t ninvalidobjs; /* Number of invalid objects */ |
67 | 78 |
int32_t njs; /* Number of javascript objects */ |
... | ... |
@@ -88,22 +99,14 @@ struct pdf_stats { |
88 | 88 |
int32_t nrichmedia; /* Number of RichMedia objects */ |
89 | 89 |
int32_t nacroform; /* Number of AcroForm objects */ |
90 | 90 |
int32_t nxfa; /* Number of XFA objects */ |
91 |
- char *author; /* Author of the PDF */ |
|
92 |
- int8_t author_b64; |
|
93 |
- char *creator; /* Application used to create the PDF */ |
|
94 |
- int8_t creator_b64; |
|
95 |
- char *producer; /* Application used to produce the PDF */ |
|
96 |
- int8_t producer_b64; |
|
97 |
- char *creationdate; /* Date the PDF was created */ |
|
98 |
- int8_t creationdate_b64; |
|
99 |
- char *modificationdate; /* Date the PDF was modified */ |
|
100 |
- int8_t modificationdate_b64; |
|
101 |
- char *title; /* Title of the PDF */ |
|
102 |
- int8_t title_b64; |
|
103 |
- char *subject; /* Subject of the PDF */ |
|
104 |
- int8_t subject_b64; |
|
105 |
- char *keywords; /* Keywords of the PDF */ |
|
106 |
- int8_t keywords_b64; |
|
91 |
+ struct pdf_stats_entry *author; /* Author of the PDF */ |
|
92 |
+ struct pdf_stats_entry *creator; /* Application used to create the PDF */ |
|
93 |
+ struct pdf_stats_entry *producer; /* Application used to produce the PDF */ |
|
94 |
+ struct pdf_stats_entry *creationdate; /* Date the PDF was created */ |
|
95 |
+ struct pdf_stats_entry *modificationdate; /* Date the PDF was modified */ |
|
96 |
+ struct pdf_stats_entry *title; /* Title of the PDF */ |
|
97 |
+ struct pdf_stats_entry *subject; /* Subject of the PDF */ |
|
98 |
+ struct pdf_stats_entry *keywords; /* Keywords of the PDF */ |
|
107 | 99 |
}; |
108 | 100 |
|
109 | 101 |
|
... | ... |
@@ -156,7 +159,8 @@ void pdf_handle_enc(struct pdf_struct *pdf); |
156 | 156 |
char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method); |
157 | 157 |
enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj); |
158 | 158 |
|
159 |
-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *b64); |
|
159 |
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len); |
|
160 |
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats); |
|
160 | 161 |
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
161 | 162 |
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
162 | 163 |
int is_object_reference(char *begin, char **endchar, uint32_t *id); |
... | ... |
@@ -237,12 +237,15 @@ static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, con |
237 | 237 |
return NULL; |
238 | 238 |
} |
239 | 239 |
|
240 |
-static char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len) |
|
240 |
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len) |
|
241 | 241 |
{ |
242 | 242 |
char *wrkstr, *output = NULL; |
243 | 243 |
size_t wrklen = len, outlen; |
244 | 244 |
unsigned int i, likelyutf = 0; |
245 | 245 |
|
246 |
+ if (!in) |
|
247 |
+ return NULL; |
|
248 |
+ |
|
246 | 249 |
/* get a working copy */ |
247 | 250 |
wrkstr = cli_calloc(len+1, sizeof(char)); |
248 | 251 |
if (!wrkstr) |
... | ... |
@@ -363,7 +366,7 @@ static char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, co |
363 | 363 |
return wrkstr; |
364 | 364 |
} |
365 | 365 |
|
366 |
-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *b64) |
|
366 |
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta) |
|
367 | 367 |
{ |
368 | 368 |
const char *q = objstart; |
369 | 369 |
char *p1, *p2; |
... | ... |
@@ -493,14 +496,27 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
493 | 493 |
switch (*p3) { |
494 | 494 |
case '(': |
495 | 495 |
case '<': |
496 |
- res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, b64); |
|
496 |
+ res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta); |
|
497 | 497 |
break; |
498 | 498 |
default: |
499 | 499 |
res = pdf_finalize_string(pdf, obj, begin, objsize2); |
500 | 500 |
if (!res) { |
501 |
- res = (char *)cl_base64_encode(begin, objsize2); |
|
502 |
- if (b64) *b64 = 1; |
|
503 |
- } |
|
501 |
+ res = cli_calloc(1, objsize2+1); |
|
502 |
+ if (!(res)) |
|
503 |
+ return NULL; |
|
504 |
+ memcpy(res, begin, objsize2); |
|
505 |
+ res[objsize2] = '\0'; |
|
506 |
+ |
|
507 |
+ if (meta) { |
|
508 |
+ meta->length = objsize2; |
|
509 |
+ meta->obj = obj; |
|
510 |
+ meta->success = 0; |
|
511 |
+ } |
|
512 |
+ } else if (meta) { |
|
513 |
+ meta->length = strlen(res); |
|
514 |
+ meta->obj = obj; |
|
515 |
+ meta->success = 1; |
|
516 |
+ } |
|
504 | 517 |
} |
505 | 518 |
free(begin); |
506 | 519 |
} |
... | ... |
@@ -571,8 +587,21 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
571 | 571 |
|
572 | 572 |
res = pdf_finalize_string(pdf, obj, p1, len); |
573 | 573 |
if (!res) { |
574 |
- res = (char *)cl_base64_encode(p1, len); |
|
575 |
- if (b64) *b64 = 1; |
|
574 |
+ res = cli_calloc(1, len+1); |
|
575 |
+ if (!(res)) |
|
576 |
+ return NULL; |
|
577 |
+ memcpy(res, p1, len); |
|
578 |
+ res[len] = '\0'; |
|
579 |
+ |
|
580 |
+ if (meta) { |
|
581 |
+ meta->length = len; |
|
582 |
+ meta->obj = obj; |
|
583 |
+ meta->success = 0; |
|
584 |
+ } |
|
585 |
+ } else if (meta) { |
|
586 |
+ meta->length = strlen(res); |
|
587 |
+ meta->obj = obj; |
|
588 |
+ meta->success = 1; |
|
576 | 589 |
} |
577 | 590 |
|
578 | 591 |
if (res && endchar) |