... | ... |
@@ -3511,22 +3511,86 @@ static void pdf_export_json(struct pdf_struct *pdf) |
3511 | 3511 |
goto cleanup; |
3512 | 3512 |
} |
3513 | 3513 |
|
3514 |
- if (pdf->stats.author) |
|
3515 |
- cli_jsonstr(pdfobj, "Author", pdf->stats.author); |
|
3516 |
- if (pdf->stats.creator) |
|
3517 |
- cli_jsonstr(pdfobj, "Creator", pdf->stats.creator); |
|
3518 |
- if (pdf->stats.producer) |
|
3519 |
- cli_jsonstr(pdfobj, "Producer", pdf->stats.producer); |
|
3520 |
- if (pdf->stats.modificationdate) |
|
3521 |
- cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate); |
|
3522 |
- if (pdf->stats.creationdate) |
|
3523 |
- cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate); |
|
3524 |
- if (pdf->stats.title) |
|
3525 |
- cli_jsonstr(pdfobj, "Title", pdf->stats.title); |
|
3526 |
- if (pdf->stats.subject) |
|
3527 |
- cli_jsonstr(pdfobj, "Subject", pdf->stats.subject); |
|
3528 |
- if (pdf->stats.keywords) |
|
3529 |
- cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords); |
|
3514 |
+ if (pdf->stats.author) { |
|
3515 |
+ if (cli_isutf8(pdf->stats.author, strlen(pdf->stats.author))) |
|
3516 |
+ cli_jsonstr(pdfobj, "Author", pdf->stats.author); |
|
3517 |
+ else { |
|
3518 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.author, strlen(pdf->stats.author)); |
|
3519 |
+ cli_jsonstr(pdfobj, "Author", b64); |
|
3520 |
+ cli_jsonbool(pdfobj, "Author_base64", 1); |
|
3521 |
+ free(b64); |
|
3522 |
+ } |
|
3523 |
+ } |
|
3524 |
+ if (pdf->stats.creator) { |
|
3525 |
+ if (cli_isutf8(pdf->stats.creator, strlen(pdf->stats.creator))) |
|
3526 |
+ cli_jsonstr(pdfobj, "Creator", pdf->stats.creator); |
|
3527 |
+ else { |
|
3528 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.creator, strlen(pdf->stats.creator)); |
|
3529 |
+ cli_jsonstr(pdfobj, "Creator", b64); |
|
3530 |
+ cli_jsonbool(pdfobj, "Creator_base64", 1); |
|
3531 |
+ free(b64); |
|
3532 |
+ } |
|
3533 |
+ } |
|
3534 |
+ if (pdf->stats.producer) { |
|
3535 |
+ if (cli_isutf8(pdf->stats.producer, strlen(pdf->stats.producer))) |
|
3536 |
+ cli_jsonstr(pdfobj, "Producer", pdf->stats.producer); |
|
3537 |
+ else { |
|
3538 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.producer, strlen(pdf->stats.producer)); |
|
3539 |
+ cli_jsonstr(pdfobj, "Producer", b64); |
|
3540 |
+ cli_jsonbool(pdfobj, "Producer_base64", 1); |
|
3541 |
+ free(b64); |
|
3542 |
+ } |
|
3543 |
+ } |
|
3544 |
+ if (pdf->stats.modificationdate) { |
|
3545 |
+ if (cli_isutf8(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate))) |
|
3546 |
+ cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate); |
|
3547 |
+ else { |
|
3548 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate)); |
|
3549 |
+ cli_jsonstr(pdfobj, "ModificationDate", b64); |
|
3550 |
+ cli_jsonbool(pdfobj, "ModificationDate_base64", 1); |
|
3551 |
+ free(b64); |
|
3552 |
+ } |
|
3553 |
+ } |
|
3554 |
+ if (pdf->stats.creationdate) { |
|
3555 |
+ if (cli_isutf8(pdf->stats.creationdate, strlen(pdf->stats.creationdate))) |
|
3556 |
+ cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate); |
|
3557 |
+ else { |
|
3558 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.creationdate, strlen(pdf->stats.creationdate)); |
|
3559 |
+ cli_jsonstr(pdfobj, "CreationDate", b64); |
|
3560 |
+ cli_jsonbool(pdfobj, "CreationDate_base64", 1); |
|
3561 |
+ free(b64); |
|
3562 |
+ } |
|
3563 |
+ } |
|
3564 |
+ if (pdf->stats.title) { |
|
3565 |
+ if (cli_isutf8(pdf->stats.title, strlen(pdf->stats.title))) |
|
3566 |
+ cli_jsonstr(pdfobj, "Title", pdf->stats.title); |
|
3567 |
+ else { |
|
3568 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.title, strlen(pdf->stats.title)); |
|
3569 |
+ cli_jsonstr(pdfobj, "Title", b64); |
|
3570 |
+ cli_jsonbool(pdfobj, "Title_base64", 1); |
|
3571 |
+ free(b64); |
|
3572 |
+ } |
|
3573 |
+ } |
|
3574 |
+ if (pdf->stats.subject) { |
|
3575 |
+ if (cli_isutf8(pdf->stats.subject, strlen(pdf->stats.subject))) |
|
3576 |
+ cli_jsonstr(pdfobj, "Subject", pdf->stats.subject); |
|
3577 |
+ else { |
|
3578 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.subject, strlen(pdf->stats.subject)); |
|
3579 |
+ cli_jsonstr(pdfobj, "Subject", b64); |
|
3580 |
+ cli_jsonbool(pdfobj, "Subject_base64", 1); |
|
3581 |
+ free(b64); |
|
3582 |
+ } |
|
3583 |
+ } |
|
3584 |
+ if (pdf->stats.keywords) { |
|
3585 |
+ if (cli_isutf8(pdf->stats.keywords, strlen(pdf->stats.keywords))) |
|
3586 |
+ cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords); |
|
3587 |
+ else { |
|
3588 |
+ char *b64 = (char *)cl_base64_encode(pdf->stats.keywords, strlen(pdf->stats.keywords)); |
|
3589 |
+ cli_jsonstr(pdfobj, "Keywords", b64); |
|
3590 |
+ cli_jsonbool(pdfobj, "Keywords_base64", 1); |
|
3591 |
+ free(b64); |
|
3592 |
+ } |
|
3593 |
+ } |
|
3530 | 3594 |
if (pdf->stats.ninvalidobjs) |
3531 | 3595 |
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs); |
3532 | 3596 |
if (pdf->stats.njs) |
... | ... |
@@ -666,3 +666,49 @@ char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type) |
666 | 666 |
s2[j] = '\0'; |
667 | 667 |
return s2; |
668 | 668 |
} |
669 |
+ |
|
670 |
/*
 * Check whether a byte buffer is well-formed UTF-8 as defined by RFC 3629.
 *
 * buf  Bytes to inspect; need not be NUL-terminated. NUL bytes inside the
 *      buffer are treated as ordinary ASCII.
 * len  Number of bytes in buf to inspect.
 *
 * Returns 1 when every byte in buf[0..len) belongs to a valid sequence
 * (an empty buffer is valid), and 0 otherwise. A sequence is rejected when:
 *   - it starts with a continuation byte (10xxxxxx);
 *   - it uses a 5- or 6-byte lead (111110xx / 1111110x) or 0xFE / 0xFF;
 *   - it is truncated by the end of the buffer;
 *   - a trailing byte is not a continuation byte;
 *   - it is an overlong encoding (e.g. C0 80 for U+0000);
 *   - it decodes to a UTF-16 surrogate (U+D800..U+DFFF);
 *   - it decodes to a value above U+10FFFF.
 *
 * A NULL buf is accepted only together with len == 0.
 */
int cli_isutf8(const unsigned char *buf, unsigned int len)
{
    unsigned int i = 0;

    if(buf == NULL) {
        return len == 0;
    }

    while(i < len) {
        const unsigned char lead = buf[i++];
        unsigned int following, j;
        unsigned long cp;     /* decoded code point */
        unsigned long min_cp; /* smallest value this length may encode */

        if(lead < 0x80) { /* 0xxxxxxx is plain ASCII */
            continue;
        }

        if((lead & 0xE0) == 0xC0) { /* 110xxxxx */
            following = 1;
            cp        = lead & 0x1FU;
            min_cp    = 0x80UL;
        } else if((lead & 0xF0) == 0xE0) { /* 1110xxxx */
            following = 2;
            cp        = lead & 0x0FU;
            min_cp    = 0x800UL;
        } else if((lead & 0xF8) == 0xF0) { /* 11110xxx */
            following = 3;
            cp        = lead & 0x07U;
            min_cp    = 0x10000UL;
        } else {
            /* Stray continuation byte, or a lead byte RFC 3629 forbids. */
            return 0;
        }

        /* i never exceeds len here, so the subtraction cannot wrap. */
        if(following > len - i) {
            return 0;
        }

        for(j = 0; j < following; j++, i++) {
            if((buf[i] & 0xC0) != 0x80) { /* must be 10xxxxxx */
                return 0;
            }
            cp = (cp << 6) | (buf[i] & 0x3FU);
        }

        /* Reject overlong forms, surrogates and out-of-range values. */
        if(cp < min_cp || cp > 0x10FFFFUL || (cp >= 0xD800UL && cp <= 0xDFFFUL)) {
            return 0;
        }
    }

    return 1;
}
... | ... |
@@ -58,5 +58,7 @@ typedef enum { |
58 | 58 |
} utf16_type; |
59 | 59 |
char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type); |
60 | 60 |
|
61 |
/*
 * Report whether the first len bytes of buf pass the UTF-8 check.
 * buf is read-only and does not need to be NUL-terminated.
 * Returns 1 when the buffer is accepted as UTF-8, 0 otherwise.
 */
+int cli_isutf8(const unsigned char *buf, unsigned int len); |
|
62 |
+ |
|
61 | 63 |
size_t cli_strlcat(char *dst, const char *src, size_t sz); /* libclamav/strlcat.c */ |
62 | 64 |
#endif |