... | ... |
@@ -63,6 +63,7 @@ |
63 | 63 |
#include "str.h" |
64 | 64 |
#include "bytecode.h" |
65 | 65 |
#include "bytecode_api.h" |
66 |
+#include "lzw/lzwdec.h" |
|
66 | 67 |
|
67 | 68 |
struct pdf_token { |
68 | 69 |
uint32_t length; |
... | ... |
@@ -77,7 +78,7 @@ static int filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
77 | 77 |
static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
78 | 78 |
static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
79 | 79 |
static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode); |
80 |
- |
|
80 |
+static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
81 | 81 |
|
82 | 82 |
off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int fout, int *rc) |
83 | 83 |
{ |
... | ... |
@@ -178,12 +179,15 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
178 | 178 |
rc = filter_decrypt(pdf, obj, params, token, 0); |
179 | 179 |
break; |
180 | 180 |
|
181 |
+ case OBJ_FILTER_LZW: |
|
182 |
+ cli_dbgmsg("cli_pdf: decoding [%d] => LZWDECODE\n", obj->filterlist[i]); |
|
183 |
+ rc = filter_lzwdecode(pdf, obj, params, token); |
|
184 |
+ break; |
|
185 |
+ |
|
181 | 186 |
case OBJ_FILTER_JPX: |
182 | 187 |
if (!filter) filter = "JPXDECODE"; |
183 | 188 |
case OBJ_FILTER_DCT: |
184 | 189 |
if (!filter) filter = "DCTDECODE"; |
185 |
- case OBJ_FILTER_LZW: |
|
186 |
- if (!filter) filter = "LZWDECODE"; |
|
187 | 190 |
case OBJ_FILTER_FAX: |
188 | 191 |
if (!filter) filter = "FAXDECODE"; |
189 | 192 |
case OBJ_FILTER_JBIG2: |
... | ... |
@@ -683,10 +687,12 @@ static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pd |
683 | 683 |
if (node->type == PDF_DICT_STRING) { |
684 | 684 |
if (!strncmp(node->key, "/Type", 6)) { /* optional field - Type */ |
685 | 685 |
/* MUST be "CryptFilterDecodeParms" */ |
686 |
- cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value)); |
|
686 |
+ if (node->value) |
|
687 |
+ cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value)); |
|
687 | 688 |
} else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name */ |
688 | 689 |
/* overrides document and default encryption method */ |
689 |
- cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value)); |
|
690 |
+ if (node->value) |
|
691 |
+ cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value)); |
|
690 | 692 |
enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc); |
691 | 693 |
} |
692 | 694 |
} |
... | ... |
@@ -709,3 +715,171 @@ static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pd |
709 | 709 |
token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */ |
710 | 710 |
return CL_SUCCESS; |
711 | 711 |
} |
712 |
+ |
|
713 |
+static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
714 |
+{ |
|
715 |
+ uint8_t *decoded, *temp; |
|
716 |
+ uint32_t declen = 0, capacity = 0; |
|
717 |
+ |
|
718 |
+ uint8_t *content = (uint8_t *)token->content; |
|
719 |
+ uint32_t length = token->length; |
|
720 |
+ lzw_stream stream; |
|
721 |
+ int echg = 1, lzwstat, skip = 0, rc = CL_SUCCESS; |
|
722 |
+ |
|
723 |
+ if (params) { |
|
724 |
+ struct pdf_dict_node *node = params->nodes; |
|
725 |
+ |
|
726 |
+ while (node) { |
|
727 |
+ if (node->type == PDF_DICT_STRING) { |
|
728 |
+ if (!strncmp(node->key, "/EarlyChange", 13)) { /* optional field - lzw flag */ |
|
729 |
+ char *end, *value = (char *)node->value; |
|
730 |
+ long set; |
|
731 |
+ |
|
732 |
+ if (value) { |
|
733 |
+ cli_dbgmsg("cli_pdf: EarlyChange: %s\n", value); |
|
734 |
+ set = strtol(value, &end, 10); |
|
735 |
+ if (end != value) |
|
736 |
+ echg = (int)set; |
|
737 |
+ } |
|
738 |
+ } |
|
739 |
+ } |
|
740 |
+ node = node->next; |
|
741 |
+ } |
|
742 |
+ } |
|
743 |
+ |
|
744 |
+ if (*content == '\r') { |
|
745 |
+ content++; |
|
746 |
+ length--; |
|
747 |
+ pdfobj_flag(pdf, obj, BAD_STREAMSTART); |
|
748 |
+ /* PDF spec says stream is followed by \r\n or \n, but not \r alone. |
|
749 |
+ * Sample 0015315109, it has \r followed by zlib header. |
|
750 |
+ * Flag pdf as suspicious, and attempt to extract by skipping the \r. |
|
751 |
+ */ |
|
752 |
+ if (!length) |
|
753 |
+ return CL_SUCCESS; |
|
754 |
+ } |
|
755 |
+ |
|
756 |
+ if (!(decoded = (uint8_t *)cli_calloc(BUFSIZ, sizeof(uint8_t)))) { |
|
757 |
+ cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); |
|
758 |
+ return CL_EMEM; |
|
759 |
+ } |
|
760 |
+ capacity = BUFSIZ; |
|
761 |
+ |
|
762 |
+ memset(&stream, 0, sizeof(stream)); |
|
763 |
+ stream.next_in = content; |
|
764 |
+ stream.avail_in = length; |
|
765 |
+ stream.next_out = decoded; |
|
766 |
+ stream.avail_out = BUFSIZ; |
|
767 |
+ |
|
768 |
+ lzwstat = lzwInit(&stream, echg ? LZW_FLAG_EARLYCHG : LZW_NOFLAGS); |
|
769 |
+ if(lzwstat != Z_OK) { |
|
770 |
+ cli_warnmsg("cli_pdf: lzwInit failed\n"); |
|
771 |
+ free(decoded); |
|
772 |
+ return CL_EMEM; |
|
773 |
+ } |
|
774 |
+ |
|
775 |
+ /* initial inflate */ |
|
776 |
+ lzwstat = lzwInflate(&stream); |
|
777 |
+ /* check if nothing written whatsoever */ |
|
778 |
+ if ((lzwstat != Z_OK) && (stream.avail_out == BUFSIZ)) { |
|
779 |
+ /* skip till EOL, and try inflating from there, sometimes |
|
780 |
+ * PDFs contain extra whitespace */ |
|
781 |
+ uint8_t *q = decode_nextlinestart(content, length); |
|
782 |
+ if (q) { |
|
783 |
+ (void)lzwInflateEnd(&stream); |
|
784 |
+ length -= q - content; |
|
785 |
+ content = q; |
|
786 |
+ |
|
787 |
+ stream.next_in = (Bytef *)content; |
|
788 |
+ stream.avail_in = length; |
|
789 |
+ stream.next_out = (Bytef *)decoded; |
|
790 |
+ stream.avail_out = capacity; |
|
791 |
+ |
|
792 |
+ lzwstat = lzwInit(&stream, echg ? LZW_FLAG_EARLYCHG : LZW_NOFLAGS); |
|
793 |
+ if(lzwstat != Z_OK) { |
|
794 |
+ cli_warnmsg("cli_pdf: lzwInit failed\n"); |
|
795 |
+ free(decoded); |
|
796 |
+ return CL_EMEM; |
|
797 |
+ } |
|
798 |
+ |
|
799 |
+ pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
800 |
+ } |
|
801 |
+ |
|
802 |
+ lzwstat = lzwInflate(&stream); |
|
803 |
+ } |
|
804 |
+ |
|
805 |
+ while (lzwstat == Z_OK && stream.avail_in) { |
|
806 |
+ /* extend output capacity if needed,*/ |
|
807 |
+ if(stream.avail_out == 0) { |
|
808 |
+ if ((rc = cli_checklimits("pdf", pdf->ctx, capacity+BUFSIZ, 0, 0)) != CL_SUCCESS) |
|
809 |
+ break; |
|
810 |
+ |
|
811 |
+ if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) { |
|
812 |
+ cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n"); |
|
813 |
+ rc = CL_EMEM; |
|
814 |
+ break; |
|
815 |
+ } |
|
816 |
+ decoded = temp; |
|
817 |
+ stream.next_out = decoded + capacity; |
|
818 |
+ stream.avail_out = BUFSIZ; |
|
819 |
+ declen += BUFSIZ; |
|
820 |
+ capacity += BUFSIZ; |
|
821 |
+ } |
|
822 |
+ |
|
823 |
+ /* continue inflation */ |
|
824 |
+ lzwstat = lzwInflate(&stream); |
|
825 |
+ } |
|
826 |
+ |
|
827 |
+ /* add stream end fragment to decoded length */ |
|
828 |
+ declen += (BUFSIZ - stream.avail_out); |
|
829 |
+ |
|
830 |
+ /* error handling */ |
|
831 |
+ switch(lzwstat) { |
|
832 |
+ case LZW_OK: |
|
833 |
+ cli_dbgmsg("cli_pdf: LZW_OK on stream inflation completion\n"); |
|
834 |
+ /* intentional fall-through */ |
|
835 |
+ case LZW_STREAM_END: |
|
836 |
+ cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes (%lu bytes remaining)\n", |
|
837 |
+ (unsigned long)declen, (unsigned long)(token->length), (unsigned long)(stream.avail_in)); |
|
838 |
+ break; |
|
839 |
+ |
|
840 |
+ /* potentially fatal - *mostly* ignored as per older version */ |
|
841 |
+ case LZW_STREAM_ERROR: |
|
842 |
+ case LZW_DATA_ERROR: |
|
843 |
+ case LZW_MEM_ERROR: |
|
844 |
+ case LZW_BUF_ERROR: |
|
845 |
+ case LZW_DICT_ERROR: |
|
846 |
+ default: |
|
847 |
+ if(stream.msg) |
|
848 |
+ cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n", |
|
849 |
+ (unsigned long)declen, stream.msg, obj->id>>8, obj->id&0xff); |
|
850 |
+ else |
|
851 |
+ cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n", |
|
852 |
+ (unsigned long)declen, lzwstat, obj->id>>8, obj->id&0xff); |
|
853 |
+ |
|
854 |
+ if (declen == 0) { |
|
855 |
+ pdfobj_flag(pdf, obj, BAD_FLATESTART); |
|
856 |
+ cli_dbgmsg("cli_pdf: no bytes were inflated.\n"); |
|
857 |
+ |
|
858 |
+ rc = CL_EFORMAT; |
|
859 |
+ } else { |
|
860 |
+ pdfobj_flag(pdf, obj, BAD_FLATE); |
|
861 |
+ } |
|
862 |
+ break; |
|
863 |
+ } |
|
864 |
+ |
|
865 |
+ (void)lzwInflateEnd(&stream); |
|
866 |
+ |
|
867 |
+ if (rc == CL_SUCCESS) { |
|
868 |
+ free(token->content); |
|
869 |
+ |
|
870 |
+ token->content = decoded; |
|
871 |
+ token->length = declen; |
|
872 |
+ } else { |
|
873 |
+ cli_errmsg("cli_pdf: error occurred parsing byte %lu of %lu\n", |
|
874 |
+ (unsigned long)(length-stream.avail_in), (unsigned long)(token->length)); |
|
875 |
+ free(decoded); |
|
876 |
+ } |
|
877 |
+ |
|
878 |
+ return rc; |
|
879 |
+} |