GitList

libclamav/pdfdecode.c

7ded9e29	/*
1158b285	* Copyright (C) 2016-2018 Cisco and/or its affiliates. All rights reserved.
7ded9e29	* * Author: Kevin Lin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations * including the two.
02840644	*
7ded9e29	* You must obey the GNU General Public License in all respects * for all of the code used other than OpenSSL. If you modify * file(s) with this exception, you may extend this exception to your * version of the file(s), but you are not obligated to do so. If you * do not wish to do so, delete this exception statement from your * version. If you delete this exception statement from all source * files in the program, then also delete it here. */ #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include <stdio.h>
288057e9	#include <stddef.h>
7ded9e29	#include <sys/types.h> #include <sys/stat.h> #include <ctype.h> #include <string.h> #include <fcntl.h> #include <stdlib.h> #include <errno.h>
288057e9	#ifdef HAVE_LIMITS_H
7ded9e29	#include <limits.h> #endif
288057e9	#ifdef HAVE_UNISTD_H
7ded9e29	#include <unistd.h> #endif #include <zlib.h> #if HAVE_ICONV #include <iconv.h> #endif #include "clamav.h" #include "others.h" #include "pdf.h"
eaf52211	#include "pdfdecode.h"
7ded9e29	#include "str.h" #include "bytecode.h" #include "bytecode_api.h"
e8a23886	#include "lzw/lzwdec.h"
7ded9e29
/* Set in pdf_token.flags when the stream being decoded is an /XRef stream. */
#define PDFTOKEN_FLAG_XREF 0x1

/*
 * Working buffer handed from one stream filter to the next.
 * Each filter replaces 'content'/'length' with its decoded output.
 */
struct pdf_token {
    uint32_t flags;   /* tracking flags */
    uint32_t success; /* successfully decoded filters */
    uint32_t length;  /* length of current content; TODO: transition to size_t */
    uint8_t *content; /* content stream */
};
static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, uint32_t lvl);

static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);

/**
 * @brief       Wrapper function for pdf_decodestream_internal.
 *
 * Allocate a token object to store decoded filter data.
 * Parse/decode the filter data and scan it.
 *
 * @param pdf           Pdf context structure.
 * @param obj           The object we found the filter content in.
 * @param params        (optional) Dictionary parameters describing the filter data.
 * @param stream        Filter stream buffer pointer.
 * @param streamlen     Length of filter stream buffer.
 * @param xref          Indicates if the stream is an /XRef stream.  Do not apply forced decryption on /XRef streams.
 * @param fout          File descriptor to write to to be scanned.
 * @param[out] status   Status code: CL_EARG / CL_ENULLARG for bad arguments,
 *                      CL_EMEM on allocation failure, otherwise whatever
 *                      pdf_decodestream_internal reported. Must not be NULL;
 *                      if it is, the function returns 0 without doing anything.
 * @param objstm        (optional) Object stream context structure.
 * @return size_t       The number of bytes written to 'fout' to be scanned.
 */
size_t pdf_decodestream(
    struct pdf_struct *pdf,
    struct pdf_obj *obj,
    struct pdf_dict *params,
    const char *stream,
    uint32_t streamlen,
    int xref,
    int fout,
    cl_error_t *status,
    struct objstm_struct *objstm)
{
    struct pdf_token *token = NULL;
    size_t bytes_scanned    = 0;
    /* NOTE(review): 'ctx' is never read explicitly in this function;
     * presumably the SCAN_ALLMATCHES macro expands to an expression on a
     * local named 'ctx' - confirm before removing it. */
    cli_ctx *ctx = NULL;

    if (!status) {
        /* invalid args, and no way to pass back the status code */
        return 0;
    }

    if (!pdf || !obj) {
        /* Invalid args */
        *status = CL_EARG;
        goto done;
    }

    ctx = pdf->ctx;

    if (!stream || !streamlen || fout < 0) {
        cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id >> 8, obj->id & 0xff);
        *status = CL_ENULLARG;
        goto done;
    }

    *status = CL_SUCCESS;

#if 0
    if (params)
        pdf_print_dict(params, 0);
#endif

    token = cli_malloc(sizeof(struct pdf_token));
    if (!token) {
        *status = CL_EMEM;
        goto done;
    }

    token->flags = 0;
    if (xref)
        token->flags |= PDFTOKEN_FLAG_XREF;

    token->success = 0;

    /* The filters free and replace token->content, so work on a private copy
     * of the caller's stream buffer. */
    token->content = cli_malloc(streamlen);
    if (!token->content) {
        *status = CL_EMEM;
        goto done;
    }
    memcpy(token->content, stream, streamlen);
    token->length = streamlen;

    cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));

    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, status, objstm);

    if ((CL_VIRUS == *status) && !SCAN_ALLMATCHES) {
        goto done;
    }

    if (0 == token->success) {
        /*
         * Either:
         *  a) it failed to decode any filters, or
         *  b) there were no filters.
         *
         * Write out the raw stream to be scanned.
         *
         * Nota bene: If it did decode any filters, the internal() function would
         *            have written out the decoded stream to be scanned.
         */
        if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
            cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");

            if (cli_writen(fout, stream, streamlen) != streamlen) {
                cli_errmsg("pdf_decodestream: failed to write raw stream to output file\n");
            } else {
                bytes_scanned = streamlen;
            }
        }
    }

done:
    /*
     * Free up the token, and token content, if any.
     */
    if (NULL != token) {
        if (NULL != token->content) {
            free(token->content);
            token->content = NULL;
            token->length  = 0;
        }
        free(token);
        token = NULL;
    }

    return bytes_scanned;
}
1158b285	/**
02840644	* @brief Decode filter buffer data. *
1158b285	* Attempt to decompress, decrypt or otherwise parse it.
02840644	*
1158b285	* @param pdf Pdf context structure. * @param obj The object we found the filter content in. * @param params (optional) Dictionary parameters describing the filter data. * @param token Pointer to and length of filter data. * @param fout File handle to write data to to be scanned. * @param[out] status CL_CLEAN/CL_SUCCESS or CL_VIRUS/CL_E<error> * @param objstm (optional) Object stream context structure. * @return ptrdiff_t The number of bytes we wrote to 'fout'. -1 if failed out. */
19f8f7c1	static size_t pdf_decodestream_internal(
1158b285	struct pdf_struct pdf, struct pdf_obj obj, struct pdf_dict params, struct pdf_token token, int fout, cl_error_t status, struct objstm_struct objstm)
7ded9e29	{
288057e9	cl_error_t vir = CL_CLEAN; cl_error_t retval = CL_SUCCESS;
19f8f7c1	size_t bytes_scanned = 0;
50876732	cli_ctx *ctx = NULL;
288057e9	const char *filter = NULL;
02840644	uint32_t i;
7ded9e29
1158b285	if (!status) { /* invalid args, and no way to pass back the status code */
19f8f7c1	return 0;
1158b285	} if (!pdf \|\| !obj \|\| !token) { /* Invalid args */
19f8f7c1	*status = CL_EARG;
1158b285	goto done; }
288057e9
50876732	ctx = pdf->ctx;
19f8f7c1	*status = CL_SUCCESS;
288057e9
0018a8e7	/* * if pdf is decryptable, scan for CRYPT filter * if none, force a DECRYPT filter application */ if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) {
a081b3e9	if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */
1158b285	cli_dbgmsg("pdf_decodestream_internal: skipping decoding => non-filter CRYPT (reason: xref)\n");
a081b3e9	else {
1158b285	cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n"); retval = filter_decrypt(pdf, obj, params, token, 1); if (retval != CL_SUCCESS) {
19f8f7c1	*status = CL_EPARSE;
1158b285	goto done;
a081b3e9	}
0018a8e7	} }
7ded9e29	for (i = 0; i < obj->numfilters; i++) {
288057e9	switch (obj->filterlist[i]) { case OBJ_FILTER_A85:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => ASCII85DECODE\n", obj->filterlist[i]);
288057e9	retval = filter_ascii85decode(pdf, obj, token); break;
7ded9e29
288057e9	case OBJ_FILTER_RL:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => RLDECODE\n", obj->filterlist[i]);
288057e9	retval = filter_rldecode(pdf, obj, token); break;
739e5052
288057e9	case OBJ_FILTER_FLATE:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => FLATEDECODE\n", obj->filterlist[i]);
288057e9	retval = filter_flatedecode(pdf, obj, params, token); break;
7ded9e29
288057e9	case OBJ_FILTER_AH:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => ASCIIHEXDECODE\n", obj->filterlist[i]);
288057e9	retval = filter_asciihexdecode(pdf, obj, token); break;
7ded9e29
288057e9	case OBJ_FILTER_CRYPT:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => CRYPT\n", obj->filterlist[i]);
288057e9	retval = filter_decrypt(pdf, obj, params, token, 0); break;
eaf52211
288057e9	case OBJ_FILTER_LZW:
02840644	cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => LZWDECODE\n", obj->filterlist[i]);
288057e9	retval = filter_lzwdecode(pdf, obj, params, token); break;
e8a23886
288057e9	case OBJ_FILTER_JPX: if (!filter) filter = "JPXDECODE"; case OBJ_FILTER_DCT: if (!filter) filter = "DCTDECODE"; case OBJ_FILTER_FAX: if (!filter) filter = "FAXDECODE"; case OBJ_FILTER_JBIG2: if (!filter) filter = "JBIG2DECODE";
02840644	cli_dbgmsg("pdf_decodestream_internal: unimplemented filter type [%u] => %s\n", obj->filterlist[i], filter);
288057e9	filter = NULL; retval = CL_BREAK; break;
7ded9e29
288057e9	default:
02840644	cli_dbgmsg("pdf_decodestream_internal: unknown filter type [%u]\n", obj->filterlist[i]);
288057e9	retval = CL_BREAK; break;
7ded9e29	}
d593717b	if (!(token->content) \|\| !(token->length)) {
02840644	cli_dbgmsg("pdf_decodestream_internal: empty content, breaking after %u (of %u) filters\n", i, obj->numfilters);
d593717b	break; }
1158b285	if (retval != CL_SUCCESS) {
048a88e6	if (retval == CL_VIRUS && SCAN_ALLMATCHES) {
1158b285	vir = CL_VIRUS; } else {
288057e9	const char *reason;
1158b285	switch (retval) {
288057e9	case CL_VIRUS: status = CL_VIRUS; reason = "detection"; break; case CL_BREAK: status = CL_SUCCESS; reason = "decoding break"; break; default: *status = CL_EPARSE; reason = "decoding error"; break;
5c291512	}
02840644	cli_dbgmsg("pdf_decodestream_internal: stopping after %d (of %u) filters (reason: %s)\n", i, obj->numfilters, reason);
ce3cf4c6	break; }
d593717b	}
bfd8ca3e	token->success++;
d593717b
1158b285	/* Dump the stream content to a text file if keeptmp is enabled. */
fdcf5109	if (pdf->ctx->engine->keeptmp) {
288057e9	if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i + 1)) {
19f8f7c1	cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n");
1158b285	} } }
639615af
50876732	if ((token->success > 0) && (NULL != token->content)) {
1158b285	/*
19f8f7c1	* Looks like we successfully decoded some or all of the stream filters, * so lets write it out to a file descriptor we scan. * * In the event that we didn't decode any filters (or maybe there * weren't any filters), the calling function will do the same with * the raw stream.
1158b285	*/
19f8f7c1	if (CL_SUCCESS == cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
1158b285	if (cli_writen(fout, token->content, token->length) != token->length) {
19f8f7c1	cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to output file\n"); } else { bytes_scanned = token->length;
1158b285	}
07a72006	} }
19f8f7c1	if ((NULL != objstm) &&
288057e9	((CL_SUCCESS == status) \|\| ((CL_VIRUS == status) && SCAN_ALLMATCHES))) {
02840644	unsigned int objs_found = pdf->nobjs;
19f8f7c1
1158b285	/* * The caller indicated that the decoded data is an object stream. * Perform experimental object stream parsing to extract objects from the stream. */
288057e9	objstm->streambuf = (char *)token->content;
1158b285	objstm->streambuf_len = (size_t)token->length; /* Take ownership of the malloc'd buffer */ token->content = NULL;
288057e9	token->length = 0;
1158b285
19f8f7c1	/* Don't store the result. It's ok if some or all objects failed to parse. It would be far worse to add objects from a stream to the list, and then free the stream buffer due to an "error". */
288057e9	if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm)) {
1158b285	cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n"); } if (pdf->nobjs <= objs_found) { cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm did not find any new objects!\n"); } else {
02840644	cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm found %u new objects.\n", pdf->nobjs - objs_found);
1158b285	} } done: if (vir == CL_VIRUS) *status = CL_VIRUS; return bytes_scanned;
07a72006	}
1158b285	/** * @brief Dump PDF filter content such as stream contents to a temp file.
02840644	*
1158b285	* Temp file is created in the pdf->dir directory. * Filename format is "pdf<pdf->files-1>_<lvl>".
02840644	*
1158b285	* @param pdf Pdf context structure. * @param obj The object we found the filter content in. * @param token The struct for the filter contents. * @param lvl A unique index to distinguish the files from each other.
02840644	* @return cl_error_t
1158b285	*/
02840644	static cl_error_t pdf_decode_dump(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_token *token, uint32_t lvl)
07a72006	{ char fname[1024]; int ifd;
02840644	snprintf(fname, sizeof(fname), "%s" PATHSEP "pdf%02u_%02u", pdf->dir, (pdf->files - 1), lvl);
288057e9	ifd = open(fname, O_RDWR \| O_CREAT \| O_EXCL \| O_TRUNC \| O_BINARY, 0600);
07a72006	if (ifd < 0) { char err[128]; cli_errmsg("cli_pdf: can't create intermediate temporary file %s: %s\n", fname, cli_strerror(errno, err, sizeof(err))); return CL_ETMPFILE; }
02840644	cli_dbgmsg("cli_pdf: decoded filter %u obj %u %u\n", lvl, obj->id >> 8, obj->id & 0xff);
07a72006	cli_dbgmsg(" ... to %s\n", fname); if (cli_writen(ifd, token->content, token->length) != token->length) { cli_errmsg("cli_pdf: failed to write output file\n"); close(ifd); return CL_EWRITE;
7ded9e29	}
07a72006	close(ifd);
7ded9e29	return CL_SUCCESS; }
eaf52211	/* * ascii85 inflation * See http://www.piclist.com/techref/method/encode.htm (look for base85) */
1158b285	static cl_error_t filter_ascii85decode(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_token *token)
7ded9e29	{
0018a8e7	uint8_t decoded, dptr;
7ded9e29	uint32_t declen = 0; const uint8_t ptr = (uint8_t )token->content; uint32_t remaining = token->length; int quintet = 0, rc = CL_SUCCESS; uint64_t sum = 0;
0018a8e7	/* 5:4 decoding ratio, with 1:4 expansion sequences => (4length)+1 /
288057e9	if (!(dptr = decoded = (uint8_t )cli_malloc((4 remaining) + 1))) {
7ded9e29	cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); return CL_EMEM; }
288057e9	if (cli_memstr((const char *)ptr, remaining, "~>", 2) == NULL)
7ded9e29	cli_dbgmsg("cli_pdf: no EOF marker found\n"); while (remaining > 0) { int byte = (remaining--) ? (int)*ptr++ : EOF;
288057e9	if ((byte == '~') && (remaining > 0) && (*ptr == '>'))
7ded9e29	byte = EOF;
288057e9	if (byte >= '!' && byte <= 'u') {
7ded9e29	sum = (sum * 85) + ((uint32_t)byte - '!');
288057e9	if (++quintet == 5) {
0018a8e7	dptr++ = (unsigned char)(sum >> 24); dptr++ = (unsigned char)((sum >> 16) & 0xFF); dptr++ = (unsigned char)((sum >> 8) & 0xFF); dptr++ = (unsigned char)(sum & 0xFF);
7ded9e29	declen += 4; quintet = 0;
288057e9	sum = 0;
7ded9e29	}
288057e9	} else if (byte == 'z') { if (quintet) {
7ded9e29	cli_dbgmsg("cli_pdf: unexpected 'z'\n"); rc = CL_EFORMAT; break; }
0018a8e7	dptr++ = '\0'; dptr++ = '\0'; dptr++ = '\0'; dptr++ = '\0';
7ded9e29	declen += 4;
288057e9	} else if (byte == EOF) {
02c120e8	cli_dbgmsg("cli_pdf: last quintet contains %d bytes\n", quintet);
288057e9	if (quintet) {
7ded9e29	int i;
288057e9	if (quintet == 1) {
02c120e8	cli_dbgmsg("cli_pdf: invalid last quintet (only 1 byte)\n");
7ded9e29	rc = CL_EFORMAT; break; }
288057e9	for (i = quintet; i < 5; i++)
7ded9e29	sum *= 85;
288057e9	if (quintet > 1)
7ded9e29	sum += (0xFFFFFF >> ((quintet - 2) * 8));
288057e9	for (i = 0; i < quintet - 1; i++)
0018a8e7	dptr++ = (uint8_t)((sum >> (24 - 8 i)) & 0xFF);
288057e9	declen += quintet - 1;
7ded9e29	} break;
288057e9	} else if (!isspace(byte)) {
7ded9e29	cli_dbgmsg("cli_pdf: invalid character 0x%x @ %lu\n",
288057e9	byte & 0xFF, (unsigned long)(token->length - remaining));
7ded9e29	rc = CL_EFORMAT; break; } } if (rc == CL_SUCCESS) { free(token->content); cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n", (unsigned long)declen, (unsigned long)(token->length)); token->content = decoded;
288057e9	token->length = declen;
7ded9e29	} else {
a042e6f0	if (!(obj->flags & ((1 << OBJ_IMAGE) \| (1 << OBJ_TRUNCATED)))) pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
046d4cc9	cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9	(unsigned long)(token->length - remaining), (unsigned long)(token->length));
7ded9e29	free(decoded); } return rc; }
739e5052	/* imported from razorback */
1158b285	static cl_error_t filter_rldecode(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_token *token)
7ded9e29	{
739e5052	uint8_t decoded, temp; uint32_t declen = 0, capacity = 0; uint8_t content = (uint8_t )token->content;
288057e9	uint32_t length = token->length; uint32_t offset = 0; int rc = CL_SUCCESS;
739e5052
d593717b	UNUSEDPARAM(obj);
739e5052	if (!(decoded = cli_calloc(BUFSIZ, sizeof(uint8_t)))) { cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); return CL_EMEM; } capacity = BUFSIZ; while (offset < length) { uint8_t srclen = content[offset++]; if (srclen < 128) { /* direct copy of (srclen + 1) bytes */ if (offset + srclen + 1 > length) { cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
288057e9	(long unsigned)(offset + srclen + 1), (long unsigned)(length - offset));
739e5052	rc = CL_EFORMAT; break; } if (declen + srclen + 1 > capacity) {
288057e9	if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b	break;
739e5052	if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) { cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n"); rc = CL_EMEM; break; } decoded = temp; capacity += BUFSIZ; }
288057e9	memcpy(decoded + declen, content + offset, srclen + 1);
739e5052	offset += srclen + 1; declen += srclen + 1; } else if (srclen > 128) { /* copy the next byte (257 - srclen) times */ if (offset + 1 > length) { cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
288057e9	(long unsigned)(offset + srclen + 1), (long unsigned)(length - offset));
739e5052	rc = CL_EFORMAT; break; } if (declen + (257 - srclen) + 1 > capacity) {
288057e9	if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b	break;
739e5052	if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) { cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n"); rc = CL_EMEM; break; } decoded = temp; capacity += BUFSIZ; }
288057e9	memset(decoded + declen, content[offset], 257 - srclen);
739e5052	offset++; declen += 257 - srclen; } else { /* srclen == 128 / / end of data */ cli_dbgmsg("cli_pdf: end-of-stream marker @ offset %lu (%lu bytes remaining)\n",
288057e9	(unsigned long)offset, (long unsigned)(token->length - offset));
739e5052	break; } } if (rc == CL_SUCCESS) { free(token->content);
02c120e8	cli_dbgmsg("cli_pdf: decoded %lu bytes from %lu total bytes\n",
739e5052	(unsigned long)declen, (unsigned long)(token->length)); token->content = decoded;
288057e9	token->length = declen;
739e5052	} else {
046d4cc9	cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
739e5052	(unsigned long)offset, (unsigned long)(token->length)); free(decoded); } return rc;
7ded9e29	} static uint8_t decode_nextlinestart(uint8_t content, uint32_t length) { uint8_t pt = content; uint32_t r; int toggle = 0; for (r = 0; r < length; r++, pt++) { if (pt == '\n' \|\| *pt == '\r') toggle = 1; else if (toggle) break; } return pt; }
1158b285	static cl_error_t filter_flatedecode(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_dict params, struct pdf_token token)
7ded9e29	{ uint8_t decoded, temp; uint32_t declen = 0, capacity = 0; uint8_t content = (uint8_t )token->content;
288057e9	uint32_t length = token->length;
7ded9e29	z_stream stream;
102cd430	int zstat, rc = CL_SUCCESS;
7ded9e29
eaf52211	UNUSEDPARAM(params);
7ded9e29	if (*content == '\r') { content++; length--;
eaf52211	pdfobj_flag(pdf, obj, BAD_STREAMSTART);
7ded9e29	/* PDF spec says stream is followed by \r\n or \n, but not \r alone. * Sample 0015315109, it has \r followed by zlib header. * Flag pdf as suspicious, and attempt to extract by skipping the \r. */ if (!length)
d593717b	return CL_SUCCESS;
7ded9e29	} if (!(decoded = (uint8_t *)cli_calloc(BUFSIZ, sizeof(uint8_t)))) { cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); return CL_EMEM; } capacity = BUFSIZ; memset(&stream, 0, sizeof(stream));
288057e9	stream.next_in = (Bytef )content; stream.avail_in = length; stream.next_out = (Bytef )decoded;
7ded9e29	stream.avail_out = BUFSIZ; zstat = inflateInit(&stream);
288057e9	if (zstat != Z_OK) {
7ded9e29	cli_warnmsg("cli_pdf: inflateInit failed\n"); free(decoded); return CL_EMEM; } /* initial inflate / zstat = inflate(&stream, Z_NO_FLUSH); / check if nothing written whatsoever / if ((zstat != Z_OK) && (stream.avail_out == BUFSIZ)) { / skip till EOL, and try inflating from there, sometimes * PDFs contain extra whitespace / uint8_t q = decode_nextlinestart(content, length); if (q) { (void)inflateEnd(&stream); length -= q - content; content = q;
288057e9	stream.next_in = (Bytef )content; stream.avail_in = length; stream.next_out = (Bytef )decoded;
7ded9e29	stream.avail_out = capacity; zstat = inflateInit(&stream);
288057e9	if (zstat != Z_OK) {
7ded9e29	cli_warnmsg("cli_pdf: inflateInit failed\n"); free(decoded); return CL_EMEM; }
eaf52211	pdfobj_flag(pdf, obj, BAD_FLATESTART);
7ded9e29	} zstat = inflate(&stream, Z_NO_FLUSH); } while (zstat == Z_OK && stream.avail_in) {
d593717b	/* extend output capacity if needed,*/
288057e9	if (stream.avail_out == 0) { if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b	break; if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) { cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n"); rc = CL_EMEM; break;
7ded9e29	}
288057e9	decoded = temp; stream.next_out = decoded + capacity;
d593717b	stream.avail_out = BUFSIZ; declen += BUFSIZ; capacity += BUFSIZ;
7ded9e29	} /* continue inflation / zstat = inflate(&stream, Z_NO_FLUSH); } / add stream end fragment to decoded length / declen += (BUFSIZ - stream.avail_out); / error handling */
288057e9	switch (zstat) { case Z_OK: cli_dbgmsg("cli_pdf: Z_OK on stream inflation completion\n"); /* intentional fall-through */ case Z_STREAM_END: cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes (%lu bytes remaining)\n", (unsigned long)declen, (unsigned long)(token->length), (unsigned long)(stream.avail_in)); break;
7ded9e29
288057e9	/* potentially fatal - mostly ignored as per older version */ case Z_STREAM_ERROR: case Z_NEED_DICT: case Z_DATA_ERROR: case Z_MEM_ERROR: default: if (stream.msg) cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n", (unsigned long)declen, stream.msg, obj->id >> 8, obj->id & 0xff); else cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n", (unsigned long)declen, zstat, obj->id >> 8, obj->id & 0xff); if (declen == 0) { pdfobj_flag(pdf, obj, BAD_FLATESTART); cli_dbgmsg("cli_pdf: no bytes were inflated.\n"); rc = CL_EFORMAT; } else { pdfobj_flag(pdf, obj, BAD_FLATE); } break;
7ded9e29	} (void)inflateEnd(&stream); if (rc == CL_SUCCESS) { free(token->content); token->content = decoded;
288057e9	token->length = declen;
7ded9e29	} else {
046d4cc9	cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9	(unsigned long)(length - stream.avail_in), (unsigned long)(token->length));
7ded9e29	free(decoded); } return rc; }
1158b285	static cl_error_t filter_asciihexdecode(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_token *token)
7ded9e29	{ uint8_t decoded; const uint8_t content = (uint8_t *)token->content;
288057e9	uint32_t length = token->length;
7ded9e29	uint32_t i, j;
1158b285	cl_error_t rc = CL_SUCCESS;
7ded9e29
288057e9	if (!(decoded = (uint8_t *)cli_calloc(length / 2 + 1, sizeof(uint8_t)))) {
7ded9e29	cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); return CL_EMEM; }
288057e9	for (i = 0, j = 0; i + 1 < length; i++) {
7ded9e29	if (content[i] == ' ') continue; if (content[i] == '>') break;
288057e9	if (cli_hex2str_to((const char )content + i, (char )decoded + j, 2) == -1) {
7ded9e29	if (length - i < 4) continue; rc = CL_EFORMAT; break; } i++; j++; } if (rc == CL_SUCCESS) { free(token->content); cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n", (unsigned long)j, (unsigned long)(token->length)); token->content = decoded;
288057e9	token->length = j;
7ded9e29	} else {
a042e6f0	if (!(obj->flags & ((1 << OBJ_IMAGE) \| (1 << OBJ_TRUNCATED)))) pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
046d4cc9	cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
7ded9e29	(unsigned long)i, (unsigned long)(token->length)); free(decoded); } return rc; }
eaf52211	/* modes: 0 = use default/DecodeParms, 1 = use document setting */
1158b285	static cl_error_t filter_decrypt(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_dict params, struct pdf_token token, int mode)
eaf52211	{ char *decrypted;
288057e9	size_t length = (size_t)token->length;
eaf52211	enum enc_method enc = ENC_IDENTITY; if (mode) enc = get_enc_method(pdf, obj); else if (params) { struct pdf_dict_node node = params->nodes; while (node) { if (node->type == PDF_DICT_STRING) { if (!strncmp(node->key, "/Type", 6)) { / optional field - Type / / MUST be "CryptFilterDecodeParms" */
e8a23886	if (node->value) cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value));
eaf52211	} else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name / / overrides document and default encryption method */
e8a23886	if (node->value) cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value));
1d0cdc67	enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc);
eaf52211	} } node = node->next; } }
fdcf5109	decrypted = decrypt_any(pdf, obj->id, (const char *)token->content, &length, enc);
eaf52211	if (!decrypted) { cli_dbgmsg("cli_pdf: failed to decrypt stream\n");
d593717b	return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */
eaf52211	}
e09d8843	cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n", length, token->length);
eaf52211	free(token->content); token->content = (uint8_t *)decrypted;
288057e9	token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */
eaf52211	return CL_SUCCESS; }
e8a23886
1158b285	static cl_error_t filter_lzwdecode(struct pdf_struct pdf, struct pdf_obj obj, struct pdf_dict params, struct pdf_token token)
e8a23886	{ uint8_t decoded, temp; uint32_t declen = 0, capacity = 0; uint8_t content = (uint8_t )token->content;
288057e9	uint32_t length = token->length;
e8a23886	lzw_stream stream;
102cd430	int echg = 1, lzwstat, rc = CL_SUCCESS;
e8a23886
ce3cf4c6	if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) return CL_BREAK;
e8a23886	if (params) { struct pdf_dict_node node = params->nodes; while (node) { if (node->type == PDF_DICT_STRING) { if (!strncmp(node->key, "/EarlyChange", 13)) { / optional field - lzw flag / char end, value = (char )node->value; long set; if (value) { cli_dbgmsg("cli_pdf: EarlyChange: %s\n", value); set = strtol(value, &end, 10); if (end != value) echg = (int)set; } } } node = node->next; } } if (content == '\r') { content++; length--; pdfobj_flag(pdf, obj, BAD_STREAMSTART); / PDF spec says stream is followed by \r\n or \n, but not \r alone. * Sample 0015315109, it has \r followed by zlib header. * Flag pdf as suspicious, and attempt to extract by skipping the \r. / if (!length) return CL_SUCCESS; } if (!(decoded = (uint8_t )cli_calloc(BUFSIZ, sizeof(uint8_t)))) { cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); return CL_EMEM; } capacity = BUFSIZ; memset(&stream, 0, sizeof(stream));
288057e9	stream.next_in = content; stream.avail_in = length; stream.next_out = decoded;
e8a23886	stream.avail_out = BUFSIZ;
5c291512	if (echg) stream.flags \|= LZW_FLAG_EARLYCHG;
e8a23886
5c291512	lzwstat = lzwInit(&stream);
288057e9	if (lzwstat != Z_OK) {
e8a23886	cli_warnmsg("cli_pdf: lzwInit failed\n"); free(decoded); return CL_EMEM; } /* initial inflate / lzwstat = lzwInflate(&stream); / check if nothing written whatsoever / if ((lzwstat != Z_OK) && (stream.avail_out == BUFSIZ)) { / skip till EOL, and try inflating from there, sometimes * PDFs contain extra whitespace / uint8_t q = decode_nextlinestart(content, length); if (q) { (void)lzwInflateEnd(&stream); length -= q - content; content = q;
288057e9	stream.next_in = (Bytef )content; stream.avail_in = length; stream.next_out = (Bytef )decoded;
e8a23886	stream.avail_out = capacity;
5c291512	lzwstat = lzwInit(&stream);
288057e9	if (lzwstat != Z_OK) {
e8a23886	cli_warnmsg("cli_pdf: lzwInit failed\n"); free(decoded); return CL_EMEM; } pdfobj_flag(pdf, obj, BAD_FLATESTART); } lzwstat = lzwInflate(&stream); } while (lzwstat == Z_OK && stream.avail_in) { /* extend output capacity if needed,*/
288057e9	if (stream.avail_out == 0) { if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
e8a23886	break; if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) { cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n"); rc = CL_EMEM; break; }
288057e9	decoded = temp; stream.next_out = decoded + capacity;
e8a23886	stream.avail_out = BUFSIZ; declen += BUFSIZ; capacity += BUFSIZ; } /* continue inflation / lzwstat = lzwInflate(&stream); } / add stream end fragment to decoded length / declen += (BUFSIZ - stream.avail_out); / error handling */
288057e9	switch (lzwstat) { case LZW_OK: cli_dbgmsg("cli_pdf: LZW_OK on stream inflation completion\n"); /* intentional fall-through */ case LZW_STREAM_END: cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes (%lu bytes remaining)\n", (unsigned long)declen, (unsigned long)(token->length), (unsigned long)(stream.avail_in)); break;
e8a23886
288057e9	/* potentially fatal - mostly ignored as per older version */ case LZW_STREAM_ERROR: case LZW_DATA_ERROR: case LZW_MEM_ERROR: case LZW_BUF_ERROR: case LZW_DICT_ERROR: default: if (stream.msg) cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n", (unsigned long)declen, stream.msg, obj->id >> 8, obj->id & 0xff); else cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n", (unsigned long)declen, lzwstat, obj->id >> 8, obj->id & 0xff); if (declen == 0) { pdfobj_flag(pdf, obj, BAD_FLATESTART); cli_dbgmsg("cli_pdf: no bytes were inflated.\n"); rc = CL_EFORMAT; } else { pdfobj_flag(pdf, obj, BAD_FLATE); } break;
e8a23886	} (void)lzwInflateEnd(&stream); if (rc == CL_SUCCESS) { free(token->content); token->content = decoded;
288057e9	token->length = declen;
e8a23886	} else {
046d4cc9	cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9	(unsigned long)(length - stream.avail_in), (unsigned long)(token->length));
e8a23886	free(decoded); }
567c73ec	/* heuristic checks: - full dictionary heuristics? - invalid code points? */
5c291512
e8a23886	return rc; }