libclamav/pdf.c
d056cc17
 /*
6ff4e486
  *  Copyright (C) 2007-2008, 2010 Sourcefire, Inc.
2023340a
  *
6ff4e486
  *  Authors: Nigel Horne, Török Edvin
  *
  *  Also based on Matt Olney's pdf parser in snort-nrt.
d056cc17
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
d056cc17
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
2023340a
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
1eceda0e
  *
  * TODO: Embedded fonts
  * TODO: Predictor image handling
d056cc17
  */
95e11e5a
 static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
d056cc17
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
240d3307
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
511a59c7
 #include <errno.h>
ed6446ff
 #ifdef	HAVE_LIMITS_H
 #include <limits.h>
 #endif
9443ec4a
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
240d3307
 #include <zlib.h>
 
ed6446ff
 #include "clamav.h"
 #include "others.h"
654c0b96
 #include "pdf.h"
a5afcb67
 #include "scanners.h"
747c2055
 #include "fmap.h"
f461d74f
 #include "str.h"
dc200c6b
 #include "bytecode.h"
 #include "bytecode_api.h"
7606789f
 #include "md5.h"
 #include "arc4.h"
374be101
 #include "rijndael.h"
7606789f
 #include "sha256.h"
7719760b
 #include "textnorm.h"
240d3307
 
4956690d
 
1eceda0e
 #ifdef	CL_DEBUG
5cd3f734
 /*#define	SAVE_TMP	
  *Save the file being worked on in tmp */
1eceda0e
 #endif
 
6e33139f
 /* Forward declarations of file-local (static) helpers used by the functions below. */
 static	int	asciihexdecode(const char *buf, off_t len, char *output);
5aad11ce
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
bce73fe9
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
ef8219b8
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
da653b74
 
e7a27135
 /*
  * Check whether the data at xref looks like the start of a cross-reference
  * section.
  *
  * Leading spaces, CRs and LFs are skipped first.  The data is accepted when
  * it then starts with the keyword "xref", or when the name "/XRef" occurs
  * anywhere between that point and eof (cross-reference stream).
  *
  * xref: first byte to inspect
  * eof:  one past the last readable byte
  *
  * Returns 0 when a cross-reference marker was found, -1 otherwise.
  */
 static int xrefCheck(const char *xref, const char *eof)
 {
     const char *q;

     while (xref < eof && (*xref == ' ' || *xref == '\n' || *xref == '\r'))
 	xref++;
     /* same test as "xref + 4 >= eof", written without forming a pointer
      * past the end of the buffer */
     if (eof - xref <= 4)
 	return -1;
     if (!memcmp(xref, "xref", 4)) {
 	cli_dbgmsg("cli_pdf: found xref\n");
 	return 0;
     }
     /* could be xref stream: compare all 5 bytes of the name, and accept a
      * name that ends exactly at eof */
     for (q = xref; eof - q >= 5; q++) {
 	if (!memcmp(q, "/XRef", 5)) {
 	    cli_dbgmsg("cli_pdf: found /XRef\n");
 	    return 0;
 	}
     }
     return -1;
 }
 
374be101
 /*
  * Encryption method applied to a stream, string or embedded file.
  * The declaration order is significant: other code compares values with
  * "<= ENC_IDENTITY" to test for "no real encryption".
  */
 enum enc_method {
     ENC_UNKNOWN,	/* method not determined; decrypt_any() returns NULL */
     ENC_NONE,		/* decrypt_any() returns NULL, nothing to decrypt */
bcc68567
     ENC_IDENTITY,	/* data is copied unchanged by decrypt_any() */
374be101
     ENC_V2,		/* decrypt_any() applies ARC4 with the MD5-derived per-object key */
bbfad9ba
     ENC_AESV2,		/* decrypt_any() applies AES with the MD5-derived per-object key */
     ENC_AESV3		/* decrypt_any() applies AES with pdf->key used directly */
374be101
 };
 
e7a27135
 /* Parser state for one PDF document being scanned. */
 struct pdf_struct {
     struct pdf_obj *objs;	/* array of objects found so far; grown by pdf_findobj() */
     unsigned nobjs;		/* number of entries in objs */
2545f976
     unsigned flags;		/* bitmask of (1 << enum pdf_flag), set by pdfobj_flag() */
bcc68567
     unsigned enc_method_stream;	/* enum enc_method used for streams */
     unsigned enc_method_string;	/* enum enc_method used for strings */
     unsigned enc_method_embeddedfile;	/* enum enc_method used for embedded files */
27c8b02b
     const char *CF;		/* dictionary data handed to parse_enc_method(); presumably the /CF crypt filter dictionary - TODO confirm */
     long CF_n;			/* length passed together with CF */
e7a27135
     const char *map;		/* start of the PDF data; object offsets are relative to it */
     off_t size;			/* number of bytes available at map */
     off_t offset;		/* position in map where pdf_findobj() resumes searching */
dc200c6b
     off_t startoff;		/* passed to cli_bytecode_context_setpdf(); NOTE(review): meaning not visible here */
3643f3d2
     cli_ctx *ctx;		/* scan context */
     const char *dir;		/* directory in which extracted objects are created */
     unsigned files;		/* counter used to number the extracted files */
7606789f
     uint32_t enc_objid;	/* NOTE(review): presumably the id of the encryption dictionary object - confirm */
     char *fileID;		/* NOTE(review): presumably the document file identifier - confirm */
     unsigned fileIDlen;	/* presumably the length of fileID - confirm */
     char *key;			/* key material used by decrypt_any() */
     unsigned keylen;		/* number of bytes in key */
e7a27135
 };
 
4956690d
 /* define this to be noisy about things that we can't parse properly */
 /*#define NOISY*/
 
 /*
  * With NOISY defined the noisy_* macros forward to the real message
  * functions.  Otherwise each macro expands to "(void)", so a call such as
  * noisy_warnmsg("fmt", a, b) becomes the discarded comma expression
  * (void)("fmt", a, b): nothing is printed, but the arguments are still
  * evaluated.
  */
 #ifdef NOISY
 #define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)
 #define noisy_warnmsg cli_warnmsg
 #else
 #define noisy_msg (void)
 #define noisy_warnmsg (void)
 #endif
 
e7a27135
 /*
  * Walk backwards from q while the current byte is NUL, TAB, LF, FF, CR or
  * space, never moving below start.
  *
  * Returns the first position (scanning backwards) holding a different
  * byte, or the position at which the walk had to stop because it reached
  * start.  The byte at start itself is never examined.
  */
 static const char *findNextNonWSBack(const char *q, const char *start)
 {
     for (; q > start; q--) {
 	switch (*q) {
 	    case 0x00:
 	    case 0x09:
 	    case 0x0a:
 	    case 0x0c:
 	    case 0x0d:
 	    case 0x20:
 		/* whitespace: keep stepping back */
 		continue;
 	    default:
 		return q;
 	}
     }
     return q;
 }
 
8db140ff
 /*
  * Locate the data of a stream that follows the "stream" keyword.
  *
  * start:        beginning of the region to search
  * bytesleft:    number of bytes, counted from start, in which the "stream"
  *               keyword and its end-of-line marker may be looked for
  * bytesleft2:   number of bytes, counted from start, in which "endstream"
  *               may be looked for
  * stream:       receives the offset (relative to start) of the first data
  *               byte after the keyword and its end-of-line marker
  * endstream:    receives the offset (relative to start) of "endstream";
  *               when the keyword is missing the offset is placed 9 bytes
  *               before the end of the bytesleft2 window, and it is never
  *               smaller than *stream
  * newline_hack: when non-zero, a "\r\n" after the keyword that is followed
  *               by another "\n" has that extra "\n" skipped as well
  *
  * Returns 1 when both offsets were stored, 0 otherwise.  Note that *stream
  * may already have been written when 0 is returned.
  */
 static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream,
 			      int newline_hack)
 {
     const char *q2, *q;

     q2 = cli_memstr(start, bytesleft, "stream", 6);
     if (!q2)
 	return 0;

     q2 += 6;
     bytesleft -= q2 - start;
     if (bytesleft < 0)
 	return 0;
     /* skip the end-of-line marker, but only look at bytes that are still
      * inside the bytesleft window */
     if (bytesleft >= 2 && q2[0] == '\xd' && q2[1] == '\xa') {
 	q2 += 2;
 	if (newline_hack && bytesleft > 2 && q2[0] == '\xa')
 	    q2++;
     } else if (bytesleft > 0 && q2[0] == '\xa') {
 	q2++;
     }
     *stream = q2 - start;
     bytesleft2 -= q2 - start;
     if (bytesleft2 <= 0)
 	return 0;
     q = q2;
     q2 = cli_memstr(q, bytesleft2, "endstream", 9);
     if (!q2)
 	q2 = q + bytesleft2-9; /* till EOF */
     *endstream = q2 - start;
     /* happens when fewer than 9 bytes were left for the fallback above */
     if (*endstream < *stream)
 	*endstream = *stream;
     return 1;
 }
 
693757a1
 /* Expected returns: 1 if success, 0 if no more objects, -1 if error */
6c135eb4
 static int pdf_findobj(struct pdf_struct *pdf)
e7a27135
 {
3643f3d2
     const char *start, *q, *q2, *q3, *eof;
e7a27135
     struct pdf_obj *obj;
     off_t bytesleft;
     unsigned genid, objid;
 
     pdf->nobjs++;
     pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
     if (!pdf->objs) {
5aad11ce
 	cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs);
e7a27135
 	return -1;
     }
     obj = &pdf->objs[pdf->nobjs-1];
ab564992
     memset(obj, 0, sizeof(*obj));
e7a27135
     start = pdf->map+pdf->offset;
     bytesleft = pdf->size - pdf->offset;
bdbae203
     while (bytesleft > 0) {
 	q2 = cli_memstr(start, bytesleft, "obj", 3);
 	if (!q2)
 	    return 0;/* no more objs */
 	q2--;
 	bytesleft -= q2 - start;
 	if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) {
 	    start = q2+4;
 	    bytesleft -= 4;
 	    continue;
 	}
 	break;
     }
     if (bytesleft <= 0)
 	return 0;
 
e7a27135
     q = findNextNonWSBack(q2-1, start);
     while (q > start && isdigit(*q)) { q--; }
     genid = atoi(q);
     q = findNextNonWSBack(q-1,start);
     while (q > start && isdigit(*q)) { q--; }
     objid = atoi(q);
     obj->id = (objid << 8) | (genid&0xff);
     obj->start = q2+4 - pdf->map;
     obj->flags = 0;
     bytesleft -= 4;
     eof = pdf->map + pdf->size;
     q = pdf->map + obj->start;
     while (q < eof && bytesleft > 0) {
3643f3d2
 	off_t p_stream, p_endstream;
e7a27135
 	q2 = pdf_nextobject(q, bytesleft);
 	if (!q2)
9acc81d6
 	    q2 = pdf->map + pdf->size;
e7a27135
 	bytesleft -= q2 - q;
8db140ff
 	if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) {
e7a27135
 	    obj->flags |= 1 << OBJ_STREAM;
9acc81d6
 	    q2 = q-1 + p_endstream + 9;
3643f3d2
 	    bytesleft -= q2 - q + 1;
9acc81d6
 	    if (bytesleft < 0) {
 		obj->flags |= 1 << OBJ_TRUNCATED;
 		pdf->offset = pdf->size;
 		return 1;/* truncated */
 	    }
3643f3d2
 	} else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
 	    q2 = q3 + 6;
e7a27135
 	    pdf->offset = q2 - pdf->map;
 	    return 1; /* obj found and offset positioned */
 	} else {
6c135eb4
 	    q2++;
cacd0927
 	    bytesleft--;
e7a27135
 	}
 	q = q2;
     }
9acc81d6
     obj->flags |= 1 << OBJ_TRUNCATED;
     pdf->offset = pdf->size;
     return 1;/* truncated */
e7a27135
 }
 
3643f3d2
 static int filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj,
6e33139f
 			 int fout, const char *buf, off_t len, off_t *sum)
3643f3d2
 {
     if (cli_checklimits("pdf", pdf->ctx, *sum, 0, 0))
 	return len; /* pretend it was a successful write to suppress CL_EWRITE */
     *sum += len;
     return cli_writen(fout, buf, len);
 }
 
eb270d5a
 static void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag)
 {
     const char *s= "";
     pdf->flags |= 1 << flag;
     if (!cli_debug_flag)
 	return;
     switch (flag) {
 	case UNTERMINATED_OBJ_DICT:
 	    s = "dictionary not terminated";
 	    break;
 	case ESCAPED_COMMON_PDFNAME:
 	    /* like /JavaScript */
 	    s = "escaped common pdfname";
 	    break;
 	case BAD_STREAM_FILTERS:
 	    s = "duplicate stream filters";
 	    break;
 	case BAD_PDF_VERSION:
 	    s = "bad pdf version";
 	    break;
 	case BAD_PDF_HEADERPOS:
 	    s = "bad pdf header position";
 	    break;
 	case BAD_PDF_TRAILER:
 	    s = "bad pdf trailer";
 	    break;
 	case BAD_PDF_TOOMANYOBJS:
 	    s = "too many pdf objs";
 	    break;
 	case BAD_FLATE:
 	    s = "bad deflate stream";
 	    break;
f984f75b
 	case BAD_FLATESTART:
 	    s = "bad deflate stream start";
 	    break;
eb270d5a
 	case BAD_STREAMSTART:
 	    s = "bad stream start";
 	    break;
 	case UNKNOWN_FILTER:
 	    s = "unknown filter used";
 	    break;
 	case BAD_ASCIIDECODE:
 	    s = "bad ASCII decode";
 	    break;
 	case HEX_JAVASCRIPT:
 	    s = "hex javascript";
 	    break;
 	case BAD_INDOBJ:
 	    s = "referencing nonexistent obj";
 	    break;
 	case HAS_OPENACTION:
 	    s = "has /OpenAction";
 	    break;
c16b3abb
 	case HAS_LAUNCHACTION:
 	    s = "has /LaunchAction";
 	    break;
eb270d5a
 	case BAD_STREAMLEN:
 	    s = "bad /Length, too small";
 	    break;
f984f75b
 	case ENCRYPTED_PDF:
 	    s = "PDF is encrypted";
 	    break;
cacd0927
 	case LINEARIZED_PDF:
 	    s = "linearized PDF";
 	    break;
9acc81d6
 	case MANY_FILTERS:
 	    s = "more than 2 filters per obj";
 	    break;
374b5aea
 	case DECRYPTABLE_PDF:
 	    s = "decryptable PDF";
 	    break;
eb270d5a
     }
f984f75b
     cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff);
eb270d5a
 }
 
3643f3d2
 /*
  * Inflate a zlib (FlateDecode) stream and write the result to fout through
  * filter_writen().
  *
  * pdf, obj: document and object the data belongs to (used for flagging and
  *           for log messages)
  * buf, len: compressed input
  * fout:     descriptor the inflated data is written to
  * sum:      running total of written bytes, updated by filter_writen()
  *
  * Returns CL_CLEAN when len is 0, when inflation completed, or when it
  * failed after some output had already been written (BAD_FLATE is flagged
  * in that case); CL_EFORMAT when it failed before any output was produced
  * (BAD_FLATESTART is flagged); CL_EMEM when inflateInit() fails; CL_EWRITE
  * when writing the output fails.
  */
 static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj,
6e33139f
 			      const char *buf, off_t len, int fout, off_t *sum)
3643f3d2
 {
38c9fc17
     /* set once the "skip to the next line and retry" fallback was used */
     int skipped = 0;
5aad11ce
     int zstat;
3643f3d2
     z_stream stream;
     /* number of inflated bytes handed to filter_writen() inside the loop */
     off_t nbytes;
6e33139f
     char output[BUFSIZ];
3643f3d2
 
     if (len == 0)
 	return CL_CLEAN;
eb270d5a
 
     if (*buf == '\r') {
 	buf++;
 	len--;
 	pdfobj_flag(pdf, obj, BAD_STREAMSTART);
 	/* PDF spec says stream is followed by \r\n or \n, but not \r alone.
 	 * Sample 0015315109, it has \r followed by zlib header.
 	 * Flag pdf as suspicious, and attempt to extract by skipping the \r.
 	 */
 	if (!len)
 	    return CL_CLEAN;
     }
 
3643f3d2
     memset(&stream, 0, sizeof(stream));
     stream.next_in = (Bytef *)buf;
     stream.avail_in = len;
6e33139f
     stream.next_out = (Bytef *)output;
3643f3d2
     stream.avail_out = sizeof(output);
 
     zstat = inflateInit(&stream);
     if(zstat != Z_OK) {
 	cli_warnmsg("cli_pdf: inflateInit failed\n");
 	return CL_EMEM;
     }
 
     nbytes = 0;
     /* NOTE(review): the loop stops as soon as all input is consumed; if the
      * output buffer was full at that moment, inflate() may still hold
      * pending output that is never fetched - confirm against the zlib
      * manual */
     while(stream.avail_in) {
89590e99
 	int written;
3643f3d2
 	zstat = inflate(&stream, Z_NO_FLUSH);	/* zlib */
 	switch(zstat) {
 	    case Z_OK:
 		/* output buffer full: flush it and hand zlib a fresh one */
 		if(stream.avail_out == 0) {
 		    if ((written=filter_writen(pdf, obj, fout, output, sizeof(output), sum))!=sizeof(output)) {
 			cli_errmsg("cli_pdf: failed to write output file\n");
 			inflateEnd(&stream);
 			return CL_EWRITE;
 		    }
 		    nbytes += written;
6e33139f
 		    stream.next_out = (Bytef *)output;
3643f3d2
 		    stream.avail_out = sizeof(output);
 		}
 		continue;
 	    case Z_STREAM_END:
 	    default:
 		/* end of stream or error: "written" is what is currently
 		 * sitting in the output buffer */
89590e99
 		written = sizeof(output) - stream.avail_out;
38c9fc17
 		/* nothing produced at all so far: retry once from the start
 		 * of the next line */
 		if (!written && !nbytes && !skipped) {
 		    /* skip till EOL, and try inflating from there, sometimes
 		     * PDFs contain extra whitespace */
 		    const char *q = pdf_nextlinestart(buf, len);
 		    if (q) {
 			skipped = 1;
 			inflateEnd(&stream);
 			len -= q - buf;
019f1955
 			buf = q;
38c9fc17
 			stream.next_in = (Bytef *)buf;
 			stream.avail_in = len;
 			stream.next_out = (Bytef *)output;
 			stream.avail_out = sizeof(output);
 			zstat = inflateInit(&stream);
 			if(zstat != Z_OK) {
 			    cli_warnmsg("cli_pdf: inflateInit failed\n");
 			    return CL_EMEM;
 			}
 			pdfobj_flag(pdf, obj, BAD_FLATESTART);
 			continue;
 		    }
 		}
 
89590e99
 		if (filter_writen(pdf, obj, fout, output, written, sum)!=written) {
 		    cli_errmsg("cli_pdf: failed to write output file\n");
 		    inflateEnd(&stream);
 		    return CL_EWRITE;
 		}
 		nbytes += written;
 		stream.next_out = (Bytef *)output;
 		stream.avail_out = sizeof(output);
76cdacdd
 		/* regular end of stream: leave the switch, the break after
 		 * the switch then leaves the loop */
 		if (zstat == Z_STREAM_END)
 		    break;
89590e99
 
3643f3d2
 		if(stream.msg)
 		    cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
 			       (unsigned long)nbytes,
 			       stream.msg, obj->id>>8, obj->id&0xff);
 		else
 		    cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
 			       (unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
4956690d
 		if(stream.msg)
 		    noisy_warnmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
 			       (unsigned long)nbytes,
 			       stream.msg, obj->id>>8, obj->id&0xff);
 		else
 		    noisy_warnmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
 			       (unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
f984f75b
 		/* mark stream as bad only if not encrypted */
3643f3d2
 		inflateEnd(&stream);
f984f75b
 		if (!nbytes) {
 		    pdfobj_flag(pdf, obj, BAD_FLATESTART);
f4819816
                     cli_dbgmsg("filter_flatedecode: No bytes, returning CL_EFORMAT for this stream.\n");
3a0e133b
                     return CL_EFORMAT;
f984f75b
 		} else {
 		    pdfobj_flag(pdf, obj, BAD_FLATE);
 		}
3643f3d2
 		return CL_CLEAN;
 	}
 	break;
     }
 
     /* flush whatever is left in the output buffer; note this call only
      * treats a negative return as failure, unlike the writes above */
     if(stream.avail_out != sizeof(output)) {
 	if(filter_writen(pdf, obj, fout, output, sizeof(output) - stream.avail_out, sum) < 0) {
 	    cli_errmsg("cli_pdf: failed to write output file\n");
 	    inflateEnd(&stream);
 	    return CL_EWRITE;
 	}
     }
 
     inflateEnd(&stream);
     return CL_CLEAN;
 }
 
 /*
  * Look up the object whose id equals objid.
  *
  * The search begins at obj's own slot in pdf->objs, runs to the end of the
  * array and then wraps around to cover the slots in front of obj.
  *
  * Returns the matching entry, or NULL when no object has that id.
  */
 static struct pdf_obj *find_obj(struct pdf_struct *pdf,
 				struct pdf_obj *obj, uint32_t objid)
 {
     unsigned first = 0;
     unsigned idx;

     /* begin at the caller's object unless it is the first array entry */
     if (obj != pdf->objs)
 	first = obj - pdf->objs;

     /* pass 1: from the starting slot to the end of the array */
     for (idx = first; idx < pdf->nobjs; idx++) {
 	if (pdf->objs[idx].id == objid)
 	    return &pdf->objs[idx];
     }

     /* pass 2: the slots in front of the starting slot */
     for (idx = 0; idx < first; idx++) {
 	if (pdf->objs[idx].id == objid)
 	    return &pdf->objs[idx];
     }

     return NULL;
 }
 
 /*
  * Determine the value of the /Length entry found in [start, start + len).
  *
  * The value is either a direct integer, or an indirect reference of the
  * form "<objid> <genid> R"; in the latter case the referenced object is
  * looked up with find_obj() and the integer at its first token is used.
  *
  * The result is limited so that the value's position plus length plus 5
  * does not exceed pdf->size.  That limit can make the result negative.
  *
  * Returns the length, or 0 when /Length, its value or the referenced
  * object cannot be found.
  */
 static int find_length(struct pdf_struct *pdf,
 		       struct pdf_obj *obj,
 		       const char *start, off_t len)
 {
     int length;
     const char *q;
     q = cli_memstr(start, len, "/Length", 7);
     if (!q)
 	return 0;
     /* step over the '/' before looking for the token after the name */
     q++;
     len -= q - start;
     start = pdf_nextobject(q, len);
     if (!start)
 	return 0;
     /* len -= start - q; */
     q = start;
     length = atoi(q);
     /* the data is raw file content: cast to unsigned char, passing a
      * negative char to isdigit() is undefined */
     while (isdigit((unsigned char)*q)) q++;
     if (*q == ' ') {
 	int genid;
 	q++;
 	genid = atoi(q);
 	while(isdigit((unsigned char)*q)) q++;
 	if (q[0] == ' ' && q[1] == 'R') {
 	    cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
 	    /* build the id with unsigned arithmetic: shifting a negative or
 	     * too large int is undefined */
 	    obj = find_obj(pdf, obj, ((uint32_t)length << 8) | (genid&0xff));
 	    if (!obj) {
 		cli_dbgmsg("cli_pdf: indirect object not found\n");
 		return 0;
 	    }
 	    q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
 	    if (!q) {
 		cli_dbgmsg("cli_pdf: next object not found\n");
 		return 0;
 	    }
 	    length = atoi(q);
 	}
     }
     /* limit length */
     if (start - pdf->map + length+5 > pdf->size) {
 	length = pdf->size - (start - pdf->map)-5;
     }
     return length;
 }
 
7719760b
 /* Object flag bits that make pdf_extract_obj() dump an object which it would otherwise leave out (non-stream objects and filtered streams). */
 #define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
ab564992
 
eb270d5a
 static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
ab564992
 {
5aad11ce
     unsigned i = obj - pdf->objs;
ab564992
     i++;
     if (i < pdf->nobjs) {
dd002dfa
 	int s = pdf->objs[i].start - obj->start - 4;
eb270d5a
 	if (s > 0) {
 	    if (!binary) {
 		const char *p = pdf->map + obj->start;
 		const char *q = p + s;
 		while (q > p && (isspace(*q) || isdigit(*q)))
 		       q--;
 		if (q > p+5 && !memcmp(q-5,"endobj",6))
 		    q -= 6;
 		q = findNextNonWSBack(q, p);
 		q++;
 		return q - p;
 	    }
dd002dfa
 	    return s;
eb270d5a
 	}
ab564992
     }
eb270d5a
     if (binary)
 	return pdf->size - obj->start;
     return pdf->offset - obj->start - 6;
ab564992
 }
 
dc200c6b
 static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd,
 			 int dumpid)
 {
     int ret;
     struct cli_bc_ctx *bc_ctx;
     cli_ctx *ctx = pdf->ctx;
     fmap_t *map;
 
     bc_ctx = cli_bytecode_context_alloc();
     if (!bc_ctx) {
 	cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
 	return CL_EMEM;
     }
 
     map = *ctx->fmap;
     if (fd != -1) {
 	map = fmap(fd, 0, 0);
 	if (!map) {
 	    cli_warnmsg("can't mmap pdf extracted obj\n");
 	    map = *ctx->fmap;
 	    fd = -1;
 	}
     }
     cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs,
 				&pdf->flags, pdf->size, pdf->startoff);
     cli_bytecode_context_setctx(bc_ctx, ctx);
6ad45a29
     ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map);
dc200c6b
     cli_bytecode_context_destroy(bc_ctx);
     if (fd != -1) {
 	funmap(map);
     }
     return ret;
 }
 
bbfad9ba
 /* Forward declaration of a file-local helper. */
 static void dbg_printhex(const char *msg, const char *hex, unsigned len);
 /*
  * Decrypt AES data in CBC mode: every 16-byte block is decrypted and then
  * XORed with the previous ciphertext block (or with the IV for the first
  * block).
  *
  * in:      ciphertext; starts with the 16-byte IV when has_iv is set
  * length:  in: number of bytes at in; out: number of plaintext bytes
  *          stored at q
  * q:       output buffer
  * key:     key bytes
  * key_n:   key length in bytes, at most 32
  * has_iv:  non-zero when the data starts with an IV and ends with padding;
  *          with zero an all-zero IV is used and no padding is removed
  *
  * NOTE(review): when key_n > 32 or the input is shorter than 32 bytes the
  * function returns without writing to q and without changing *length, so
  * the caller cannot tell that nothing was decrypted.
  */
 static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
374be101
 {
     unsigned long rk[RKLENGTH(256)];
     unsigned char iv[16];
     /* bytes of input still to be processed */
     unsigned len = *length;
     unsigned char pad, i;
21a33457
     int nrounds;
374be101
 
3afedd07
     cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %d\n", key_n, (int)*length);
374be101
     if (key_n > 32) {
3afedd07
 	cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8);
374be101
 	return;
     }
     if (len < 32) {
bbfad9ba
 	cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
4956690d
 	noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
374be101
 	return;
     }
bbfad9ba
     if (has_iv) {
 	memcpy(iv, in, 16);
 	in += 16;
 	len -= 16;
     } else
 	memset(iv, 0, sizeof(iv));
374be101
 
22ee81d0
     cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
21a33457
     /* NOTE(review): nrounds is not checked; confirm what
      * rijndaelSetupDecrypt() returns for key sizes it does not support */
     nrounds = rijndaelSetupDecrypt(rk, key, key_n*8);
22ee81d0
     cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");
374be101
     while (len >= 16) {
 	/* this i shadows the unsigned char i declared at function scope */
 	unsigned i;
 	rijndaelDecrypt(rk, nrounds, in, q);
 	for (i=0;i<16;i++)
 	    q[i] ^= iv[i];
 	/* the ciphertext block just processed is the IV of the next one */
 	memcpy(iv, in, 16);
 	q += 16;
 	in += 16;
 	len -= 16;
     }
     /* here len is the size of the trailing partial block (0..15); from now
      * on it accumulates everything to be subtracted from *length */
bbfad9ba
     if (has_iv) {
 	/* the IV is not part of the plaintext */
 	len += 16;
 	/* the last plaintext byte holds the number of padding bytes */
 	pad = q[-1];
 	if (pad > 0x10) {
bcc68567
 	    cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
4956690d
 	    noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
bbfad9ba
 	    /* bad padding: keep the padding bytes in the output */
 	    *length -= len;
374be101
 	    return;
 	}
bbfad9ba
 	q -= pad;
 	/* q[0..pad-1] are the padding bytes; all but the first are checked */
 	for (i=1;i<pad;i++) {
 	    if (q[i] != pad) {
 		cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
4956690d
 		noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
bbfad9ba
 		*length -= len;
 		return;
 	    }
 	}
 	len += pad;
374be101
     }
     *length -= len;
3afedd07
     cli_dbgmsg("cli_pdf: aes_decrypt: length is %d\n", (int)*length);
374be101
 }
 
 
bcc68567
 static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length,
 			 enum enc_method enc_method)
374be101
 {
     unsigned char *key, *q, result[16];
     unsigned n;
     cli_md5_ctx md5;
     struct arc4_state arc4;
 
4956690d
     if (!length || !*length || !in) {
 	noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
374be101
 	return NULL;
4956690d
     }
374be101
     n = pdf->keylen + 5;
bcc68567
     if (enc_method == ENC_AESV2)
374be101
 	n += 4;
     key = cli_malloc(n);
4956690d
     if (!key) {
 	noisy_warnmsg("decrypt_any: malloc failed\n");
374be101
 	return NULL;
4956690d
     }
374be101
 
     memcpy(key, pdf->key, pdf->keylen);
     q = key + pdf->keylen;
     *q++ = id >> 8;
     *q++ = id >> 16;
     *q++ = id >> 24;
     *q++ = id;
     *q++ = 0;
bcc68567
     if (enc_method == ENC_AESV2)
374be101
 	memcpy(q, "sAlT", 4);
     cli_md5_init(&md5);
     cli_md5_update(&md5, key, n);
     cli_md5_final(result, &md5);
bbfad9ba
     free(key);
 
374be101
     n = pdf->keylen + 5;
     if (n > 16)
 	n = 16;
 
     q = cli_malloc(*length);
4956690d
     if (!q) {
 	noisy_warnmsg("decrypt_any: malloc failed\n");
374be101
 	return NULL;
4956690d
     }
374be101
 
bcc68567
     switch (enc_method) {
374be101
 	case ENC_V2:
bbfad9ba
 	    cli_dbgmsg("cli_pdf: enc is v2\n");
374be101
 	    memcpy(q, in, *length);
 	    arc4_init(&arc4, result, n);
 	    arc4_apply(&arc4, q, *length);
4956690d
 	    noisy_msg(pdf, "decrypted ARC4 data\n");
374be101
 	    break;
 	case ENC_AESV2:
bbfad9ba
 	    cli_dbgmsg("cli_pdf: enc is aesv2\n");
 	    aes_decrypt(in, length, q, result, n, 1);
4956690d
 	    noisy_msg(pdf, "decrypted AES(v2) data\n");
bbfad9ba
 	    break;
 	case ENC_AESV3:
 	    cli_dbgmsg("cli_pdf: enc is aesv3\n");
22ee81d0
 	    if (pdf->keylen == 0) {
 	        cli_dbgmsg("cli_pdf: no key\n");
 	        return NULL;
 	    }
bbfad9ba
 	    aes_decrypt(in, length, q, pdf->key, pdf->keylen, 1);
4956690d
 	    noisy_msg(pdf, "decrypted AES(v3) data\n");
374be101
 	    break;
bcc68567
 	case ENC_IDENTITY:
 	    cli_dbgmsg("cli_pdf: enc is identity\n");
 	    memcpy(q, in, *length);
4956690d
 	    noisy_msg(pdf, "identity encryption\n");
bcc68567
 	    break;
374be101
 	case ENC_NONE:
bbfad9ba
 	    cli_dbgmsg("cli_pdf: enc is none\n");
4956690d
 	    noisy_msg(pdf, "encryption is none\n");
374be101
 	    free(q);
 	    return NULL;
 	case ENC_UNKNOWN:
bbfad9ba
 	    cli_dbgmsg("cli_pdf: enc is unknown\n");
374be101
 	    free(q);
4956690d
 	    noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
 		       id>>8,id&0xff);
374be101
 	    return NULL;
     }
     return q;
 }
 
bcc68567
 /*
  * Select the encryption method that applies to obj: the embedded-file
  * method when OBJ_EMBEDDED_FILE is set, otherwise the stream method when
  * OBJ_STREAM is set, otherwise the string method.
  */
 static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
 {
     /* default for objects that are neither embedded files nor streams */
     unsigned method = pdf->enc_method_string;

     if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
 	method = pdf->enc_method_embeddedfile;
     else if (obj->flags & (1 << OBJ_STREAM))
 	method = pdf->enc_method_stream;

     return method;
 }
 
7719760b
 /* Scanner state used by process() while extracting text from content data. */
 enum cstate {
     CSTATE_NONE,	/* looking for a '[' at the current position / start of a line */
     CSTATE_TJ,		/* a '[' was seen; looking for '(' */
     CSTATE_TJ_PAROPEN	/* between '(' and ')': bytes are fed to the text normalizer */
 };
 
 /*
  * Feed one buffer of content data through the text extraction state
  * machine.
  *
  * s:      text normalizer state that collects the extracted bytes
  * st:     scanner state, kept by the caller across calls
  * buf:    data to scan
  * length: number of bytes at buf; nothing is done when it is <= 0
  * fout:   descriptor the normalizer output is flushed to when a byte is not
  *         accepted by text_normalize_buffer()
  *
  * In CSTATE_NONE a '[' switches to CSTATE_TJ; any other byte makes the
  * scanner jump behind the next newline, or return when the buffer holds
  * none.  In CSTATE_TJ a '(' switches to CSTATE_TJ_PAROPEN, where every byte
  * up to the next ')' is passed to the normalizer.  No transition leads
  * back to CSTATE_NONE.
  */
 static void process(struct text_norm_state *s, enum cstate *st, const char *buf, int length, int fout)
 {
     /* a plain while loop, so that an empty buffer is never dereferenced */
     while (length > 0) {
 	switch (*st) {
 	    case CSTATE_NONE:
 		if (*buf == '[') *st = CSTATE_TJ;
 		else {
 		    const char *nl = memchr(buf, '\n', length);
 		    if (!nl)
 			return;
 		    length -= nl - buf;
 		    buf = nl;
 		}
 		break;
 	    case CSTATE_TJ:
 		if (*buf == '(') *st = CSTATE_TJ_PAROPEN;
 		break;
 	    case CSTATE_TJ_PAROPEN:
 		if (*buf == ')') *st = CSTATE_TJ;
 		else {
 		    if (text_normalize_buffer(s, buf, 1) != 1) {
 			cli_writen(fout, s->out, s->out_pos);
 			text_normalize_reset(s);
 		    }
 		}
 		break;
 	}
 	buf++;
 	length--;
     }
 }
 
 static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
 {
     struct text_norm_state s;
     char fullname[1024];
     char outbuff[BUFSIZ];
     char inbuf[BUFSIZ];
4956690d
     int fout, n, rc;
7719760b
     enum cstate st = CSTATE_NONE;
 
     snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
     fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
     if (fout < 0) {
 	char err[128];
 	cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
 	return CL_ETMPFILE;
     }
 
     text_normalize_init(&s, outbuff, sizeof(outbuff));
     while (1) {
 	n = cli_readn(fd, inbuf, sizeof(inbuf));
 	if (n <= 0)
 	    break;
 	process(&s, &st, inbuf, n, fout);
     }
     cli_writen(fout, s.out, s.out_pos);
 
4956690d
     lseek(fout, 0, SEEK_SET);
     rc = cli_magic_scandesc(fout, pdf->ctx);
7719760b
     close(fout);
4956690d
     if (!pdf->ctx->engine->keeptmp)
 	if (cli_unlink(fullname) && rc != CL_VIRUS)
 	    rc = CL_EUNLINK;
     return rc;
7719760b
 }
 
27c8b02b
 /* Forward declarations of file-local dictionary/string helpers used by pdf_extract_obj() below. */
 static const char *pdf_getdict(const char *q0, int* len, const char *key);
 static char *pdf_readval(const char *q, int len, const char *key);
 static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
fb0c9fa2
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
 
3643f3d2
 static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
 {
ab564992
     char fullname[NAME_MAX + 1];
     int fout;
     off_t sum = 0;
3643f3d2
     int rc = CL_SUCCESS;
ab564992
     char *ascii_decoded = NULL;
374be101
     char *decrypted = NULL;
dc200c6b
     int dump = 1;
ab564992
 
5b574c47
     cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
 
9acc81d6
     /* TODO: call bytecode hook here, allow override dumpability */
eb270d5a
     if ((!(obj->flags & (1 << OBJ_STREAM)) ||
 	(obj->flags & (1 << OBJ_HASFILTERS)))
 	&& !(obj->flags & DUMP_MASK)) {
ab564992
 	/* don't dump all streams */
dc200c6b
 	dump = 0;
ab564992
     }
4d808a86
     if ((obj->flags & (1 << OBJ_IMAGE)) &&
 	!(obj->flags & (1 << OBJ_FILTER_DCT))) {
 	/* don't dump / scan non-JPG images */
dc200c6b
 	dump = 0;
9acc81d6
     }
dc200c6b
     if (obj->flags & (1 << OBJ_FORCEDUMP)) {
 	/* bytecode can force dump by setting this flag */
 	dump = 1;
     }
     if (!dump)
 	return CL_CLEAN;
374be101
     cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff);
ab564992
     snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
     fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
     if (fout < 0) {
 	char err[128];
 	cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
 	free(ascii_decoded);
 	return CL_ETMPFILE;
     }
 
     do {
     if (obj->flags & (1 << OBJ_STREAM)) {
3643f3d2
 	const char *start = pdf->map + obj->start;
3a0e133b
         const char *flate_orig;
3643f3d2
 	off_t p_stream = 0, p_endstream = 0;
3a0e133b
 	off_t length, flate_orig_length;
3643f3d2
 	find_stream_bounds(start, pdf->size - obj->start,
ab564992
 			   pdf->size - obj->start,
8db140ff
 			   &p_stream, &p_endstream,
 			   pdf->enc_method_stream <= ENC_IDENTITY &&
 			   pdf->enc_method_embeddedfile <= ENC_IDENTITY);
3643f3d2
 	if (p_stream && p_endstream) {
 	    const char *flate_in;
 	    long ascii_decoded_size = 0;
 	    size_t size = p_endstream - p_stream;
eb270d5a
 	    off_t orig_length;
3643f3d2
 
e8c7cc21
 	    length = find_length(pdf, obj, start, p_stream);
 	    if (length < 0)
 		length = 0;
 	    orig_length = length;
9acc81d6
 	    if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
 		cli_dbgmsg("cli_pdf: length out of file: %ld + %ld > %ld\n",
 			   p_stream, length, pdf->size);
4956690d
 		noisy_warnmsg("length out of file, truncated: %ld + %ld > %ld\n",
 			   p_stream, length, pdf->size);
9acc81d6
 		length = pdf->size - (obj->start + p_stream);
 	    }
e8c7cc21
 	    if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
3643f3d2
 		const char *q = start + p_endstream;
 		length = size;
 		q--;
 		if (*q == '\n') {
 		    q--;
 		    length--;
 		    if (*q == '\r')
 			length--;
 		} else if (*q == '\r') {
 		    length--;
 		}
e8c7cc21
 		if (length < 0)
 		    length = 0;
5aad11ce
 		cli_dbgmsg("cli_pdf: calculated length %ld\n", length);
eb270d5a
 	    } else {
 		if (size > length+2) {
 		    cli_dbgmsg("cli_pdf: calculated length %ld < %ld\n",
 			       length, size);
 		    length = size;
 		}
3643f3d2
 	    }
9acc81d6
 	    if (orig_length && size > orig_length + 20) {
 		cli_dbgmsg("cli_pdf: orig length: %ld, length: %ld, size: %ld\n",
 			   orig_length, length, size);
eb270d5a
 		pdfobj_flag(pdf, obj, BAD_STREAMLEN);
9acc81d6
 	    }
8da52b5d
 	    if (!length) {
3643f3d2
 		length = size;
8da52b5d
 		if (!length) {
 		    cli_dbgmsg("pdf_extract_obj: length and size both 0\n");
 		    break; /* Empty stream, nothing to scan */
 		}
 	    }
3643f3d2
 
3a0e133b
 	    flate_orig = flate_in = start + p_stream;
             flate_orig_length = length;
374be101
 	    if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
27c8b02b
 		enum enc_method enc = get_enc_method(pdf, obj);
 		if (obj->flags & (1 << OBJ_FILTER_CRYPT)) {
 		    int len = p_stream;
 		    const char *q = pdf_getdict(start, &len, "/DecodeParams");
 		    enc = ENC_IDENTITY;
 		    if (q && pdf->CF) {
 			char *name = pdf_readval(q, len, "/Name");
 			cli_dbgmsg("cli_pdf: Crypt filter %s\n", name);
 			if (name && strcmp(name, "/Identity"))
 			    enc = parse_enc_method(pdf->CF, pdf->CF_n, name, enc); 
6ac55923
 			free(name);
27c8b02b
 		    }
 		}
f3199751
 		if (cli_memstr(start, p_stream, "/XRef", 5))
 		    cli_dbgmsg("cli_pdf: cross reference stream, skipping\n");
 		else {
 		    decrypted = decrypt_any(pdf, obj->id, flate_in, &length,
 					    enc);
4956690d
 		    if (decrypted)
f3199751
 			flate_in = decrypted;
 		}
374be101
 	    }
 
3643f3d2
 	    if (obj->flags & (1 << OBJ_FILTER_AH)) {
a9d034ee
 		ascii_decoded = cli_malloc(length/2 + 1);
3643f3d2
 		if (!ascii_decoded) {
241e7eb1
 		    cli_errmsg("Cannot allocate memory for ascii_decoded\n");
ab564992
 		    rc = CL_EMEM;
 		    break;
3643f3d2
 		}
374be101
 		ascii_decoded_size = asciihexdecode(flate_in,
3643f3d2
 						    length,
 						    ascii_decoded);
 	    } else if (obj->flags & (1 << OBJ_FILTER_A85)) {
a9d034ee
 		ascii_decoded = cli_malloc(length*5);
3643f3d2
 		if (!ascii_decoded) {
241e7eb1
 		    cli_errmsg("Cannot allocate memory for ascii_decoded\n");
ab564992
 		    rc = CL_EMEM;
 		    break;
3643f3d2
 		}
374be101
 		ascii_decoded_size = ascii85decode(flate_in,
3643f3d2
 						   length,
6e33139f
 						   (unsigned char*)ascii_decoded);
3643f3d2
 	    }
 	    if (ascii_decoded_size < 0) {
9acc81d6
 		/* don't flag for images or truncated objs*/
 		if (!(obj->flags &
 		      ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
 		    pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
3643f3d2
 		cli_dbgmsg("cli_pdf: failed to asciidecode in %u %u obj\n", obj->id>>8,obj->id&0xff);
9acc81d6
 		free(ascii_decoded);
 		ascii_decoded = NULL;
 		/* attempt to directly flatedecode it */
3643f3d2
 	    }
 	    /* either direct or ascii-decoded input */
 	    if (!ascii_decoded)
 		ascii_decoded_size = length;
374be101
 	    else
 		flate_in = ascii_decoded;
3643f3d2
 
 	    if (obj->flags & (1 << OBJ_FILTER_FLATE)) {
9acc81d6
 		cli_dbgmsg("cli_pdf: deflate len %ld (orig %ld)\n", ascii_decoded_size, (long)orig_length);
3643f3d2
 		rc = filter_flatedecode(pdf, obj, flate_in, ascii_decoded_size, fout, &sum);
3a0e133b
                 if (rc == CL_EFORMAT) {
                     if (decrypted) {
                         flate_in = flate_orig;
                         ascii_decoded_size = flate_orig_length;
                     }
 		    cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n");
 		    noisy_warnmsg("cli_pdf: dumping raw stream, probably encrypted and we failed to decrypt'n");
 		    if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size) {
 			cli_errmsg("cli_pdf: failed to write output file\n");
 			return CL_EWRITE;
 		    }
                 }
3643f3d2
 	    } else {
ab564992
 		if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
 		    rc = CL_EWRITE;
3643f3d2
 	    }
4956690d
 	} else
 	    noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
 
ab564992
     } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
 	const char *q2;
 	const char *q = pdf->map+obj->start;
 	/* TODO: get obj-endobj size */
eb270d5a
 	off_t bytesleft = obj_size(pdf, obj, 0);
dd002dfa
 	if (bytesleft < 0)
 	    break;
ab564992
 
bcc68567
       do {
fb0c9fa2
         char *js = NULL;
         off_t js_len = 0;
3a0e133b
         const char *q3;
bcc68567
 
ab564992
 	q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);
 	if (!q2)
 	    break;
55a321ea
 	bytesleft -= q2 - q + 11;
fb0c9fa2
         q = q2 + 11;
 
         js = pdf_readstring(q, bytesleft,  "/JS", NULL, &q2, !(pdf->flags & (1<<DECRYPTABLE_PDF)));
         bytesleft -= q2 - q;
         q = q2;
 
         if (js) {
             const char *out = js;
             js_len = strlen(js);
             if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
                 cli_dbgmsg("cli_pdf: encrypted string\n");
 		decrypted = decrypt_any(pdf, obj->id, js, &js_len,
bcc68567
 					pdf->enc_method_string);
4956690d
 		if (decrypted) {
 		    noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
bcc68567
 		    out = decrypted;
4956690d
 		}
bcc68567
 	    }
fb0c9fa2
 
 	    if (filter_writen(pdf, obj, fout, out, js_len, &sum) != js_len) {
ab564992
 		rc = CL_EWRITE;
fb0c9fa2
                 free(js);
ab564992
 		break;
 	    }
fb0c9fa2
             free(js);
3afedd07
 	    cli_dbgmsg("bytesleft: %d\n", (int)bytesleft);
fb0c9fa2
 
13882281
             if (bytesleft > 0) {
                 q2 = pdf_nextobject(q, bytesleft);
                 if (!q2) q2 = q + bytesleft - 1;
                 /* non-conforming PDFs that don't escape ) properly */
                 q3 = memchr(q, ')', bytesleft);
                 if (q3 && q3 < q2) q2 = q3;
                 while (q2 > q && q2[-1] == ' ') q2--;
                 if (q2 > q) {
                     q--;
                     filter_writen(pdf, obj, fout, q, q2 - q, &sum);
                     q++;
                 }
fb0c9fa2
             }
         }
 
bcc68567
       } while (bytesleft > 0);
eb270d5a
     } else {
 	off_t bytesleft = obj_size(pdf, obj, 0);
5b574c47
 	if (bytesleft < 0) {
 	    rc = CL_EFORMAT;
 	}
 	else if (filter_writen(pdf, obj, fout , pdf->map + obj->start, bytesleft,&sum) != bytesleft)
eb270d5a
 	    rc = CL_EWRITE;
3643f3d2
     }
ab564992
     } while (0);
eb270d5a
     cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname);
dc200c6b
     if (sum) {
 	int rc2;
 	cli_updatelimits(pdf->ctx, sum);
 	/* TODO: invoke bytecode on this pdf obj with metainformation associated
 	 * */
 	lseek(fout, 0, SEEK_SET);
 	rc2 = cli_magic_scandesc(fout, pdf->ctx);
 	if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
 	    rc = rc2;
693757a1
 	if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
dc200c6b
 	    rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
 	    if (rc2 == CL_VIRUS)
 		rc = rc2;
 	}
693757a1
 	if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES)))
 		&& (obj->flags & (1 << OBJ_CONTENTS))) {
7719760b
 	    lseek(fout, 0, SEEK_SET);
 	    cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
 	    rc2 = pdf_scan_contents(fout, pdf);
 	    if (rc2 == CL_VIRUS)
 		rc = rc2;
4956690d
 	    noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
7719760b
 	}
dc200c6b
     }
ab564992
     close(fout);
     free(ascii_decoded);
374be101
     free(decrypted);
ab564992
     if (!pdf->ctx->engine->keeptmp)
 	if (cli_unlink(fullname) && rc != CL_VIRUS)
 	    rc = CL_EUNLINK;
3643f3d2
     return rc;
 }
 
6c135eb4
 /*
  * Parser state carried from one PDF name to the next while the names of an
  * object's dictionary are processed (see handle_pdfname() and
  * pdf_parseobj()).  The pdfname_actions table uses these values to say in
  * which state a name is honoured and which state it leads to.
  */
 enum objstate {
     STATE_NONE,
     STATE_S,
     STATE_FILTER,
3643f3d2
     STATE_JAVASCRIPT,
eb270d5a
     STATE_OPENACTION,
b835a528
     STATE_LINEARIZED,
c16b3abb
     STATE_LAUNCHACTION,
7719760b
     STATE_CONTENTS,
6c135eb4
     STATE_ANY /* for actions table below */
 };
 
 /*
  * One row of the pdfname_actions table: what handle_pdfname() does when it
  * meets the PDF name `pdfname`.
  */
 struct pdfname_action {
     /* name as it appears in the dictionary, without the leading '/' */
     const char *pdfname;
dc200c6b
     /* bit index OR-ed into obj->flags when the row applies */
     enum pdf_objflags set_objflag;/* OBJ_DICT is noop */
6c135eb4
     /* state the parser must be in for the row to apply (or STATE_ANY) */
     enum objstate from_state;/* STATE_NONE is noop */
     /* state the parser moves to once the row has been applied */
     enum objstate to_state;
 };
 
 /*
  * Table of the PDF names the parser reacts to.  handle_pdfname() scans it
  * linearly with strcmp() and uses the first row whose name matches.
  * Columns: name, object flag to set, required state, resulting state.
  */
 static struct pdfname_action pdfname_actions[] = {
     {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER},
     {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER},
80db7712
     {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER},
9acc81d6
     {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER},
3643f3d2
     {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE},
6c135eb4
     {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER},
80db7712
     {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER},
9acc81d6
     {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE},
6c135eb4
     {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER},
80db7712
     {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER},
6c135eb4
     {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER},
80db7712
     {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER},
6c135eb4
     {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER},
80db7712
     {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER},
6c135eb4
     /* NOTE(review): JBIG2Decode sets the same flag as DCTDecode here;
      * confirm whether a dedicated JBIG2 flag was intended. */
     {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
     {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
80db7712
     {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
6c135eb4
     {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER},
     {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE},
7606789f
     {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER},
b835a528
     /* the next three rows all mark the object as signed, from any state */
     {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE},
     {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
9acc81d6
     {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
b835a528
     {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED},
eb270d5a
     {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER},
3643f3d2
     /* JavaScript only counts when it directly follows an /S name */
     {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT},
6c135eb4
     {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE},
     {"S", OBJ_DICT, STATE_NONE, STATE_S},
eb270d5a
     {"Type", OBJ_DICT, STATE_NONE, STATE_NONE},
c16b3abb
     {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION},
7719760b
     {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION},
     {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE},
     {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS}
6c135eb4
 };
 
edeb59b3
 /*
  * Mask of the obj->flags bits that stand for a recognised stream filter.
  * handle_pdfname() and pdf_parseobj() use it to decide whether an
  * unrecognised filter name should leave OBJ_FILTER_UNKNOWN set.
  * NOTE(review): OBJ_FILTER_STANDARD is not part of the mask - confirm that
  * this is intentional.
  */
 #define KNOWN_FILTERS ((1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_RL) | (1 << OBJ_FILTER_A85) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_LZW) | (1 << OBJ_FILTER_FAX) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_JPX) | (1 << OBJ_FILTER_CRYPT))
eb270d5a
 
6c135eb4
 static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj,
 			   const char *pdfname, int escapes,
6e33139f
 			   enum objstate *state)
6c135eb4
 {
     struct pdfname_action *act = NULL;
     unsigned j;
     for (j=0;j<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);j++) {
fbb55daf
 	if (!strcmp(pdfname, pdfname_actions[j].pdfname)) {
6c135eb4
 	    act = &pdfname_actions[j];
 	    break;
 	}
     }
eb270d5a
     if (!act) {
 	if (*state == STATE_FILTER &&
f984f75b
 	    !(obj->flags & (1 << OBJ_SIGNED)) &&
 	    /* these are digital signature objects, filter doesn't matter,
 	     * we don't need them anyway */
eb270d5a
 	    !(obj->flags & KNOWN_FILTERS)) {
 	    cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname);
b835a528
 	    obj->flags |= 1 << OBJ_FILTER_UNKNOWN;
eb270d5a
 	}
6c135eb4
 	return;
eb270d5a
     }
6c135eb4
     if (escapes) {
 	/* if a commonly used PDF name is escaped that is certainly
 	   suspicious. */
 	cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
 	pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
     }
     if (act->from_state == *state ||
 	act->from_state == STATE_ANY) {
 	*state = act->to_state;
 
 	if (*state == STATE_FILTER &&
 	    act->set_objflag !=OBJ_DICT &&
 	    (obj->flags & (1 << act->set_objflag))) {
9acc81d6
 	    cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname);
6c135eb4
 	    pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
 	}
 	obj->flags |= 1 << act->set_objflag;
     } else {
9c617dbe
 	/* auto-reset states */
6c135eb4
 	switch (*state) {
 	    case STATE_S:
 		*state = STATE_NONE;
 		break;
6e33139f
 	    default:
 		break;
6c135eb4
 	}
     }
 }
 
3f8016ce
 static int pdf_readint(const char *q0, int len, const char *key);
bbfad9ba
 
 /*
  * Parse the "/Encrypt <id> <gen> R" reference of a trailer and store the
  * referenced object id in pdf->enc_objid.
  *
  * pdf  parser context; only enc_objid is written, and only on success
  * enc  points at a "/Encrypt..." token (a leading "/EncryptMetadata" is
  *      skipped and the next "/Encrypt" is used instead)
  * len  number of readable bytes starting at enc
  *
  * The id is packed like the other object ids in this file: object number in
  * the upper bits, generation number in the low 8 bits.
  */
 static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
 {
     const char *q, *q2;
     uint32_t objid;

     if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
         q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
         if (!q)
             return;
         len -= q - enc;
         enc = q;
     }
     /* need room for the "/Encrypt" token itself plus at least one byte */
     if (len <= 8)
         return;
     q = enc + 8;
     len -= 8;

     /* object number */
     q2 = pdf_nextobject(q, len);
     /* cast: passing a negative char to isdigit() is undefined */
     if (!q2 || !isdigit((unsigned char)*q2))
         return;
     objid = (uint32_t)atoi(q2) << 8;
     len -= q2 - q;
     q = q2;
     if (len <= 0)
         return;

     /* generation number */
     q2 = pdf_nextobject(q, len);
     if (!q2 || !isdigit((unsigned char)*q2))
         return;
     objid |= atoi(q2) & 0xff;
     len -= q2 - q;
     q = q2;
     if (len <= 0)
         return;

     /* the reference must end with 'R' */
     q2 = pdf_nextobject(q, len);
     if (!q2 || *q2 != 'R')
         return;
     cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %u %u\n", objid>>8, objid&0xff);
     pdf->enc_objid = objid;
 }
 
 /*
  * Look for encryption information in a trailer region.
  *
  * pdf     parser context
  * s       start of the region to search
  * length  number of bytes in the region
  *
  * When "/Encrypt" is present the PDF is flagged as encrypted, the Encrypt
  * reference is parsed, and the "/ID" string (if readable) replaces
  * pdf->fileID.
  */
 static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)
 {
     const char *enc;
     char *newID;
     unsigned newIDlen = 0;

     enc = cli_memstr(s, length, "/Encrypt", 8);
     if (!enc)
         return;

     pdf->flags |= 1 << ENCRYPTED_PDF;
     pdf_parse_encrypt(pdf, enc, s + length - enc);

     /* Read into a local length: pdf_readstring() zeroes *slen before it
      * knows whether it will succeed, so pointing it at pdf->fileIDlen
      * would leave a previously stored fileID with length 0 on failure. */
     newID = pdf_readstring(s, length, "/ID", &newIDlen, NULL, 0);
     if (newID) {
         free(pdf->fileID);
         pdf->fileID = newID;
         pdf->fileIDlen = newIDlen;
     }
 }
 
6c135eb4
 static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
 {
     /* enough to hold common pdf names, we don't need all the names */
     char pdfname[64];
8d1ef133
     const char *q2, *q3;
     const char *nextobj, *nextopen, *nextclose;
6c135eb4
     const char *q = obj->start + pdf->map;
8d1ef133
     const char *dict, *enddict, *start;
0fe3b769
     off_t dict_length, full_dict_length;
8d1ef133
     off_t objsize = obj_size(pdf, obj, 1);
     off_t bytesleft;
9acc81d6
     unsigned i, filters=0;
8d1ef133
     unsigned blockopens=0;
6c135eb4
     enum objstate objstate = STATE_NONE;
 
8d1ef133
     if (objsize < 0)
6c135eb4
 	return;
     start = q;
8d1ef133
     bytesleft = objsize;
 
6c135eb4
     /* find start of dictionary */
     do {
8d1ef133
 	nextobj = pdf_nextobject(q, bytesleft);
 	bytesleft -= nextobj -q;
 	if (!nextobj || bytesleft < 0) {
5b574c47
 	    cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff);
6c135eb4
 	    return;
 	}
8d1ef133
 	q3 = memchr(q-1, '<', nextobj-q+1);
 	nextobj++;
6c135eb4
 	bytesleft--;
8d1ef133
 	q = nextobj;
6c135eb4
     } while (!q3 || q3[1] != '<');
     dict = q3+2;
     q = dict;
8d1ef133
     blockopens++;
     bytesleft = objsize - (q - start);
4c19109d
     enddict = q + bytesleft - 1;
8d1ef133
 
     /* find end of dictionary block */
4c19109d
     if (bytesleft < 0) {
5b574c47
         cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff);
4c19109d
         return;
     }
8d1ef133
 
4c19109d
     /* while still looking ... */
     while ((q < enddict-1) && (blockopens > 0)) {
         /* find next close */
bc52aff8
         nextclose = memchr(q, '>', enddict-q);
4c19109d
         if (nextclose && (nextclose[1] == '>')) {
             /* check for nested open */
374b5aea
             while ((nextopen = memchr(q-1, '<', nextclose-q+1)) != NULL) {
4c19109d
                 if (nextopen[1] == '<') {
                     /* nested open */
                     blockopens++;
                     q = nextopen + 2;
8d1ef133
                 }
                 else {
4c19109d
                     /* unmatched < before next close */
                     q = nextopen + 2;
8d1ef133
                 }
             }
4c19109d
             /* close block */
             blockopens--;
             q = nextclose + 2;
8d1ef133
         }
4c19109d
         else if (nextclose) {
             /* found one > but not two */
             q = nextclose + 2;
         }
         else {
             /* next closing not found */
bc52aff8
             break;
4c19109d
         }
     }
8d1ef133
 
4c19109d
     /* Was end of dictionary found? */
bc52aff8
     if (blockopens) {
         /* probably truncated */
         cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff);
4c19109d
         return;
bc52aff8
     }
8d1ef133
     enddict = nextclose;
6c135eb4
     obj->flags |= 1 << OBJ_DICT;
8d1ef133
     full_dict_length = dict_length = enddict - dict;
 
     /* This code prints the dictionary content.
     {
         char * dictionary = malloc(dict_length + 1);
         if (dictionary) {
4c19109d
             for (i = 0; i < dict_length; i++) {
                 if (isprint(dict[i]) || isspace(dict[i]))
                     dictionary[i] = dict[i];
                 else
                     dictionary[i] = '*';
             }
8d1ef133
             dictionary[dict_length] = '\0';
             cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
             free(dictionary);
         }
     }
     */
6c135eb4
 
9c617dbe
     /*  process pdf names */
5af966d3
     for (q = dict;dict_length > 0;) {
6c135eb4
 	int escapes = 0;
 	q2 = memchr(q, '/', dict_length);
 	if (!q2)
 	    break;
 	dict_length -= q2 - q;
3643f3d2
 	q = q2;
9c617dbe
 	/* normalize PDF names */
5af966d3
 	for (i = 0;dict_length > 0 && (i < sizeof(pdfname)-1); i++) {
6c135eb4
 	    q++;
 	    dict_length--;
 	    if (*q == '#') {
78f2c1d9
 		if (cli_hex2str_to(q+1, pdfname+i, 2) == -1)
 		    break;
6c135eb4
 		q += 2;
 		dict_length -= 2;
 		escapes = 1;
 		continue;
 	    }
9acc81d6
 	    if (*q == ' ' || *q == '\t' || *q == '\r' || *q == '\n' ||
78f2c1d9
 		*q == '/' || *q == '>' || *q == ']' || *q == '[' || *q == '<'
 		|| *q == '(')
6c135eb4
 		break;
 	    pdfname[i] = *q;
 	}
 	pdfname[i] = '\0';
 
6e33139f
 	handle_pdfname(pdf, obj, pdfname, escapes, &objstate);
b835a528
 	if (objstate == STATE_LINEARIZED) {
3f8016ce
 	    long trailer_end, trailer;
b835a528
 	    pdfobj_flag(pdf, obj, LINEARIZED_PDF);
 	    objstate = STATE_NONE;
0fe3b769
 	    trailer_end = pdf_readint(dict, full_dict_length, "/H");
6dcf6031
 	    if (trailer_end > 0 && trailer_end < pdf->size) {
bbfad9ba
 		const char *enc;
82c0e6bc
 		trailer = trailer_end - 1024;
 		if (trailer < 0) trailer = 0;
 		q2 = pdf->map + trailer;
 		cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
bbfad9ba
 		pdf_parse_trailer(pdf, q2, trailer_end - trailer);
82c0e6bc
 		if (pdf->fileID)
bbfad9ba
 		    cli_dbgmsg("cli_pdf: found fileID\n");
82c0e6bc
 	    }
b835a528
 	}
c16b3abb
 	if (objstate == STATE_LAUNCHACTION)
 	    pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);
7719760b
 	if (dict_length > 0 &&
 	    (objstate == STATE_JAVASCRIPT ||
 	     objstate == STATE_OPENACTION ||
 	     objstate == STATE_CONTENTS)) {
eb270d5a
 	    if (objstate == STATE_OPENACTION)
 		pdfobj_flag(pdf, obj, HAS_OPENACTION);
ab564992
 	    q2 = pdf_nextobject(q, dict_length);
 	    if (q2 && isdigit(*q2)) {
 		uint32_t objid = atoi(q2) << 8;
 		while (isdigit(*q2)) q2++;
 		q2 = pdf_nextobject(q2, dict_length);
 		if (q2 && isdigit(*q2)) {
 		    objid |= atoi(q2) & 0xff;
 		    q2 = pdf_nextobject(q2, dict_length);
5af966d3
 		    if (q2 && *q2 == 'R') {
ab564992
 			struct pdf_obj *obj2;
eb270d5a
 			cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n",
 				   pdfname,
ab564992
 				   objid >> 8, objid&0xff);
 			obj2 = find_obj(pdf, obj, objid);
eb270d5a
 			if (obj2) {
7719760b
 			    enum pdf_objflags flag =
 				objstate == STATE_JAVASCRIPT ? OBJ_JAVASCRIPT :
 				objstate == STATE_OPENACTION ? OBJ_OPENACTION :
 				OBJ_CONTENTS;
eb270d5a
 			    obj2->flags |= 1 << flag;
 			    obj->flags &= ~(1 << flag);
 			} else {
 			    pdfobj_flag(pdf, obj, BAD_INDOBJ);
 			}
ab564992
 		    }
 		}
 	    }
 	    objstate = STATE_NONE;
 	}
6c135eb4
     }
9acc81d6
     for (i=0;i<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);i++) {
 	const struct pdfname_action *act = &pdfname_actions[i];
 	if ((obj->flags & (1 << act->set_objflag)) &&
 	    act->from_state == STATE_FILTER &&
 	    act->to_state == STATE_FILTER &&
7606789f
 	    act->set_objflag != OBJ_FILTER_CRYPT &&
 	    act->set_objflag != OBJ_FILTER_STANDARD) {
9acc81d6
 	    filters++;
 	}
     }
     if (filters > 2) { /* more than 2 non-crypt filters */
 	pdfobj_flag(pdf, obj, MANY_FILTERS);
     }
b835a528
     if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS))
 	obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);
     if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))
 	pdfobj_flag(pdf, obj, UNKNOWN_FILTER);
6c135eb4
     cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
 }
 
7606789f
 /*
  * Find `key` inside a dictionary buffer and return a pointer to its value.
  *
  * q0   start of the buffer (may be NULL)
  * len  in: readable bytes at q0; out: bytes left starting at the returned
  *      pointer.  It may already have been reduced when NULL is returned.
  * key  token to search for, e.g. "/Length"
  *
  * Returns NULL when the length is not positive, q0 is NULL, the key is
  * missing, or nothing follows the key.  If the value is preceded by '<'
  * the returned pointer is moved back onto that '<'.
  */
 static const char *pdf_getdict(const char *q0, int* len, const char *key)
 {
     const char *keypos;
     const char *value;

     if (*len <= 0) {
         cli_dbgmsg("cli_pdf: bad length %d\n", *len);
         return NULL;
     }
     if (q0 == NULL)
         return NULL;

     keypos = cli_memstr(q0, *len, key, strlen(key));
     if (keypos == NULL) {
         cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
         return NULL;
     }
     *len -= keypos - q0;

     /* skip the key's leading '/' so the scan stops at the value */
     value = pdf_nextobject(keypos + 1, *len - 1);
     if (value == NULL) {
         cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
         return NULL;
     }
     if (value[-1] == '<')
         value--;
     *len -= value - keypos;
     return value;
 }
 
fb0c9fa2
 /*
  * Read the string value stored under `key` in a dictionary buffer.
  *
  * q0        start of the buffer
  * len       readable bytes at q0
  * key       token to look up, e.g. "/ID"
  * slen      optional out: length of the returned string (0 on failure)
  * qend      optional out: position where scanning stopped (q0 when the key
  *           was not found)
  * noescape  non-zero: copy a literal string verbatim, backslashes included
  *
  * Literal strings "( ... )" and hex strings "< ... >" are supported.
  * Returns a NUL-terminated buffer the caller must free(), or NULL.
  */
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape)
 {
     char *s, *s0;
     const char *start, *q, *end;

     if (slen)
         *slen = 0;
     if (qend)
         *qend = q0;
     q = pdf_getdict(q0, &len, key);
     if (!q)
         return NULL;

     if (*q == '(') {
         int paren = 1;
         start = ++q;
         len--;                      /* account for the opening '(' */
         /* find the matching ')', honouring nesting and backslash escapes */
         for (; paren > 0 && len > 0; q++, len--) {
             switch (*q) {
                 case '(':
                     paren++;
                     break;
                 case ')':
                     paren--;
                     break;
                 case '\\':
                     /* skip the escaped byte, but never step past the data */
                     if (len > 1) {
                         q++;
                         len--;
                     }
                     break;
                 default:
                     break;
             }
         }
         if (qend)
             *qend = q;
         q--;
         len = q - start;
         if (len < 0)                /* nothing followed the '(' */
             return NULL;
         s0 = s = cli_malloc(len + 1);
         if (!s) {
             cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
             return NULL;
         }
         end = start + len;
         if (noescape) {
             memcpy(s0, start, len);
             s = s0 + len;
         } else {
             /* every output byte consumes at least one input byte, so the
              * len + 1 byte buffer cannot overflow */
             for (q = start; q < end; q++) {
                 if (*q != '\\') {
                     *s++ = *q;
                     continue;
                 }
                 q++;
                 if (q >= end) {
                     /* lone backslash at the very end: keep it */
                     *s++ = '\\';
                     break;
                 }
                 switch (*q) {
                     case 'n':
                         *s++ = '\n';
                         break;
                     case 'r':
                         *s++ = '\r';
                         break;
                     case 't':
                         *s++ = '\t';
                         break;
                     case 'b':
                         *s++ = '\b';
                         break;
                     case 'f':
                         *s++ = '\f';
                         break;
                     case '(':/* fall-through */
                     case ')':/* fall-through */
                     case '\\':
                         *s++ = *q;
                         break;
                     case '\n':
                         /* line continuation: ignore */
                         break;
                     case '\r':
                         /* line continuation: ignore, CRLF counts as one */
                         if (q+1 < end && q[1] == '\n')
                             q++;
                         break;
                     case '0':
                     case '1':
                     case '2':
                     case '3':
                     case '4':
                     case '5':
                     case '6':
                     case '7': {
                         /* octal escape: one to three octal digits, high
                          * order overflow is dropped */
                         unsigned value = 0;
                         int ndigits;
                         for (ndigits = 0;
                              ndigits < 3 && q < end && *q >= '0' && *q <= '7';
                              ndigits++, q++)
                             value = value * 8 + (unsigned)(*q - '0');
                         q--;        /* the loop increment steps past the last digit */
                         *s++ = (char)(value & 0xff);
                         break;
                     }
                     default:
                         /* unknown escape: keep the backslash, and let the
                          * next iteration copy the character after it */
                         *s++ = '\\';
                         q--;
                         break;
                 }
             }
         }
         *s++ = '\0';
         if (slen)
             *slen = s - s0 - 1;
         return s0;
     }

     if (*q == '<') {
         start = ++q;
         len--;                      /* account for the opening '<' */
         /* search only inside the remaining data, first digit included */
         q = (len > 0) ? memchr(start, '>', len) : NULL;
         if (!q)
             return NULL;
         if (qend)
             *qend = q;
         s = cli_malloc((q - start)/2 + 1);
         if (s == NULL) { /* oops, couldn't allocate memory */
             cli_dbgmsg("cli_pdf: unable to allocate memory...\n");
             return NULL;
         }
         if (cli_hex2str_to(start, s, q - start)) {
             cli_dbgmsg("cli_pdf: %s has bad hex value\n", key);
             free(s);
             return NULL;
         }
         s[(q-start)/2] = '\0';
         if (slen)
             *slen = (q - start)/2;
         return s;
     }

     cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
     return NULL;
 }
 
374be101
 /*
  * Read a name value ("/Something") stored under `key`.
  *
  * q    start of the dictionary buffer
  * len  readable bytes at q
  * key  token to look up, e.g. "/StmF"
  *
  * Returns a newly allocated, NUL-terminated copy of the name without its
  * leading '/', running up to the next '/', ">>", NUL byte or the end of the
  * data.  Returns NULL when the key is missing, the value is not a name, or
  * allocation fails.  The caller frees the result.
  */
 static char *pdf_readval(const char *q, int len, const char *key)
 {
     const char *end;
     char *s;

     q = pdf_getdict(q, &len, key);
     if (!q || len <= 0)
         return NULL;
     while (len > 0 && *q && *q == ' ') { q++; len--; }
     /* the loop above may have used up all the data: check before reading */
     if (len <= 0 || *q != '/')
         return NULL;
     q++;
     len--;
     end = q;
     while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
         end++; len--;
     }
     s = cli_malloc(end - q + 1);
     if (!s)
         return NULL;
     memcpy(s, q, end-q);
     s[end-q] = '\0';
     return s;
 }
 
7606789f
 /*
  * Read the integer stored under `key` in a dictionary buffer.
  * Returns atoi() of the value, or -1 when the key cannot be found; a stored
  * value of -1 is therefore indistinguishable from "missing".
  */
 static int pdf_readint(const char *q0, int len, const char *key)
 {
     const char *value = pdf_getdict(q0, &len, key);

     return value ? atoi(value) : -1;
 }
 
 /*
  * Read the boolean stored under `key` in a dictionary buffer.
  *
  * Returns 1 for "true", 0 for "false", and `Default` when the key is
  * missing, too little data follows it, or the value is neither token.
  */
 static int pdf_readbool(const char *q0, int len, const char *key, int Default)
 {
     const char *q  = pdf_getdict(q0, &len, key);
     /* "true" needs only four bytes; do not demand five */
     if (!q || len < 4)
         return Default;
     if (!strncmp(q, "true", 4))
         return 1;
     if (len >= 5 && !strncmp(q, "false", 5))
         return 0;
     cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
     return Default;
 }
 
 /*
  * 32-byte padding string used by check_user_password(): it stands in for
  * the (empty) user password when the candidate key is derived, and is the
  * plaintext used when the expected /U value is recomputed.
  */
 static const char *key_padding =
 "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
 "\x2e\x2e\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
 
 /*
  * Debug helper: log `len` bytes of `hex` as a hexadecimal string, prefixed
  * by `msg`.  Does nothing unless cli_debug_flag is set.
  */
 static void dbg_printhex(const char *msg, const char *hex, unsigned len)
 {
     char *kh;

     if (!cli_debug_flag)
         return;
     kh = cli_str2hex(hex, len);
     /* guard against a failed conversion: NULL must not reach "%s" */
     if (!kh)
         return;
     cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
     free(kh);
 }
 
 static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
 				const char *U, int32_t P, int EM,
bbfad9ba
 				const char *UE,
7606789f
 				unsigned length, unsigned oulen)
 {
     unsigned i;
     uint8_t result[16];
     char data[32];
     cli_md5_ctx md5;
     struct arc4_state arc4;
     unsigned password_empty = 0;
 
     dbg_printhex("U: ", U, 32);
     dbg_printhex("O: ", O, 32);
     if (R == 5) {
 	uint8_t result2[32];
 	SHA256_CTX sha256;
 	/* supplement to ISO3200, 3.5.2 Algorithm 3.11 */
 	sha256_init(&sha256);
 	/* user validation salt */
 	sha256_update(&sha256, U+32, 8);
 	sha256_final(&sha256, result2);
 	dbg_printhex("Computed U", result2, 32);
 	if (!memcmp(result2, U, 32)) {
bbfad9ba
 	    off_t n;
7606789f
 	    password_empty = 1;
 	    /* Algorithm 3.2a could be used to recover encryption key */
bbfad9ba
 	    sha256_init(&sha256);
 	    sha256_update(&sha256, U+40, 8);
 	    sha256_final(&sha256, result2);
 	    n = UE ? strlen(UE) : 0;
 	    if (n != 32) {
3afedd07
 		cli_dbgmsg("cli_pdf: UE length is not 32: %d\n", (int)n);
4956690d
 		noisy_warnmsg("cli_pdf: UE length is not 32: %d\n", n);
bbfad9ba
 	    } else {
 		pdf->keylen = 32;
 		pdf->key = cli_malloc(32);
241e7eb1
 		if (!pdf->key) {
             cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n");
             return;
         }
bbfad9ba
 		aes_decrypt(UE, &n, pdf->key, result2, 32, 0);
 		dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
 	    }
7606789f
 	}
374b5aea
     } else if ((R >= 2) && (R <= 4)) {
7606789f
 	/* 7.6.3.3 Algorithm 2 */
 	cli_md5_init(&md5);
 	/* empty password, password == padding */
 	cli_md5_update(&md5, key_padding, 32);
 	cli_md5_update(&md5, O, 32);
 	P = le32_to_host(P);
 	cli_md5_update(&md5, &P, 4);
 	cli_md5_update(&md5, pdf->fileID, pdf->fileIDlen);
 	if (R >= 4 && !EM) {
 	    uint32_t v = 0xFFFFFFFF;
 	    cli_md5_update(&md5, &v, 4);
 	}
 	cli_md5_final(result, &md5);
374b5aea
 	if (length > 128)
 	    length = 128;
7606789f
 	if (R >= 3) {
 	    for (i=0;i<50;i++) {
 		cli_md5_init(&md5);
 		cli_md5_update(&md5, result, length/8);
 		cli_md5_final(result, &md5);
 	    }
 	}
 	if (R == 2)
 	    length = 40;
 	pdf->keylen = length / 8;
 	pdf->key = cli_malloc(pdf->keylen);
 	if (!pdf->key)
 	    return;
 	memcpy(pdf->key, result, pdf->keylen);
bbfad9ba
 	dbg_printhex("md5", result, 16);
7606789f
 	dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
 
 	/* 7.6.3.3 Algorithm 6 */
 	if (R == 2) {
 	    /* 7.6.3.3 Algorithm 4 */
 	    memcpy(data, key_padding, 32);
 	    arc4_init(&arc4, pdf->key, pdf->keylen);
 	    arc4_apply(&arc4, data, 32);
b8b055c5
 	    dbg_printhex("computed U (R2)", data, 32);
7606789f
 	    if (!memcmp(data, U, 32))
 		password_empty = 1;
 	} else if (R >= 3) {
 	    unsigned len = pdf->keylen;
 	    /* 7.6.3.3 Algorithm 5 */
 	    cli_md5_init(&md5);
 	    cli_md5_update(&md5, key_padding, 32);
 	    cli_md5_update(&md5, pdf->fileID, pdf->fileIDlen);
 	    cli_md5_final(result, &md5);
 	    memcpy(data, pdf->key, len);
 	    arc4_init(&arc4, data, len);
 	    arc4_apply(&arc4, result, 16);
 	    for (i=1;i<=19;i++) {
 		unsigned j;
 		for (j=0;j<len;j++)
 		    data[j] = pdf->key[j] ^ i;
 		arc4_init(&arc4, data, len);
 		arc4_apply(&arc4, result, 16);
 	    }
b8b055c5
 	    dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen);
 	    dbg_printhex("computed U (R>=3)", result, 16);
7606789f
 	    if (!memcmp(result, U, 16))
 		password_empty = 1;
 	} else {
 	    cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
4956690d
 	    noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
7606789f
 	}
     }
374b5aea
     else {
 	/* Supported R is in {2,3,4,5} */
 	cli_dbgmsg("cli_pdf: R value out of range\n");
 	noisy_warnmsg("cli_pdf: R value out of range\n");
 	return;
     }
7606789f
     if (password_empty) {
 	cli_dbgmsg("cli_pdf: user password is empty\n");
4956690d
 	noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
7606789f
 	/* The key we computed above is the key used to encrypt the streams.
 	 * We could decrypt it now if we wanted to */
 	pdf->flags |= 1 << DECRYPTABLE_PDF;
     } else {
 	cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
4956690d
 	noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
7606789f
 	/* the key is not valid, we would need the user or the owner password to
 	 * decrypt */
     }
 }
 
bcc68567
 static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def)
 {
     const char *q;
     char *CFM = NULL;
884b2e73
     enum enc_method ret = ENC_UNKNOWN;
bcc68567
     if (!key)
 	return def;
     if (!strcmp(key, "Identity"))
 	return ENC_IDENTITY;
     q = pdf_getdict(dict, &len, key);
     if (!q)
 	return def;
     CFM = pdf_readval(q, len, "/CFM");
     if (CFM) {
 	cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM);
7f2beb0e
 	if (!strncmp(CFM,"V2", 2)){
884b2e73
 	    ret = ENC_V2;
7f2beb0e
 	}    
884b2e73
 	else if (!strncmp(CFM,"AESV2",5)){
 	    ret = ENC_AESV2;
7f2beb0e
 	}    
884b2e73
 	else if (!strncmp(CFM,"AESV3",5)){
 	    ret = ENC_AESV3;
7f2beb0e
 	}    
884b2e73
 	else if (!strncmp(CFM,"None",4)){
 	    ret = ENC_NONE;
7f2beb0e
 	}
 	free(CFM);
bcc68567
     }
884b2e73
     return ret;
bcc68567
 }
 
7606789f
 static void pdf_handle_enc(struct pdf_struct *pdf)
 {
     struct pdf_obj *obj;
bbfad9ba
     uint32_t len, required_flags, n, R, P, length, EM = 1, i, oulen;
bcc68567
     char *O, *U, *UE, *StmF, *StrF, *EFF;
7606789f
     const char *q, *q2;
 
3f8016ce
     if (pdf->enc_objid == ~0u)
7606789f
 	return;
3f8016ce
     if (!pdf->fileID) {
 	cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
4956690d
 	noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
0b073f2e
 	return;
3f8016ce
     }
     obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
     if (!obj) {
 	cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
4956690d
 	noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
7606789f
 	return;
3f8016ce
     }
7606789f
     len = obj_size(pdf, obj, 1);
     q = pdf->map + obj->start;
 
bcc68567
     O = U = UE = StmF = StrF = EFF = NULL;
7606789f
     do {
374be101
 
bcc68567
 	pdf->enc_method_string = ENC_UNKNOWN;
 	pdf->enc_method_stream = ENC_UNKNOWN;
 	pdf->enc_method_embeddedfile = ENC_UNKNOWN;
7261220f
 	P = pdf_readint(q, len, "/P");
 	if (P == ~0u) {
 	    cli_dbgmsg("cli_pdf: invalid P\n");
4956690d
 	    noisy_warnmsg("cli_pdf: invalid P\n");
7261220f
 	    break;
 	}
7606789f
 
 	q2 = cli_memstr(q, len, "/Standard", 9);
 	if (!q2) {
 	    cli_dbgmsg("cli_pdf: /Standard not found\n");
4956690d
 	    noisy_warnmsg("cli_pdf: /Standard not found\n");
7606789f
 	    break;
 	}
3f8016ce
 	/* we can have both of these:
 	* /AESV2/Length /Standard/Length
 	* /Length /Standard
 	* make sure we don't mistake AES's length for Standard's */
 	length = pdf_readint(q2, len - (q2 - q), "/Length");
 	if (length == ~0u)
 	    length = pdf_readint(q, len, "/Length");
 	if (length < 40) {
 	    cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
 	    length = 40;
 	}
7606789f
 
 	R = pdf_readint(q, len, "/R");
 	if (R == ~0u) {
 	    cli_dbgmsg("cli_pdf: invalid R\n");
4956690d
 	    noisy_warnmsg("cli_pdf: invalid R\n");
7606789f
 	    break;
 	}
374b5aea
 	if ((R > 5) || (R < 2)) {
 	    cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n");
 	    noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n");
 	    break;
 	}
7606789f
 
 	if (R < 5)
 	    oulen = 32;
 	else
 	    oulen = 48;
bbfad9ba
 	if (R == 2 || R == 3) {
bcc68567
 	    pdf->enc_method_stream = ENC_V2;
 	    pdf->enc_method_string = ENC_V2;
 	    pdf->enc_method_embeddedfile = ENC_V2;
bbfad9ba
 	} else if (R == 4 || R == 5) {
 	    EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
bcc68567
 	    StmF = pdf_readval(q, len, "/StmF");
 	    StrF = pdf_readval(q, len, "/StrF");
 	    EFF = pdf_readval(q, len, "/EFF");
 	    n = len;
27c8b02b
 	    pdf->CF = pdf_getdict(q, &n, "/CF");
 	    pdf->CF_n = n;
bcc68567
 	    if (StmF)
 		cli_dbgmsg("cli_pdf: StmF: %s\n", StmF);
 	    if (StrF)
 		cli_dbgmsg("cli_pdf: StrF: %s\n", StrF);
 	    if (EFF)
 		cli_dbgmsg("cli_pdf: EFF: %s\n", EFF);
27c8b02b
 	    pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);
 	    pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);
 	    pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream);
2307bd6a
 	    free(StmF);
 	    free(StrF);
 	    free(EFF);
bcc68567
 
bbfad9ba
 	    cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n",
 		       EM ? "true" : "false");
 	    if (R == 4)
 		length = 128;
 	    else {
 		n = 0;
fb0c9fa2
 		UE = pdf_readstring(q, len, "/UE", &n, NULL, 0);
bbfad9ba
 		length = 256;
 	    }
 	}
 	if (length == ~0u)
 	    length = 40;
7606789f
 
 	n = 0;
fb0c9fa2
 	O = pdf_readstring(q, len, "/O", &n, NULL, 0);
7606789f
 	if (!O || n < oulen) {
 	    cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
4956690d
 	    cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
7606789f
 	    if (O)
 		dbg_printhex("invalid O", O, n);
 	    break;
 	}
 	if (n > oulen) {
 	    for (i=oulen;i<n;i++)
 		if (O[i])
 		    break;
 	    if (i != n) {
 		dbg_printhex("too long O", O, n);
4956690d
 		noisy_warnmsg("too long O", O, n);
7606789f
 		break;
 	    }
 	}
 
 	n = 0;
fb0c9fa2
 	U = pdf_readstring(q, len, "/U", &n, NULL, 0);
7606789f
 	if (!U || n < oulen) {
 	    cli_dbgmsg("cli_pdf: invalid U: %d\n", n);
4956690d
 	    noisy_warnmsg("cli_pdf: invalid U: %d\n", n);
7606789f
 	    if (U)
 		dbg_printhex("invalid U", U, n);
 	    break;
 	}
 	if (n > oulen) {
 	    for (i=oulen;i<n;i++)
 		if (U[i])
 		    break;
 	    if (i != n) {
 		dbg_printhex("too long U", U, n);
 		break;
 	    }
 	}
 	cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %d\n", R, P, length);
 	if (length % 8) {
 	    cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
4956690d
 	    noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
7606789f
 	    break;
 	}
bbfad9ba
 	check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
7606789f
     } while (0);
     free(O);
     free(U);
bbfad9ba
     free(UE);
7606789f
 }
 
e7a27135
 /*
  * Scan the PDF document found in the file map of ctx.
  *
  * dir    - stored in the parser state (pdf.dir) for the helpers and logged;
  *          presumably the directory used for extracted objects (confirm
  *          against the callers)
  * ctx    - scan context; supplies the file map (*ctx->fmap) and options
  * offset - offset in the map at which the search for the "%PDF-" header
  *          starts (0 when coming from filetype2)
  *
  * Returns:
  *   CL_EMAP     when a required region of the map cannot be obtained
  *   CL_SUCCESS  when no "%PDF-" header is found in the first 1032 bytes
  *   CL_VIRUS    when any hook, heuristic or extracted object alerted
  *   CL_EFORMAT  when nothing else went wrong but at least one object was
  *               rejected as badly formatted
  *   otherwise the last status code produced by the hooks / extraction.
  *   CL_BREAK is never returned to the caller; it is mapped to CL_CLEAN.
  *
  * Everything hanging off the local parser state (pdf.objs, pdf.fileID,
  * pdf.key) is released on every exit path taken after the trailer has been
  * handed to pdf_parse_trailer().
  */
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
 {
     struct pdf_struct pdf;
     fmap_t *map = *ctx->fmap;
     size_t size = map->len - offset;
     off_t versize = size > 1032 ? 1032 : size;
     off_t map_off, bytesleft;
     long xref;
     const char *pdfver, *start, *eofmap, *q, *eof;
     int rc, badobjects = 0;
     unsigned i, alerts = 0;

     cli_dbgmsg("in cli_pdf(%s)\n", dir);
     memset(&pdf, 0, sizeof(pdf));
     pdf.ctx = ctx;
     pdf.dir = dir;
     pdf.enc_objid = ~0u;

     pdfver = start = fmap_need_off_once(map, offset, versize);

     /* Check PDF version */
     if (!pdfver) {
         cli_errmsg("cli_pdf: mmap() failed (1)\n");
         return CL_EMAP;
     }
     /* offset is 0 when coming from filetype2 */
     pdfver = cli_memstr(pdfver, versize, "%PDF-", 5);
     if (!pdfver) {
         cli_dbgmsg("cli_pdf: no PDF- header found\n");
         noisy_warnmsg("cli_pdf: no PDF- header found\n");
         return CL_SUCCESS;
     }
     /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future
      * versions.
      * NOTE(review): only the 5 bytes of "%PDF-" are known to lie inside the
      * versize window; pdfver[5..7] can be past it when the header sits at
      * the very end of the window - confirm what fmap guarantees there. */
     if (pdfver[5] != '1' || pdfver[6] != '.' ||
         pdfver[7] < '1' || pdfver[7] > '9') {
         pdf.flags |= 1 << BAD_PDF_VERSION;
         cli_dbgmsg("cli_pdf: bad pdf version: %.8s\n", pdfver);
     }
     if (pdfver != start || offset) {
         pdf.flags |= 1 << BAD_PDF_HEADERPOS;
         cli_dbgmsg("cli_pdf: PDF header is not at position 0: %ld\n",
                    (long)(pdfver - start + offset));
     }
     offset += pdfver - start;

     /* find trailer and xref, don't fail if not found */
     map_off = (off_t)map->len - 2048;
     if (map_off < 0)
         map_off = 0;
     bytesleft = map->len - map_off;
     eofmap = fmap_need_off_once(map, map_off, bytesleft);
     if (!eofmap) {
         cli_errmsg("cli_pdf: mmap() failed (2)\n");
         return CL_EMAP;
     }
     eof = eofmap + bytesleft;
     /* search backwards from the end of the file for the EOF marker */
     for (q = &eofmap[bytesleft - 5]; q > eofmap; q--) {
         if (memcmp(q, "%%EOF", 5) == 0)
             break;
     }
     if (q <= eofmap) {
         pdf.flags |= 1 << BAD_PDF_TRAILER;
         cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
     } else {
         /*size = q - eofmap + map_off;*/
         /* keep going backwards for the startxref keyword */
         q -= 9;
         for (; q > eofmap; q--) {
             if (memcmp(q, "startxref", 9) == 0)
                 break;
         }
         if (q <= eofmap) {
             pdf.flags |= 1 << BAD_PDF_TRAILER;
             cli_dbgmsg("cli_pdf: startxref not found\n");
         } else {
             pdf_parse_trailer(&pdf, eofmap, eof - eofmap);

             /* the xref offset follows the keyword, after optional blanks */
             q += 9;
             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) {
                 q++;
             }
             xref = atol(q);
             bytesleft = map->len - offset - xref;
             if (bytesleft > 4096)
                 bytesleft = 4096;
             q = fmap_need_off_once(map, offset + xref, bytesleft);
             if (!q || xrefCheck(q, q + bytesleft) == -1) {
                 cli_dbgmsg("cli_pdf: did not find valid xref\n");
                 pdf.flags |= 1 << BAD_PDF_TRAILER;
             }
         }
     }

     /* Bytes from the PDF header to the end of the map.  The initial value of
      * size already excluded the caller's offset, so recompute it from the
      * map length instead of subtracting the (updated) offset a second time,
      * which cut the scanned range short whenever offset was non-zero. */
     size = map->len - offset;

     pdf.size = size;
     pdf.map = fmap_need_off(map, offset, size);
     pdf.startoff = offset;
     if (!pdf.map) {
         cli_errmsg("cli_pdf: mmap() failed (3)\n");
         rc = CL_EMAP;
         goto done;
     }
     rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1);
     if ((rc == CL_VIRUS) && SCAN_ALL) {
         cli_dbgmsg("cli_pdf: (pre hooks) returned %d\n", rc);
         alerts++;
         rc = CL_CLEAN;
     }
     else if (rc) {
         cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
         goto done;
     }

     /* parse PDF and find obj offsets */
     while ((rc = pdf_findobj(&pdf)) > 0) {
         struct pdf_obj *obj = &pdf.objs[pdf.nobjs - 1];
         cli_dbgmsg("cli_pdf: found %d %d obj @%ld\n", obj->id >> 8, obj->id & 0xff,
                    (long)(obj->start + offset));
     }
     if (pdf.nobjs)
         pdf.nobjs--;
     if (rc == -1)
         pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;

     /* must parse after finding all objs, so we can flag indirect objects */
     for (i = 0; i < pdf.nobjs; i++) {
         struct pdf_obj *obj = &pdf.objs[i];
         pdf_parseobj(&pdf, obj);
     }

     pdf_handle_enc(&pdf);
     if (pdf.flags & (1 << ENCRYPTED_PDF))
         cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
                    (pdf.flags & (1 << DECRYPTABLE_PDF)) ?
                    "decryptable" : "not decryptable, stream will probably fail to decompress");

     if (DETECT_ENCRYPTED &&
         (pdf.flags & (1 << ENCRYPTED_PDF)) &&
         !(pdf.flags & (1 << DECRYPTABLE_PDF))) {
         /* It is encrypted, and a password/key needs to be supplied to decrypt.
          * This doesn't trigger for PDFs that are encrypted but don't need
          * a password to decrypt */
         cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
         alerts++;
         if (!SCAN_ALL)
             rc = CL_VIRUS;
     }

     if (!rc) {
         rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
         cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
         if (rc == CL_VIRUS) {
             alerts++;
             if (SCAN_ALL) {
                 rc = CL_CLEAN;
             }
         }
     }

     /* extract PDF objs */
     for (i = 0; !rc && i < pdf.nobjs; i++) {
         struct pdf_obj *obj = &pdf.objs[i];
         rc = pdf_extract_obj(&pdf, obj);
         switch (rc) {
             case CL_EFORMAT:
                 /* Don't halt on one bad object */
                 cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
                 badobjects++;
                 rc = CL_CLEAN;
                 break;
             case CL_VIRUS:
                 /* in scan-all mode keep going; the alert is counted */
                 alerts++;
                 if (SCAN_ALL) {
                     rc = CL_CLEAN;
                 }
                 break;
             default:
                 break;
         }
     }

     /* these stream flags are not meaningful for encrypted documents */
     if (pdf.flags & (1 << ENCRYPTED_PDF))
         pdf.flags &= ~((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) |
                        (1 << BAD_ASCIIDECODE));

     if (pdf.flags && !rc) {
         cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
         rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
         if (rc == CL_VIRUS) {
             alerts++;
             if (SCAN_ALL) {
                 rc = CL_CLEAN;
             }
         }
         if (!rc && (ctx->options & CL_SCAN_ALGORITHMIC)) {
             if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
                 /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
                 cli_append_virus(ctx, "Heuristics.PDF.ObfuscatedNameObject");
                 rc = cli_found_possibly_unwanted(ctx);
             }
         }
 #if 0
         /* TODO: find both trailers, and /Encrypt settings */
         if (pdf.flags & (1 << LINEARIZED_PDF))
             pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
         if (pdf.flags & (1 << MANY_FILTERS))
             pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
         if (!rc && (pdf.flags &
             ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
              (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
              (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
             rc = CL_EUNPACK;
         }
 #endif
     }

     /* any alert collected along the way wins over the last status code */
     if (alerts) {
         rc = CL_VIRUS;
     }
     else if (!rc && badobjects) {
         rc = CL_EFORMAT;
     }

     cli_dbgmsg("cli_pdf: returning %d\n", rc);

 done:
     /* single release point, also used by the early exits above; the struct
      * was zeroed at entry so members that were never set are NULL */
     free(pdf.objs);
     free(pdf.fileID);
     free(pdf.key);

     /* PDF hooks may abort, don't return CL_BREAK to caller! */
     return rc == CL_BREAK ? CL_CLEAN : rc;
 }
 
6e33139f
 /*
  * Value (0-15) of one ASCII hexadecimal digit, or -1 for any other byte.
  */
 static int asciihex_nibble(int c)
 {
     if (c >= '0' && c <= '9')
         return c - '0';
     if (c >= 'a' && c <= 'f')
         return c - 'a' + 10;
     if (c >= 'A' && c <= 'F')
         return c - 'A' + 10;
     return -1;
 }

 /*
  * Decode an ASCIIHexDecode stream.
  *
  * buf    - encoded data, need not be NUL terminated
  * len    - number of bytes available in buf
  * output - receives the decoded bytes; never more than len / 2 bytes are
  *          written
  *
  * Returns the number of bytes stored in output, or -1 when a byte that is
  * neither a hex digit, white-space nor the '>' end-of-data marker is found.
  *
  * Every pair of hex digits yields one byte.  All PDF white-space characters
  * (NUL, TAB, LF, FF, CR, space) are ignored wherever they occur, including
  * between the two digits of a pair.  '>' ends the data; if an odd number of
  * digits was seen before it, the missing low digit is taken to be 0.  A
  * dangling digit that is not followed by '>' is dropped.
  *
  * As before, an invalid byte within the last three bytes of the input is
  * skipped instead of being reported (presumably to tolerate junk trailing a
  * stream whose end marker is missing).
  */
 static int asciihexdecode(const char *buf, off_t len, char *output)
 {
     unsigned char *out = (unsigned char *)output;
     off_t pos;
     int written = 0;
     int high = -1;      /* pending high nibble, -1 when there is none */

     for (pos = 0; pos < len; pos++) {
         const unsigned char c = (unsigned char)buf[pos];
         int value;

         if (c == '>') {
             /* end of data: flush an odd digit as if '0' followed it */
             if (high >= 0)
                 out[written++] = (unsigned char)(high << 4);
             break;
         }
         if (c == ' ' || c == '\n' || c == '\r' || c == '\t' ||
             c == '\f' || c == '\0')
             continue;

         value = asciihex_nibble(c);
         if (value < 0) {
             if (len - pos < 4)
                 continue;
             return -1;
         }

         if (high < 0) {
             high = value;
         } else {
             out[written++] = (unsigned char)((high << 4) | value);
             high = -1;
         }
     }
     return written;
 }
67355216
 /*
  * ascii85 inflation, returns number of bytes in output, -1 for error
  *
  * buf    - encoded data, need not be NUL terminated
  * len    - number of bytes available in buf
  * output - receives the decoded bytes; the caller provides the buffer
  *          (every 'z' in the input expands to four bytes)
  *
  * Decoding stops at the "~>" end marker; a missing marker is only logged.
  * A partial final group is flushed only when the marker is reached.
  * Input bytes are read as unsigned char, so a 0xFF byte is reported as an
  * invalid character instead of being mistaken for EOF, and isspace() is
  * never called with a negative value.
  *
  * See http://www.piclist.com/techref/method/encode.htm (look for base85)
  */
 static int
 ascii85decode(const char *buf, off_t len, unsigned char *output)
 {
 	const char *ptr;
 	uint32_t sum = 0;
 	int quintet = 0;
 	int ret = 0;

 	if(cli_memstr(buf, len, "~>", 2) == NULL)
 		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");

 	ptr = buf;

 	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

 	while(len > 0) {
 		/* widen through unsigned char so that no data byte equals EOF */
 		int byte = (unsigned char)*ptr++;

 		len--;

 		if((byte == '~') && (len > 0) && (*ptr == '>'))
 			byte = EOF;

 		if(byte >= '!' && byte <= 'u') {
 			sum = (sum * 85) + ((uint32_t)byte - '!');
 			if(++quintet == 5) {
 				/* a full group of five digits makes four bytes */
 				*output++ = (unsigned char)(sum >> 24);
 				*output++ = (unsigned char)((sum >> 16) & 0xFF);
 				*output++ = (unsigned char)((sum >> 8) & 0xFF);
 				*output++ = (unsigned char)(sum & 0xFF);
 				ret += 4;
 				quintet = 0;
 				sum = 0;
 			}
 		} else if(byte == 'z') {
 			/* 'z' is shorthand for four zero bytes, only valid between groups */
 			if(quintet) {
 				cli_dbgmsg("cli_pdf: ascii85decode: unexpected 'z'\n");
 				return -1;
 			}
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			ret += 4;
 		} else if(byte == EOF) {
 			cli_dbgmsg("cli_pdf: ascii85decode: quintet %d\n", quintet);
 			if(quintet) {
 				int i;

 				if(quintet == 1) {
 					cli_dbgmsg("cli_pdf: ascii85Decode: only 1 byte in last quintet\n");
 					return -1;
 				}
 				/* scale the partial group up as if it had five digits */
 				for(i = quintet; i < 5; i++)
 					sum *= 85;

 				/* round up so the kept high-order bytes come out right */
 				sum += (0xFFFFFF >> ((quintet - 2) * 8));
 				ret += quintet-1;
 				for(i = 0; i < quintet - 1; i++)
 					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
 			}
 			break;
 		} else if(!isspace(byte)) {
 			cli_dbgmsg("cli_pdf: ascii85Decode: invalid character 0x%x, len %lu\n",
 				byte & 0xFF, (unsigned long)len);
 			return -1;
 		}
 	}
 	return ret;
 }
bce73fe9
 
 /*
  * Find the start of the next line
  *
  * ptr - current position
  * len - number of bytes available at ptr
  *
  * Skips the rest of the current line and then the run of '\r' / '\n'
  * characters that ends it.  Returns a pointer to the first byte after that
  * run, or NULL when the buffer ends before such a byte is found (this
  * includes len == 0).
  *
  * Only '\r' and '\n' count as line terminators; a NUL byte inside the
  * buffer is ordinary data.
  */
 static const char *
 pdf_nextlinestart(const char *ptr, size_t len)
 {
 	const char *const end = ptr + len;

 	/* advance to the end-of-line sequence */
 	while(ptr < end && *ptr != '\r' && *ptr != '\n')
 		ptr++;

 	/* step over the end-of-line characters themselves */
 	while(ptr < end && (*ptr == '\r' || *ptr == '\n'))
 		ptr++;

 	return (ptr < end) ? ptr : NULL;
 }
9be10a55
 
ef8219b8
 /*
  * Return the start of the next PDF object.
  * This assumes that we're not in a stream.
  *
  * ptr - current position, normally inside the object to step over
  * len - number of bytes available at ptr
  *
  * The scan ends at the first '/' (name object) or '(' (string, e.g. JS),
  * even when that is the very first byte, or at the first other byte that
  * follows a separator.  Separators are blanks, '[' (array start), '<'
  * (dictionary start), line ends and '%' comments; for the last two the
  * scan continues at the start of the next line.
  * Returns NULL when the buffer is exhausted first.
  */
 static const char *
 pdf_nextobject(const char *ptr, size_t len)
 {
 	/* set once a separator has been crossed, i.e. we left the object
 	 * the scan started in */
 	int left_object = 0;

 	while(len != 0) {
 		const char ch = *ptr;

 		/* a name or string start always counts as a new object */
 		if(ch == '/' || ch == '(')
 			return ptr;

 		if(ch == '\n' || ch == '\r' || ch == '%') {
 			/* line end or comment: resume on the following line */
 			const char *line = pdf_nextlinestart(ptr, len);

 			if(line == NULL)
 				return NULL;
 			len -= (size_t)(line - ptr);
 			ptr = line;
 			left_object = 1;
 			continue;
 		}

 		if(ch == ' ' || ch == '\t' || ch == '[' ||
 		   ch == '\v' || ch == '\f' || ch == '<') {
 			left_object = 1;
 		} else if(left_object) {
 			/* TODO: parse and return object type */
 			return ptr;
 		}

 		ptr++;
 		len--;
 	}
 	return NULL;
 }