libclamav/pdf.c
d056cc17
 /*
2023340a
  *  Copyright (C) 2007-2008 Sourcefire, Inc.
  *
  *  Authors: Nigel Horne
d056cc17
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
d056cc17
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
2023340a
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
1eceda0e
  *
  * TODO: Embedded fonts
  * TODO: Predictor image handling
d056cc17
  */
95e11e5a
 static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
d056cc17
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
7f49ea4b
 #ifdef	HAVE_MMAP
240d3307
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
511a59c7
 #include <errno.h>
ed6446ff
 #ifdef	HAVE_LIMITS_H
 #include <limits.h>
 #endif
9443ec4a
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
ed6446ff
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
 #endif
240d3307
 
 #include <zlib.h>
 
925ece3d
 #ifdef	C_WINDOWS
 #include <io.h>
 #endif
 
ed6446ff
 #include "clamav.h"
 #include "others.h"
240d3307
 #include "mbox.h"
654c0b96
 #include "pdf.h"
a5afcb67
 #include "scanners.h"
240d3307
 
d0d1afd7
 #ifndef	O_BINARY
 #define	O_BINARY	0
 #endif
 
1eceda0e
 #ifdef	CL_DEBUG
5cd3f734
 /*
  * Define SAVE_TMP to save a copy of the data being worked on in /tmp
  * (debugging aid only)
  */
 /*#define	SAVE_TMP*/
1eceda0e
 #endif
 
96522097
 /*
  * Forward declarations of the file-local helpers defined further down:
  * the two stream decoders (flate and ascii85), the tokenising helpers that
  * step through lines and objects, and a memmem()-like search routine.
  */
 static	int	try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx);
 static	int	flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx);
b02bab2b
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
bce73fe9
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
ef8219b8
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
ceabee13
 static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
da653b74
 
144df7c1
 /*
  * TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
  */
d056cc17
 int
72ce4b70
 cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
d056cc17
 {
6c9dc98d
 	off_t size;	/* total number of bytes in the file */
8affc406
 	off_t bytesleft, trailerlength;
dbfb485b
 	char *buf;	/* start of memory mapped area */
bce73fe9
 	const char *p, *q, *trailerstart;
6c9dc98d
 	const char *xrefstart;	/* cross reference table */
70502709
 	/*size_t xreflength;*/
b432851f
 	table_t *md5table;
a5afcb67
 	int printed_predictor_message, printed_embedded_font_message, rc;
3470220c
 	unsigned int files;
bf3e4471
 	struct stat statb;
240d3307
 
925ece3d
 	cli_dbgmsg("in cli_pdf(%s)\n", dir);
798308de
 
dbfb485b
 	if(fstat(desc, &statb) < 0) {
 		cli_errmsg("cli_pdf: fstat() failed\n");
240d3307
 		return CL_EOPEN;
dbfb485b
 	}
240d3307
 
72ce4b70
 	size = statb.st_size - offset;
240d3307
 
139823ca
 	if(size <= 7)	/* doesn't even include the file header */
dbfb485b
 		return CL_CLEAN;
139823ca
 
72ce4b70
 	p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, offset);
dbfb485b
 	if(buf == MAP_FAILED) {
 		cli_errmsg("cli_pdf: mmap() failed\n");
871177cd
 		return CL_EMAP;
bf3e4471
 	}
 
95e11e5a
 	cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);
0a097146
 
139823ca
 	/* Lines are terminated by \r, \n or both */
 
 	/* File Header */
72ce4b70
 	bytesleft = size - 5;
 	for(q = p; bytesleft; bytesleft--, q++) {
 	    if(!strncasecmp(q, "%PDF-", 5)) {
 		bytesleft = size - (off_t) (q - p);
 		p = q;
 		break;
 	    }
139823ca
 	}
 
72ce4b70
 	if(!bytesleft) {
 	    munmap(buf, size);
 	    cli_dbgmsg("cli_pdf: file header not found\n");
 	    return CL_CLEAN;
139823ca
 	}
 
 	/* Find the file trailer */
72ce4b70
 	for(q = &p[bytesleft - 5]; q > p; --q)
 		if(strncasecmp(q, "%%EOF", 5) == 0)
139823ca
 			break;
 
7fc055e6
 	if(q <= p) {
dbfb485b
 		munmap(buf, size);
 		cli_dbgmsg("cli_pdf: trailer not found\n");
 		return CL_CLEAN;
139823ca
 	}
 
b533a221
 	for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
bce73fe9
 		if(memcmp(trailerstart, "trailer", 7) == 0)
139823ca
 			break;
 
 	/*
bce73fe9
 	 * q points to the end of the trailer section
139823ca
 	 */
bce73fe9
 	trailerlength = (long)(q - trailerstart);
 	if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
501e5d12
 		/*
 		 * This tends to mean that the file is, in effect, read-only
9fe789f8
 		 * http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt
 		 * http://www.adobe.com/devnet/pdf/
501e5d12
 		 */
dbfb485b
 		munmap(buf, size);
 		cli_dbgmsg("cli_pdf: Encrypted PDF files not yet supported\n");
 		return CL_CLEAN;
501e5d12
 	}
 
ef8219b8
 	/*
 	 * not true, since edits may put data after the trailer
bce73fe9
 	bytesleft -= trailerlength;
ef8219b8
 	 */
bce73fe9
 
76fb2ef1
 	/*
 	 * FIXME: Handle more than one xref section in the xref table
 	 */
6c9dc98d
 	for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
 		if(memcmp(xrefstart, "xref", 4) == 0)
76fb2ef1
 			/*
 			 * Make sure it's the start of the line, not a startxref
 			 * token
 			 */
 			if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
 				break;
6c9dc98d
 
 	if(xrefstart == p) {
dbfb485b
 		munmap(buf, size);
 		cli_dbgmsg("cli_pdf: xref not found\n");
 		return CL_CLEAN;
6c9dc98d
 	}
 
1eceda0e
 	printed_predictor_message = printed_embedded_font_message = 0;
 
ff7d16a7
 	md5table = tableCreate();
ef8219b8
 	/*
 	 * not true, since edits may put data after the trailer
70502709
 	xreflength = (size_t)(trailerstart - xrefstart);
6c9dc98d
 	bytesleft -= xreflength;
ef8219b8
 	 */
6c9dc98d
 
3470220c
 	files = 0;
 
a5afcb67
 	rc = CL_CLEAN;
 
6c9dc98d
 	/*
ef8219b8
 	 * The body section consists of a sequence of indirect objects
6c9dc98d
 	 */
d070d475
 	while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) &&
bf3e4471
 	      ((q = pdf_nextobject(p, bytesleft)) != NULL)) {
f97bcc8a
 		int is_ascii85decode, is_flatedecode, fout, len, has_cr;
d8ab9ddc
 		/*int object_number, generation_number;*/
6c9dc98d
 		const char *objstart, *objend, *streamstart, *streamend;
d070d475
 		unsigned char *md5digest;
b432851f
 		unsigned long length, objlen, real_streamlen, calculated_streamlen;
1eceda0e
 		int is_embedded_font, predictor;
240d3307
 		char fullname[NAME_MAX + 1];
f53acfcd
 
a5afcb67
 		rc = CL_CLEAN;
ef8219b8
 		if(q == xrefstart)
 			break;
 		if(memcmp(q, "xref", 4) == 0)
 			break;
616fd006
 
 		/*object_number = atoi(q);*/
8affc406
 		bytesleft -= (off_t)(q - p);
616fd006
 		p = q;
 
 		if(memcmp(q, "endobj", 6) == 0)
 			continue;
ef8219b8
 		if(!isdigit(*q)) {
dbfb485b
 			cli_dbgmsg("cli_pdf: Object number missing\n");
ef8219b8
 			break;
 		}
 		q = pdf_nextobject(p, bytesleft);
 		if((q == NULL) || !isdigit(*q)) {
dbfb485b
 			cli_dbgmsg("cli_pdf: Generation number missing\n");
ef8219b8
 			break;
 		}
a5f514a4
 		/*generation_number = atoi(q);*/
8affc406
 		bytesleft -= (off_t)(q - p);
ef8219b8
 		p = q;
 
 		q = pdf_nextobject(p, bytesleft);
 		if((q == NULL) || (memcmp(q, "obj", 3) != 0)) {
dbfb485b
 			cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n");
ef8219b8
 			break;
 		}
 
8affc406
 		bytesleft -= (off_t)((q - p) + 3);
ef8219b8
 		objstart = p = &q[3];
6c9dc98d
 		objend = cli_pmemstr(p, bytesleft, "endobj", 6);
 		if(objend == NULL) {
dbfb485b
 			cli_dbgmsg("cli_pdf: No matching endobj\n");
240d3307
 			break;
 		}
8affc406
 		bytesleft -= (off_t)((objend - p) + 6);
6c9dc98d
 		p = &objend[6];
b432851f
 		objlen = (unsigned long)(objend - objstart);
240d3307
 
6c9dc98d
 		/* Is this object a stream? */
bce73fe9
 		streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
 		if(streamstart == NULL)
 			continue;
240d3307
 
1eceda0e
 		is_embedded_font = length = is_ascii85decode =
 			is_flatedecode = 0;
 		predictor = 1;
 
bce73fe9
 		/*
 		 * TODO: handle F and FFilter?
 		 */
9be10a55
 		q = objstart;
 		while(q < streamstart) {
ef8219b8
 			if(*q == '/') {	/* name object */
f53acfcd
 				/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/
6c9dc98d
 				if(strncmp(++q, "Length ", 7) == 0) {
 					q += 7;
 					length = atoi(q);
 					while(isdigit(*q))
 						q++;
f97bcc8a
 					/*
 					 * Note: incremental updates are not
 					 *	supported
 					 */
 					if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
7bc22596
 						const char *r, *nq;
 						int opt_failed = 0;
51d1895a
 						size_t len;
f0506577
 						char b[14];
f97bcc8a
 
 						q += 4;
dbfb485b
 						cli_dbgmsg("cli_pdf: Length is in indirect obj %lu\n",
f97bcc8a
 							length);
 						snprintf(b, sizeof(b),
7bc22596
 							"%lu 0 obj", length);
b432851f
 						length = (unsigned long)strlen(b);
7bc22596
 						/* optimization: assume objects
 						 * are sequential */
 						nq = q;
51d1895a
 						len = buf + size - q;
7bc22596
 						do {
 							r = cli_pmemstr(nq, len, b, length);
 							if (r > nq) {
 								const char x = *(r-1);
 								if (x == '\n' || x=='\r') {
 									--r;
 									break;
 								}
 							}
 							if (r) {
 								len -= r+1-nq;
 								nq = r + 1;
 							} else if (!opt_failed) {
 								/* we failed optimized match,
 								 * try matching from the beginning
 								 */
 								len = q - buf;
 								r = nq = buf;
 								/* prevent
 								 * infloop */
 								opt_failed = 1;
 							}
 						} while (r);
f97bcc8a
 						if(r) {
 							r += length - 1;
 							r = pdf_nextobject(r, bytesleft - (r - q));
 							if(r) {
 								length = atoi(r);
 								while(isdigit(*r))
 									r++;
dbfb485b
 								cli_dbgmsg("cli_pdf: length in '%s' %lu\n",
f0506577
 									&b[1],
 									length);
f97bcc8a
 							}
 						} else
dbfb485b
 							cli_dbgmsg("cli_pdf: Couldn't find '%s'\n",
f0506577
 								&b[1]);
f97bcc8a
 					}
6c9dc98d
 					q--;
1eceda0e
 				} else if(strncmp(q, "Length2 ", 8) == 0)
 					is_embedded_font = 1;
 				else if(strncmp(q, "Predictor ", 10) == 0) {
 					q += 10;
 					predictor = atoi(q);
 					while(isdigit(*q))
 						q++;
 					q--;
6c9dc98d
 				} else if(strncmp(q, "FlateDecode", 11) == 0) {
da653b74
 					is_flatedecode = 1;
f53acfcd
 					q += 11;
 				} else if(strncmp(q, "ASCII85Decode", 13) == 0) {
da653b74
 					is_ascii85decode = 1;
6c9dc98d
 					q += 13;
240d3307
 				}
 			}
ef8219b8
 			q = pdf_nextobject(q, (size_t)(streamstart - q));
9be10a55
 			if(q == NULL)
 				break;
 		}
ce42a31a
 
1eceda0e
 		if(is_embedded_font) {
 			/*
 			 * Need some documentation, the only I can find a
1299feef
 			 * reference to is not free, if some kind soul wishes
1eceda0e
 			 * to donate a copy, please contact me!
 			 * (http://safari.adobepress.com/0321304748)
 			 */
 			if(!printed_embedded_font_message) {
dbfb485b
 				cli_dbgmsg("cli_pdf: Embedded fonts not yet supported\n");
1eceda0e
 				printed_embedded_font_message = 1;
 			}
 			continue;
 		}
 		if(predictor > 1) {
 			/*
 			 * Needs some thought
 			 */
 			if(!printed_predictor_message) {
dbfb485b
 				cli_dbgmsg("cli_pdf: Predictor %d not honoured for embedded image\n",
1eceda0e
 					predictor);
 				printed_predictor_message = 1;
 			}
 			continue;
 		}
 
6c9dc98d
 		/* objend points to the end of the object (start of "endobj") */
 		streamstart += 6;	/* go past the word "stream" */
 		len = (int)(objend - streamstart);
 		q = pdf_nextlinestart(streamstart, len);
 		if(q == NULL)
bce73fe9
 			break;
6c9dc98d
 		len -= (int)(q - streamstart);
 		streamstart = q;
 		streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
 		if(streamend == NULL) {
 			streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
 			if(streamend == NULL) {
dbfb485b
 				cli_dbgmsg("cli_pdf: No endstream\n");
0a097146
 				break;
 			}
f97bcc8a
 			has_cr = 1;
918f7aaa
 		} else
 			has_cr = 0;
d0d1afd7
 		snprintf(fullname, sizeof(fullname), "%s/pdf%02u", dir, files);
 		fout = open(fullname, O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
240d3307
 		if(fout < 0) {
e68d70e7
 			char err[128];
 			cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
bbc4f890
 			rc = CL_ETMPFILE;
 			break;
240d3307
 		}
 
1eceda0e
 		/*
 		 * Calculate the length ourself, the Length parameter is often
 		 * wrong
 		 */
d9781001
 		if((*--streamend != '\n') && (*streamend != '\r'))
39327ef2
 			streamend++;
f97bcc8a
 		else if(has_cr && (*--streamend != '\r'))
39327ef2
 			streamend++;
1eceda0e
 
 		if(streamend <= streamstart) {
bf3e4471
 			close(fout);
dbfb485b
 			cli_dbgmsg("cli_pdf: Empty stream\n");
997a0e0b
 			if (cli_unlink(fullname)) {
871177cd
 				rc = CL_EUNLINK;
997a0e0b
 				break;
 			}
1eceda0e
 			continue;
 		}
f97bcc8a
 		calculated_streamlen = (int)(streamend - streamstart);
1eceda0e
 		real_streamlen = length;
 
dbfb485b
 		cli_dbgmsg("cli_pdf: length %lu, calculated_streamlen %lu isFlate %d isASCII85 %d\n",
1eceda0e
 			length, calculated_streamlen,
 			is_flatedecode, is_ascii85decode);
bce73fe9
 
b17efc99
 		if(calculated_streamlen != real_streamlen) {
 			cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
 			if(real_streamlen > calculated_streamlen)
 				real_streamlen = calculated_streamlen;
 		}
bce73fe9
 #if	0
 		/* FIXME: this isn't right... */
 		if(length)
 			/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
 			streamlen = MIN(length, streamlen);
 #endif
 
da653b74
 		if(is_ascii85decode) {
41273d08
 			unsigned char *tmpbuf;
d070d475
 			int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen);
86e209d6
 
 			if(ret != CL_CLEAN) {
 				close(fout);
997a0e0b
 				if (cli_unlink(fullname)) {
871177cd
 					rc = CL_EUNLINK;
997a0e0b
 					break;
 				}
86e209d6
 				continue;
 			}
 
 			tmpbuf = cli_malloc(calculated_streamlen * 5);
550ee789
 
1160fc1d
 			if(tmpbuf == NULL) {
b8705ec8
 				close(fout);
997a0e0b
 				if (cli_unlink(fullname)) {
871177cd
 					rc = CL_EUNLINK;
997a0e0b
 					break;
 				}
1160fc1d
 				continue;
 			}
 
1eceda0e
 			ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);
bbc4f890
 
bce73fe9
 			if(ret == -1) {
da653b74
 				free(tmpbuf);
b8705ec8
 				close(fout);
997a0e0b
 				if (cli_unlink(fullname)) {
871177cd
 					rc = CL_EUNLINK;
997a0e0b
 					break;
 				}
240d3307
 				continue;
 			}
44399452
 			if(ret) {
9443ec4a
 				unsigned char *t;
b432851f
 
 				real_streamlen = ret;
44399452
 				/* free unused trailing bytes */
dbfb485b
 				t = (unsigned char *)cli_realloc(tmpbuf,calculated_streamlen);
b432851f
 				if(t == NULL) {
 					free(tmpbuf);
 					close(fout);
997a0e0b
 					if (cli_unlink(fullname)) {
871177cd
 						rc = CL_EUNLINK;
997a0e0b
 						break;
 					}
b432851f
 					continue;
 				}
 				tmpbuf = t;
44399452
 				/*
 				 * Note that it will probably be both
 				 * ascii85encoded and flateencoded
 				 */
86e209d6
 
21e605f4
 				if(is_flatedecode)
 					rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
 				else
871177cd
 				  rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EWRITE;
550ee789
 			}
da653b74
 			free(tmpbuf);
86e209d6
 		} else if(is_flatedecode) {
21e605f4
 			rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
 
86e209d6
 		} else {
95e11e5a
 			cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
1eceda0e
 				(unsigned long)real_streamlen);
d070d475
 			if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN)
871177cd
 				rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EWRITE;
88fbd274
 		}
240d3307
 
d070d475
 		if (rc == CL_CLEAN) {
db9d275c
 			cli_dbgmsg("cli_pdf: extracted file %u to %s\n", files, fullname);
 			files++;
d070d475
 	
 			lseek(fout, 0, SEEK_SET);
e74cdbc2
 			if((md5digest = cli_md5digest(fout))) {
 				unsigned int i;
 				char md5str[33];
d070d475
 
e74cdbc2
 				for(i = 0; i < 16; i++)
 					sprintf(md5str + 2*i, "%02x", md5digest[i]);
 				md5str[32] = 0;
d070d475
 				free(md5digest);
 
e74cdbc2
 				if(tableFind(md5table, md5str) >= 0) {
 					cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
6f467453
 					ctx->scannedfiles++;
e74cdbc2
 					close(fout);
997a0e0b
 					if (cli_unlink(fullname)) {
871177cd
 						rc = CL_EUNLINK;
997a0e0b
 						break;
 					}
e74cdbc2
 					continue;
 				} else
 					tableInsert(md5table, md5str, 1);
 			}
d070d475
 
 			lseek(fout, 0, SEEK_SET);
 			rc = cli_magic_scandesc(fout, ctx);
 		}
240d3307
 		close(fout);
33068e09
 		if(!ctx->engine->keeptmp)
871177cd
 			if (cli_unlink(fullname)) rc = CL_EUNLINK;
d070d475
 		if(rc != CL_CLEAN) break;
240d3307
 	}
 
dbfb485b
 	munmap(buf, size);
0a097146
 
ff7d16a7
 	tableDestroy(md5table);
 
bbc4f890
 	cli_dbgmsg("cli_pdf: returning %d\n", rc);
550ee789
 	return rc;
d056cc17
 }
da653b74
 
21e605f4
 /*
871177cd
  * flate inflation
21e605f4
  */
da653b74
 static int
96522097
 try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx)
1eceda0e
 {
86e209d6
 	int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0);
1eceda0e
 
86e209d6
 	if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS)
d070d475
 		return CL_CLEAN;
1eceda0e
 
9e3242ca
 	if(real_len == calculated_len) {
 		/*
 		 * Nothing more we can do to inflate
 		 */
dbfb485b
 		cli_dbgmsg("cli_pdf: Bad compression in flate stream\n");
 		return CL_CLEAN;
9e3242ca
 	}
1eceda0e
 
86e209d6
 	if(cli_checklimits("cli_pdf", ctx, calculated_len, 0, 0)!=CL_CLEAN)
 		return CL_CLEAN;
 
f97bcc8a
 	ret = flatedecode(buf, calculated_len, fout, ctx);
d070d475
 	if(ret == CL_CLEAN)
 		return CL_CLEAN;
f97bcc8a
 
 	/* i.e. the PDF file is broken :-( */
dbfb485b
 	cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n");
f97bcc8a
 
 	return ret;
1eceda0e
 }
 
 static int
96522097
 flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx)
da653b74
 {
b80ae277
 	int zstat, ret;
4c32a40d
 	off_t nbytes;
da653b74
 	z_stream stream;
 	unsigned char output[BUFSIZ];
1eceda0e
 #ifdef	SAVE_TMP
 	char tmpfilename[16];
 	int tmpfd;
 #endif
da653b74
 
ed6446ff
 	cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);
da653b74
 
f0506577
 	if(len == 0) {
dbfb485b
 		cli_dbgmsg("cli_pdf: flatedecode len == 0\n");
21e605f4
 		return CL_CLEAN;
f0506577
 	}
 
1eceda0e
 #ifdef	SAVE_TMP
 	/*
 	 * Copy the embedded area for debugging, so that if it falls over
 	 * we have a copy of the offending data. This is debugging code
 	 * that you shouldn't of course install in a live environment. I am
 	 * not interested in hearing about security issues with this section
 	 * of the parser.
 	 */
 	strcpy(tmpfilename, "/tmp/pdfXXXXXX");
 	tmpfd = mkstemp(tmpfilename);
 	if(tmpfd < 0) {
 		perror(tmpfilename);
dbfb485b
 		cli_errmsg("cli_pdf: Can't make debugging file\n");
1eceda0e
 	} else {
 		FILE *tmpfp = fdopen(tmpfd, "w");
 
 		if(tmpfp) {
 			fwrite(buf, sizeof(char), len, tmpfp);
 			fclose(tmpfp);
39327ef2
 			cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
 				tmpfilename);
1eceda0e
 		} else
 			cli_errmsg("cli_pdf: can't fdopen debugging file\n");
 	}
 #endif
da653b74
 	stream.zalloc = (alloc_func)Z_NULL;
 	stream.zfree = (free_func)Z_NULL;
 	stream.opaque = (void *)NULL;
95e11e5a
 	stream.next_in = (Bytef *)buf;
da653b74
 	stream.avail_in = len;
501e5d12
 	stream.next_out = output;
 	stream.avail_out = sizeof(output);
da653b74
 
 	zstat = inflateInit(&stream);
 	if(zstat != Z_OK) {
1405207a
 		cli_warnmsg("cli_pdf: inflateInit failed\n");
dbfb485b
 		return CL_EMEM;
da653b74
 	}
9f2bc4ca
 
4c32a40d
 	nbytes = 0;
9f2bc4ca
 
918f7aaa
 	while(stream.avail_in) {
72910996
 		zstat = inflate(&stream, Z_NO_FLUSH);	/* zlib */
da653b74
 		switch(zstat) {
 			case Z_OK:
1160fc1d
 				if(stream.avail_out == 0) {
dbfb485b
 				  	int written;
 					if ((written=cli_writen(fout, output, sizeof(output)))!=sizeof(output)) {
 						cli_errmsg("cli_pdf: failed to write output file\n");
 						inflateEnd(&stream);
871177cd
 						return CL_EWRITE;
dbfb485b
 					}
 					nbytes += written;
9f2bc4ca
 
d91ab809
 					if((ret=cli_checklimits("cli_pdf", ctx, nbytes, 0, 0))!=CL_CLEAN) {
4c32a40d
 						inflateEnd(&stream);
b80ae277
 						return ret;
4c32a40d
 					}
1160fc1d
 					stream.next_out = output;
501e5d12
 					stream.avail_out = sizeof(output);
1160fc1d
 				}
da653b74
 				continue;
 			case Z_STREAM_END:
 				break;
 			default:
fb53f48e
 				if(stream.msg)
dbfb485b
 					cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
ed6446ff
 						(unsigned long)nbytes,
 						stream.msg);
fb53f48e
 				else
dbfb485b
 					cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
ed6446ff
 						(unsigned long)nbytes, zstat);
da653b74
 				inflateEnd(&stream);
dbfb485b
 				return CL_CLEAN;
da653b74
 		}
 		break;
 	}
 
dbfb485b
 	if(stream.avail_out != sizeof(output)) {
 		if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
 			cli_errmsg("cli_pdf: failed to write output file\n");
 			inflateEnd(&stream);
871177cd
 			return CL_EWRITE;
dbfb485b
 		}
 	}
 			
1eceda0e
 #ifdef	SAVE_TMP
997a0e0b
 	if (cli_unlink(tmpfilename)) {
 		inflateEnd(&stream);
871177cd
 		return CL_EUNLINK;
997a0e0b
 	}
1eceda0e
 #endif
dbfb485b
 	inflateEnd(&stream);
 	return CL_CLEAN;
da653b74
 }
 
67355216
 /*
  * ascii85 inflation, returns number of bytes in output, -1 for error
  *
  * buf:	the encoded data, len bytes long
  * output:	where the decoded bytes are stored. The caller must supply
  *		enough room: each 'z' in the input produces 4 bytes and
  *		every group of 5 other characters produces 4 bytes.
  *
  * Decoding stops at the "~>" end of data marker. White space is skipped,
  * any other character outside '!'..'u' and 'z' is an error, as is a 'z'
  * inside a group or a final group of just one character. If the input
  * runs out before "~>" is seen, an incomplete final group is dropped.
  *
  * See http://www.piclist.com/techref/method/encode.htm (look for base85)
  */
 static int
 ascii85decode(const char *buf, off_t len, unsigned char *output)
 {
 	const char *ptr;
 	uint32_t sum = 0;
 	int quintet = 0;
 	int ret = 0;

 	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
 		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");

 	ptr = buf;

 	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

 	while(len > 0) {
 		/*
 		 * Go via unsigned char so that a 0xFF byte in the data
 		 * can't be mistaken for EOF and so that isspace() never
 		 * sees a negative value
 		 */
 		int byte = (int)(unsigned char)*ptr++;

 		len--;

 		/* only look at the next byte if there is one */
 		if((byte == '~') && (len > 0) && (*ptr == '>'))
 			byte = EOF;

 		if(byte >= '!' && byte <= 'u') {
 			sum = (sum * 85) + ((uint32_t)byte - '!');
 			if(++quintet == 5) {
 				*output++ = (unsigned char)(sum >> 24);
 				*output++ = (unsigned char)((sum >> 16) & 0xFF);
 				*output++ = (unsigned char)((sum >> 8) & 0xFF);
 				*output++ = (unsigned char)(sum & 0xFF);
 				ret += 4;
 				quintet = 0;
 				sum = 0;
 			}
 		} else if(byte == 'z') {
 			/* shorthand for four zero bytes */
 			if(quintet) {
 				cli_dbgmsg("ascii85decode: unexpected 'z'\n");
 				return -1;
 			}
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			ret += 4;
 		} else if(byte == EOF) {
 			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
 			if(quintet) {
 				int i;

 				if(quintet == 1) {
 					cli_dbgmsg("ascii85Decode: only 1 byte in last quintet\n");
 					return -1;
 				}
 				/* pad the short group out to 5 digits */
 				for(i = quintet; i < 5; i++)
 					sum *= 85;

 				if(quintet > 1)
 					sum += (0xFFFFFF >> ((quintet - 2) * 8));
 				/* n characters decode to n - 1 bytes */
 				ret += quintet - 1;
 				for(i = 0; i < quintet - 1; i++)
 					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
 				quintet = 0;
 			}
 			len = 0;
 			break;
 		} else if(!isspace(byte)) {
 			cli_dbgmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
 				byte & 0xFF, (unsigned long)len);
 			return -1;
 		}
 	}
 	return ret;
 }
bce73fe9
 
 /*
  * Find the start of the next line
  *
  * Skips the remainder of the current line and the run of \r and \n
  * characters that ends it. At most len bytes starting at ptr are
  * examined.
  *
  * Returns a pointer to the first character after the end-of-line run, or
  * NULL if there is no such character within len bytes (which includes
  * len == 0).
  */
 static const char *
 pdf_nextlinestart(const char *ptr, size_t len)
 {
 	if(len == 0)
 		return NULL;	/* nothing to look at, don't let --len wrap */

 	/*
 	 * Compare against \r and \n explicitly: strchr("\r\n", c) also
 	 * matches c == '\0', which would make NUL bytes count as line ends
 	 */
 	while((*ptr != '\r') && (*ptr != '\n')) {
 		if(--len == 0L)
 			return NULL;
 		ptr++;
 	}
 	while((*ptr == '\r') || (*ptr == '\n')) {
 		if(--len == 0L)
 			return NULL;
 		ptr++;
 	}
 	return ptr;
 }
9be10a55
 
ef8219b8
 /*
  * Return the start of the next PDF object.
  * This assumes that we're not in a stream.
  *
  * The scan begins inside the token that ptr points at: that token is
  * stepped over up to the first separator (white space, '[', '<', an end
  * of line or a '%' comment) and the first character after the separators
  * is returned. A '/' (start of a name object) is returned wherever it is
  * met, even at ptr itself. NULL is returned if len bytes are used up
  * first.
  */
 static const char *
 pdf_nextobject(const char *ptr, size_t len)
 {
 	int in_token = 1;	/* still inside the token we started in */

 	while(len > 0) {
 		const char c = *ptr;

 		if(c == '/')	/* Start of a name object */
 			return ptr;

 		if((c == '\n') || (c == '\r') || (c == '%')) {
 			/* end of line or comment: carry on at the next line */
 			const char *next = pdf_nextlinestart(ptr, len);

 			if(next == NULL)
 				return NULL;
 			len -= (size_t)(next - ptr);
 			ptr = next;
 			in_token = 0;
 		} else if((c == ' ') || (c == '\t') || (c == '\v') ||
 			  (c == '\f') ||
 			  (c == '[') ||	/* Start of an array object */
 			  (c == '<')) {	/* Start of a dictionary object */
 			in_token = 0;
 			ptr++;
 			len--;
 		} else {
 			if(!in_token)
 				/* TODO: parse and return object type */
 				return ptr;
 			ptr++;
 			len--;
 		}
 	}
 	return NULL;
 }
ceabee13
 
 /*
  * like cli_memstr - but returns the location of the match
  *
  * Looks for the ns bytes at needle within the hs bytes at haystack and
  * returns a pointer to the first occurrence, or NULL if there is none or
  * if the needle is longer than the haystack. If haystack and needle are
  * the same pointer, haystack is returned without looking at the lengths.
  *
  * FIXME: need a case insensitive version
  */
 static const char *
 cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
 {
 	const char *cursor = haystack;
 	size_t remaining = hs;

 	if(haystack == needle)
 		return haystack;

 	if(hs < ns)
 		return NULL;

 	if(memcmp(haystack, needle, ns) == 0)
 		return haystack;

 	for(;;) {
 		/* jump to the next candidate: a byte equal to needle[0] */
 		const char *hit = memchr(cursor, needle[0], remaining);

 		if(hit == NULL)
 			return NULL;

 		remaining -= (size_t)(hit - cursor);
 		if(remaining < ns)
 			return NULL;	/* not enough room left for a match */

 		if(memcmp(hit, needle, ns) == 0)
 			return hit;

 		/* false alarm, resume just after it */
 		cursor = hit + 1;
 		remaining--;
 	}
 }
8b6f8404
 #else	/*!HAVE_MMAP*/
83d14d9a
 
 #include "clamav.h"
 #include "others.h"
 #include "pdf.h"
 
8b6f8404
 int
72ce4b70
 cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
8b6f8404
 {
dbfb485b
 	cli_dbgmsg("File not decoded - PDF decoding needs mmap() (for now)\n");
8b6f8404
 	return CL_CLEAN;
 }
 #endif