d056cc17 |
/* |
2023340a |
* Copyright (C) 2007-2008 Sourcefire, Inc.
*
* Authors: Nigel Horne |
d056cc17 |
*
* This program is free software; you can redistribute it and/or modify |
2023340a |
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation. |
d056cc17 |
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software |
2023340a |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA. |
1eceda0e |
*
* TODO: Embedded fonts
* TODO: Predictor image handling |
d056cc17 |
*/ |
95e11e5a |
static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
d056cc17 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
7f49ea4b |
#ifdef HAVE_MMAP |
240d3307 |
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h> |
511a59c7 |
#include <errno.h> |
ed6446ff |
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif |
9443ec4a |
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif |
ed6446ff |
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif |
240d3307 |
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
|
925ece3d |
#ifdef C_WINDOWS
#include <io.h>
#endif
|
ed6446ff |
#include "clamav.h"
#include "others.h" |
240d3307 |
#include "mbox.h" |
654c0b96 |
#include "pdf.h" |
a5afcb67 |
#include "scanners.h" |
240d3307 |
|
d0d1afd7 |
#ifndef O_BINARY
#define O_BINARY 0
#endif
|
1eceda0e |
#ifdef CL_DEBUG |
5cd3f734 |
/*#define SAVE_TMP
*Save the file being worked on in tmp */ |
1eceda0e |
#endif
|
96522097 |
static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx);
static int flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx); |
b02bab2b |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
bce73fe9 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
ef8219b8 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
ceabee13 |
static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns); |
da653b74 |
|
144df7c1 |
/*
* TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
*/ |
d056cc17 |
int |
72ce4b70 |
cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
d056cc17 |
{ |
6c9dc98d |
off_t size; /* total number of bytes in the file */ |
8affc406 |
off_t bytesleft, trailerlength; |
dbfb485b |
char *buf; /* start of memory mapped area */ |
bce73fe9 |
const char *p, *q, *trailerstart; |
6c9dc98d |
const char *xrefstart; /* cross reference table */ |
70502709 |
/*size_t xreflength;*/ |
b432851f |
table_t *md5table; |
a5afcb67 |
int printed_predictor_message, printed_embedded_font_message, rc; |
3470220c |
unsigned int files; |
bf3e4471 |
struct stat statb; |
240d3307 |
|
925ece3d |
cli_dbgmsg("in cli_pdf(%s)\n", dir); |
798308de |
|
dbfb485b |
if(fstat(desc, &statb) < 0) {
cli_errmsg("cli_pdf: fstat() failed\n"); |
240d3307 |
return CL_EOPEN; |
dbfb485b |
} |
240d3307 |
|
72ce4b70 |
size = statb.st_size - offset; |
240d3307 |
|
139823ca |
if(size <= 7) /* doesn't even include the file header */ |
dbfb485b |
return CL_CLEAN; |
139823ca |
|
72ce4b70 |
p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, offset); |
dbfb485b |
if(buf == MAP_FAILED) {
cli_errmsg("cli_pdf: mmap() failed\n"); |
240d3307 |
return CL_EMEM; |
bf3e4471 |
}
|
95e11e5a |
cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size); |
0a097146 |
|
139823ca |
/* Lines are terminated by \r, \n or both */
/* File Header */ |
72ce4b70 |
bytesleft = size - 5;
for(q = p; bytesleft; bytesleft--, q++) {
if(!strncasecmp(q, "%PDF-", 5)) {
bytesleft = size - (off_t) (q - p);
p = q;
break;
} |
139823ca |
}
|
72ce4b70 |
if(!bytesleft) {
munmap(buf, size);
cli_dbgmsg("cli_pdf: file header not found\n");
return CL_CLEAN; |
139823ca |
}
/* Find the file trailer */ |
72ce4b70 |
for(q = &p[bytesleft - 5]; q > p; --q)
if(strncasecmp(q, "%%EOF", 5) == 0) |
139823ca |
break;
|
7fc055e6 |
if(q <= p) { |
dbfb485b |
munmap(buf, size);
cli_dbgmsg("cli_pdf: trailer not found\n");
return CL_CLEAN; |
139823ca |
}
|
b533a221 |
for(trailerstart = &q[-7]; trailerstart > p; --trailerstart) |
bce73fe9 |
if(memcmp(trailerstart, "trailer", 7) == 0) |
139823ca |
break;
/* |
bce73fe9 |
* q points to the end of the trailer section |
139823ca |
*/ |
bce73fe9 |
trailerlength = (long)(q - trailerstart);
if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) { |
501e5d12 |
/*
* This tends to mean that the file is, in effect, read-only |
9fe789f8 |
* http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt
* http://www.adobe.com/devnet/pdf/ |
501e5d12 |
*/ |
dbfb485b |
munmap(buf, size);
cli_dbgmsg("cli_pdf: Encrypted PDF files not yet supported\n");
return CL_CLEAN; |
501e5d12 |
}
|
ef8219b8 |
/*
* not true, since edits may put data after the trailer |
bce73fe9 |
bytesleft -= trailerlength; |
ef8219b8 |
*/ |
bce73fe9 |
|
76fb2ef1 |
/*
* FIXME: Handle more than one xref section in the xref table
*/ |
6c9dc98d |
for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
if(memcmp(xrefstart, "xref", 4) == 0) |
76fb2ef1 |
/*
* Make sure it's the start of the line, not a startxref
* token
*/
if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
break; |
6c9dc98d |
if(xrefstart == p) { |
dbfb485b |
munmap(buf, size);
cli_dbgmsg("cli_pdf: xref not found\n");
return CL_CLEAN; |
6c9dc98d |
}
|
1eceda0e |
printed_predictor_message = printed_embedded_font_message = 0;
|
ff7d16a7 |
md5table = tableCreate(); |
ef8219b8 |
/*
* not true, since edits may put data after the trailer |
70502709 |
xreflength = (size_t)(trailerstart - xrefstart); |
6c9dc98d |
bytesleft -= xreflength; |
ef8219b8 |
*/ |
6c9dc98d |
|
3470220c |
files = 0;
|
a5afcb67 |
rc = CL_CLEAN;
|
6c9dc98d |
/* |
ef8219b8 |
* The body section consists of a sequence of indirect objects |
6c9dc98d |
*/ |
d070d475 |
while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) && |
bf3e4471 |
((q = pdf_nextobject(p, bytesleft)) != NULL)) { |
f97bcc8a |
int is_ascii85decode, is_flatedecode, fout, len, has_cr; |
d8ab9ddc |
/*int object_number, generation_number;*/ |
6c9dc98d |
const char *objstart, *objend, *streamstart, *streamend; |
d070d475 |
unsigned char *md5digest; |
b432851f |
unsigned long length, objlen, real_streamlen, calculated_streamlen; |
1eceda0e |
int is_embedded_font, predictor; |
240d3307 |
char fullname[NAME_MAX + 1]; |
f53acfcd |
|
a5afcb67 |
rc = CL_CLEAN; |
ef8219b8 |
if(q == xrefstart)
break;
if(memcmp(q, "xref", 4) == 0)
break; |
616fd006 |
/*object_number = atoi(q);*/ |
8affc406 |
bytesleft -= (off_t)(q - p); |
616fd006 |
p = q;
if(memcmp(q, "endobj", 6) == 0)
continue; |
ef8219b8 |
if(!isdigit(*q)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Object number missing\n"); |
ef8219b8 |
break;
}
q = pdf_nextobject(p, bytesleft);
if((q == NULL) || !isdigit(*q)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Generation number missing\n"); |
ef8219b8 |
break;
} |
a5f514a4 |
/*generation_number = atoi(q);*/ |
8affc406 |
bytesleft -= (off_t)(q - p); |
ef8219b8 |
p = q;
q = pdf_nextobject(p, bytesleft);
if((q == NULL) || (memcmp(q, "obj", 3) != 0)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n"); |
ef8219b8 |
break;
}
|
8affc406 |
bytesleft -= (off_t)((q - p) + 3); |
ef8219b8 |
objstart = p = &q[3]; |
6c9dc98d |
objend = cli_pmemstr(p, bytesleft, "endobj", 6);
if(objend == NULL) { |
dbfb485b |
cli_dbgmsg("cli_pdf: No matching endobj\n"); |
240d3307 |
break;
} |
8affc406 |
bytesleft -= (off_t)((objend - p) + 6); |
6c9dc98d |
p = &objend[6]; |
b432851f |
objlen = (unsigned long)(objend - objstart); |
240d3307 |
|
6c9dc98d |
/* Is this object a stream? */ |
bce73fe9 |
streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
if(streamstart == NULL)
continue; |
240d3307 |
|
1eceda0e |
is_embedded_font = length = is_ascii85decode =
is_flatedecode = 0;
predictor = 1;
|
bce73fe9 |
/*
* TODO: handle F and FFilter?
*/ |
9be10a55 |
q = objstart;
while(q < streamstart) { |
ef8219b8 |
if(*q == '/') { /* name object */ |
f53acfcd |
/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/ |
6c9dc98d |
if(strncmp(++q, "Length ", 7) == 0) {
q += 7;
length = atoi(q);
while(isdigit(*q))
q++; |
f97bcc8a |
/*
* Note: incremental updates are not
* supported
*/
if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) { |
7bc22596 |
const char *r, *nq;
int opt_failed = 0; |
51d1895a |
size_t len; |
f0506577 |
char b[14]; |
f97bcc8a |
q += 4; |
dbfb485b |
cli_dbgmsg("cli_pdf: Length is in indirect obj %lu\n", |
f97bcc8a |
length);
snprintf(b, sizeof(b), |
7bc22596 |
"%lu 0 obj", length); |
b432851f |
length = (unsigned long)strlen(b); |
7bc22596 |
/* optimization: assume objects
* are sequential */
nq = q; |
51d1895a |
len = buf + size - q; |
7bc22596 |
do {
r = cli_pmemstr(nq, len, b, length);
if (r > nq) {
const char x = *(r-1);
if (x == '\n' || x=='\r') {
--r;
break;
}
}
if (r) {
len -= r+1-nq;
nq = r + 1;
} else if (!opt_failed) {
/* we failed optimized match,
* try matching from the beginning
*/
len = q - buf;
r = nq = buf;
/* prevent
* infloop */
opt_failed = 1;
}
} while (r); |
f97bcc8a |
if(r) {
r += length - 1;
r = pdf_nextobject(r, bytesleft - (r - q));
if(r) {
length = atoi(r);
while(isdigit(*r))
r++; |
dbfb485b |
cli_dbgmsg("cli_pdf: length in '%s' %lu\n", |
f0506577 |
&b[1],
length); |
f97bcc8a |
}
} else |
dbfb485b |
cli_dbgmsg("cli_pdf: Couldn't find '%s'\n", |
f0506577 |
&b[1]); |
f97bcc8a |
} |
6c9dc98d |
q--; |
1eceda0e |
} else if(strncmp(q, "Length2 ", 8) == 0)
is_embedded_font = 1;
else if(strncmp(q, "Predictor ", 10) == 0) {
q += 10;
predictor = atoi(q);
while(isdigit(*q))
q++;
q--; |
6c9dc98d |
} else if(strncmp(q, "FlateDecode", 11) == 0) { |
da653b74 |
is_flatedecode = 1; |
f53acfcd |
q += 11;
} else if(strncmp(q, "ASCII85Decode", 13) == 0) { |
da653b74 |
is_ascii85decode = 1; |
6c9dc98d |
q += 13; |
240d3307 |
}
} |
ef8219b8 |
q = pdf_nextobject(q, (size_t)(streamstart - q)); |
9be10a55 |
if(q == NULL)
break;
} |
ce42a31a |
|
1eceda0e |
if(is_embedded_font) {
/*
* Need some documentation, the only I can find a |
1299feef |
* reference to is not free, if some kind soul wishes |
1eceda0e |
* to donate a copy, please contact me!
* (http://safari.adobepress.com/0321304748)
*/
if(!printed_embedded_font_message) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Embedded fonts not yet supported\n"); |
1eceda0e |
printed_embedded_font_message = 1;
}
continue;
}
if(predictor > 1) {
/*
* Needs some thought
*/
if(!printed_predictor_message) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Predictor %d not honoured for embedded image\n", |
1eceda0e |
predictor);
printed_predictor_message = 1;
}
continue;
}
|
6c9dc98d |
/* objend points to the end of the object (start of "endobj") */
streamstart += 6; /* go past the word "stream" */
len = (int)(objend - streamstart);
q = pdf_nextlinestart(streamstart, len);
if(q == NULL) |
bce73fe9 |
break; |
6c9dc98d |
len -= (int)(q - streamstart);
streamstart = q;
streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
if(streamend == NULL) {
streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
if(streamend == NULL) { |
dbfb485b |
cli_dbgmsg("cli_pdf: No endstream\n"); |
0a097146 |
break;
} |
f97bcc8a |
has_cr = 1; |
918f7aaa |
} else
has_cr = 0; |
d0d1afd7 |
snprintf(fullname, sizeof(fullname), "%s/pdf%02u", dir, files);
fout = open(fullname, O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
240d3307 |
if(fout < 0) {
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno)); |
bbc4f890 |
rc = CL_ETMPFILE;
break; |
240d3307 |
}
|
1eceda0e |
/*
* Calculate the length ourself, the Length parameter is often
* wrong
*/ |
d9781001 |
if((*--streamend != '\n') && (*streamend != '\r')) |
39327ef2 |
streamend++; |
f97bcc8a |
else if(has_cr && (*--streamend != '\r')) |
39327ef2 |
streamend++; |
1eceda0e |
if(streamend <= streamstart) { |
bf3e4471 |
close(fout); |
dbfb485b |
cli_dbgmsg("cli_pdf: Empty stream\n"); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
1eceda0e |
continue;
} |
f97bcc8a |
calculated_streamlen = (int)(streamend - streamstart); |
1eceda0e |
real_streamlen = length;
|
dbfb485b |
cli_dbgmsg("cli_pdf: length %lu, calculated_streamlen %lu isFlate %d isASCII85 %d\n", |
1eceda0e |
length, calculated_streamlen,
is_flatedecode, is_ascii85decode); |
bce73fe9 |
|
b17efc99 |
if(calculated_streamlen != real_streamlen) {
cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
if(real_streamlen > calculated_streamlen)
real_streamlen = calculated_streamlen;
} |
bce73fe9 |
#if 0
/* FIXME: this isn't right... */
if(length)
/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
streamlen = MIN(length, streamlen);
#endif
|
da653b74 |
if(is_ascii85decode) { |
41273d08 |
unsigned char *tmpbuf; |
d070d475 |
int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen); |
86e209d6 |
if(ret != CL_CLEAN) {
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
86e209d6 |
continue;
}
tmpbuf = cli_malloc(calculated_streamlen * 5); |
550ee789 |
|
1160fc1d |
if(tmpbuf == NULL) { |
b8705ec8 |
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
1160fc1d |
continue;
}
|
1eceda0e |
ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf); |
bbc4f890 |
|
bce73fe9 |
if(ret == -1) { |
da653b74 |
free(tmpbuf); |
b8705ec8 |
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
240d3307 |
continue;
} |
44399452 |
if(ret) { |
9443ec4a |
unsigned char *t; |
b432851f |
real_streamlen = ret; |
44399452 |
/* free unused trailing bytes */ |
dbfb485b |
t = (unsigned char *)cli_realloc(tmpbuf,calculated_streamlen); |
b432851f |
if(t == NULL) {
free(tmpbuf);
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
b432851f |
continue;
}
tmpbuf = t; |
44399452 |
/*
* Note that it will probably be both
* ascii85encoded and flateencoded
*/ |
86e209d6 |
|
21e605f4 |
if(is_flatedecode)
rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
else |
96522097 |
rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EIO; |
550ee789 |
} |
da653b74 |
free(tmpbuf); |
86e209d6 |
} else if(is_flatedecode) { |
21e605f4 |
rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
|
86e209d6 |
} else { |
95e11e5a |
cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n", |
1eceda0e |
(unsigned long)real_streamlen); |
d070d475 |
if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN) |
96522097 |
rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EIO; |
88fbd274 |
} |
240d3307 |
|
d070d475 |
if (rc == CL_CLEAN) { |
db9d275c |
cli_dbgmsg("cli_pdf: extracted file %u to %s\n", files, fullname);
files++; |
d070d475 |
lseek(fout, 0, SEEK_SET); |
e74cdbc2 |
if((md5digest = cli_md5digest(fout))) {
unsigned int i;
char md5str[33]; |
d070d475 |
|
e74cdbc2 |
for(i = 0; i < 16; i++)
sprintf(md5str + 2*i, "%02x", md5digest[i]);
md5str[32] = 0; |
d070d475 |
free(md5digest);
|
e74cdbc2 |
if(tableFind(md5table, md5str) >= 0) {
cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname); |
6f467453 |
ctx->scannedfiles++; |
e74cdbc2 |
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) {
rc = CL_EIO;
break;
} |
e74cdbc2 |
continue;
} else
tableInsert(md5table, md5str, 1);
} |
d070d475 |
lseek(fout, 0, SEEK_SET);
rc = cli_magic_scandesc(fout, ctx);
} |
240d3307 |
close(fout); |
997a0e0b |
if(!cli_leavetemps_flag)
if (cli_unlink(fullname)) rc = CL_EIO; |
d070d475 |
if(rc != CL_CLEAN) break; |
240d3307 |
}
|
dbfb485b |
munmap(buf, size); |
0a097146 |
|
ff7d16a7 |
tableDestroy(md5table);
|
bbc4f890 |
cli_dbgmsg("cli_pdf: returning %d\n", rc); |
550ee789 |
return rc; |
d056cc17 |
} |
da653b74 |
|
21e605f4 |
/*
* flate inflation - returns clamAV status, e.g CL_SUCCESS, CL_EZIP
*/ |
da653b74 |
static int |
96522097 |
try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx) |
1eceda0e |
{ |
86e209d6 |
int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0); |
1eceda0e |
|
86e209d6 |
if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS) |
d070d475 |
return CL_CLEAN; |
1eceda0e |
|
9e3242ca |
if(real_len == calculated_len) {
/*
* Nothing more we can do to inflate
*/ |
dbfb485b |
cli_dbgmsg("cli_pdf: Bad compression in flate stream\n");
return CL_CLEAN; |
9e3242ca |
} |
1eceda0e |
|
86e209d6 |
if(cli_checklimits("cli_pdf", ctx, calculated_len, 0, 0)!=CL_CLEAN)
return CL_CLEAN;
|
f97bcc8a |
ret = flatedecode(buf, calculated_len, fout, ctx); |
d070d475 |
if(ret == CL_CLEAN)
return CL_CLEAN; |
f97bcc8a |
/* i.e. the PDF file is broken :-( */ |
dbfb485b |
cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n"); |
f97bcc8a |
return ret; |
1eceda0e |
}
static int |
96522097 |
flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx) |
da653b74 |
{ |
b80ae277 |
int zstat, ret; |
4c32a40d |
off_t nbytes; |
da653b74 |
z_stream stream;
unsigned char output[BUFSIZ]; |
1eceda0e |
#ifdef SAVE_TMP
char tmpfilename[16];
int tmpfd;
#endif |
da653b74 |
|
ed6446ff |
cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len); |
da653b74 |
|
f0506577 |
if(len == 0) { |
dbfb485b |
cli_dbgmsg("cli_pdf: flatedecode len == 0\n"); |
21e605f4 |
return CL_CLEAN; |
f0506577 |
}
|
1eceda0e |
#ifdef SAVE_TMP
/*
* Copy the embedded area for debugging, so that if it falls over
* we have a copy of the offending data. This is debugging code
* that you shouldn't of course install in a live environment. I am
* not interested in hearing about security issues with this section
* of the parser.
*/
strcpy(tmpfilename, "/tmp/pdfXXXXXX");
tmpfd = mkstemp(tmpfilename);
if(tmpfd < 0) {
perror(tmpfilename); |
dbfb485b |
cli_errmsg("cli_pdf: Can't make debugging file\n"); |
1eceda0e |
} else {
FILE *tmpfp = fdopen(tmpfd, "w");
if(tmpfp) {
fwrite(buf, sizeof(char), len, tmpfp);
fclose(tmpfp); |
39327ef2 |
cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
tmpfilename); |
1eceda0e |
} else
cli_errmsg("cli_pdf: can't fdopen debugging file\n");
}
#endif |
da653b74 |
stream.zalloc = (alloc_func)Z_NULL;
stream.zfree = (free_func)Z_NULL;
stream.opaque = (void *)NULL; |
95e11e5a |
stream.next_in = (Bytef *)buf; |
da653b74 |
stream.avail_in = len; |
501e5d12 |
stream.next_out = output;
stream.avail_out = sizeof(output); |
da653b74 |
zstat = inflateInit(&stream);
if(zstat != Z_OK) { |
1405207a |
cli_warnmsg("cli_pdf: inflateInit failed\n"); |
dbfb485b |
return CL_EMEM; |
da653b74 |
} |
9f2bc4ca |
|
4c32a40d |
nbytes = 0; |
9f2bc4ca |
|
918f7aaa |
while(stream.avail_in) { |
72910996 |
zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */ |
da653b74 |
switch(zstat) {
case Z_OK: |
1160fc1d |
if(stream.avail_out == 0) { |
dbfb485b |
int written;
if ((written=cli_writen(fout, output, sizeof(output)))!=sizeof(output)) {
cli_errmsg("cli_pdf: failed to write output file\n");
inflateEnd(&stream);
return CL_EIO;
}
nbytes += written; |
9f2bc4ca |
|
d91ab809 |
if((ret=cli_checklimits("cli_pdf", ctx, nbytes, 0, 0))!=CL_CLEAN) { |
4c32a40d |
inflateEnd(&stream); |
b80ae277 |
return ret; |
4c32a40d |
} |
1160fc1d |
stream.next_out = output; |
501e5d12 |
stream.avail_out = sizeof(output); |
1160fc1d |
} |
da653b74 |
continue;
case Z_STREAM_END:
break;
default: |
fb53f48e |
if(stream.msg) |
dbfb485b |
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n", |
ed6446ff |
(unsigned long)nbytes,
stream.msg); |
fb53f48e |
else |
dbfb485b |
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF attachment\n", |
ed6446ff |
(unsigned long)nbytes, zstat); |
da653b74 |
inflateEnd(&stream); |
dbfb485b |
return CL_CLEAN; |
da653b74 |
}
break;
}
|
dbfb485b |
if(stream.avail_out != sizeof(output)) {
if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
cli_errmsg("cli_pdf: failed to write output file\n");
inflateEnd(&stream); |
21e605f4 |
return CL_EIO; |
dbfb485b |
}
}
|
1eceda0e |
#ifdef SAVE_TMP |
997a0e0b |
if (cli_unlink(tmpfilename)) {
inflateEnd(&stream);
return CL_EIO;
} |
1eceda0e |
#endif |
dbfb485b |
inflateEnd(&stream);
return CL_CLEAN; |
da653b74 |
}
|
67355216 |
/*
 * ascii85 inflation, returns number of bytes in output, -1 for error
 *
 * buf:		the encoded data, it need not be NUL terminated
 * len:		number of bytes in buf
 * output:	where the decoded bytes are stored. Every group of 5 encoded
 *		characters gives 4 bytes, but a lone 'z' also gives 4 bytes,
 *		so the caller must provide room for 4 * len bytes
 *
 * Decoding stops at the "~>" end of data marker, or when buf is exhausted;
 * in the latter case an incomplete final group is dropped. White space is
 * ignored, any other character outside '!'..'u' and 'z' is an error, as are
 * a 'z' inside a group and a final group of only one character.
 *
 * See http://www.piclist.com/techref/method/encode.htm (look for base85)
 */
static int
ascii85decode(const char *buf, off_t len, unsigned char *output)
{
	const char *ptr;
	uint32_t sum = 0;
	int quintet = 0;
	int ret = 0;

	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");

	ptr = buf;

	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

	while(len > 0) {
		/*
		 * Read the byte as unsigned: a plain char can be negative,
		 * which would make 0xFF look like EOF and isn't a valid
		 * argument to isspace()
		 */
		int byte = (int)(unsigned char)*ptr++;

		len--;

		/* only look at the next byte if there is one */
		if((byte == '~') && (len > 0) && (*ptr == '>'))
			byte = EOF;

		if(byte >= '!' && byte <= 'u') {
			sum = (sum * 85) + ((uint32_t)byte - '!');
			if(++quintet == 5) {
				*output++ = (unsigned char)(sum >> 24);
				*output++ = (unsigned char)((sum >> 16) & 0xFF);
				*output++ = (unsigned char)((sum >> 8) & 0xFF);
				*output++ = (unsigned char)(sum & 0xFF);
				ret += 4;
				quintet = 0;
				sum = 0;
			}
		} else if(byte == 'z') {
			/* shorthand for a group of four zero bytes */
			if(quintet) {
				cli_dbgmsg("ascii85decode: unexpected 'z'\n");
				return -1;
			}
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			ret += 4;
		} else if(byte == EOF) {
			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
			if(quintet) {
				int i;

				if(quintet == 1) {
					cli_dbgmsg("ascii85Decode: only 1 byte in last quintet\n");
					return -1;
				}
				/* pad the short group as if it were complete */
				for(i = quintet; i < 5; i++)
					sum *= 85;

				sum += (0xFFFFFF >> ((quintet - 2) * 8));

				/*
				 * A final group of n characters holds n - 1
				 * bytes, count exactly what is stored
				 */
				ret += quintet - 1;
				for(i = 0; i < quintet - 1; i++)
					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
			}
			break;
		} else if(!isspace(byte)) {
			cli_dbgmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
				byte & 0xFF, (unsigned long)len);
			return -1;
		}
	}
	return ret;
}
bce73fe9 |
/*
 * Find the start of the next line
 *
 * ptr:	where to start looking
 * len:	number of bytes available at ptr
 *
 * Skips to the first line terminator (\r or \n) at or after ptr, then past
 * the run of terminators, and returns a pointer to the first byte after
 * them. Returns NULL if that byte would be outside the len bytes, which
 * includes len == 0.
 *
 * The bytes are compared explicitly rather than with strchr(), since
 * strchr() also matches the NUL which ends its string and so would treat
 * the NUL bytes of binary data as line terminators.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
	const char *end;

	if((ptr == NULL) || (len == 0))
		return NULL;

	end = &ptr[len];

	/* the rest of the current line */
	while((ptr < end) && (*ptr != '\r') && (*ptr != '\n'))
		ptr++;

	/* the line terminator(s) */
	while((ptr < end) && ((*ptr == '\r') || (*ptr == '\n')))
		ptr++;

	return (ptr < end) ? ptr : NULL;
}
9be10a55 |
|
ef8219b8 |
/*
 * Return the start of the next PDF object.
 * This assumes that we're not in a stream.
 *
 * ptr:	where to start looking
 * len:	number of bytes available at ptr
 *
 * The object ptr is in (if any) is skipped up to the next delimiter: white
 * space, '[', '<', a line terminator or a '%' comment, the latter two
 * being skipped up to the start of the following line. The first byte
 * after the delimiters is returned. A '/' (start of a name object) is
 * returned as soon as it is seen, even if it is the first byte.
 * Returns NULL if the data runs out first.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
	int in_current = 1;	/* still inside the object we started in */

	while(len != 0) {
		const char c = *ptr;

		if(c == '/')	/* Start of a name object */
			return ptr;

		if((c == '\n') || (c == '\r') || (c == '%')) {
			/* end of line or comment: carry on at the next line */
			const char *next = pdf_nextlinestart(ptr, len);

			if(next == NULL)
				return NULL;
			len -= (size_t)(next - ptr);
			ptr = next;
			in_current = 0;
			continue;
		}

		if((c == ' ') || (c == '\t') || (c == '\v') || (c == '\f') ||
		   (c == '[') ||	/* Start of an array object */
		   (c == '<'))		/* Start of a dictionary object */
			in_current = 0;
		else if(!in_current)
			/* TODO: parse and return object type */
			return ptr;

		ptr++;
		len--;
	}
	return NULL;
}
ceabee13 |
/*
 * like cli_memstr - but returns the location of the match
 *
 * haystack:	the memory to search, it need not be NUL terminated
 * hs:		number of bytes in haystack
 * needle:	the bytes to look for
 * ns:		number of bytes in needle
 *
 * Returns a pointer to the first occurrence of needle in haystack, or NULL
 * if there is none. If haystack and needle are the same pointer, or ns is
 * 0, haystack is returned.
 *
 * FIXME: need a case insensitive version
 */
static const char *
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
	const char *cur, *hit;
	size_t remaining;

	if(haystack == needle)
		return haystack;
	if(hs < ns)
		return NULL;
	if(ns == 0)
		return haystack;

	cur = haystack;
	remaining = hs;

	/*
	 * Look for the first byte of the needle, but only where the whole
	 * needle still fits, and carry on after each false hit so that no
	 * position is examined twice
	 */
	while((remaining >= ns) &&
	      ((hit = memchr(cur, needle[0], remaining - ns + 1)) != NULL)) {
		if(memcmp(hit, needle, ns) == 0)
			return hit;
		remaining -= (size_t)(hit - cur) + 1;
		cur = &hit[1];
	}
	return NULL;
}
8b6f8404 |
#else /*!HAVE_MMAP*/ |
83d14d9a |
#include "clamav.h"
#include "others.h"
#include "pdf.h"
|
8b6f8404 |
int |
72ce4b70 |
cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
8b6f8404 |
{ |
dbfb485b |
cli_dbgmsg("File not decoded - PDF decoding needs mmap() (for now)\n"); |
8b6f8404 |
return CL_CLEAN;
}
#endif |