d056cc17 |
/* |
6ff4e486 |
* Copyright (C) 2007-2008, 2010 Sourcefire, Inc. |
2023340a |
* |
6ff4e486 |
* Authors: Nigel Horne, Török Edvin
*
* Also based on Matt Olney's pdf parser in snort-nrt. |
d056cc17 |
*
* This program is free software; you can redistribute it and/or modify |
2023340a |
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation. |
d056cc17 |
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software |
2023340a |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA. |
1eceda0e |
*
* TODO: Embedded fonts
* TODO: Predictor image handling |
d056cc17 |
*/ |
95e11e5a |
/* RCS/CVS "$Id$" keyword string identifying the revision of this file. */
static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
d056cc17 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
240d3307 |
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h> |
511a59c7 |
#include <errno.h> |
ed6446ff |
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif |
9443ec4a |
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif |
240d3307 |
#include <zlib.h>
|
ed6446ff |
#include "clamav.h"
#include "others.h" |
654c0b96 |
#include "pdf.h" |
a5afcb67 |
#include "scanners.h" |
747c2055 |
#include "fmap.h" |
f461d74f |
#include "str.h" |
dc200c6b |
#include "bytecode.h"
#include "bytecode_api.h" |
240d3307 |
|
1eceda0e |
#ifdef CL_DEBUG |
5cd3f734 |
/*#define SAVE_TMP
*Save the file being worked on in tmp */ |
1eceda0e |
#endif
|
6e33139f |
/*
 * Forward declarations of file-local helpers that are called by the
 * functions below before their definitions appear.
 */
static int asciihexdecode(const char *buf, off_t len, char *output); |
5aad11ce |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
bce73fe9 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
ef8219b8 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
da653b74 |
|
6ff4e486 |
#if 1 |
e7a27135 |
/*
 * Check whether the bytes at the "startxref" target look like a cross
 * reference section.
 *
 * xref - first byte to inspect
 * eof  - one past the last readable byte
 *
 * Leading spaces, LF and CR are skipped first.  The data is accepted when
 * it starts with the "xref" keyword (classic table) or when the name
 * "/XRef" (cross reference stream) occurs somewhere in the remaining bytes.
 *
 * Returns 0 when a cross reference was recognised, -1 otherwise.
 */
static int xrefCheck(const char *xref, const char *eof)
{
    const char *q;

    while (xref < eof && (*xref == ' ' || *xref == '\n' || *xref == '\r'))
        xref++;
    if (xref + 4 >= eof)
        return -1;
    if (!memcmp(xref, "xref", 4)) {
        cli_dbgmsg("cli_pdf: found xref\n");
        return 0;
    }
    /* could be xref stream */
    for (q = xref; q + 5 < eof; q++) {
        /* compare the whole 5-byte name; 4 bytes would also match "/XRe?" */
        if (!memcmp(q, "/XRef", 5)) {
            cli_dbgmsg("cli_pdf: found /XRef\n");
            return 0;
        }
    }
    return -1;
}
/* Parser state shared by the PDF helper functions in this file. */
struct pdf_struct {
/* array of objects located so far; grown by pdf_findobj() */
struct pdf_obj *objs;
/* number of entries in objs */
unsigned nobjs; |
2545f976 |
/* bitmask of (1 << enum pdf_flag) values, see pdfobj_flag() */
unsigned flags; |
e7a27135 |
/* mapped PDF data, obtained in cli_pdf() via fmap_need_off() */
const char *map;
/* number of bytes of PDF data considered at map */
off_t size;
/* position inside map where pdf_findobj() continues its search */
off_t offset; |
dc200c6b |
/* offset of the PDF data inside the scanned file map (set in cli_pdf()) */
off_t startoff; |
3643f3d2 |
/* scan context passed to cli_pdf() */
cli_ctx *ctx;
/* directory in which extracted objects are created */
const char *dir;
/* counter used to build the "pdfNN" names of extracted files */
unsigned files; |
e7a27135 |
};
/*
 * Step backwards from q over PDF whitespace (NUL, TAB, LF, FF, CR, space)
 * and return the position where the walk stops.
 *
 * The walk stops at the first non-whitespace byte, or as soon as q is no
 * longer above start; the byte at start itself is never examined.  When q
 * is already <= start it is returned unchanged.
 */
static const char *findNextNonWSBack(const char *q, const char *start)
{
    for (; q > start; q--) {
        switch (*q) {
            case 0x00:
            case 0x09:
            case 0x0a:
            case 0x0c:
            case 0x0d:
            case 0x20:
                /* still whitespace, keep walking backwards */
                continue;
            default:
                return q;
        }
    }
    return q;
}
|
ab564992 |
/*
 * Locate the data of a PDF stream.
 *
 * start      - buffer to search
 * bytesleft  - number of bytes from start in which the "stream" keyword
 *              is looked for
 * bytesleft2 - number of bytes from start that may be examined for the
 *              stream data and the "endstream" keyword
 * stream     - out: offset (from start) of the first stream data byte,
 *              i.e. after "stream" and an optional CRLF and/or LF
 * endstream  - out: offset (from start) of the "endstream" keyword; when
 *              the keyword is missing, 9 bytes before the end of the
 *              bytesleft2 window.  Never smaller than *stream.
 *
 * Returns 1 when "stream" was found (outputs are set), 0 otherwise
 * (outputs are left untouched).
 */
static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream)
{
    const char *q2, *q;
    off_t avail;

    q2 = cli_memstr(start, bytesleft, "stream", 6);
    if (!q2)
        return 0;

    q2 += 6;
    /* bytes that may still be read after the keyword */
    avail = bytesleft2 - (q2 - start);

    /* only peek at the end-of-line bytes when they are inside the window */
    if (avail >= 2 && q2[0] == '\xd' && q2[1] == '\xa') {
        q2 += 2;
        avail -= 2;
    }
    if (avail >= 1 && q2[0] == '\xa') {
        q2++;
        avail--;
    }
    *stream = q2 - start;

    q = q2;
    /* never hand a non-positive length to cli_memstr() */
    q2 = avail > 0 ? cli_memstr(q, avail, "endstream", 9) : NULL;
    if (!q2)
        q2 = q + avail - 9; /* till EOF */
    *endstream = q2 - start;
    if (*endstream < *stream)
        *endstream = *stream;
    return 1;
}
|
6c135eb4 |
static int pdf_findobj(struct pdf_struct *pdf) |
e7a27135 |
{ |
3643f3d2 |
const char *start, *q, *q2, *q3, *eof; |
e7a27135 |
struct pdf_obj *obj;
off_t bytesleft;
unsigned genid, objid;
pdf->nobjs++;
pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
if (!pdf->objs) { |
5aad11ce |
cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs); |
e7a27135 |
return -1;
}
obj = &pdf->objs[pdf->nobjs-1]; |
ab564992 |
memset(obj, 0, sizeof(*obj)); |
e7a27135 |
start = pdf->map+pdf->offset;
bytesleft = pdf->size - pdf->offset;
q2 = cli_memstr(start, bytesleft, " obj", 4);
if (!q2)
return 0;/* no more objs */
bytesleft -= q2 - start;
q = findNextNonWSBack(q2-1, start);
while (q > start && isdigit(*q)) { q--; }
genid = atoi(q);
q = findNextNonWSBack(q-1,start);
while (q > start && isdigit(*q)) { q--; }
objid = atoi(q);
obj->id = (objid << 8) | (genid&0xff);
obj->start = q2+4 - pdf->map;
obj->flags = 0;
bytesleft -= 4;
eof = pdf->map + pdf->size;
q = pdf->map + obj->start;
while (q < eof && bytesleft > 0) { |
3643f3d2 |
off_t p_stream, p_endstream; |
e7a27135 |
q2 = pdf_nextobject(q, bytesleft);
if (!q2) |
9acc81d6 |
q2 = pdf->map + pdf->size; |
e7a27135 |
bytesleft -= q2 - q; |
ab564992 |
if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream)) { |
e7a27135 |
obj->flags |= 1 << OBJ_STREAM; |
9acc81d6 |
q2 = q-1 + p_endstream + 9; |
3643f3d2 |
bytesleft -= q2 - q + 1; |
9acc81d6 |
if (bytesleft < 0) {
obj->flags |= 1 << OBJ_TRUNCATED;
pdf->offset = pdf->size;
return 1;/* truncated */
} |
3643f3d2 |
} else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
q2 = q3 + 6; |
e7a27135 |
pdf->offset = q2 - pdf->map;
return 1; /* obj found and offset positioned */
} else { |
6c135eb4 |
q2++; |
cacd0927 |
bytesleft--; |
e7a27135 |
}
q = q2;
} |
9acc81d6 |
obj->flags |= 1 << OBJ_TRUNCATED;
pdf->offset = pdf->size;
return 1;/* truncated */ |
e7a27135 |
}
|
3643f3d2 |
static int filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, |
6e33139f |
int fout, const char *buf, off_t len, off_t *sum) |
3643f3d2 |
{
if (cli_checklimits("pdf", pdf->ctx, *sum, 0, 0))
return len; /* pretend it was a successful write to suppress CL_EWRITE */
*sum += len;
return cli_writen(fout, buf, len);
}
|
eb270d5a |
static void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag)
{
const char *s= "";
pdf->flags |= 1 << flag;
if (!cli_debug_flag)
return;
switch (flag) {
case UNTERMINATED_OBJ_DICT:
s = "dictionary not terminated";
break;
case ESCAPED_COMMON_PDFNAME:
/* like /JavaScript */
s = "escaped common pdfname";
break;
case BAD_STREAM_FILTERS:
s = "duplicate stream filters";
break;
case BAD_PDF_VERSION:
s = "bad pdf version";
break;
case BAD_PDF_HEADERPOS:
s = "bad pdf header position";
break;
case BAD_PDF_TRAILER:
s = "bad pdf trailer";
break;
case BAD_PDF_TOOMANYOBJS:
s = "too many pdf objs";
break;
case BAD_FLATE:
s = "bad deflate stream";
break; |
f984f75b |
case BAD_FLATESTART:
s = "bad deflate stream start";
break; |
eb270d5a |
case BAD_STREAMSTART:
s = "bad stream start";
break;
case UNKNOWN_FILTER:
s = "unknown filter used";
break;
case BAD_ASCIIDECODE:
s = "bad ASCII decode";
break;
case HEX_JAVASCRIPT:
s = "hex javascript";
break;
case BAD_INDOBJ:
s = "referencing nonexistent obj";
break;
case HAS_OPENACTION:
s = "has /OpenAction";
break;
case BAD_STREAMLEN:
s = "bad /Length, too small";
break; |
f984f75b |
case ENCRYPTED_PDF:
s = "PDF is encrypted";
break; |
cacd0927 |
case LINEARIZED_PDF:
s = "linearized PDF";
break; |
9acc81d6 |
case MANY_FILTERS:
s = "more than 2 filters per obj";
break; |
eb270d5a |
} |
f984f75b |
cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
eb270d5a |
}
|
3643f3d2 |
/*
 * Inflate a FlateDecode stream and write the result to fout.
 *
 * pdf, obj - parser state and the object the stream belongs to (used for
 *            flagging and log messages)
 * buf, len - compressed input
 * fout     - destination file descriptor
 * sum      - running total of bytes written, maintained by filter_writen()
 *
 * Tolerated irregularities:
 *  - a lone '\r' in front of the data is skipped (BAD_STREAMSTART);
 *  - if inflating yields nothing at all, one retry is made starting at
 *    the next line of the input (BAD_FLATESTART);
 *  - if the data still cannot be inflated and nothing was produced, the
 *    raw bytes are dumped instead (BAD_FLATESTART); a failure after some
 *    output was produced is flagged BAD_FLATE.
 *
 * Returns CL_CLEAN on success and for the tolerated cases above, CL_EMEM
 * when zlib cannot be initialised, CL_EWRITE when writing fails.
 */
static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj,
                              const char *buf, off_t len, int fout, off_t *sum)
{
    int skipped = 0;
    int zstat;
    z_stream stream;
    off_t nbytes;
    char output[BUFSIZ];

    if (len == 0)
        return CL_CLEAN;
    if (*buf == '\r') {
        buf++;
        len--;
        pdfobj_flag(pdf, obj, BAD_STREAMSTART);
        /* PDF spec says stream is followed by \r\n or \n, but not \r alone.
         * Sample 0015315109, it has \r followed by zlib header.
         * Flag pdf as suspicious, and attempt to extract by skipping the \r.
         */
        if (!len)
            return CL_CLEAN;
    }

    memset(&stream, 0, sizeof(stream));
    stream.next_in = (Bytef *)buf;
    stream.avail_in = len;
    stream.next_out = (Bytef *)output;
    stream.avail_out = sizeof(output);
    zstat = inflateInit(&stream);
    if(zstat != Z_OK) {
        cli_warnmsg("cli_pdf: inflateInit failed\n");
        return CL_EMEM;
    }
    nbytes = 0;
    while(stream.avail_in) {
        int written;

        zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */
        switch(zstat) {
            case Z_OK:
                if(stream.avail_out == 0) {
                    /* output buffer full: flush it and keep inflating */
                    if ((written=filter_writen(pdf, obj, fout, output, sizeof(output), sum))!=sizeof(output)) {
                        cli_errmsg("cli_pdf: failed to write output file\n");
                        inflateEnd(&stream);
                        return CL_EWRITE;
                    }
                    nbytes += written;
                    stream.next_out = (Bytef *)output;
                    stream.avail_out = sizeof(output);
                }
                continue;
            case Z_STREAM_END:
            default:
                written = sizeof(output) - stream.avail_out;
                if (!written && !nbytes && !skipped) {
                    /* skip till EOL, and try inflating from there, sometimes
                     * PDFs contain extra whitespace */
                    const char *q = pdf_nextlinestart(buf, len);
                    if (q) {
                        skipped = 1;
                        inflateEnd(&stream);
                        /* shrink len by the skipped bytes BEFORE moving buf,
                         * otherwise the difference is always zero and the
                         * input would extend past the end of the stream */
                        len -= q - buf;
                        buf = q;
                        stream.next_in = (Bytef *)buf;
                        stream.avail_in = len;
                        stream.next_out = (Bytef *)output;
                        stream.avail_out = sizeof(output);
                        zstat = inflateInit(&stream);
                        if(zstat != Z_OK) {
                            cli_warnmsg("cli_pdf: inflateInit failed\n");
                            return CL_EMEM;
                        }
                        pdfobj_flag(pdf, obj, BAD_FLATESTART);
                        continue;
                    }
                }

                if (filter_writen(pdf, obj, fout, output, written, sum)!=written) {
                    cli_errmsg("cli_pdf: failed to write output file\n");
                    inflateEnd(&stream);
                    return CL_EWRITE;
                }
                nbytes += written;
                stream.next_out = (Bytef *)output;
                stream.avail_out = sizeof(output);
                if (zstat == Z_STREAM_END)
                    break;

                if(stream.msg)
                    cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
                               (unsigned long)nbytes,
                               stream.msg, obj->id>>8, obj->id&0xff);
                else
                    cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
                               (unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
                /* mark stream as bad only if not encrypted */
                inflateEnd(&stream);
                if (!nbytes) {
                    cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n");
                    if (filter_writen(pdf, obj, fout, buf, len, sum) != len) {
                        cli_errmsg("cli_pdf: failed to write output file\n");
                        return CL_EWRITE;
                    }
                    pdfobj_flag(pdf, obj, BAD_FLATESTART);
                } else {
                    pdfobj_flag(pdf, obj, BAD_FLATE);
                }
                return CL_CLEAN;
        }
        /* only reached on Z_STREAM_END */
        break;
    }
    /* flush whatever is left in the output buffer */
    if(stream.avail_out != sizeof(output)) {
        if(filter_writen(pdf, obj, fout, output, sizeof(output) - stream.avail_out, sum) < 0) {
            cli_errmsg("cli_pdf: failed to write output file\n");
            inflateEnd(&stream);
            return CL_EWRITE;
        }
    }
    inflateEnd(&stream);
    return CL_CLEAN;
}
/*
 * Look up the object whose id equals objid.
 *
 * The search starts at the array position of obj (the object currently
 * being processed) and runs to the end of pdf->objs; if nothing matched it
 * wraps around and checks the entries in front of obj.
 *
 * Returns the matching entry or NULL when no object has that id.
 */
static struct pdf_obj *find_obj(struct pdf_struct *pdf,
                                struct pdf_obj *obj, uint32_t objid)
{
    unsigned first = 0;
    unsigned k;

    /* search starting at previous obj (if exists) */
    if (obj != pdf->objs)
        first = obj - pdf->objs;

    for (k = first; k < pdf->nobjs; k++) {
        if (pdf->objs[k].id == objid)
            return &pdf->objs[k];
    }
    /* restart search from beginning if not found */
    for (k = 0; k < first; k++) {
        if (pdf->objs[k].id == objid)
            return &pdf->objs[k];
    }
    return NULL;
}
/*
 * Determine the value of the /Length entry of a stream dictionary.
 *
 * pdf, obj   - parser state and the object being examined
 * start, len - the bytes in which "/Length" is searched
 *
 * The value may be given directly ("/Length 123") or as an indirect
 * reference ("/Length 12 0 R"); in the latter case the referenced object
 * is looked up with find_obj() and its first token is used.
 *
 * Returns the length, limited so that the value plus 5 bytes does not
 * reach past pdf->size when counted from the position of the number (the
 * limited value can be negative; the caller treats that as 0).  Returns 0
 * when no usable /Length could be found.
 */
static int find_length(struct pdf_struct *pdf,
                       struct pdf_obj *obj,
                       const char *start, off_t len)
{
    int length;
    const char *q;

    q = cli_memstr(start, len, "/Length", 7);
    if (!q)
        return 0;
    q++;
    len -= q - start;
    start = pdf_nextobject(q, len);
    if (!start)
        return 0;
    /* len -= start - q; */
    q = start;
    length = atoi(q);
    /* stream data is arbitrary bytes: ctype needs unsigned char values */
    while (isdigit((unsigned char)*q)) q++;
    if (*q == ' ') {
        int genid;

        q++;
        genid = atoi(q);
        while (isdigit((unsigned char)*q)) q++;
        if (q[0] == ' ' && q[1] == 'R') {
            cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
            obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
            if (!obj) {
                cli_dbgmsg("cli_pdf: indirect object not found\n");
                return 0;
            }
            q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
            if (!q) {
                /* referenced object holds no token: atoi(NULL) would crash */
                cli_dbgmsg("cli_pdf: indirect length object is empty\n");
                return 0;
            }
            length = atoi(q);
        }
    }
    /* limit length */
    if (start - pdf->map + length+5 > pdf->size) {
        length = pdf->size - (start - pdf->map)-5;
    }
    return length;
}
|
4d808a86 |
/* Object flag bits for which pdf_extract_obj() dumps an object even if it
 * is not a plain, unfiltered stream. */
#define DUMP_MASK ((1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION)) |
ab564992 |
|
eb270d5a |
static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary) |
ab564992 |
{ |
5aad11ce |
unsigned i = obj - pdf->objs; |
ab564992 |
i++;
if (i < pdf->nobjs) { |
dd002dfa |
int s = pdf->objs[i].start - obj->start - 4; |
eb270d5a |
if (s > 0) {
if (!binary) {
const char *p = pdf->map + obj->start;
const char *q = p + s;
while (q > p && (isspace(*q) || isdigit(*q)))
q--;
if (q > p+5 && !memcmp(q-5,"endobj",6))
q -= 6;
q = findNextNonWSBack(q, p);
q++;
return q - p;
} |
dd002dfa |
return s; |
eb270d5a |
} |
ab564992 |
} |
eb270d5a |
if (binary)
return pdf->size - obj->start;
return pdf->offset - obj->start - 6; |
ab564992 |
}
|
dc200c6b |
/*
 * Run the BC_PDF bytecode hooks for the given parsing phase.
 *
 * pdf    - parser state handed to the bytecode context
 * phase  - phase identifier passed through to the bytecode context
 * fd     - descriptor of an extracted object to map for the hook, or -1
 *          to run the hook on the map of the scanned file
 * dumpid - currently unused
 *
 * If fd cannot be mapped a warning is logged and the map of the scanned
 * file is used instead.
 *
 * Returns CL_EMEM when no bytecode context can be allocated, otherwise
 * the result of cli_bytecode_runhook().
 */
static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd,
                         int dumpid)
{
    cli_ctx *ctx = pdf->ctx;
    struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
    fmap_t *map;
    int own_map = 0; /* set when map was created here and must be released */
    int ret;

    if (!bc_ctx) {
        cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
        return CL_EMEM;
    }

    map = *ctx->fmap;
    if (fd != -1) {
        fmap_t *objmap = fmap(fd, 0, 0);
        if (objmap) {
            map = objmap;
            own_map = 1;
        } else {
            cli_warnmsg("can't mmap pdf extracted obj\n");
        }
    }

    cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs,
                                &pdf->flags, pdf->size, pdf->startoff);
    cli_bytecode_context_setctx(bc_ctx, ctx);
    ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map, ctx->virname);
    cli_bytecode_context_destroy(bc_ctx);

    if (own_map)
        funmap(map);
    return ret;
}
|
3643f3d2 |
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
{ |
ab564992 |
char fullname[NAME_MAX + 1];
int fout;
off_t sum = 0; |
3643f3d2 |
int rc = CL_SUCCESS; |
ab564992 |
char *ascii_decoded = NULL; |
dc200c6b |
int dump = 1; |
ab564992 |
|
9acc81d6 |
/* TODO: call bytecode hook here, allow override dumpability */ |
eb270d5a |
if ((!(obj->flags & (1 << OBJ_STREAM)) ||
(obj->flags & (1 << OBJ_HASFILTERS)))
&& !(obj->flags & DUMP_MASK)) { |
ab564992 |
/* don't dump all streams */ |
dc200c6b |
dump = 0; |
ab564992 |
} |
4d808a86 |
if ((obj->flags & (1 << OBJ_IMAGE)) &&
!(obj->flags & (1 << OBJ_FILTER_DCT))) {
/* don't dump / scan non-JPG images */ |
dc200c6b |
dump = 0; |
9acc81d6 |
} |
dc200c6b |
if (obj->flags & (1 << OBJ_FORCEDUMP)) {
/* bytecode can force dump by setting this flag */
dump = 1;
}
if (!dump)
return CL_CLEAN;
cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id); |
ab564992 |
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
if (fout < 0) {
char err[128];
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
free(ascii_decoded);
return CL_ETMPFILE;
}
do {
if (obj->flags & (1 << OBJ_STREAM)) { |
3643f3d2 |
const char *start = pdf->map + obj->start;
off_t p_stream = 0, p_endstream = 0;
off_t length;
find_stream_bounds(start, pdf->size - obj->start, |
ab564992 |
pdf->size - obj->start, |
3643f3d2 |
&p_stream, &p_endstream);
if (p_stream && p_endstream) {
const char *flate_in;
long ascii_decoded_size = 0;
size_t size = p_endstream - p_stream; |
eb270d5a |
off_t orig_length; |
3643f3d2 |
|
e8c7cc21 |
length = find_length(pdf, obj, start, p_stream);
if (length < 0)
length = 0;
orig_length = length; |
9acc81d6 |
if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
cli_dbgmsg("cli_pdf: length out of file: %ld + %ld > %ld\n",
p_stream, length, pdf->size);
length = pdf->size - (obj->start + p_stream);
} |
e8c7cc21 |
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) { |
3643f3d2 |
const char *q = start + p_endstream;
length = size;
q--;
if (*q == '\n') {
q--;
length--;
if (*q == '\r')
length--;
} else if (*q == '\r') {
length--;
} |
e8c7cc21 |
if (length < 0)
length = 0; |
5aad11ce |
cli_dbgmsg("cli_pdf: calculated length %ld\n", length); |
eb270d5a |
} else {
if (size > length+2) {
cli_dbgmsg("cli_pdf: calculated length %ld < %ld\n",
length, size);
length = size;
} |
3643f3d2 |
} |
9acc81d6 |
if (orig_length && size > orig_length + 20) {
cli_dbgmsg("cli_pdf: orig length: %ld, length: %ld, size: %ld\n",
orig_length, length, size); |
eb270d5a |
pdfobj_flag(pdf, obj, BAD_STREAMLEN); |
9acc81d6 |
} |
3643f3d2 |
if (!length)
length = size;
if (obj->flags & (1 << OBJ_FILTER_AH)) { |
a9d034ee |
ascii_decoded = cli_malloc(length/2 + 1); |
3643f3d2 |
if (!ascii_decoded) {
cli_errmsg("Cannot allocate memory for asciidecode\n"); |
ab564992 |
rc = CL_EMEM;
break; |
3643f3d2 |
}
ascii_decoded_size = asciihexdecode(start + p_stream,
length,
ascii_decoded);
} else if (obj->flags & (1 << OBJ_FILTER_A85)) { |
a9d034ee |
ascii_decoded = cli_malloc(length*5); |
3643f3d2 |
if (!ascii_decoded) {
cli_errmsg("Cannot allocate memory for asciidecode\n"); |
ab564992 |
rc = CL_EMEM;
break; |
3643f3d2 |
}
ascii_decoded_size = ascii85decode(start+p_stream,
length, |
6e33139f |
(unsigned char*)ascii_decoded); |
3643f3d2 |
}
if (ascii_decoded_size < 0) { |
9acc81d6 |
/* don't flag for images or truncated objs*/
if (!(obj->flags &
((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
pdfobj_flag(pdf, obj, BAD_ASCIIDECODE); |
3643f3d2 |
cli_dbgmsg("cli_pdf: failed to asciidecode in %u %u obj\n", obj->id>>8,obj->id&0xff); |
9acc81d6 |
free(ascii_decoded);
ascii_decoded = NULL;
/* attempt to directly flatedecode it */ |
3643f3d2 |
}
/* either direct or ascii-decoded input */
if (!ascii_decoded)
ascii_decoded_size = length;
flate_in = ascii_decoded ? ascii_decoded : start+p_stream;
if (obj->flags & (1 << OBJ_FILTER_FLATE)) { |
9acc81d6 |
cli_dbgmsg("cli_pdf: deflate len %ld (orig %ld)\n", ascii_decoded_size, (long)orig_length); |
3643f3d2 |
rc = filter_flatedecode(pdf, obj, flate_in, ascii_decoded_size, fout, &sum);
} else { |
ab564992 |
if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
rc = CL_EWRITE; |
3643f3d2 |
} |
ab564992 |
}
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
const char *q2;
const char *q = pdf->map+obj->start;
/* TODO: get obj-endobj size */ |
eb270d5a |
off_t bytesleft = obj_size(pdf, obj, 0); |
dd002dfa |
if (bytesleft < 0)
break; |
ab564992 |
q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);
if (!q2)
break;
bytesleft -= q2 - q; |
eb270d5a |
do {
q2++;
bytesleft--; |
ab564992 |
q = pdf_nextobject(q2, bytesleft);
if (!q)
break;
bytesleft -= q - q2; |
eb270d5a |
q2 = q;
} while (*q == '/');
if (!q)
break; |
ab564992 |
if (*q == '(') {
if (filter_writen(pdf, obj, fout, q+1, bytesleft-1, &sum) != (bytesleft-1)) {
rc = CL_EWRITE;
break;
}
} else if (*q == '<') {
char *decoded;
q2 = memchr(q+1, '>', bytesleft);
if (!q2) q2 = q + bytesleft;
decoded = cli_malloc(q2 - q);
if (!decoded) {
rc = CL_EMEM;
break;
}
cli_hex2str_to(q2, decoded, q2-q-1);
decoded[q2-q-1] = '\0';
cli_dbgmsg("cli_pdf: found hexadecimal encoded javascript in %u %u obj\n",
obj->id>>8, obj->id&0xff); |
eb270d5a |
pdfobj_flag(pdf, obj, HEX_JAVASCRIPT); |
ab564992 |
filter_writen(pdf, obj, fout, decoded, q2-q-1, &sum);
free(decoded); |
3643f3d2 |
} |
eb270d5a |
} else {
off_t bytesleft = obj_size(pdf, obj, 0);
if (filter_writen(pdf, obj, fout , pdf->map + obj->start, bytesleft,&sum) != bytesleft)
rc = CL_EWRITE; |
3643f3d2 |
} |
ab564992 |
} while (0); |
eb270d5a |
cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname); |
dc200c6b |
if (sum) {
int rc2;
cli_updatelimits(pdf->ctx, sum);
/* TODO: invoke bytecode on this pdf obj with metainformation associated
* */
lseek(fout, 0, SEEK_SET);
rc2 = cli_magic_scandesc(fout, pdf->ctx);
if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
rc = rc2;
if (rc == CL_CLEAN) {
rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
if (rc2 == CL_VIRUS)
rc = rc2;
}
} |
ab564992 |
close(fout);
free(ascii_decoded);
if (!pdf->ctx->engine->keeptmp)
if (cli_unlink(fullname) && rc != CL_VIRUS)
rc = CL_EUNLINK; |
3643f3d2 |
return rc;
}
|
6c135eb4 |
/*
 * States of the small state machine that handle_pdfname() runs over the
 * names found in an object dictionary.  Each state records which name was
 * seen last, so that the following name can be interpreted in context.
 */
enum objstate {
STATE_NONE,
STATE_S,
STATE_FILTER, |
3643f3d2 |
STATE_JAVASCRIPT, |
eb270d5a |
STATE_OPENACTION, |
b835a528 |
STATE_LINEARIZED, |
6c135eb4 |
STATE_ANY /* for actions table below */
};
/*
 * One row of the name table used by handle_pdfname(): when pdfname is seen
 * while the state machine is in from_state (or from_state is STATE_ANY),
 * the state becomes to_state and the bit for set_objflag is set in the
 * object's flags.
 */
struct pdfname_action {
/* PDF name without the leading '/' */
const char *pdfname; |
dc200c6b |
enum pdf_objflags set_objflag;/* OBJ_DICT is noop */ |
6c135eb4 |
enum objstate from_state;/* STATE_NONE is noop */
enum objstate to_state;
};
/*
 * Names recognised inside object dictionaries, searched linearly by
 * handle_pdfname().  Columns: name, object flag to set, state required
 * for the row to apply, state entered afterwards.  Filter names appear in
 * both their long and abbreviated spellings.
 */
static struct pdfname_action pdfname_actions[] = {
{"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER},
{"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER}, |
9acc81d6 |
{"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER}, |
3643f3d2 |
{"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE}, |
6c135eb4 |
{"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER}, |
9acc81d6 |
{"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE}, |
6c135eb4 |
{"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER}, |
6c135eb4 |
{"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER}, |
6c135eb4 |
{"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER}, |
6c135eb4 |
/* NOTE(review): JBIG2Decode shares the DCT flag here - confirm intended */
{"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER}, |
80db7712 |
{"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER}, |
6c135eb4 |
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER},
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE}, |
f984f75b |
{"Standard", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_FILTER}, |
b835a528 |
{"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE},
{"V", OBJ_SIGNED, STATE_ANY, STATE_NONE}, |
9acc81d6 |
{"R", OBJ_SIGNED, STATE_ANY, STATE_NONE}, |
b835a528 |
{"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED}, |
eb270d5a |
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER}, |
3643f3d2 |
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT}, |
6c135eb4 |
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE},
{"S", OBJ_DICT, STATE_NONE, STATE_S}, |
eb270d5a |
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE},
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION} |
6c135eb4 |
};
|
edeb59b3 |
/* Object flag bits of all stream filters this parser recognises; used to
 * decide whether an unrecognised filter name should be reported. */
#define KNOWN_FILTERS ((1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_RL) | (1 << OBJ_FILTER_A85) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_LZW) | (1 << OBJ_FILTER_FAX) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_JPX) | (1 << OBJ_FILTER_CRYPT)) |
eb270d5a |
|
6c135eb4 |
static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj,
const char *pdfname, int escapes, |
6e33139f |
enum objstate *state) |
6c135eb4 |
{
struct pdfname_action *act = NULL;
unsigned j;
for (j=0;j<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);j++) { |
fbb55daf |
if (!strcmp(pdfname, pdfname_actions[j].pdfname)) { |
6c135eb4 |
act = &pdfname_actions[j];
break;
}
} |
eb270d5a |
if (!act) {
if (*state == STATE_FILTER && |
f984f75b |
!(obj->flags & (1 << OBJ_SIGNED)) &&
/* these are digital signature objects, filter doesn't matter,
* we don't need them anyway */ |
eb270d5a |
!(obj->flags & KNOWN_FILTERS)) {
cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname); |
b835a528 |
obj->flags |= 1 << OBJ_FILTER_UNKNOWN; |
eb270d5a |
} |
6c135eb4 |
return; |
eb270d5a |
} |
6c135eb4 |
if (escapes) {
/* if a commonly used PDF name is escaped that is certainly
suspicious. */
cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
}
if (act->from_state == *state ||
act->from_state == STATE_ANY) {
*state = act->to_state;
if (*state == STATE_FILTER &&
act->set_objflag !=OBJ_DICT &&
(obj->flags & (1 << act->set_objflag))) { |
9acc81d6 |
cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname); |
6c135eb4 |
pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
}
obj->flags |= 1 << act->set_objflag;
} else { |
9c617dbe |
/* auto-reset states */ |
6c135eb4 |
switch (*state) {
case STATE_S:
*state = STATE_NONE;
break; |
6e33139f |
default:
break; |
6c135eb4 |
}
}
}
static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
{
/* enough to hold common pdf names, we don't need all the names */
char pdfname[64];
const char *q2, *q3;
const char *q = obj->start + pdf->map;
const char *dict, *start;
off_t dict_length; |
9acc81d6 |
off_t bytesleft = obj_size(pdf, obj, 1);
unsigned i, filters=0; |
6c135eb4 |
enum objstate objstate = STATE_NONE;
if (bytesleft < 0)
return;
start = q;
/* find start of dictionary */
do {
q2 = pdf_nextobject(q, bytesleft);
bytesleft -= q2 -q;
if (!q2 || bytesleft < 0) {
return;
}
q3 = memchr(q-1, '<', q2-q+1);
q2++;
bytesleft--;
q = q2;
} while (!q3 || q3[1] != '<');
dict = q3+2;
q = dict; |
d1a28db0 |
bytesleft = obj_size(pdf, obj, 1) - (q - start); |
6c135eb4 |
/* find end of dictionary */
do {
q2 = pdf_nextobject(q, bytesleft);
bytesleft -= q2 -q;
if (!q2 || bytesleft < 0) {
return;
}
q3 = memchr(q-1, '>', q2-q+1);
q2++;
bytesleft--;
q = q2;
} while (!q3 || q3[1] != '>');
obj->flags |= 1 << OBJ_DICT;
dict_length = q3 - dict;
|
9c617dbe |
/* process pdf names */ |
6c135eb4 |
for (q = dict;dict_length;) {
int escapes = 0;
q2 = memchr(q, '/', dict_length);
if (!q2)
break;
dict_length -= q2 - q; |
3643f3d2 |
q = q2; |
9c617dbe |
/* normalize PDF names */ |
6c135eb4 |
for (i = 0;dict_length && (i < sizeof(pdfname)-1); i++) {
q++;
dict_length--;
if (*q == '#') {
cli_hex2str_to(q+1, pdfname+i, 2);
q += 2;
dict_length -= 2;
escapes = 1;
continue;
} |
9acc81d6 |
if (*q == ' ' || *q == '\t' || *q == '\r' || *q == '\n' || |
5e2b776b |
*q == '/' || *q == '>' || *q == ']' || *q == '[' || *q == '<') |
6c135eb4 |
break;
pdfname[i] = *q;
}
pdfname[i] = '\0';
|
6e33139f |
handle_pdfname(pdf, obj, pdfname, escapes, &objstate); |
b835a528 |
if (objstate == STATE_LINEARIZED) {
pdfobj_flag(pdf, obj, LINEARIZED_PDF);
objstate = STATE_NONE;
} |
eb270d5a |
if (objstate == STATE_JAVASCRIPT ||
objstate == STATE_OPENACTION) {
if (objstate == STATE_OPENACTION)
pdfobj_flag(pdf, obj, HAS_OPENACTION); |
ab564992 |
q2 = pdf_nextobject(q, dict_length);
if (q2 && isdigit(*q2)) {
uint32_t objid = atoi(q2) << 8;
while (isdigit(*q2)) q2++;
q2 = pdf_nextobject(q2, dict_length);
if (q2 && isdigit(*q2)) {
objid |= atoi(q2) & 0xff;
q2 = pdf_nextobject(q2, dict_length);
if (*q2 == 'R') {
struct pdf_obj *obj2; |
eb270d5a |
cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n",
pdfname, |
ab564992 |
objid >> 8, objid&0xff);
obj2 = find_obj(pdf, obj, objid); |
eb270d5a |
if (obj2) { |
dc200c6b |
enum pdf_objflags flag = objstate == STATE_JAVASCRIPT ? |
eb270d5a |
OBJ_JAVASCRIPT : OBJ_OPENACTION;
obj2->flags |= 1 << flag;
obj->flags &= ~(1 << flag);
} else {
pdfobj_flag(pdf, obj, BAD_INDOBJ);
} |
ab564992 |
}
}
}
objstate = STATE_NONE;
} |
6c135eb4 |
} |
9acc81d6 |
for (i=0;i<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);i++) {
const struct pdfname_action *act = &pdfname_actions[i];
if ((obj->flags & (1 << act->set_objflag)) &&
act->from_state == STATE_FILTER &&
act->to_state == STATE_FILTER &&
act->set_objflag != OBJ_FILTER_CRYPT) {
filters++;
}
}
if (filters > 2) { /* more than 2 non-crypt filters */
pdfobj_flag(pdf, obj, MANY_FILTERS);
} |
b835a528 |
if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS))
obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);
if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))
pdfobj_flag(pdf, obj, UNKNOWN_FILTER); |
6c135eb4 |
cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
}
|
e7a27135 |
/*
 * Scan a PDF document.
 *
 * dir    - directory in which extracted objects are created
 * ctx    - scan context; the file is accessed through *ctx->fmap
 * offset - position in the map at which the PDF is expected to start
 *
 * Steps: locate and check the "%PDF-" header, inspect the last 2048 bytes
 * for "%%EOF", "startxref" and the trailer (noting /Encrypt), verify the
 * cross reference, collect all objects, parse their dictionaries, run the
 * bytecode hooks and extract/scan the objects.  Irregularities are
 * collected in pdf.flags and may lead to a heuristic detection.
 *
 * Returns CL_SUCCESS when no header is found, CL_EMAP when the file cannot
 * be mapped, otherwise the result of the hooks/object scans.
 */
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
{
    struct pdf_struct pdf;
    fmap_t *map = *ctx->fmap;
    size_t size = map->len - offset;
    off_t versize = size > 1032 ? 1032 : size;
    off_t map_off, bytesleft;
    long xref;
    const char *pdfver, *start, *eofmap, *q, *eof;
    int rc;
    int verlen;
    unsigned i;

    cli_dbgmsg("in cli_pdf(%s)\n", dir);
    memset(&pdf, 0, sizeof(pdf));
    pdf.ctx = ctx;
    pdf.dir = dir;

    pdfver = start = fmap_need_off_once(map, offset, versize);
    /* Check PDF version */
    if (!pdfver) {
        cli_errmsg("cli_pdf: mmap() failed (1)\n");
        return CL_EMAP;
    }
    /* offset is 0 when coming from filetype2 */
    pdfver = cli_memstr(pdfver, versize, "%PDF-", 5);
    if (!pdfver) {
        cli_dbgmsg("cli_pdf: no PDF- header found\n");
        return CL_SUCCESS;
    }
    /* number of header bytes that are inside the mapped window (max 8) */
    verlen = (start + versize) - pdfver > 8 ? 8 : (int)((start + versize) - pdfver);
    /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future
     * versions */
    if (verlen < 8 ||
        pdfver[5] != '1' || pdfver[6] != '.' ||
        pdfver[7] < '1' || pdfver[7] > '9') {
        pdf.flags |= 1 << BAD_PDF_VERSION;
        cli_dbgmsg("cli_pdf: bad pdf version: %.*s\n", verlen, pdfver);
    }
    if (pdfver != start || offset) {
        pdf.flags |= 1 << BAD_PDF_HEADERPOS;
        cli_dbgmsg("cli_pdf: PDF header is not at position 0: %ld\n", (long)(pdfver-start+offset));
    }
    offset += pdfver - start;

    /* find trailer and xref, don't fail if not found */
    map_off = map->len - 2048;
    if (map_off < 0)
        map_off = 0;
    bytesleft = map->len - map_off;
    eofmap = fmap_need_off_once(map, map_off, bytesleft);
    if (!eofmap) {
        cli_errmsg("cli_pdf: mmap() failed (2)\n");
        return CL_EMAP;
    }
    eof = eofmap + bytesleft;
    q = eofmap;
    if (bytesleft >= 5) {
        for (q=&eofmap[bytesleft-5]; q > eofmap; q--) {
            if (memcmp(q, "%%EOF", 5) == 0)
                break;
        }
    }
    if (q <= eofmap) {
        pdf.flags |= 1 << BAD_PDF_TRAILER;
        cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
        /* use everything up to the end of the file; size is an absolute
         * end position from here on, offset is subtracted once below */
        size = map->len;
    } else {
        const char *t;

        size = q - eofmap + map_off;
        for (;q > eofmap;q--) {
            /* only compare when all 9 bytes are inside the window */
            if (q + 9 <= eof && memcmp(q, "startxref", 9) == 0)
                break;
        }
        if (q <= eofmap) {
            pdf.flags |= 1 << BAD_PDF_TRAILER;
            cli_dbgmsg("cli_pdf: startxref not found\n");
        } else {
            for (t=q;t > eofmap; t--) {
                if (memcmp(t,"trailer",7) == 0)
                    break;
            }
            if (t > eofmap) {
                if (cli_memstr(t, q-t, "/Encrypt", 8)) {
                    pdf.flags |= 1 << ENCRYPTED_PDF;
                    cli_dbgmsg("cli_pdf: encrypted pdf found, stream will probably fail to decompress!\n");
                }
            }
            q += 9;
            while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
            xref = atol(q);
            bytesleft = map->len - offset - xref;
            if (bytesleft > 4096)
                bytesleft = 4096;
            /* a negative or too large xref offset cannot be mapped */
            if (xref < 0 || bytesleft <= 0)
                q = NULL;
            else
                q = fmap_need_off_once(map, offset + xref, bytesleft);
            if (!q || xrefCheck(q, q+bytesleft) == -1) {
                cli_dbgmsg("cli_pdf: did not find valid xref\n");
                pdf.flags |= 1 << BAD_PDF_TRAILER;
            }
        }
    }
    if (size < (size_t)offset) {
        /* end marker lies in front of the header: ignore it */
        pdf.flags |= 1 << BAD_PDF_TRAILER;
        size = map->len;
    }
    size -= offset;
    pdf.size = size;
    pdf.map = fmap_need_off(map, offset, size);
    pdf.startoff = offset;
    if (!pdf.map) {
        cli_errmsg("cli_pdf: mmap() failed (3)\n");
        return CL_EMAP;
    }
    /* parse PDF and find obj offsets */
    while ((rc = pdf_findobj(&pdf)) > 0) {
        struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
        cli_dbgmsg("found %d %d obj @%ld\n", obj->id >> 8, obj->id&0xff, (long)(obj->start + offset));
    }
    /* the last slot was only reserved by pdf_findobj(), not filled */
    if (pdf.nobjs)
        pdf.nobjs--;
    /* after an allocation failure there is no array to iterate over */
    if (!pdf.objs)
        pdf.nobjs = 0;
    if (rc == -1)
        pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;

    /* must parse after finding all objs, so we can flag indirect objects */
    for (i=0;i<pdf.nobjs;i++) {
        struct pdf_obj *obj = &pdf.objs[i];
        pdf_parseobj(&pdf, obj);
    }

    rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
    /* extract PDF objs */
    for (i=0;!rc && i<pdf.nobjs;i++) {
        struct pdf_obj *obj = &pdf.objs[i];
        rc = pdf_extract_obj(&pdf, obj);
    }

    /* decoding problems are expected when the document is encrypted */
    if (pdf.flags & (1 << ENCRYPTED_PDF))
        pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) |
                        (1 << BAD_ASCIIDECODE));

    if (pdf.flags && !rc) {
        cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
        rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
        if (!rc && (ctx->options & CL_SCAN_ALGORITHMIC)) {
            if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
                /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
                *ctx->virname = "Heuristics.PDF.ObfuscatedNameObject";
                rc = cli_found_possibly_unwanted(ctx);
            }
        }
#if 0
        /* TODO: find both trailers, and /Encrypt settings */
        if (pdf.flags & (1 << LINEARIZED_PDF))
            pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
        if (pdf.flags & (1 << MANY_FILTERS))
            pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
        if (!rc && (pdf.flags &
            ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
             (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
             (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
            rc = CL_EUNPACK;
        }
#endif
    }
    cli_dbgmsg("cli_pdf: returning %d\n", rc);
    free(pdf.objs);
    return rc;
}
|
6ff4e486 |
#else |
5aad11ce |
static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx);
static int flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx); |
d056cc17 |
int |
6ff4e486 |
cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
d056cc17 |
{ |
6c9dc98d |
off_t size; /* total number of bytes in the file */ |
8affc406 |
off_t bytesleft, trailerlength; |
dbfb485b |
char *buf; /* start of memory mapped area */ |
bce73fe9 |
const char *p, *q, *trailerstart; |
6c9dc98d |
const char *xrefstart; /* cross reference table */ |
70502709 |
/*size_t xreflength;*/ |
a5afcb67 |
int printed_predictor_message, printed_embedded_font_message, rc; |
3470220c |
unsigned int files; |
49cc1e3c |
fmap_t *map = *ctx->fmap; |
f461d74f |
int opt_failed = 0; |
240d3307 |
|
925ece3d |
cli_dbgmsg("in cli_pdf(%s)\n", dir); |
2d5dbc37 |
size = map->len - offset; |
240d3307 |
|
139823ca |
if(size <= 7) /* doesn't even include the file header */ |
dbfb485b |
return CL_CLEAN; |
139823ca |
|
084d19aa |
p = buf = fmap_need_off_once(map, 0, size); /* FIXME: really port to fmap */ |
747c2055 |
if(!buf) { |
dbfb485b |
cli_errmsg("cli_pdf: mmap() failed\n"); |
871177cd |
return CL_EMAP; |
bf3e4471 |
}
|
95e11e5a |
cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size); |
0a097146 |
|
139823ca |
/* Lines are terminated by \r, \n or both */
/* File Header */ |
72ce4b70 |
bytesleft = size - 5;
for(q = p; bytesleft; bytesleft--, q++) {
if(!strncasecmp(q, "%PDF-", 5)) {
bytesleft = size - (off_t) (q - p);
p = q;
break;
} |
139823ca |
}
|
72ce4b70 |
if(!bytesleft) {
cli_dbgmsg("cli_pdf: file header not found\n");
return CL_CLEAN; |
139823ca |
}
/* Find the file trailer */ |
72ce4b70 |
for(q = &p[bytesleft - 5]; q > p; --q)
if(strncasecmp(q, "%%EOF", 5) == 0) |
139823ca |
break;
|
7fc055e6 |
if(q <= p) { |
dbfb485b |
cli_dbgmsg("cli_pdf: trailer not found\n");
return CL_CLEAN; |
139823ca |
}
|
b533a221 |
for(trailerstart = &q[-7]; trailerstart > p; --trailerstart) |
bce73fe9 |
if(memcmp(trailerstart, "trailer", 7) == 0) |
139823ca |
break;
/* |
bce73fe9 |
* q points to the end of the trailer section |
139823ca |
*/ |
bce73fe9 |
trailerlength = (long)(q - trailerstart); |
f461d74f |
if(cli_memstr(trailerstart, trailerlength, "Encrypt", 7)) { |
501e5d12 |
/*
* This tends to mean that the file is, in effect, read-only |
9fe789f8 |
* http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt
* http://www.adobe.com/devnet/pdf/ |
501e5d12 |
*/ |
dbfb485b |
cli_dbgmsg("cli_pdf: Encrypted PDF files not yet supported\n");
return CL_CLEAN; |
501e5d12 |
}
|
ef8219b8 |
/*
* not true, since edits may put data after the trailer |
bce73fe9 |
bytesleft -= trailerlength; |
ef8219b8 |
*/ |
bce73fe9 |
|
76fb2ef1 |
/*
* FIXME: Handle more than one xref section in the xref table
*/ |
6c9dc98d |
for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
if(memcmp(xrefstart, "xref", 4) == 0) |
76fb2ef1 |
/*
* Make sure it's the start of the line, not a startxref
* token
*/
if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
break; |
6c9dc98d |
if(xrefstart == p) { |
dbfb485b |
cli_dbgmsg("cli_pdf: xref not found\n");
return CL_CLEAN; |
6c9dc98d |
}
|
1eceda0e |
printed_predictor_message = printed_embedded_font_message = 0;
|
ef8219b8 |
/*
* not true, since edits may put data after the trailer |
70502709 |
xreflength = (size_t)(trailerstart - xrefstart); |
6c9dc98d |
bytesleft -= xreflength; |
ef8219b8 |
*/ |
6c9dc98d |
|
3470220c |
files = 0;
|
a5afcb67 |
rc = CL_CLEAN;
|
6c9dc98d |
/* |
ef8219b8 |
* The body section consists of a sequence of indirect objects |
6c9dc98d |
*/ |
d070d475 |
while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) && |
bf3e4471 |
((q = pdf_nextobject(p, bytesleft)) != NULL)) { |
f97bcc8a |
int is_ascii85decode, is_flatedecode, fout, len, has_cr; |
d8ab9ddc |
/*int object_number, generation_number;*/ |
6c9dc98d |
const char *objstart, *objend, *streamstart, *streamend; |
b432851f |
unsigned long length, objlen, real_streamlen, calculated_streamlen; |
1eceda0e |
int is_embedded_font, predictor; |
240d3307 |
char fullname[NAME_MAX + 1]; |
f53acfcd |
|
a5afcb67 |
rc = CL_CLEAN; |
ef8219b8 |
if(q == xrefstart)
break;
if(memcmp(q, "xref", 4) == 0)
break; |
616fd006 |
/*object_number = atoi(q);*/ |
8affc406 |
bytesleft -= (off_t)(q - p); |
616fd006 |
p = q;
if(memcmp(q, "endobj", 6) == 0)
continue; |
ef8219b8 |
if(!isdigit(*q)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Object number missing\n"); |
ef8219b8 |
break;
}
q = pdf_nextobject(p, bytesleft);
if((q == NULL) || !isdigit(*q)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Generation number missing\n"); |
ef8219b8 |
break;
} |
a5f514a4 |
/*generation_number = atoi(q);*/ |
8affc406 |
bytesleft -= (off_t)(q - p); |
ef8219b8 |
p = q;
q = pdf_nextobject(p, bytesleft);
if((q == NULL) || (memcmp(q, "obj", 3) != 0)) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n"); |
ef8219b8 |
break;
}
|
8affc406 |
bytesleft -= (off_t)((q - p) + 3); |
ef8219b8 |
objstart = p = &q[3]; |
f461d74f |
objend = cli_memstr(p, bytesleft, "endobj", 6); |
6c9dc98d |
if(objend == NULL) { |
dbfb485b |
cli_dbgmsg("cli_pdf: No matching endobj\n"); |
240d3307 |
break;
} |
8affc406 |
bytesleft -= (off_t)((objend - p) + 6); |
6c9dc98d |
p = &objend[6]; |
b432851f |
objlen = (unsigned long)(objend - objstart); |
240d3307 |
|
6c9dc98d |
/* Is this object a stream? */ |
f461d74f |
streamstart = cli_memstr(objstart, objlen, "stream", 6); |
bce73fe9 |
if(streamstart == NULL)
continue; |
240d3307 |
|
1eceda0e |
is_embedded_font = length = is_ascii85decode =
is_flatedecode = 0;
predictor = 1;
|
bce73fe9 |
/*
* TODO: handle F and FFilter?
*/ |
9be10a55 |
q = objstart;
while(q < streamstart) { |
ef8219b8 |
if(*q == '/') { /* name object */ |
f53acfcd |
/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/ |
6c9dc98d |
if(strncmp(++q, "Length ", 7) == 0) {
q += 7;
length = atoi(q);
while(isdigit(*q))
q++; |
f97bcc8a |
/*
* Note: incremental updates are not
* supported
*/
if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) { |
7bc22596 |
const char *r, *nq; |
f0506577 |
char b[14]; |
f97bcc8a |
q += 4; |
dbfb485b |
cli_dbgmsg("cli_pdf: Length is in indirect obj %lu\n", |
f97bcc8a |
length);
snprintf(b, sizeof(b), |
7bc22596 |
"%lu 0 obj", length); |
b432851f |
length = (unsigned long)strlen(b); |
7bc22596 |
/* optimization: assume objects
* are sequential */ |
f461d74f |
if(!opt_failed) {
nq = q;
len = buf + size - q;
} else {
nq = buf;
len = q - buf;
} |
7bc22596 |
do { |
f461d74f |
r = cli_memstr(nq, len, b, length); |
7bc22596 |
if (r > nq) {
const char x = *(r-1);
if (x == '\n' || x=='\r') {
--r;
break;
}
}
if (r) { |
f461d74f |
len -= r + length - nq;
nq = r + length; |
7bc22596 |
} else if (!opt_failed) {
/* we failed optimized match,
* try matching from the beginning
*/
len = q - buf;
r = nq = buf;
/* prevent
* infloop */
opt_failed = 1;
}
} while (r); |
f97bcc8a |
if(r) {
r += length - 1;
r = pdf_nextobject(r, bytesleft - (r - q));
if(r) {
length = atoi(r);
while(isdigit(*r))
r++; |
dbfb485b |
cli_dbgmsg("cli_pdf: length in '%s' %lu\n", |
f0506577 |
&b[1],
length); |
f97bcc8a |
}
} else |
dbfb485b |
cli_dbgmsg("cli_pdf: Couldn't find '%s'\n", |
f0506577 |
&b[1]); |
f97bcc8a |
} |
6c9dc98d |
q--; |
1eceda0e |
} else if(strncmp(q, "Length2 ", 8) == 0)
is_embedded_font = 1;
else if(strncmp(q, "Predictor ", 10) == 0) {
q += 10;
predictor = atoi(q);
while(isdigit(*q))
q++;
q--; |
6c9dc98d |
} else if(strncmp(q, "FlateDecode", 11) == 0) { |
da653b74 |
is_flatedecode = 1; |
f53acfcd |
q += 11;
} else if(strncmp(q, "ASCII85Decode", 13) == 0) { |
da653b74 |
is_ascii85decode = 1; |
6c9dc98d |
q += 13; |
240d3307 |
}
} |
ef8219b8 |
q = pdf_nextobject(q, (size_t)(streamstart - q)); |
9be10a55 |
if(q == NULL)
break;
} |
ce42a31a |
|
1eceda0e |
if(is_embedded_font) {
/*
* Need some documentation, the only I can find a |
1299feef |
* reference to is not free, if some kind soul wishes |
1eceda0e |
* to donate a copy, please contact me!
* (http://safari.adobepress.com/0321304748)
*/
if(!printed_embedded_font_message) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Embedded fonts not yet supported\n"); |
1eceda0e |
printed_embedded_font_message = 1;
}
continue;
}
if(predictor > 1) {
/*
* Needs some thought
*/
if(!printed_predictor_message) { |
dbfb485b |
cli_dbgmsg("cli_pdf: Predictor %d not honoured for embedded image\n", |
1eceda0e |
predictor);
printed_predictor_message = 1;
}
continue;
}
|
6c9dc98d |
/* objend points to the end of the object (start of "endobj") */
streamstart += 6; /* go past the word "stream" */
len = (int)(objend - streamstart);
q = pdf_nextlinestart(streamstart, len);
if(q == NULL) |
bce73fe9 |
break; |
6c9dc98d |
len -= (int)(q - streamstart);
streamstart = q; |
f461d74f |
streamend = cli_memstr(streamstart, len, "endstream\n", 10); |
6c9dc98d |
if(streamend == NULL) { |
f461d74f |
streamend = cli_memstr(streamstart, len, "endstream\r", 10); |
6c9dc98d |
if(streamend == NULL) { |
dbfb485b |
cli_dbgmsg("cli_pdf: No endstream\n"); |
0a097146 |
break;
} |
f97bcc8a |
has_cr = 1; |
918f7aaa |
} else
has_cr = 0; |
58481352 |
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", dir, files); |
d0d1afd7 |
fout = open(fullname, O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
240d3307 |
if(fout < 0) { |
e68d70e7 |
char err[128];
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
bbc4f890 |
rc = CL_ETMPFILE;
break; |
240d3307 |
}
|
1eceda0e |
/*
* Calculate the length ourself, the Length parameter is often
* wrong
*/ |
d9781001 |
if((*--streamend != '\n') && (*streamend != '\r')) |
39327ef2 |
streamend++; |
f97bcc8a |
else if(has_cr && (*--streamend != '\r')) |
39327ef2 |
streamend++; |
1eceda0e |
if(streamend <= streamstart) { |
bf3e4471 |
close(fout); |
dbfb485b |
cli_dbgmsg("cli_pdf: Empty stream\n"); |
997a0e0b |
if (cli_unlink(fullname)) { |
871177cd |
rc = CL_EUNLINK; |
997a0e0b |
break;
} |
1eceda0e |
continue;
} |
f97bcc8a |
calculated_streamlen = (int)(streamend - streamstart); |
1eceda0e |
real_streamlen = length;
|
dbfb485b |
cli_dbgmsg("cli_pdf: length %lu, calculated_streamlen %lu isFlate %d isASCII85 %d\n", |
1eceda0e |
length, calculated_streamlen,
is_flatedecode, is_ascii85decode); |
bce73fe9 |
|
b17efc99 |
if(calculated_streamlen != real_streamlen) {
cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
if(real_streamlen > calculated_streamlen)
real_streamlen = calculated_streamlen;
} |
bce73fe9 |
#if 0
/* FIXME: this isn't right... */
if(length)
/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
streamlen = MIN(length, streamlen);
#endif
|
da653b74 |
if(is_ascii85decode) { |
41273d08 |
unsigned char *tmpbuf; |
d070d475 |
int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen); |
86e209d6 |
if(ret != CL_CLEAN) {
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) { |
871177cd |
rc = CL_EUNLINK; |
997a0e0b |
break;
} |
86e209d6 |
continue;
}
tmpbuf = cli_malloc(calculated_streamlen * 5); |
550ee789 |
|
1160fc1d |
if(tmpbuf == NULL) { |
b8705ec8 |
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) { |
871177cd |
rc = CL_EUNLINK; |
997a0e0b |
break;
} |
1160fc1d |
continue;
}
|
1eceda0e |
ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf); |
bbc4f890 |
|
bce73fe9 |
if(ret == -1) { |
da653b74 |
free(tmpbuf); |
b8705ec8 |
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) { |
871177cd |
rc = CL_EUNLINK; |
997a0e0b |
break;
} |
240d3307 |
continue;
} |
44399452 |
if(ret) { |
9443ec4a |
unsigned char *t; |
f0eb3945 |
unsigned size; |
b432851f |
real_streamlen = ret; |
44399452 |
/* free unused trailing bytes */ |
f0eb3945 |
size = real_streamlen > calculated_streamlen ? real_streamlen : calculated_streamlen;
t = (unsigned char *)cli_realloc(tmpbuf,size); |
b432851f |
if(t == NULL) {
free(tmpbuf);
close(fout); |
997a0e0b |
if (cli_unlink(fullname)) { |
871177cd |
rc = CL_EUNLINK; |
997a0e0b |
break;
} |
b432851f |
continue;
}
tmpbuf = t; |
44399452 |
/*
* Note that it will probably be both
* ascii85encoded and flateencoded
*/ |
86e209d6 |
|
21e605f4 |
if(is_flatedecode)
rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
else |
871177cd |
rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EWRITE; |
550ee789 |
} |
da653b74 |
free(tmpbuf); |
86e209d6 |
} else if(is_flatedecode) { |
21e605f4 |
rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
|
86e209d6 |
} else { |
95e11e5a |
cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n", |
1eceda0e |
(unsigned long)real_streamlen); |
d070d475 |
if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN) |
871177cd |
rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EWRITE; |
88fbd274 |
} |
240d3307 |
|
d070d475 |
if (rc == CL_CLEAN) { |
db9d275c |
cli_dbgmsg("cli_pdf: extracted file %u to %s\n", files, fullname);
files++; |
d070d475 |
lseek(fout, 0, SEEK_SET);
rc = cli_magic_scandesc(fout, ctx);
} |
240d3307 |
close(fout); |
33068e09 |
if(!ctx->engine->keeptmp) |
871177cd |
if (cli_unlink(fullname)) rc = CL_EUNLINK; |
d070d475 |
if(rc != CL_CLEAN) break; |
240d3307 |
}
|
0a097146 |
|
bbc4f890 |
cli_dbgmsg("cli_pdf: returning %d\n", rc); |
550ee789 |
return rc; |
d056cc17 |
} |
da653b74 |
|
21e605f4 |
/* |
871177cd |
* flate inflation |
21e605f4 |
*/ |
da653b74 |
static int |
96522097 |
try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx) |
1eceda0e |
{ |
86e209d6 |
int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0); |
1eceda0e |
|
86e209d6 |
if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS) |
d070d475 |
return CL_CLEAN; |
1eceda0e |
|
9e3242ca |
if(real_len == calculated_len) {
/*
* Nothing more we can do to inflate
*/ |
dbfb485b |
cli_dbgmsg("cli_pdf: Bad compression in flate stream\n");
return CL_CLEAN; |
9e3242ca |
} |
1eceda0e |
|
86e209d6 |
if(cli_checklimits("cli_pdf", ctx, calculated_len, 0, 0)!=CL_CLEAN)
return CL_CLEAN;
|
f97bcc8a |
ret = flatedecode(buf, calculated_len, fout, ctx); |
d070d475 |
if(ret == CL_CLEAN)
return CL_CLEAN; |
f97bcc8a |
/* i.e. the PDF file is broken :-( */ |
dbfb485b |
cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n"); |
f97bcc8a |
return ret; |
1eceda0e |
}
static int |
96522097 |
flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx) |
da653b74 |
{ |
b80ae277 |
int zstat, ret; |
4c32a40d |
off_t nbytes; |
da653b74 |
z_stream stream;
unsigned char output[BUFSIZ]; |
1eceda0e |
#ifdef SAVE_TMP
char tmpfilename[16];
int tmpfd;
#endif |
da653b74 |
|
ed6446ff |
cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len); |
da653b74 |
|
f0506577 |
if(len == 0) { |
dbfb485b |
cli_dbgmsg("cli_pdf: flatedecode len == 0\n"); |
21e605f4 |
return CL_CLEAN; |
f0506577 |
}
|
1eceda0e |
#ifdef SAVE_TMP
/*
* Copy the embedded area for debugging, so that if it falls over
* we have a copy of the offending data. This is debugging code
* that you shouldn't of course install in a live environment. I am
* not interested in hearing about security issues with this section
* of the parser.
*/
strcpy(tmpfilename, "/tmp/pdfXXXXXX");
tmpfd = mkstemp(tmpfilename);
if(tmpfd < 0) {
perror(tmpfilename); |
dbfb485b |
cli_errmsg("cli_pdf: Can't make debugging file\n"); |
1eceda0e |
} else {
FILE *tmpfp = fdopen(tmpfd, "w");
if(tmpfp) {
fwrite(buf, sizeof(char), len, tmpfp);
fclose(tmpfp); |
39327ef2 |
cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
tmpfilename); |
1eceda0e |
} else
cli_errmsg("cli_pdf: can't fdopen debugging file\n");
}
#endif |
da653b74 |
stream.zalloc = (alloc_func)Z_NULL;
stream.zfree = (free_func)Z_NULL;
stream.opaque = (void *)NULL; |
95e11e5a |
stream.next_in = (Bytef *)buf; |
da653b74 |
stream.avail_in = len; |
501e5d12 |
stream.next_out = output;
stream.avail_out = sizeof(output); |
da653b74 |
zstat = inflateInit(&stream);
if(zstat != Z_OK) { |
1405207a |
cli_warnmsg("cli_pdf: inflateInit failed\n"); |
dbfb485b |
return CL_EMEM; |
da653b74 |
} |
9f2bc4ca |
|
4c32a40d |
nbytes = 0; |
9f2bc4ca |
|
918f7aaa |
while(stream.avail_in) { |
72910996 |
zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */ |
da653b74 |
switch(zstat) {
case Z_OK: |
1160fc1d |
if(stream.avail_out == 0) { |
dbfb485b |
int written;
if ((written=cli_writen(fout, output, sizeof(output)))!=sizeof(output)) {
cli_errmsg("cli_pdf: failed to write output file\n");
inflateEnd(&stream); |
871177cd |
return CL_EWRITE; |
dbfb485b |
}
nbytes += written; |
9f2bc4ca |
|
d91ab809 |
if((ret=cli_checklimits("cli_pdf", ctx, nbytes, 0, 0))!=CL_CLEAN) { |
4c32a40d |
inflateEnd(&stream); |
b80ae277 |
return ret; |
4c32a40d |
} |
1160fc1d |
stream.next_out = output; |
501e5d12 |
stream.avail_out = sizeof(output); |
1160fc1d |
} |
da653b74 |
continue;
case Z_STREAM_END:
break;
default: |
fb53f48e |
if(stream.msg) |
dbfb485b |
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n", |
ed6446ff |
(unsigned long)nbytes,
stream.msg); |
fb53f48e |
else |
dbfb485b |
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF attachment\n", |
ed6446ff |
(unsigned long)nbytes, zstat); |
da653b74 |
inflateEnd(&stream); |
dbfb485b |
return CL_CLEAN; |
da653b74 |
}
break;
}
|
dbfb485b |
if(stream.avail_out != sizeof(output)) {
if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
cli_errmsg("cli_pdf: failed to write output file\n");
inflateEnd(&stream); |
871177cd |
return CL_EWRITE; |
dbfb485b |
}
}
|
1eceda0e |
#ifdef SAVE_TMP |
997a0e0b |
if (cli_unlink(tmpfilename)) {
inflateEnd(&stream); |
871177cd |
return CL_EUNLINK; |
997a0e0b |
} |
1eceda0e |
#endif |
dbfb485b |
inflateEnd(&stream);
return CL_CLEAN; |
da653b74 |
} |
6ff4e486 |
#endif |
da653b74 |
|
6e33139f |
/*
 * Decode ASCIIHex data: each pair of input characters is converted with
 * cli_hex2str_to() into one output byte.  Spaces are skipped and '>'
 * ends the data.
 *
 * Returns the number of output slots consumed, or -1 if a pair cannot be
 * converted and at least 4 input bytes remain (a bad pair nearer the end
 * of the input is stepped over instead).
 */
static int asciihexdecode(const char *buf, off_t len, char *output)
{
    unsigned in = 0, out = 0;

    while (in + 1 < len) {
        const char c = buf[in];

        if (c == '>')
            break;

        if (c != ' ') {
            /* the output slot is consumed whether or not the pair converts */
            char *dst = output + out;
            out++;

            if (cli_hex2str_to(buf + in, dst, 2) == -1) {
                if (len - in >= 4)
                    return -1;
                /* bad pair close to the end: advance by one character */
            } else {
                /* a pair was used: skip its second character too */
                in++;
            }
        }
        in++;
    }
    return out;
}
67355216 |
/*
 * ascii85 inflation, returns number of bytes in output, -1 for error
 *
 * buf/len is the encoded input, output receives the decoded bytes (the
 * caller must supply a large enough buffer).  Decoding stops at the "~>"
 * end marker or when the input is exhausted.  Whitespace is ignored; any
 * other character outside '!'..'u' and 'z' is an error, as is a 'z' in the
 * middle of a group or a final group of a single character.
 *
 * See http://www.piclist.com/techref/method/encode.htm (look for base85)
 */
static int
ascii85decode(const char *buf, off_t len, unsigned char *output)
{
	const char *ptr;
	uint32_t sum = 0;
	int quintet = 0;
	int ret = 0;

	if(cli_memstr(buf, len, "~>", 2) == NULL)
		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");

	ptr = buf;

	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

	while(len > 0) {
		/*
		 * Go through unsigned char so that a 0xFF input byte is not
		 * mistaken for EOF and isspace() never sees a negative value
		 */
		int byte = (unsigned char)*ptr++;

		len--;

		/* only look at the next byte if there is one */
		if((byte == '~') && (len > 0) && (*ptr == '>'))
			byte = EOF;

		if(byte >= '!' && byte <= 'u') {
			sum = (sum * 85) + ((uint32_t)byte - '!');
			if(++quintet == 5) {
				*output++ = (unsigned char)(sum >> 24);
				*output++ = (unsigned char)((sum >> 16) & 0xFF);
				*output++ = (unsigned char)((sum >> 8) & 0xFF);
				*output++ = (unsigned char)(sum & 0xFF);
				ret += 4;
				quintet = 0;
				sum = 0;
			}
		} else if(byte == 'z') {
			/* 'z' is shorthand for four zero bytes */
			if(quintet) {
				cli_dbgmsg("ascii85decode: unexpected 'z'\n");
				return -1;
			}
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			ret += 4;
		} else if(byte == EOF) {
			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
			if(quintet) {
				int i;

				if(quintet == 1) {
					cli_dbgmsg("ascii85Decode: only 1 byte in last quintet\n");
					return -1;
				}
				/* pad the partial group up to five digits */
				for(i = quintet; i < 5; i++)
					sum *= 85;

				/* quintet is at least 2 here */
				sum += (0xFFFFFF >> ((quintet - 2) * 8));

				/*
				 * A partial group of n characters yields n - 1
				 * bytes; count exactly what is written below
				 */
				ret += quintet - 1;
				for(i = 0; i < quintet - 1; i++)
					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
			}
			break;
		} else if(!isspace(byte)) {
			cli_dbgmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
				byte & 0xFF, (unsigned long)len);
			return -1;
		}
	}
	return ret;
}
bce73fe9 |
/*
 * Find the start of the next line
 *
 * Skips the rest of the current line and the run of line terminators
 * after it, looking at no more than len bytes starting at ptr.
 *
 * Returns a pointer to the first byte of the next line, or NULL if ptr is
 * NULL, len is 0, or the len bytes run out before a next line starts.
 *
 * Note: strchr() also matches the terminating NUL of "\r\n", so a NUL
 * byte in the data is handled like a line terminator.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
	/* with len == 0 the first --len below would wrap around */
	if((ptr == NULL) || (len == 0))
		return NULL;

	/* skip to the end of the current line */
	while(strchr("\r\n", *ptr) == NULL) {
		if(--len == 0L)
			return NULL;
		ptr++;
	}
	/* skip the line terminator(s) */
	while(strchr("\r\n", *ptr) != NULL) {
		if(--len == 0L)
			return NULL;
		ptr++;
	}
	return ptr;
}
9be10a55 |
|
ef8219b8 |
/*
 * Return the start of the next PDF object.
 * This assumes that we're not in a stream.
 *
 * Scanning begins inside the token at ptr: its remaining characters are
 * skipped until a separator is seen, after which the first non-separator
 * byte is returned.  A '/' (name) or '(' is returned as soon as it is
 * met.  Line ends and '%' comments are skipped with pdf_nextlinestart().
 * Returns NULL if nothing is found within len bytes.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
	const char *cur = ptr;
	size_t remaining = len;
	int in_token = 1;	/* still inside the token we started in */

	while(remaining != 0) {
		const char c = *cur;

		/* Start of a name object, or start of JS */
		if((c == '/') || (c == '('))
			return cur;

		if((c == '\n') || (c == '\r') || (c == '%')) {
			/* end of line or comment: resume on the next line */
			const char *next = pdf_nextlinestart(cur, remaining);

			if(next == NULL)
				return NULL;
			remaining -= (size_t)(next - cur);
			cur = next;
			in_token = 0;
			continue;
		}

		if((c == ' ') || (c == '\t') || (c == '\v') || (c == '\f') ||
		   (c == '[') ||	/* Start of an array object */
		   (c == '<'))		/* Start of a dictionary object */
			in_token = 0;
		else if(!in_token)
			/* TODO: parse and return object type */
			return cur;

		cur++;
		remaining--;
	}
	return NULL;
}