/*
 *  Copyright (C) 2015, 2017-2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2014 Sourcefire, Inc.
 *
 *  Authors: Nigel Horne, Török Edvin
 *
 *  Also based on Matt Olney's pdf parser in snort-nrt.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 *  TODO: Embedded fonts
 *  TODO: Predictor image handling
 */
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
240d3307 |
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h> |
511a59c7 |
#include <errno.h> |
ed6446ff |
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif |
9443ec4a |
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif |
240d3307 |
#include <zlib.h>
|
e746f010 |
#if HAVE_ICONV |
063f0d25 |
#include <iconv.h> |
e746f010 |
#endif |
063f0d25 |
|
45e60c0c |
#ifdef _WIN32
#include <stdint.h>
#endif
|
ed6446ff |
#include "clamav.h"
#include "others.h" |
654c0b96 |
#include "pdf.h" |
7aad5a3b |
#include "pdfdecode.h" |
a5afcb67 |
#include "scanners.h" |
747c2055 |
#include "fmap.h" |
f461d74f |
#include "str.h" |
dc200c6b |
#include "bytecode.h"
#include "bytecode_api.h" |
7606789f |
#include "arc4.h" |
374be101 |
#include "rijndael.h" |
7719760b |
#include "textnorm.h" |
5f31c9b4 |
#include "conv.h" |
ebcca55f |
#include "json_api.h" |
4956690d |
|
1eceda0e |
#ifdef CL_DEBUG |
5cd3f734 |
/*#define SAVE_TMP
*Save the file being worked on in tmp */ |
1eceda0e |
#endif
|
c1a785c4 |
struct pdf_struct;
|
6e33139f |
static int asciihexdecode(const char *buf, off_t len, char *output); |
5aad11ce |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
bce73fe9 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
ef8219b8 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
da653b74 |
|
ebcca55f |
/* PDF statistics callbacks and related */ |
224d1c4d |
struct pdfname_action; |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
ebcca55f |
static void pdf_export_json(struct pdf_struct *);
|
224d1c4d |
static void ASCIIHexDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void ASCII85Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void EmbeddedFile_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void FlateDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Image_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void LZWDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void RunLengthDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void CCITTFaxDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void JBIG2Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void DCTDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void JPXDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Crypt_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Standard_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Sig_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void JavaScript_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void OpenAction_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Launch_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Page_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Author_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Creator_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Producer_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void CreationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void ModificationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Title_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Subject_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Keywords_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Pages_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act); |
09ff1409 |
static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act); |
63803da5 |
#endif |
ebcca55f |
/* End PDF statistics callbacks and related */ |
49bc4992 |
|
e7a27135 |
/*
 * Decide whether the bytes in [xref, eof) look like a cross-reference section.
 *
 * xref: first byte to inspect (leading spaces, LF and CR are skipped)
 * eof:  one past the last readable byte
 *
 * Returns 0 when either the literal keyword "xref" starts the data or the
 * marker "/XRef" appears anywhere after it; returns -1 otherwise.
 */
static int xrefCheck(const char *xref, const char *eof)
{
    const char *q;

    while (xref < eof && (*xref == ' ' || *xref == '\n' || *xref == '\r'))
        xref++;

    /* need strictly more than 4 bytes; written as a difference so that no
     * pointer beyond eof is ever formed */
    if (eof - xref <= 4)
        return -1;

    if (!memcmp(xref, "xref", 4)) {
        cli_dbgmsg("cli_pdf: found xref\n");
        return 0;
    }

    /* could be xref stream */
    for (q = xref; eof - q > 5; q++) {
        /* compare all five bytes of the marker; comparing only four would
         * also accept any name that merely starts with "/XRe" */
        if (!memcmp(q, "/XRef", 5)) {
            cli_dbgmsg("cli_pdf: found /XRef\n");
            return 0;
        }
    }

    return -1;
}
|
4956690d |
/*
 * Define NOISY to be noisy about things that we can't parse properly.
 * It is force-undefined here, so by default both macros expand to nothing
 * and their arguments are never evaluated.
 */
#undef NOISY
#ifdef NOISY
#define noisy_msg(pdf, ...) cli_infomsg((pdf)->ctx, __VA_ARGS__)
#define noisy_warnmsg(...) cli_warnmsg(__VA_ARGS__)
#else
#define noisy_msg(pdf, ...)
#define noisy_warnmsg(...)
#endif
|
e7a27135 |
/*
 * Step backwards from q over PDF whitespace bytes (NUL, TAB, LF, FF, CR and
 * SPACE) and return the position where the walk stopped.
 *
 * The walk never goes below start, and the byte at start itself is not
 * examined: if everything in (start, q] is whitespace, start is returned.
 * When q <= start on entry, q is returned unchanged.
 */
static const char *findNextNonWSBack(const char *q, const char *start)
{
    while (q > start) {
        switch (*q) {
            case 0x00:
            case 0x09:
            case 0x0a:
            case 0x0c:
            case 0x0d:
            case 0x20:
                q--;
                continue;
            default:
                break;
        }
        break; /* non-whitespace byte reached */
    }

    return q;
}
|
15a8a022 |
/*
 * Locate the payload of a "stream" ... "endstream" pair.
 *
 * start:        beginning of the search window
 * bytesleft:    number of bytes searched for the "stream" keyword
 * bytesleft2:   number of bytes (counted from start) available for locating
 *               "endstream"
 * stream:       receives the offset, relative to start, of the first payload
 *               byte (after the keyword and an optional CRLF or LF)
 * endstream:    receives the offset, relative to start, of "endstream"; when
 *               the keyword is missing, bytesleft2 - 9 bytes past the payload
 *               start is used, clamped so it is never below *stream
 * newline_hack: when non-zero, one extra LF following a CRLF is skipped too
 *
 * Returns 1 when both offsets were stored, 0 otherwise. Note that *stream may
 * already have been written when 0 is returned because no bytes remain after
 * the keyword.
 */
static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream, int newline_hack)
{
    const char *payload;
    const char *terminator;

    payload = cli_memstr(start, bytesleft, "stream", 6);
    if (!payload)
        return 0;

    payload += 6;
    bytesleft -= payload - start;
    if (bytesleft < 0)
        return 0;

    /* swallow the end-of-line marker that follows the keyword */
    if (bytesleft >= 2 && payload[0] == '\xd' && payload[1] == '\xa') {
        payload += 2;
        if (newline_hack && (bytesleft > 2) && payload[0] == '\xa')
            payload++;
    } else if (bytesleft && payload[0] == '\xa') {
        payload++;
    }

    *stream = payload - start;
    bytesleft2 -= payload - start;
    if (bytesleft2 <= 0)
        return 0;

    terminator = cli_memstr(payload, bytesleft2, "endstream", 9);
    if (!terminator)
        terminator = payload + bytesleft2 - 9; /* till EOF */

    *endstream = terminator - start;
    if (*endstream < *stream)
        *endstream = *stream;

    return 1;
}
|
693757a1 |
/* Expected returns: 1 if success, 0 if no more objects, -1 if error */ |
930b9395 |
int pdf_findobj(struct pdf_struct *pdf) |
e7a27135 |
{ |
3643f3d2 |
const char *start, *q, *q2, *q3, *eof; |
e7a27135 |
struct pdf_obj *obj;
off_t bytesleft;
unsigned genid, objid;
pdf->nobjs++;
pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
if (!pdf->objs) { |
15a8a022 |
cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs);
return -1; |
e7a27135 |
} |
15a8a022 |
|
e7a27135 |
obj = &pdf->objs[pdf->nobjs-1]; |
ab564992 |
memset(obj, 0, sizeof(*obj)); |
e7a27135 |
start = pdf->map+pdf->offset;
bytesleft = pdf->size - pdf->offset; |
bdbae203 |
while (bytesleft > 0) { |
15a8a022 |
q2 = cli_memstr(start, bytesleft, "obj", 3);
if (!q2)
return 0;/* no more objs */
q2--;
bytesleft -= q2 - start;
if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) {
start = q2+4;
bytesleft -= 4;
continue;
}
break; |
bdbae203 |
} |
15a8a022 |
|
bdbae203 |
if (bytesleft <= 0) |
15a8a022 |
return 0; |
bdbae203 |
|
e7a27135 |
q = findNextNonWSBack(q2-1, start); |
15a8a022 |
while (q > start && isdigit(*q))
q--;
|
e7a27135 |
genid = atoi(q);
q = findNextNonWSBack(q-1,start); |
15a8a022 |
while (q > start && isdigit(*q))
q--;
|
e7a27135 |
objid = atoi(q);
obj->id = (objid << 8) | (genid&0xff);
obj->start = q2+4 - pdf->map;
obj->flags = 0;
bytesleft -= 4;
eof = pdf->map + pdf->size;
q = pdf->map + obj->start; |
15a8a022 |
|
e7a27135 |
while (q < eof && bytesleft > 0) { |
15a8a022 |
off_t p_stream, p_endstream;
q2 = pdf_nextobject(q, bytesleft);
if (!q2)
q2 = pdf->map + pdf->size;
bytesleft -= q2 - q;
if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) {
obj->flags |= 1 << OBJ_STREAM;
q2 = q-1 + p_endstream + 9;
bytesleft -= q2 - q + 1;
if (bytesleft < 0) {
obj->flags |= 1 << OBJ_TRUNCATED;
pdf->offset = pdf->size;
return 1;/* truncated */
}
} else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
q2 = q3 + 6;
pdf->offset = q2 - pdf->map;
return 1; /* obj found and offset positioned */
} else {
q2++;
bytesleft--;
}
q = q2; |
e7a27135 |
} |
15a8a022 |
|
9acc81d6 |
obj->flags |= 1 << OBJ_TRUNCATED;
pdf->offset = pdf->size; |
15a8a022 |
|
9acc81d6 |
return 1;/* truncated */ |
e7a27135 |
}
|
e09d8843 |
static size_t filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, size_t len, size_t *sum) |
3643f3d2 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
|
e09d8843 |
if (cli_checklimits("pdf", pdf->ctx, (unsigned long)*sum, 0, 0)) /* TODO: May truncate for large values on 64-bit platforms */ |
15a8a022 |
return len; /* pretend it was a successful write to suppress CL_EWRITE */
|
3643f3d2 |
*sum += len; |
15a8a022 |
|
e09d8843 |
return cli_writen(fout, buf, (unsigned int)len); |
3643f3d2 |
}
|
1d0cdc67 |
void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag) |
eb270d5a |
{
const char *s= "";
pdf->flags |= 1 << flag;
if (!cli_debug_flag) |
15a8a022 |
return;
|
eb270d5a |
switch (flag) { |
15a8a022 |
case UNTERMINATED_OBJ_DICT:
s = "dictionary not terminated";
break;
case ESCAPED_COMMON_PDFNAME:
/* like /JavaScript */
s = "escaped common pdfname";
break;
case BAD_STREAM_FILTERS:
s = "duplicate stream filters";
break;
case BAD_PDF_VERSION:
s = "bad pdf version";
break;
case BAD_PDF_HEADERPOS:
s = "bad pdf header position";
break;
case BAD_PDF_TRAILER:
s = "bad pdf trailer";
break;
case BAD_PDF_TOOMANYOBJS:
s = "too many pdf objs";
break;
case BAD_FLATE:
s = "bad deflate stream";
break;
case BAD_FLATESTART:
s = "bad deflate stream start";
break;
case BAD_STREAMSTART:
s = "bad stream start";
break;
case UNKNOWN_FILTER:
s = "unknown filter used";
break;
case BAD_ASCIIDECODE:
s = "bad ASCII decode";
break;
case HEX_JAVASCRIPT:
s = "hex javascript";
break;
case BAD_INDOBJ:
s = "referencing nonexistent obj";
break;
case HAS_OPENACTION:
s = "has /OpenAction";
break;
case HAS_LAUNCHACTION:
s = "has /LaunchAction";
break;
case BAD_STREAMLEN:
s = "bad /Length, too small";
break;
case ENCRYPTED_PDF:
s = "PDF is encrypted";
break;
case LINEARIZED_PDF:
s = "linearized PDF";
break;
case MANY_FILTERS:
s = "more than 2 filters per obj";
break;
case DECRYPTABLE_PDF:
s = "decryptable PDF";
break; |
eb270d5a |
} |
15a8a022 |
|
f984f75b |
cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
eb270d5a |
}
|
930b9395 |
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid) |
3643f3d2 |
{ |
1412b807 |
uint32_t j;
uint32_t i; |
5aad11ce |
/* search starting at previous obj (if exists) */ |
15a8a022 |
i = (obj != pdf->objs) ? obj - pdf->objs : 0;
|
3643f3d2 |
for (j=i;j<pdf->nobjs;j++) { |
15a8a022 |
obj = &pdf->objs[j];
if (obj->id == objid)
return obj; |
3643f3d2 |
} |
15a8a022 |
|
3643f3d2 |
/* restart search from beginning if not found */
for (j=0;j<i;j++) { |
15a8a022 |
obj = &pdf->objs[j];
if (obj->id == objid)
return obj; |
3643f3d2 |
} |
15a8a022 |
|
3643f3d2 |
return NULL;
}
|
15a8a022 |
static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len) |
3643f3d2 |
{
int length;
const char *q; |
15a8a022 |
|
3643f3d2 |
q = cli_memstr(start, len, "/Length", 7);
if (!q) |
15a8a022 |
return 0;
|
3643f3d2 |
q++;
len -= q - start;
start = pdf_nextobject(q, len);
if (!start) |
15a8a022 |
return 0;
|
6e33139f |
/* len -= start - q; */ |
3643f3d2 |
q = start;
length = atoi(q); |
15a8a022 |
while (isdigit(*q))
q++;
|
3643f3d2 |
if (*q == ' ') { |
15a8a022 |
int genid;
q++;
genid = atoi(q);
while(isdigit(*q))
q++;
if (q[0] == ' ' && q[1] == 'R') {
cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
if (!obj) {
cli_dbgmsg("cli_pdf: indirect object not found\n");
return 0;
}
q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
if (!q) {
cli_dbgmsg("cli_pdf: next object not found\n");
return 0;
}
length = atoi(q);
} |
3643f3d2 |
} |
15a8a022 |
|
a9d034ee |
/* limit length */ |
15a8a022 |
if (start - pdf->map + length+5 > pdf->size)
length = pdf->size - (start - pdf->map)-5;
|
3643f3d2 |
return length;
}
|
7719760b |
/* Object flag bits tested by pdf_extract_obj(): an object carrying none of
 * them is not dumped unless it is a stream without filters. */
#define DUMP_MASK ((1 << OBJ_CONTENTS) |      \
                   (1 << OBJ_FILTER_FLATE) |  \
                   (1 << OBJ_FILTER_DCT) |    \
                   (1 << OBJ_FILTER_AH) |     \
                   (1 << OBJ_FILTER_A85) |    \
                   (1 << OBJ_EMBEDDED_FILE) | \
                   (1 << OBJ_JAVASCRIPT) |    \
                   (1 << OBJ_OPENACTION) |    \
                   (1 << OBJ_LAUNCHACTION))
ab564992 |
|
eb270d5a |
static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary) |
ab564992 |
{ |
5aad11ce |
unsigned i = obj - pdf->objs; |
15a8a022 |
|
ab564992 |
i++;
if (i < pdf->nobjs) { |
15a8a022 |
int s = pdf->objs[i].start - obj->start - 4;
if (s > 0) {
if (!binary) {
const char *p = pdf->map + obj->start;
const char *q = p + s;
while (q > p && (isspace(*q) || isdigit(*q)))
q--;
if (q > p+5 && !memcmp(q-5,"endobj",6))
q -= 6;
q = findNextNonWSBack(q, p);
q++;
return q - p;
}
return s;
} |
ab564992 |
} |
15a8a022 |
|
eb270d5a |
if (binary) |
15a8a022 |
return pdf->size - obj->start;
|
eb270d5a |
return pdf->offset - obj->start - 6; |
ab564992 |
}
|
15a8a022 |
static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid) |
dc200c6b |
{
int ret;
struct cli_bc_ctx *bc_ctx;
cli_ctx *ctx = pdf->ctx;
fmap_t *map;
|
cd94be7a |
UNUSEDPARAM(dumpid);
|
dc200c6b |
bc_ctx = cli_bytecode_context_alloc();
if (!bc_ctx) { |
15a8a022 |
cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
return CL_EMEM; |
dc200c6b |
}
map = *ctx->fmap;
if (fd != -1) { |
15a8a022 |
map = fmap(fd, 0, 0);
if (!map) { |
db5c5d72 |
cli_dbgmsg("can't mmap pdf extracted obj\n"); |
15a8a022 |
map = *ctx->fmap;
fd = -1;
} |
dc200c6b |
} |
15a8a022 |
cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs, &pdf->flags, pdf->size, pdf->startoff); |
dc200c6b |
cli_bytecode_context_setctx(bc_ctx, ctx); |
6ad45a29 |
ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map); |
dc200c6b |
cli_bytecode_context_destroy(bc_ctx); |
15a8a022 |
if (fd != -1)
funmap(map);
|
dc200c6b |
return ret;
}
|
bbfad9ba |
static void dbg_printhex(const char *msg, const char *hex, unsigned len); |
cd94be7a |
|
e09d8843 |
/*
 * Decrypt AES data block by block, XOR-ing every decrypted block with the
 * previous ciphertext block (or with the IV for the first block).
 *
 * in:     ciphertext; when has_iv is set its first 16 bytes are the IV
 * length: in: number of input bytes; out: number of plaintext bytes in q
 * q:      output buffer, written 16 bytes at a time
 * key:    key bytes
 * key_n:  key length in bytes (values above 32 are rejected)
 * has_iv: non-zero when the data starts with an IV and ends with padding
 *
 * On the early exits (key too long, fewer than 32 input bytes, key setup
 * failure) nothing is written and *length is left untouched. When the padding
 * is malformed, *length is reduced by the IV and the trailing partial block
 * only, i.e. the padding is kept in the output.
 */
static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
{
    unsigned long rk[RKLENGTH(256)];
    unsigned char iv[16];
    unsigned char pad, i;
    size_t len = *length;
    int nrounds;

    cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %zu\n", key_n, *length);
    if (key_n > 32) {
        cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8);
        return;
    }

    if (len < 32) {
        cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len);
        noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len);
        return;
    }

    if (!has_iv) {
        memset(iv, 0, sizeof(iv));
    } else {
        memcpy(iv, in, 16);
        in += 16;
        len -= 16;
    }

    cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
    nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n*8);
    if (!nrounds) {
        cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n");
        return;
    }
    cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");

    for (; len >= 16; len -= 16, in += 16, q += 16) {
        unsigned k;

        rijndaelDecrypt(rk, nrounds, in, q);
        for (k = 0; k < 16; k++)
            q[k] ^= iv[k];
        /* this ciphertext block chains into the next one */
        memcpy(iv, in, 16);
    }

    if (has_iv) {
        /* len now counts the bytes that are not plaintext: IV + leftover */
        len += 16;
        pad = q[-1];
        if (pad > 0x10) {
            cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
            noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
            *length -= len;
            return;
        }
        q -= pad;
        for (i = 1; i < pad; i++) {
            if (q[i] == pad)
                continue;
            cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
            noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
            *length -= len;
            return;
        }
        len += pad;
    }

    *length -= len;

    cli_dbgmsg("cli_pdf: aes_decrypt: length is %zu\n", *length);
}
|
e09d8843 |
char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method) |
374be101 |
{
unsigned char *key, *q, result[16];
unsigned n;
struct arc4_state arc4;
|
4956690d |
if (!length || !*length || !in) { |
15a8a022 |
noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
return NULL; |
4956690d |
} |
15a8a022 |
|
374be101 |
n = pdf->keylen + 5; |
bcc68567 |
if (enc_method == ENC_AESV2) |
15a8a022 |
n += 4;
|
374be101 |
key = cli_malloc(n); |
4956690d |
if (!key) { |
15a8a022 |
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL; |
4956690d |
} |
374be101 |
memcpy(key, pdf->key, pdf->keylen);
q = key + pdf->keylen;
*q++ = id >> 8;
*q++ = id >> 16;
*q++ = id >> 24;
*q++ = id;
*q++ = 0; |
bcc68567 |
if (enc_method == ENC_AESV2) |
15a8a022 |
memcpy(q, "sAlT", 4);
|
b2e7c931 |
cl_hash_data("md5", key, n, result, NULL); |
bbfad9ba |
free(key);
|
374be101 |
n = pdf->keylen + 5;
if (n > 16) |
15a8a022 |
n = 16; |
374be101 |
|
66c53a53 |
q = cli_calloc(*length, sizeof(char)); |
4956690d |
if (!q) { |
15a8a022 |
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL; |
4956690d |
} |
374be101 |
|
bcc68567 |
switch (enc_method) { |
15a8a022 |
case ENC_V2:
cli_dbgmsg("cli_pdf: enc is v2\n");
memcpy(q, in, *length);
arc4_init(&arc4, result, n); |
e09d8843 |
arc4_apply(&arc4, q, (unsigned)*length); /* TODO: may truncate for very large lengths */ |
15a8a022 |
noisy_msg(pdf, "decrypted ARC4 data\n");
break;
case ENC_AESV2:
cli_dbgmsg("cli_pdf: enc is aesv2\n"); |
cd94be7a |
aes_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1); |
15a8a022 |
noisy_msg(pdf, "decrypted AES(v2) data\n");
break;
case ENC_AESV3:
cli_dbgmsg("cli_pdf: enc is aesv3\n");
if (pdf->keylen == 0) {
cli_dbgmsg("cli_pdf: no key\n");
return NULL;
}
|
cd94be7a |
aes_decrypt((const unsigned char *)in, length, q, pdf->key, pdf->keylen, 1); |
15a8a022 |
noisy_msg(pdf, "decrypted AES(v3) data\n");
break;
case ENC_IDENTITY:
cli_dbgmsg("cli_pdf: enc is identity\n");
memcpy(q, in, *length);
noisy_msg(pdf, "identity encryption\n");
break;
case ENC_NONE:
cli_dbgmsg("cli_pdf: enc is none\n");
noisy_msg(pdf, "encryption is none\n");
free(q);
return NULL;
case ENC_UNKNOWN:
cli_dbgmsg("cli_pdf: enc is unknown\n");
free(q);
noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
id>>8,id&0xff);
return NULL; |
374be101 |
} |
15a8a022 |
|
cd94be7a |
return (char *)q; |
374be101 |
}
|
e2b1880f |
enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj) |
bcc68567 |
{
if (obj->flags & (1 << OBJ_EMBEDDED_FILE)) |
15a8a022 |
return pdf->enc_method_embeddedfile;
|
bcc68567 |
if (obj->flags & (1 << OBJ_STREAM)) |
15a8a022 |
return pdf->enc_method_stream;
|
bcc68567 |
return pdf->enc_method_string;
}
|
7719760b |
/* Scanner state used by process() while extracting text. */
enum cstate {
    CSTATE_NONE = 0,       /* waiting for a '[' */
    CSTATE_TJ = 1,         /* after '[': waiting for a '(' */
    CSTATE_TJ_PAROPEN = 2  /* after '(': bytes are collected until ')' */
};
static void process(struct text_norm_state *s, enum cstate *st, const char *buf, int length, int fout)
{
do { |
15a8a022 |
switch (*st) {
case CSTATE_NONE:
if (*buf == '[') {
*st = CSTATE_TJ;
} else {
const char *nl = memchr(buf, '\n', length);
if (!nl)
return;
length -= nl - buf;
buf = nl;
}
break;
case CSTATE_TJ:
if (*buf == '(')
*st = CSTATE_TJ_PAROPEN;
break;
case CSTATE_TJ_PAROPEN:
if (*buf == ')') {
*st = CSTATE_TJ;
} else { |
cd94be7a |
if (text_normalize_buffer(s, (const unsigned char *)buf, 1) != 1) { |
15a8a022 |
cli_writen(fout, s->out, s->out_pos);
text_normalize_reset(s);
}
}
break;
}
buf++;
length--; |
7719760b |
} while (length > 0);
}
static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
{
struct text_norm_state s;
char fullname[1024];
char outbuff[BUFSIZ];
char inbuf[BUFSIZ]; |
4956690d |
int fout, n, rc; |
7719760b |
enum cstate st = CSTATE_NONE;
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
if (fout < 0) { |
15a8a022 |
char err[128];
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
return CL_ETMPFILE; |
7719760b |
}
|
cd94be7a |
text_normalize_init(&s, (unsigned char *)outbuff, sizeof(outbuff)); |
7719760b |
while (1) { |
15a8a022 |
n = cli_readn(fd, inbuf, sizeof(inbuf));
if (n <= 0)
break;
process(&s, &st, inbuf, n, fout); |
7719760b |
} |
15a8a022 |
|
7719760b |
cli_writen(fout, s.out, s.out_pos);
|
4956690d |
lseek(fout, 0, SEEK_SET);
rc = cli_magic_scandesc(fout, pdf->ctx); |
7719760b |
close(fout); |
15a8a022 |
|
4956690d |
if (!pdf->ctx->engine->keeptmp) |
15a8a022 |
if (cli_unlink(fullname) && rc != CL_VIRUS)
rc = CL_EUNLINK;
|
4956690d |
return rc; |
7719760b |
}
|
27c8b02b |
static const char *pdf_getdict(const char *q0, int* len, const char *key);
static char *pdf_readval(const char *q, int len, const char *key); |
fb0c9fa2 |
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
|
930b9395 |
int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
3643f3d2 |
{ |
ab564992 |
char fullname[NAME_MAX + 1];
int fout; |
e09d8843 |
ptrdiff_t sum = 0; |
3643f3d2 |
int rc = CL_SUCCESS; |
dc200c6b |
int dump = 1; |
ab564992 |
|
5b574c47 |
cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
|
9acc81d6 |
/* TODO: call bytecode hook here, allow override dumpability */ |
15a8a022 |
if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
/* don't dump all streams */
dump = 0; |
ab564992 |
} |
15a8a022 |
if ((obj->flags & (1 << OBJ_IMAGE)) && !(obj->flags & (1 << OBJ_FILTER_DCT))) {
/* don't dump / scan non-JPG images */
dump = 0; |
9acc81d6 |
} |
15a8a022 |
|
dc200c6b |
if (obj->flags & (1 << OBJ_FORCEDUMP)) { |
15a8a022 |
/* bytecode can force dump by setting this flag */
dump = 1; |
dc200c6b |
} |
15a8a022 |
|
dc200c6b |
if (!dump) |
15a8a022 |
return CL_CLEAN;
|
374be101 |
cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff); |
15a8a022 |
|
ab564992 |
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
if (fout < 0) { |
15a8a022 |
char err[128];
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
return CL_ETMPFILE; |
ab564992 |
}
|
1412b807 |
if (!(flags & PDF_EXTRACT_OBJ_SCAN))
obj->path = strdup(fullname);
|
ab564992 |
do { |
15a8a022 |
if (obj->flags & (1 << OBJ_STREAM)) {
const char *start = pdf->map + obj->start;
off_t p_stream = 0, p_endstream = 0; |
7aad5a3b |
off_t length; |
15a8a022 |
find_stream_bounds(start, pdf->size - obj->start,
pdf->size - obj->start,
&p_stream, &p_endstream,
pdf->enc_method_stream <= ENC_IDENTITY &&
pdf->enc_method_embeddedfile <= ENC_IDENTITY);
if (p_stream && p_endstream) {
size_t size = p_endstream - p_stream;
off_t orig_length; |
7aad5a3b |
int len = p_stream;
const char *pstr;
struct pdf_dict *dparams = NULL; |
a081b3e9 |
int xref = 0; |
15a8a022 |
length = find_length(pdf, obj, start, p_stream);
if (length < 0)
length = 0;
orig_length = length;
if (length > pdf->size || obj->start + p_stream + length > pdf->size) { |
e09d8843 |
cli_dbgmsg("cli_pdf: length out of file: %lld + %lld > %lld\n",
(long long)p_stream, (long long)length, (long long)pdf->size);
noisy_warnmsg("length out of file, truncated: %lld + %lld > %lld\n",
(long long)p_stream, (long long)length, (long long)pdf->size); |
15a8a022 |
length = pdf->size - (obj->start + p_stream); |
3a0e133b |
} |
fb0c9fa2 |
|
15a8a022 |
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
const char *q = start + p_endstream;
length = size; |
13882281 |
q--; |
15a8a022 |
if (*q == '\n') {
q--;
length--;
if (*q == '\r')
length--;
} else if (*q == '\r') {
length--;
}
if (length < 0)
length = 0;
|
31064b37 |
cli_dbgmsg("cli_pdf: calculated length %lld\n", (long long)length); |
15a8a022 |
} else { |
cd94be7a |
if (size > (size_t)length+2) { |
31064b37 |
cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
(size_t)length, size); |
15a8a022 |
length = size;
}
}
|
cd94be7a |
if (orig_length && size > (size_t)orig_length + 20) { |
31064b37 |
cli_dbgmsg("cli_pdf: orig length: %lld, length: %lld, size: %zu\n",
(long long)orig_length, (long long)length, size); |
15a8a022 |
pdfobj_flag(pdf, obj, BAD_STREAMLEN);
}
if (!length) {
length = size;
if (!length) {
cli_dbgmsg("pdf_extract_obj: length and size both 0\n");
break; /* Empty stream, nothing to scan */
}
}
|
a081b3e9 |
if (cli_memstr(start, p_stream, "/XRef", 5))
xref = 1;
|
7aad5a3b |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
15a8a022 |
|
7aad5a3b |
pstr = pdf_getdict(start, &len, "/DecodeParms");
if (!pstr)
pstr = pdf_getdict(start, &len, "/DP"); |
15a8a022 |
|
7aad5a3b |
if (pstr) {
unsigned int objsz = obj_size(pdf, obj, 1); |
15a8a022 |
|
8da5f4c8 |
/* shift pstr left to "<<" for pdf_parse_dict */
while ((*pstr == '<') && (pstr > start)) {
pstr--;
len++;
}
/* shift pstr right to "<<" for pdf_parse_dict */ |
7aad5a3b |
while ((*pstr != '<') && (len > 0)) {
pstr++;
len--; |
15a8a022 |
}
|
7aad5a3b |
if (len > 4)
dparams = pdf_parse_dict(pdf, obj, objsz, (char *)pstr, NULL);
else
cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n"); |
13882281 |
} |
15a8a022 |
|
e09d8843 |
sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc); |
7aad5a3b |
if (dparams)
pdf_free_dict(dparams); |
15a8a022 |
|
5c291512 |
if (sum < 0 || (rc == CL_VIRUS && !(pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
sum = 0; /* prevents post-filter scan */
break;
} |
15a8a022 |
|
7aad5a3b |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
15a8a022 |
} else {
noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff); |
fb0c9fa2 |
}
|
15a8a022 |
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
const char *q2;
const char *q = pdf->map+obj->start;
/* TODO: get obj-endobj size */
off_t bytesleft = obj_size(pdf, obj, 0);
if (bytesleft < 0)
break;
do {
char *js = NULL; |
e09d8843 |
size_t js_len = 0; |
15a8a022 |
const char *q3;
q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);
if (!q2)
break;
bytesleft -= q2 - q + 11;
q = q2 + 11;
js = pdf_readstring(q, bytesleft, "/JS", NULL, &q2, !(pdf->flags & (1<<DECRYPTABLE_PDF)));
bytesleft -= q2 - q;
q = q2;
if (js) { |
7aad5a3b |
char *decrypted = NULL; |
15a8a022 |
const char *out = js;
js_len = strlen(js);
if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
cli_dbgmsg("cli_pdf: encrypted string\n"); |
e09d8843 |
decrypted = decrypt_any(pdf, obj->id, js, &js_len, pdf->enc_method_string); |
15a8a022 |
if (decrypted) {
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
out = decrypted;
}
}
|
e09d8843 |
if (filter_writen(pdf, obj, fout, out, js_len, (size_t*)&sum) != js_len) { |
15a8a022 |
rc = CL_EWRITE;
free(js);
break;
}
|
7aad5a3b |
free(decrypted); |
15a8a022 |
free(js);
cli_dbgmsg("bytesleft: %d\n", (int)bytesleft);
if (bytesleft > 0) {
q2 = pdf_nextobject(q, bytesleft);
if (!q2)
q2 = q + bytesleft - 1;
/* non-conforming PDFs that don't escape ) properly */
q3 = memchr(q, ')', bytesleft);
if (q3 && q3 < q2)
q2 = q3;
while (q2 > q && q2[-1] == ' ')
q2--;
if (q2 > q) {
q--; |
e09d8843 |
filter_writen(pdf, obj, fout, q, q2 - q, (size_t*)&sum); |
15a8a022 |
q++;
}
}
}
} while (bytesleft > 0);
} else {
off_t bytesleft = obj_size(pdf, obj, 0);
if (bytesleft < 0)
rc = CL_EFORMAT; |
e09d8843 |
else if (filter_writen(pdf, obj, fout , pdf->map + obj->start, bytesleft, (size_t*)&sum) != (size_t)bytesleft) |
15a8a022 |
rc = CL_EWRITE;
} |
ab564992 |
} while (0); |
15a8a022 |
|
a5e2b97d |
cli_dbgmsg("cli_pdf: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff); |
df085913 |
cli_dbgmsg(" ... to %s\n", fullname); |
15a8a022 |
|
1412b807 |
if (flags & PDF_EXTRACT_OBJ_SCAN && sum) { |
15a8a022 |
int rc2;
cli_updatelimits(pdf->ctx, sum);
/* TODO: invoke bytecode on this pdf obj with metainformation associated */
lseek(fout, 0, SEEK_SET);
rc2 = cli_magic_scandesc(fout, pdf->ctx);
if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
rc = rc2;
if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
if (rc2 == CL_VIRUS)
rc = rc2;
}
if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) && (obj->flags & (1 << OBJ_CONTENTS))) {
lseek(fout, 0, SEEK_SET);
cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
rc2 = pdf_scan_contents(fout, pdf);
if (rc2 == CL_VIRUS)
rc = rc2;
noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
} |
dc200c6b |
} |
15a8a022 |
|
ab564992 |
close(fout); |
15a8a022 |
|
1412b807 |
if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp) |
15a8a022 |
if (cli_unlink(fullname) && rc != CL_VIRUS)
rc = CL_EUNLINK;
|
3643f3d2 |
return rc;
}
|
6c135eb4 |
/*
 * Parser states referenced by the from_state / to_state columns of the
 * pdfname action table. Values are sequential starting at 0.
 */
enum objstate {
    STATE_NONE,
    STATE_S,
    STATE_FILTER,
    STATE_JAVASCRIPT,
    STATE_OPENACTION,
    STATE_LINEARIZED,
    STATE_LAUNCHACTION,
    STATE_CONTENTS,
    STATE_ANY /* for actions table below */
};
|
f7f9b88c |
/* Bit values stored in the nameflags member of struct pdfname_action. */
#define NAMEFLAG_NONE 0x0
#define NAMEFLAG_HEURISTIC 0x1
|
6c135eb4 |
struct pdfname_action {
const char *pdfname; |
dc200c6b |
enum pdf_objflags set_objflag;/* OBJ_DICT is noop */ |
6c135eb4 |
enum objstate from_state;/* STATE_NONE is noop */
enum objstate to_state; |
f7f9b88c |
uint32_t nameflags; |
63803da5 |
#if HAVE_JSON |
224d1c4d |
void (*pdf_stats_cb)(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act); |
63803da5 |
#endif |
6c135eb4 |
};
|
63803da5 |
#if HAVE_JSON |
6c135eb4 |
static struct pdfname_action pdfname_actions[] = { |
f7f9b88c |
{"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
{"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
{"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
{"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
{"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, EmbeddedFile_cb},
{"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
{"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
{"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Image_cb},
{"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
{"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
{"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
{"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
{"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
{"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
{"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JBIG2Decode_cb},
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
{"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JPXDecode_cb},
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, Crypt_cb},
{"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, Standard_cb},
{"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, Sig_cb},
{"V", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
{"R", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
{"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC, NULL},
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC, NULL},
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC, JavaScript_cb},
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
{"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC, NULL},
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC, OpenAction_cb},
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC, Launch_cb},
{"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Page_cb},
{"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC, NULL},
{"Author", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Author_cb},
{"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Producer_cb},
{"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, CreationDate_cb},
{"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, ModificationDate_cb},
{"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Creator_cb},
{"Title", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Title_cb},
{"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Keywords_cb},
{"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Subject_cb},
{"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Pages_cb},
{"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Colors_cb},
{"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, RichMedia_cb},
{"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, AcroForm_cb},
{"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, XFA_cb} |
6c135eb4 |
}; |
63803da5 |
#else
static struct pdfname_action pdfname_actions[] = { |
f7f9b88c |
{"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
{"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
{"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
{"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
{"V", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
{"R", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
{"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC},
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC},
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC},
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
{"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC},
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC},
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC},
{"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
{"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC} |
63803da5 |
};
#endif |
6c135eb4 |
|
edeb59b3 |
/*
 * Bit mask (over obj->flags) of the stream filters the name table
 * recognises.  handle_pdfname() uses it to decide which names get recorded
 * in obj->filterlist and whether an unmatched name counts as an unknown
 * filter.
 */
#define KNOWN_FILTERS             \
    ((1 << OBJ_FILTER_AH) |       \
     (1 << OBJ_FILTER_RL) |       \
     (1 << OBJ_FILTER_A85) |      \
     (1 << OBJ_FILTER_FLATE) |    \
     (1 << OBJ_FILTER_LZW) |      \
     (1 << OBJ_FILTER_FAX) |      \
     (1 << OBJ_FILTER_DCT) |      \
     (1 << OBJ_FILTER_JPX) |      \
     (1 << OBJ_FILTER_CRYPT))
eb270d5a |
|
15a8a022 |
static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const char *pdfname, int escapes, enum objstate *state) |
6c135eb4 |
{
struct pdfname_action *act = NULL;
unsigned j; |
15a8a022 |
|
5091689d |
obj->statsflags |= OBJ_FLAG_PDFNAME_DONE;
|
6c135eb4 |
for (j=0;j<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);j++) { |
15a8a022 |
if (!strcmp(pdfname, pdfname_actions[j].pdfname)) {
act = &pdfname_actions[j];
break;
} |
6c135eb4 |
} |
15a8a022 |
|
eb270d5a |
if (!act) { |
15a8a022 |
/* these are digital signature objects, filter doesn't matter,
* we don't need them anyway */
if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) {
cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname);
obj->flags |= 1 << OBJ_FILTER_UNKNOWN;
}
return; |
eb270d5a |
} |
15a8a022 |
|
a9584bfe |
/* record filter order */ |
c8ba4ae2 |
if (obj->numfilters < PDF_FILTERLIST_MAX && (*state == STATE_FILTER) && ((1 << act->set_objflag) & KNOWN_FILTERS)) |
a9584bfe |
obj->filterlist[obj->numfilters++] = act->set_objflag;
|
f7f9b88c |
if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) {
/* if a commonly used PDF name is escaped that is certainly
suspicious. */
cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
}
|
63803da5 |
#if HAVE_JSON |
49bc4992 |
if ((act->pdf_stats_cb))
act->pdf_stats_cb(pdf, obj, act); |
63803da5 |
#endif |
49bc4992 |
|
15a8a022 |
if (act->from_state == *state || act->from_state == STATE_ANY) {
*state = act->to_state;
|
c8ba4ae2 |
if (*state == STATE_FILTER && act->set_objflag != OBJ_DICT && (obj->flags & (1 << act->set_objflag))) { |
15a8a022 |
cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname);
pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
}
obj->flags |= 1 << act->set_objflag; |
6c135eb4 |
} else { |
15a8a022 |
/* auto-reset states */
switch (*state) {
case STATE_S:
*state = STATE_NONE;
break;
default:
break;
} |
6c135eb4 |
}
}
|
3f8016ce |
static int pdf_readint(const char *q0, int len, const char *key); |
bbfad9ba |
static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
{
const char *q, *q2;
uint32_t objid;
if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) { |
15a8a022 |
q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
if (!q)
return;
len -= q - enc;
enc = q; |
bbfad9ba |
} |
15a8a022 |
|
bbfad9ba |
q = enc + 8;
len -= 8;
q2 = pdf_nextobject(q, len);
if (!q2 || !isdigit(*q2)) |
15a8a022 |
return;
|
bbfad9ba |
objid = atoi(q2) << 8;
len -= q2 - q;
q = q2;
q2 = pdf_nextobject(q, len);
if (!q2 || !isdigit(*q2)) |
15a8a022 |
return;
|
bbfad9ba |
objid |= atoi(q2) & 0xff;
len -= q2 - q;
q = q2;
q2 = pdf_nextobject(q, len);
if (!q2 || *q2 != 'R') |
15a8a022 |
return;
|
bbfad9ba |
cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff); |
15a8a022 |
|
bbfad9ba |
pdf->enc_objid = objid;
}
/*
 * Look for an /Encrypt entry in a trailer region and, when one is present,
 * mark the document as encrypted, record the encryption object id and pick
 * up the file /ID.
 *
 * pdf     scan state (flags, enc_objid, fileID, fileIDlen may be updated)
 * s       start of the region to search
 * length  number of bytes available at s
 *
 * The /ID length is read into a local first: pdf_readstring() zeroes its
 * length output on entry, so passing &pdf->fileIDlen directly would reset
 * the stored length even when no new ID is found and the old pdf->fileID is
 * kept.
 */
static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)
{
    const char *enc;
    char *newID;
    unsigned newIDlen = 0;

    enc = cli_memstr(s, length, "/Encrypt", 8);
    if (!enc)
        return;

    pdf->flags |= 1 << ENCRYPTED_PDF;
    pdf_parse_encrypt(pdf, enc, s + length - enc);

    newID = pdf_readstring(s, length, "/ID", &newIDlen, NULL, 0);
    if (newID) {
        free(pdf->fileID);
        pdf->fileID    = newID;
        pdf->fileIDlen = newIDlen;
    }
}
|
930b9395 |
void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
6c135eb4 |
{
/* enough to hold common pdf names, we don't need all the names */
char pdfname[64]; |
8d1ef133 |
const char *q2, *q3; |
c9a070c9 |
const char *nextobj = NULL, *nextopen = NULL, *nextclose = NULL; |
6c135eb4 |
const char *q = obj->start + pdf->map; |
8d1ef133 |
const char *dict, *enddict, *start; |
0fe3b769 |
off_t dict_length, full_dict_length; |
8d1ef133 |
off_t objsize = obj_size(pdf, obj, 1);
off_t bytesleft; |
d18d7221 |
size_t i;
unsigned filters=0, blockopens=0; |
6c135eb4 |
enum objstate objstate = STATE_NONE; |
fdfd5814 |
#if HAVE_JSON
json_object *pdfobj=NULL, *jsonobj=NULL;
#endif |
6c135eb4 |
|
8d1ef133 |
if (objsize < 0) |
15a8a022 |
return;
|
6c135eb4 |
start = q; |
8d1ef133 |
bytesleft = objsize;
|
6c135eb4 |
/* find start of dictionary */
do { |
15a8a022 |
nextobj = pdf_nextobject(q, bytesleft);
bytesleft -= nextobj -q;
if (!nextobj || bytesleft < 0) {
cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff); |
fdfd5814 |
#if HAVE_JSON
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
}
if (pdfobj) {
if (!(jsonobj))
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithoutDictionaries");
if (jsonobj)
cli_jsonint_array(jsonobj, obj->id>>8);
}
#endif |
15a8a022 |
return;
}
q3 = memchr(q-1, '<', nextobj-q+1);
nextobj++;
bytesleft--;
q = nextobj; |
6c135eb4 |
} while (!q3 || q3[1] != '<');
dict = q3+2;
q = dict; |
8d1ef133 |
blockopens++;
bytesleft = objsize - (q - start); |
4c19109d |
enddict = q + bytesleft - 1; |
8d1ef133 |
/* find end of dictionary block */ |
4c19109d |
if (bytesleft < 0) { |
5b574c47 |
cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff); |
fdfd5814 |
#if HAVE_JSON
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
}
if (pdfobj) {
if (!(jsonobj))
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
if (jsonobj)
cli_jsonint_array(jsonobj, obj->id>>8);
}
#endif |
4c19109d |
return;
} |
8d1ef133 |
|
4c19109d |
/* while still looking ... */
while ((q < enddict-1) && (blockopens > 0)) {
/* find next close */ |
bc52aff8 |
nextclose = memchr(q, '>', enddict-q); |
4c19109d |
if (nextclose && (nextclose[1] == '>')) {
/* check for nested open */ |
374b5aea |
while ((nextopen = memchr(q-1, '<', nextclose-q+1)) != NULL) { |
4c19109d |
if (nextopen[1] == '<') {
/* nested open */
blockopens++;
q = nextopen + 2; |
8d1ef133 |
}
else { |
4c19109d |
/* unmatched < before next close */
q = nextopen + 2; |
8d1ef133 |
}
} |
4c19109d |
/* close block */
blockopens--;
q = nextclose + 2; |
8d1ef133 |
} |
4c19109d |
else if (nextclose) {
/* found one > but not two */
q = nextclose + 2;
}
else {
/* next closing not found */ |
bc52aff8 |
break; |
4c19109d |
}
} |
8d1ef133 |
|
4c19109d |
/* Was end of dictionary found? */ |
bc52aff8 |
if (blockopens) {
/* probably truncated */
cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff); |
fdfd5814 |
#if HAVE_JSON
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
}
if (pdfobj) {
if (!(jsonobj))
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
if (jsonobj)
cli_jsonint_array(jsonobj, obj->id>>8);
}
#endif |
4c19109d |
return; |
bc52aff8 |
} |
15a8a022 |
|
8d1ef133 |
enddict = nextclose; |
6c135eb4 |
obj->flags |= 1 << OBJ_DICT; |
8d1ef133 |
full_dict_length = dict_length = enddict - dict;
/* This code prints the dictionary content.
{
char * dictionary = malloc(dict_length + 1);
if (dictionary) { |
4c19109d |
for (i = 0; i < dict_length; i++) { |
115b6306 |
if (dict[i] == '\r')
dictionary[i] = '\n';
else if (isprint(dict[i]) || isspace(dict[i])) |
4c19109d |
dictionary[i] = dict[i];
else
dictionary[i] = '*';
} |
8d1ef133 |
dictionary[dict_length] = '\0';
cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
free(dictionary);
}
}
*/ |
6c135eb4 |
|
9c617dbe |
/* process pdf names */ |
5af966d3 |
for (q = dict;dict_length > 0;) { |
15a8a022 |
int escapes = 0, breakout=0;
q2 = memchr(q, '/', dict_length);
if (!q2)
break;
dict_length -= q2 - q;
q = q2;
/* normalize PDF names */
for (i = 0;dict_length > 0 && (i < sizeof(pdfname)-1); i++) {
q++;
dict_length--;
if (*q == '#') {
if (cli_hex2str_to(q+1, pdfname+i, 2) == -1)
break;
q += 2;
dict_length -= 2;
escapes = 1;
continue;
}
switch (*q) {
case ' ':
case '\t':
case '\r':
case '\n':
case '/':
case '>':
case '[':
case ']':
case '<':
case '(':
breakout = 1;
}
if (breakout)
break;
pdfname[i] = *q;
}
pdfname[i] = '\0';
handle_pdfname(pdf, obj, pdfname, escapes, &objstate);
if (objstate == STATE_LINEARIZED) {
long trailer_end, trailer;
pdfobj_flag(pdf, obj, LINEARIZED_PDF);
objstate = STATE_NONE;
trailer_end = pdf_readint(dict, full_dict_length, "/H");
if (trailer_end > 0 && trailer_end < pdf->size) {
trailer = trailer_end - 1024;
if (trailer < 0)
trailer = 0;
q2 = pdf->map + trailer;
cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
pdf_parse_trailer(pdf, q2, trailer_end - trailer);
if (pdf->fileID)
cli_dbgmsg("cli_pdf: found fileID\n");
}
}
if (objstate == STATE_LAUNCHACTION)
pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);
if (dict_length > 0 && (objstate == STATE_JAVASCRIPT || objstate == STATE_OPENACTION || objstate == STATE_CONTENTS)) { |
69b4a223 |
off_t dict_remaining = dict_length;
|
15a8a022 |
if (objstate == STATE_OPENACTION)
pdfobj_flag(pdf, obj, HAS_OPENACTION);
|
69b4a223 |
q2 = pdf_nextobject(q, dict_remaining); |
15a8a022 |
if (q2 && isdigit(*q2)) { |
69b4a223 |
const char * q2_old = NULL;
dict_remaining -= (off_t)(q2 - q);
|
15a8a022 |
uint32_t objid = atoi(q2) << 8;
while (isdigit(*q2))
q2++;
|
69b4a223 |
q2_old = q2;
q2 = pdf_nextobject(q2, dict_remaining); |
15a8a022 |
if (q2 && isdigit(*q2)) { |
69b4a223 |
dict_remaining -= (off_t)(q2 - q2_old); |
15a8a022 |
objid |= atoi(q2) & 0xff;
|
69b4a223 |
q2 = pdf_nextobject(q2, dict_remaining); |
15a8a022 |
if (q2 && *q2 == 'R') {
struct pdf_obj *obj2;
cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n", pdfname, objid >> 8, objid&0xff);
obj2 = find_obj(pdf, obj, objid);
if (obj2) {
enum pdf_objflags flag =
objstate == STATE_JAVASCRIPT ? OBJ_JAVASCRIPT :
objstate == STATE_OPENACTION ? OBJ_OPENACTION :
OBJ_CONTENTS;
obj2->flags |= 1 << flag;
obj->flags &= ~(1 << flag);
} else {
pdfobj_flag(pdf, obj, BAD_INDOBJ);
}
}
}
}
objstate = STATE_NONE;
} |
6c135eb4 |
} |
15a8a022 |
|
9acc81d6 |
for (i=0;i<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);i++) { |
15a8a022 |
const struct pdfname_action *act = &pdfname_actions[i];
if ((obj->flags & (1 << act->set_objflag)) &&
act->from_state == STATE_FILTER &&
act->to_state == STATE_FILTER &&
act->set_objflag != OBJ_FILTER_CRYPT &&
act->set_objflag != OBJ_FILTER_STANDARD) {
filters++;
} |
9acc81d6 |
} |
15a8a022 |
if (filters > 2) {
/* more than 2 non-crypt filters */
pdfobj_flag(pdf, obj, MANY_FILTERS); |
9acc81d6 |
} |
15a8a022 |
|
b835a528 |
if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS)) |
15a8a022 |
obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);
|
b835a528 |
if (obj->flags & (1 << OBJ_FILTER_UNKNOWN)) |
15a8a022 |
pdfobj_flag(pdf, obj, UNKNOWN_FILTER);
|
6c135eb4 |
cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
}
|
7606789f |
/*
 * Find key inside a dictionary buffer and return a pointer to its value.
 *
 * q0   start of the buffer to search
 * len  in: bytes available at q0; out: reduced by the distance the search
 *      advanced (it is also reduced to the key position when the key is
 *      found but no value follows it)
 * key  NUL-terminated key to look for, e.g. "/Length"
 *
 * Returns NULL when the length is not positive, q0 is NULL, the key is
 * absent, or nothing follows the key.  When the byte in front of the value
 * is '<' the returned pointer is moved back onto it.
 */
static const char *pdf_getdict(const char *q0, int* len, const char *key)
{
    const char *keypos;
    const char *value;

    if (*len <= 0) {
        cli_dbgmsg("cli_pdf: bad length %d\n", *len);
        return NULL;
    }

    if (q0 == NULL)
        return NULL;

    keypos = cli_memstr(q0, *len, key, strlen(key));
    if (keypos == NULL) {
        cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
        return NULL;
    }

    *len -= keypos - q0;

    value = pdf_nextobject(keypos + 1, *len - 1);
    if (value == NULL) {
        cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
        return NULL;
    }

    if (value[-1] == '<')
        value--;

    *len -= value - keypos;
    return value;
}
|
fb0c9fa2 |
/*
 * Read the string value stored under key and return it as a freshly
 * allocated, NUL-terminated buffer that the caller must free().
 *
 * q0        dictionary buffer
 * len       bytes available at q0
 * key       key to look up, e.g. "/O"
 * slen      optional out: length of the returned data (0 on failure)
 * qend      optional out: set to q0 on entry, then to the position where
 *           parsing of the value stopped
 * noescape  non-zero: copy a literal string verbatim; zero: decode
 *           backslash escapes
 *
 * Two forms are accepted: a literal string "( ... )" with balanced nested
 * parentheses, and a hex string "< ... >".  Returns NULL when the key is
 * missing, the value has neither form, the hex digits are invalid, or
 * memory cannot be allocated.
 */
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape)
{
    char *s, *s0;
    const char *start, *q, *end;

    if (slen)
        *slen = 0;

    if (qend)
        *qend = q0;

    q = pdf_getdict(q0, &len, key);
    if (!q || len <= 0)
        return NULL;

    if (*q == '(') {
        int paren = 1;

        start = ++q;
        len--; /* the opening '(' has been consumed */

        /* find the matching ')' without ever stepping past the buffer */
        for (; paren > 0 && len > 0; q++, len--) {
            switch (*q) {
                case '(':
                    paren++;
                    break;
                case ')':
                    paren--;
                    break;
                case '\\':
                    /* skip the escaped character, if there is one */
                    if (len > 1) {
                        q++;
                        len--;
                    }
                    break;
                default:
                    break;
            }
        }

        if (qend)
            *qend = q;

        q--;
        len = q - start;
        if (len < 0)
            return NULL; /* nothing after the '(' */

        s0 = s = cli_malloc(len + 1);
        if (!s) {
            cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
            return NULL;
        }

        end = start + len;
        if (noescape) {
            memcpy(s0, start, len);
            s = s0 + len;
        } else {
            for (q = start; q < end; q++) {
                if (*q != '\\') {
                    *s++ = *q;
                    continue;
                }

                q++;
                switch (*q) {
                    case 'n':
                        *s++ = '\n';
                        break;
                    case 'r':
                        *s++ = '\r';
                        break;
                    case 't':
                        *s++ = '\t';
                        break;
                    case 'b':
                        *s++ = '\b';
                        break;
                    case 'f':
                        *s++ = '\f';
                        break;
                    case '(': /* fall-through */
                    case ')': /* fall-through */
                    case '\\':
                        *s++ = *q;
                        break;
                    case '\n':
                        /* line continuation: ignore */
                        break;
                    case '\r':
                        /* line continuation: ignore, including a following LF */
                        if (q + 1 < end && q[1] == '\n')
                            q++;
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7': {
                        /* octal escape: one to three octal digits, starting
                         * with the digit q points at */
                        unsigned value = (unsigned)(*q - '0');
                        int ndigits;

                        for (ndigits = 1;
                             ndigits < 3 && q + 1 < end && q[1] >= '0' && q[1] <= '7';
                             ndigits++) {
                            q++;
                            value = value * 8 + (unsigned)(*q - '0');
                        }
                        /* anything above eight bits is discarded */
                        *s++ = (char)(value & 0xff);
                        break;
                    }
                    default:
                        /* not an escape we know (this includes '8' and '9',
                         * which are not octal digits): keep the backslash
                         * and let the loop copy the character itself */
                        *s++ = '\\';
                        q--;
                        break;
                }
            }
        }

        *s++ = '\0';
        if (slen)
            *slen = s - s0 - 1;

        return s0;
    }

    /* hex string: needs at least '<', one more byte and '>' */
    if ((*q == '<') && (len >= 3)) {
        start = ++q;
        len--; /* the opening '<' has been consumed */

        /* the search begins one byte in, so one byte less is available */
        q = memchr(q + 1, '>', len - 1);
        if (!q)
            return NULL;

        if (qend)
            *qend = q;

        s = cli_malloc((q - start) / 2 + 1);
        if (s == NULL) { /* oops, couldn't allocate memory */
            cli_dbgmsg("cli_pdf: unable to allocate memory...\n");
            return NULL;
        }

        if (cli_hex2str_to(start, s, q - start)) {
            cli_dbgmsg("cli_pdf: %s has bad hex value\n", key);
            free(s);
            return NULL;
        }

        s[(q - start) / 2] = '\0';
        if (slen)
            *slen = (q - start) / 2;

        return s;
    }

    cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
    return NULL;
}
|
374be101 |
/*
 * Read the name value stored under key ("/Key /Value") and return the text
 * after the value's '/' as a freshly allocated, NUL-terminated string that
 * the caller must free().
 *
 * q    dictionary buffer
 * len  bytes available at q
 * key  key to look up, e.g. "/StmF"
 *
 * The value runs up to the next '/', the next ">>", a NUL byte or the end
 * of the buffer, whichever comes first; trailing whitespace is removed.
 * Returns NULL when the key is missing, the value is not a name, or memory
 * cannot be allocated.
 */
static char *pdf_readval(const char *q, int len, const char *key)
{
    const char *end;
    char *s;
    int origlen = len;

    q = pdf_getdict(q, &len, key);
    if (!q || len <= 0)
        return NULL;

    while (len > 0 && *q && *q == ' ') {
        q++;
        len--;
    }

    /* skipping spaces may have used up the buffer: do not read *q then */
    if (len <= 0 || *q != '/')
        return NULL;

    q++;
    len--;
    end = q;

    while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
        end++;
        len--;
    }

    /* end-of-buffer whitespace trimming; ctype functions need an unsigned
     * char value */
    while (len < origlen && isspace((unsigned char)*(end - 1))) {
        end--;
        len++;
    }

    s = cli_malloc(end - q + 1);
    if (!s)
        return NULL;

    memcpy(s, q, end - q);
    s[end - q] = '\0';

    return s;
}
|
7606789f |
/*
 * Read the integer stored under key.
 *
 * q0   dictionary buffer
 * len  bytes available at q0
 * key  key to look up, e.g. "/R"
 *
 * Returns the value converted with atoi(), or -1 when pdf_getdict() cannot
 * find a value for the key.
 */
static int pdf_readint(const char *q0, int len, const char *key)
{
    const char *value = pdf_getdict(q0, &len, key);

    if (value == NULL)
        return -1;

    return atoi(value);
}
/*
 * Read the boolean stored under key.
 *
 * q0       dictionary buffer
 * len      bytes available at q0
 * key      key to look up, e.g. "/EncryptMetadata"
 * Default  value returned when no usable boolean is found
 *
 * Returns 1 for "true", 0 for "false".  Default is returned when the key is
 * missing, fewer than 5 bytes remain at the value, or the value is neither
 * word (the last case is also logged).
 */
static int pdf_readbool(const char *q0, int len, const char *key, int Default)
{
    const char *value = pdf_getdict(q0, &len, key);

    if (value != NULL && len >= 5) {
        if (strncmp(value, "true", 4) == 0)
            return 1;

        if (strncmp(value, "false", 5) == 0)
            return 0;

        cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
    }

    return Default;
}
/*
 * 32-byte padding string.  check_user_password() copies 32 bytes of it
 * wherever the key derivation takes the password, i.e. it tests the case
 * "empty password, password == padding".
 */
static const char *key_padding =
"\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
"\x2e\x2e\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
/*
 * Debug helper: log len bytes of hex as a hexadecimal string, prefixed with
 * msg.  Does nothing unless cli_debug_flag is set.
 *
 * msg  label printed in front of the dump
 * hex  bytes to dump
 * len  number of bytes at hex
 */
static void dbg_printhex(const char *msg, const char *hex, unsigned len)
{
    char *kh;

    if (!cli_debug_flag)
        return;

    kh = cli_str2hex(hex, len);
    /* presumably NULL when the conversion buffer cannot be allocated; never
     * hand a NULL pointer to %s */
    if (!kh)
        return;

    cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
    free(kh);
}
static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
const char *U, int32_t P, int EM, |
bbfad9ba |
const char *UE, |
7606789f |
unsigned length, unsigned oulen)
{
unsigned i;
uint8_t result[16];
char data[32];
struct arc4_state arc4;
unsigned password_empty = 0;
|
cd94be7a |
UNUSEDPARAM(oulen);
|
7606789f |
dbg_printhex("U: ", U, 32);
dbg_printhex("O: ", O, 32);
if (R == 5) { |
15a8a022 |
uint8_t result2[32];
/* supplement to ISO3200, 3.5.2 Algorithm 3.11 */
/* user validation salt */
cl_sha256(U+32, 8, result2, NULL); |
cd94be7a |
dbg_printhex("Computed U", (const char *)result2, 32); |
15a8a022 |
if (!memcmp(result2, U, 32)) { |
e09d8843 |
size_t UE_len; |
15a8a022 |
/* Algorithm 3.2a could be used to recover encryption key */
password_empty = 1;
cl_sha256(U+40, 8, result2, NULL); |
e09d8843 |
UE_len = UE ? strlen(UE) : 0;
if (UE_len != 32) {
cli_dbgmsg("cli_pdf: UE length is not 32: %zu\n", UE_len);
noisy_warnmsg("cli_pdf: UE length is not 32: %zu\n", UE_len); |
15a8a022 |
} else {
pdf->keylen = 32;
pdf->key = cli_malloc(32);
if (!pdf->key) {
cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n");
return;
}
|
e09d8843 |
aes_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0); |
15a8a022 |
dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
} |
241e7eb1 |
} |
374b5aea |
} else if ((R >= 2) && (R <= 4)) { |
b2e7c931 |
unsigned char *d;
size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0);
d = calloc(1, sz);
if (!(d))
return;
memcpy(d, key_padding, 32);
memcpy(d+32, O, 32);
P = le32_to_host(P);
memcpy(d+64, &P, 4);
memcpy(d+68, pdf->fileID, pdf->fileIDlen);
|
15a8a022 |
/* 7.6.3.3 Algorithm 2 */
/* empty password, password == padding */
if (R >= 4 && !EM) {
uint32_t v = 0xFFFFFFFF;
memcpy(d+68+pdf->fileIDlen, &v, 4);
}
cl_hash_data("md5", d, sz, result, NULL);
free(d);
if (length > 128)
length = 128;
if (R >= 3) {
/* Yes, this really is on purpose */
for (i=0;i<50;i++)
cl_hash_data("md5", result, length/8, result, NULL);
}
if (R == 2)
length = 40;
pdf->keylen = length / 8;
pdf->key = cli_malloc(pdf->keylen);
if (!pdf->key) |
b2e7c931 |
return;
|
15a8a022 |
memcpy(pdf->key, result, pdf->keylen); |
cd94be7a |
dbg_printhex("md5", (const char *)result, 16); |
15a8a022 |
dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
/* 7.6.3.3 Algorithm 6 */
if (R == 2) {
/* 7.6.3.3 Algorithm 4 */
memcpy(data, key_padding, 32); |
cd94be7a |
arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen);
arc4_apply(&arc4, (uint8_t *)data, 32); |
15a8a022 |
dbg_printhex("computed U (R2)", data, 32);
if (!memcmp(data, U, 32))
password_empty = 1;
} else if (R >= 3) {
unsigned len = pdf->keylen;
unsigned char *d;
d = calloc(1, 32 + pdf->fileIDlen);
if (!(d))
return;
/* 7.6.3.3 Algorithm 5 */
memcpy(d, key_padding, 32);
memcpy(d+32, pdf->fileID, pdf->fileIDlen);
cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL);
memcpy(data, pdf->key, len);
|
cd94be7a |
arc4_init(&arc4, (const uint8_t *)data, len); |
15a8a022 |
arc4_apply(&arc4, result, 16);
for (i=1;i<=19;i++) {
unsigned j;
for (j=0;j<len;j++)
data[j] = pdf->key[j] ^ i;
|
cd94be7a |
arc4_init(&arc4, (const uint8_t *)data, len); |
15a8a022 |
arc4_apply(&arc4, result, 16);
}
dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen); |
cd94be7a |
dbg_printhex("computed U (R>=3)", (const char *)result, 16); |
15a8a022 |
if (!memcmp(result, U, 16))
password_empty = 1; |
15fce6e0 |
free(d); |
15a8a022 |
} else {
cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
}
} else {
/* Supported R is in {2,3,4,5} */
cli_dbgmsg("cli_pdf: R value out of range\n");
noisy_warnmsg("cli_pdf: R value out of range\n");
return; |
374b5aea |
} |
15a8a022 |
|
7606789f |
if (password_empty) { |
15a8a022 |
cli_dbgmsg("cli_pdf: user password is empty\n");
noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
/* The key we computed above is the key used to encrypt the streams.
* We could decrypt it now if we wanted to */
pdf->flags |= 1 << DECRYPTABLE_PDF; |
7606789f |
} else { |
15a8a022 |
/* the key is not valid, we would need the user or the owner password to decrypt */
cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n"); |
7606789f |
}
}
|
1d0cdc67 |
enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def) |
bcc68567 |
{
const char *q;
char *CFM = NULL; |
884b2e73 |
enum enc_method ret = ENC_UNKNOWN; |
15a8a022 |
|
bcc68567 |
if (!key) |
15a8a022 |
return def;
|
bcc68567 |
if (!strcmp(key, "Identity")) |
15a8a022 |
return ENC_IDENTITY;
|
cd94be7a |
q = pdf_getdict(dict, (int *)(&len), key); |
bcc68567 |
if (!q) |
15a8a022 |
return def;
|
bcc68567 |
CFM = pdf_readval(q, len, "/CFM");
if (CFM) { |
15a8a022 |
cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM);
if (!strncmp(CFM,"V2", 2))
ret = ENC_V2;
else if (!strncmp(CFM,"AESV2",5))
ret = ENC_AESV2;
else if (!strncmp(CFM,"AESV3",5))
ret = ENC_AESV3;
else if (!strncmp(CFM,"None",4))
ret = ENC_NONE;
free(CFM); |
bcc68567 |
} |
15a8a022 |
|
884b2e73 |
return ret; |
bcc68567 |
}
|
e2b1880f |
void pdf_handle_enc(struct pdf_struct *pdf) |
7606789f |
{
struct pdf_obj *obj; |
cd94be7a |
uint32_t len, n, R, P, length, EM = 1, i, oulen; |
bcc68567 |
char *O, *U, *UE, *StmF, *StrF, *EFF; |
7606789f |
const char *q, *q2;
|
3f8016ce |
if (pdf->enc_objid == ~0u) |
15a8a022 |
return; |
3f8016ce |
if (!pdf->fileID) { |
15a8a022 |
cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
return; |
3f8016ce |
} |
15a8a022 |
|
3f8016ce |
obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
if (!obj) { |
15a8a022 |
cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
return; |
3f8016ce |
} |
15a8a022 |
|
7606789f |
len = obj_size(pdf, obj, 1);
q = pdf->map + obj->start;
|
bcc68567 |
O = U = UE = StmF = StrF = EFF = NULL; |
7606789f |
do { |
374be101 |
|
15a8a022 |
pdf->enc_method_string = ENC_UNKNOWN;
pdf->enc_method_stream = ENC_UNKNOWN;
pdf->enc_method_embeddedfile = ENC_UNKNOWN;
P = pdf_readint(q, len, "/P");
if (P == ~0u) {
cli_dbgmsg("cli_pdf: invalid P\n");
noisy_warnmsg("cli_pdf: invalid P\n");
break;
} |
7606789f |
|
15a8a022 |
q2 = cli_memstr(q, len, "/Standard", 9);
if (!q2) {
cli_dbgmsg("cli_pdf: /Standard not found\n");
noisy_warnmsg("cli_pdf: /Standard not found\n");
break;
} |
7606789f |
|
15a8a022 |
/* we can have both of these:
* /AESV2/Length /Standard/Length
* /Length /Standard
* make sure we don't mistake AES's length for Standard's */
length = pdf_readint(q2, len - (q2 - q), "/Length");
if (length == ~0u)
length = pdf_readint(q, len, "/Length");
if (length < 40) {
cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
length = 40;
} |
7606789f |
|
15a8a022 |
R = pdf_readint(q, len, "/R");
if (R == ~0u) {
cli_dbgmsg("cli_pdf: invalid R\n");
noisy_warnmsg("cli_pdf: invalid R\n");
break;
} |
7606789f |
|
15a8a022 |
if ((R > 5) || (R < 2)) {
cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n");
noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n");
break;
}
if (R < 5)
oulen = 32;
else
oulen = 48;
if (R == 2 || R == 3) {
pdf->enc_method_stream = ENC_V2;
pdf->enc_method_string = ENC_V2;
pdf->enc_method_embeddedfile = ENC_V2;
} else if (R == 4 || R == 5) {
EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
StmF = pdf_readval(q, len, "/StmF");
StrF = pdf_readval(q, len, "/StrF");
EFF = pdf_readval(q, len, "/EFF");
n = len; |
cd94be7a |
pdf->CF = pdf_getdict(q, (int *)(&n), "/CF"); |
15a8a022 |
pdf->CF_n = n;
if (StmF)
cli_dbgmsg("cli_pdf: StmF: %s\n", StmF);
if (StrF)
cli_dbgmsg("cli_pdf: StrF: %s\n", StrF);
if (EFF)
cli_dbgmsg("cli_pdf: EFF: %s\n", EFF);
pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);
pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);
pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream);
free(StmF);
free(StrF);
free(EFF);
cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n", EM ? "true" : "false");
if (R == 4) {
length = 128;
} else {
n = 0;
UE = pdf_readstring(q, len, "/UE", &n, NULL, 0);
length = 256;
}
}
if (length == ~0u)
length = 40;
n = 0;
O = pdf_readstring(q, len, "/O", &n, NULL, 0);
if (!O || n < oulen) {
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
if (O)
dbg_printhex("invalid O", O, n);
break;
}
if (n > oulen) {
for (i=oulen;i<n;i++)
if (O[i])
break;
if (i != n) {
dbg_printhex("too long O", O, n); |
e09d8843 |
noisy_warnmsg("too long O: %u", n); |
15a8a022 |
break;
}
}
n = 0;
U = pdf_readstring(q, len, "/U", &n, NULL, 0);
if (!U || n < oulen) { |
e09d8843 |
cli_dbgmsg("cli_pdf: invalid U: %u\n", n);
noisy_warnmsg("cli_pdf: invalid U: %u\n", n); |
15a8a022 |
if (U)
dbg_printhex("invalid U", U, n);
break;
}
if (n > oulen) {
for (i=oulen;i<n;i++)
if (U[i])
break;
if (i != n) {
dbg_printhex("too long U", U, n);
break;
}
}
|
e09d8843 |
cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %u\n", R, P, length); |
15a8a022 |
if (length % 8) {
cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
break;
}
check_user_password(pdf, R, O, U, P, EM, UE, length, oulen); |
7606789f |
} while (0); |
15a8a022 |
|
7606789f |
free(O);
free(U); |
bbfad9ba |
free(UE); |
7606789f |
}
|
e7a27135 |
/**
 * Scan a PDF document held in the current file map.
 *
 * Processing order:
 *  1. Look for "%PDF-" within the first (at most) 1032 bytes after offset and
 *     validate the "1.[1-9]" version that follows it.
 *  2. Inspect the last (at most) 2048 bytes of the map for "%%EOF" and
 *     "startxref", parse the trailer and check the xref table it points at.
 *     Problems here only set flags; they never abort the scan.
 *  3. Run the PRE hooks, enumerate and parse all objects, evaluate the
 *     encryption settings, run the PARSED hooks, extract every object and
 *     finally run the END hooks (only when flags were raised).
 *
 * When SCAN_ALL is set, alerts are counted and scanning continues; otherwise
 * the first non-zero status stops the remaining phases.
 *
 * @param dir     Stored in pdf.dir for the helpers; only logged here.
 * @param ctx     Scan context; *ctx->fmap supplies the data.
 * @param offset  Offset in the map at which to start looking for the header.
 *
 * @return CL_VIRUS if anything alerted; CL_EFORMAT if fewer than 8 bytes
 *         follow the header marker or (without alerts) an object was
 *         malformed; CL_EMAP if the map could not be read; CL_ETIMEOUT if the
 *         time limit was hit; CL_SUCCESS if no "%PDF-" header was found;
 *         otherwise the last phase status, with CL_BREAK reported as CL_CLEAN.
 */
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
{
    struct pdf_struct pdf;
    fmap_t *map   = *ctx->fmap;
    size_t size   = map->len - offset;
    off_t versize = size > 1032 ? 1032 : size;
    off_t map_off, bytesleft, pos;
    long xref;
    const char *pdfver, *tmp, *start, *eofmap, *q, *eof;
    int rc, badobjects = 0;
    unsigned i, alerts = 0;
#if HAVE_JSON
    json_object *pdfobj = NULL;
#endif

    cli_dbgmsg("in cli_pdf(%s)\n", dir);
    memset(&pdf, 0, sizeof(pdf));
    pdf.ctx       = ctx;
    pdf.dir       = dir;
    pdf.enc_objid = ~0u;

    pdfver = start = fmap_need_off_once(map, offset, versize);
    /* Check PDF version */
    if (!pdfver) {
        cli_errmsg("cli_pdf: mmap() failed (1)\n");
        return CL_EMAP;
    }

#if HAVE_JSON
    if (ctx->wrkproperty)
        pdfobj = cli_jsonobj(ctx->wrkproperty, "PDFStats");
#endif

    /* offset is 0 when coming from filetype2 */
    tmp = cli_memstr(pdfver, versize, "%PDF-", 5);
    if (!tmp) {
        cli_dbgmsg("cli_pdf: no PDF- header found\n");
        noisy_warnmsg("cli_pdf: no PDF- header found\n");
#if HAVE_JSON
        pdf_export_json(&pdf);
#endif
        return CL_SUCCESS;
    }

    versize -= tmp - pdfver;
    pdfver = tmp;
    if (versize < 8) {
        return CL_EFORMAT;
    }

    /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */
    if (pdfver[5] != '1' || pdfver[6] != '.' ||
        pdfver[7] < '1' || pdfver[7] > '9') {
        pdf.flags |= 1 << BAD_PDF_VERSION;
        cli_dbgmsg("cli_pdf: bad pdf version: %.8s\n", pdfver);
#if HAVE_JSON
        if (pdfobj)
            cli_jsonbool(pdfobj, "BadVersion", 1);
#endif
    } else {
#if HAVE_JSON
        if (pdfobj) {
            /* "1." was validated above. Collect the digits that follow by
             * hand so we never read past the versize bytes that were mapped
             * (strtoul() on the raw map had no such bound). */
            const char *verstr = pdfver + 5;
            size_t verlen      = 2;
            char *vercopy;

            while (verlen + 5 < (size_t)versize && isdigit((unsigned char)verstr[verlen]))
                verlen++;

            vercopy = cli_calloc(verlen + 1, 1);
            if (vercopy) {
                memcpy(vercopy, verstr, verlen);
                cli_jsonstr(pdfobj, "PDFVersion", vercopy);
                free(vercopy);
            }
        }
#endif
    }

    if (pdfver != start || offset) {
        pdf.flags |= 1 << BAD_PDF_HEADERPOS;
        cli_dbgmsg("cli_pdf: PDF header is not at position 0: %lld\n", (long long)(pdfver - start + offset));
#if HAVE_JSON
        if (pdfobj)
            cli_jsonbool(pdfobj, "BadVersionLocation", 1);
#endif
    }

    offset += pdfver - start;

    /* find trailer and xref, don't fail if not found */
    map_off = (off_t)map->len - 2048;
    if (map_off < 0)
        map_off = 0;

    bytesleft = map->len - map_off;

    eofmap = fmap_need_off_once(map, map_off, bytesleft);
    if (!eofmap) {
        cli_errmsg("cli_pdf: mmap() failed (2)\n");
#if HAVE_JSON
        pdf_export_json(&pdf);
#endif
        return CL_EMAP;
    }

    eof = eofmap + bytesleft;

    /* Search backwards by index rather than by pointer so that a tail shorter
     * than the marker never forms a pointer before eofmap. As before, a match
     * at index 0 is not accepted. */
    for (pos = bytesleft - 5; pos > 0; pos--) {
        if (memcmp(eofmap + pos, "%%EOF", 5) == 0)
            break;
    }

    if (pos <= 0) {
        pdf.flags |= 1 << BAD_PDF_TRAILER;
        cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
#if HAVE_JSON
        if (pdfobj)
            cli_jsonbool(pdfobj, "NoEOF", 1);
#endif
    } else {
        for (pos -= 9; pos > 0; pos--) {
            if (memcmp(eofmap + pos, "startxref", 9) == 0)
                break;
        }

        if (pos <= 0) {
            pdf.flags |= 1 << BAD_PDF_TRAILER;
            cli_dbgmsg("cli_pdf: startxref not found\n");
#if HAVE_JSON
            if (pdfobj)
                cli_jsonbool(pdfobj, "NoXREF", 1);
#endif
        } else {
            char numbuf[32];
            size_t numlen;

            pdf_parse_trailer(&pdf, eofmap, eof - eofmap);

            q = eofmap + pos + 9;
            while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) {
                q++;
            }

            /* The map is not NUL-terminated: copy the number into a
             * terminated buffer so parsing cannot run past eof. */
            numlen = (size_t)(eof - q);
            if (numlen > sizeof(numbuf) - 1)
                numlen = sizeof(numbuf) - 1;
            memcpy(numbuf, q, numlen);
            numbuf[numlen] = '\0';
            xref           = strtol(numbuf, NULL, 10);

            /* Reject offsets that are negative or beyond the data instead of
             * feeding them into the size/offset arithmetic below. */
            if (xref < 0 || (size_t)xref >= map->len - (size_t)offset) {
                q = NULL;
            } else {
                bytesleft = map->len - offset - xref;
                if (bytesleft > 4096)
                    bytesleft = 4096;
                q = fmap_need_off_once(map, offset + xref, bytesleft);
            }

            if (!q || xrefCheck(q, q + bytesleft) == -1) {
                cli_dbgmsg("cli_pdf: did not find valid xref\n");
                pdf.flags |= 1 << BAD_PDF_TRAILER;
            }
        }
    }

    /* Bytes from the PDF header to the end of the map. The previous
     * "size -= offset" subtracted the caller-supplied offset a second time,
     * dropping the tail of any PDF that does not start at offset 0. */
    size     = map->len - offset;
    pdf.size = size;
    pdf.map  = fmap_need_off(map, offset, size);
    if (!pdf.map) {
        cli_errmsg("cli_pdf: mmap() failed (3)\n");
#if HAVE_JSON
        pdf_export_json(&pdf);
#endif
        return CL_EMAP;
    }
    pdf.startoff = offset;

    rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1);
    if ((rc == CL_VIRUS) && SCAN_ALL) {
        cli_dbgmsg("cli_pdf: (pre hooks) returned %d\n", rc);
        alerts++;
        rc = CL_CLEAN;
    } else if (rc) {
        cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
#if HAVE_JSON
        pdf_export_json(&pdf);
#endif
        return rc == CL_BREAK ? CL_CLEAN : rc;
    }

    /* parse PDF and find obj offsets */
    while ((rc = pdf_findobj(&pdf)) > 0) {
        struct pdf_obj *obj = &pdf.objs[pdf.nobjs - 1];

        cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id & 0xff, (long long)(obj->start + offset));
    }

    if (pdf.nobjs)
        pdf.nobjs--;

    if (rc == -1)
        pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;

    /* must parse after finding all objs, so we can flag indirect objects */
    for (i = 0; i < pdf.nobjs; i++) {
        struct pdf_obj *obj = &pdf.objs[i];

        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
            cli_errmsg("Timeout reached in the PDF parser\n");
#if HAVE_JSON
            pdf_export_json(&pdf);
#endif
            free(pdf.objs);
            free(pdf.fileID);
            free(pdf.key);
            return CL_ETIMEOUT;
        }

        pdf_parseobj(&pdf, obj);
    }

    pdf_handle_enc(&pdf);
    if (pdf.flags & (1 << ENCRYPTED_PDF))
        cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
                   (pdf.flags & (1 << DECRYPTABLE_PDF)) ?
                   "decryptable" : "not decryptable, stream will probably fail to decompress");

    if (DETECT_ENCRYPTED &&
        (pdf.flags & (1 << ENCRYPTED_PDF)) &&
        !(pdf.flags & (1 << DECRYPTABLE_PDF))) {
        /* It is encrypted, and a password/key needs to be supplied to decrypt.
         * This doesn't trigger for PDFs that are encrypted but don't need
         * a password to decrypt */
        rc = cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
        if (rc == CL_VIRUS) {
            alerts++;
            if (SCAN_ALL)
                rc = CL_CLEAN;
        }
    }

    if (!rc) {
        rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
        cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
        if (rc == CL_VIRUS) {
            alerts++;
            if (SCAN_ALL) {
                rc = CL_CLEAN;
            }
        }
    }

    /* extract PDF objs */
    for (i = 0; !rc && i < pdf.nobjs; i++) {
        struct pdf_obj *obj = &pdf.objs[i];

        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
            cli_errmsg("Timeout reached in the PDF parser\n");
#if HAVE_JSON
            pdf_export_json(&pdf);
#endif
            free(pdf.objs);
            free(pdf.fileID);
            free(pdf.key);
            return CL_ETIMEOUT;
        }

        rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
        switch (rc) {
            case CL_EFORMAT:
                /* Don't halt on one bad object */
                cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
                badobjects++;
                pdf.stats.ninvalidobjs++;
                rc = CL_CLEAN;
                break;
            case CL_VIRUS:
                alerts++;
                if (SCAN_ALL) {
                    rc = CL_CLEAN;
                }
                break;
            default:
                break;
        }
    }

    /* Stream-decoding failures are not reported for encrypted documents. */
    if (pdf.flags & (1 << ENCRYPTED_PDF))
        pdf.flags &= ~((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE));

    if (pdf.flags && !rc) {
        cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
        rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
        if (rc == CL_VIRUS) {
            alerts++;
            if (SCAN_ALL) {
                rc = CL_CLEAN;
            }
        }

        if (!rc && SCAN_ALGO && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) {
            if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
                /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
                cli_append_possibly_unwanted(ctx, "Heuristics.PDF.ObfuscatedNameObject");
            }
        }
#if 0
        /* TODO: find both trailers, and /Encrypt settings */
        if (pdf.flags & (1 << LINEARIZED_PDF))
            pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
        if (pdf.flags & (1 << MANY_FILTERS))
            pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
        if (!rc && (pdf.flags &
            ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
             (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
             (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
            rc = CL_EUNPACK;
        }
#endif
    }

    /* Any alert wins; otherwise report malformed objects if nothing else failed. */
    if (alerts) {
        rc = CL_VIRUS;
    } else if (!rc && badobjects) {
        rc = CL_EFORMAT;
    }

#if HAVE_JSON
    pdf_export_json(&pdf);
#endif

    cli_dbgmsg("cli_pdf: returning %d\n", rc);
    free(pdf.objs);
    free(pdf.fileID);
    free(pdf.key);

    /* PDF hooks may abort, don't return CL_BREAK to caller! */
    return rc == CL_BREAK ? CL_CLEAN : rc;
}
|
bce73fe9 |
/*
 * Find the start of the next line.
 *
 * Skips the rest of the current line, then the run of line terminators that
 * follows it, and returns a pointer to the first byte after that run.
 * Returns NULL when ptr is NULL, len is 0, or the buffer ends before a
 * following line begins.
 *
 * Note: strchr() also matches the terminating NUL of "\r\n", so a NUL byte
 * in the input is treated like a line terminator.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
    /* Nothing to scan; without this guard --len below would wrap around
     * and the loops would read past the buffer. */
    if (ptr == NULL || len == 0)
        return NULL;

    /* skip what is left of the current line */
    while (strchr("\r\n", *ptr) == NULL) {
        if (--len == 0)
            return NULL;
        ptr++;
    }

    /* skip the line terminator(s) */
    while (strchr("\r\n", *ptr) != NULL) {
        if (--len == 0)
            return NULL;
        ptr++;
    }

    return ptr;
}
9be10a55 |
|
ef8219b8 |
/*
 * Return the start of the next PDF object.
 * This assumes that we're not in a stream.
 *
 * The token under ptr is skipped first; after at least one separator
 * (whitespace, '[', '<', a line break or a comment) has been passed, the
 * next non-separator byte is returned. A '/' or '(' is returned as soon as
 * it is seen. Returns NULL if the buffer is exhausted first.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
    const char *cur  = ptr;
    size_t remaining = len;
    int past_token   = 0; /* set once a separator has been seen */

    while (remaining != 0) {
        const char ch = *cur;

        /* Start of a name object, or start of JS */
        if (ch == '/' || ch == '(')
            return cur;

        /* line break or comment: continue on the following line */
        if (ch == '\n' || ch == '\r' || ch == '%') {
            const char *next = pdf_nextlinestart(cur, remaining);

            if (next == NULL)
                return NULL;
            remaining -= (size_t)(next - cur);
            cur        = next;
            past_token = 1;
            continue;
        }

        if (ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f' ||
            ch == '[' /* array */ || ch == '<' /* dictionary */) {
            past_token = 1;
        } else if (past_token) {
            /* TODO: parse and return object type */
            return cur;
        }

        cur++;
        remaining--;
    }

    return NULL;
}
49bc4992 |
/* PDF statistics */ |
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nasciihexdecode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void ASCII85Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nascii85decode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void EmbeddedFile_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nembeddedfile++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void FlateDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nflate++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Image_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nimage++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void LZWDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nlzw++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void RunLengthDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nrunlengthdecode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void CCITTFaxDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nfaxdecode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void JBIG2Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
struct json_object *pdfobj, *jbig2arr;
UNUSEDPARAM(obj);
UNUSEDPARAM(act); |
49bc4992 |
|
084707b3 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
084707b3 |
if (!(pdf->ctx->wrkproperty))
return;
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
jbig2arr = cli_jsonarray(pdfobj, "JBIG2Objects");
if (!(jbig2arr))
return;
cli_jsonint_array(jbig2arr, obj->id>>8);
pdf->stats.njbig2decode++; |
49bc4992 |
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void DCTDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.ndctdecode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void JPXDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.njpxdecode++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Crypt_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.ncrypt++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Standard_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nstandard++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Sig_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nsigned++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void JavaScript_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
struct json_object *pdfobj, *jbig2arr;
UNUSEDPARAM(act); |
cd1d52d1 |
|
49bc4992 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
cd1d52d1 |
if (!(pdf->ctx->wrkproperty))
return;
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
jbig2arr = cli_jsonarray(pdfobj, "JavascriptObjects");
if (!(jbig2arr))
return;
cli_jsonint_array(jbig2arr, obj->id>>8);
|
49bc4992 |
pdf->stats.njs++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void OpenAction_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nopenaction++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Launch_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.nlaunch++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Page_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
49bc4992 |
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
49bc4992 |
if (!(pdf))
return;
pdf->stats.npage++;
} |
63803da5 |
#endif |
49bc4992 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
063f0d25 |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
063f0d25 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.author)) {
pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.author))
return;
pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
} |
063f0d25 |
} |
63803da5 |
#endif |
063f0d25 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
063f0d25 |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
063f0d25 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.creator)) {
pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creator))
return;
pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
} |
063f0d25 |
} |
63803da5 |
#endif |
063f0d25 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
063f0d25 |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
063f0d25 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.modificationdate)) {
pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.modificationdate))
return;
pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
} |
063f0d25 |
} |
63803da5 |
#endif |
063f0d25 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
063f0d25 |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
063f0d25 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.creationdate)) {
pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creationdate))
return;
pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
} |
063f0d25 |
} |
63803da5 |
#endif |
063f0d25 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
063f0d25 |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
063f0d25 |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.producer)) {
pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.producer))
return;
pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
} |
063f0d25 |
} |
63803da5 |
#endif |
063f0d25 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
754f976a |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
754f976a |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.title)) {
pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.title))
return;
pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
} |
754f976a |
} |
63803da5 |
#endif |
754f976a |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
754f976a |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
754f976a |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.keywords)) {
pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.keywords))
return;
pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
} |
754f976a |
} |
63803da5 |
#endif |
754f976a |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
754f976a |
{ |
cd94be7a |
UNUSEDPARAM(act);
|
754f976a |
if (!(pdf))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
9d33052f |
if (!(pdf->stats.subject)) {
pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.subject))
return;
pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
} |
754f976a |
} |
63803da5 |
#endif |
754f976a |
|
63803da5 |
#if HAVE_JSON |
09ff1409 |
static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
09ff1409 |
if (!(pdf))
return;
pdf->stats.nrichmedia++;
} |
63803da5 |
#endif |
09ff1409 |
|
63803da5 |
#if HAVE_JSON |
09ff1409 |
static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
09ff1409 |
if (!(pdf))
return;
pdf->stats.nacroform++;
} |
63803da5 |
#endif |
09ff1409 |
|
63803da5 |
#if HAVE_JSON |
09ff1409 |
static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{ |
cd94be7a |
UNUSEDPARAM(obj);
UNUSEDPARAM(act);
|
09ff1409 |
if (!(pdf))
return;
pdf->stats.nxfa++;
} |
63803da5 |
#endif |
09ff1409 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
ca78e3b3 |
{ |
440f1fff |
struct pdf_array *array;
const char *objstart = (const char *)(obj->start + pdf->map);
const char *begin; |
cd94be7a |
unsigned int objsz; |
440f1fff |
unsigned long npages=0, count;
struct pdf_array_node *node;
json_object *pdfobj;
|
cd94be7a |
UNUSEDPARAM(act);
|
440f1fff |
if (!(pdf) || !(pdf->ctx->wrkproperty))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
706c2943 |
objsz = obj_size(pdf, obj, 1);
|
440f1fff |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
begin = cli_memstr(objstart, objsz, "/Kids", 5);
if (!(begin))
return;
begin += 5;
|
cd94be7a |
array = pdf_parse_array(pdf, obj, objsz, (char *)begin, NULL); |
dd101bee |
if (!(array)) {
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
7a98488d |
return; |
dd101bee |
} |
7a98488d |
for (node = array->nodes; node != NULL; node = node->next)
if (node->datasz) |
dd101bee |
if (strchr((char *)(node->data), 'R')) |
7a98488d |
npages++; |
440f1fff |
begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6);
if (!(begin)) {
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
goto cleanup;
}
begin += 6;
while (begin - objstart < objsz && isspace(begin[0]))
begin++;
if (begin - objstart >= objsz) {
goto cleanup;
}
count = strtoul(begin, NULL, 10);
if (count != npages)
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
cleanup:
pdf_free_array(array); |
ca78e3b3 |
} |
63803da5 |
#endif |
ca78e3b3 |
|
63803da5 |
#if HAVE_JSON |
224d1c4d |
static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
ca78e3b3 |
{
json_object *colorsobj, *pdfobj;
unsigned long ncolors;
char *start, *p1; |
cd94be7a |
size_t objsz;
UNUSEDPARAM(act); |
ca78e3b3 |
if (!(pdf) || !(pdf->ctx) || !(pdf->ctx->wrkproperty))
return;
|
0bed896b |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
|
cd94be7a |
objsz = obj_size(pdf, obj, 1);
start = (char *)(obj->start + pdf->map); |
ca78e3b3 |
|
cd94be7a |
p1 = (char *)cli_memstr(start, objsz, "/Colors", 7); |
ca78e3b3 |
if (!(p1))
return;
p1 += 7;
/* Ensure that we have at least one whitespace character plus at least one number */
if (objsz - (p1 - start) < 2)
return;
while (p1 - start < objsz && isspace(p1[0]))
p1++;
|
cd94be7a |
if ((size_t)(p1 - start) == objsz) |
ca78e3b3 |
return;
ncolors = strtoul(p1, NULL, 10);
/* We only care if the number of colors > 2**24 */
if (ncolors < 1<<24)
return;
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
if (!(pdfobj))
return;
colorsobj = cli_jsonarray(pdfobj, "BigColors");
if (!(colorsobj))
return;
cli_jsonint_array(colorsobj, obj->id>>8);
} |
63803da5 |
#endif |
ca78e3b3 |
|
63803da5 |
#if HAVE_JSON |
ebcca55f |
static void pdf_export_json(struct pdf_struct *pdf)
{
json_object *pdfobj; |
c7fd0220 |
unsigned long i; |
ebcca55f |
if (!(pdf))
return;
|
a5570b79 |
if (!(pdf->ctx)) {
goto cleanup;
} |
ebcca55f |
|
a5570b79 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES) || !(pdf->ctx->wrkproperty)) {
goto cleanup;
} |
ebcca55f |
|
084707b3 |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"); |
a5570b79 |
if (!(pdfobj)) {
goto cleanup;
} |
ebcca55f |
|
0e7442f1 |
if (pdf->stats.author) { |
9d33052f |
if (!pdf->stats.author->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);
if (out) {
free(pdf->stats.author->data);
pdf->stats.author->data = out;
pdf->stats.author->meta.length = strlen(out);
pdf->stats.author->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) {
cli_jsonstr(pdfobj, "Author", pdf->stats.author->data); |
d010b117 |
} else if (pdf->stats.author->data && pdf->stats.author->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Author", b64);
cli_jsonbool(pdfobj, "Author_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "Author", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.creator) { |
9d33052f |
if (!pdf->stats.creator->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);
if (out) {
free(pdf->stats.creator->data);
pdf->stats.creator->data = out;
pdf->stats.creator->meta.length = strlen(out);
pdf->stats.creator->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) {
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data); |
d010b117 |
} else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Creator", b64);
cli_jsonbool(pdfobj, "Creator_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "Creator", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.producer) { |
9d33052f |
if (!pdf->stats.producer->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);
if (out) {
free(pdf->stats.producer->data);
pdf->stats.producer->data = out;
pdf->stats.producer->meta.length = strlen(out);
pdf->stats.producer->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) {
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data); |
d010b117 |
} else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Producer", b64);
cli_jsonbool(pdfobj, "Producer_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "Producer", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.modificationdate) { |
9d33052f |
if (!pdf->stats.modificationdate->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
if (out) {
free(pdf->stats.modificationdate->data);
pdf->stats.modificationdate->data = out;
pdf->stats.modificationdate->meta.length = strlen(out);
pdf->stats.modificationdate->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) {
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data); |
d010b117 |
} else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "ModificationDate", b64);
cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "ModificationDate", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.creationdate) { |
9d33052f |
if (!pdf->stats.creationdate->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
if (out) {
free(pdf->stats.creationdate->data);
pdf->stats.creationdate->data = out;
pdf->stats.creationdate->meta.length = strlen(out);
pdf->stats.creationdate->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) {
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data); |
d010b117 |
} else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "CreationDate", b64);
cli_jsonbool(pdfobj, "CreationDate_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "CreationDate", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.title) { |
9d33052f |
if (!pdf->stats.title->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);
if (out) {
free(pdf->stats.title->data);
pdf->stats.title->data = out;
pdf->stats.title->meta.length = strlen(out);
pdf->stats.title->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) {
cli_jsonstr(pdfobj, "Title", pdf->stats.title->data); |
d010b117 |
} else if (pdf->stats.title->data && pdf->stats.title->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Title", b64);
cli_jsonbool(pdfobj, "Title_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "Title", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.subject) { |
9d33052f |
if (!pdf->stats.subject->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);
if (out) {
free(pdf->stats.subject->data);
pdf->stats.subject->data = out;
pdf->stats.subject->meta.length = strlen(out);
pdf->stats.subject->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) {
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data); |
d010b117 |
} else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Subject", b64);
cli_jsonbool(pdfobj, "Subject_base64", 1);
free(b64); |
5f31c9b4 |
} else {
cli_jsonstr(pdfobj, "Subject", ""); |
9d33052f |
} |
0e7442f1 |
}
if (pdf->stats.keywords) { |
9d33052f |
if (!pdf->stats.keywords->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
if (out) {
free(pdf->stats.keywords->data);
pdf->stats.keywords->data = out;
pdf->stats.keywords->meta.length = strlen(out);
pdf->stats.keywords->meta.success = 1; |
24db616f |
} |
0e7442f1 |
} |
9d33052f |
if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) {
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data); |
d010b117 |
} else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) { |
5f31c9b4 |
char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length); |
9d33052f |
cli_jsonstr(pdfobj, "Keywords", b64);
cli_jsonbool(pdfobj, "Keywords_base64", 1);
free(b64); |
5f31c9b4 |
} else { |
188e40ae |
cli_jsonstr(pdfobj, "Keywords", ""); |
9d33052f |
} |
0e7442f1 |
} |
ebcca55f |
if (pdf->stats.ninvalidobjs)
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
if (pdf->stats.njs)
cli_jsonint(pdfobj, "JavaScriptObjectCount", pdf->stats.njs);
if (pdf->stats.nflate)
cli_jsonint(pdfobj, "DeflateObjectCount", pdf->stats.nflate);
if (pdf->stats.nactivex)
cli_jsonint(pdfobj, "ActiveXObjectCount", pdf->stats.nactivex);
if (pdf->stats.nflash)
cli_jsonint(pdfobj, "FlashObjectCount", pdf->stats.nflash);
if (pdf->stats.ncolors)
cli_jsonint(pdfobj, "ColorCount", pdf->stats.ncolors);
if (pdf->stats.nasciihexdecode)
cli_jsonint(pdfobj, "AsciiHexDecodeObjectCount", pdf->stats.nasciihexdecode);
if (pdf->stats.nascii85decode)
cli_jsonint(pdfobj, "Ascii85DecodeObjectCount", pdf->stats.nascii85decode);
if (pdf->stats.nembeddedfile)
cli_jsonint(pdfobj, "EmbeddedFileCount", pdf->stats.nembeddedfile);
if (pdf->stats.nimage)
cli_jsonint(pdfobj, "ImageCount", pdf->stats.nimage);
if (pdf->stats.nlzw)
cli_jsonint(pdfobj, "LZWCount", pdf->stats.nlzw);
if (pdf->stats.nrunlengthdecode)
cli_jsonint(pdfobj, "RunLengthDecodeCount", pdf->stats.nrunlengthdecode);
if (pdf->stats.nfaxdecode)
cli_jsonint(pdfobj, "FaxDecodeCount", pdf->stats.nfaxdecode);
if (pdf->stats.njbig2decode)
cli_jsonint(pdfobj, "JBIG2DecodeCount", pdf->stats.njbig2decode);
if (pdf->stats.ndctdecode)
cli_jsonint(pdfobj, "DCTDecodeCount", pdf->stats.ndctdecode);
if (pdf->stats.njpxdecode)
cli_jsonint(pdfobj, "JPXDecodeCount", pdf->stats.njpxdecode);
if (pdf->stats.ncrypt)
cli_jsonint(pdfobj, "CryptCount", pdf->stats.ncrypt);
if (pdf->stats.nstandard)
cli_jsonint(pdfobj, "StandardCount", pdf->stats.nstandard);
if (pdf->stats.nsigned)
cli_jsonint(pdfobj, "SignedCount", pdf->stats.nsigned);
if (pdf->stats.nopenaction)
cli_jsonint(pdfobj, "OpenActionCount", pdf->stats.nopenaction);
if (pdf->stats.nlaunch)
cli_jsonint(pdfobj, "LaunchCount", pdf->stats.nlaunch);
if (pdf->stats.npage)
cli_jsonint(pdfobj, "PageCount", pdf->stats.npage); |
09ff1409 |
if (pdf->stats.nrichmedia)
cli_jsonint(pdfobj, "RichMediaCount", pdf->stats.nrichmedia);
if (pdf->stats.nacroform)
cli_jsonint(pdfobj, "AcroFormCount", pdf->stats.nacroform);
if (pdf->stats.nxfa)
cli_jsonint(pdfobj, "XFACount", pdf->stats.nxfa); |
cfeac6cd |
if (pdf->flags & (1 << BAD_PDF_VERSION))
cli_jsonbool(pdfobj, "BadVersion", 1);
if (pdf->flags & (1 << BAD_PDF_HEADERPOS))
cli_jsonbool(pdfobj, "BadHeaderPosition", 1);
if (pdf->flags & (1 << BAD_PDF_TRAILER))
cli_jsonbool(pdfobj, "BadTrailer", 1);
if (pdf->flags & (1 << BAD_PDF_TOOMANYOBJS))
cli_jsonbool(pdfobj, "TooManyObjects", 1);
if (pdf->flags & (1 << ENCRYPTED_PDF)) {
cli_jsonbool(pdfobj, "Encrypted", 1);
if (pdf->flags & (1 << DECRYPTABLE_PDF))
cli_jsonbool(pdfobj, "Decryptable", 1); |
fc84532e |
else
cli_jsonbool(pdfobj, "Decryptable", 0); |
cfeac6cd |
} |
a5570b79 |
|
c7fd0220 |
for (i=0; i < pdf->nobjs; i++) {
if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) {
json_object *truncobj;
truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");
if (!(truncobj))
continue;
cli_jsonint_array(truncobj, pdf->objs[i].id>>8);
}
}
|
a5570b79 |
cleanup:
if ((pdf->stats.author)) { |
9d33052f |
if (pdf->stats.author->data)
free(pdf->stats.author->data); |
a5570b79 |
free(pdf->stats.author);
pdf->stats.author = NULL;
}
if (pdf->stats.creator) { |
9d33052f |
if (pdf->stats.creator->data)
free(pdf->stats.creator->data); |
a5570b79 |
free(pdf->stats.creator);
pdf->stats.creator = NULL;
}
if (pdf->stats.producer) { |
9d33052f |
if (pdf->stats.producer->data)
free(pdf->stats.producer->data); |
a5570b79 |
free(pdf->stats.producer);
pdf->stats.producer = NULL;
}
if (pdf->stats.modificationdate) { |
9d33052f |
if (pdf->stats.modificationdate->data)
free(pdf->stats.modificationdate->data); |
a5570b79 |
free(pdf->stats.modificationdate);
pdf->stats.modificationdate = NULL;
}
if (pdf->stats.creationdate) { |
9d33052f |
if (pdf->stats.creationdate->data)
free(pdf->stats.creationdate->data); |
a5570b79 |
free(pdf->stats.creationdate);
pdf->stats.creationdate = NULL;
} |
754f976a |
if (pdf->stats.title) { |
9d33052f |
if (pdf->stats.title->data)
free(pdf->stats.title->data); |
754f976a |
free(pdf->stats.title);
pdf->stats.title = NULL;
}
if (pdf->stats.subject) { |
9d33052f |
if (pdf->stats.subject->data)
free(pdf->stats.subject->data); |
754f976a |
free(pdf->stats.subject);
pdf->stats.subject = NULL;
}
if (pdf->stats.keywords) { |
9d33052f |
if (pdf->stats.keywords->data)
free(pdf->stats.keywords->data); |
754f976a |
free(pdf->stats.keywords);
pdf->stats.keywords = NULL;
} |
ebcca55f |
} |
63803da5 |
#endif |