libclamav/pdf.c
d056cc17
 /*
4a2576fe
  *  Copyright (C) 2015, 2017-2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
15a8a022
  *  Copyright (C) 2007-2014 Sourcefire, Inc.
2023340a
  *
6ff4e486
  *  Authors: Nigel Horne, Török Edvin
  *
  *  Also based on Matt Olney's pdf parser in snort-nrt.
d056cc17
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
d056cc17
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
2023340a
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
1eceda0e
  *
  * TODO: Embedded fonts
  * TODO: Predictor image handling
d056cc17
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
240d3307
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
511a59c7
 #include <errno.h>
ed6446ff
 #ifdef	HAVE_LIMITS_H
 #include <limits.h>
 #endif
9443ec4a
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
240d3307
 #include <zlib.h>
 
e746f010
 #if HAVE_ICONV
063f0d25
 #include <iconv.h>
e746f010
 #endif
063f0d25
 
45e60c0c
 #ifdef _WIN32
 #include <stdint.h>
 #endif
 
ed6446ff
 #include "clamav.h"
 #include "others.h"
654c0b96
 #include "pdf.h"
7aad5a3b
 #include "pdfdecode.h"
a5afcb67
 #include "scanners.h"
747c2055
 #include "fmap.h"
f461d74f
 #include "str.h"
dc200c6b
 #include "bytecode.h"
 #include "bytecode_api.h"
7606789f
 #include "arc4.h"
374be101
 #include "rijndael.h"
7719760b
 #include "textnorm.h"
5f31c9b4
 #include "conv.h"
ebcca55f
 #include "json_api.h"
4956690d
 
1eceda0e
 #ifdef	CL_DEBUG
5cd3f734
 /*#define	SAVE_TMP	
  *Save the file being worked on in tmp */
1eceda0e
 #endif
 
c1a785c4
 struct pdf_struct;
 
6e33139f
 static	int	asciihexdecode(const char *buf, off_t len, char *output);
5aad11ce
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
bce73fe9
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
ef8219b8
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
da653b74
 
ebcca55f
 /* PDF statistics callbacks and related */
224d1c4d
 struct pdfname_action;
49bc4992
 
63803da5
 #if HAVE_JSON
ebcca55f
 static void pdf_export_json(struct pdf_struct *);
 
224d1c4d
 static void ASCIIHexDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void ASCII85Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void EmbeddedFile_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void FlateDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Image_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void LZWDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void RunLengthDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void CCITTFaxDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void JBIG2Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void DCTDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void JPXDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Crypt_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Standard_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Sig_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void JavaScript_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void OpenAction_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Launch_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Page_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Author_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Creator_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Producer_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void CreationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void ModificationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Title_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Subject_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Keywords_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Pages_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
 static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
09ff1409
 static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
 static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
 static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
63803da5
 #endif
ebcca55f
 /* End PDF statistics callbacks and related */
49bc4992
 
e7a27135
 /*
  * Check whether a buffer looks like the beginning of a PDF cross-reference
  * section.
  *
  * xref  first byte to inspect
  * eof   one past the last readable byte
  *
  * Leading spaces, LF and CR are skipped first. Returns 0 when the classic
  * "xref" keyword starts at the first remaining byte, or when the name
  * "/XRef" (cross-reference stream) occurs anywhere before eof; returns -1
  * otherwise, including when fewer than five bytes remain.
  */
 static int xrefCheck(const char *xref, const char *eof)
 {
     const char *q;

     while (xref < eof && (*xref == ' ' || *xref == '\n' || *xref == '\r'))
         xref++;

     if (xref + 4 >= eof)
         return -1;

     if (!memcmp(xref, "xref", 4)) {
         cli_dbgmsg("cli_pdf: found xref\n");
         return 0;
     }

     /* could be xref stream */
     for (q = xref; q + 5 < eof; q++) {
         /* Compare the whole 5-byte name; comparing only 4 bytes would
          * also accept unrelated names that merely start with "/XRe". */
         if (!memcmp(q, "/XRef", 5)) {
             cli_dbgmsg("cli_pdf: found /XRef\n");
             return 0;
         }
     }

     return -1;
 }
 
4956690d
 /* define this to be noisy about things that we can't parse properly */
87a6cf95
 #undef NOISY
4956690d
 
 /*
  * Diagnostic helpers for parse problems.
  * With NOISY defined, noisy_msg() forwards to cli_infomsg() using pdf->ctx
  * and noisy_warnmsg() forwards to cli_warnmsg().
  */
 #ifdef NOISY
 #define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)
e09d8843
 #define noisy_warnmsg(...) cli_warnmsg(__VA_ARGS__)
4956690d
 #else
e09d8843
 /* Without NOISY both macros expand to nothing, so their arguments are
  * never evaluated. */
 #define noisy_msg(pdf, ...)
 #define noisy_warnmsg(...)
4956690d
 #endif
 
e7a27135
 /*
  * Step backwards from q over NUL, TAB, LF, FF, CR and space bytes.
  *
  * Returns a pointer to the first byte (scanning towards start) that is
  * none of those, or start itself if the scan reaches it. The byte at
  * start is never examined.
  */
 static const char *findNextNonWSBack(const char *q, const char *start)
 {
     for (; q > start; q--) {
         switch (*q) {
         case 0x00:
         case 0x09:
         case 0x0a:
         case 0x0c:
         case 0x0d:
         case 0x20:
             /* still whitespace: keep walking backwards */
             continue;
         default:
             return q;
         }
     }

     return q;
 }
 
15a8a022
 /*
  * Locate the payload of a PDF stream inside a buffer.
  *
  * start         buffer to search
  * bytesleft     number of bytes in which the "stream" keyword is searched
  * bytesleft2    number of bytes in which "endstream" may be searched
  * stream        out: offset (from start) of the first payload byte
  * endstream     out: offset (from start) of the "endstream" keyword; if the
  *               keyword is missing, 9 bytes before the end of the searched
  *               range; never smaller than *stream
  * newline_hack  when non-zero, an extra LF after the CR LF that follows
  *               "stream" is skipped as well
  *
  * Returns 1 when both offsets were set, 0 otherwise. *stream may already
  * have been written when 0 is returned because no bytes remain for the
  * payload.
  */
 static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream, int newline_hack)
 {
     const char *data, *end_kw;
     const char *kw = cli_memstr(start, bytesleft, "stream", 6);

     if (kw == NULL)
         return 0;

     data = kw + 6;
     bytesleft -= data - start;
     if (bytesleft < 0)
         return 0;

     /* skip the end-of-line marker that follows the keyword */
     if (bytesleft >= 2 && data[0] == '\xd' && data[1] == '\xa') {
         data += 2;
         if (newline_hack && (bytesleft > 2) && data[0] == '\xa')
             data++;
     } else if (bytesleft && data[0] == '\xa') {
         data++;
     }

     *stream = data - start;
     bytesleft2 -= data - start;
     if (bytesleft2 <= 0)
         return 0;

     end_kw = cli_memstr(data, bytesleft2, "endstream", 9);
     if (end_kw == NULL)
         end_kw = data + bytesleft2 - 9; /* till EOF */

     *endstream = end_kw - start;
     if (*endstream < *stream)
         *endstream = *stream;

     return 1;
 }
 
693757a1
 /* Expected returns: 1 if success, 0 if no more objects, -1 if error */
930b9395
 int pdf_findobj(struct pdf_struct *pdf)
e7a27135
 {
3643f3d2
     const char *start, *q, *q2, *q3, *eof;
e7a27135
     struct pdf_obj *obj;
     off_t bytesleft;
bf6e777f
     unsigned long genid, objid;
e7a27135
 
     pdf->nobjs++;
     pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
     if (!pdf->objs) {
15a8a022
         cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs);
         return -1;
e7a27135
     }
15a8a022
 
e7a27135
     obj = &pdf->objs[pdf->nobjs-1];
ab564992
     memset(obj, 0, sizeof(*obj));
e7a27135
     start = pdf->map+pdf->offset;
     bytesleft = pdf->size - pdf->offset;
bdbae203
     while (bytesleft > 0) {
15a8a022
         q2 = cli_memstr(start, bytesleft, "obj", 3);
         if (!q2)
             return 0;/* no more objs */
 
         q2--;
         bytesleft -= q2 - start;
         if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) {
             start = q2+4;
             bytesleft -= 4;
             continue;
         }
 
         break;
bdbae203
     }
15a8a022
 
bdbae203
     if (bytesleft <= 0)
15a8a022
         return 0;
bdbae203
 
e7a27135
     q = findNextNonWSBack(q2-1, start);
15a8a022
     while (q > start && isdigit(*q))
         q--;
 
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&genid)) {
         cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs);
         return -1;
     }
e7a27135
     q = findNextNonWSBack(q-1,start);
15a8a022
     while (q > start && isdigit(*q))
         q--;
 
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
         /*
          * PDFs with multiple revisions will have %%EOF before the end of the file, 
          * followed by the next revision of the PDF.  If this is the case, we can 
          * detect it and continue parsing after the %%EOF.
          */
         if (q - 4 > start) {
             const char* lastfile = q - 4;
             if (0 != strncmp(lastfile, "\%\%EOF", 5)) {
                 /* Nope, wasn't %%EOF, I guess just fail out. */
                 cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
                 return -1;
             }
             /* Yup, Looks, like the file continues after %%EOF.  
              * Probably another revision.  Keep parsing... */
             q++;
             cli_dbgmsg("cli_pdf: \%\%EOF detected before end of file, at %zu\n", (size_t)q);
         } else {
             /* Failed parsing at the very beginning */
             cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
             return -1;
         }
         /* Try again, with offset slightly adjusted */
         if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
             cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
             return -1;
         }
         cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n");
     }
e7a27135
     obj->id = (objid << 8) | (genid&0xff);
     obj->start = q2+4 - pdf->map;
     obj->flags = 0;
     bytesleft -= 4;
     eof = pdf->map + pdf->size;
     q = pdf->map + obj->start;
15a8a022
 
e7a27135
     while (q < eof && bytesleft > 0) {
15a8a022
         off_t p_stream, p_endstream;
         q2 = pdf_nextobject(q, bytesleft);
         if (!q2)
             q2 = pdf->map + pdf->size;
 
         bytesleft -= q2 - q;
         if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) {
             obj->flags |= 1 << OBJ_STREAM;
             q2 = q-1 + p_endstream + 9;
             bytesleft -= q2 - q + 1;
 
             if (bytesleft < 0) {
                 obj->flags |= 1 << OBJ_TRUNCATED;
                 pdf->offset = pdf->size;
                 return 1;/* truncated */
             }
         } else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
             q2 = q3 + 6;
             pdf->offset = q2 - pdf->map;
             return 1; /* obj found and offset positioned */
         } else {
             q2++;
             bytesleft--;
         }
 
         q = q2;
e7a27135
     }
15a8a022
 
9acc81d6
     obj->flags |= 1 << OBJ_TRUNCATED;
     pdf->offset = pdf->size;
15a8a022
 
9acc81d6
     return 1;/* truncated */
e7a27135
 }
 
e09d8843
 static size_t filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, size_t len, size_t *sum)
3643f3d2
 {
cd94be7a
     UNUSEDPARAM(obj);
 
e09d8843
     if (cli_checklimits("pdf", pdf->ctx, (unsigned long)*sum, 0, 0)) /* TODO: May truncate for large values on 64-bit platforms */
15a8a022
         return len; /* pretend it was a successful write to suppress CL_EWRITE */
 
3643f3d2
     *sum += len;
15a8a022
 
e09d8843
     return cli_writen(fout, buf, (unsigned int)len);
3643f3d2
 }
 
1d0cdc67
 void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag)
eb270d5a
 {
     const char *s= "";
     pdf->flags |= 1 << flag;
     if (!cli_debug_flag)
15a8a022
         return;
 
eb270d5a
     switch (flag) {
15a8a022
     case UNTERMINATED_OBJ_DICT:
         s = "dictionary not terminated";
         break;
     case ESCAPED_COMMON_PDFNAME:
         /* like /JavaScript */
         s = "escaped common pdfname";
         break;
     case BAD_STREAM_FILTERS:
         s = "duplicate stream filters";
         break;
     case BAD_PDF_VERSION:
         s = "bad pdf version";
         break;
     case BAD_PDF_HEADERPOS:
         s = "bad pdf header position";
         break;
     case BAD_PDF_TRAILER:
         s = "bad pdf trailer";
         break;
     case BAD_PDF_TOOMANYOBJS:
         s = "too many pdf objs";
         break;
     case BAD_FLATE:
         s = "bad deflate stream";
         break;
     case BAD_FLATESTART:
         s = "bad deflate stream start";
         break;
     case BAD_STREAMSTART:
         s = "bad stream start";
         break;
     case UNKNOWN_FILTER:
         s = "unknown filter used";
         break;
     case BAD_ASCIIDECODE:
         s = "bad ASCII decode";
         break;
     case HEX_JAVASCRIPT:
         s = "hex javascript";
         break;
     case BAD_INDOBJ:
         s = "referencing nonexistent obj";
         break;
     case HAS_OPENACTION:
         s = "has /OpenAction";
         break;
     case HAS_LAUNCHACTION:
         s = "has /LaunchAction";
         break;
     case BAD_STREAMLEN:
         s = "bad /Length, too small";
         break;
     case ENCRYPTED_PDF:
         s = "PDF is encrypted";
         break;
     case LINEARIZED_PDF:
         s = "linearized PDF";
         break;
     case MANY_FILTERS:
         s = "more than 2 filters per obj";
         break;
     case DECRYPTABLE_PDF:
         s = "decryptable PDF";
         break;
eb270d5a
     }
15a8a022
 
f984f75b
     cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff);
eb270d5a
 }
 
930b9395
 struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)
3643f3d2
 {
1412b807
     uint32_t j;
     uint32_t i;
5aad11ce
 
     /* search starting at previous obj (if exists) */
15a8a022
     i = (obj != pdf->objs) ? obj - pdf->objs : 0;
 
3643f3d2
     for (j=i;j<pdf->nobjs;j++) {
15a8a022
         obj = &pdf->objs[j];
         if (obj->id == objid)
             return obj;
3643f3d2
     }
15a8a022
 
3643f3d2
     /* restart search from beginning if not found */
     for (j=0;j<i;j++) {
15a8a022
         obj = &pdf->objs[j];
         if (obj->id == objid)
             return obj;
3643f3d2
     }
15a8a022
 
3643f3d2
     return NULL;
 }
 
15a8a022
 static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len)
3643f3d2
 {
bf6e777f
     unsigned long length;
3643f3d2
     const char *q;
15a8a022
 
3643f3d2
     q = cli_memstr(start, len, "/Length", 7);
     if (!q)
15a8a022
         return 0;
 
3643f3d2
     q++;
     len -= q - start;
     start = pdf_nextobject(q, len);
     if (!start)
15a8a022
         return 0;
 
6e33139f
     /* len -= start - q; */
3643f3d2
     q = start;
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
         cli_dbgmsg("cli_pdf: failed to parse object length\n");
         return 0;
     }
 
53cbdee3
     while (isdigit(*q) && len > 0) {
15a8a022
         q++;
53cbdee3
         len--;
     }
15a8a022
 
53cbdee3
     if (*q == ' ' && len > 0) {
bf6e777f
         unsigned long genid;
15a8a022
         q++;
53cbdee3
         len--;
bf6e777f
         if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&genid)) {
             cli_dbgmsg("cli_pdf: failed to parse object genid\n");
             return 0;
         }
15a8a022
 
53cbdee3
         while(isdigit(*q) && len > 0) {
15a8a022
             q++;
53cbdee3
             len--;
         }
15a8a022
 
         if (q[0] == ' ' && q[1] == 'R') {
bf6e777f
             cli_dbgmsg("cli_pdf: length is in indirect object %lu %lu\n", length, genid);
15a8a022
 
             obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
             if (!obj) {
                 cli_dbgmsg("cli_pdf: indirect object not found\n");
                 return 0;
             }
 
             q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
             if (!q) {
                 cli_dbgmsg("cli_pdf: next object not found\n");
                 return 0;
             }
 
bf6e777f
             if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, (long*)&length)) {
                 cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n");
                 return 0;
             }
15a8a022
         }
3643f3d2
     }
15a8a022
 
a9d034ee
     /* limit length */
15a8a022
     if (start - pdf->map + length+5 > pdf->size)
         length = pdf->size - (start - pdf->map)-5;
 
3643f3d2
     return length;
 }
 
7719760b
 #define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
ab564992
 
eb270d5a
 static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
ab564992
 {
5aad11ce
     unsigned i = obj - pdf->objs;
15a8a022
 
ab564992
     i++;
     if (i < pdf->nobjs) {
15a8a022
         int s = pdf->objs[i].start - obj->start - 4;
         if (s > 0) {
             if (!binary) {
                 const char *p = pdf->map + obj->start;
                 const char *q = p + s;
 
                 while (q > p && (isspace(*q) || isdigit(*q)))
                        q--;
 
                 if (q > p+5 && !memcmp(q-5,"endobj",6))
                     q -= 6;
 
                 q = findNextNonWSBack(q, p);
                 q++;
 
                 return q - p;
             }
 
             return s;
         }
ab564992
     }
15a8a022
 
eb270d5a
     if (binary)
15a8a022
         return pdf->size - obj->start;
 
eb270d5a
     return pdf->offset - obj->start - 6;
ab564992
 }
 
15a8a022
 static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid)
dc200c6b
 {
     int ret;
     struct cli_bc_ctx *bc_ctx;
     cli_ctx *ctx = pdf->ctx;
     fmap_t *map;
 
cd94be7a
     UNUSEDPARAM(dumpid);
 
dc200c6b
     bc_ctx = cli_bytecode_context_alloc();
     if (!bc_ctx) {
15a8a022
         cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
         return CL_EMEM;
dc200c6b
     }
 
     map = *ctx->fmap;
     if (fd != -1) {
15a8a022
         map = fmap(fd, 0, 0);
         if (!map) {
db5c5d72
             cli_dbgmsg("can't mmap pdf extracted obj\n");
15a8a022
             map = *ctx->fmap;
             fd = -1;
         }
dc200c6b
     }
15a8a022
 
     cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs, &pdf->flags, pdf->size, pdf->startoff);
dc200c6b
     cli_bytecode_context_setctx(bc_ctx, ctx);
6ad45a29
     ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map);
dc200c6b
     cli_bytecode_context_destroy(bc_ctx);
15a8a022
 
     if (fd != -1)
         funmap(map);
 
dc200c6b
     return ret;
 }
 
bbfad9ba
 static void dbg_printhex(const char *msg, const char *hex, unsigned len);
cd94be7a
 
e09d8843
 /*
  * Decrypt AES data block by block, XOR-ing every decrypted block with the
  * previous ciphertext block (or with the IV for the first block).
  *
  * in      ciphertext; when has_iv is set its first 16 bytes are the IV
  * length  in: number of bytes at in; out: number of plaintext bytes in q
  * q       output buffer for the plaintext
  * key     key material, key_n bytes
  * key_n   key size in bytes; values above 32 are rejected
  * has_iv  non-zero when the data starts with an IV and ends with padding
  *
  * The function returns without touching *length when the key is too long,
  * the input is shorter than 32 bytes or the key schedule cannot be set up.
  * With has_iv set, the last plaintext byte is taken as the padding size;
  * if the padding looks wrong the padding bytes are kept in the reported
  * length, otherwise they are removed from it.
  */
 static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
 {
     unsigned long round_keys[RKLENGTH(256)];
     unsigned char chain[16];
     size_t remaining = *length;
     unsigned char pad, k;
     int nrounds;

     cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %zu\n", key_n, *length);
     if (key_n > 32) {
         cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8);
         return;
     }

     if (remaining < 32) {
         cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", remaining);
         noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", remaining);
         return;
     }

     if (!has_iv) {
         memset(chain, 0, sizeof(chain));
     } else {
         memcpy(chain, in, 16);
         in += 16;
         remaining -= 16;
     }

     cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
     nrounds = rijndaelSetupDecrypt(round_keys, (const unsigned char *)key, key_n*8);
     if (!nrounds) {
         cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n");
         return;
     }
     cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");

     /* one 16-byte block per iteration; a trailing partial block is left */
     for (; remaining >= 16; remaining -= 16, in += 16, q += 16) {
         unsigned b;

         rijndaelDecrypt(round_keys, nrounds, in, q);
         for (b = 0; b < 16; b++)
             q[b] ^= chain[b];

         memcpy(chain, in, 16);
     }

     if (has_iv) {
         /* account for the IV, then inspect the padding */
         remaining += 16;
         pad = q[-1];

         if (pad > 0x10) {
             cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, remaining-16);
             noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, remaining-16);
             *length -= remaining;
             return;
         }

         q -= pad;
         for (k = 1; k < pad; k++) {
             if (q[k] != pad) {
                 cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[k],pad);
                 noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[k],pad);
                 *length -= remaining;

                 return;
             }
         }

         remaining += pad;
     }

     *length -= remaining;

     cli_dbgmsg("cli_pdf: aes_decrypt: length is %zu\n", *length);
 }
 
 
e09d8843
 char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method)
374be101
 {
     unsigned char *key, *q, result[16];
     unsigned n;
     struct arc4_state arc4;
 
4956690d
     if (!length || !*length || !in) {
15a8a022
         noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
         return NULL;
4956690d
     }
15a8a022
 
374be101
     n = pdf->keylen + 5;
bcc68567
     if (enc_method == ENC_AESV2)
15a8a022
         n += 4;
 
374be101
     key = cli_malloc(n);
4956690d
     if (!key) {
15a8a022
         noisy_warnmsg("decrypt_any: malloc failed\n");
         return NULL;
4956690d
     }
374be101
 
     memcpy(key, pdf->key, pdf->keylen);
     q = key + pdf->keylen;
     *q++ = id >> 8;
     *q++ = id >> 16;
     *q++ = id >> 24;
     *q++ = id;
     *q++ = 0;
bcc68567
     if (enc_method == ENC_AESV2)
15a8a022
         memcpy(q, "sAlT", 4);
 
b2e7c931
     cl_hash_data("md5", key, n, result, NULL);
bbfad9ba
     free(key);
 
374be101
     n = pdf->keylen + 5;
     if (n > 16)
15a8a022
         n = 16;
374be101
 
66c53a53
     q = cli_calloc(*length, sizeof(char));
4956690d
     if (!q) {
15a8a022
         noisy_warnmsg("decrypt_any: malloc failed\n");
         return NULL;
4956690d
     }
374be101
 
bcc68567
     switch (enc_method) {
15a8a022
     case ENC_V2:
         cli_dbgmsg("cli_pdf: enc is v2\n");
         memcpy(q, in, *length);
         arc4_init(&arc4, result, n);
e09d8843
         arc4_apply(&arc4, q, (unsigned)*length); /* TODO: may truncate for very large lengths */
15a8a022
 
         noisy_msg(pdf, "decrypted ARC4 data\n");
 
         break;
     case ENC_AESV2:
         cli_dbgmsg("cli_pdf: enc is aesv2\n");
cd94be7a
         aes_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1);
15a8a022
 
         noisy_msg(pdf, "decrypted AES(v2) data\n");
 
         break;
     case ENC_AESV3:
         cli_dbgmsg("cli_pdf: enc is aesv3\n");
         if (pdf->keylen == 0) {
             cli_dbgmsg("cli_pdf: no key\n");
             return NULL;
         }
 
cd94be7a
         aes_decrypt((const unsigned char *)in, length, q, pdf->key, pdf->keylen, 1);
15a8a022
 
         noisy_msg(pdf, "decrypted AES(v3) data\n");
 
         break;
     case ENC_IDENTITY:
         cli_dbgmsg("cli_pdf: enc is identity\n");
         memcpy(q, in, *length);
 
         noisy_msg(pdf, "identity encryption\n");
 
         break;
     case ENC_NONE:
         cli_dbgmsg("cli_pdf: enc is none\n");
 
         noisy_msg(pdf, "encryption is none\n");
 
         free(q);
         return NULL;
     case ENC_UNKNOWN:
         cli_dbgmsg("cli_pdf: enc is unknown\n");
         free(q);
 
         noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
                id>>8,id&0xff);
 
         return NULL;
374be101
     }
15a8a022
 
cd94be7a
     return (char *)q;
374be101
 }
 
e2b1880f
 enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
bcc68567
 {
     if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
15a8a022
         return pdf->enc_method_embeddedfile;
 
bcc68567
     if (obj->flags & (1 << OBJ_STREAM))
15a8a022
         return pdf->enc_method_stream;
 
bcc68567
     return pdf->enc_method_string;
 }
 
7719760b
 /* Parser states used by process() while scanning page content text. */
 enum cstate {
     CSTATE_NONE,        /* outside of an array: looking for '[' */
     CSTATE_TJ,          /* after '[': looking for '(' (presumably the operand array of a TJ operator - TODO confirm) */
     CSTATE_TJ_PAROPEN   /* after '(': bytes are collected until ')' */
 };
 
 static void process(struct text_norm_state *s, enum cstate *st, const char *buf, int length, int fout)
 {
     do {
15a8a022
         switch (*st) {
         case CSTATE_NONE:
             if (*buf == '[') {
                 *st = CSTATE_TJ;
             } else {
                 const char *nl = memchr(buf, '\n', length);
                 if (!nl)
                     return;
 
                 length -= nl - buf;
                 buf = nl;
             }
 
             break;
         case CSTATE_TJ:
             if (*buf == '(')
                 *st = CSTATE_TJ_PAROPEN;
 
             break;
         case CSTATE_TJ_PAROPEN:
             if (*buf == ')') {
                 *st = CSTATE_TJ;
             } else {
cd94be7a
                 if (text_normalize_buffer(s, (const unsigned char *)buf, 1) != 1) {
15a8a022
                     cli_writen(fout, s->out, s->out_pos);
                     text_normalize_reset(s);
                 }
             }
 
             break;
         }
 
         buf++;
         length--;
7719760b
     } while (length > 0);
 }
 
 static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
 {
     struct text_norm_state s;
     char fullname[1024];
     char outbuff[BUFSIZ];
     char inbuf[BUFSIZ];
4956690d
     int fout, n, rc;
7719760b
     enum cstate st = CSTATE_NONE;
 
     snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
     fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
     if (fout < 0) {
15a8a022
         char err[128];
 
         cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
         return CL_ETMPFILE;
7719760b
     }
 
cd94be7a
     text_normalize_init(&s, (unsigned char *)outbuff, sizeof(outbuff));
7719760b
     while (1) {
15a8a022
         n = cli_readn(fd, inbuf, sizeof(inbuf));
         if (n <= 0)
             break;
 
         process(&s, &st, inbuf, n, fout);
7719760b
     }
15a8a022
 
7719760b
     cli_writen(fout, s.out, s.out_pos);
 
4956690d
     lseek(fout, 0, SEEK_SET);
     rc = cli_magic_scandesc(fout, pdf->ctx);
7719760b
     close(fout);
15a8a022
 
4956690d
     if (!pdf->ctx->engine->keeptmp)
15a8a022
         if (cli_unlink(fullname) && rc != CL_VIRUS)
             rc = CL_EUNLINK;
 
4956690d
     return rc;
7719760b
 }
 
27c8b02b
 static const char *pdf_getdict(const char *q0, int* len, const char *key);
 static char *pdf_readval(const char *q, int len, const char *key);
fb0c9fa2
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
 
930b9395
 int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
3643f3d2
 {
ab564992
     char fullname[NAME_MAX + 1];
     int fout;
e09d8843
     ptrdiff_t sum = 0;
3643f3d2
     int rc = CL_SUCCESS;
dc200c6b
     int dump = 1;
ab564992
 
5b574c47
     cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
 
9acc81d6
     /* TODO: call bytecode hook here, allow override dumpability */
15a8a022
     if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
         /* don't dump all streams */
         dump = 0;
ab564992
     }
15a8a022
 
     if ((obj->flags & (1 << OBJ_IMAGE)) && !(obj->flags & (1 << OBJ_FILTER_DCT))) {
         /* don't dump / scan non-JPG images */
         dump = 0;
9acc81d6
     }
15a8a022
 
dc200c6b
     if (obj->flags & (1 << OBJ_FORCEDUMP)) {
15a8a022
         /* bytecode can force dump by setting this flag */
         dump = 1;
dc200c6b
     }
15a8a022
 
dc200c6b
     if (!dump)
15a8a022
         return CL_CLEAN;
 
374be101
     cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff);
15a8a022
 
ab564992
     snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
     fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
     if (fout < 0) {
15a8a022
         char err[128];
         cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
 
         return CL_ETMPFILE;
ab564992
     }
 
1412b807
     if (!(flags & PDF_EXTRACT_OBJ_SCAN))
         obj->path = strdup(fullname);
 
ab564992
     do {
15a8a022
         if (obj->flags & (1 << OBJ_STREAM)) {
             const char *start = pdf->map + obj->start;
             off_t p_stream = 0, p_endstream = 0;
7aad5a3b
             off_t length;
15a8a022
 
             find_stream_bounds(start, pdf->size - obj->start,
                        pdf->size - obj->start,
                        &p_stream, &p_endstream,
                        pdf->enc_method_stream <= ENC_IDENTITY &&
                        pdf->enc_method_embeddedfile <= ENC_IDENTITY);
 
             if (p_stream && p_endstream) {
                 size_t size = p_endstream - p_stream;
                 off_t orig_length;
7aad5a3b
                 int len = p_stream;
                 const char *pstr;
                 struct pdf_dict *dparams = NULL;
a081b3e9
                 int xref = 0;
15a8a022
 
                 length = find_length(pdf, obj, start, p_stream);
                 if (length < 0)
                     length = 0;
 
                 orig_length = length;
                 if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
e09d8843
                     cli_dbgmsg("cli_pdf: length out of file: %lld + %lld > %lld\n",
                            (long long)p_stream, (long long)length, (long long)pdf->size);
                     noisy_warnmsg("length out of file, truncated: %lld + %lld > %lld\n",
                            (long long)p_stream, (long long)length, (long long)pdf->size);
15a8a022
                     length = pdf->size - (obj->start + p_stream);
3a0e133b
                 }
fb0c9fa2
 
15a8a022
                 if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
                     const char *q = start + p_endstream;
                     length = size;
13882281
                     q--;
15a8a022
 
                     if (*q == '\n') {
                         q--;
                         length--;
 
                         if (*q == '\r')
                             length--;
                     } else if (*q == '\r') {
                         length--;
                     }
 
                     if (length < 0)
                         length = 0;
 
31064b37
                     cli_dbgmsg("cli_pdf: calculated length %lld\n", (long long)length);
15a8a022
                 } else {
cd94be7a
                     if (size > (size_t)length+2) {
31064b37
                         cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
                                    (size_t)length, size);
15a8a022
                         length = size;
                     }
                 }
 
cd94be7a
                 if (orig_length && size > (size_t)orig_length + 20) {
31064b37
                     cli_dbgmsg("cli_pdf: orig length: %lld, length: %lld, size: %zu\n",
                                (long long)orig_length, (long long)length, size);
15a8a022
                     pdfobj_flag(pdf, obj, BAD_STREAMLEN);
                 }
 
                 if (!length) {
                     length = size;
                     if (!length) {
                         cli_dbgmsg("pdf_extract_obj: length and size both 0\n");
                         break; /* Empty stream, nothing to scan */
                     }
                 }
 
a081b3e9
                 if (cli_memstr(start, p_stream, "/XRef", 5))
                     xref = 1;
 
7aad5a3b
                 cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
15a8a022
 
7aad5a3b
                 pstr = pdf_getdict(start, &len, "/DecodeParms");
                 if (!pstr)
                     pstr = pdf_getdict(start, &len, "/DP");
15a8a022
 
7aad5a3b
                 if (pstr) {
                     unsigned int objsz = obj_size(pdf, obj, 1);
15a8a022
 
8da5f4c8
                     /* shift pstr left to "<<" for pdf_parse_dict */
                     while ((*pstr == '<') && (pstr > start)) {
                         pstr--;
                         len++;
                     }
 
                     /* shift pstr right to "<<" for pdf_parse_dict */
7aad5a3b
                     while ((*pstr != '<') && (len > 0)) {
                         pstr++;
                         len--;
15a8a022
                     }
 
7aad5a3b
                     if (len > 4)
                         dparams = pdf_parse_dict(pdf, obj, objsz, (char *)pstr, NULL);
                     else
                         cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n");
13882281
                 }
15a8a022
 
e09d8843
                 sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc);
7aad5a3b
                 if (dparams)
                     pdf_free_dict(dparams);
15a8a022
 
5c291512
                 if (sum < 0 || (rc == CL_VIRUS && !(pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
                     sum = 0; /* prevents post-filter scan */
                     break;
                 }
15a8a022
 
7aad5a3b
                 cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
15a8a022
             } else {
                 noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
fb0c9fa2
             }
 
15a8a022
         } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
             const char *q2;
             const char *q = pdf->map+obj->start;
             /* TODO: get obj-endobj size */
             off_t bytesleft = obj_size(pdf, obj, 0);
             if (bytesleft < 0)
                 break;
 
             do {
                 char *js = NULL;
e09d8843
                 size_t js_len = 0;
15a8a022
                 const char *q3;
 
                 q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);
                 if (!q2)
                     break;
 
                 bytesleft -= q2 - q + 11;
                 q = q2 + 11;
 
                 js = pdf_readstring(q, bytesleft,  "/JS", NULL, &q2, !(pdf->flags & (1<<DECRYPTABLE_PDF)));
                 bytesleft -= q2 - q;
                 q = q2;
 
                 if (js) {
7aad5a3b
                     char *decrypted = NULL;
15a8a022
                     const char *out = js;
                     js_len = strlen(js);
                     if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
                         cli_dbgmsg("cli_pdf: encrypted string\n");
e09d8843
                         decrypted = decrypt_any(pdf, obj->id, js, &js_len, pdf->enc_method_string);
15a8a022
 
                         if (decrypted) {
                             noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
                             out = decrypted;
                         }
                     }
 
e09d8843
                     if (filter_writen(pdf, obj, fout, out, js_len, (size_t*)&sum) != js_len) {
15a8a022
                         rc = CL_EWRITE;
                                 free(js);
                         break;
                     }
 
7aad5a3b
                     free(decrypted);
15a8a022
                     free(js);
                     cli_dbgmsg("bytesleft: %d\n", (int)bytesleft);
 
                     if (bytesleft > 0) {
                         q2 = pdf_nextobject(q, bytesleft);
                         if (!q2)
                             q2 = q + bytesleft - 1;
 
                         /* non-conforming PDFs that don't escape ) properly */
                         q3 = memchr(q, ')', bytesleft);
                         if (q3 && q3 < q2)
                             q2 = q3;
 
                         while (q2 > q && q2[-1] == ' ')
                             q2--;
 
                         if (q2 > q) {
                             q--;
e09d8843
                             filter_writen(pdf, obj, fout, q, q2 - q, (size_t*)&sum);
15a8a022
                             q++;
                         }
                     }
                 }
 
             } while (bytesleft > 0);
         } else {
             off_t bytesleft = obj_size(pdf, obj, 0);
 
             if (bytesleft < 0)
                 rc = CL_EFORMAT;
e09d8843
             else if (filter_writen(pdf, obj, fout , pdf->map + obj->start, bytesleft, (size_t*)&sum) != (size_t)bytesleft)
15a8a022
                 rc = CL_EWRITE;
         }
ab564992
     } while (0);
15a8a022
 
a5e2b97d
     cli_dbgmsg("cli_pdf: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff);
df085913
     cli_dbgmsg("         ... to %s\n", fullname);
15a8a022
 
1412b807
     if (flags & PDF_EXTRACT_OBJ_SCAN && sum) {
15a8a022
         int rc2;
 
         cli_updatelimits(pdf->ctx, sum);
 
         /* TODO: invoke bytecode on this pdf obj with metainformation associated */
         lseek(fout, 0, SEEK_SET);
         rc2 = cli_magic_scandesc(fout, pdf->ctx);
         if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
             rc = rc2;
 
         if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
             rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
             if (rc2 == CL_VIRUS)
                 rc = rc2;
         }
 
         if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) && (obj->flags & (1 << OBJ_CONTENTS))) {
             lseek(fout, 0, SEEK_SET);
             cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
 
             rc2 = pdf_scan_contents(fout, pdf);
             if (rc2 == CL_VIRUS)
                 rc = rc2;
 
             noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
         }
dc200c6b
     }
15a8a022
 
ab564992
     close(fout);
15a8a022
 
1412b807
     if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp)
15a8a022
         if (cli_unlink(fullname) && rc != CL_VIRUS)
             rc = CL_EUNLINK;
 
3643f3d2
     return rc;
 }
 
6c135eb4
 /*
  * Parser state carried from one PDF name to the next while an object's
  * dictionary is walked. The pdfname_actions table selects entries by
  * "from" state and names the state to enter when an entry applies.
  */
 enum objstate {
     STATE_NONE         = 0, /* no pending context */
     STATE_S            = 1, /* entered on "S"; reset to NONE by a non-applying name */
     STATE_FILTER       = 2, /* entered on "Filter"; filter names are expected next */
     STATE_JAVASCRIPT   = 3, /* entered on "JavaScript" while in STATE_S */
     STATE_OPENACTION   = 4, /* entered on "OpenAction" */
     STATE_LINEARIZED   = 5, /* entered on "Linearized" */
     STATE_LAUNCHACTION = 6, /* entered on "Launch" */
     STATE_CONTENTS     = 7, /* entered on "Contents" */
     STATE_ANY          = 8  /* wildcard "from" state, for actions table below */
 };
 
f7f9b88c
 /*
  * Bits for pdfname_action.nameflags.
  * NAMEFLAG_HEURISTIC: handle_pdfname() raises ESCAPED_COMMON_PDFNAME when a
  * name carrying this bit was written with #xx escapes.
  * NAMEFLAG_NONE: no special handling.
  */
 #define NAMEFLAG_NONE       0x0
 #define NAMEFLAG_HEURISTIC  0x1
 
6c135eb4
 struct pdfname_action {
     const char *pdfname;
dc200c6b
     enum pdf_objflags set_objflag;/* OBJ_DICT is noop */
6c135eb4
     enum objstate from_state;/* STATE_NONE is noop */
     enum objstate to_state;
f7f9b88c
     uint32_t nameflags;
63803da5
 #if HAVE_JSON
224d1c4d
     void (*pdf_stats_cb)(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
63803da5
 #endif
6c135eb4
 };
 
63803da5
 /*
  * Table of the PDF names handle_pdfname() reacts to. Columns, in order:
  * name, object flag to set, state in which the entry applies (STATE_ANY
  * matches every state), state entered when it applies, NAMEFLAG_* bits and,
  * only in the HAVE_JSON variant, a statistics callback (NULL for none).
  *
  * The non-JSON variant repeats the entries up to "Contents" without the
  * callback column; the NAMEFLAG_NONE entries ("Author" .. "XFA") exist only
  * in the HAVE_JSON variant.
  *
  * NOTE(review): "JBIG2Decode" sets OBJ_FILTER_DCT in both variants -- confirm
  * that sharing the DCT flag is intentional.
  */
 #if HAVE_JSON
6c135eb4
 static struct pdfname_action pdfname_actions[] = {
f7f9b88c
     {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
     {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
     {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
     {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
     {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, EmbeddedFile_cb},
     {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
     {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
     {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Image_cb},
     {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
     {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
     {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
     {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
     {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
     {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
     {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JBIG2Decode_cb},
     {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
     {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
     {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JPXDecode_cb},
     {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, Crypt_cb},
     {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, Standard_cb},
     {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, Sig_cb},
     {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
     {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
     {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC, NULL},
     {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC, NULL},
     {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC, JavaScript_cb},
     {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
     {"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC, NULL},
     {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
     {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC, OpenAction_cb},
     {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC, Launch_cb},
     {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Page_cb},
     {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC, NULL},
     {"Author", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Author_cb},
     {"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Producer_cb},
     {"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, CreationDate_cb},
     {"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, ModificationDate_cb},
     {"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Creator_cb},
     {"Title", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Title_cb},
     {"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Keywords_cb},
     {"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Subject_cb},
     {"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Pages_cb},
     {"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Colors_cb},
     {"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, RichMedia_cb},
     {"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, AcroForm_cb},
     {"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, XFA_cb}
6c135eb4
 };
63803da5
 #else
 static struct pdfname_action pdfname_actions[] = {
f7f9b88c
     {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC},
     {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC},
     {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC},
     {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC},
     {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC},
     {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC},
     {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
     {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC}
63803da5
 };
 #endif
6c135eb4
 
edeb59b3
 /*
  * Mask of the obj->flags bits that stand for recognised stream filters.
  * handle_pdfname() uses it to decide which names are appended to
  * obj->filterlist and whether an unmatched name seen in STATE_FILTER should
  * be reported as an unknown filter. OBJ_FILTER_STANDARD is not part of it.
  */
 #define KNOWN_FILTERS ((1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_RL) | (1 << OBJ_FILTER_A85) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_LZW) | (1 << OBJ_FILTER_FAX) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_JPX) | (1 << OBJ_FILTER_CRYPT))
eb270d5a
 
15a8a022
 static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const char *pdfname, int escapes, enum objstate *state)
6c135eb4
 {
     struct pdfname_action *act = NULL;
     unsigned j;
15a8a022
 
5091689d
     obj->statsflags |= OBJ_FLAG_PDFNAME_DONE;
 
6c135eb4
     for (j=0;j<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);j++) {
15a8a022
         if (!strcmp(pdfname, pdfname_actions[j].pdfname)) {
             act = &pdfname_actions[j];
             break;
         }
6c135eb4
     }
15a8a022
 
eb270d5a
     if (!act) {
15a8a022
         /* these are digital signature objects, filter doesn't matter,
          * we don't need them anyway */
         if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) {
             cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname);
             obj->flags |= 1 << OBJ_FILTER_UNKNOWN;
         }
 
         return;
eb270d5a
     }
15a8a022
 
a9584bfe
     /* record filter order */
c8ba4ae2
     if (obj->numfilters < PDF_FILTERLIST_MAX && (*state == STATE_FILTER) && ((1 << act->set_objflag) & KNOWN_FILTERS))
a9584bfe
         obj->filterlist[obj->numfilters++] = act->set_objflag;
 
f7f9b88c
     if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) {
         /* if a commonly used PDF name is escaped that is certainly
            suspicious. */
         cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
         pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
     }
 
63803da5
 #if HAVE_JSON
49bc4992
     if ((act->pdf_stats_cb))
         act->pdf_stats_cb(pdf, obj, act);
63803da5
 #endif
49bc4992
 
15a8a022
     if (act->from_state == *state || act->from_state == STATE_ANY) {
         *state = act->to_state;
 
c8ba4ae2
         if (*state == STATE_FILTER && act->set_objflag != OBJ_DICT && (obj->flags & (1 << act->set_objflag))) {
15a8a022
             cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname);
             pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
         }
 
         obj->flags |= 1 << act->set_objflag;
6c135eb4
     } else {
15a8a022
         /* auto-reset states */
         switch (*state) {
         case STATE_S:
             *state = STATE_NONE;
             break;
         default:
             break;
         }
6c135eb4
     }
 }
 
3f8016ce
 /*
  * Forward declaration: pdf_parseobj() calls this before a definition is
  * visible at this point of the file.
  * NOTE(review): presumably returns the integer value stored under `key`
  * within the `len` bytes at `q0` -- confirm against the definition.
  */
 static int pdf_readint(const char *q0, int len, const char *key);
bbfad9ba
 
 static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
 {
     const char *q, *q2;
bf6e777f
     unsigned long objid;
     unsigned long genid;
bbfad9ba
 
     if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
15a8a022
         q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
         if (!q)
             return;
 
         len -= q - enc;
         enc = q;
bbfad9ba
     }
15a8a022
 
bbfad9ba
     q = enc + 8;
     len -= 8;
     q2 = pdf_nextobject(q, len);
     if (!q2 || !isdigit(*q2))
15a8a022
         return;
bbfad9ba
     len -= q2 - q;
     q = q2;
53cbdee3
 
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&objid)) {
         cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n");
         return;
     }
     objid = objid << 8;
bbfad9ba
     q2 = pdf_nextobject(q, len);
     if (!q2 || !isdigit(*q2))
15a8a022
         return;
bbfad9ba
     len -= q2 - q;
     q = q2;
53cbdee3
 
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, (long*)&genid)) {
         cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n");
         return;
     }
     objid |= genid & 0xff; 
bbfad9ba
     q2 = pdf_nextobject(q, len);
     if (!q2 || *q2 != 'R')
15a8a022
         return;
 
bf6e777f
     cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff);
15a8a022
 
bbfad9ba
     pdf->enc_objid = objid;
 }
 
 /*
  * Inspect a trailer region for encryption information.
  *
  * pdf     scan context; ENCRYPTED_PDF is set in pdf->flags when "/Encrypt"
  *         is present, and pdf->fileID / pdf->fileIDlen are replaced when an
  *         "/ID" string can be read
  * s       start of the region to search
  * length  number of bytes at s
  */
 static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)
 {
     const char *enc;

     enc = cli_memstr(s, length, "/Encrypt", 8);
     if (enc) {
         char *newID;
         unsigned newIDlen = 0;

         pdf->flags |= 1 << ENCRYPTED_PDF;
         pdf_parse_encrypt(pdf, enc, s + length - enc);

         /* pdf_readstring() zeroes the length it is given before searching,
          * so read into a local and commit pointer and length together;
          * otherwise a failed lookup would leave the previous fileID paired
          * with a length of 0 */
         newID = pdf_readstring(s, length, "/ID", &newIDlen, NULL, 0);

         if (newID) {
             free(pdf->fileID);
             pdf->fileID = newID;
             pdf->fileIDlen = newIDlen;
         }
     }
 }
 
930b9395
 void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
6c135eb4
 {
     /* enough to hold common pdf names, we don't need all the names */
     char pdfname[64];
8d1ef133
     const char *q2, *q3;
c9a070c9
     const char *nextobj = NULL, *nextopen = NULL, *nextclose = NULL;
6c135eb4
     const char *q = obj->start + pdf->map;
8d1ef133
     const char *dict, *enddict, *start;
0fe3b769
     off_t dict_length, full_dict_length;
8d1ef133
     off_t objsize = obj_size(pdf, obj, 1);
     off_t bytesleft;
d18d7221
     size_t i;
     unsigned filters=0, blockopens=0;
6c135eb4
     enum objstate objstate = STATE_NONE;
fdfd5814
 #if HAVE_JSON
     json_object *pdfobj=NULL, *jsonobj=NULL;
 #endif
6c135eb4
 
8d1ef133
     if (objsize < 0)
15a8a022
         return;
 
6c135eb4
     start = q;
8d1ef133
     bytesleft = objsize;
 
6c135eb4
     /* find start of dictionary */
     do {
15a8a022
         nextobj = pdf_nextobject(q, bytesleft);
         bytesleft -= nextobj -q;
 
         if (!nextobj || bytesleft < 0) {
             cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff);
fdfd5814
 #if HAVE_JSON
             if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
                 pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
                 if (!(pdfobj))
                     return;
             }
 
             if (pdfobj) {
                 if (!(jsonobj))
                     jsonobj = cli_jsonarray(pdfobj, "ObjectsWithoutDictionaries");
                 if (jsonobj)
                     cli_jsonint_array(jsonobj, obj->id>>8);
             }
 #endif
15a8a022
             return;
         }
 
         q3 = memchr(q-1, '<', nextobj-q+1);
         nextobj++;
         bytesleft--;
         q = nextobj;
6c135eb4
     } while (!q3 || q3[1] != '<');
     dict = q3+2;
     q = dict;
8d1ef133
     blockopens++;
     bytesleft = objsize - (q - start);
4c19109d
     enddict = q + bytesleft - 1;
8d1ef133
 
     /* find end of dictionary block */
4c19109d
     if (bytesleft < 0) {
5b574c47
         cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff);
fdfd5814
 #if HAVE_JSON
         if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
             pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
             if (!(pdfobj))
                 return;
         }
 
         if (pdfobj) {
             if (!(jsonobj))
                 jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
             if (jsonobj)
                 cli_jsonint_array(jsonobj, obj->id>>8);
         }
 #endif
4c19109d
         return;
     }
8d1ef133
 
4c19109d
     /* while still looking ... */
     while ((q < enddict-1) && (blockopens > 0)) {
         /* find next close */
bc52aff8
         nextclose = memchr(q, '>', enddict-q);
4c19109d
         if (nextclose && (nextclose[1] == '>')) {
             /* check for nested open */
374b5aea
             while ((nextopen = memchr(q-1, '<', nextclose-q+1)) != NULL) {
4c19109d
                 if (nextopen[1] == '<') {
                     /* nested open */
                     blockopens++;
                     q = nextopen + 2;
8d1ef133
                 }
                 else {
4c19109d
                     /* unmatched < before next close */
                     q = nextopen + 2;
8d1ef133
                 }
             }
4c19109d
             /* close block */
             blockopens--;
             q = nextclose + 2;
8d1ef133
         }
4c19109d
         else if (nextclose) {
             /* found one > but not two */
             q = nextclose + 2;
         }
         else {
             /* next closing not found */
bc52aff8
             break;
4c19109d
         }
     }
8d1ef133
 
4c19109d
     /* Was end of dictionary found? */
bc52aff8
     if (blockopens) {
         /* probably truncated */
         cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff);
fdfd5814
 #if HAVE_JSON
         if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
             pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
             if (!(pdfobj))
                 return;
         }
 
         if (pdfobj) {
             if (!(jsonobj))
                 jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
             if (jsonobj)
                 cli_jsonint_array(jsonobj, obj->id>>8);
         }
 #endif
4c19109d
         return;
bc52aff8
     }
15a8a022
 
8d1ef133
     enddict = nextclose;
6c135eb4
     obj->flags |= 1 << OBJ_DICT;
8d1ef133
     full_dict_length = dict_length = enddict - dict;
 
     /* This code prints the dictionary content.
     {
         char * dictionary = malloc(dict_length + 1);
         if (dictionary) {
4c19109d
             for (i = 0; i < dict_length; i++) {
115b6306
                 if (dict[i] == '\r')
                     dictionary[i] = '\n';
                 else if (isprint(dict[i]) || isspace(dict[i]))
4c19109d
                     dictionary[i] = dict[i];
                 else
                     dictionary[i] = '*';
             }
8d1ef133
             dictionary[dict_length] = '\0';
             cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
             free(dictionary);
         }
     }
     */
6c135eb4
 
9c617dbe
     /*  process pdf names */
5af966d3
     for (q = dict;dict_length > 0;) {
15a8a022
         int escapes = 0, breakout=0;
         q2 = memchr(q, '/', dict_length);
         if (!q2)
             break;
 
         dict_length -= q2 - q;
         q = q2;
         /* normalize PDF names */
         for (i = 0;dict_length > 0 && (i < sizeof(pdfname)-1); i++) {
             q++;
             dict_length--;
 
             if (*q == '#') {
                 if (cli_hex2str_to(q+1, pdfname+i, 2) == -1)
                     break;
 
                 q += 2;
                 dict_length -= 2;
                 escapes = 1;
                 continue;
             }
 
             switch (*q) {
             case ' ':
             case '\t':
             case '\r':
             case '\n':
             case '/':
             case '>':
             case '[':
             case ']':
             case '<':
             case '(':
                 breakout = 1;
             }
 
             if (breakout)
                 break;
 
             pdfname[i] = *q;
         }
 
         pdfname[i] = '\0';
 
         handle_pdfname(pdf, obj, pdfname, escapes, &objstate);
         if (objstate == STATE_LINEARIZED) {
             long trailer_end, trailer;
 
             pdfobj_flag(pdf, obj, LINEARIZED_PDF);
             objstate = STATE_NONE;
             trailer_end = pdf_readint(dict, full_dict_length, "/H");
             if (trailer_end > 0 && trailer_end < pdf->size) {
                 trailer = trailer_end - 1024;
                 if (trailer < 0)
                     trailer = 0;
 
                 q2 = pdf->map + trailer;
                 cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
                 pdf_parse_trailer(pdf, q2, trailer_end - trailer);
                 if (pdf->fileID)
                     cli_dbgmsg("cli_pdf: found fileID\n");
             }
         }
 
         if (objstate == STATE_LAUNCHACTION)
             pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);
         if (dict_length > 0 && (objstate == STATE_JAVASCRIPT || objstate == STATE_OPENACTION || objstate == STATE_CONTENTS)) {
69b4a223
             off_t dict_remaining = dict_length;
 
15a8a022
             if (objstate == STATE_OPENACTION)
                 pdfobj_flag(pdf, obj, HAS_OPENACTION);
 
69b4a223
             q2 = pdf_nextobject(q, dict_remaining);
15a8a022
             if (q2 && isdigit(*q2)) {
69b4a223
                 const char * q2_old = NULL;
bf6e777f
                 unsigned long objid;
                 unsigned long genid;
 
69b4a223
                 dict_remaining -= (off_t)(q2 - q);
 
bf6e777f
                 if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&objid)) {
                     cli_dbgmsg("cli_pdf: failed to parse object objid\n");
                     return;
                 }
                 objid = objid << 8;
 
15a8a022
                 while (isdigit(*q2))
                     q2++;
 
69b4a223
                 q2_old = q2;
                 q2 = pdf_nextobject(q2, dict_remaining);
15a8a022
                 if (q2 && isdigit(*q2)) {
69b4a223
                     dict_remaining -= (off_t)(q2 - q2_old);
bf6e777f
                     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, (long*)&genid)) {
                         cli_dbgmsg("cli_pdf: failed to parse object genid\n");
                         return;
                     }
                     objid |= genid & 0xff;
15a8a022
 
69b4a223
                     q2 = pdf_nextobject(q2, dict_remaining);
15a8a022
                     if (q2 && *q2 == 'R') {
                         struct pdf_obj *obj2;
 
bf6e777f
                         cli_dbgmsg("cli_pdf: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff);
15a8a022
                         obj2 = find_obj(pdf, obj, objid);
                         if (obj2) {
                             enum pdf_objflags flag =
                                 objstate == STATE_JAVASCRIPT ? OBJ_JAVASCRIPT :
                                 objstate == STATE_OPENACTION ? OBJ_OPENACTION :
 
                             OBJ_CONTENTS;
                             obj2->flags |= 1 << flag;
                             obj->flags &= ~(1 << flag);
                         } else {
                             pdfobj_flag(pdf, obj, BAD_INDOBJ);
                         }
                     }
                 }
             }
 
             objstate = STATE_NONE;
         }
6c135eb4
     }
15a8a022
 
9acc81d6
     for (i=0;i<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);i++) {
15a8a022
         const struct pdfname_action *act = &pdfname_actions[i];
 
         if ((obj->flags & (1 << act->set_objflag)) &&
             act->from_state == STATE_FILTER &&
             act->to_state == STATE_FILTER &&
             act->set_objflag != OBJ_FILTER_CRYPT &&
             act->set_objflag != OBJ_FILTER_STANDARD) {
             filters++;
         }
9acc81d6
     }
15a8a022
 
     if (filters > 2) {
         /* more than 2 non-crypt filters */
         pdfobj_flag(pdf, obj, MANY_FILTERS);
9acc81d6
     }
15a8a022
 
b835a528
     if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS))
15a8a022
         obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);
 
b835a528
     if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))
15a8a022
         pdfobj_flag(pdf, obj, UNKNOWN_FILTER);
 
6c135eb4
     cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
 }
 
a79be759
 /**
  * @brief   Find a key inside a dictionary buffer and return a pointer to its value.
  *
  * @param q0            Pointer to the first byte of the buffer to search.
  * @param[in,out] len   In: number of bytes available at q0.
  *                      Out: number of bytes from the returned value to the
  *                           end of the buffer. If the key is found but no
  *                           value follows it, len is left counting from the key.
  * @param key           NUL-terminated key to look for, e.g. "/DecodeParms".
  * @return const char*  Pointer to the key's value (including the leading '<'
  *                      when the byte before the value is '<'), or NULL when
  *                      len is not positive, q0 is NULL, the key is absent or
  *                      nothing follows the key.
  */
 static const char *pdf_getdict(const char *q0, int* len, const char *key)
 {
     const char *key_pos;
     const char *value;

     if (*len <= 0) {
         cli_dbgmsg("cli_pdf: bad length %d\n", *len);
         return NULL;
     }

     if (q0 == NULL)
         return NULL;

     /* locate the key */
     key_pos = cli_memstr(q0, *len, key, strlen(key));
     if (key_pos == NULL) {
         cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
         return NULL;
     }

     *len -= key_pos - q0;

     /* step over the key's first byte and find the object that follows */
     value = pdf_nextobject(key_pos + 1, *len - 1);
     if (value == NULL) {
         cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
         return NULL;
     }

     /* if the value is a dictionary object, include the < > brackets.*/
     if (value[-1] == '<')
         value--;

     *len -= value - key_pos;
     return value;
 }
 
fb0c9fa2
 /**
  * @brief   Read the string value of a dictionary key into a newly allocated buffer.
  *
  * Two value forms are handled: a literal string "( ... )" and a hex string
  * "< ... >". Literal strings are unescaped unless noescape is set.
  *
  * @param q0        Start of the dictionary buffer.
  * @param len       Number of bytes available at q0.
  * @param key       NUL-terminated key whose value is wanted.
  * @param[out] slen Optional. Length of the returned string, excluding the
  *                  terminating NUL. Set to 0 on entry.
  * @param[out] qend Optional. Set to q0 on entry; for a literal string it is
  *                  set to the byte after the closing ')', for a hex string
  *                  to the closing '>'.
  * @param noescape  Non-zero to copy a literal string verbatim.
  * @return char*    NUL-terminated buffer the caller must free(), or NULL on
  *                  any failure.
  */
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape)
 {
     char *s, *s0;
     const char *start, *q, *end;
 
     if (slen)
         *slen = 0;
 
     if (qend)
         *qend = q0;
 
     q = pdf_getdict(q0, &len, key);
     if (!q || len <= 0)
         return NULL;
 
     if (*q == '(') {
         int paren = 1;
 
         start = ++q;
         len--;
 
         /* find the matching ')' while honouring nesting and '\' escapes */
         for (; paren > 0 && len > 0; q++, len--) {
             switch (*q) {
             case '(':
                 paren++;
                 break;
             case ')':
                 paren--;
                 break;
             case '\\':
                 /* the escaped byte never counts as a parenthesis */
                 q++;
                 len--;
                 break;
             default:
                 break;
             }
         }
 
         if (len <= 0) {
             cli_errmsg("pdf_readstring: Invalid, truncated dictionary.\n");
             return NULL;
         }
 
         if (qend)
             *qend = q;
 
         /* q is one past the closing ')'; step back onto it */
         q--;
         len = q - start;
         s0 = s = cli_malloc(len + 1);
         if (!s) {
             cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
             return NULL;
         }
 
         end = start + len;
         if (noescape) {
             memcpy(s0, start, len);
             s = s0 + len;
         } else {
             /* Every output byte consumes at least one input byte, so the
              * decoded text always fits in the len + 1 byte buffer. */
             for (q = start; q < end; q++) {
                 if (*q != '\\') {
                     *s++ = *q;
                     continue;
                 }
 
                 q++;
                 if (q >= end)
                     break; /* dangling backslash: nothing left to decode */
 
                 switch (*q) {
                 case 'n':
                     *s++ = '\n';
                     break;
                 case 'r':
                     *s++ = '\r';
                     break;
                 case 't':
                     *s++ = '\t';
                     break;
                 case 'b':
                     *s++ = '\b';
                     break;
                 case 'f':
                     *s++ = '\f';
                     break;
                 case '(': /* fall-through */
                 case ')': /* fall-through */
                 case '\\':
                     *s++ = *q;
                     break;
                 case '\n':
                     /* line continuation: ignore */
                     break;
                 case '\r':
                     /* line continuation: ignore, together with a following LF */
                     if (q + 1 < end && q[1] == '\n')
                         q++;
                     break;
                 case '0':
                 case '1':
                 case '2':
                 case '3':
                 case '4':
                 case '5':
                 case '6':
                 case '7': {
                     /* Octal escape: one to three octal digits. Decode only
                      * bytes that are inside the string, and consume exactly
                      * the digits that were decoded. */
                     unsigned value = 0;
                     int digits = 0;
 
                     while (digits < 3 && q < end && *q >= '0' && *q <= '7') {
                         value = value * 8 + (unsigned)(*q - '0');
                         q++;
                         digits++;
                     }
                     q--; /* the loop increment moves past the last digit */
 
                     /* high-order overflow is discarded */
                     *s++ = (char)(value & 0xff);
                     break;
                 }
                 default:
                     /* unknown escape: keep the backslash, then re-read the
                      * byte as ordinary text on the next iteration */
                     *s++ = '\\';
                     q--;
                     break;
                 }
             }
         }
 
         *s++ = '\0';
         if (slen)
             *slen = s - s0 - 1;
 
         return s0;
     }
 
     if (*q == '<') {
         start = ++q;
         len--;
         if (len <= 0)
             return NULL;
 
         /* search only the bytes that remain after the '<' */
         q = memchr(start, '>', len);
         if (!q)
             return NULL;
 
         if (qend)
             *qend = q;
 
         s = cli_malloc((q - start) / 2 + 1);
         if (s == NULL) { /* oops, couldn't allocate memory */
             cli_dbgmsg("cli_pdf: unable to allocate memory...\n");
             return NULL;
         }
 
         if (cli_hex2str_to(start, s, q - start)) {
             cli_dbgmsg("cli_pdf: %s has bad hex value\n", key);
             free(s);
             return NULL;
         }
 
         s[(q - start) / 2] = '\0';
         if (slen)
             *slen = (q - start) / 2;
 
         return s;
     }
 
     cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
     return NULL;
 }
 
374be101
 /**
  * @brief   Read a name value ("/Something") of a dictionary key.
  *
  * The returned text starts after the leading '/' and stops at the next '/',
  * at ">>", at a NUL byte or at the end of the buffer, with trailing
  * whitespace removed.
  *
  * @param q     Start of the dictionary buffer.
  * @param len   Number of bytes available at q.
  * @param key   NUL-terminated key whose value is wanted.
  * @return char* NUL-terminated copy of the name that the caller must
  *               free(), or NULL when the key is missing, its value is not a
  *               name, or allocation fails.
  */
 static char *pdf_readval(const char *q, int len, const char *key)
 {
     const char *end;
     char *s;
 
     q = pdf_getdict(q, &len, key);
     if (!q || len <= 0)
         return NULL;
 
     while (len > 0 && *q && *q == ' ') {
         q++;
         len--;
     }
 
     /* the spaces may have run to the end of the buffer: do not read past it */
     if (len <= 0 || *q != '/')
         return NULL;
 
     q++;
     len--;
     end = q;
 
     while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
         end++;
         len--;
     }
 
     /* Trim trailing whitespace, never stepping back before the name.
      * isspace() requires a value representable as unsigned char. */
     while (end > q && isspace((unsigned char)end[-1]))
         end--;
 
     s = cli_malloc(end - q + 1);
     if (!s)
         return NULL;
 
     memcpy(s, q, end - q);
     s[end - q] = '\0';
 
     return s;
 }
 
7606789f
 static int pdf_readint(const char *q0, int len, const char *key)
 {
bf6e777f
     long value = 0;
7606789f
     const char *q  = pdf_getdict(q0, &len, key);
15a8a022
 
bf6e777f
     if (q == NULL) {
         value = -1;
     }
     else if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, &value)) {
         value = -1;
     }
     return value;
7606789f
 }
 
 /**
  * @brief   Read the boolean value of a dictionary key.
  *
  * @param q0       Start of the dictionary buffer.
  * @param len      Number of bytes available at q0.
  * @param key      NUL-terminated key whose value is wanted.
  * @param Default  Value returned when the key is missing, fewer than five
  *                 bytes remain at the value, or the value is neither
  *                 "true" nor "false".
  * @return int     1 for "true", 0 for "false", otherwise Default.
  */
 static int pdf_readbool(const char *q0, int len, const char *key, int Default)
 {
     const char *text = pdf_getdict(q0, &len, key);
     int answer = Default;
 
     if (text && len >= 5) {
         if (0 == strncmp(text, "true", 4))
             answer = 1;
         else if (0 == strncmp(text, "false", 5))
             answer = 0;
         else
             cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
     }
 
     return answer;
 }
 
 /* Fixed 32-byte padding string. check_user_password() copies exactly 32
  * bytes from it, using it in place of the password when testing whether the
  * user password is empty ("empty password, password == padding").
  * The literal contains embedded NUL bytes, so it must be copied with an
  * explicit length and never handled with the str*() functions. */
 static const char *key_padding =
 "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
 "\x2e\x2e\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
 
 /**
  * @brief   Emit a debug message holding a hex dump of a byte buffer.
  *
  * Does nothing unless cli_debug_flag is set.
  *
  * @param msg   Label printed before the hex dump.
  * @param hex   Bytes to dump.
  * @param len   Number of bytes at hex.
  */
 static void dbg_printhex(const char *msg, const char *hex, unsigned len)
 {
     char *kh;
 
     if (!cli_debug_flag)
         return;
 
     kh = cli_str2hex(hex, len);
     if (!kh)
         return; /* conversion failed: never hand NULL to a %s conversion */
 
     cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
 
     free(kh);
 }
 
 /**
  * @brief   Test whether the document can be decrypted with an empty user password.
  *
  * A candidate encryption key is derived and stored in pdf->key / pdf->keylen.
  * When the value computed from it matches U, DECRYPTABLE_PDF is set in
  * pdf->flags.
  *
  * @param pdf     Parser state; fileID and fileIDlen are read, key, keylen and
  *                flags are written.
  * @param R       Revision; only 2, 3, 4 and 5 are handled.
  * @param O       Owner string; 32 bytes of it are read.
  * @param U       User string; 32 bytes are read for R 2..4, 48 for R 5.
  * @param P       Permission value mixed into the key for R 2..4.
  * @param EM      EncryptMetadata flag; affects the hash input when R >= 4.
  * @param UE      Key material decrypted for R 5; may be NULL.
  * @param length  Key length in bits; capped at 128 for R 2..4.
  * @param oulen   Unused.
  */
 static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
53cbdee3
                 const char *U, int32_t P, int EM,
                 const char *UE,
                 unsigned length, unsigned oulen)
7606789f
 {
     unsigned i;
     uint8_t result[16];
     char data[32];
     struct arc4_state arc4;
     unsigned password_empty = 0;
 
cd94be7a
     UNUSEDPARAM(oulen);
 
7606789f
     dbg_printhex("U: ", U, 32);
     dbg_printhex("O: ", O, 32);
     if (R == 5) {
15a8a022
         uint8_t result2[32];
 
         /* supplement to ISO3200, 3.5.2 Algorithm 3.11 */
         /* user validation salt */
         /* SHA-256 over the 8 bytes at U+32; a match with the first 32 bytes
          * of U means the empty password validates */
         cl_sha256(U+32, 8, result2, NULL);
cd94be7a
         dbg_printhex("Computed U", (const char *)result2, 32);
15a8a022
         if (!memcmp(result2, U, 32)) {
e09d8843
             size_t UE_len;
15a8a022
 
             /* Algorithm 3.2a could be used to recover encryption key */
             password_empty = 1;
             /* SHA-256 over the 8 bytes at U+40 is the key used to decrypt UE */
             cl_sha256(U+40, 8, result2, NULL);
e09d8843
             /* NOTE(review): UE is measured with strlen(), so an embedded NUL
              * byte shortens the measured length - confirm this is intended */
             UE_len = UE ? strlen(UE) : 0;
             if (UE_len != 32) {
                 cli_dbgmsg("cli_pdf: UE length is not 32: %zu\n", UE_len);
                 noisy_warnmsg("cli_pdf: UE length is not 32: %zu\n", UE_len);
15a8a022
             } else {
                 pdf->keylen = 32;
                 pdf->key = cli_malloc(32);
                 if (!pdf->key) {
                     cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n");
                     return;
                 }
 
e09d8843
                 aes_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0);
15a8a022
                 dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
             }
241e7eb1
         }
374b5aea
     } else if ((R >= 2) && (R <= 4)) {
b2e7c931
         unsigned char *d;
         /* hash input: 32 bytes padding + 32 bytes O + 4 bytes P + file ID,
          * plus 4 more bytes when R >= 4 and metadata is not encrypted */
         size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0);
         d = calloc(1, sz);
 
         if (!(d))
             return;
 
         memcpy(d, key_padding, 32);
         memcpy(d+32, O, 32);
         P = le32_to_host(P);
         memcpy(d+64, &P, 4);
         memcpy(d+68, pdf->fileID, pdf->fileIDlen);
 
15a8a022
         /* 7.6.3.3 Algorithm 2 */
         /* empty password, password == padding */
         if (R >= 4 && !EM) {
             uint32_t v = 0xFFFFFFFF;
             memcpy(d+68+pdf->fileIDlen, &v, 4);
         }
 
         cl_hash_data("md5", d, sz, result, NULL);
         free(d);
         /* result holds 16 bytes, so at most 128 key bits can be taken from it */
         if (length > 128)
             length = 128;
         if (R >= 3) {
             /* Yes, this really is on purpose */
             for (i=0;i<50;i++)
                 cl_hash_data("md5", result, length/8, result, NULL);
         }
         if (R == 2)
             length = 40;
 
         pdf->keylen = length / 8;
         pdf->key = cli_malloc(pdf->keylen);
         if (!pdf->key)
b2e7c931
             return;
 
15a8a022
         memcpy(pdf->key, result, pdf->keylen);
cd94be7a
         dbg_printhex("md5", (const char *)result, 16);
15a8a022
         dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
 
         /* 7.6.3.3 Algorithm 6 */
         if (R == 2) {
             /* 7.6.3.3 Algorithm 4 */
             /* RC4-encrypt the padding with the candidate key and compare all
              * 32 bytes with U */
             memcpy(data, key_padding, 32);
cd94be7a
             arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen);
             arc4_apply(&arc4, (uint8_t *)data, 32);
15a8a022
             dbg_printhex("computed U (R2)", data, 32);
             if (!memcmp(data, U, 32))
                 password_empty = 1;
         } else if (R >= 3) {
             unsigned len = pdf->keylen;
             unsigned char *d;
 
             d = calloc(1, 32 + pdf->fileIDlen);
             if (!(d))
                 return;
 
             /* 7.6.3.3 Algorithm 5 */
             /* MD5 over padding + file ID, then RC4 with the key followed by
              * 19 more RC4 passes, each keyed with the key XORed with the pass
              * number */
             memcpy(d, key_padding, 32);
             memcpy(d+32, pdf->fileID, pdf->fileIDlen);
             cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL);
             memcpy(data, pdf->key, len);
 
cd94be7a
             arc4_init(&arc4, (const uint8_t *)data, len);
15a8a022
             arc4_apply(&arc4, result, 16);
             for (i=1;i<=19;i++) {
                 unsigned j;
 
                 for (j=0;j<len;j++)
                     data[j] = pdf->key[j] ^ i;
 
cd94be7a
                 arc4_init(&arc4, (const uint8_t *)data, len);
15a8a022
                 arc4_apply(&arc4, result, 16);
             }
 
             dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen);
cd94be7a
             dbg_printhex("computed U (R>=3)", (const char *)result, 16);
15a8a022
             /* only the first 16 bytes of U are compared here */
             if (!memcmp(result, U, 16))
                 password_empty = 1;
15fce6e0
             free(d);
15a8a022
         } else {
             /* not reachable: R is 2, 3 or 4 inside this branch */
             cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
             noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
         }
     } else {
         /* Supported R is in {2,3,4,5} */
         cli_dbgmsg("cli_pdf: R value out of range\n");
         noisy_warnmsg("cli_pdf: R value out of range\n");
 
         return;
374b5aea
     }
15a8a022
 
7606789f
     if (password_empty) {
15a8a022
         cli_dbgmsg("cli_pdf: user password is empty\n");
         noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
         /* The key we computed above is the key used to encrypt the streams.
          * We could decrypt it now if we wanted to */
         pdf->flags |= 1 << DECRYPTABLE_PDF;
7606789f
     } else {
15a8a022
         /* the key is not valid, we would need the user or the owner password to decrypt */
         cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
         noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
7606789f
     }
 }
 
1d0cdc67
 enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def)
bcc68567
 {
     const char *q;
     char *CFM = NULL;
884b2e73
     enum enc_method ret = ENC_UNKNOWN;
15a8a022
 
bcc68567
     if (!key)
15a8a022
         return def;
 
bcc68567
     if (!strcmp(key, "Identity"))
15a8a022
         return ENC_IDENTITY;
 
cd94be7a
     q = pdf_getdict(dict, (int *)(&len), key);
bcc68567
     if (!q)
15a8a022
         return def;
 
bcc68567
     CFM = pdf_readval(q, len, "/CFM");
     if (CFM) {
15a8a022
         cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM);
         if (!strncmp(CFM,"V2", 2))
             ret = ENC_V2;
         else if (!strncmp(CFM,"AESV2",5))
             ret = ENC_AESV2;
         else if (!strncmp(CFM,"AESV3",5))
             ret = ENC_AESV3;
         else if (!strncmp(CFM,"None",4))
             ret = ENC_NONE;
 
         free(CFM);
bcc68567
     }
15a8a022
 
884b2e73
     return ret;
bcc68567
 }
 
e2b1880f
 void pdf_handle_enc(struct pdf_struct *pdf)
7606789f
 {
     struct pdf_obj *obj;
cd94be7a
     uint32_t len, n, R, P, length, EM = 1, i, oulen;
bcc68567
     char *O, *U, *UE, *StmF, *StrF, *EFF;
7606789f
     const char *q, *q2;
 
3f8016ce
     if (pdf->enc_objid == ~0u)
15a8a022
         return;
3f8016ce
     if (!pdf->fileID) {
15a8a022
         cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
         noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
         return;
3f8016ce
     }
15a8a022
 
3f8016ce
     obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
     if (!obj) {
15a8a022
         cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
         noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
         return;
3f8016ce
     }
15a8a022
 
7606789f
     len = obj_size(pdf, obj, 1);
     q = pdf->map + obj->start;
 
bcc68567
     O = U = UE = StmF = StrF = EFF = NULL;
7606789f
     do {
374be101
 
15a8a022
         pdf->enc_method_string = ENC_UNKNOWN;
         pdf->enc_method_stream = ENC_UNKNOWN;
         pdf->enc_method_embeddedfile = ENC_UNKNOWN;
         P = pdf_readint(q, len, "/P");
         if (P == ~0u) {
             cli_dbgmsg("cli_pdf: invalid P\n");
             noisy_warnmsg("cli_pdf: invalid P\n");
             break;
         }
7606789f
 
15a8a022
         q2 = cli_memstr(q, len, "/Standard", 9);
         if (!q2) {
             cli_dbgmsg("cli_pdf: /Standard not found\n");
             noisy_warnmsg("cli_pdf: /Standard not found\n");
             break;
         }
7606789f
 
15a8a022
         /* we can have both of these:
         * /AESV2/Length /Standard/Length
         * /Length /Standard
         * make sure we don't mistake AES's length for Standard's */
         length = pdf_readint(q2, len - (q2 - q), "/Length");
         if (length == ~0u)
             length = pdf_readint(q, len, "/Length");
 
         if (length < 40) {
             cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
             length = 40;
         }
7606789f
 
15a8a022
         R = pdf_readint(q, len, "/R");
         if (R == ~0u) {
             cli_dbgmsg("cli_pdf: invalid R\n");
             noisy_warnmsg("cli_pdf: invalid R\n");
             break;
         }
7606789f
 
15a8a022
         if ((R > 5) || (R < 2)) {
             cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n");
             noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n");
             break;
         }
 
         if (R < 5)
             oulen = 32;
         else
             oulen = 48;
 
         if (R == 2 || R == 3) {
             pdf->enc_method_stream = ENC_V2;
             pdf->enc_method_string = ENC_V2;
             pdf->enc_method_embeddedfile = ENC_V2;
         } else if (R == 4 || R == 5) {
             EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
             StmF = pdf_readval(q, len, "/StmF");
             StrF = pdf_readval(q, len, "/StrF");
             EFF = pdf_readval(q, len, "/EFF");
             n = len;
cd94be7a
             pdf->CF = pdf_getdict(q, (int *)(&n), "/CF");
15a8a022
             pdf->CF_n = n;
 
             if (StmF)
                 cli_dbgmsg("cli_pdf: StmF: %s\n", StmF);
             if (StrF)
                 cli_dbgmsg("cli_pdf: StrF: %s\n", StrF);
             if (EFF)
                 cli_dbgmsg("cli_pdf: EFF: %s\n", EFF);
 
             pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);
             pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);
             pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream);
 
             free(StmF);
             free(StrF);
             free(EFF);
 
             cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n", EM ? "true" : "false");
 
             if (R == 4) {
                 length = 128;
             } else {
                 n = 0;
                 UE = pdf_readstring(q, len, "/UE", &n, NULL, 0);
                 length = 256;
             }
         }
 
         if (length == ~0u)
             length = 40;
 
         n = 0;
         O = pdf_readstring(q, len, "/O", &n, NULL, 0);
         if (!O || n < oulen) {
             cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
             cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
             if (O)
                 dbg_printhex("invalid O", O, n);
 
             break;
         }
         if (n > oulen) {
             for (i=oulen;i<n;i++)
                 if (O[i])
                     break;
 
             if (i != n) {
                 dbg_printhex("too long O", O, n);
e09d8843
                 noisy_warnmsg("too long O: %u", n);
15a8a022
                 break;
             }
         }
 
         n = 0;
         U = pdf_readstring(q, len, "/U", &n, NULL, 0);
         if (!U || n < oulen) {
e09d8843
             cli_dbgmsg("cli_pdf: invalid U: %u\n", n);
             noisy_warnmsg("cli_pdf: invalid U: %u\n", n);
15a8a022
 
             if (U)
                 dbg_printhex("invalid U", U, n);
 
             break;
         }
 
         if (n > oulen) {
             for (i=oulen;i<n;i++)
                 if (U[i])
                     break;
             if (i != n) {
                 dbg_printhex("too long U", U, n);
                 break;
             }
         }
 
e09d8843
         cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %u\n", R, P, length);
15a8a022
         if (length % 8) {
             cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
             noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
             break;
         }
         check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
7606789f
     } while (0);
15a8a022
 
7606789f
     free(O);
     free(U);
bbfad9ba
     free(UE);
7606789f
 }
 
e7a27135
 /**
  * @brief   Parse the PDF held in the scan context's file map and scan its objects.
  *
  * Steps, in order: validate the "%PDF-" header, look for "%%EOF" and
  * "startxref" in the last 2048 bytes and check the xref it points to, map
  * the document, run the PRE hooks, enumerate and parse the objects, handle
  * encryption, run the PARSED hooks, extract every object and run the END
  * hooks.
  *
  * @param dir     Directory name stored in the parser state.
  * @param ctx     Scan context; supplies the file map.
  * @param offset  Offset in the map at which the header search starts.
  * @return int    CL_VIRUS when anything alerted; CL_EFORMAT when fewer than
  *                8 bytes follow the header or an object was malformed;
  *                CL_EMAP when mapping fails; CL_ETIMEOUT when the time limit
  *                is hit; CL_SUCCESS when no header is found; otherwise the
  *                last status, with CL_BREAK reported as CL_CLEAN.
  */
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
 {
     struct pdf_struct pdf;
     fmap_t *map = *ctx->fmap;
     size_t size = map->len - offset;
     off_t versize = size > 1032 ? 1032 : size;
     off_t map_off, bytesleft;
     long xref;
     const char *pdfver, *tmp, *start, *eofmap, *q, *eof;
     int rc, badobjects = 0;
     unsigned i, alerts = 0;
 #if HAVE_JSON
     json_object *pdfobj=NULL;
     char *begin, *end, *p1;
 #endif
 
     cli_dbgmsg("in cli_pdf(%s)\n", dir);
     memset(&pdf, 0, sizeof(pdf));
     pdf.ctx = ctx;
     pdf.dir = dir;
     pdf.enc_objid = ~0u;
 
     pdfver = start = fmap_need_off_once(map, offset, versize);
 
     /* Check PDF version */
     if (!pdfver) {
         cli_errmsg("cli_pdf: mmap() failed (1)\n");
         return CL_EMAP;
     }
 
 #if HAVE_JSON
     if (ctx->wrkproperty)
         pdfobj = cli_jsonobj(ctx->wrkproperty, "PDFStats");
 #endif
 
     /* offset is 0 when coming from filetype2 */
     tmp = cli_memstr(pdfver, versize, "%PDF-", 5);
     if (!tmp) {
         cli_dbgmsg("cli_pdf: no PDF- header found\n");
         noisy_warnmsg("cli_pdf: no PDF- header found\n");
 #if HAVE_JSON
         pdf_export_json(&pdf);
 #endif
         return CL_SUCCESS;
     }
 
     versize -= tmp - pdfver;
     pdfver = tmp;
 
     /* "%PDF-1.x" needs 8 bytes */
     if (versize < 8) {
         return CL_EFORMAT;
     }
 
     /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */
     if (pdfver[5] != '1' || pdfver[6] != '.' ||
         pdfver[7] < '1' || pdfver[7] > '9') {
         pdf.flags |= 1 << BAD_PDF_VERSION;
         cli_dbgmsg("cli_pdf: bad pdf version: %.8s\n", pdfver);
 #if HAVE_JSON
         if (pdfobj)
             cli_jsonbool(pdfobj, "BadVersion", 1);
 #endif
     } else {
 #if HAVE_JSON
         if (pdfobj) {
             begin = (char *)(pdfver+5);
             end = begin+2;
             strtoul(end, &end, 10);
             p1 = cli_calloc((end - begin) + 2, 1);
             if (p1) {
                 strncpy(p1, begin, end - begin);
                 p1[end - begin] = '\0';
                 cli_jsonstr(pdfobj, "PDFVersion", p1);
                 free(p1);
             }
         }
 #endif
     }
 
     if (pdfver != start || offset) {
         pdf.flags |= 1 << BAD_PDF_HEADERPOS;
         cli_dbgmsg("cli_pdf: PDF header is not at position 0: %lld\n", (long long)(pdfver - start + offset));
 #if HAVE_JSON
         if (pdfobj)
             cli_jsonbool(pdfobj, "BadVersionLocation", 1);
 #endif
     }
 
     offset += pdfver - start;
 
     /* find trailer and xref, don't fail if not found */
     map_off = (off_t)map->len - 2048;
     if (map_off < 0)
         map_off = 0;
 
     bytesleft = map->len - map_off;
 
     eofmap = fmap_need_off_once(map, map_off, bytesleft);
     if (!eofmap) {
         cli_errmsg("cli_pdf: mmap() failed (2)\n");
 #if HAVE_JSON
         pdf_export_json(&pdf);
 #endif
         return CL_EMAP;
     }
 
     /* search backwards from the end of the file for the EOF marker */
     eof = eofmap + bytesleft;
     for (q=&eofmap[bytesleft-5]; q > eofmap; q--) {
         if (memcmp(q, "%%EOF", 5) == 0)
             break;
     }
 
     if (q <= eofmap) {
         pdf.flags |= 1 << BAD_PDF_TRAILER;
         cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
 #if HAVE_JSON
         if (pdfobj)
             cli_jsonbool(pdfobj, "NoEOF", 1);
 #endif
     } else {
         /* keep searching backwards for the startxref keyword */
         q -= 9;
         for (;q > eofmap;q--) {
             if (memcmp(q, "startxref", 9) == 0)
                 break;
         }
 
         if (q <= eofmap) {
             pdf.flags |= 1 << BAD_PDF_TRAILER;
             cli_dbgmsg("cli_pdf: startxref not found\n");
 #if HAVE_JSON
             if (pdfobj)
                 cli_jsonbool(pdfobj, "NoXREF", 1);
 #endif
         } else {
             pdf_parse_trailer(&pdf, eofmap, eof - eofmap);
             q += 9;
 
             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
 
             /* Parse the xref offset. The readable region is what remains
              * between q and the end of the mapped tail, not the absolute
              * file position of q. */
             if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(eof - q), 0, 10, &xref)) {
                 cli_dbgmsg("cli_pdf: failed to parse PDF trailer xref\n");
                 pdf.flags |= 1 << BAD_PDF_TRAILER;
             }
             else {
                 bytesleft = map->len - offset - xref;
                 if (bytesleft > 4096)
                     bytesleft = 4096;
 
                 q = fmap_need_off_once(map, offset + xref, bytesleft);
                 if (!q || xrefCheck(q, q+bytesleft) == -1) {
                     cli_dbgmsg("cli_pdf: did not find valid xref\n");
                     pdf.flags |= 1 << BAD_PDF_TRAILER;
                 }
             }
         }
     }
 
     size -= offset;
     pdf.size = size;
     pdf.map = fmap_need_off(map, offset, size);
     if (!pdf.map) {
         cli_errmsg("cli_pdf: mmap() failed (3)\n");
 #if HAVE_JSON
         pdf_export_json(&pdf);
 #endif
         return CL_EMAP;
     }
 
     pdf.startoff = offset;
 
     rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1);
     if ((rc == CL_VIRUS) && SCAN_ALL) {
         /* in scan-all mode an alert is counted and scanning goes on */
         cli_dbgmsg("cli_pdf: (pre hooks) returned %d\n", rc);
         alerts++;
         rc = CL_CLEAN;
     } else if (rc) {
         cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
 #if HAVE_JSON
         pdf_export_json(&pdf);
 #endif
         return rc == CL_BREAK ? CL_CLEAN : rc;
     }
 
     /* parse PDF and find obj offsets */
     while ((rc = pdf_findobj(&pdf)) > 0) {
         struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
 
         cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + offset));
     }
 
     if (pdf.nobjs)
         pdf.nobjs--;
 
     if (rc == -1)
         pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;
 
     /* must parse after finding all objs, so we can flag indirect objects */
     for (i=0;i<pdf.nobjs;i++) {
         struct pdf_obj *obj = &pdf.objs[i];
 
         if (cli_checktimelimit(ctx) != CL_SUCCESS) {
             cli_errmsg("Timeout reached in the PDF parser\n");
 #if HAVE_JSON
             pdf_export_json(&pdf);
 #endif
             free(pdf.objs);
             free(pdf.fileID);
             free(pdf.key);
             return CL_ETIMEOUT;
         }
 
         pdf_parseobj(&pdf, obj);
     }
 
     pdf_handle_enc(&pdf);
     if (pdf.flags & (1 << ENCRYPTED_PDF))
         cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
                (pdf.flags & (1 << DECRYPTABLE_PDF)) ?
                "decryptable" : "not decryptable, stream will probably fail to decompress");
 
     if (DETECT_ENCRYPTED &&
        (pdf.flags & (1 << ENCRYPTED_PDF)) &&
        !(pdf.flags & (1 << DECRYPTABLE_PDF))) {
         /* It is encrypted, and a password/key needs to be supplied to decrypt.
          * This doesn't trigger for PDFs that are encrypted but don't need
          * a password to decrypt */
         rc = cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
         if (rc == CL_VIRUS) {
             alerts++;
             if (SCAN_ALL)
                 rc = CL_CLEAN;
         }
     }
 
     if (!rc) {
         rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
         cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
         if (rc == CL_VIRUS) {
             alerts++;
             if (SCAN_ALL) {
                 rc = CL_CLEAN;
             }
         }
     }
 
     /* extract PDF objs */
     for (i=0;!rc && i<pdf.nobjs;i++) {
         struct pdf_obj *obj = &pdf.objs[i];
 
         if (cli_checktimelimit(ctx) != CL_SUCCESS) {
             cli_errmsg("Timeout reached in the PDF parser\n");
 #if HAVE_JSON
             pdf_export_json(&pdf);
 #endif
             free(pdf.objs);
             free(pdf.fileID);
             free(pdf.key);
             return CL_ETIMEOUT;
         }
 
         rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
         switch (rc) {
             case CL_EFORMAT:
                 /* Don't halt on one bad object */
                 cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
                 badobjects++;
                 pdf.stats.ninvalidobjs++;
                 rc = CL_CLEAN;
                 break;
             case CL_VIRUS:
                 alerts++;
                 if (SCAN_ALL) {
                     rc = CL_CLEAN;
                 }
                 break;
             default:
                 break;
         }
     }
 
     /* these flags are cleared for encrypted documents before the END hooks run */
     if (pdf.flags & (1 << ENCRYPTED_PDF))
         pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE));
 
     if (pdf.flags && !rc) {
         cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
         rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
         if (rc == CL_VIRUS) {
             alerts++;
             if (SCAN_ALL) {
                 rc = CL_CLEAN;
             }
         }
 
         if (!rc && SCAN_ALGO && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) {
             if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
                 /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
                 cli_append_possibly_unwanted(ctx, "Heuristics.PDF.ObfuscatedNameObject");
             }
         }
 #if 0
     /* TODO: find both trailers, and /Encrypt settings */
     if (pdf.flags & (1 << LINEARIZED_PDF))
         pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
     if (pdf.flags & (1 << MANY_FILTERS))
         pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
     if (!rc && (pdf.flags &
         ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
          (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
              (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
         rc = CL_EUNPACK;
     }
 #endif
     }
 
     /* an alert takes precedence over every other status */
     if (alerts) {
         rc = CL_VIRUS;
     }
 
     else if (!rc && badobjects) {
         rc = CL_EFORMAT;
     }
 
 #if HAVE_JSON
     pdf_export_json(&pdf);
 #endif
 
     cli_dbgmsg("cli_pdf: returning %d\n", rc);
     free(pdf.objs);
     free(pdf.fileID);
     free(pdf.key);
 
     /* PDF hooks may abort, don't return CL_BREAK to caller! */
     return rc == CL_BREAK ? CL_CLEAN : rc;
 }
 
bce73fe9
 /*
  * Find the start of the next line.
  *
  * Scans forward to the first end-of-line byte, then past the run of
  * end-of-line bytes that follows it. The membership test is
  * strchr("\r\n", c), so a NUL byte is also treated as an end-of-line byte
  * (strchr matches the terminator of the string it searches).
  *
  * ptr: buffer to scan.
  * len: number of readable bytes at ptr.
  *
  * Returns a pointer to the first byte after the end-of-line run, or NULL
  * when ptr is NULL, len is 0, or the buffer is exhausted before such a byte
  * is found.
  */
 static const char *
 pdf_nextlinestart(const char *ptr, size_t len)
 {
     if (ptr == NULL || len == 0) {
         /* Nothing to scan; without this guard --len below would wrap around. */
         return NULL;
     }

     /* Skip the remainder of the current line. */
     while (strchr("\r\n", *ptr) == NULL) {
         if (--len == 0) {
             return NULL;
         }

         ptr++;
     }

     /* Skip the end-of-line bytes themselves. */
     while (strchr("\r\n", *ptr) != NULL) {
         if (--len == 0) {
             return NULL;
         }

         ptr++;
     }

     return ptr;
 }
9be10a55
 
ef8219b8
 /*
  * Return the start of the next PDF object.
  * This assumes that we're not in a stream.
  *
  * Walks at most len bytes from ptr. A '/' or '(' is returned immediately.
  * Any other byte is returned only once a separator (whitespace, '[', '<',
  * a line break or a '%' comment line) has been passed; until then it is
  * treated as part of the token the scan started in and skipped.
  *
  * Returns NULL when the buffer ends first.
  */
 static const char *
 pdf_nextobject(const char *ptr, size_t len)
 {
     int past_separator = 0;

     while (len != 0) {
         const char ch = *ptr;

         /* Name objects and '(' strings are always object starts. */
         if (ch == '/' || ch == '(') {
             return ptr;
         }

         /* Line breaks and comments: jump to the start of the next line. */
         if (ch == '\n' || ch == '\r' || ch == '%') {
             const char *next = pdf_nextlinestart(ptr, len);

             if (next == NULL) {
                 return NULL;
             }

             len -= (size_t)(next - ptr);
             ptr = next;
             past_separator = 1;
             continue;
         }

         if (ch == ' ' || ch == '\t' || ch == '[' || ch == '\v' || ch == '\f' || ch == '<') {
             /* Whitespace, array start or dictionary start. */
             past_separator = 1;
         } else if (past_separator) {
             /* TODO: parse and return object type */
             return ptr;
         }

         ptr++;
         len--;
     }

     return NULL;
 }
49bc4992
 
 /* PDF statistics */
63803da5
 #if HAVE_JSON
224d1c4d
 static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nasciihexdecode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void ASCII85Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nascii85decode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void EmbeddedFile_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nembeddedfile++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void FlateDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nflate++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Image_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nimage++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void LZWDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nlzw++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void RunLengthDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nrunlengthdecode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void CCITTFaxDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nfaxdecode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void JBIG2Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     struct json_object *pdfobj, *jbig2arr;
 
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
49bc4992
 
084707b3
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
084707b3
     if (!(pdf->ctx->wrkproperty))
         return;
 
     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
     if (!(pdfobj))
         return;
 
     jbig2arr = cli_jsonarray(pdfobj, "JBIG2Objects");
     if (!(jbig2arr))
         return;
 
     cli_jsonint_array(jbig2arr, obj->id>>8);
 
     pdf->stats.njbig2decode++;
49bc4992
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void DCTDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.ndctdecode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void JPXDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.njpxdecode++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Crypt_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.ncrypt++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Standard_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nstandard++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Sig_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nsigned++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void JavaScript_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     struct json_object *pdfobj, *jbig2arr;
 
     UNUSEDPARAM(act);
cd1d52d1
 
49bc4992
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
cd1d52d1
     if (!(pdf->ctx->wrkproperty))
         return;
 
     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
     if (!(pdfobj))
         return;
 
     jbig2arr = cli_jsonarray(pdfobj, "JavascriptObjects");
     if (!(jbig2arr))
         return;
 
     cli_jsonint_array(jbig2arr, obj->id>>8);
 
49bc4992
     pdf->stats.njs++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void OpenAction_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nopenaction++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Launch_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.nlaunch++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Page_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
49bc4992
 {
cd94be7a
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);
 
49bc4992
     if (!(pdf))
         return;
 
     pdf->stats.npage++;
 }
63803da5
 #endif
49bc4992
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
063f0d25
 {
cd94be7a
     UNUSEDPARAM(act);
 
063f0d25
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.author)) {
         pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.author))
             return;
         pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
     }
063f0d25
 }
63803da5
 #endif
063f0d25
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
063f0d25
 {
cd94be7a
     UNUSEDPARAM(act);
 
063f0d25
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.creator)) {
         pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.creator))
             return;
         pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
     }
063f0d25
 }
63803da5
 #endif
063f0d25
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
063f0d25
 {
cd94be7a
     UNUSEDPARAM(act);
 
063f0d25
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.modificationdate)) {
         pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.modificationdate))
             return;
         pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
     }
063f0d25
 }
63803da5
 #endif
063f0d25
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
063f0d25
 {
cd94be7a
     UNUSEDPARAM(act);
 
063f0d25
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.creationdate)) {
         pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.creationdate))
             return;
         pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
     }
063f0d25
 }
63803da5
 #endif
063f0d25
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
063f0d25
 {
cd94be7a
     UNUSEDPARAM(act);
 
063f0d25
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.producer)) {
         pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.producer))
             return;
         pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
     }
063f0d25
 }
63803da5
 #endif
063f0d25
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
754f976a
 {
cd94be7a
     UNUSEDPARAM(act);
 
754f976a
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.title)) {
         pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.title))
             return;
         pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
     }
754f976a
 }
63803da5
 #endif
754f976a
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
754f976a
 {
cd94be7a
     UNUSEDPARAM(act);
 
754f976a
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.keywords)) {
         pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.keywords))
             return;
         pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
     }
754f976a
 }
63803da5
 #endif
754f976a
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
754f976a
 {
cd94be7a
     UNUSEDPARAM(act);
 
754f976a
     if (!(pdf))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
9d33052f
     if (!(pdf->stats.subject)) {
         pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
         if (!(pdf->stats.subject))
             return;
         pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
     }
754f976a
 }
63803da5
 #endif
754f976a
 
63803da5
 #if HAVE_JSON
09ff1409
 /* Stats callback: adds one to pdf->stats.nrichmedia; obj and act are not used. */
 static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
 {
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);

     if (pdf) {
         pdf->stats.nrichmedia += 1;
     }
 }
63803da5
 #endif
09ff1409
 
63803da5
 #if HAVE_JSON
09ff1409
 /* Stats callback: adds one to pdf->stats.nacroform; obj and act are not used. */
 static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
 {
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);

     if (pdf) {
         pdf->stats.nacroform += 1;
     }
 }
63803da5
 #endif
09ff1409
 
63803da5
 #if HAVE_JSON
09ff1409
 /* Stats callback: adds one to pdf->stats.nxfa; obj and act are not used. */
 static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
 {
     UNUSEDPARAM(obj);
     UNUSEDPARAM(act);

     if (pdf) {
         pdf->stats.nxfa += 1;
     }
 }
63803da5
 #endif
09ff1409
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
ca78e3b3
 {
440f1fff
     struct pdf_array *array;
     const char *objstart = (const char *)(obj->start + pdf->map);
     const char *begin;
cd94be7a
     unsigned int objsz;
440f1fff
     unsigned long npages=0, count;
     struct pdf_array_node *node;
     json_object *pdfobj;
 
cd94be7a
     UNUSEDPARAM(act);
 
440f1fff
     if (!(pdf) || !(pdf->ctx->wrkproperty))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
706c2943
     objsz = obj_size(pdf, obj, 1);
 
440f1fff
     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
     if (!(pdfobj))
         return;
 
     begin = cli_memstr(objstart, objsz, "/Kids", 5);
     if (!(begin))
         return;
 
     begin += 5;
 
cd94be7a
     array = pdf_parse_array(pdf, obj, objsz, (char *)begin, NULL);
dd101bee
     if (!(array)) {
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
7a98488d
         return;
dd101bee
     }
7a98488d
 
     for (node = array->nodes; node != NULL; node = node->next)
         if (node->datasz)
dd101bee
             if (strchr((char *)(node->data), 'R'))
7a98488d
                 npages++;
440f1fff
 
     begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6);
     if (!(begin)) {
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
         goto cleanup;
     }
 
     begin += 6;
     while (begin - objstart <  objsz && isspace(begin[0]))
         begin++;
 
     if (begin - objstart >= objsz) {
         goto cleanup;
     }
 
bf6e777f
     if ((CL_SUCCESS != cli_strntol_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, (long*)&count)) ||
         (count != npages)) {
440f1fff
         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
bf6e777f
     }
440f1fff
 
 cleanup:
     pdf_free_array(array);
ca78e3b3
 }
63803da5
 #endif
ca78e3b3
 
63803da5
 #if HAVE_JSON
224d1c4d
 static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
ca78e3b3
 {
     json_object *colorsobj, *pdfobj;
     unsigned long ncolors;
     char *start, *p1;
cd94be7a
     size_t objsz;
 
     UNUSEDPARAM(act);
ca78e3b3
 
     if (!(pdf) || !(pdf->ctx) || !(pdf->ctx->wrkproperty))
         return;
 
0bed896b
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
         return;
 
cd94be7a
     objsz = obj_size(pdf, obj, 1);
 
     start = (char *)(obj->start + pdf->map);
ca78e3b3
 
cd94be7a
     p1 = (char *)cli_memstr(start, objsz, "/Colors", 7);
ca78e3b3
     if (!(p1))
         return;
 
     p1 += 7;
 
     /* Ensure that we have at least one whitespace character plus at least one number */
     if (objsz - (p1 - start) < 2)
         return;
 
     while (p1 - start < objsz && isspace(p1[0]))
         p1++;
 
cd94be7a
     if ((size_t)(p1 - start) == objsz)
ca78e3b3
         return;
 
bf6e777f
     if (CL_SUCCESS != cli_strntol_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, (long*)&ncolors))
         return;
ca78e3b3
 
     /* We only care if the number of colors > 2**24 */
     if (ncolors < 1<<24)
         return;
 
     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
     if (!(pdfobj))
         return;
 
     colorsobj = cli_jsonarray(pdfobj, "BigColors");
     if (!(colorsobj))
         return;
 
     cli_jsonint_array(colorsobj, obj->id>>8);
 }
63803da5
 #endif
ca78e3b3
 
63803da5
 #if HAVE_JSON
ebcca55f
 static void pdf_export_json(struct pdf_struct *pdf)
 {
     json_object *pdfobj;
c7fd0220
     unsigned long i;
ebcca55f
 
     if (!(pdf))
         return;
 
a5570b79
     if (!(pdf->ctx)) {
         goto cleanup;
     }
ebcca55f
 
a5570b79
     if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES) || !(pdf->ctx->wrkproperty)) {
         goto cleanup;
     }
ebcca55f
 
084707b3
     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
a5570b79
     if (!(pdfobj)) {
         goto cleanup;
     }
ebcca55f
 
0e7442f1
     if (pdf->stats.author) {
9d33052f
         if (!pdf->stats.author->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);
             if (out) {
                 free(pdf->stats.author->data);
                 pdf->stats.author->data = out;
                 pdf->stats.author->meta.length = strlen(out);
                 pdf->stats.author->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) {
             cli_jsonstr(pdfobj, "Author", pdf->stats.author->data);
d010b117
         } else if (pdf->stats.author->data && pdf->stats.author->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Author", b64);
             cli_jsonbool(pdfobj, "Author_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "Author", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.creator) {
9d33052f
         if (!pdf->stats.creator->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);
             if (out) {
                 free(pdf->stats.creator->data);
                 pdf->stats.creator->data = out;
                 pdf->stats.creator->meta.length = strlen(out);
                 pdf->stats.creator->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) {
             cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data);
d010b117
         } else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Creator", b64);
             cli_jsonbool(pdfobj, "Creator_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "Creator", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.producer) {
9d33052f
         if (!pdf->stats.producer->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);
             if (out) {
                 free(pdf->stats.producer->data);
                 pdf->stats.producer->data = out;
                 pdf->stats.producer->meta.length = strlen(out);
                 pdf->stats.producer->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) {
             cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data);
d010b117
         } else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Producer", b64);
             cli_jsonbool(pdfobj, "Producer_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "Producer", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.modificationdate) {
9d33052f
         if (!pdf->stats.modificationdate->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
             if (out) {
                 free(pdf->stats.modificationdate->data);
                 pdf->stats.modificationdate->data = out;
                 pdf->stats.modificationdate->meta.length = strlen(out);
                 pdf->stats.modificationdate->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) {
             cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data);
d010b117
         } else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "ModificationDate", b64);
             cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "ModificationDate", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.creationdate) {
9d33052f
         if (!pdf->stats.creationdate->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
             if (out) {
                 free(pdf->stats.creationdate->data);
                 pdf->stats.creationdate->data = out;
                 pdf->stats.creationdate->meta.length = strlen(out);
                 pdf->stats.creationdate->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) {
             cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data);
d010b117
         } else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "CreationDate", b64);
             cli_jsonbool(pdfobj, "CreationDate_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "CreationDate", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.title) {
9d33052f
         if (!pdf->stats.title->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);
             if (out) {
                 free(pdf->stats.title->data);
                 pdf->stats.title->data = out;
                 pdf->stats.title->meta.length = strlen(out);
                 pdf->stats.title->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) {
             cli_jsonstr(pdfobj, "Title", pdf->stats.title->data);
d010b117
         } else if (pdf->stats.title->data && pdf->stats.title->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Title", b64);
             cli_jsonbool(pdfobj, "Title_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "Title", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.subject) {
9d33052f
         if (!pdf->stats.subject->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);
             if (out) {
                 free(pdf->stats.subject->data);
                 pdf->stats.subject->data = out;
                 pdf->stats.subject->meta.length = strlen(out);
                 pdf->stats.subject->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) {
             cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data);
d010b117
         } else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Subject", b64);
             cli_jsonbool(pdfobj, "Subject_base64", 1);
             free(b64);
5f31c9b4
         } else {
             cli_jsonstr(pdfobj, "Subject", "");
9d33052f
         }
0e7442f1
     }
     if (pdf->stats.keywords) {
9d33052f
         if (!pdf->stats.keywords->meta.success) {
             char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
             if (out) {
                 free(pdf->stats.keywords->data);
                 pdf->stats.keywords->data = out;
                 pdf->stats.keywords->meta.length = strlen(out);
                 pdf->stats.keywords->meta.success = 1;
24db616f
             }
0e7442f1
         }
9d33052f
 
         if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) {
             cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data);
d010b117
         } else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) {
5f31c9b4
             char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
9d33052f
             cli_jsonstr(pdfobj, "Keywords", b64);
             cli_jsonbool(pdfobj, "Keywords_base64", 1);
             free(b64);
5f31c9b4
         } else {
188e40ae
             cli_jsonstr(pdfobj, "Keywords", "");
9d33052f
         }
0e7442f1
     }
ebcca55f
     if (pdf->stats.ninvalidobjs)
         cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
     if (pdf->stats.njs)
         cli_jsonint(pdfobj, "JavaScriptObjectCount", pdf->stats.njs);
     if (pdf->stats.nflate)
         cli_jsonint(pdfobj, "DeflateObjectCount", pdf->stats.nflate);
     if (pdf->stats.nactivex)
         cli_jsonint(pdfobj, "ActiveXObjectCount", pdf->stats.nactivex);
     if (pdf->stats.nflash)
         cli_jsonint(pdfobj, "FlashObjectCount", pdf->stats.nflash);
     if (pdf->stats.ncolors)
         cli_jsonint(pdfobj, "ColorCount", pdf->stats.ncolors);
     if (pdf->stats.nasciihexdecode)
         cli_jsonint(pdfobj, "AsciiHexDecodeObjectCount", pdf->stats.nasciihexdecode);
     if (pdf->stats.nascii85decode)
         cli_jsonint(pdfobj, "Ascii85DecodeObjectCount", pdf->stats.nascii85decode);
     if (pdf->stats.nembeddedfile)
         cli_jsonint(pdfobj, "EmbeddedFileCount", pdf->stats.nembeddedfile);
     if (pdf->stats.nimage)
         cli_jsonint(pdfobj, "ImageCount", pdf->stats.nimage);
     if (pdf->stats.nlzw)
         cli_jsonint(pdfobj, "LZWCount", pdf->stats.nlzw);
     if (pdf->stats.nrunlengthdecode)
         cli_jsonint(pdfobj, "RunLengthDecodeCount", pdf->stats.nrunlengthdecode);
     if (pdf->stats.nfaxdecode)
         cli_jsonint(pdfobj, "FaxDecodeCount", pdf->stats.nfaxdecode);
     if (pdf->stats.njbig2decode)
         cli_jsonint(pdfobj, "JBIG2DecodeCount", pdf->stats.njbig2decode);
     if (pdf->stats.ndctdecode)
         cli_jsonint(pdfobj, "DCTDecodeCount", pdf->stats.ndctdecode);
     if (pdf->stats.njpxdecode)
         cli_jsonint(pdfobj, "JPXDecodeCount", pdf->stats.njpxdecode);
     if (pdf->stats.ncrypt)
         cli_jsonint(pdfobj, "CryptCount", pdf->stats.ncrypt);
     if (pdf->stats.nstandard)
         cli_jsonint(pdfobj, "StandardCount", pdf->stats.nstandard);
     if (pdf->stats.nsigned)
         cli_jsonint(pdfobj, "SignedCount", pdf->stats.nsigned);
     if (pdf->stats.nopenaction)
         cli_jsonint(pdfobj, "OpenActionCount", pdf->stats.nopenaction);
     if (pdf->stats.nlaunch)
         cli_jsonint(pdfobj, "LaunchCount", pdf->stats.nlaunch);
     if (pdf->stats.npage)
         cli_jsonint(pdfobj, "PageCount", pdf->stats.npage);
09ff1409
     if (pdf->stats.nrichmedia)
         cli_jsonint(pdfobj, "RichMediaCount", pdf->stats.nrichmedia);
     if (pdf->stats.nacroform)
         cli_jsonint(pdfobj, "AcroFormCount", pdf->stats.nacroform);
     if (pdf->stats.nxfa)
         cli_jsonint(pdfobj, "XFACount", pdf->stats.nxfa);
cfeac6cd
     if (pdf->flags & (1 << BAD_PDF_VERSION))
         cli_jsonbool(pdfobj, "BadVersion", 1);
     if (pdf->flags & (1 << BAD_PDF_HEADERPOS))
         cli_jsonbool(pdfobj, "BadHeaderPosition", 1);
     if (pdf->flags & (1 << BAD_PDF_TRAILER))
         cli_jsonbool(pdfobj, "BadTrailer", 1);
     if (pdf->flags & (1 << BAD_PDF_TOOMANYOBJS))
         cli_jsonbool(pdfobj, "TooManyObjects", 1);
     if (pdf->flags & (1 << ENCRYPTED_PDF)) {
         cli_jsonbool(pdfobj, "Encrypted", 1);
         if (pdf->flags & (1 << DECRYPTABLE_PDF))
             cli_jsonbool(pdfobj, "Decryptable", 1);
fc84532e
         else
             cli_jsonbool(pdfobj, "Decryptable", 0);
cfeac6cd
     }
a5570b79
 
c7fd0220
     for (i=0; i < pdf->nobjs; i++) {
         if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) {
             json_object *truncobj;
 
             truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");
             if (!(truncobj))
                 continue;
 
             cli_jsonint_array(truncobj, pdf->objs[i].id>>8);
         }
     }
 
a5570b79
 cleanup:
     if ((pdf->stats.author)) {
9d33052f
         if (pdf->stats.author->data)
             free(pdf->stats.author->data);
a5570b79
         free(pdf->stats.author);
         pdf->stats.author = NULL;
     }
 
     if (pdf->stats.creator) {
9d33052f
         if (pdf->stats.creator->data)
             free(pdf->stats.creator->data);
a5570b79
         free(pdf->stats.creator);
         pdf->stats.creator = NULL;
     }
 
     if (pdf->stats.producer) {
9d33052f
         if (pdf->stats.producer->data)
             free(pdf->stats.producer->data);
a5570b79
         free(pdf->stats.producer);
         pdf->stats.producer = NULL;
     }
 
     if (pdf->stats.modificationdate) {
9d33052f
         if (pdf->stats.modificationdate->data)
             free(pdf->stats.modificationdate->data);
a5570b79
         free(pdf->stats.modificationdate);
         pdf->stats.modificationdate = NULL;
     }
 
     if (pdf->stats.creationdate) {
9d33052f
         if (pdf->stats.creationdate->data)
             free(pdf->stats.creationdate->data);
a5570b79
         free(pdf->stats.creationdate);
         pdf->stats.creationdate = NULL;
     }
754f976a
 
     if (pdf->stats.title) {
9d33052f
         if (pdf->stats.title->data)
             free(pdf->stats.title->data);
754f976a
         free(pdf->stats.title);
         pdf->stats.title = NULL;
     }
 
     if (pdf->stats.subject) {
9d33052f
         if (pdf->stats.subject->data)
             free(pdf->stats.subject->data);
754f976a
         free(pdf->stats.subject);
         pdf->stats.subject = NULL;
     }
 
     if (pdf->stats.keywords) {
9d33052f
         if (pdf->stats.keywords->data)
             free(pdf->stats.keywords->data);
754f976a
         free(pdf->stats.keywords);
         pdf->stats.keywords = NULL;
     }
ebcca55f
 }
63803da5
 #endif