libclamav/pdfdecode.c
7ded9e29
 /*
1158b285
  *  Copyright (C) 2016-2018 Cisco and/or its affiliates. All rights reserved.
7ded9e29
  *
  *  Author: Kevin Lin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  *
  *  In addition, as a special exception, the copyright holders give
  *  permission to link the code of portions of this program with the
  *  OpenSSL library under certain conditions as described in each
  *  individual source file, and distribute linked combinations
  *  including the two.
02840644
  *
7ded9e29
  *  You must obey the GNU General Public License in all respects
  *  for all of the code used other than OpenSSL.  If you modify
  *  file(s) with this exception, you may extend this exception to your
  *  version of the file(s), but you are not obligated to do so.  If you
  *  do not wish to do so, delete this exception statement from your
  *  version.  If you delete this exception statement from all source
  *  files in the program, then also delete it here.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
288057e9
 #include <stddef.h>
7ded9e29
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <errno.h>
288057e9
 #ifdef HAVE_LIMITS_H
7ded9e29
 #include <limits.h>
 #endif
288057e9
 #ifdef HAVE_UNISTD_H
7ded9e29
 #include <unistd.h>
 #endif
 #include <zlib.h>
 
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
 #include "clamav.h"
 #include "others.h"
 #include "pdf.h"
eaf52211
 #include "pdfdecode.h"
7ded9e29
 #include "str.h"
 #include "bytecode.h"
 #include "bytecode_api.h"
e8a23886
 #include "lzw/lzwdec.h"
7ded9e29
 
a081b3e9
 /* Token flag: set by pdf_decodestream() when its 'xref' argument is non-zero.
  * pdf_decodestream_internal() skips the forced (non-filter) decryption for
  * tokens carrying this flag. */
 #define PDFTOKEN_FLAG_XREF 0x1
 
7ded9e29
 /* Working state for one stream while it is passed through its filter chain. */
 struct pdf_token {
288057e9
     uint32_t flags;   /* tracking flags (PDFTOKEN_FLAG_*) */
     uint32_t success; /* count of successfully decoded filters */
     uint32_t length;  /* length of current content, in bytes; TODO: transition to size_t */
     uint8_t *content; /* content stream: heap buffer that each filter frees and replaces on success */
7ded9e29
 };
 
19f8f7c1
 /* Runs the filter chain on a token and writes the decoded result to 'fout'. */
 static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
02840644
 /* Writes the current token content to a temp file (used when keeptmp is enabled). */
 static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, uint32_t lvl);
1158b285
 
 /* Individual stream filters; the ones defined in this part of the file replace
  * token->content / token->length in place when they succeed. */
 static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
 static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
 static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
 static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
 static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
 static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
 
 /**
  * @brief       Wrapper function for pdf_decodestream_internal.
02840644
  *
1158b285
  * Allocate a token object to store decoded filter data.
  * Parse/decode the filter data and scan it.
02840644
  *
1158b285
  * @param pdf       Pdf context structure.
  * @param obj       The object we found the filter content in.
  * @param params    (optional) Dictionary parameters describing the filter data.
  * @param stream    Filter stream buffer pointer.
  * @param streamlen Length of filter stream buffer.
  * @param xref      Indicates if the stream is an /XRef stream.  Do not apply forced decryption on /XRef streams.
  * @param fout      File descriptor to write to to be scanned.
  * @param[out] rc   Return code ()
  * @param objstm    (optional) Object stream context structure.
19f8f7c1
  * @return size_t   The number of bytes written to 'fout' to be scanned.
1158b285
  */
19f8f7c1
 size_t pdf_decodestream(
1158b285
     struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
     const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
     struct objstm_struct *objstm)
 {
     struct pdf_token *token = NULL;
288057e9
     size_t bytes_scanned    = 0;
50876732
     cli_ctx *ctx            = NULL;
07a72006
 
1158b285
     if (!status) {
         /* invalid args, and no way to pass back the status code */
19f8f7c1
         return 0;
1158b285
     }
7ded9e29
 
1158b285
     if (!pdf || !obj) {
         /* Invalid args */
19f8f7c1
         *status = CL_EARG;
1158b285
         goto done;
     }
7ded9e29
 
50876732
     ctx = pdf->ctx;
 
7aad5a3b
     if (!stream || !streamlen || fout < 0) {
288057e9
         cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id >> 8, obj->id & 0xff);
19f8f7c1
         *status = CL_ENULLARG;
1158b285
         goto done;
7ded9e29
     }
 
19f8f7c1
     *status = CL_SUCCESS;
 
eaf52211
 #if 0
     if (params)
         pdf_print_dict(params, 0);
 #endif
 
7ded9e29
     token = cli_malloc(sizeof(struct pdf_token));
7aad5a3b
     if (!token) {
19f8f7c1
         *status = CL_EMEM;
1158b285
         goto done;
7aad5a3b
     }
7ded9e29
 
a081b3e9
     token->flags = 0;
     if (xref)
         token->flags |= PDFTOKEN_FLAG_XREF;
 
bfd8ca3e
     token->success = 0;
 
7ded9e29
     token->content = cli_malloc(streamlen);
     if (!token->content) {
19f8f7c1
         *status = CL_EMEM;
1158b285
         goto done;
7ded9e29
     }
     memcpy(token->content, stream, streamlen);
     token->length = streamlen;
 
1158b285
     cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));
7aad5a3b
 
19f8f7c1
     bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, status, objstm);
 
     if ((CL_VIRUS == *status) && !SCAN_ALLMATCHES) {
         goto done;
     }
1158b285
 
19f8f7c1
     if (0 == token->success) {
1158b285
         /*
19f8f7c1
          * Either:
          *  a) it failed to decode any filters, or
          *  b) there were no filters.
          *
          * Write out the raw stream to be scanned.
          *
          * Nota bene: If it did decode any filters, the internal() function would
          *            have written out the decoded stream to be scanned.
1158b285
          */
bfd8ca3e
         if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
1158b285
             cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");
bfd8ca3e
 
             if (cli_writen(fout, stream, streamlen) != streamlen) {
19f8f7c1
                 cli_errmsg("pdf_decodestream: failed to write raw stream to output file\n");
             } else {
                 bytes_scanned = streamlen;
bfd8ca3e
             }
eaf52211
         }
7ded9e29
     }
 
1158b285
 done:
     /*
      * Free up the token, and token content, if any.
      */
288057e9
     if (NULL != token) {
1158b285
         if (NULL != token->content) {
             free(token->content);
             token->content = NULL;
288057e9
             token->length  = 0;
1158b285
         }
         free(token);
         token = NULL;
     }
 
     return bytes_scanned;
7ded9e29
 }
 
1158b285
 /**
02840644
  * @brief       Decode filter buffer data.
  *
1158b285
  * Attempt to decompress, decrypt or otherwise parse it.
02840644
  *
1158b285
  * @param pdf           Pdf context structure.
  * @param obj           The object we found the filter content in.
  * @param params        (optional) Dictionary parameters describing the filter data.
  * @param token         Pointer to and length of filter data.
  * @param fout          File handle to write data to to be scanned.
  * @param[out] status   CL_CLEAN/CL_SUCCESS or CL_VIRUS/CL_E<error>
  * @param objstm        (optional) Object stream context structure.
  * @return ptrdiff_t    The number of bytes we wrote to 'fout'. -1 if failed out.
  */
19f8f7c1
 static size_t pdf_decodestream_internal(
1158b285
     struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
     struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm)
7ded9e29
 {
288057e9
     cl_error_t vir       = CL_CLEAN;
     cl_error_t retval    = CL_SUCCESS;
19f8f7c1
     size_t bytes_scanned = 0;
50876732
     cli_ctx *ctx         = NULL;
288057e9
     const char *filter   = NULL;
02840644
     uint32_t i;
7ded9e29
 
1158b285
     if (!status) {
         /* invalid args, and no way to pass back the status code */
19f8f7c1
         return 0;
1158b285
     }
 
     if (!pdf || !obj || !token) {
         /* Invalid args */
19f8f7c1
         *status = CL_EARG;
1158b285
         goto done;
     }
288057e9
 
50876732
     ctx     = pdf->ctx;
19f8f7c1
     *status = CL_SUCCESS;
288057e9
 
0018a8e7
     /*
      * if pdf is decryptable, scan for CRYPT filter
      * if none, force a DECRYPT filter application
      */
     if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) {
a081b3e9
         if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */
1158b285
             cli_dbgmsg("pdf_decodestream_internal: skipping decoding => non-filter CRYPT (reason: xref)\n");
a081b3e9
         else {
1158b285
             cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n");
             retval = filter_decrypt(pdf, obj, params, token, 1);
             if (retval != CL_SUCCESS) {
19f8f7c1
                 *status = CL_EPARSE;
1158b285
                 goto done;
a081b3e9
             }
0018a8e7
         }
     }
 
7ded9e29
     for (i = 0; i < obj->numfilters; i++) {
288057e9
         switch (obj->filterlist[i]) {
             case OBJ_FILTER_A85:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => ASCII85DECODE\n", obj->filterlist[i]);
288057e9
                 retval = filter_ascii85decode(pdf, obj, token);
                 break;
7ded9e29
 
288057e9
             case OBJ_FILTER_RL:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => RLDECODE\n", obj->filterlist[i]);
288057e9
                 retval = filter_rldecode(pdf, obj, token);
                 break;
739e5052
 
288057e9
             case OBJ_FILTER_FLATE:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => FLATEDECODE\n", obj->filterlist[i]);
288057e9
                 retval = filter_flatedecode(pdf, obj, params, token);
                 break;
7ded9e29
 
288057e9
             case OBJ_FILTER_AH:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => ASCIIHEXDECODE\n", obj->filterlist[i]);
288057e9
                 retval = filter_asciihexdecode(pdf, obj, token);
                 break;
7ded9e29
 
288057e9
             case OBJ_FILTER_CRYPT:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => CRYPT\n", obj->filterlist[i]);
288057e9
                 retval = filter_decrypt(pdf, obj, params, token, 0);
                 break;
eaf52211
 
288057e9
             case OBJ_FILTER_LZW:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: decoding [%u] => LZWDECODE\n", obj->filterlist[i]);
288057e9
                 retval = filter_lzwdecode(pdf, obj, params, token);
                 break;
e8a23886
 
288057e9
             case OBJ_FILTER_JPX:
                 if (!filter) filter = "JPXDECODE";
             case OBJ_FILTER_DCT:
                 if (!filter) filter = "DCTDECODE";
             case OBJ_FILTER_FAX:
                 if (!filter) filter = "FAXDECODE";
             case OBJ_FILTER_JBIG2:
                 if (!filter) filter = "JBIG2DECODE";
 
02840644
                 cli_dbgmsg("pdf_decodestream_internal: unimplemented filter type [%u] => %s\n", obj->filterlist[i], filter);
288057e9
                 filter = NULL;
                 retval = CL_BREAK;
                 break;
7ded9e29
 
288057e9
             default:
02840644
                 cli_dbgmsg("pdf_decodestream_internal: unknown filter type [%u]\n", obj->filterlist[i]);
288057e9
                 retval = CL_BREAK;
                 break;
7ded9e29
         }
 
d593717b
         if (!(token->content) || !(token->length)) {
02840644
             cli_dbgmsg("pdf_decodestream_internal: empty content, breaking after %u (of %u) filters\n", i, obj->numfilters);
d593717b
             break;
         }
 
1158b285
         if (retval != CL_SUCCESS) {
048a88e6
             if (retval == CL_VIRUS && SCAN_ALLMATCHES) {
1158b285
                 vir = CL_VIRUS;
             } else {
288057e9
                 const char *reason;
1158b285
 
                 switch (retval) {
288057e9
                     case CL_VIRUS:
                         *status = CL_VIRUS;
                         reason  = "detection";
                         break;
                     case CL_BREAK:
                         *status = CL_SUCCESS;
                         reason  = "decoding break";
                         break;
                     default:
                         *status = CL_EPARSE;
                         reason  = "decoding error";
                         break;
5c291512
                 }
 
02840644
                 cli_dbgmsg("pdf_decodestream_internal: stopping after %d (of %u) filters (reason: %s)\n", i, obj->numfilters, reason);
ce3cf4c6
                 break;
             }
d593717b
         }
bfd8ca3e
         token->success++;
d593717b
 
1158b285
         /* Dump the stream content to a text file if keeptmp is enabled. */
fdcf5109
         if (pdf->ctx->engine->keeptmp) {
288057e9
             if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i + 1)) {
19f8f7c1
                 cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n");
1158b285
             }
         }
     }
639615af
 
50876732
     if ((token->success > 0) && (NULL != token->content)) {
1158b285
         /*
19f8f7c1
          * Looks like we successfully decoded some or all of the stream filters,
          * so lets write it out to a file descriptor we scan.
          *
          * In the event that we didn't decode any filters (or maybe there
          * weren't any filters), the calling function will do the same with
          * the raw stream.
1158b285
          */
19f8f7c1
         if (CL_SUCCESS == cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
1158b285
             if (cli_writen(fout, token->content, token->length) != token->length) {
19f8f7c1
                 cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to output file\n");
             } else {
                 bytes_scanned = token->length;
1158b285
             }
07a72006
         }
     }
 
19f8f7c1
     if ((NULL != objstm) &&
288057e9
         ((CL_SUCCESS == *status) || ((CL_VIRUS == *status) && SCAN_ALLMATCHES))) {
02840644
         unsigned int objs_found = pdf->nobjs;
19f8f7c1
 
1158b285
         /*
          * The caller indicated that the decoded data is an object stream.
          * Perform experimental object stream parsing to extract objects from the stream.
          */
288057e9
         objstm->streambuf     = (char *)token->content;
1158b285
         objstm->streambuf_len = (size_t)token->length;
 
         /* Take ownership of the malloc'd buffer */
         token->content = NULL;
288057e9
         token->length  = 0;
1158b285
 
19f8f7c1
         /* Don't store the result. It's ok if some or all objects failed to parse.
            It would be far worse to add objects from a stream to the list, and then free
            the stream buffer due to an "error". */
288057e9
         if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm)) {
1158b285
             cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n");
         }
 
         if (pdf->nobjs <= objs_found) {
             cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm did not find any new objects!\n");
         } else {
02840644
             cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm found %u new objects.\n", pdf->nobjs - objs_found);
1158b285
         }
     }
 
 done:
 
     if (vir == CL_VIRUS)
         *status = CL_VIRUS;
 
     return bytes_scanned;
07a72006
 }
 
1158b285
 /**
  * @brief   Dump PDF filter content such as stream contents to a temp file.
02840644
  *
1158b285
  * Temp file is created in the pdf->dir directory.
  * Filename format is "pdf<pdf->files-1>_<lvl>".
02840644
  *
1158b285
  * @param pdf   Pdf context structure.
  * @param obj   The object we found the filter content in.
  * @param token The struct for the filter contents.
  * @param lvl   A unique index to distinguish the files from each other.
02840644
  * @return cl_error_t
1158b285
  */
02840644
 static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, uint32_t lvl)
07a72006
 {
     char fname[1024];
     int ifd;
 
02840644
     snprintf(fname, sizeof(fname), "%s" PATHSEP "pdf%02u_%02u", pdf->dir, (pdf->files - 1), lvl);
288057e9
     ifd = open(fname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);
07a72006
     if (ifd < 0) {
         char err[128];
 
         cli_errmsg("cli_pdf: can't create intermediate temporary file %s: %s\n", fname, cli_strerror(errno, err, sizeof(err)));
         return CL_ETMPFILE;
     }
 
02840644
     cli_dbgmsg("cli_pdf: decoded filter %u obj %u %u\n", lvl, obj->id >> 8, obj->id & 0xff);
07a72006
     cli_dbgmsg("         ... to %s\n", fname);
 
     if (cli_writen(ifd, token->content, token->length) != token->length) {
         cli_errmsg("cli_pdf: failed to write output file\n");
         close(ifd);
         return CL_EWRITE;
7ded9e29
     }
 
07a72006
     close(ifd);
7ded9e29
     return CL_SUCCESS;
 }
 
eaf52211
 /*
  * ascii85 inflation
  * See http://www.piclist.com/techref/method/encode.htm (look for base85)
  */
1158b285
 static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
7ded9e29
 {
0018a8e7
     uint8_t *decoded, *dptr;
7ded9e29
     uint32_t declen = 0;
 
     const uint8_t *ptr = (uint8_t *)token->content;
     uint32_t remaining = token->length;
     int quintet = 0, rc = CL_SUCCESS;
     uint64_t sum = 0;
 
0018a8e7
     /* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */
288057e9
     if (!(dptr = decoded = (uint8_t *)cli_malloc((4 * remaining) + 1))) {
7ded9e29
         cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
         return CL_EMEM;
     }
 
288057e9
     if (cli_memstr((const char *)ptr, remaining, "~>", 2) == NULL)
7ded9e29
         cli_dbgmsg("cli_pdf: no EOF marker found\n");
 
     while (remaining > 0) {
         int byte = (remaining--) ? (int)*ptr++ : EOF;
 
288057e9
         if ((byte == '~') && (remaining > 0) && (*ptr == '>'))
7ded9e29
             byte = EOF;
 
288057e9
         if (byte >= '!' && byte <= 'u') {
7ded9e29
             sum = (sum * 85) + ((uint32_t)byte - '!');
288057e9
             if (++quintet == 5) {
0018a8e7
                 *dptr++ = (unsigned char)(sum >> 24);
                 *dptr++ = (unsigned char)((sum >> 16) & 0xFF);
                 *dptr++ = (unsigned char)((sum >> 8) & 0xFF);
                 *dptr++ = (unsigned char)(sum & 0xFF);
7ded9e29
 
                 declen += 4;
                 quintet = 0;
288057e9
                 sum     = 0;
7ded9e29
             }
288057e9
         } else if (byte == 'z') {
             if (quintet) {
7ded9e29
                 cli_dbgmsg("cli_pdf: unexpected 'z'\n");
                 rc = CL_EFORMAT;
                 break;
             }
 
0018a8e7
             *dptr++ = '\0';
             *dptr++ = '\0';
             *dptr++ = '\0';
             *dptr++ = '\0';
7ded9e29
 
             declen += 4;
288057e9
         } else if (byte == EOF) {
02c120e8
             cli_dbgmsg("cli_pdf: last quintet contains %d bytes\n", quintet);
288057e9
             if (quintet) {
7ded9e29
                 int i;
 
288057e9
                 if (quintet == 1) {
02c120e8
                     cli_dbgmsg("cli_pdf: invalid last quintet (only 1 byte)\n");
7ded9e29
                     rc = CL_EFORMAT;
                     break;
                 }
 
288057e9
                 for (i = quintet; i < 5; i++)
7ded9e29
                     sum *= 85;
 
288057e9
                 if (quintet > 1)
7ded9e29
                     sum += (0xFFFFFF >> ((quintet - 2) * 8));
 
288057e9
                 for (i = 0; i < quintet - 1; i++)
0018a8e7
                     *dptr++ = (uint8_t)((sum >> (24 - 8 * i)) & 0xFF);
288057e9
                 declen += quintet - 1;
7ded9e29
             }
 
             break;
288057e9
         } else if (!isspace(byte)) {
7ded9e29
             cli_dbgmsg("cli_pdf: invalid character 0x%x @ %lu\n",
288057e9
                        byte & 0xFF, (unsigned long)(token->length - remaining));
7ded9e29
 
             rc = CL_EFORMAT;
             break;
         }
     }
 
     if (rc == CL_SUCCESS) {
         free(token->content);
 
         cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n",
                    (unsigned long)declen, (unsigned long)(token->length));
 
         token->content = decoded;
288057e9
         token->length  = declen;
7ded9e29
     } else {
a042e6f0
         if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
             pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
 
046d4cc9
         cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9
                    (unsigned long)(token->length - remaining), (unsigned long)(token->length));
7ded9e29
         free(decoded);
     }
     return rc;
 }
 
739e5052
 /* imported from razorback */
1158b285
 static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
7ded9e29
 {
739e5052
     uint8_t *decoded, *temp;
     uint32_t declen = 0, capacity = 0;
 
     uint8_t *content = (uint8_t *)token->content;
288057e9
     uint32_t length  = token->length;
     uint32_t offset  = 0;
     int rc           = CL_SUCCESS;
739e5052
 
d593717b
     UNUSEDPARAM(obj);
 
739e5052
     if (!(decoded = cli_calloc(BUFSIZ, sizeof(uint8_t)))) {
         cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
         return CL_EMEM;
     }
     capacity = BUFSIZ;
 
     while (offset < length) {
         uint8_t srclen = content[offset++];
         if (srclen < 128) {
             /* direct copy of (srclen + 1) bytes */
             if (offset + srclen + 1 > length) {
                 cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
288057e9
                            (long unsigned)(offset + srclen + 1), (long unsigned)(length - offset));
739e5052
                 rc = CL_EFORMAT;
                 break;
             }
             if (declen + srclen + 1 > capacity) {
288057e9
                 if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b
                     break;
 
739e5052
                 if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
                     cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
                     rc = CL_EMEM;
                     break;
                 }
                 decoded = temp;
                 capacity += BUFSIZ;
             }
 
288057e9
             memcpy(decoded + declen, content + offset, srclen + 1);
739e5052
             offset += srclen + 1;
             declen += srclen + 1;
         } else if (srclen > 128) {
             /* copy the next byte (257 - srclen) times */
             if (offset + 1 > length) {
                 cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
288057e9
                            (long unsigned)(offset + srclen + 1), (long unsigned)(length - offset));
739e5052
                 rc = CL_EFORMAT;
                 break;
             }
             if (declen + (257 - srclen) + 1 > capacity) {
288057e9
                 if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b
                     break;
 
739e5052
                 if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
                     cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
                     rc = CL_EMEM;
                     break;
                 }
                 decoded = temp;
                 capacity += BUFSIZ;
             }
 
288057e9
             memset(decoded + declen, content[offset], 257 - srclen);
739e5052
             offset++;
             declen += 257 - srclen;
         } else { /* srclen == 128 */
             /* end of data */
             cli_dbgmsg("cli_pdf: end-of-stream marker @ offset %lu (%lu bytes remaining)\n",
288057e9
                        (unsigned long)offset, (long unsigned)(token->length - offset));
739e5052
             break;
         }
     }
 
     if (rc == CL_SUCCESS) {
         free(token->content);
 
02c120e8
         cli_dbgmsg("cli_pdf: decoded %lu bytes from %lu total bytes\n",
739e5052
                    (unsigned long)declen, (unsigned long)(token->length));
 
         token->content = decoded;
288057e9
         token->length  = declen;
739e5052
     } else {
046d4cc9
         cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
739e5052
                    (unsigned long)offset, (unsigned long)(token->length));
         free(decoded);
     }
     return rc;
7ded9e29
 }
 
 /*
  * Find the start of the next line in a buffer.
  *
  * Scans forward to the first '\n' or '\r', skips the whole run of such
  * characters, and returns a pointer to the first byte after that run. If the
  * buffer holds no line break, or nothing follows it, the returned pointer is
  * content + length (one past the end). Never returns NULL for a non-NULL
  * 'content'.
  */
 static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length)
 {
     uint8_t *cur  = content;
     uint32_t left = length;
     int seen_eol  = 0;
 
     while (left > 0) {
         const int is_eol = ('\n' == *cur) || ('\r' == *cur);
 
         if (is_eol) {
             seen_eol = 1;
         } else if (seen_eol) {
             /* first byte following the line break(s) */
             break;
         }
 
         cur++;
         left--;
     }
 
     return cur;
 }
 
1158b285
 /*
  * FlateDecode filter: inflate token->content with zlib.
  *
  * - A single leading '\r' is skipped and the object flagged BAD_STREAMSTART.
  * - If the first inflate() call does not return Z_OK and produced no output,
  *   decoding is restarted from the start of the next line (see
  *   decode_nextlinestart()) and the object is flagged BAD_FLATESTART.
  * - The output buffer grows in BUFSIZ steps; each step is checked with
  *   cli_checklimits().
  * - zlib errors after at least one output byte are tolerated: the partial
  *   output is kept and the object flagged BAD_FLATE. An error with no output
  *   at all yields CL_EFORMAT.
  *
  * On success token->content / token->length are replaced by the inflated data
  * (the old buffer is freed); on failure the token is left untouched.
  * 'params' is unused.
  *
  * Returns CL_SUCCESS, CL_EMEM, CL_EFORMAT, or the cli_checklimits() result
  * that stopped the output buffer from growing.
  */
 static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
7ded9e29
 {
     uint8_t *decoded, *temp;
     uint32_t declen = 0, capacity = 0;
 
     uint8_t *content = (uint8_t *)token->content;
288057e9
     uint32_t length  = token->length;
7ded9e29
     z_stream stream;
102cd430
     int zstat, rc = CL_SUCCESS;
7ded9e29
 
eaf52211
     UNUSEDPARAM(params);
 
7ded9e29
     if (*content == '\r') {
         content++;
         length--;
eaf52211
         pdfobj_flag(pdf, obj, BAD_STREAMSTART);
7ded9e29
         /* PDF spec says stream is followed by \r\n or \n, but not \r alone.
          * Sample 0015315109, it has \r followed by zlib header.
          * Flag pdf as suspicious, and attempt to extract by skipping the \r.
          */
         /* Nothing but the '\r': report success and leave the token as it is. */
         if (!length)
d593717b
             return CL_SUCCESS;
7ded9e29
     }
 
     if (!(decoded = (uint8_t *)cli_calloc(BUFSIZ, sizeof(uint8_t)))) {
         cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
         return CL_EMEM;
     }
     capacity = BUFSIZ;
 
     /* Zeroed z_stream; the input/output windows are set before inflateInit(). */
     memset(&stream, 0, sizeof(stream));
288057e9
     stream.next_in   = (Bytef *)content;
     stream.avail_in  = length;
     stream.next_out  = (Bytef *)decoded;
7ded9e29
     stream.avail_out = BUFSIZ;
 
     zstat = inflateInit(&stream);
288057e9
     if (zstat != Z_OK) {
7ded9e29
         cli_warnmsg("cli_pdf: inflateInit failed\n");
         free(decoded);
         return CL_EMEM;
     }
 
     /* initial inflate */
     zstat = inflate(&stream, Z_NO_FLUSH);
     /* check if nothing written whatsoever */
     if ((zstat != Z_OK) && (stream.avail_out == BUFSIZ)) {
         /* skip till EOL, and try inflating from there, sometimes
          * PDFs contain extra whitespace */
         uint8_t *q = decode_nextlinestart(content, length);
         /* q points within content or one past its end; when no line break
          * exists the retry below runs with zero input bytes. */
         if (q) {
             (void)inflateEnd(&stream);
             length -= q - content;
             content = q;
 
288057e9
             stream.next_in   = (Bytef *)content;
             stream.avail_in  = length;
             stream.next_out  = (Bytef *)decoded;
7ded9e29
             stream.avail_out = capacity;
 
             zstat = inflateInit(&stream);
288057e9
             if (zstat != Z_OK) {
7ded9e29
                 cli_warnmsg("cli_pdf: inflateInit failed\n");
                 free(decoded);
                 return CL_EMEM;
             }
 
eaf52211
             pdfobj_flag(pdf, obj, BAD_FLATESTART);
7ded9e29
         }
 
         zstat = inflate(&stream, Z_NO_FLUSH);
     }
 
     /* Keep inflating while zlib reports progress and input remains. */
     while (zstat == Z_OK && stream.avail_in) {
d593717b
         /* extend output capacity if needed,*/
288057e9
         if (stream.avail_out == 0) {
             if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
d593717b
                 break;
 
             if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
                 cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
                 rc = CL_EMEM;
                 break;
7ded9e29
             }
288057e9
             decoded          = temp;
             stream.next_out  = decoded + capacity;
d593717b
             stream.avail_out = BUFSIZ;
             /* declen counts the completely filled BUFSIZ chunks; the final,
              * partially filled chunk is added after the loop. */
             declen += BUFSIZ;
             capacity += BUFSIZ;
7ded9e29
         }
 
         /* continue inflation */
         zstat = inflate(&stream, Z_NO_FLUSH);
     }
 
     /* add stream end fragment to decoded length */
     declen += (BUFSIZ - stream.avail_out);
 
     /* error handling */
288057e9
     switch (zstat) {
         case Z_OK:
             cli_dbgmsg("cli_pdf: Z_OK on stream inflation completion\n");
             /* intentional fall-through */
         case Z_STREAM_END:
             cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes (%lu bytes remaining)\n",
                        (unsigned long)declen, (unsigned long)(token->length), (unsigned long)(stream.avail_in));
             break;
7ded9e29
 
288057e9
         /* potentially fatal - *mostly* ignored as per older version */
         case Z_STREAM_ERROR:
         case Z_NEED_DICT:
         case Z_DATA_ERROR:
         case Z_MEM_ERROR:
         default:
             if (stream.msg)
                 cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
                            (unsigned long)declen, stream.msg, obj->id >> 8, obj->id & 0xff);
             else
                 cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
                            (unsigned long)declen, zstat, obj->id >> 8, obj->id & 0xff);
 
             /* Only a stream that produced no output at all is treated as a
              * failure; otherwise the partial output is kept (rc is not changed here). */
             if (declen == 0) {
                 pdfobj_flag(pdf, obj, BAD_FLATESTART);
                 cli_dbgmsg("cli_pdf: no bytes were inflated.\n");
 
                 rc = CL_EFORMAT;
             } else {
                 pdfobj_flag(pdf, obj, BAD_FLATE);
             }
             break;
7ded9e29
     }
 
     (void)inflateEnd(&stream);
 
     if (rc == CL_SUCCESS) {
         /* Swap the inflated buffer into the token. */
         free(token->content);
 
         token->content = decoded;
288057e9
         token->length  = declen;
7ded9e29
     } else {
046d4cc9
         cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9
                    (unsigned long)(length - stream.avail_in), (unsigned long)(token->length));
7ded9e29
         free(decoded);
     }
 
     return rc;
 }
 
1158b285
 static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
7ded9e29
 {
     uint8_t *decoded;
 
     const uint8_t *content = (uint8_t *)token->content;
288057e9
     uint32_t length        = token->length;
7ded9e29
     uint32_t i, j;
1158b285
     cl_error_t rc = CL_SUCCESS;
7ded9e29
 
288057e9
     if (!(decoded = (uint8_t *)cli_calloc(length / 2 + 1, sizeof(uint8_t)))) {
7ded9e29
         cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
         return CL_EMEM;
     }
 
288057e9
     for (i = 0, j = 0; i + 1 < length; i++) {
7ded9e29
         if (content[i] == ' ')
             continue;
 
         if (content[i] == '>')
             break;
 
288057e9
         if (cli_hex2str_to((const char *)content + i, (char *)decoded + j, 2) == -1) {
7ded9e29
             if (length - i < 4)
                 continue;
 
             rc = CL_EFORMAT;
             break;
         }
 
         i++;
         j++;
     }
 
     if (rc == CL_SUCCESS) {
         free(token->content);
 
         cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n",
                    (unsigned long)j, (unsigned long)(token->length));
 
         token->content = decoded;
288057e9
         token->length  = j;
7ded9e29
     } else {
a042e6f0
         if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
             pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
 
046d4cc9
         cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
7ded9e29
                    (unsigned long)i, (unsigned long)(token->length));
         free(decoded);
     }
     return rc;
 }
eaf52211
 
 /* modes: 0 = use default/DecodeParms, 1 = use document setting */
1158b285
 static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode)
eaf52211
 {
     char *decrypted;
288057e9
     size_t length       = (size_t)token->length;
eaf52211
     enum enc_method enc = ENC_IDENTITY;
 
     if (mode)
         enc = get_enc_method(pdf, obj);
     else if (params) {
         struct pdf_dict_node *node = params->nodes;
 
         while (node) {
             if (node->type == PDF_DICT_STRING) {
                 if (!strncmp(node->key, "/Type", 6)) { /* optional field - Type */
                     /* MUST be "CryptFilterDecodeParms" */
e8a23886
                     if (node->value)
                         cli_dbgmsg("cli_pdf: Type: %s\n", (char *)(node->value));
eaf52211
                 } else if (!strncmp(node->key, "/Name", 6)) { /* optional field - Name */
                     /* overrides document and default encryption method */
e8a23886
                     if (node->value)
                         cli_dbgmsg("cli_pdf: Name: %s\n", (char *)(node->value));
1d0cdc67
                     enc = parse_enc_method(pdf->CF, pdf->CF_n, (char *)(node->value), enc);
eaf52211
                 }
             }
             node = node->next;
         }
     }
 
fdcf5109
     decrypted = decrypt_any(pdf, obj->id, (const char *)token->content, &length, enc);
eaf52211
     if (!decrypted) {
         cli_dbgmsg("cli_pdf: failed to decrypt stream\n");
d593717b
         return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */
eaf52211
     }
 
e09d8843
     cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n",
                length, token->length);
eaf52211
 
     free(token->content);
     token->content = (uint8_t *)decrypted;
288057e9
     token->length  = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */
eaf52211
     return CL_SUCCESS;
 }
e8a23886
 
1158b285
 static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
e8a23886
 {
     uint8_t *decoded, *temp;
     uint32_t declen = 0, capacity = 0;
 
     uint8_t *content = (uint8_t *)token->content;
288057e9
     uint32_t length  = token->length;
e8a23886
     lzw_stream stream;
102cd430
     int echg = 1, lzwstat, rc = CL_SUCCESS;
e8a23886
 
ce3cf4c6
     if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW))
         return CL_BREAK;
 
e8a23886
     if (params) {
         struct pdf_dict_node *node = params->nodes;
 
         while (node) {
             if (node->type == PDF_DICT_STRING) {
                 if (!strncmp(node->key, "/EarlyChange", 13)) { /* optional field - lzw flag */
                     char *end, *value = (char *)node->value;
                     long set;
 
                     if (value) {
                         cli_dbgmsg("cli_pdf: EarlyChange: %s\n", value);
                         set = strtol(value, &end, 10);
                         if (end != value)
                             echg = (int)set;
                     }
                 }
             }
             node = node->next;
         }
     }
 
     if (*content == '\r') {
         content++;
         length--;
         pdfobj_flag(pdf, obj, BAD_STREAMSTART);
         /* PDF spec says stream is followed by \r\n or \n, but not \r alone.
          * Sample 0015315109, it has \r followed by zlib header.
          * Flag pdf as suspicious, and attempt to extract by skipping the \r.
          */
         if (!length)
             return CL_SUCCESS;
     }
 
     if (!(decoded = (uint8_t *)cli_calloc(BUFSIZ, sizeof(uint8_t)))) {
         cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
         return CL_EMEM;
     }
     capacity = BUFSIZ;
 
     memset(&stream, 0, sizeof(stream));
288057e9
     stream.next_in   = content;
     stream.avail_in  = length;
     stream.next_out  = decoded;
e8a23886
     stream.avail_out = BUFSIZ;
5c291512
     if (echg)
         stream.flags |= LZW_FLAG_EARLYCHG;
e8a23886
 
5c291512
     lzwstat = lzwInit(&stream);
288057e9
     if (lzwstat != Z_OK) {
e8a23886
         cli_warnmsg("cli_pdf: lzwInit failed\n");
         free(decoded);
         return CL_EMEM;
     }
 
     /* initial inflate */
     lzwstat = lzwInflate(&stream);
     /* check if nothing written whatsoever */
     if ((lzwstat != Z_OK) && (stream.avail_out == BUFSIZ)) {
         /* skip till EOL, and try inflating from there, sometimes
          * PDFs contain extra whitespace */
         uint8_t *q = decode_nextlinestart(content, length);
         if (q) {
             (void)lzwInflateEnd(&stream);
             length -= q - content;
             content = q;
 
288057e9
             stream.next_in   = (Bytef *)content;
             stream.avail_in  = length;
             stream.next_out  = (Bytef *)decoded;
e8a23886
             stream.avail_out = capacity;
 
5c291512
             lzwstat = lzwInit(&stream);
288057e9
             if (lzwstat != Z_OK) {
e8a23886
                 cli_warnmsg("cli_pdf: lzwInit failed\n");
                 free(decoded);
                 return CL_EMEM;
             }
 
             pdfobj_flag(pdf, obj, BAD_FLATESTART);
         }
 
         lzwstat = lzwInflate(&stream);
     }
 
     while (lzwstat == Z_OK && stream.avail_in) {
         /* extend output capacity if needed,*/
288057e9
         if (stream.avail_out == 0) {
             if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + BUFSIZ, 0, 0)) != CL_SUCCESS)
e8a23886
                 break;
 
             if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
                 cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
                 rc = CL_EMEM;
                 break;
             }
288057e9
             decoded          = temp;
             stream.next_out  = decoded + capacity;
e8a23886
             stream.avail_out = BUFSIZ;
             declen += BUFSIZ;
             capacity += BUFSIZ;
         }
 
         /* continue inflation */
         lzwstat = lzwInflate(&stream);
     }
 
     /* add stream end fragment to decoded length */
     declen += (BUFSIZ - stream.avail_out);
 
     /* error handling */
288057e9
     switch (lzwstat) {
         case LZW_OK:
             cli_dbgmsg("cli_pdf: LZW_OK on stream inflation completion\n");
             /* intentional fall-through */
         case LZW_STREAM_END:
             cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes (%lu bytes remaining)\n",
                        (unsigned long)declen, (unsigned long)(token->length), (unsigned long)(stream.avail_in));
             break;
e8a23886
 
288057e9
         /* potentially fatal - *mostly* ignored as per older version */
         case LZW_STREAM_ERROR:
         case LZW_DATA_ERROR:
         case LZW_MEM_ERROR:
         case LZW_BUF_ERROR:
         case LZW_DICT_ERROR:
         default:
             if (stream.msg)
                 cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
                            (unsigned long)declen, stream.msg, obj->id >> 8, obj->id & 0xff);
             else
                 cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
                            (unsigned long)declen, lzwstat, obj->id >> 8, obj->id & 0xff);
 
             if (declen == 0) {
                 pdfobj_flag(pdf, obj, BAD_FLATESTART);
                 cli_dbgmsg("cli_pdf: no bytes were inflated.\n");
 
                 rc = CL_EFORMAT;
             } else {
                 pdfobj_flag(pdf, obj, BAD_FLATE);
             }
             break;
e8a23886
     }
 
     (void)lzwInflateEnd(&stream);
 
     if (rc == CL_SUCCESS) {
         free(token->content);
 
         token->content = decoded;
288057e9
         token->length  = declen;
e8a23886
     } else {
046d4cc9
         cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
288057e9
                    (unsigned long)(length - stream.avail_in), (unsigned long)(token->length));
e8a23886
         free(decoded);
     }
 
567c73ec
     /*
        heuristic checks:
        - full dictionary heuristics?
        - invalid code points?
     */
5c291512
 
e8a23886
     return rc;
 }