libclamav/pdfng.c
12506782
 /*
8dc0817e
  *  Copyright (C) 2014, 2017-2018 Cisco and/or its affiliates. All rights reserved.
12506782
  *
  *  Author: Shawn Webb
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  *
  *  In addition, as a special exception, the copyright holders give
  *  permission to link the code of portions of this program with the
  *  OpenSSL library under certain conditions as described in each
  *  individual source file, and distribute linked combinations
  *  including the two.
02840644
  *
12506782
  *  You must obey the GNU General Public License in all respects
  *  for all of the code used other than OpenSSL.  If you modify
  *  file(s) with this exception, you may extend this exception to your
  *  version of the file(s), but you are not obligated to do so.  If you
  *  do not wish to do so, delete this exception statement from your
  *  version.  If you delete this exception statement from all source
  *  files in the program, then also delete it here.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <errno.h>
288057e9
 #ifdef HAVE_LIMITS_H
12506782
 #include <limits.h>
 #endif
288057e9
 #ifdef HAVE_UNISTD_H
12506782
 #include <unistd.h>
 #endif
 #include <zlib.h>
 
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
 #include "clamav.h"
 #include "others.h"
 #include "pdf.h"
 #include "scanners.h"
 #include "fmap.h"
 #include "str.h"
 #include "bytecode.h"
 #include "bytecode_api.h"
 #include "arc4.h"
 #include "rijndael.h"
 #include "textnorm.h"
 #include "json_api.h"
ddc14219
 #include "conv.h"
12506782
 
cd94be7a
 char *pdf_convert_utf(char *begin, size_t sz);
 
12506782
 char *pdf_convert_utf(char *begin, size_t sz)
 {
288057e9
     char *res = NULL;
0a185b82
     char *buf, *outbuf;
12506782
 #if HAVE_ICONV
0a185b82
     char *p1, *p2;
ddc14219
     size_t inlen, outlen, i;
12506782
     char *encodings[] = {
         "UTF-16",
288057e9
         NULL};
12506782
     iconv_t cd;
0a185b82
 #endif
12506782
 
288057e9
     buf = cli_calloc(1, sz + 1);
12506782
     if (!(buf))
         return NULL;
3a925de0
     memcpy(buf, begin, sz);
12506782
 
0a185b82
 #if HAVE_ICONV
12506782
     p1 = buf;
 
288057e9
     p2 = outbuf = cli_calloc(1, sz + 1);
12506782
     if (!(outbuf)) {
         free(buf);
         return NULL;
     }
 
288057e9
     for (i = 0; encodings[i] != NULL; i++) {
         p1    = buf;
         p2    = outbuf;
3a925de0
         inlen = outlen = sz;
12506782
 
         cd = iconv_open("UTF-8", encodings[i]);
         if (cd == (iconv_t)(-1)) {
c00baa37
             char errbuf[128];
288057e9
             cli_strerror(errno, errbuf, sizeof(errbuf));
c00baa37
             cli_errmsg("pdf_convert_utf: could not initialize iconv for encoding %s: %s\n", encodings[i], errbuf);
12506782
             continue;
         }
 
48ae4768
         iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
12506782
 
3a925de0
         if (outlen == sz) {
12506782
             /* Decoding unsuccessful right from the start */
             iconv_close(cd);
             continue;
         }
 
3a925de0
         outbuf[sz - outlen] = '\0';
12506782
 
         res = strdup(outbuf);
         iconv_close(cd);
         break;
     }
0a185b82
 #else
3a925de0
     outbuf = cli_utf16_to_utf8(buf, sz, UTF16_BOM);
ab9611d4
     if (!outbuf) {
         free(buf);
0a185b82
         return NULL;
ab9611d4
     }
12506782
 
0a185b82
     res = strdup(outbuf);
 #endif
12506782
     free(buf);
     free(outbuf);
e098bf4b
 
12506782
     return res;
 }
 
 /*
  * Read one unsigned decimal number on behalf of is_object_reference().
  *
  * Leading whitespace is skipped. The number must start with a digit and some
  * whitespace byte must exist between it and `end`, which guarantees that
  * strtoul() stops inside the buffer even though the buffer is not
  * NUL-terminated.
  *
  * On success the parsed value is stored in *value, *cursor is moved to the
  * first byte strtoul() did not consume, and 1 is returned. On failure 0 is
  * returned and *cursor is left untouched.
  */
 static int objref_read_number(char **cursor, const char *end, unsigned long *value)
 {
     char *p = *cursor;
     char *stop;

     /* Skip whitespace */
     while (p < end && isspace((unsigned char)p[0]))
         p++;

     if (p >= end || !isdigit((unsigned char)p[0]))
         return 0;

     /* Ensure strtoul() isn't going to go past our buffer */
     stop = p + 1;
     while (stop < end && !isspace((unsigned char)stop[0]))
         stop++;

     if (stop >= end)
         return 0;

     /* Clear errno first so that only an overflow from *this* call is seen. */
     errno  = 0;
     *value = strtoul(p, &stop, 10);
     if (*value == ULONG_MAX && errno != 0)
         return 0;

     *cursor = stop;
     return 1;
 }

 /*
  * Test whether the text at `begin` is an indirect object reference.
  *
  * Object references are always this format:
  *     XXXX YYYY R
  * where XXXX is the object ID and YYYY is the revision ID of the object.
  * The letter R signifies that this is a reference. In between each item can
  * be an arbitrary amount of whitespace.
  *
  * begin    First byte to examine.
  * endchar  In: one past the last readable byte. Out (only when a reference
  *          was recognised): the byte following the 'R'.
  * id       Optional. Receives (object ID << 8) | (revision ID & 0xff),
  *          truncated to 32 bits.
  *
  * Returns 1 if a reference was recognised, 0 otherwise. errno is reset as a
  * side effect of the number parsing.
  */
 int is_object_reference(char *begin, char **endchar, uint32_t *id)
 {
     char *end = *endchar;
     char *p   = begin;
     unsigned long objnum, revision;
     uint32_t t;

     if (!objref_read_number(&p, end, &objnum))
         return 0;

     if (!objref_read_number(&p, end, &revision))
         return 0;

     t = (uint32_t)(objnum << 8);
     t |= (uint32_t)(revision & 0xff);

     /* Skip even more whitespace */
     while (p < end && isspace((unsigned char)p[0]))
         p++;

     if (p >= end || p[0] != 'R')
         return 0;

     *endchar = p + 1;
     if (id)
         *id = t;

     return 1;
 }
 
e09d8843
 static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t *length)
e2b1880f
 {
     enum enc_method enc;
 
     /* handled only once in cli_pdf() */
     //pdf_handle_enc(pdf);
     if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
         enc = get_enc_method(pdf, obj);
         return decrypt_any(pdf, obj->id, in, length, enc);
     }
     return NULL;
 }
 
9d33052f
 char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
e2b1880f
 {
     char *wrkstr, *output = NULL;
288057e9
     size_t wrklen          = len, outlen, i;
e09d8843
     unsigned int likelyutf = 0;
e2b1880f
 
9d33052f
     if (!in)
         return NULL;
 
e2b1880f
     /* get a working copy */
288057e9
     wrkstr = cli_calloc(len + 1, sizeof(char));
e2b1880f
     if (!wrkstr)
         return NULL;
     memcpy(wrkstr, in, len);
 
00daf527
     //cli_errmsg("pdf_final: start(%d):   %s\n", wrklen, wrkstr);
e2b1880f
 
     /* convert PDF specific escape sequences, like octal sequences */
     /* TODO: replace the escape sequences directly in the wrkstr   */
     if (strchr(wrkstr, '\\')) {
288057e9
         output = cli_calloc(wrklen + 1, sizeof(char));
f4cbcd53
         if (!output) {
             free(wrkstr);
e2b1880f
             return NULL;
f4cbcd53
         }
e2b1880f
 
         outlen = 0;
         for (i = 0; i < wrklen; ++i) {
288057e9
             if ((i + 1 < wrklen) && wrkstr[i] == '\\') {
                 if ((i + 3 < wrklen) &&
                     (isdigit(wrkstr[i + 1]) && isdigit(wrkstr[i + 2]) && isdigit(wrkstr[i + 3]))) {
e2b1880f
                     /* octal sequence */
                     char octal[4], *check;
                     unsigned long value;
 
288057e9
                     memcpy(octal, &wrkstr[i + 1], 3);
e2b1880f
                     octal[3] = '\0';
 
                     value = (char)strtoul(octal, &check, 8);
                     /* check if all characters were converted */
                     if (check == &octal[3])
                         output[outlen++] = value;
                     i += 3; /* 4 with for loop [\ddd] */
                 } else {
                     /* other sequences */
288057e9
                     switch (wrkstr[i + 1]) {
                         case 'n':
                             output[outlen++] = 0x0a;
                             break;
                         case 'r':
                             output[outlen++] = 0x0d;
                             break;
                         case 't':
                             output[outlen++] = 0x09;
                             break;
                         case 'b':
                             output[outlen++] = 0x08;
                             break;
                         case 'f':
                             output[outlen++] = 0x0c;
                             break;
                         case '(':
                             output[outlen++] = 0x28;
                             break;
                         case ')':
                             output[outlen++] = 0x29;
                             break;
                         case '\\':
                             output[outlen++] = 0x5c;
                             break;
                         default:
                             /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
                             break;
e2b1880f
                     }
                     i += 1; /* 2 with for loop [\c] */
                 }
             } else {
                 output[outlen++] = wrkstr[i];
             }
         }
 
         free(wrkstr);
288057e9
         wrkstr = cli_calloc(outlen + 1, sizeof(char));
1f294919
         if (!wrkstr) {
             free(output);
             return NULL;
         }
         memcpy(wrkstr, output, outlen);
e2b1880f
         free(output);
         wrklen = outlen;
     }
 
00daf527
     //cli_errmsg("pdf_final: escaped(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     /* check for encryption and decrypt */
288057e9
     if (pdf->flags & (1 << ENCRYPTED_PDF)) {
e09d8843
         size_t tmpsz = wrklen;
288057e9
         output       = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
         outlen       = tmpsz;
e2b1880f
         free(wrkstr);
         if (output) {
288057e9
             wrkstr = cli_calloc(outlen + 1, sizeof(char));
d7effb63
             if (!wrkstr) {
                 free(output);
                 return NULL;
             }
             memcpy(wrkstr, output, outlen);
7bbb67ea
             free(output);
e2b1880f
             wrklen = outlen;
         } else {
             return NULL;
         }
     }
 
00daf527
     //cli_errmsg("pdf_final: decrypt(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     /* check for UTF-* and convert to UTF-8 */
     for (i = 0; i < wrklen; ++i) {
         if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
             likelyutf = 1;
             break;
         }
     }
 
     if (likelyutf) {
         output = pdf_convert_utf(wrkstr, wrklen);
         free(wrkstr);
         wrkstr = output;
     }
 
00daf527
     //cli_errmsg("pdf_final: postutf(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     return wrkstr;
 }
 
9d33052f
 char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
12506782
 {
1158b285
     const char *q = objstart;
12506782
     char *p1, *p2;
cd94be7a
     size_t len, checklen;
e2b1880f
     char *res = NULL;
12506782
     uint32_t objid;
 
1158b285
     if (obj->objstm) {
         if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) {
             /* Possible attempt to exploit bb11980 */
             cli_dbgmsg("Malformed PDF: Alleged size of obj in object stream in PDF would extend further than the object stream data.\n");
             return NULL;
         }
288057e9
     } else {
1158b285
         if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) {
             /* Possible attempt to exploit bb11980 */
             cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n");
             return NULL;
         }
36903968
     }
 
12506782
     /*
      * Yes, all of this is required to find the start and end of a potentially UTF-* string
      *
      * First, find the key of the key/value pair we're looking for in this object.
      * Second, determine whether the value points to another object (NOTE: this is sketchy behavior)
      * Third, attempt to determine if we're ASCII or UTF-*
      * If we're ASCII, just copy the ASCII string into a new heap-allocated string and return that
      * Fourth, Attempt to decode from UTF-* to UTF-8
      */
 
     if (str) {
         checklen = strlen(str);
 
         if (objsize < strlen(str) + 3)
             return NULL;
 
288057e9
         for (p1 = (char *)q; (size_t)(p1 - q) < objsize - checklen; p1++)
12506782
             if (!strncmp(p1, str, checklen))
                 break;
 
cd94be7a
         if ((size_t)(p1 - q) == objsize - checklen)
12506782
             return NULL;
 
         p1 += checklen;
     } else {
cd94be7a
         p1 = (char *)q;
12506782
     }
 
cd94be7a
     while ((size_t)(p1 - q) < objsize && isspace(p1[0]))
12506782
         p1++;
 
cd94be7a
     if ((size_t)(p1 - q) == objsize)
12506782
         return NULL;
 
c7fd0220
     /*
      * If str is non-null:
      *     We should be at the start of the string, minus 1
      * Else:
      *     We should be at the start of the string
      */
12506782
 
cd94be7a
     p2 = (char *)(q + objsize);
12506782
     if (is_object_reference(p1, &p2, &objid)) {
         struct pdf_obj *newobj;
571d8349
         char *begin, *p3;
12506782
         STATBUF sb;
         uint32_t objflags;
         int fd;
571d8349
         size_t objsize2;
12506782
 
         newobj = find_obj(pdf, obj, objid);
         if (!(newobj))
             return NULL;
 
         if (newobj == obj)
             return NULL;
 
02840644
         /*
12506782
          * If pdf_handlename hasn't been called for this object,
          * then parse the object prior to extracting it
          */
         if (!(newobj->statsflags & OBJ_FLAG_PDFNAME_DONE))
             pdf_parseobj(pdf, newobj);
 
         /* Extract the object. Force pdf_extract_obj() to dump this object. */
         objflags = newobj->flags;
         newobj->flags |= (1 << OBJ_FORCEDUMP);
 
         if (pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE) != CL_SUCCESS)
             return NULL;
 
         newobj->flags = objflags;
 
         if (!(newobj->path))
             return NULL;
 
         fd = open(newobj->path, O_RDONLY);
         if (fd == -1) {
             cli_unlink(newobj->path);
             free(newobj->path);
             newobj->path = NULL;
             return NULL;
         }
 
         if (FSTAT(fd, &sb)) {
             close(fd);
             cli_unlink(newobj->path);
             free(newobj->path);
             newobj->path = NULL;
             return NULL;
         }
 
         if (sb.st_size) {
288057e9
             begin = calloc(1, sb.st_size + 1);
12506782
             if (!(begin)) {
                 close(fd);
                 cli_unlink(newobj->path);
                 free(newobj->path);
                 newobj->path = NULL;
                 return NULL;
             }
 
             if (read(fd, begin, sb.st_size) != sb.st_size) {
                 close(fd);
                 cli_unlink(newobj->path);
                 free(newobj->path);
                 newobj->path = NULL;
                 free(begin);
                 return NULL;
             }
 
288057e9
             p3       = begin;
571d8349
             objsize2 = sb.st_size;
             while ((size_t)(p3 - begin) < objsize2 && isspace(p3[0])) {
                 p3++;
                 objsize2--;
             }
 
             switch (*p3) {
12506782
                 case '(':
                 case '<':
9d33052f
                     res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
12506782
                     break;
                 default:
e2b1880f
                     res = pdf_finalize_string(pdf, obj, begin, objsize2);
                     if (!res) {
288057e9
                         res = cli_calloc(1, objsize2 + 1);
76c9c9dd
                         if (!(res)) {
                             close(fd);
                             cli_unlink(newobj->path);
                             free(newobj->path);
                             newobj->path = NULL;
                             free(begin);
9d33052f
                             return NULL;
76c9c9dd
                         }
9d33052f
                         memcpy(res, begin, objsize2);
                         res[objsize2] = '\0';
 
                         if (meta) {
288057e9
                             meta->length  = objsize2;
                             meta->obj     = obj;
9d33052f
                             meta->success = 0;
                         }
                     } else if (meta) {
288057e9
                         meta->length  = strlen(res);
                         meta->obj     = obj;
9d33052f
                         meta->success = 1;
                     }
12506782
             }
24db616f
             free(begin);
12506782
         }
 
         close(fd);
         cli_unlink(newobj->path);
         free(newobj->path);
         newobj->path = NULL;
 
         if (endchar)
             *endchar = p2;
 
         return res;
     }
 
     if (*p1 == '<') {
         /* Hex string */
 
288057e9
         p2 = p1 + 1;
1158b285
         while ((size_t)(p2 - objstart) < objsize && *p2 != '>')
12506782
             p2++;
 
1158b285
         if ((size_t)(p2 - objstart) == objsize) {
12506782
             return NULL;
         }
 
218e1626
         res = pdf_finalize_string(pdf, obj, p1, (p2 - p1) + 1);
         if (!res) {
             res = cli_calloc(1, (p2 - p1) + 2);
             if (!(res))
                 return NULL;
             memcpy(res, p1, (p2 - p1) + 1);
             res[(p2 - p1) + 1] = '\0';
 
             if (meta) {
288057e9
                 meta->length  = (p2 - p1) + 1;
                 meta->obj     = obj;
218e1626
                 meta->success = 0;
             }
         } else if (meta) {
288057e9
             meta->length  = strlen(res);
             meta->obj     = obj;
218e1626
             meta->success = 1;
         }
 
         if (res && endchar)
12506782
             *endchar = p2;
 
         return res;
     }
 
     /* We should be at the start of a string literal (...) here */
     if (*p1 != '(')
         return NULL;
 
     /* Make a best effort to find the end of the string and determine if UTF-* */
     p2 = ++p1;
700ed96a
 
36903968
     while (p2 < objstart + objsize) {
288057e9
         int shouldbreak = 0;
12506782
 
148f3243
         switch (*p2) {
             case '\\':
12506782
                 p2++;
148f3243
                 break;
             case ')':
288057e9
                 shouldbreak = 1;
148f3243
                 break;
12506782
         }
 
         if (shouldbreak) {
             p2--;
             break;
         }
148f3243
 
         p2++;
12506782
     }
 
36903968
     if (p2 >= objstart + objsize)
12506782
         return NULL;
 
     len = (size_t)(p2 - p1) + 1;
 
e2b1880f
     res = pdf_finalize_string(pdf, obj, p1, len);
     if (!res) {
288057e9
         res = cli_calloc(1, len + 1);
9d33052f
         if (!(res))
             return NULL;
         memcpy(res, p1, len);
         res[len] = '\0';
 
         if (meta) {
288057e9
             meta->length  = len;
             meta->obj     = obj;
9d33052f
             meta->success = 0;
         }
     } else if (meta) {
288057e9
         meta->length  = strlen(res);
         meta->obj     = obj;
9d33052f
         meta->success = 1;
12506782
     }
 
     if (res && endchar)
         *endchar = p2;
 
     return res;
 }
 
1158b285
 struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
12506782
 {
288057e9
     struct pdf_dict *res       = NULL;
     struct pdf_dict_node *node = NULL;
12506782
     const char *objstart;
     char *end;
288057e9
     unsigned int in_string = 0, ninner = 0;
12506782
 
     /* Sanity checking */
     if (!(pdf) || !(obj) || !(begin))
         return NULL;
 
1158b285
     objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                              : (const char *)(obj->start + pdf->map);
12506782
 
1158b285
     if (begin < objstart || (size_t)(begin - objstart) >= objsize - 2)
12506782
         return NULL;
 
     if (begin[0] != '<' || begin[1] != '<')
         return NULL;
 
     /* Find the end of the dictionary */
     end = begin;
1158b285
     while ((size_t)(end - objstart) < objsize) {
288057e9
         int increment = 1;
12506782
         if (in_string) {
02a22582
             if (*end == '\\') {
                 end += 2;
                 continue;
             }
 
12506782
             if (*end == ')')
                 in_string = 0;
 
             end++;
             continue;
         }
 
         switch (*end) {
             case '(':
288057e9
                 in_string = 1;
12506782
                 break;
             case '<':
1158b285
                 if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '<')
12506782
                     ninner++;
288057e9
                 increment = 2;
12506782
                 break;
             case '>':
1158b285
                 if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '>')
12506782
                     ninner--;
288057e9
                 increment = 2;
12506782
                 break;
         }
 
1158b285
         if ((size_t)(end - objstart) <= objsize - 2)
12506782
             if (end[0] == '>' && end[1] == '>' && ninner == 0)
                 break;
 
02a22582
         end += increment;
12506782
     }
 
     /* More sanity checking */
1158b285
     if ((size_t)(end - objstart) >= objsize - 2)
12506782
         return NULL;
 
     if (end[0] != '>' || end[1] != '>')
         return NULL;
 
     res = cli_calloc(1, sizeof(struct pdf_dict));
     if (!(res))
         return NULL;
 
     /* Loop through each element of the dictionary */
     begin += 2;
     while (begin < end) {
288057e9
         char *val = NULL, *key = NULL, *p1, *p2;
         struct pdf_dict *dict = NULL;
         struct pdf_array *arr = NULL;
         unsigned int nhex     = 0, i;
12506782
 
         /* Skip any whitespaces */
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end)
             break;
 
         /* Get the key */
288057e9
         p1 = begin + 1;
02a22582
         while (p1 < end && !isspace(p1[0])) {
288057e9
             int breakout = 0;
02a22582
 
             switch (*p1) {
                 case '<':
                 case '[':
                 case '(':
b1351d6b
                 case '/':
                 case '\r':
                 case '\n':
                 case ' ':
                 case '\t':
288057e9
                     breakout = 1;
02a22582
                     break;
b1351d6b
                 case '#':
                     /* Key name obfuscated with hex characters */
                     nhex++;
288057e9
                     if (p1 > end - 3) {
b1351d6b
                         return res;
                     }
 
                     break;
02a22582
             }
288057e9
 
02a22582
             if (breakout)
                 break;
 
12506782
             p1++;
02a22582
         }
12506782
 
         if (p1 == end)
             break;
 
         key = cli_calloc((p1 - begin) + 2, 1);
         if (!(key))
             break;
 
b1351d6b
         if (nhex == 0) {
             /* Key isn't obfuscated with hex. Just copy the string */
             strncpy(key, begin, p1 - begin);
             key[p1 - begin] = '\0';
         } else {
288057e9
             for (i = 0, p2 = begin; p2 < p1; p2++, i++) {
b1351d6b
                 if (*p2 == '#') {
288057e9
                     cli_hex2str_to(p2 + 1, key + i, 2);
b1351d6b
                     p2 += 2;
                 } else {
                     key[i] = *p2;
                 }
             }
         }
12506782
 
         /* Now for the value */
         begin = p1;
 
         /* Skip any whitespaces */
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end) {
             free(key);
             break;
         }
 
         switch (begin[0]) {
             case '(':
288057e9
                 val   = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
                 begin = p1 + 2;
12506782
                 break;
             case '[':
288057e9
                 arr   = pdf_parse_array(pdf, obj, end - objstart, begin, &p1);
                 begin = p1 + 1;
12506782
                 break;
             case '<':
1158b285
                 if ((size_t)(begin - objstart) < objsize - 2) {
12506782
                     if (begin[1] == '<') {
288057e9
                         dict  = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
                         begin = p1 + 2;
12506782
                         break;
                     }
                 }
 
288057e9
                 val   = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
                 begin = p1 + 2;
12506782
                 break;
             default:
288057e9
                 p1 = (begin[0] == '/') ? begin + 1 : begin;
12506782
                 while (p1 < end) {
                     int shouldbreak = 0;
                     switch (p1[0]) {
                         case '>':
                         case '/':
288057e9
                             shouldbreak = 1;
12506782
                             break;
                     }
 
                     if (shouldbreak)
                         break;
 
                     p1++;
                 }
 
                 is_object_reference(begin, &p1, NULL);
 
                 val = cli_calloc((p1 - begin) + 2, 1);
                 if (!(val))
                     break;
 
                 strncpy(val, begin, p1 - begin);
                 val[p1 - begin] = '\0';
 
                 if (p1[0] != '/')
288057e9
                     begin = p1 + 1;
12506782
                 else
                     begin = p1;
 
                 break;
         }
 
         if (!(val) && !(dict) && !(arr)) {
             free(key);
             break;
         }
 
         if (!(res->nodes)) {
             res->nodes = res->tail = node = cli_calloc(1, sizeof(struct pdf_dict_node));
             if (!(node)) {
                 free(key);
438eb313
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
12506782
                 break;
             }
         } else {
             node = calloc(1, sizeof(struct pdf_dict_node));
             if (!(node)) {
                 free(key);
438eb313
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
12506782
                 break;
             }
 
             node->prev = res->tail;
             if (res->tail)
                 res->tail->next = node;
             res->tail = node;
         }
 
         node->key = key;
         if ((val)) {
288057e9
             node->value   = val;
12506782
             node->valuesz = strlen(val);
288057e9
             node->type    = PDF_DICT_STRING;
12506782
         } else if ((arr)) {
288057e9
             node->value   = arr;
12506782
             node->valuesz = sizeof(struct pdf_array);
288057e9
             node->type    = PDF_DICT_ARRAY;
12506782
         } else if ((dict)) {
288057e9
             node->value   = dict;
12506782
             node->valuesz = sizeof(struct pdf_dict);
288057e9
             node->type    = PDF_DICT_DICT;
12506782
         }
     }
 
     if (endchar)
         *endchar = end;
 
     return res;
 }
 
1158b285
 struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
12506782
 {
288057e9
     struct pdf_array *res       = NULL;
     struct pdf_array_node *node = NULL;
438eb313
     const char *objstart;
cd94be7a
     char *end;
288057e9
     int in_string = 0, ninner = 0;
12506782
 
     /* Sanity checking */
     if (!(pdf) || !(obj) || !(begin))
         return NULL;
 
1158b285
     objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                              : (const char *)(obj->start + pdf->map);
438eb313
 
1158b285
     if (begin < objstart || (size_t)(begin - objstart) >= objsize)
12506782
         return NULL;
 
     if (begin[0] != '[')
         return NULL;
 
     /* Find the end of the array */
     end = begin;
1158b285
     while ((size_t)(end - objstart) < objsize) {
12506782
         if (in_string) {
02a22582
             if (*end == '\\') {
                 end += 2;
                 continue;
             }
 
12506782
             if (*end == ')')
                 in_string = 0;
 
             end++;
             continue;
         }
 
         switch (*end) {
             case '(':
288057e9
                 in_string = 1;
12506782
                 break;
             case '[':
                 ninner++;
                 break;
             case ']':
                 ninner--;
                 break;
         }
 
         if (*end == ']' && ninner == 0)
             break;
 
         end++;
     }
 
     /* More sanity checking */
1158b285
     if ((size_t)(end - objstart) >= objsize)
12506782
         return NULL;
 
     if (*end != ']')
         return NULL;
 
     res = cli_calloc(1, sizeof(struct pdf_array));
     if (!(res))
         return NULL;
 
     begin++;
     while (begin < end) {
288057e9
         char *val             = NULL, *p1;
         struct pdf_array *arr = NULL;
         struct pdf_dict *dict = NULL;
12506782
 
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end)
             break;
 
         switch (begin[0]) {
             case '<':
1158b285
                 if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') {
5090e3b8
                     dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin);
288057e9
                     begin += 2;
12506782
                     break;
                 }
 
22cb38ed
                 /* Not a dictionary. Intentionally fall through. */
12506782
             case '(':
5090e3b8
                 val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &begin, NULL);
02a22582
                 begin += 2;
12506782
                 break;
             case '[':
                 /* XXX We should have a recursion counter here */
5090e3b8
                 arr = pdf_parse_array(pdf, obj, end - objstart, begin, &begin);
288057e9
                 begin += 1;
12506782
                 break;
             default:
                 p1 = end;
                 if (!is_object_reference(begin, &p1, NULL)) {
288057e9
                     p1 = begin + 1;
12506782
                     while (p1 < end && !isspace(p1[0]))
                         p1++;
                 }
 
                 val = cli_calloc((p1 - begin) + 2, 1);
                 if (!(val))
                     break;
 
                 strncpy(val, begin, p1 - begin);
                 val[p1 - begin] = '\0';
 
                 begin = p1;
                 break;
         }
 
         /* Parse error, just return what we could */
         if (!(val) && !(arr) && !(dict))
             break;
 
         if (!(node)) {
             res->nodes = res->tail = node = calloc(1, sizeof(struct pdf_array_node));
438eb313
             if (!(node)) {
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
 
12506782
                 break;
438eb313
             }
12506782
         } else {
             node = calloc(1, sizeof(struct pdf_array_node));
438eb313
             if (!(node)) {
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
 
12506782
                 break;
438eb313
             }
12506782
 
             node->prev = res->tail;
             if (res->tail)
                 res->tail->next = node;
             res->tail = node;
         }
 
         if (val != NULL) {
288057e9
             node->type   = PDF_ARR_STRING;
             node->data   = val;
12506782
             node->datasz = strlen(val);
         } else if (dict != NULL) {
288057e9
             node->type   = PDF_ARR_DICT;
             node->data   = dict;
12506782
             node->datasz = sizeof(struct pdf_dict);
         } else {
288057e9
             node->type   = PDF_ARR_ARRAY;
             node->data   = arr;
12506782
             node->datasz = sizeof(struct pdf_array);
         }
     }
 
     if (endchar)
         *endchar = end;
 
     return res;
 }
 
 /*
  * Release a parsed dictionary together with everything it owns.
  *
  * Walks the list starting at dict->nodes. For each node the key is freed,
  * then the value is released according to the node's type tag:
  *   - PDF_DICT_STRING: free()
  *   - PDF_DICT_ARRAY:  pdf_free_array()
  *   - PDF_DICT_DICT:   pdf_free_dict() (recursive)
  * A value carrying any other tag is left untouched. Finally the node and
  * the dictionary structure itself are freed.
  *
  * dict may be NULL, in which case the call does nothing (the same
  * tolerance pdf_free_array() offers).
  */
 void pdf_free_dict(struct pdf_dict *dict)
 {
     struct pdf_dict_node *node, *next;
 
     /* Accept NULL so cleanup paths can call this unconditionally. */
     if (!(dict))
         return;
 
     node = dict->nodes;
     while (node != NULL) {
         free(node->key);
 
         if (node->type == PDF_DICT_STRING)
             free(node->value);
         else if (node->type == PDF_DICT_ARRAY)
             pdf_free_array((struct pdf_array *)(node->value));
         else if (node->type == PDF_DICT_DICT)
             pdf_free_dict((struct pdf_dict *)(node->value));
 
         /* Grab the successor before the node itself goes away. */
         next = node->next;
         free(node);
         node = next;
     }
 
     free(dict);
 }
 
 /*
  * Release a parsed array together with every element it holds.
  *
  * Each node's payload is released according to its type tag: nested
  * arrays through pdf_free_array(), dictionaries through pdf_free_dict(),
  * and anything else with plain free(). The node is freed afterwards and,
  * once the list is exhausted, so is the array structure.
  *
  * A NULL array is accepted and ignored.
  */
 void pdf_free_array(struct pdf_array *array)
 {
     struct pdf_array_node *cur, *following;
 
     if (array == NULL)
         return;
 
     for (cur = array->nodes; cur != NULL; cur = following) {
         /* Remember the successor; cur is freed at the end of this pass. */
         following = cur->next;
 
         switch (cur->type) {
             case PDF_ARR_ARRAY:
                 pdf_free_array((struct pdf_array *)(cur->data));
                 break;
             case PDF_ARR_DICT:
                 pdf_free_dict((struct pdf_dict *)(cur->data));
                 break;
             default:
                 /* Every other tag owns a plain heap buffer. */
                 free(cur->data);
                 break;
         }
 
         free(cur);
     }
 
     free(array);
 }
 
 void pdf_print_array(struct pdf_array *array, unsigned long depth)
 {
     struct pdf_array_node *node;
     unsigned long i;
 
288057e9
     for (i = 0, node = array->nodes; node != NULL; node = node->next, i++) {
12506782
         if (node->type == PDF_ARR_STRING)
             cli_errmsg("array[%lu][%lu]: %s\n", depth, i, (char *)(node->data));
         else
288057e9
             pdf_print_array((struct pdf_array *)(node->data), depth + 1);
12506782
     }
 }
 
 /*
  * Dump the contents of a parsed dictionary through cli_errmsg().
  *
  * String entries are printed as "dict[depth][key]: value". Array entries
  * get a header line and are then printed by pdf_print_array() at the same
  * depth. Nested dictionaries are printed recursively with depth + 1.
  * Entries carrying any other type tag produce no output.
  */
 void pdf_print_dict(struct pdf_dict *dict, unsigned long depth)
 {
     struct pdf_dict_node *cur = dict->nodes;
 
     while (cur != NULL) {
         switch (cur->type) {
             case PDF_DICT_STRING:
                 cli_errmsg("dict[%lu][%s]: %s\n", depth, cur->key, (char *)(cur->value));
                 break;
             case PDF_DICT_ARRAY:
                 cli_errmsg("dict[%lu][%s]: Array =>\n", depth, cur->key);
                 pdf_print_array((struct pdf_array *)(cur->value), depth);
                 break;
             case PDF_DICT_DICT:
                 pdf_print_dict((struct pdf_dict *)(cur->value), depth + 1);
                 break;
             default:
                 /* Unknown tag: nothing to print. */
                 break;
         }
 
         cur = cur->next;
     }
 }