libclamav/pdfng.c
12506782
 /*
8dc0817e
  *  Copyright (C) 2014, 2017-2018 Cisco and/or its affiliates. All rights reserved.
12506782
  *
  *  Author: Shawn Webb
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  *
  *  In addition, as a special exception, the copyright holders give
  *  permission to link the code of portions of this program with the
  *  OpenSSL library under certain conditions as described in each
  *  individual source file, and distribute linked combinations
  *  including the two.
  *  
  *  You must obey the GNU General Public License in all respects
  *  for all of the code used other than OpenSSL.  If you modify
  *  file(s) with this exception, you may extend this exception to your
  *  version of the file(s), but you are not obligated to do so.  If you
  *  do not wish to do so, delete this exception statement from your
  *  version.  If you delete this exception statement from all source
  *  files in the program, then also delete it here.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <errno.h>
 #ifdef	HAVE_LIMITS_H
 #include <limits.h>
 #endif
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <zlib.h>
 
 #if HAVE_ICONV
 #include <iconv.h>
 #endif
 
 #include "clamav.h"
 #include "others.h"
 #include "pdf.h"
 #include "scanners.h"
 #include "fmap.h"
 #include "str.h"
 #include "bytecode.h"
 #include "bytecode_api.h"
 #include "arc4.h"
 #include "rijndael.h"
 #include "textnorm.h"
 #include "json_api.h"
ddc14219
 #include "conv.h"
12506782
 
cd94be7a
 char *pdf_convert_utf(char *begin, size_t sz);
 
12506782
 char *pdf_convert_utf(char *begin, size_t sz)
 {
     char *res=NULL;
0a185b82
     char *buf, *outbuf;
12506782
 #if HAVE_ICONV
0a185b82
     char *p1, *p2;
ddc14219
     size_t inlen, outlen, i;
12506782
     char *encodings[] = {
         "UTF-16",
         NULL
     };
     iconv_t cd;
0a185b82
 #endif
12506782
 
3a925de0
     buf = cli_calloc(1, sz+1);
12506782
     if (!(buf))
         return NULL;
3a925de0
     memcpy(buf, begin, sz);
12506782
 
0a185b82
 #if HAVE_ICONV
12506782
     p1 = buf;
 
3a925de0
     p2 = outbuf = cli_calloc(1, sz+1);
12506782
     if (!(outbuf)) {
         free(buf);
         return NULL;
     }
 
     for (i=0; encodings[i] != NULL; i++) {
         p1 = buf;
         p2 = outbuf;
3a925de0
         inlen = outlen = sz;
12506782
 
         cd = iconv_open("UTF-8", encodings[i]);
         if (cd == (iconv_t)(-1)) {
c00baa37
             char errbuf[128];
             cli_strerror(errno, errbuf, sizeof(errbuf)); 
             cli_errmsg("pdf_convert_utf: could not initialize iconv for encoding %s: %s\n", encodings[i], errbuf);
12506782
             continue;
         }
 
48ae4768
         iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
12506782
 
3a925de0
         if (outlen == sz) {
12506782
             /* Decoding unsuccessful right from the start */
             iconv_close(cd);
             continue;
         }
 
3a925de0
         outbuf[sz - outlen] = '\0';
12506782
 
         res = strdup(outbuf);
         iconv_close(cd);
         break;
     }
0a185b82
 #else
3a925de0
     outbuf = cli_utf16_to_utf8(buf, sz, UTF16_BOM);
ab9611d4
     if (!outbuf) {
         free(buf);
0a185b82
         return NULL;
ab9611d4
     }
12506782
 
0a185b82
     res = strdup(outbuf);
 #endif
12506782
     free(buf);
     free(outbuf);
e098bf4b
 
12506782
     return res;
 }
 
 /*
  * Helper for is_object_reference(): read one unsigned decimal number.
  *
  * Skips leading whitespace starting at *cursor, requires a digit, and
  * requires that a whitespace byte follows the token somewhere before `end`
  * so that strtoul() is guaranteed to stop inside the buffer.
  *
  * On success stores the number in *value, moves *cursor to the first byte
  * strtoul() did not consume and returns 1. Returns 0 otherwise and leaves
  * *cursor untouched.
  */
 static int objref_read_number(char **cursor, const char *end, unsigned long *value)
 {
     char *p = *cursor;
     char *stop;

     /* ctype functions are only defined for unsigned char values and EOF */
     while (p < end && isspace((unsigned char)*p))
         p++;

     if (p >= end)
         return 0;

     if (!isdigit((unsigned char)*p))
         return 0;

     /* Ensure strtoul() isn't going to go past our buffer */
     stop = p + 1;
     while (stop < end && !isspace((unsigned char)*stop))
         stop++;

     if (stop >= end)
         return 0;

     /* Clear errno first so that a stale value cannot look like an overflow */
     errno  = 0;
     *value = strtoul(p, &stop, 10);
     if (*value == ULONG_MAX && errno != 0)
         return 0;

     *cursor = stop;
     return 1;
 }

 /*
  * Check whether the text starting at `begin` is an indirect object reference.
  *
  * Object references are always this format:
  *     XXXX YYYY R
  * where XXXX is the object ID and YYYY is the revision ID of the object.
  * The letter R signifies that this is a reference. In between each item can
  * be an arbitrary amount of whitespace.
  *
  * begin:   first byte to examine
  * endchar: in: one past the last byte that may be read;
  *          out: on success only, the byte right after the 'R'
  * id:      optional out: (object ID << 8) | (revision ID & 0xff),
  *          truncated to 32 bits
  *
  * Returns 1 when a reference was recognised, 0 otherwise.
  */
 int is_object_reference(char *begin, char **endchar, uint32_t *id)
 {
     char *end = *endchar;
     char *p   = begin;
     unsigned long objnum, revnum;

     if (!objref_read_number(&p, end, &objnum))
         return 0;

     if (!objref_read_number(&p, end, &revnum))
         return 0;

     /* Skip the whitespace in front of the 'R' */
     while (p < end && isspace((unsigned char)*p))
         p++;

     if (p >= end)
         return 0;

     if (*p != 'R')
         return 0;

     *endchar = p + 1;
     if (id)
         *id = (uint32_t)(objnum << 8) | (uint32_t)(revnum & 0xff);

     return 1;
 }
 
e09d8843
 static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t *length)
e2b1880f
 {
     enum enc_method enc;
 
     /* handled only once in cli_pdf() */
     //pdf_handle_enc(pdf);
     if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
         enc = get_enc_method(pdf, obj);
         return decrypt_any(pdf, obj->id, in, length, enc);
     }
     return NULL;
 }
 
9d33052f
 char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
e2b1880f
 {
     char *wrkstr, *output = NULL;
e09d8843
     size_t wrklen = len, outlen, i;
     unsigned int likelyutf = 0;
e2b1880f
 
9d33052f
     if (!in)
         return NULL;
 
e2b1880f
     /* get a working copy */
     wrkstr = cli_calloc(len+1, sizeof(char));
     if (!wrkstr)
         return NULL;
     memcpy(wrkstr, in, len);
 
00daf527
     //cli_errmsg("pdf_final: start(%d):   %s\n", wrklen, wrkstr);
e2b1880f
 
     /* convert PDF specific escape sequences, like octal sequences */
     /* TODO: replace the escape sequences directly in the wrkstr   */
     if (strchr(wrkstr, '\\')) {
         output = cli_calloc(wrklen+1, sizeof(char));
f4cbcd53
         if (!output) {
             free(wrkstr);
e2b1880f
             return NULL;
f4cbcd53
         }
e2b1880f
 
         outlen = 0;
         for (i = 0; i < wrklen; ++i) {
             if ((i+1 < wrklen) && wrkstr[i] == '\\') {
                 if ((i+3 < wrklen) &&
                     (isdigit(wrkstr[i+1]) && isdigit(wrkstr[i+2]) && isdigit(wrkstr[i+3]))) {
                     /* octal sequence */
                     char octal[4], *check;
                     unsigned long value;
 
                     memcpy(octal, &wrkstr[i+1], 3);
                     octal[3] = '\0';
 
                     value = (char)strtoul(octal, &check, 8);
                     /* check if all characters were converted */
                     if (check == &octal[3])
                         output[outlen++] = value;
                     i += 3; /* 4 with for loop [\ddd] */
                 } else {
                     /* other sequences */
                     switch(wrkstr[i+1]) {
                     case 'n':
                         output[outlen++] = 0x0a;
                         break;
                     case 'r':
                         output[outlen++] = 0x0d;
                         break;
                     case 't':
                         output[outlen++] = 0x09;
                         break;
                     case 'b':
                         output[outlen++] = 0x08;
                         break;
                     case 'f':
                         output[outlen++] = 0x0c;
                         break;
                     case '(':
                         output[outlen++] = 0x28;
                         break;
                     case ')':
                         output[outlen++] = 0x29;
                         break;
                     case '\\':
                         output[outlen++] = 0x5c;
                         break;
                     default:
                         /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
                         break;
                     }
                     i += 1; /* 2 with for loop [\c] */
                 }
             } else {
                 output[outlen++] = wrkstr[i];
             }
         }
 
         free(wrkstr);
1f294919
         wrkstr = cli_calloc(outlen+1, sizeof(char));
         if (!wrkstr) {
             free(output);
             return NULL;
         }
         memcpy(wrkstr, output, outlen);
e2b1880f
         free(output);
         wrklen = outlen;
     }
 
00daf527
     //cli_errmsg("pdf_final: escaped(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     /* check for encryption and decrypt */
     if (pdf->flags & (1 << ENCRYPTED_PDF))
     {
e09d8843
         size_t tmpsz = wrklen;
e2b1880f
         output = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
e09d8843
         outlen = tmpsz;
e2b1880f
         free(wrkstr);
         if (output) {
d7effb63
             wrkstr = cli_calloc(outlen+1, sizeof(char));
             if (!wrkstr) {
                 free(output);
                 return NULL;
             }
             memcpy(wrkstr, output, outlen);
7bbb67ea
             free(output);
e2b1880f
             wrklen = outlen;
         } else {
             return NULL;
         }
     }
 
00daf527
     //cli_errmsg("pdf_final: decrypt(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     /* check for UTF-* and convert to UTF-8 */
     for (i = 0; i < wrklen; ++i) {
         if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
             likelyutf = 1;
             break;
         }
     }
 
     if (likelyutf) {
         output = pdf_convert_utf(wrkstr, wrklen);
         free(wrkstr);
         wrkstr = output;
     }
 
00daf527
     //cli_errmsg("pdf_final: postutf(%d): %s\n", wrklen, wrkstr);
e2b1880f
 
     return wrkstr;
 }
 
9d33052f
 char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
12506782
 {
89d5207b
     const char *q = objstart;
12506782
     char *p1, *p2;
cd94be7a
     size_t len, checklen;
e2b1880f
     char *res = NULL;
12506782
     uint32_t objid;
b8d65b72
     size_t i;
12506782
 
89d5207b
     if (obj->objstm) {
         if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) {
             /* Possible attempt to exploit bb11980 */
             cli_dbgmsg("Malformed PDF: Alleged size of obj in object stream in PDF would extend further than the object stream data.\n");
             return NULL;
         }
     }
     else {
         if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) {
             /* Possible attempt to exploit bb11980 */
             cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n");
             return NULL;
         }
36903968
     }
 
12506782
     /*
      * Yes, all of this is required to find the start and end of a potentially UTF-* string
      *
      * First, find the key of the key/value pair we're looking for in this object.
      * Second, determine whether the value points to another object (NOTE: this is sketchy behavior)
      * Third, attempt to determine if we're ASCII or UTF-*
      * If we're ASCII, just copy the ASCII string into a new heap-allocated string and return that
      * Fourth, Attempt to decode from UTF-* to UTF-8
      */
 
     if (str) {
         checklen = strlen(str);
 
         if (objsize < strlen(str) + 3)
             return NULL;
 
cd94be7a
         for (p1=(char *)q; (size_t)(p1 - q) < objsize-checklen; p1++)
12506782
             if (!strncmp(p1, str, checklen))
                 break;
 
cd94be7a
         if ((size_t)(p1 - q) == objsize - checklen)
12506782
             return NULL;
 
         p1 += checklen;
     } else {
cd94be7a
         p1 = (char *)q;
12506782
     }
 
cd94be7a
     while ((size_t)(p1 - q) < objsize && isspace(p1[0]))
12506782
         p1++;
 
cd94be7a
     if ((size_t)(p1 - q) == objsize)
12506782
         return NULL;
 
c7fd0220
     /*
      * If str is non-null:
      *     We should be at the start of the string, minus 1
      * Else:
      *     We should be at the start of the string
      */
12506782
 
cd94be7a
     p2 = (char *)(q + objsize);
12506782
     if (is_object_reference(p1, &p2, &objid)) {
         struct pdf_obj *newobj;
571d8349
         char *begin, *p3;
12506782
         STATBUF sb;
         uint32_t objflags;
         int fd;
571d8349
         size_t objsize2;
12506782
 
         newobj = find_obj(pdf, obj, objid);
         if (!(newobj))
             return NULL;
 
         if (newobj == obj)
             return NULL;
 
         /* 
          * If pdf_handlename hasn't been called for this object,
          * then parse the object prior to extracting it
          */
         if (!(newobj->statsflags & OBJ_FLAG_PDFNAME_DONE))
             pdf_parseobj(pdf, newobj);
 
         /* Extract the object. Force pdf_extract_obj() to dump this object. */
         objflags = newobj->flags;
         newobj->flags |= (1 << OBJ_FORCEDUMP);
 
         if (pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE) != CL_SUCCESS)
             return NULL;
 
         newobj->flags = objflags;
 
         if (!(newobj->path))
             return NULL;
 
         fd = open(newobj->path, O_RDONLY);
         if (fd == -1) {
             cli_unlink(newobj->path);
             free(newobj->path);
             newobj->path = NULL;
             return NULL;
         }
 
         if (FSTAT(fd, &sb)) {
             close(fd);
             cli_unlink(newobj->path);
             free(newobj->path);
             newobj->path = NULL;
             return NULL;
         }
 
         if (sb.st_size) {
48ae4768
             begin = calloc(1, sb.st_size+1);
12506782
             if (!(begin)) {
                 close(fd);
                 cli_unlink(newobj->path);
                 free(newobj->path);
                 newobj->path = NULL;
                 return NULL;
             }
 
             if (read(fd, begin, sb.st_size) != sb.st_size) {
                 close(fd);
                 cli_unlink(newobj->path);
                 free(newobj->path);
                 newobj->path = NULL;
                 free(begin);
                 return NULL;
             }
 
571d8349
             p3 = begin;
             objsize2 = sb.st_size;
             while ((size_t)(p3 - begin) < objsize2 && isspace(p3[0])) {
                 p3++;
                 objsize2--;
             }
 
             switch (*p3) {
12506782
                 case '(':
                 case '<':
9d33052f
                     res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
12506782
                     break;
                 default:
e2b1880f
                     res = pdf_finalize_string(pdf, obj, begin, objsize2);
                     if (!res) {
9d33052f
                         res = cli_calloc(1, objsize2+1);
76c9c9dd
                         if (!(res)) {
                             close(fd);
                             cli_unlink(newobj->path);
                             free(newobj->path);
                             newobj->path = NULL;
                             free(begin);
9d33052f
                             return NULL;
76c9c9dd
                         }
9d33052f
                         memcpy(res, begin, objsize2);
                         res[objsize2] = '\0';
 
                         if (meta) {
                             meta->length = objsize2;
                             meta->obj = obj;
                             meta->success = 0;
                         }
                     } else if (meta) {
                         meta->length = strlen(res);
                         meta->obj = obj;
                         meta->success = 1;
                     }
12506782
             }
24db616f
             free(begin);
12506782
         }
 
         close(fd);
         cli_unlink(newobj->path);
         free(newobj->path);
         newobj->path = NULL;
 
         if (endchar)
             *endchar = p2;
 
         return res;
     }
 
     if (*p1 == '<') {
         /* Hex string */
 
         p2 = p1+1;
89d5207b
         while ((size_t)(p2 - objstart) < objsize && *p2 != '>')
12506782
             p2++;
 
89d5207b
         if ((size_t)(p2 - objstart) == objsize) {
12506782
             return NULL;
         }
 
 
218e1626
         res = pdf_finalize_string(pdf, obj, p1, (p2 - p1) + 1);
         if (!res) {
             res = cli_calloc(1, (p2 - p1) + 2);
             if (!(res))
                 return NULL;
             memcpy(res, p1, (p2 - p1) + 1);
             res[(p2 - p1) + 1] = '\0';
 
             if (meta) {
                 meta->length = (p2 - p1) + 1;
                 meta->obj = obj;
                 meta->success = 0;
             }
         } else if (meta) {
             meta->length = strlen(res);
             meta->obj = obj;
             meta->success = 1;
         }
 
         if (res && endchar)
12506782
             *endchar = p2;
 
         return res;
     }
 
     /* We should be at the start of a string literal (...) here */
     if (*p1 != '(')
         return NULL;
 
     /* Make a best effort to find the end of the string and determine if UTF-* */
     p2 = ++p1;
700ed96a
 
36903968
     while (p2 < objstart + objsize) {
148f3243
         int shouldbreak=0;
12506782
 
148f3243
         switch (*p2) {
             case '\\':
12506782
                 p2++;
148f3243
                 break;
             case ')':
                 shouldbreak=1;
                 break;
12506782
         }
 
         if (shouldbreak) {
             p2--;
             break;
         }
148f3243
 
         p2++;
12506782
     }
 
36903968
     if (p2 >= objstart + objsize)
12506782
         return NULL;
 
     len = (size_t)(p2 - p1) + 1;
 
e2b1880f
     res = pdf_finalize_string(pdf, obj, p1, len);
     if (!res) {
9d33052f
         res = cli_calloc(1, len+1);
         if (!(res))
             return NULL;
         memcpy(res, p1, len);
         res[len] = '\0';
 
         if (meta) {
             meta->length = len;
             meta->obj = obj;
             meta->success = 0;
         }
     } else if (meta) {
         meta->length = strlen(res);
         meta->obj = obj;
         meta->success = 1;
12506782
     }
 
     if (res && endchar)
         *endchar = p2;
 
     return res;
 }
 
89d5207b
 struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
12506782
 {
     struct pdf_dict *res=NULL;
     struct pdf_dict_node *node=NULL;
     const char *objstart;
     char *end;
     unsigned int in_string=0, ninner=0;
 
     /* Sanity checking */
     if (!(pdf) || !(obj) || !(begin))
         return NULL;
 
89d5207b
     objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                              : (const char *)(obj->start + pdf->map);
12506782
 
89d5207b
     if (begin < objstart || (size_t)(begin - objstart) >= objsize - 2)
12506782
         return NULL;
 
     if (begin[0] != '<' || begin[1] != '<')
         return NULL;
 
     /* Find the end of the dictionary */
     end = begin;
89d5207b
     while ((size_t)(end - objstart) < objsize) {
02a22582
         int increment=1;
12506782
         if (in_string) {
02a22582
             if (*end == '\\') {
                 end += 2;
                 continue;
             }
 
12506782
             if (*end == ')')
                 in_string = 0;
 
             end++;
             continue;
         }
 
         switch (*end) {
             case '(':
                 in_string=1;
                 break;
             case '<':
89d5207b
                 if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '<')
12506782
                     ninner++;
02a22582
                 increment=2;
12506782
                 break;
             case '>':
89d5207b
                 if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '>')
12506782
                     ninner--;
02a22582
                 increment=2;
12506782
                 break;
         }
 
89d5207b
         if ((size_t)(end - objstart) <= objsize - 2)
12506782
             if (end[0] == '>' && end[1] == '>' && ninner == 0)
                 break;
 
02a22582
         end += increment;
12506782
     }
 
     /* More sanity checking */
89d5207b
     if ((size_t)(end - objstart) >= objsize - 2)
12506782
         return NULL;
 
     if (end[0] != '>' || end[1] != '>')
         return NULL;
 
     res = cli_calloc(1, sizeof(struct pdf_dict));
     if (!(res))
         return NULL;
 
     /* Loop through each element of the dictionary */
     begin += 2;
     while (begin < end) {
b1351d6b
         char *val=NULL, *key=NULL, *p1, *p2;
12506782
         struct pdf_dict *dict=NULL;
         struct pdf_array *arr=NULL;
b1351d6b
         unsigned int nhex=0, i;
12506782
 
         /* Skip any whitespaces */
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end)
             break;
 
         /* Get the key */
         p1 = begin+1;
02a22582
         while (p1 < end && !isspace(p1[0])) {
             int breakout=0;
 
             switch (*p1) {
                 case '<':
                 case '[':
                 case '(':
b1351d6b
                 case '/':
                 case '\r':
                 case '\n':
                 case ' ':
                 case '\t':
02a22582
                     breakout=1;
                     break;
b1351d6b
                 case '#':
                     /* Key name obfuscated with hex characters */
                     nhex++;
                     if (p1 > end-3) {
                         return res;
                     }
 
                     break;
02a22582
             }
             
             if (breakout)
                 break;
 
12506782
             p1++;
02a22582
         }
12506782
 
         if (p1 == end)
             break;
 
         key = cli_calloc((p1 - begin) + 2, 1);
         if (!(key))
             break;
 
b1351d6b
         if (nhex == 0) {
             /* Key isn't obfuscated with hex. Just copy the string */
             strncpy(key, begin, p1 - begin);
             key[p1 - begin] = '\0';
         } else {
             for (i=0, p2 = begin; p2 < p1; p2++, i++) {
                 if (*p2 == '#') {
                     cli_hex2str_to(p2+1, key+i, 2);
                     p2 += 2;
                 } else {
                     key[i] = *p2;
                 }
             }
         }
12506782
 
         /* Now for the value */
         begin = p1;
 
         /* Skip any whitespaces */
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end) {
             free(key);
             break;
         }
 
         switch (begin[0]) {
             case '(':
992de2e2
                 val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
12506782
                 begin = p1+2;
                 break;
             case '[':
992de2e2
                 arr = pdf_parse_array(pdf, obj, end - objstart, begin, &p1);
12506782
                 begin = p1+1;
                 break;
             case '<':
89d5207b
                 if ((size_t)(begin - objstart) < objsize - 2) {
12506782
                     if (begin[1] == '<') {
992de2e2
                         dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
12506782
                         begin = p1+2;
                         break;
                     }
                 }
 
992de2e2
                 val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
12506782
                 begin = p1+2;
                 break;
             default:
                 p1 = (begin[0] == '/') ? begin+1 : begin;
                 while (p1 < end) {
                     int shouldbreak = 0;
                     switch (p1[0]) {
                         case '>':
                         case '/':
                             shouldbreak=1;
                             break;
                     }
 
                     if (shouldbreak)
                         break;
 
                     p1++;
                 }
 
                 is_object_reference(begin, &p1, NULL);
 
                 val = cli_calloc((p1 - begin) + 2, 1);
                 if (!(val))
                     break;
 
                 strncpy(val, begin, p1 - begin);
                 val[p1 - begin] = '\0';
 
                 if (p1[0] != '/')
                     begin = p1+1;
                 else
                     begin = p1;
 
                 break;
         }
 
         if (!(val) && !(dict) && !(arr)) {
             free(key);
             break;
         }
 
         if (!(res->nodes)) {
             res->nodes = res->tail = node = cli_calloc(1, sizeof(struct pdf_dict_node));
             if (!(node)) {
                 free(key);
438eb313
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
12506782
                 break;
             }
         } else {
             node = calloc(1, sizeof(struct pdf_dict_node));
             if (!(node)) {
                 free(key);
438eb313
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
12506782
                 break;
             }
 
             node->prev = res->tail;
             if (res->tail)
                 res->tail->next = node;
             res->tail = node;
         }
 
         node->key = key;
         if ((val)) {
             node->value = val;
             node->valuesz = strlen(val);
             node->type = PDF_DICT_STRING;
         } else if ((arr)) {
             node->value = arr;
             node->valuesz = sizeof(struct pdf_array);
             node->type = PDF_DICT_ARRAY;
         } else if ((dict)) {
             node->value = dict;
             node->valuesz = sizeof(struct pdf_dict);
             node->type = PDF_DICT_DICT;
         }
     }
 
     if (endchar)
         *endchar = end;
 
     return res;
 }
 
89d5207b
 struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
12506782
 {
     struct pdf_array *res=NULL;
     struct pdf_array_node *node=NULL;
438eb313
     const char *objstart;
cd94be7a
     char *end;
12506782
     int in_string=0, ninner=0;
 
     /* Sanity checking */
     if (!(pdf) || !(obj) || !(begin))
         return NULL;
 
89d5207b
     objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                              : (const char *)(obj->start + pdf->map);
438eb313
 
89d5207b
     if (begin < objstart || (size_t)(begin - objstart) >= objsize)
12506782
         return NULL;
 
     if (begin[0] != '[')
         return NULL;
 
     /* Find the end of the array */
     end = begin;
89d5207b
     while ((size_t)(end - objstart) < objsize) {
12506782
         if (in_string) {
02a22582
             if (*end == '\\') {
                 end += 2;
                 continue;
             }
 
12506782
             if (*end == ')')
                 in_string = 0;
 
             end++;
             continue;
         }
 
         switch (*end) {
             case '(':
                 in_string=1;
                 break;
             case '[':
                 ninner++;
                 break;
             case ']':
                 ninner--;
                 break;
         }
 
         if (*end == ']' && ninner == 0)
             break;
 
         end++;
     }
 
     /* More sanity checking */
89d5207b
     if ((size_t)(end - objstart) >= objsize)
12506782
         return NULL;
 
     if (*end != ']')
         return NULL;
 
     res = cli_calloc(1, sizeof(struct pdf_array));
     if (!(res))
         return NULL;
 
     begin++;
     while (begin < end) {
         char *val=NULL, *p1;
         struct pdf_array *arr=NULL;
         struct pdf_dict *dict=NULL;
 
         while (begin < end && isspace(begin[0]))
             begin++;
 
         if (begin == end)
             break;
 
         switch (begin[0]) {
             case '<':
89d5207b
                 if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') {
992de2e2
                     dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin);
02a22582
                     begin+=2;
12506782
                     break;
                 }
 
22cb38ed
                 /* Not a dictionary. Intentionally fall through. */
12506782
             case '(':
992de2e2
                 val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &begin, NULL);
02a22582
                 begin += 2;
12506782
                 break;
             case '[':
                 /* XXX We should have a recursion counter here */
992de2e2
                 arr = pdf_parse_array(pdf, obj, end - objstart, begin, &begin);
02a22582
                 begin+=1;
12506782
                 break;
             default:
                 p1 = end;
                 if (!is_object_reference(begin, &p1, NULL)) {
                     p1 = begin+1;
                     while (p1 < end && !isspace(p1[0]))
                         p1++;
                 }
 
                 val = cli_calloc((p1 - begin) + 2, 1);
                 if (!(val))
                     break;
 
                 strncpy(val, begin, p1 - begin);
                 val[p1 - begin] = '\0';
 
                 begin = p1;
                 break;
         }
 
         /* Parse error, just return what we could */
         if (!(val) && !(arr) && !(dict))
             break;
 
         if (!(node)) {
             res->nodes = res->tail = node = calloc(1, sizeof(struct pdf_array_node));
438eb313
             if (!(node)) {
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
 
12506782
                 break;
438eb313
             }
12506782
         } else {
             node = calloc(1, sizeof(struct pdf_array_node));
438eb313
             if (!(node)) {
                 if (dict)
                     pdf_free_dict(dict);
                 if (val)
                     free(val);
                 if (arr)
                     pdf_free_array(arr);
 
12506782
                 break;
438eb313
             }
12506782
 
             node->prev = res->tail;
             if (res->tail)
                 res->tail->next = node;
             res->tail = node;
         }
 
         if (val != NULL) {
             node->type = PDF_ARR_STRING;
             node->data = val;
             node->datasz = strlen(val);
         } else if (dict != NULL) {
             node->type = PDF_ARR_DICT;
             node->data = dict;
             node->datasz = sizeof(struct pdf_dict);
         } else {
             node->type = PDF_ARR_ARRAY;
             node->data = arr;
             node->datasz = sizeof(struct pdf_array);
         }
     }
 
     if (endchar)
         *endchar = end;
 
     return res;
 }
 
 /*
  * Release a dictionary together with every key and value it owns.
  * String values are freed directly; array and dictionary values are
  * released recursively. A NULL dictionary is accepted and ignored, matching
  * pdf_free_array().
  */
 void pdf_free_dict(struct pdf_dict *dict)
 {
     struct pdf_dict_node *node, *next;

     if (!(dict))
         return;

     node = dict->nodes;
     while (node != NULL) {
         free(node->key);

         if (node->type == PDF_DICT_STRING)
             free(node->value);
         else if (node->type == PDF_DICT_ARRAY)
             pdf_free_array((struct pdf_array *)(node->value));
         else if (node->type == PDF_DICT_DICT)
             pdf_free_dict((struct pdf_dict *)(node->value));

         /* Remember the successor before the node itself goes away */
         next = node->next;
         free(node);
         node = next;
     }

     free(dict);
 }
 
 /*
  * Release an array together with everything its nodes own. Nested arrays
  * and dictionaries are released recursively; any other node's data is
  * handed to free(). Passing NULL is a no-op.
  */
 void pdf_free_array(struct pdf_array *array)
 {
     struct pdf_array_node *cur, *following;

     if (array == NULL)
         return;

     for (cur = array->nodes; cur != NULL; cur = following) {
         /* Fetch the successor first; cur is gone by the end of the iteration */
         following = cur->next;

         if (cur->type == PDF_ARR_ARRAY) {
             pdf_free_array((struct pdf_array *)(cur->data));
         } else if (cur->type == PDF_ARR_DICT) {
             pdf_free_dict((struct pdf_dict *)(cur->data));
         } else {
             free(cur->data);
         }

         free(cur);
     }

     free(array);
 }
 
 void pdf_print_array(struct pdf_array *array, unsigned long depth)
 {
     struct pdf_array_node *node;
     unsigned long i;
 
     for (i=0, node = array->nodes; node != NULL; node = node->next, i++) {
         if (node->type == PDF_ARR_STRING)
             cli_errmsg("array[%lu][%lu]: %s\n", depth, i, (char *)(node->data));
         else
             pdf_print_array((struct pdf_array *)(node->data), depth+1);
     }
 }
 
 /*
  * Debug helper: log the contents of a dictionary through cli_errmsg().
  *
  * dict:  dictionary to print
  * depth: nesting level shown in the output
  *
  * String values are printed next to their key. For an array value a header
  * line is printed and the array is then dumped at the same depth. A nested
  * dictionary is dumped at depth + 1 without a header line.
  */
 void pdf_print_dict(struct pdf_dict *dict, unsigned long depth)
 {
     struct pdf_dict_node *entry = dict->nodes;

     while (entry != NULL) {
         if (entry->type == PDF_DICT_STRING) {
             cli_errmsg("dict[%lu][%s]: %s\n", depth, entry->key, (char *)(entry->value));
         } else if (entry->type == PDF_DICT_ARRAY) {
             cli_errmsg("dict[%lu][%s]: Array =>\n", depth, entry->key);
             pdf_print_array((struct pdf_array *)(entry->value), depth);
         } else if (entry->type == PDF_DICT_DICT) {
             pdf_print_dict((struct pdf_dict *)(entry->value), depth + 1);
         }

         entry = entry->next;
     }
 }