GitList

Browse code

Added new pdf object stream parsing capability.

Micah Snyder (micasnyd) authored on 2018/08/15 06:00:31
Showing 11 changed files

libclamav/bytecode.h index d710c5b..c166a9d 100644
libclamav/bytecode_api.c index 880cfe2..87d1569 100644
libclamav/bytecode_priv.h index 5680f82..6f856b0 100644
libclamav/clamav.h index f3d2c09..b1b881c 100644
libclamav/pdf.c index 0883967..daed092 100644
libclamav/pdf.h index 660ba04..6cf1577 100644
libclamav/pdfdecode.c index 36d8573..a6522dd 100644
libclamav/pdfdecode.h index 10333f6..504f31e 100644
libclamav/pdfng.c index c31f2f2..a406004 100644
libclamav/str.c index c3e8ea0..54d47f2 100644
libclamav/str.h index a2b013e..5fe6770 100644

@@ -94,7 +94,7 @@ int cli_bytecode_context_setparam_int(struct cli_bc_ctx *ctx, unsigned i, uint64
                      int cli_bytecode_context_setparam_ptr(struct cli_bc_ctx *ctx, unsigned i, void *data, unsigned datalen);
                      int cli_bytecode_context_setfile(struct cli_bc_ctx *ctx, fmap_t *map);
                      int cli_bytecode_context_setpe(struct cli_bc_ctx *ctx, const struct cli_pe_hook_data *data, const struct cli_exe_section *sections);
                     -int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, unsigned nobjs, struct pdf_obj *objs, uint32_t *pdf_flags, uint32_t pdfsize, uint32_t pdfstartoff);
                     +int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, unsigned nobjs, struct pdf_obj **objs, uint32_t *pdf_flags, uint32_t pdfsize, uint32_t pdfstartoff);
                      int cli_bytecode_context_clear(struct cli_bc_ctx *ctx);
                      /* returns file descriptor, sets tempfile. Caller takes ownership, and is
                       * responsible for freeing/unlinking */

libclamav/bytecode_api.c

History View file @ 89d5207

@@ -1427,7 +1427,7 @@ uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx , uint32_t a, uint32_t
                      int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase,
                                                      unsigned nobjs,
                     -                                struct pdf_obj *objs, uint32_t *pdf_flags,
                     +                                struct pdf_obj **objs, uint32_t *pdf_flags,
                                                      uint32_t pdfsize, uint32_t pdfstartoff)
+                     {
                          ctx->pdf_nobjs = nobjs;
@@ -1470,7 +1470,7 @@ int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx , uint32_t objid)
                          if (!ctx->pdf_phase)
                              return -1;
                          for (i=0;i<ctx->pdf_nobjs;i++) {
                     -        if (ctx->pdf_objs[i].id == objid)
                     +        if (ctx->pdf_objs[i]->id == objid)
                                  return i;
+                         }
                          return -1;
@@ -1484,8 +1484,8 @@ uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx , int32_t objidx)
+                            )
                              return 0;
                          if ((uint32_t)(objidx + 1) == ctx->pdf_nobjs)
                     -        return ctx->pdf_size - ctx->pdf_objs[objidx].start;
                     -    return ctx->pdf_objs[objidx+1].start - ctx->pdf_objs[objidx].start - 4;
                     +        return ctx->pdf_size - ctx->pdf_objs[objidx]->start;
                     +    return ctx->pdf_objs[objidx+1]->start - ctx->pdf_objs[objidx]->start - 4;
+                     }
                      const uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t objidx, uint32_t amount)
@@ -1493,7 +1493,7 @@ const uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t objidx, uin
                          uint32_t size = cli_bcapi_pdf_getobjsize(ctx, objidx);
                          if (amount > size)
                              return NULL;
                     -    return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx].start, amount);
                     +    return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx]->start, amount);
+                     }
                      int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t objidx)
@@ -1501,7 +1501,7 @@ int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t objidx)
                          if (!ctx->pdf_phase ||
                              (uint32_t)objidx >= ctx->pdf_nobjs)
                              return -1;
                     -    return ctx->pdf_objs[objidx].id;
                     +    return ctx->pdf_objs[objidx]->id;
+                     }
                      int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t objidx)
@@ -1509,7 +1509,7 @@ int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t objidx)
                          if (!ctx->pdf_phase ||
                              (uint32_t)objidx >= ctx->pdf_nobjs)
                              return -1;
                     -    return ctx->pdf_objs[objidx].flags;
                     +    return ctx->pdf_objs[objidx]->flags;
+                     }
                      int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t objidx, int32_t flags)
@@ -1518,9 +1518,9 @@ int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t objidx, int32
                              (uint32_t)objidx >= ctx->pdf_nobjs)
                              return -1;
                          cli_dbgmsg("cli_pdf: bytecode setobjflags %08x -> %08x\n",
                     -               ctx->pdf_objs[objidx].flags,
                     +               ctx->pdf_objs[objidx]->flags,
                                     flags);
                     -    ctx->pdf_objs[objidx].flags = flags;
                     +    ctx->pdf_objs[objidx]->flags = flags;
                          return 0;
+                     }
@@ -1529,7 +1529,7 @@ int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx , int32_t objidx)
                          if (!ctx->pdf_phase ||
                              (uint32_t)objidx >= ctx->pdf_nobjs)
                              return -1;
                     -    return ctx->pdf_startoff + ctx->pdf_objs[objidx].start;
                     +    return ctx->pdf_startoff + ctx->pdf_objs[objidx]->start;
+                     }
                      int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx)

libclamav/bytecode_priv.h

History View file @ 89d5207

@@ -184,7 +184,7 @@ struct cli_bc_ctx {
                          uint32_t lsigcnt[64];
                          uint32_t lsigoff[64];
                          uint32_t pdf_nobjs;
                     -    struct pdf_obj *pdf_objs;
                     +    struct pdf_obj **pdf_objs;
                          uint32_t* pdf_flags;
                          uint32_t pdf_size;
                          uint32_t pdf_startoff;

libclamav/clamav.h

History View file @ 89d5207

@@ -73,7 +73,7 @@ extern "C"
                      #define CL_COUNT_PRECISION 4096
                      /* return codes */
                     -typedef enum {
                     +typedef enum cl_error_t {
                          /* libclamav specific */
                          CL_CLEAN = 0,
                          CL_SUCCESS = 0,

libclamav/pdf.c

History View file @ 89d5207

@@ -119,6 +119,11 @@ static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_a
                      #endif
                      /* End PDF statistics callbacks and related */
                     +static int pdf_readint(const char *q0, int len, const char *key);
                     +static const char *pdf_getdict(const char *q0, int* len, const char *key);
                     +static char *pdf_readval(const char *q, int len, const char *key);
                     +static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
+                    +
                      static int xrefCheck(const char *xref, const char *eof)
+                     {
                          const char *q;
@@ -156,6 +161,14 @@ static int xrefCheck(const char *xref, const char *eof)
                      #define noisy_warnmsg(...)
                      #endif
                     +/**
                     + * @brief   Searching BACKwards, find the next character that is not a whitespace.
                     + *
                     + * @param q         Index to start from (at the end of the search space)
                     + * @param start     Beginning of the search space.
                     + *
                     + * @return const char*  Address of the final non-whitespace character OR the same address as the start.
                     + */
                      static const char *findNextNonWSBack(const char *q, const char *start)
+                     {
                          while (q > start && (*q == 0 || *q == 9 || *q == 0xa || *q == 0xc || *q == 0xd || *q == 0x20))
@@ -164,15 +177,56 @@ static const char *findNextNonWSBack(const char *q, const char *start)
                          return q;
+                     }
                     -static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream, int newline_hack)
                     +/**
                     + * @brief   Searching FORwards, find the next character that is not a whitespace.
                     + *
                     + * @param q         Index to start from (at the end of the search space)
                     + * @param start     Beginning of the search space.
                     + *
                     + * @return const char*  Address of the final non-whitespace character OR the same address as the start.
                     + */
                     +static const char *findNextNonWS(const char *q, const char *end)
                     +{
                     +    while (q < end && (*q == 0 || *q == 9 || *q == 0xa || *q == 0xc || *q == 0xd || *q == 0x20))
                     +        q++;
+                    +
                     +    return q;
                     +}
+                    +
                     +/**
                     + * @brief   Find bounds of stream.
                     + *
                     + * PDF streams are prefixed with "stream" and suffixed with "endstream".
                     + * Return value indicates success or failure.
                     + *
                     + * @param start             start address of search space.
                     + * @param bytesleft         size of search space for "stream"
                     + * @param bytesleft2        size of search space for "endstream"
                     + * @param[out] stream       output param, address of start of stream data
                     + * @param[out] endstream    output param, address of end of stream data
                     + * @param newline_hack      hack to support newlines that are \r\n, and not just \n or just \r.
                     + *
                     + * @return int  1 if stream bounds were found.
                     + * @return int  0 if stream bounds could not be found.
                     + */
                     +static int find_stream_bounds(
                     +    const char *start,
                     +    off_t bytesleft,
                     +    off_t bytesleft2,
                     +    off_t *stream,
                     +    off_t *endstream,
                     +    int newline_hack)
+                     {
                          const char *q2, *q;
+                    +
                     +    /* Begin by finding the "stream" string that prefixes stream data. */
                          if ((q2 = cli_memstr(start, bytesleft, "stream", 6))) {
                              q2 += 6;
                              bytesleft -= q2 - start;
                              if (bytesleft < 0)
                                  return 0;
                     +        /* Skip any new line charcters. */
                              if (bytesleft >= 2 && q2[0] == '\xd' && q2[1] == '\xa') {
                                  q2 += 2;
                                  if (newline_hack && (bytesleft > 2) && q2[0] == '\xa')
@@ -182,16 +236,23 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef
+                             }
                              *stream = q2 - start;
+                    +
                              bytesleft2 -= q2 - start;
                              if (bytesleft2 <= 0)
                                  return 0;
                     +        /* Now find the "endstream" string that suffixes stream data */
                              q = q2;
                              q2 = cli_memstr(q, bytesleft2, "endstream", 9);
                     -        if (!q2)
                     +        if (!q2) {
                     +            /* Couldn't find "endstream", but that's ok --
                     +             * -- we'll just count the data we have until EOF. */
                                  q2 = q + bytesleft2-9; /* till EOF */
                     +        }
                              *endstream = q2 - start;
+                    +
                     +        /* Double-check that endstream >= stream */
                              if (*endstream < *stream)
                                  *endstream = *stream;
@@ -202,61 +263,273 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef
+                     }
                      /**
                     - * @brief  Finds the next obj and adds it to our list of objects, and increments nobj.
                     - *
                     - * @param pdf   PDF structure
                     - * @return int  -1 if error
                     - * @return int  0 if no more objects
                     - * @return int  1 if success
                     - * @return int  2 if an invalid object was discovered, may be skipped.
                     + * @brief Find the next *indirect* object in an object stream, adds it to our list of
                     + *        objects, and increments nobj.
                     + *
                     + * Indirect objects in a stream DON'T begin with "obj" and end with "endobj".
                     + * Instead, they have an obj ID and an offset from the first object to point you
                     + * right at them.
                     + *
                     + * If found, objstm->current will be updated to the next obj id.
                     + *
                     + * All objects in an object stream are indirect and thus do not begin or start
                     + * with "obj" or "endobj".  Instead, the object stream takes the following
                     + * format.
                     + *
                     + *      <dictionary describing stream> objstm content endobjstm
                     + *
                     + * where content looks something like the following:
                     + *
                     + *      15 0 16 3 17 46 (ab)<</IDS 8 0 R/JavaScript 27 0 R/URLS 9 0 R>><</Names[(Test)28 0 R]>>
                     + *
                     + * In the above example, the literal string (ab) is indirect object # 15, and
                     + * begins at offset 0 of the set of objects.  The next object, # 16 begis at
                     + * offset 3 is a dictionary.  The final object is also a dictionary, beginning
                     + * at offset 46.
                     + *
                     + * @param pdf   Pdf struct that keeps track of all information found in the PDF.
                     + * @param objstm
                     + *
                     + * @return CL_SUCCESS  if success
                     + * @return CL_EPARSE   if parsing error
                     + * @return CL_EMEM     if error allocating memory
                     + * @return CL_EARG     if invalid arguments
                     + */
                     +int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm, struct pdf_obj **obj_found)
                     +{
                     +    cl_error_t status = CL_EPARSE;
                     +    struct pdf_obj *obj = NULL;
                     +    unsigned long objid = 0, objsize = 0, objoff = 0;
                     +    const char *index = NULL;
                     +    size_t bytes_remaining = 0;
+                    +
                     +    if (NULL == pdf || NULL == objstm) {
                     +        cli_warnmsg("pdf_findobj_in_objstm: invalid arguments\n");
                     +        return CL_EARG;
                     +    }
+                    +
                     +    *obj_found = NULL;
+                    +
                     +    index = objstm->streambuf + objstm->current_pair;
                     +    bytes_remaining = objstm->streambuf_len - objstm->current_pair;
+                    +
                     +    obj = calloc(sizeof(struct pdf_obj), 1);
                     +    if (!obj) {
                     +        cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n");
                     +        status = CL_EMEM;
                     +        goto done;
                     +    }
+                    +
                     +    /* This object is in a stream, not in the regular map buffer. */
                     +    obj->objstm = objstm;
+                    +
                     +    /* objstm->current_pair points directly to the obj id */
                     +    if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &objid)) {
                     +        /* Failed to find objid */
                     +        cli_dbgmsg("pdf_findobj_in_objstm: Failed to find objid for obj in object stream\n");
                     +        status = CL_EPARSE;
                     +        goto done;
                     +    }
+                    +
                     +    /* Find the obj offset that appears just after the obj id*/
                     +    while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) {
                     +        index++;
                     +        bytes_remaining--;
                     +    }
                     +    index = findNextNonWS(index, objstm->streambuf + objstm->first);
                     +    bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;
+                    +
                     +    if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &objoff)) {
                     +        /* Failed to find obj offset */
                     +        cli_dbgmsg("pdf_findobj_in_objstm: Failed to find obj offset for obj in object stream\n");
                     +        status = CL_EPARSE;
                     +        goto done;
                     +    }
+                    +
                     +    objstm->current = objstm->first + objoff;
+                    +
                     +    obj->id = (objid << 8) | (0 & 0xff);
                     +    obj->start = objstm->current;
                     +    obj->flags = 0;
+                    +
                     +    objstm->nobjs_found++;
+                    +
                     +    while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) {
                     +        index++;
                     +        bytes_remaining--;
                     +    }
                     +    objstm->current_pair = (uint32_t)(findNextNonWS(index, objstm->streambuf + objstm->first) - objstm->streambuf);
+                    +
                     +    /* Update current_pair, if there are more */
                     +    if ((objstm->nobjs_found < objstm->n) &&
                     +        (index < objstm->streambuf + objstm->streambuf_len))
                     +    {
                     +        unsigned long next_objid = 0, next_objoff = 0;
+                    +
                     +        /*
                     +         * While we're at it,
                     +         *   lets record the size as running up to the next object offset.
                     +         *
                     +         * To do so, we will need to parse the next obj pair.
                     +         */
                     +        /* objstm->current_pair points directly to the obj id */
                     +        index = objstm->streambuf + objstm->current_pair;
                     +        bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;
+                    +
                     +        if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &next_objid)) {
                     +            /* Failed to find objid for next obj */
                     +            cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next objid for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found);
                     +            status = CL_EPARSE;
                     +            goto done;
                     +        }
+                    +
                     +        /* Find the obj offset that appears just after the obj id*/
                     +        while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) {
                     +            index++;
                     +            bytes_remaining--;
                     +        }
                     +        index = findNextNonWS(index, objstm->streambuf + objstm->first);
                     +        bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;
+                    +
                     +        if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &next_objoff)) {
                     +            /* Failed to find obj offset for next obj */
                     +            cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found);
                     +            status = CL_EPARSE;
                     +            goto done;
                     +        }
+                    +
                     +        obj->size = objstm->first + next_objoff - obj->start;
                     +    }
                     +    else
                     +    {
                     +        /*
                     +         * Should be no more objects. We should verify.
                     +         *
                     +         * Either way...
                     +         *   obj->size should be the rest of the buffer.
                     +         */
                     +        if (objstm->nobjs_found < objstm->n) {
                     +            cli_warnmsg("pdf_findobj_in_objstm: Fewer objects found in object stream than expected!\n");
                     +        }
+                    +
                     +        obj->size = objstm->streambuf_len - obj->start;
                     +    }
+                    +
                     +    /* Success! Add the object to the list of all objects found. */
                     +    pdf->nobjs++;
                     +    pdf->objs = cli_realloc2(pdf->objs, sizeof(struct pdf_obj*) * pdf->nobjs);
                     +    if (!pdf->objs) {
                     +        cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n");
                     +        status = CL_EMEM;
                     +        goto done;
                     +    }
                     +    pdf->objs[pdf->nobjs-1] = obj;
+                    +
                     +    *obj_found = obj;
+                    +
                     +    status = CL_SUCCESS;
+                    +
                     +done:
                     +    if (CL_SUCCESS != status) {
                     +        if (NULL != obj) {
                     +            free(obj);
                     +        }
                     +    }
                     +    return status;
                     +}
+                    +
                     +/**
                     + * @brief Find the next *indirect* object.
                     + *
                     + * Indirect objects begin with "obj" and end with "endobj".
                     + * Identify objects that contain streams.
                     + * Identify truncated objects.
                     + *
                     + * If found, pdf->offset will be updated to just after the "endobj".
                     + * If truncated, pdf->offset will == pdf->size.
                     + * If not found, pdf->offset will not be updated.
                     + *
                     + * @param pdf   Pdf context struct that keeps track of all information found in the PDF.
                     + *
                     + * @return CL_SUCCESS  if success
                     + * @return CL_BREAK    if no more objects
                     + * @return CL_EPARSE   if parsing error
                     + * @return CL_EMEM     if error allocating memory
                       */
                     -int pdf_findobj(struct pdf_struct *pdf)
                     +cl_error_t pdf_findobj(struct pdf_struct *pdf)
+                     {
                     +    cl_error_t status = CL_EPARSE;
                          const char *start, *q, *q2, *q3, *eof;
                     -    struct pdf_obj *obj;
                     +    struct pdf_obj *obj = NULL;
                          off_t bytesleft;
                          unsigned long genid, objid;
                          pdf->nobjs++;
                     -    pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
                     +    pdf->objs = cli_realloc2(pdf->objs, sizeof(struct pdf_obj*) * pdf->nobjs);
                          if (!pdf->objs) {
                     -        cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs);
                     -        return -1;
                     +        status = CL_EMEM;
                     +        goto done;
                     +    }
+                    +
                     +    obj = malloc(sizeof(struct pdf_obj));
                     +    if (!obj) {
                     +        status = CL_EMEM;
                     +        goto done;
+                         }
                     +    pdf->objs[pdf->nobjs-1] = obj;
                     -    obj = &pdf->objs[pdf->nobjs-1];
                          memset(obj, 0, sizeof(*obj));
                     -    start = pdf->map+pdf->offset;
+                    +
                     +    start = pdf->map + pdf->offset;
                          bytesleft = pdf->size - pdf->offset;
                     -    while (bytesleft > 0) {
+                    +
                     +    /* Indirect objects located outside of an object stream are prefaced with "obj"
                     +     * and suffixed with "endobj".  Find the "obj" preface. */
                     +    while (bytesleft > 0)
                     +    {
                              q2 = cli_memstr(start, bytesleft, "obj", 3);
                     -        if (!q2)
                     -            return 0;/* no more objs */
                     +        if (!q2) {
                     +            status = CL_BREAK; /* no more objs */
                     +            goto done;
                     +        }
                     +        /* verify that "obj" has a whitespace before it, and is not the end of
                     +         * a previous string like... "globj" */
                              q2--;
                              bytesleft -= q2 - start;
+                    +
                              if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) {
                     +            /* This instance of the "obj" string appears to be part of another string.
                     +             * Skip it, and keep searching for an object. */
                                  start = q2+4;
                                  bytesleft -= 4;
                                  continue;
+                             }
                     -        break;
                     +        break; /* Found it. q2 should point to the whitespace before the "obj" string */
+                         }
                     -    if (bytesleft <= 0)
                     -        return 0;
                     +    if (bytesleft <= 0) {
                     +        status = CL_BREAK; /* No "obj" found. */
                     +        goto done;
                     +    }
+                    +
                     +    /* "obj" found! */
                     +    /* Find the generation id (genid) that appears before the "obj" */
                          q = findNextNonWSBack(q2-1, start);
                          while (q > start && isdigit(*q))
                              q--;
                          if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &genid)) {
                     -        cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs);
                     +        cli_dbgmsg("pdf_findobj: Failed to parse object genid (# objects found: %u)\n", pdf->nobjs);
                              /* Failed to parse, probably not a real object.  Skip past the "obj" thing, and continue. */
                              pdf->offset = q2 + 4 - pdf->map;
                     -        return 2;
                     +        status = CL_EPARSE;
                     +        goto done;
+                         }
+                    +
                     +    /* Find the object id (objid) that appers before the genid */
                          q = findNextNonWSBack(q-1,start);
                          while (q > start && isdigit(*q))
                              q--;
@@ -271,59 +544,82 @@ int pdf_findobj(struct pdf_struct *pdf)
                                  const char* lastfile = q - 4;
                                  if (0 != strncmp(lastfile, "\%\%EOF", 5)) {
                                      /* Nope, wasn't %%EOF */
                     -                cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
                     +                cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs);
                                      /* Skip past the "obj" thing, and continue. */
                                      pdf->offset = q2 + 4 - pdf->map;
                     -                return 2;
                     +                status = CL_EPARSE;
                     +                goto done;
+                                 }
                                  /* Yup, Looks, like the file continues after %%EOF.
                                   * Probably another revision.  Keep parsing... */
                                  q++;
                     -            cli_dbgmsg("cli_pdf: \%\%EOF detected before end of file, at %zu\n", (size_t)q);
                     +            cli_dbgmsg("pdf_findobj: \%\%EOF detected before end of file, at %zu\n", (size_t)q);
                              } else {
                                  /* Failed parsing at the very beginning */
                     -            cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
                     +            cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs);
                                  /* Probably not a real object.  Skip past the "obj" thing, and continue. */
                                  pdf->offset = q2 + 4 - pdf->map;
                     -            return 2;
                     +            status = CL_EPARSE;
                     +            goto done;
+                             }
                              /* Try again, with offset slightly adjusted */
                              if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &objid)) {
                     -            cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
                     +            cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs);
                                  /* Still failed... Probably not a real object.  Skip past the "obj" thing, and continue. */
                                  pdf->offset = q2 + 4 - pdf->map;
                     -            return 2;
                     +            status = CL_EPARSE;
                     +            goto done;
+                             }
                     -        cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n");
                     +        cli_dbgmsg("pdf_findobj: There appears to be an additional revision. Continuing to parse...\n");
+                         }
                     -    obj->id = (objid << 8) | (genid&0xff);
                     -    obj->start = q2+4 - pdf->map;
+                    +
                     +    /*
                     +     * Ok so we have the objid, genid, and "obj" string.
                     +     *   Time to store that information and then ...
                     +     *     ... investigate what kind of object this is.
                     +     */
                     +    obj->id = (objid << 8) | (genid & 0xff);
                     +    obj->start = q2+4 - pdf->map; /* obj start begins just after the "obj" string */
                          obj->flags = 0;
+                    +
                          bytesleft -= 4;
                          eof = pdf->map + pdf->size;
                          q = pdf->map + obj->start;
                     -    while (q < eof && bytesleft > 0) {
                     +    while (q < eof && bytesleft > 0)
                     +    {
                              off_t p_stream, p_endstream;
                              q2 = pdf_nextobject(q, bytesleft);
                              if (!q2)
                     -            q2 = pdf->map + pdf->size;
                     +            q2 = pdf->map + pdf->size; /* No interesting objects found, fast-forward to eof */
                              bytesleft -= q2 - q;
                              if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) {
                     +            /*
                     +             * Found obj that contains a stream.
                     +             */
                                  obj->flags |= 1 << OBJ_STREAM;
                                  q2 = q-1 + p_endstream + 9;
                                  bytesleft -= q2 - q + 1;
                                  if (bytesleft < 0) {
                     +                /* ... and the stream is truncated.  Hmm... */
                                      obj->flags |= 1 << OBJ_TRUNCATED;
                                      pdf->offset = pdf->size;
                     -                return 1;/* truncated */
+                    +
                     +                status = CL_SUCCESS;
                     +                goto done; /* Truncated file, no end to obj/stream.
                     +                            * The next call to pdf_findobj() will return no more objects. */
+                                 }
                              } else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
                     +            /*
                     +             * obj found and offset positioned. ideal return case
                     +             */
                                  q2 = q3 + 6;
                     -            pdf->offset = q2 - pdf->map;
                     -            return 1; /* obj found and offset positioned */
                     +            pdf->offset = q2 - pdf->map; /* update the offset to just after the endobj */
+                    +
                     +            status = CL_SUCCESS;
                     +            goto done;
                              } else {
                                  q2++;
                                  bytesleft--;
@@ -335,7 +631,32 @@ int pdf_findobj(struct pdf_struct *pdf)
                          obj->flags |= 1 << OBJ_TRUNCATED;
                          pdf->offset = pdf->size;
                     -    return 1;/* truncated */
                     +    status = CL_SUCCESS; /* truncated file, no end to obj. */
+                    +
                     +done:
                     +    if (status == CL_SUCCESS) {
                     +        cli_dbgmsg("pdf_findobj: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + pdf->startoff));
                     +    }
                     +    else
                     +    {
                     +        if(status == CL_BREAK) {
                     +            cli_dbgmsg("pdf_findobj: No more objects (# objects found: %u)\n", pdf->nobjs);
                     +        } else if(status == CL_EMEM) {
                     +            cli_warnmsg("pdf_findobj: Error allocating memory (# objects found: %u)\n", pdf->nobjs);
                     +        } else {
                     +            cli_dbgmsg("pdf_findobj: Unexpected status code %d.\n", status);
                     +        }
                     +        /* Remove the unused obj reference from our list of objects found */
                     +        /* No need to realloc pdf->objs back down.  It won't leak. */
                     +        pdf->objs[pdf->nobjs-1] = NULL;
                     +        pdf->nobjs--;
+                    +
                     +        /* Free up the obj struct. */
                     +        if (NULL != obj)
                     +            free(obj);
                     +    }
+                    +
                     +    return status;
+                     }
                      static size_t filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, size_t len, size_t *sum)
@@ -424,7 +745,7 @@ void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag
                              break;
+                         }
                     -    cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff);
                     +    cli_dbgmsg("pdfobj_flag: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff);
+                     }
                      struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)
@@ -433,17 +754,20 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o
                          uint32_t i;
                          /* search starting at previous obj (if exists) */
                     -    i = (obj != pdf->objs) ? obj - pdf->objs : 0;
                     +    for (i = 0; i < pdf->nobjs; i++) {
                     +        if (pdf->objs[i] == obj)
                     +            break;
                     +    }
                     -    for (j=i;j<pdf->nobjs;j++) {
                     -        obj = &pdf->objs[j];
                     +    for (j = i; j < pdf->nobjs; j++) {
                     +        obj = pdf->objs[j];
                              if (obj->id == objid)
                                  return obj;
+                         }
                          /* restart search from beginning if not found */
                     -    for (j=0;j<i;j++) {
                     -        obj = &pdf->objs[j];
                     +    for (j = 0; j < i; j++) {
                     +        obj = pdf->objs[j];
                              if (obj->id == objid)
                                  return obj;
+                         }
@@ -451,72 +775,173 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o
                          return NULL;
+                     }
                     -static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len)
                     +/**
                     + * @brief   Find and interpret the "/Length" dictionary key value.
                     + *
                     + * The value may be:
                     + *  - a direct object (i.e. just a number)
                     + *  - an indirect object, where the value is somewhere else in the document and we have to look it up.
                     + *    indirect objects are referenced using an object id (objid), generation id (genid) genid, and the letter 'R'.
                     + *
                     + * Example dictionary with a single key "/Length" that relies direct object for the value.
                     + *
                     + *      1 0 obj
                     + *          << /Length 534
                     + *              /Filter [ /ASCII85Decode /LZWDecode ]
                     + *          >>
                     + *          stream
                     + *              J..)6T`?p&<!J9%_[umg"B7/Z7KNXbN'S+,*Q/&"OLT'FLIDK#!n`$"<Atdi`\Vn%b%)&'cA*VnK\CJY(sF>c!Jnl@
                     + *              RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d&/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb
                     + *              Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1
                     + *              'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sDS]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+
                     + *              ":aAa'S`ViJglLb8<W9k6Yl\\0McJQkDeLWdPN?9A'jX*al>iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL,
                     + *              JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj88l8Q:_CZ(Gm1%X\N1&u!FKHMB~>
                     + *          endstream
                     + *      endobj
                     + *
                     + * Example dictionary with a single key "/Length" that relies on an indirect object for the value.
                     + *
                     + *      7 0 obj
                     + *          << /Length 8 0 R >> % An indirect reference to object 8, with generation id 0.
                     + *          stream
                     + *              BT
                     + *                  /F1 12 Tf
                     + *                   72 712 Td
                     + *                  ( A stream with an indirect length ) Tj
                     + *              ET
                     + *          endstream
                     + *      endobj
                     + *
                     + *      8 0 obj
                     + *          77 % The length of the preceding stream
                     + *      endobj
                     + *
                     + * @param pdf       Pdf context structure.
                     + * @param obj       Pdf object context structure.
                     + * @param start     Pointer start of the dictionary string.
                     + * @param len       Remaining length of the dictioary string in bytes.
                     + * @return size_t   Unsigned integer value of the "/Length" key
                     + */
                     +static size_t find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *dict_start, size_t dict_len)
+                     {
                     -    unsigned long length;
                     -    const char *q;
                     +    size_t length = 0;
                     +    const char *obj_start = dict_start;
                     +    size_t bytes_remaining = dict_len;
                     +    unsigned long length_ul = 0;
                     +    const char *index;
                     -    q = cli_memstr(start, len, "/Length", 7);
                     -    if (!q)
                     +    if (bytes_remaining < 8) {
                              return 0;
                     +    }
                     -    q++;
                     -    len -= q - start;
                     -    start = pdf_nextobject(q, len);
                     -    if (!start)
                     +    /*
                     +     * Find the "/Length" dictionary key
                     +     */
                     +    index = cli_memstr(obj_start, bytes_remaining, "/Length", 7);
                     +    if (!index)
                              return 0;
                     -    len -= start - q;
                     -    q = start;
                     -    if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) {
                     -        cli_dbgmsg("cli_pdf: failed to parse object length\n");
                     +    if (bytes_remaining < 1) {
                              return 0;
+                         }
                     -    while (isdigit(*q) && len > 0) {
                     -        q++;
                     -        len--;
                     +    /* Step the index into the "/Length" string. */
                     +    index++;
                     +    bytes_remaining -= index - obj_start;
+                    +
                     +    /* Find the start of the next direct or indirect object.
                     +     * pdf_nextobject() assumes we started searching from within a previous object */
                     +    obj_start = pdf_nextobject(index, bytes_remaining);
                     +    if (!obj_start)
                     +        return 0;
+                    +
                     +    if (bytes_remaining < obj_start - index) {
                     +        return 0;
                     +    }
                     +    bytes_remaining -= obj_start - index;
                     +    index = obj_start;
+                    +
                     +    /* Read the value.  This could either be the direct length value,
                     +       or the object id of the indirect object that has the length */
                     +    if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &length_ul)) {
                     +        cli_dbgmsg("find_length: failed to parse object length\n");
                     +        return 0;
                     +    }
                     +    length = length_ul; /* length or maybe object id */
+                    +
                     +    /*
                     +     * Keep parsing, skipping past the first integer that might have been what we wanted.
                     +     * If it's an indirect object, we'll find a Generation ID followed by the letter 'R'
                     +     * I.e. something like " 0 R"
                     +     */
                     +    while ((bytes_remaining > 0) && isdigit(*index)) {
                     +        index++;
                     +        bytes_remaining--;
+                         }
                     -    if (*q == ' ' && len > 0) {
                     +    if ((bytes_remaining > 0) && (*index == ' ')) {
                              unsigned long genid;
                     -        q++;
                     -        len--;
                     -        if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &genid)) {
                     -            cli_dbgmsg("cli_pdf: failed to parse object genid\n");
+                    +
                     +        index++;
                     +        bytes_remaining--;
+                    +
                     +        if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &genid)) {
                     +            cli_dbgmsg("find_length: failed to parse object genid\n");
                                  return 0;
+                             }
                     -        while(isdigit(*q) && len > 0) {
                     -            q++;
                     -            len--;
                     +        while((bytes_remaining > 0) && isdigit(*index)) {
                     +            index++;
                     +            bytes_remaining--;
                     +        }
+                    +
                     +        if (bytes_remaining < 2) {
                     +            return 0;
+                             }
                     -        if (q[0] == ' ' && q[1] == 'R') {
                     -            cli_dbgmsg("cli_pdf: length is in indirect object %lu %lu\n", length, genid);
                     +        if (index[0] == ' ' && index[1] == 'R') {
                     +            /*
                     +             * Ok so we found a genid and that 'R'.  Which means that first value
                     +             * was actually the objid.
                     +             * We can look up the indirect object using this information.
                     +             */
                     +            unsigned long objid = length;
                     +            const char* indirect_obj_start = NULL;
+                    +
                     +            cli_dbgmsg("find_length: length is in indirect object %lu %lu\n", objid, genid);
                                  obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
                                  if (!obj) {
                     -                cli_dbgmsg("cli_pdf: indirect object not found\n");
                     +                cli_dbgmsg("find_length: indirect object not found\n");
                                      return 0;
+                                 }
                     -            q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
                     -            if (!q) {
                     -                cli_dbgmsg("cli_pdf: next object not found\n");
                     +            indirect_obj_start = pdf->map + obj->start;
                     +            bytes_remaining = pdf->size - obj->start;
+                    +
                     +            /* Ok so we found the indirect object, lets read the value. */
                     +            index = pdf_nextobject(indirect_obj_start, bytes_remaining);
                     +            if (!index) {
                     +                cli_dbgmsg("find_length: next object not found\n");
                                      return 0;
+                                 }
+                    +
                     +            if (bytes_remaining < index - indirect_obj_start) {
                     +                return 0;
                     +            }
                     +            bytes_remaining -= index - indirect_obj_start;
                     -            if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) {
                     -                cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n");
                     +            /* Found the value, so lets parse it as an unsigned long */
                     +            if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &length)) {
                     +                cli_dbgmsg("find_length: failed to parse object length from indirect object\n");
                                      return 0;
+                                 }
+                             }
+                         }
                          /* limit length */
                     -    if (start - pdf->map + length+5 > pdf->size)
                     -        length = pdf->size - (start - pdf->map)-5;
                     +    if (obj_start - pdf->map + length + 5 > pdf->size)
                     +        length = pdf->size - (obj_start - pdf->map) - 5;
                          return length;
+                     }
@@ -525,36 +950,98 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
                      static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
+                     {
                     -    unsigned i = obj - pdf->objs;
                     +    if (0 == obj->size)
                     +    {
                     +        /*
                     +         * Programmatically determine size if not already known.
                     +         */
                     +        unsigned i = 0;
+                    +
                     +        /* Find the index of the current object */
                     +        for (i = 0; i < pdf->nobjs; i++) {
                     +            if (pdf->objs[i] == obj)
                     +                break;
                     +        }
+                    +
                     +        /* Find the next object that exists in the same buffer (pdf fmap, or object stream) */
                     +        if (i < pdf->nobjs) {
                     +            i++;
                     +        }
+                    +
                     +        if (obj->objstm == NULL) {
                     +            /* Current object isn't in an object stream, we want to find
                     +             * the next object that also isn't in an object stream. */
                     +            for ( ; i < pdf->nobjs; i++) {
                     +                if (pdf->objs[i]->objstm == NULL)
                     +                    break;
                     +            }
                     +        } else {
                     +            /* Current object is in an object stream, we want to find
                     +             * the next object that is in the same object stream.
                     +             *
                     +             * This really shouldn't happen, so throw a warning and
                     +             * then see if we can solve it anyhow */
                     +            cli_warnmsg("obj_size: Encountered pdf object in an object stream that has an unknown size!!\n");
+                    +
                     +            for ( ; i < pdf->nobjs; i++) {
                     +                if (pdf->objs[i]->objstm == obj->objstm)
                     +                    break;
                     +            }
                     +        }
+                    +
                     +        /* Step backwards from the "next" object to find the end of the current object */
                     +        if (i < pdf->nobjs) {
                     +            int s = pdf->objs[i]->start - obj->start - 4;
                     +            if (s > 0) {
                     +                if (!binary) {
                     +                    const char *p = NULL;
                     +                    const char *q = NULL;
+                    +
                     +                    if (obj->objstm == NULL) {
                     +                        p = pdf->map + obj->start;
                     +                    } else {
                     +                        p = obj->objstm->streambuf + obj->start;
                     +                    }
                     +                    q = p + s;
                     -    i++;
                     -    if (i < pdf->nobjs) {
                     -        int s = pdf->objs[i].start - obj->start - 4;
                     -        if (s > 0) {
                     -            if (!binary) {
                     -                const char *p = pdf->map + obj->start;
                     -                const char *q = p + s;
                     +                    while (q > p && (isspace(*q) || isdigit(*q)))
                     +                        q--;
                     -                while (q > p && (isspace(*q) || isdigit(*q)))
                     -                       q--;
                     +                    if (q > p+5 && !memcmp(q-5,"endobj",6))
                     +                        q -= 6;
                     -                if (q > p+5 && !memcmp(q-5,"endobj",6))
                     -                    q -= 6;
                     +                    q = findNextNonWSBack(q, p);
                     +                    q++;
                     -                q = findNextNonWSBack(q, p);
                     -                q++;
                     +                    obj->size = q - p;
                     +                    goto done;
                     +                }
                     -                return q - p;
                     +                obj->size = s;
                     +                goto done;
+                                 }
                     +        }
                     -            return s;
                     +        /* If we've gotten this far, we didn't find a "next" object... so our
                     +         * current object must be at the end of the pdf fmap or the end of the
                     +         * object stream. */
                     +        if (obj->objstm == NULL) {
                     +            /* Current object isn't in an object stream, so we can determine object
                     +             * size based on the remaining size of the file (in theory). */
                     +            if (binary)
                     +                obj->size = pdf->size - obj->start;
                     +            else
                     +                obj->size = pdf->offset - obj->start - 6; /* This hack I think assumes that we reached the end of the file when finding objects. */
                     +        } else {
                     +            /* Current object is in an object stream, we want to find
                     +             * the next object that is in the same object stream. */
                     +            obj->size = obj->objstm->streambuf_len - obj->start;
+                             }
+                         }
                     -    if (binary)
                     -        return pdf->size - obj->start;
                     +done:
                     -    return pdf->offset - obj->start - 6;
                     +    return obj->size;
+                     }
                      static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid)
@@ -568,7 +1055,7 @@ static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, i
                          bc_ctx = cli_bytecode_context_alloc();
                          if (!bc_ctx) {
                     -        cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
                     +        cli_errmsg("run_pdf_hooks: can't allocate memory for bc_ctx\n");
                              return CL_EMEM;
+                         }
@@ -576,7 +1063,7 @@ static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, i
                          if (fd != -1) {
                              map = fmap(fd, 0, 0);
                              if (!map) {
                     -            cli_dbgmsg("can't mmap pdf extracted obj\n");
                     +            cli_dbgmsg("run_pdf_hooks: can't mmap pdf extracted obj\n");
                                  map = *ctx->fmap;
                                  fd = -1;
+                             }
@@ -603,15 +1090,15 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
                          unsigned char pad, i;
                          int nrounds;
                     -    cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %zu\n", key_n, *length);
                     +    cli_dbgmsg("aes_decrypt: key length: %d, data length: %zu\n", key_n, *length);
                          if (key_n > 32) {
                     -        cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8);
                     +        cli_dbgmsg("aes_decrypt: key length is %d!\n", key_n*8);
                              return;
+                         }
                          if (len < 32) {
                     -        cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len);
                     -        noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len);
                     +        cli_dbgmsg("aes_decrypt: len is <32: %zu\n", len);
                     +        noisy_warnmsg("aes_decrypt: len is <32: %zu\n", len);
                              return;
+                         }
@@ -626,7 +1113,7 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
                          cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
                          nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n*8);
                          if (!nrounds) {
                     -    cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n");
                     +    cli_dbgmsg("aes_decrypt: nrounds = 0\n");
                          return;
+                         }
                          cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");
@@ -649,8 +1136,8 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
                              pad = q[-1];
                              if (pad > 0x10) {
                     -            cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
                     -            noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
                     +            cli_dbgmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
                     +            noisy_warnmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16);
                                  *length -= len;
                                  return;
+                             }
@@ -658,8 +1145,8 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
                              q -= pad;
                              for (i=1;i<pad;i++) {
                                  if (q[i] != pad) {
                     -                cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
                     -                noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
                     +                cli_dbgmsg("aes_decrypt: bad pad: %x != %x\n",q[i],pad);
                     +                noisy_warnmsg("aes_decrypt: bad pad: %x != %x\n",q[i],pad);
                                      *length -= len;
                                      return;
@@ -671,7 +1158,7 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *
                          *length -= len;
                     -    cli_dbgmsg("cli_pdf: aes_decrypt: length is %zu\n", *length);
                     +    cli_dbgmsg("aes_decrypt: length is %zu\n", *length);
+                     }
@@ -682,7 +1169,7 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l
                          struct arc4_state arc4;
                          if (!length || !*length || !in) {
                     -        noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
                     +        noisy_warnmsg("decrypt_any: decrypt failed for obj %u %u\n", id>>8, id&0xff);
                              return NULL;
+                         }
@@ -726,20 +1213,20 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l
                              arc4_init(&arc4, result, n);
                              arc4_apply(&arc4, q, (unsigned)*length); /* TODO: may truncate for very large lengths */
                     -        noisy_msg(pdf, "decrypted ARC4 data\n");
                     +        noisy_msg(pdf, "decrypt_any: decrypted ARC4 data\n");
                              break;
                          case ENC_AESV2:
                              cli_dbgmsg("cli_pdf: enc is aesv2\n");
                              aes_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1);
                     -        noisy_msg(pdf, "decrypted AES(v2) data\n");
                     +        noisy_msg(pdf, "decrypt_any: decrypted AES(v2) data\n");
                              break;
                          case ENC_AESV3:
                     -        cli_dbgmsg("cli_pdf: enc is aesv3\n");
                     +        cli_dbgmsg("decrypt_any: enc is aesv3\n");
                              if (pdf->keylen == 0) {
                     -            cli_dbgmsg("cli_pdf: no key\n");
                     +            cli_dbgmsg("decrypt_any: no key\n");
                                  return NULL;
+                             }
@@ -749,21 +1236,21 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l
                              break;
                          case ENC_IDENTITY:
                     -        cli_dbgmsg("cli_pdf: enc is identity\n");
                     +        cli_dbgmsg("decrypt_any: enc is identity\n");
                              memcpy(q, in, *length);
                     -        noisy_msg(pdf, "identity encryption\n");
                     +        noisy_msg(pdf, "decrypt_any: identity encryption\n");
                              break;
                          case ENC_NONE:
                     -        cli_dbgmsg("cli_pdf: enc is none\n");
                     +        cli_dbgmsg("decrypt_any: enc is none\n");
                              noisy_msg(pdf, "encryption is none\n");
                              free(q);
                              return NULL;
                          case ENC_UNKNOWN:
                     -        cli_dbgmsg("cli_pdf: enc is unknown\n");
                     +        cli_dbgmsg("decrypt_any: enc is unknown\n");
                              free(q);
                              noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
@@ -838,7 +1325,8 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
                          char fullname[1024];
                          char outbuff[BUFSIZ];
                          char inbuf[BUFSIZ];
                     -    int fout, n, rc;
                     +    int fout, n;
                     +    cl_error_t rc;
                          enum cstate st = CSTATE_NONE;
                          snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
@@ -846,7 +1334,7 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
                          if (fout < 0) {
                              char err[128];
                     -        cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
                     +        cli_errmsg("pdf_scan_contents: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
                              return CL_ETMPFILE;
+                         }
@@ -872,20 +1360,19 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
                          return rc;
+                     }
                     -static const char *pdf_getdict(const char *q0, int* len, const char *key);
                     -static char *pdf_readval(const char *q, int len, const char *key);
                     -static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
+                    -
                      int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
+                     {
                          char fullname[NAME_MAX + 1];
                          int fout;
                          ptrdiff_t sum = 0;
                     -    int rc = CL_SUCCESS;
                     +    cl_error_t rc = CL_SUCCESS;
                          int dump = 1;
                          cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
                     +    if (obj->objstm)
                     +        cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n");
+                    +
                          /* TODO: call bytecode hook here, allow override dumpability */
                          if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
                              /* don't dump all streams */
@@ -905,13 +1392,13 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                          if (!dump)
                              return CL_CLEAN;
                     -    cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff);
                     +    cli_dbgmsg("pdf_extract_obj: dumping obj %u %u\n", obj->id>>8, obj->id&0xff);
                          snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
                          fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
                          if (fout < 0) {
                              char err[128];
                     -        cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
                     +        cli_errmsg("pdf_extract_obj: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
                              return CL_ETMPFILE;
+                         }
@@ -925,6 +1412,11 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                  off_t p_stream = 0, p_endstream = 0;
                                  off_t length;
                     +            if (NULL != obj->objstm) {
                     +                cli_warnmsg("pdf_extract_obj: Object found in object stream claims to be an object stream! Skipping.\n");
                     +                break;
                     +            }
+                    +
                                  find_stream_bounds(start, pdf->size - obj->start,
                                             pdf->size - obj->start,
                                             &p_stream, &p_endstream,
@@ -937,6 +1429,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                      int len = p_stream;
                                      const char *pstr;
                                      struct pdf_dict *dparams = NULL;
                     +                struct objstm_struct *objstm = NULL;
                                      int xref = 0;
                                      length = find_length(pdf, obj, start, p_stream);
@@ -970,7 +1463,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                          if (length < 0)
                                              length = 0;
                     -                    cli_dbgmsg("cli_pdf: calculated length %lld\n", (long long)length);
                     +                    cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length);
                                      } else {
                                          if (size > (size_t)length+2) {
                                              cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
@@ -980,7 +1473,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
+                                     }
                                      if (orig_length && size > (size_t)orig_length + 20) {
                     -                    cli_dbgmsg("cli_pdf: orig length: %lld, length: %lld, size: %zu\n",
                     +                    cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n",
                                                     (long long)orig_length, (long long)length, size);
                                          pdfobj_flag(pdf, obj, BAD_STREAMLEN);
+                                     }
@@ -998,12 +1491,20 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                      cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
                     -                pstr = pdf_getdict(start, &len, "/DecodeParms");
                     -                if (!pstr)
                     -                    pstr = pdf_getdict(start, &len, "/DP");
                     +                /*
                     +                 * Identify the DecodeParms, if available.
                     +                 */
                     +                if (NULL != (pstr = pdf_getdict(start, &len, "/DecodeParms")))
                     +                {
                     +                    cli_dbgmsg("pdf_extract_obj: Found /DecodeParms\n");
                     +                }
                     +                else if (NULL != (pstr = pdf_getdict(start, &len, "/DP")))
                     +                {
                     +                    cli_dbgmsg("pdf_extract_obj: Found /DP\n");
                     +                }
                                      if (pstr) {
                     -                    unsigned int objsz = obj_size(pdf, obj, 1);
                     +                    unsigned int objsize = obj_size(pdf, obj, 1);
                                          /* shift pstr left to "<<" for pdf_parse_dict */
                                          while ((*pstr == '<') && (pstr > start)) {
@@ -1018,12 +1519,102 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
+                                         }
                                          if (len > 4)
                     -                        dparams = pdf_parse_dict(pdf, obj, objsz, (char *)pstr, NULL);
                     +                        dparams = pdf_parse_dict(pdf, obj, objsize, (char *)pstr, NULL);
                                          else
                     -                        cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n");
                     +                        cli_dbgmsg("pdf_extract_obj: failed to locate DecodeParms dictionary start\n");
                     +                }
+                    +
                     +                /*
                     +                 * Identify if the stream is an object stream. If so, collect the relevant info.
                     +                 */
                     +                len = p_stream;
                     +                if (NULL != (pstr = pdf_getdict(start, &len, "/Type/ObjStm")))
                     +                {
                     +                    int32_t objstm_first = -1;
                     +                    int32_t objstm_length = -1;
                     +                    int32_t objstm_n = -1;
+                    +
                     +                    cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n");
+                    +
                     +                    len = p_stream;
                     +                    if ((-1 == (objstm_first = pdf_readint(start, len, "/First"))))
                     +                    {
                     +                        cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n");
                     +                    }
                     +                    else if ((-1 == (objstm_length = pdf_readint(start, len, "/Length"))))
                     +                    {
                     +                        cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n");
                     +                    }
                     +                    else if ((-1 == (objstm_n = pdf_readint(start, len, "/N"))))
                     +                    {
                     +                        cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n");
                     +                    }
                     +                    else
                     +                    {
                     +                        /* Add objstm to pdf struct, so it can be freed eventually */
                     +                        pdf->nobjstms++;
                     +                        pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
                     +                        if (!pdf->objstms) {
                     +                            cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms);
                     +                            pdf_free_dict(dparams);
                     +                            return CL_EMEM;
                     +                        }
+                    +
                     +                        objstm = malloc(sizeof(struct objstm_struct));
                     +                        if (!objstm) {
                     +                            cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms);
                     +                            pdf_free_dict(dparams);
                     +                            return CL_EMEM;
                     +                        }
                     +                        pdf->objstms[pdf->nobjstms-1] = objstm;
+                    +
                     +                        memset(objstm, 0, sizeof(*objstm));
+                    +
                     +                        objstm->first =         (uint32_t)objstm_first;
                     +                        objstm->current =       (uint32_t)objstm_first;
                     +                        objstm->current_pair =  0;
                     +                        objstm->length =        (uint32_t)objstm_length;
                     +                        objstm->n =             (uint32_t)objstm_n;
+                    +
                     +                        cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first);
                     +                        cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length);
                     +                        cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n);
                     +                    }
                     +                }
+                    +
                     +                sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc, objstm);
                     +                if (sum < 0) {
                     +                    /*
                     +                    * If we were expecting an objstm and there was a failure...
                     +                    *   discard the memory for last object stream.
                     +                    */
                     +                    if (NULL != objstm)
                     +                    {
                     +                        if (NULL != pdf->objstms) {
                     +                            if (NULL != pdf->objstms[pdf->nobjstms - 1]) {
                     +                                pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
+                    +
                     +                                free(pdf->objstms[pdf->nobjstms - 1]);
                     +                                pdf->objstms[pdf->nobjstms - 1] = NULL;
                     +                            }
+                    +
                     +                            /* Pop the objstm off the end of the pdf->objstms array. */
                     +                            if (pdf->nobjstms > 0) {
                     +                                pdf->nobjstms--;
                     +                                pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
+                    +
                     +                                if (!pdf->objstms) {
                     +                                    cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
                     +                                    return CL_EMEM;
                     +                                }
                     +                            } else {
                     +                                /* hm.. this shouldn't happen */
                     +                                cli_warnmsg("pdf_extract_obj: Failure counting objstms.\n");
                     +                            }
                     +                        }
                     +                    }
+                                     }
                     -                sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc);
                                      if (dparams)
                                          pdf_free_dict(dparams);
@@ -1034,14 +1625,17 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                      cli_dbgmsg("-------------EXPERIMENTAL-------------\n");
                                  } else {
                     -                noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
                     +                noisy_warnmsg("pdf_extract_obj: cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
+                                 }
                              } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
                                  const char *q2;
                     -            const char *q = pdf->map+obj->start;
                     +            const char *q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                          : (const char *)(obj->start + pdf->map);
+                    +
                                  /* TODO: get obj-endobj size */
                                  off_t bytesleft = obj_size(pdf, obj, 0);
+                    +
                                  if (bytesleft < 0)
                                      break;
@@ -1066,11 +1660,11 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                          const char *out = js;
                                          js_len = strlen(js);
                                          if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
                     -                        cli_dbgmsg("cli_pdf: encrypted string\n");
                     +                        cli_dbgmsg("pdf_extract_obj: encrypted string\n");
                                              decrypted = decrypt_any(pdf, obj->id, js, &js_len, pdf->enc_method_string);
                                              if (decrypted) {
                     -                            noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
                     +                            noisy_msg(pdf, "pdf_extract_obj: decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
                                                  out = decrypted;
+                                             }
+                                         }
@@ -1083,7 +1677,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                          free(decrypted);
                                          free(js);
                     -                    cli_dbgmsg("bytesleft: %d\n", (int)bytesleft);
                     +                    cli_dbgmsg("pdf_extract_obj: bytesleft: %d\n", (int)bytesleft);
                                          if (bytesleft > 0) {
                                              q2 = pdf_nextobject(q, bytesleft);
@@ -1117,8 +1711,8 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
+                             }
                          } while (0);
                     -    cli_dbgmsg("cli_pdf: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff);
                     -    cli_dbgmsg("         ... to %s\n", fullname);
                     +    cli_dbgmsg("pdf_extract_obj: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff);
                     +    cli_dbgmsg("pdf_extract_obj:         ... to %s\n", fullname);
                          if (flags & PDF_EXTRACT_OBJ_SCAN && sum) {
                              int rc2;
@@ -1132,20 +1726,25 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                                  rc = rc2;
                              if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
                     -            rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
                     +            unsigned int dumpid = 0;
                     +            for (dumpid = 0; dumpid < pdf->nobjs; dumpid++) {
                     +                if (pdf->objs[dumpid] == obj)
                     +                    break;
                     +            }
                     +            rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, dumpid);
                                  if (rc2 == CL_VIRUS)
                                      rc = rc2;
+                             }
                              if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) && (obj->flags & (1 << OBJ_CONTENTS))) {
                                  lseek(fout, 0, SEEK_SET);
                     -            cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
                     +            cli_dbgmsg("pdf_extract_obj: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
                                  rc2 = pdf_scan_contents(fout, pdf);
                                  if (rc2 == CL_VIRUS)
                                      rc = rc2;
                     -            noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
                     +            noisy_msg(pdf, "pdf_extract_obj: extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
+                             }
+                         }
@@ -1291,7 +1890,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
                              /* these are digital signature objects, filter doesn't matter,
                               * we don't need them anyway */
                              if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) {
                     -            cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname);
                     +            cli_dbgmsg("handle_pdfname: unknown filter %s\n", pdfname);
                                  obj->flags |= 1 << OBJ_FILTER_UNKNOWN;
+                             }
@@ -1305,7 +1904,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
                          if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) {
                              /* if a commonly used PDF name is escaped that is certainly
                                 suspicious. */
                     -        cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
                     +        cli_dbgmsg("handle_pdfname: pdfname %s is escaped\n", pdfname);
                              pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
+                         }
@@ -1318,7 +1917,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
                              *state = act->to_state;
                              if (*state == STATE_FILTER && act->set_objflag != OBJ_DICT && (obj->flags & (1 << act->set_objflag))) {
                     -            cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname);
                     +            cli_dbgmsg("handle_pdfname: duplicate stream filter %s\n", pdfname);
                                  pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
+                             }
@@ -1335,8 +1934,6 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
+                         }
+                     }
                     -static int pdf_readint(const char *q0, int len, const char *key);
+                    -
                      static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
+                     {
                          const char *q, *q2;
@@ -1361,7 +1958,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
                          q = q2;
                          if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &objid)) {
                     -        cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n");
                     +        cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse objid\n");
                              return;
+                         }
                          objid = objid << 8;
@@ -1372,7 +1969,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
                          q = q2;
                          if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &genid)) {
                     -        cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n");
                     +        cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse genid\n");
                              return;
+                         }
                          objid |= genid & 0xff;
@@ -1380,7 +1977,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
                          if (!q2 || *q2 != 'R')
                              return;
                     -    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff);
                     +    cli_dbgmsg("pdf_parse_encrypt: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff);
                          pdf->enc_objid = objid;
+                     }
@@ -1410,18 +2007,21 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                          char pdfname[64];
                          const char *q2, *q3;
                          const char *nextobj = NULL, *nextopen = NULL, *nextclose = NULL;
                     -    const char *q = obj->start + pdf->map;
                     -    const char *dict, *enddict, *start;
                     -    off_t dict_length, full_dict_length;
                     -    off_t objsize = obj_size(pdf, obj, 1);
                     -    off_t bytesleft;
                     -    size_t i;
                     -    unsigned filters=0, blockopens=0;
                     +    const char *q = NULL;
                     +    const char *dict = NULL, *enddict = NULL, *start = NULL;
                     +    off_t dict_length = 0, full_dict_length = 0, objsize = 0, bytesleft = 0;
                     +    size_t i = 0;
                     +    unsigned filters = 0, blockopens = 0;
                          enum objstate objstate = STATE_NONE;
                      #if HAVE_JSON
                          json_object *pdfobj=NULL, *jsonobj=NULL;
                      #endif
                     +    q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                      : (const char *)(obj->start + pdf->map);
+                    +
                     +    objsize = obj_size(pdf, obj, 1);
+                    +
                          if (objsize < 0)
                              return;
@@ -1434,7 +2034,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                              bytesleft -= nextobj -q;
                              if (!nextobj || bytesleft < 0) {
                     -            cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff);
                     +            cli_dbgmsg("pdf_parseobj: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff);
                      #if HAVE_JSON
                                  if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
                                      pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
@@ -1465,7 +2065,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                          /* find end of dictionary block */
                          if (bytesleft < 0) {
                     -        cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff);
                     +        cli_dbgmsg("pdf_parseobj: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff);
                      #if HAVE_JSON
                              if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
                                  pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
@@ -1517,7 +2117,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                          /* Was end of dictionary found? */
                          if (blockopens) {
                              /* probably truncated */
                     -        cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff);
                     +        cli_dbgmsg("pdf_parseobj: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff);
                      #if HAVE_JSON
                              if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
                                  pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
@@ -1552,7 +2152,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                                          dictionary[i] = '*';
+                                 }
                                  dictionary[dict_length] = '\0';
                     -            cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
                     +            cli_dbgmsg("pdf_parseobj: dictionary is <<%s>>\n", dictionary);
                                  free(dictionary);
+                             }
+                         }
@@ -1617,10 +2217,10 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                                          trailer = 0;
                                      q2 = pdf->map + trailer;
                     -                cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
                     +                cli_dbgmsg("pdf_parseobj: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
                                      pdf_parse_trailer(pdf, q2, trailer_end - trailer);
                                      if (pdf->fileID)
                     -                    cli_dbgmsg("cli_pdf: found fileID\n");
                     +                    cli_dbgmsg("pdf_parseobj: found fileID\n");
+                                 }
+                             }
@@ -1641,7 +2241,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                                      dict_remaining -= (off_t)(q2 - q);
                                      if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &objid)) {
                     -                    cli_dbgmsg("cli_pdf: failed to parse object objid\n");
                     +                    cli_dbgmsg("pdf_parseobj: failed to parse object objid\n");
                                          return;
+                                     }
                                      objid = objid << 8;
@@ -1654,7 +2254,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                                      if (q2 && isdigit(*q2)) {
                                          dict_remaining -= (off_t)(q2 - q2_old);
                                          if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &genid)) {
                     -                        cli_dbgmsg("cli_pdf: failed to parse object genid\n");
                     +                        cli_dbgmsg("pdf_parseobj: failed to parse object genid\n");
                                              return;
+                                         }
                                          objid |= genid & 0xff;
@@ -1663,7 +2263,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                                          if (q2 && *q2 == 'R') {
                                              struct pdf_obj *obj2;
                     -                        cli_dbgmsg("cli_pdf: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff);
                     +                        cli_dbgmsg("pdf_parseobj: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff);
                                              obj2 = find_obj(pdf, obj, objid);
                                              if (obj2) {
                                                  enum pdf_objflags flag =
@@ -1707,7 +2307,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
                          if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))
                              pdfobj_flag(pdf, obj, UNKNOWN_FILTER);
                     -    cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
                     +    cli_dbgmsg("pdf_parseobj: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
+                     }
                      /**
@@ -1725,7 +2325,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key)
                          const char *q;
                          if (*len <= 0) {
                     -        cli_dbgmsg("cli_pdf: bad length %d\n", *len);
                     +        cli_dbgmsg("pdf_getdict: bad length %d\n", *len);
                              return NULL;
+                         }
@@ -1735,7 +2335,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key)
                          /* find the key */
                          q = cli_memstr(q0, *len, key, strlen(key));
                          if (!q) {
                     -        cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
                     +        cli_dbgmsg("pdf_getdict: %s not found in dict\n", key);
                              return NULL;
+                         }
@@ -1745,7 +2345,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key)
                          /* find the start of the value object */
                          q = pdf_nextobject(q0 + 1, *len - 1);
                          if (!q) {
                     -        cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
                     +        cli_dbgmsg("pdf_getdict: %s is invalid in dict\n", key);
                              return NULL;
+                         }
@@ -1891,12 +2491,12 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *
                              s = cli_malloc((q - start)/2 + 1);
                              if (s == NULL) { /* oops, couldn't allocate memory */
                     -          cli_dbgmsg("cli_pdf: unable to allocate memory...\n");
                     +          cli_dbgmsg("pdf_readstring: unable to allocate memory...\n");
                                return NULL;
+                             }
                              if (cli_hex2str_to(start, s, q - start)) {
                     -            cli_dbgmsg("cli_pdf: %s has bad hex value\n", key);
                     +            cli_dbgmsg("pdf_readstring: %s has bad hex value\n", key);
                                  free(s);
                                  return NULL;
+                             }
@@ -1908,7 +2508,7 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *
                              return s;
+                         }
                     -    cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
                     +    cli_dbgmsg("pdf_readstring: %s is invalid string in dict\n", key);
                          return NULL;
+                     }
@@ -1982,7 +2582,7 @@ static int pdf_readbool(const char *q0, int len, const char *key, int Default)
                          if (!strncmp(q, "false", 5))
                              return 0;
                     -    cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
                     +    cli_dbgmsg("pdf_readbool: invalid value for %s bool\n", key);
                          return Default;
+                     }
@@ -2032,8 +2632,8 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
                                  cl_sha256(U+40, 8, result2, NULL);
                                  UE_len = UE ? strlen(UE) : 0;
                                  if (UE_len != 32) {
                     -                cli_dbgmsg("cli_pdf: UE length is not 32: %zu\n", UE_len);
                     -                noisy_warnmsg("cli_pdf: UE length is not 32: %zu\n", UE_len);
                     +                cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len);
                     +                noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len);
                                  } else {
                                      pdf->keylen = 32;
                                      pdf->key = cli_malloc(32);
@@ -2043,7 +2643,7 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
+                                     }
                                      aes_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0);
                     -                dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
                     +                dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen);
+                                 }
+                             }
                          } else if ((R >= 2) && (R <= 4)) {
@@ -2129,27 +2729,27 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
                                      password_empty = 1;
                                  free(d);
                              } else {
                     -            cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
                     -            noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
                     +            cli_dbgmsg("check_user_password: invalid revision %d\n", R);
                     +            noisy_warnmsg("check_user_password: invalid revision %d\n", R);
+                             }
                          } else {
                              /* Supported R is in {2,3,4,5} */
                     -        cli_dbgmsg("cli_pdf: R value out of range\n");
                     -        noisy_warnmsg("cli_pdf: R value out of range\n");
                     +        cli_dbgmsg("check_user_password: R value out of range\n");
                     +        noisy_warnmsg("check_user_password: R value out of range\n");
                              return;
+                         }
                          if (password_empty) {
                     -        cli_dbgmsg("cli_pdf: user password is empty\n");
                     -        noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
                     +        cli_dbgmsg("check_user_password: user password is empty\n");
                     +        noisy_msg(pdf, "check_user_password: encrypted PDF found, user password is empty, will attempt to decrypt\n");
                              /* The key we computed above is the key used to encrypt the streams.
                               * We could decrypt it now if we wanted to */
                              pdf->flags |= 1 << DECRYPTABLE_PDF;
                          } else {
                              /* the key is not valid, we would need the user or the owner password to decrypt */
                     -        cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
                     -        noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
                     +        cli_dbgmsg("check_user_password: user/owner password would be required for decryption\n");
                     +        noisy_warnmsg("check_user_password: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
+                         }
+                     }
@@ -2171,7 +2771,7 @@ enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key
                          CFM = pdf_readval(q, len, "/CFM");
                          if (CFM) {
                     -        cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM);
                     +        cli_dbgmsg("parse_enc_method: %s CFM: %s\n", key, CFM);
                              if (!strncmp(CFM,"V2", 2))
                                  ret = ENC_V2;
                              else if (!strncmp(CFM,"AESV2",5))
@@ -2197,15 +2797,15 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                          if (pdf->enc_objid == ~0u)
                              return;
                          if (!pdf->fileID) {
                     -        cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
                     -        noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
                     +        cli_dbgmsg("pdf_handle_enc: no file ID\n");
                     +        noisy_warnmsg("pdf_handle_enc: no file ID\n");
                              return;
+                         }
                     -    obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
                     +    obj = find_obj(pdf, pdf->objs[0], pdf->enc_objid);
                          if (!obj) {
                     -        cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
                     -        noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
                     +        cli_dbgmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
                     +        noisy_warnmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
                              return;
+                         }
@@ -2220,15 +2820,15 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                              pdf->enc_method_embeddedfile = ENC_UNKNOWN;
                              P = pdf_readint(q, len, "/P");
                              if (P == ~0u) {
                     -            cli_dbgmsg("cli_pdf: invalid P\n");
                     -            noisy_warnmsg("cli_pdf: invalid P\n");
                     +            cli_dbgmsg("pdf_handle_enc: invalid P\n");
                     +            noisy_warnmsg("pdf_handle_enc: invalid P\n");
                                  break;
+                             }
                              q2 = cli_memstr(q, len, "/Standard", 9);
                              if (!q2) {
                     -            cli_dbgmsg("cli_pdf: /Standard not found\n");
                     -            noisy_warnmsg("cli_pdf: /Standard not found\n");
                     +            cli_dbgmsg("pdf_handle_enc: /Standard not found\n");
                     +            noisy_warnmsg("pdf_handle_enc: /Standard not found\n");
                                  break;
+                             }
@@ -2241,20 +2841,20 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                                  length = pdf_readint(q, len, "/Length");
                              if (length < 40) {
                     -            cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
                     +            cli_dbgmsg("pdf_handle_enc: invalid length: %d\n", length);
                                  length = 40;
+                             }
                              R = pdf_readint(q, len, "/R");
                              if (R == ~0u) {
                     -            cli_dbgmsg("cli_pdf: invalid R\n");
                     -            noisy_warnmsg("cli_pdf: invalid R\n");
                     +            cli_dbgmsg("pdf_handle_enc: invalid R\n");
                     +            noisy_warnmsg("pdf_handle_enc: invalid R\n");
                                  break;
+                             }
                              if ((R > 5) || (R < 2)) {
                     -            cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n");
                     -            noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n");
                     +            cli_dbgmsg("pdf_handle_enc: R value outside supported range [2..5]\n");
                     +            noisy_warnmsg("pdf_handle_enc: R value outside supported range [2..5]\n");
                                  break;
+                             }
@@ -2277,11 +2877,11 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                                  pdf->CF_n = n;
                                  if (StmF)
                     -                cli_dbgmsg("cli_pdf: StmF: %s\n", StmF);
                     +                cli_dbgmsg("pdf_handle_enc: StmF: %s\n", StmF);
                                  if (StrF)
                     -                cli_dbgmsg("cli_pdf: StrF: %s\n", StrF);
                     +                cli_dbgmsg("pdf_handle_enc: StrF: %s\n", StrF);
                                  if (EFF)
                     -                cli_dbgmsg("cli_pdf: EFF: %s\n", EFF);
                     +                cli_dbgmsg("pdf_handle_enc: EFF: %s\n", EFF);
                                  pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);
                                  pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);
@@ -2291,7 +2891,7 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                                  free(StrF);
                                  free(EFF);
                     -            cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n", EM ? "true" : "false");
                     +            cli_dbgmsg("pdf_handle_enc: EncryptMetadata: %s\n", EM ? "true" : "false");
                                  if (R == 4) {
                                      length = 128;
@@ -2308,8 +2908,8 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                              n = 0;
                              O = pdf_readstring(q, len, "/O", &n, NULL, 0);
                              if (!O || n < oulen) {
                     -            cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
                     -            cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
                     +            cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n);
                     +            cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n);
                                  if (O)
                                      dbg_printhex("invalid O", O, n);
@@ -2321,8 +2921,8 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                                          break;
                                  if (i != n) {
                     -                dbg_printhex("too long O", O, n);
                     -                noisy_warnmsg("too long O: %u", n);
                     +                dbg_printhex("pdf_handle_enc: too long O", O, n);
                     +                noisy_warnmsg("pdf_handle_enc: too long O: %u", n);
                                      break;
+                                 }
+                             }
@@ -2330,8 +2930,8 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                              n = 0;
                              U = pdf_readstring(q, len, "/U", &n, NULL, 0);
                              if (!U || n < oulen) {
                     -            cli_dbgmsg("cli_pdf: invalid U: %u\n", n);
                     -            noisy_warnmsg("cli_pdf: invalid U: %u\n", n);
                     +            cli_dbgmsg("pdf_handle_enc: invalid U: %u\n", n);
                     +            noisy_warnmsg("pdf_handle_enc: invalid U: %u\n", n);
                                  if (U)
                                      dbg_printhex("invalid U", U, n);
@@ -2349,10 +2949,10 @@ void pdf_handle_enc(struct pdf_struct *pdf)
+                                 }
+                             }
                     -        cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %u\n", R, P, length);
                     +        cli_dbgmsg("pdf_handle_enc: Encrypt R: %d, P %x, length: %u\n", R, P, length);
                              if (length % 8) {
                     -            cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
                     -            noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
                     +            cli_dbgmsg("pdf_handle_enc: wrong key length, not multiple of 8\n");
                     +            noisy_warnmsg("pdf_handle_enc: wrong key length, not multiple of 8\n");
                                  break;
+                             }
                              check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
@@ -2363,8 +2963,216 @@ void pdf_handle_enc(struct pdf_struct *pdf)
                          free(UE);
+                     }
                     +/**
                     + * @brief Search pdf buffer for objects.  Parse each.
                     + *
                     + * Newly found objects will be extracted after completion when the extraction for loop continues.
                     + *
                     + * @param pdf           Pdf struct that keeps track of all information found in the PDF.
                     + * @param objstm        Pointer to an object stream to parse.
                     + *
                     + * @return cl_error_t   Error code.
                     + */
                     +cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm)
                     +{
                     +    cl_error_t status = CL_EFORMAT;
                     +    cl_error_t retval = CL_EPARSE;
                     +    int32_t foundobj = 0, alerts = 0;
                     +    uint32_t badobjects = 0;
                     +    size_t i = 0;
+                    +
                     +    struct pdf_obj* obj = NULL;
+                    +
                     +    char* current_pair = objstm->streambuf;
                     +    char* current_obj = objstm->streambuf + objstm->first;
+                    +
                     +    if ((0 == objstm->first) ||
                     +        (0 == objstm->streambuf_len) ||
                     +        (0 == objstm->n))
                     +    {
                     +        cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Empty object stream.\n");
                     +        goto done;
                     +    }
+                    +
                     +    if (objstm->first >= objstm->streambuf_len)
                     +    {
                     +        cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Invalid objstm values. Offset of first obj greater than stream length.\n");
                     +        goto done;
                     +    }
+                    +
                     +    /* Process each object */
                     +    for (i = 0; i < objstm->n; i++)
                     +    {
                     +        obj = NULL;
+                    +
                     +        if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) {
                     +            cli_errmsg("Timeout reached in the PDF parser while parsing object stream.\n");
                     +            status = CL_ETIMEOUT;
                     +            goto done;
                     +        }
+                    +
                     +        /* Find object */
                     +        retval = pdf_findobj_in_objstm(pdf, objstm, &obj);
+                    +
                     +        if (retval != CL_SUCCESS)
                     +        {
                     +            cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n",
                     +                objstm->nobjs_found, objstm->n);
                     +            badobjects++;
                     +            pdf->stats.ninvalidobjs++;
                     +            break;
                     +        }
+                    +
                     +        cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Found object %u %u in object stream at offset: %u\n", obj->id >> 8, obj->id & 0xff, obj->start);
+                    +
                     +        if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) {
                     +            cli_errmsg("Timeout reached in the PDF parser while parsing object stream.\n");
                     +            status = CL_ETIMEOUT;
                     +            goto done;
                     +        }
+                    +
                     +        /* Parse object */
                     +        pdf_parseobj(pdf, obj);
                     +    }
+                    +
                     +    if (alerts) {
                     +        status = CL_VIRUS;
                     +        goto done;
                     +    }
                     +    else if (badobjects) {
                     +        status = CL_EFORMAT;
                     +        goto done;
                     +    }
+                    +
                     +    status = CL_SUCCESS;
+                    +
                     +done:
                     +    return status;
                     +}
+                    +
                     +/**
                     + * @brief Search pdf buffer for objects.  Parse each and then extract each.
                     + *
                     + * @param pdf               Pdf struct that keeps track of all information found in the PDF.
                     + * @param alerts[in/out]    The number of alerts, relevant in ALLMATCH mode.
                     + *
                     + * @return cl_error_t   Error code.
                     + */
                     +cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts)
                     +{
                     +    cl_error_t status = CL_SUCCESS;
                     +    int32_t rv = 0;
                     +    int foundobj = 0;
                     +    unsigned int i = 0, j = 0;
                     +    uint32_t badobjects = 0;
+                    +
                     +    /* parse PDF and find obj offsets */
                     +    while (CL_BREAK != (rv = pdf_findobj(pdf))) {
                     +        if (rv == CL_EMEM) {
                     +            break;
                     +        }
                     +    }
+                    +
                     +    if (rv == -1)
                     +        pdf->flags |= 1 << BAD_PDF_TOOMANYOBJS;
+                    +
                     +    /* must parse after finding all objs, so we can flag indirect objects */
                     +    for (i=0; i < pdf->nobjs; i++) {
                     +        struct pdf_obj *obj = pdf->objs[i];
+                    +
                     +        if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) {
                     +            cli_errmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while parsing objects.\n");
+                    +
                     +            status = CL_ETIMEOUT;
                     +            goto done;
                     +        }
+                    +
                     +        pdf_parseobj(pdf, obj);
                     +    }
+                    +
                     +    pdf_handle_enc(pdf);
                     +    if (pdf->flags & (1 << ENCRYPTED_PDF))
                     +        cli_dbgmsg("pdf_find_and_extract_objs: encrypted pdf found, %s!\n",
                     +               (pdf->flags & (1 << DECRYPTABLE_PDF)) ?
                     +               "decryptable" : "not decryptable, stream will probably fail to decompress");
+                    +
                     +    if ((pdf->ctx->options & CL_SCAN_BLOCKENCRYPTED) &&
                     +       (pdf->flags & (1 << ENCRYPTED_PDF)) &&
                     +       !(pdf->flags & (1 << DECRYPTABLE_PDF)))
                     +    {
                     +        /* It is encrypted, and a password/key needs to be supplied to decrypt.
                     +         * This doesn't trigger for PDFs that are encrypted but don't need
                     +         * a password to decrypt */
                     +        status = cli_append_virus(pdf->ctx, "Heuristics.Encrypted.PDF");
                     +        if (status == CL_VIRUS) {
                     +            alerts++;
                     +            if (pdf->ctx->options & CL_SCAN_ALLMATCHES)
                     +                status = CL_CLEAN;
                     +        }
                     +    }
+                    +
                     +    if (!status) {
                     +        status = run_pdf_hooks(pdf, PDF_PHASE_PARSED, -1, -1);
                     +        cli_dbgmsg("pdf_find_and_extract_objs: (parsed hooks) returned %d\n", status);
                     +        if (status == CL_VIRUS) {
                     +            alerts++;
                     +            if (pdf->ctx->options & CL_SCAN_ALLMATCHES) {
                     +                status = CL_CLEAN;
                     +            }
                     +        }
                     +    }
+                    +
                     +    /* extract PDF objs */
                     +    for (i=0; !status && i < pdf->nobjs; i++) {
                     +        struct pdf_obj *obj = pdf->objs[i];
+                    +
                     +        if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) {
                     +            cli_errmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while extracting objects.\n");
+                    +
                     +            status = CL_ETIMEOUT;
                     +            goto done;
                     +        }
+                    +
                     +        status = pdf_extract_obj(pdf, obj, PDF_EXTRACT_OBJ_SCAN);
                     +        switch (status) {
                     +            case CL_EFORMAT:
                     +                /* Don't halt on one bad object */
                     +                cli_dbgmsg("pdf_find_and_extract_objs: Format error when extracting object, skipping to the next object.\n");
                     +                badobjects++;
                     +                pdf->stats.ninvalidobjs++;
                     +                status = CL_CLEAN;
                     +                break;
                     +            case CL_VIRUS:
                     +                alerts++;
                     +                if (pdf->ctx->options & CL_SCAN_ALLMATCHES) {
                     +                    status = CL_CLEAN;
                     +                }
                     +                break;
                     +            default:
                     +                break;
                     +        }
                     +    }
+                    +
                     +done:
                     +    if (!status && badobjects) {
                     +        status = CL_EFORMAT;
                     +    }
+                    +
                     +    return status;
                     +}
+                    +
                     +/**
                     + * @brief Primary function for parsing and scanning a PDF.
                     + *
                     + * @param dir       Filepath for temp file.
                     + * @param ctx       clam scan context structure.
                     + * @param offset    offset of pdf in ctx->fmap
                     + *
                     + * @return int      Returns cl_error_t status value.
                     + */
                      int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
+                     {
                     +    cl_error_t rc = CL_SUCCESS;
                          struct pdf_struct pdf;
                          fmap_t *map = *ctx->fmap;
                          size_t size = map->len - offset;
@@ -2372,8 +3180,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                          off_t map_off, bytesleft;
                          unsigned long xref;
                          const char *pdfver, *tmp, *start, *eofmap, *q, *eof;
                     -    int rc, badobjects = 0;
                          unsigned i, alerts = 0;
                     +    unsigned int objs_found = 0;
                      #if HAVE_JSON
                          json_object *pdfobj=NULL;
                          char *begin, *end, *p1;
@@ -2390,7 +3198,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                          /* Check PDF version */
                          if (!pdfver) {
                              cli_errmsg("cli_pdf: mmap() failed (1)\n");
                     -        return CL_EMAP;
                     +        rc = CL_EMAP;
                     +        goto done;
+                         }
                      #if HAVE_JSON
@@ -2406,14 +3215,16 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                      #if HAVE_JSON
                              pdf_export_json(&pdf);
                      #endif
                     -        return CL_SUCCESS;
                     +        rc = CL_SUCCESS;
                     +        goto done;
+                         }
                          versize -= tmp - pdfver;
                          pdfver = tmp;
                          if (versize < 8) {
                     -        return CL_EFORMAT;
                     +        rc = CL_EFORMAT;
                     +        goto done;
+                         }
                          /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */
@@ -2463,10 +3274,9 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                          eofmap = fmap_need_off_once(map, map_off, bytesleft);
                          if (!eofmap) {
                              cli_errmsg("cli_pdf: mmap() failed (2)\n");
                     -#if HAVE_JSON
                     -        pdf_export_json(&pdf);
                     -#endif
                     -        return CL_EMAP;
+                    +
                     +        rc = CL_EMAP;
                     +        goto done;
+                         }
                          eof = eofmap + bytesleft;
@@ -2533,10 +3343,9 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                          pdf.map = fmap_need_off(map, offset, size);
                          if (!pdf.map) {
                              cli_errmsg("cli_pdf: mmap() failed (3)\n");
                     -#if HAVE_JSON
                     -        pdf_export_json(&pdf);
                     -#endif
                     -        return CL_EMAP;
+                    +
                     +        rc = CL_EMAP;
                     +        goto done;
+                         }
                          pdf.startoff = offset;
@@ -2548,127 +3357,28 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                              rc = CL_CLEAN;
                          } else if (rc) {
                              cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
                     -#if HAVE_JSON
                     -        pdf_export_json(&pdf);
                     -#endif
                     -        return rc == CL_BREAK ? CL_CLEAN : rc;
                     -    }
+                    -
                     -    /* parse PDF and find obj offsets */
                     -    while ((rc = pdf_findobj(&pdf)) > 0) {
                     -        if (rc == 1) {
                     -            struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
+                    -
                     -            cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + offset));
                     -        }
                     -        else if (rc == 2) {
                     -            pdf.nobjs--;
                     -            cli_dbgmsg("cli_pdf: Failed to parse object, likely an oversight in parser design.\n");
                     -        }
                     -        else {
                     -            pdf.nobjs--;
                     -            cli_dbgmsg("cli_pdf: unexpected return code %d.\n", rc);
                     -        }
                     -    }
+                    -
                     -    if (pdf.nobjs)
                     -        pdf.nobjs--;
+                    -
                     -    if (rc == -1)
                     -        pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;
+                    -
                     -    /* must parse after finding all objs, so we can flag indirect objects */
                     -    for (i=0;i<pdf.nobjs;i++) {
                     -        struct pdf_obj *obj = &pdf.objs[i];
                     -        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
                     -            cli_errmsg("Timeout reached in the PDF parser\n");
                     -#if HAVE_JSON
                     -            pdf_export_json(&pdf);
                     -#endif
                     -            free(pdf.objs);
                     -            if (pdf.fileID)
                     -                free(pdf.fileID);
                     -            if (pdf.key)
                     -                free(pdf.key);
                     -            return CL_ETIMEOUT;
                     -        }
+                    -
                     -        pdf_parseobj(&pdf, obj);
                     -    }
+                    -
                     -    pdf_handle_enc(&pdf);
                     -    if (pdf.flags & (1 << ENCRYPTED_PDF))
                     -        cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
                     -               (pdf.flags & (1 << DECRYPTABLE_PDF)) ?
                     -               "decryptable" : "not decryptable, stream will probably fail to decompress");
+                    -
                     -    if (DETECT_ENCRYPTED &&
                     -       (pdf.flags & (1 << ENCRYPTED_PDF)) &&
                     -       !(pdf.flags & (1 << DECRYPTABLE_PDF))) {
                     -        /* It is encrypted, and a password/key needs to be supplied to decrypt.
                     -         * This doesn't trigger for PDFs that are encrypted but don't need
                     -         * a password to decrypt */
                     -        rc = cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
                     -        if (rc == CL_VIRUS) {
                     -            alerts++;
                     -            if (SCAN_ALL)
                     -                rc = CL_CLEAN;
                     -        }
                     -    }
+                    -
                     -    if (!rc) {
                     -        rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
                     -        cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
                     -        if (rc == CL_VIRUS) {
                     -            alerts++;
                     -            if (SCAN_ALL) {
                     -                rc = CL_CLEAN;
                     -            }
                     -        }
                     +        rc = rc == CL_BREAK ? CL_CLEAN : rc;
                     +        goto done;
+                         }
                     -    /* extract PDF objs */
                     -    for (i=0;!rc && i<pdf.nobjs;i++) {
                     -        struct pdf_obj *obj = &pdf.objs[i];
+                    -
                     -        if (cli_checktimelimit(ctx) != CL_SUCCESS) {
                     -            cli_errmsg("Timeout reached in the PDF parser\n");
                     -#if HAVE_JSON
                     -            pdf_export_json(&pdf);
                     -#endif
                     -            free(pdf.objs);
                     -            if (pdf.fileID)
                     -                free(pdf.fileID);
                     -            if (pdf.key)
                     -                free(pdf.key);
                     -            return CL_ETIMEOUT;
                     -        }
                     +    /*
                     +     * Find and extract all objects in the PDF.
                     +     * New experimental recursive methodology that adds objects from object streams.
                     +     */
                     +    objs_found = pdf.nobjs;
                     +    rc = pdf_find_and_extract_objs(&pdf, &alerts);
                     -        rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
                     -        switch (rc) {
                     -            case CL_EFORMAT:
                     -                /* Don't halt on one bad object */
                     -                cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
                     -                badobjects++;
                     -                pdf.stats.ninvalidobjs++;
                     -                rc = CL_CLEAN;
                     -                break;
                     -            case CL_VIRUS:
                     -                alerts++;
                     -                if (SCAN_ALL) {
                     -                    rc = CL_CLEAN;
                     -                }
                     -                break;
                     -            default:
                     -                break;
                     -        }
                     +    if (pdf.nobjs <= objs_found) {
                     +        cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs did not find any new objects!\n");
                     +    } else {
                     +        cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs found %d new objects.\n", pdf.nobjs - objs_found);
+                         }
                          if (pdf.flags & (1 << ENCRYPTED_PDF))
                              pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE));
                     -   if (pdf.flags && !rc) {
                     +    if (pdf.flags && !rc) {
                              cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
                              rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
                              if (rc == CL_VIRUS) {
@@ -2699,11 +3409,11 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                      #endif
+                         }
                     +done:
                          if (alerts) {
                              rc = CL_VIRUS;
+                         }
+                    -
                     -    else if (!rc && badobjects) {
                     +    else if (!rc && pdf.stats.ninvalidobjs > 0) {
                              rc = CL_EFORMAT;
+                         }
@@ -2711,17 +3421,54 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
                          pdf_export_json(&pdf);
                      #endif
                     -    cli_dbgmsg("cli_pdf: returning %d\n", rc);
                     -    free(pdf.objs);
                     -    free(pdf.fileID);
                     -    free(pdf.key);
                     +    if (pdf.objstms) {
                     +        for (i = 0; i < pdf.nobjstms; i++) {
                     +            if (pdf.objstms[i]) {
                     +                if (pdf.objstms[i]->streambuf) {
                     +                    free(pdf.objstms[i]->streambuf);
                     +                    pdf.objstms[i]->streambuf = NULL;
                     +                }
                     +                free(pdf.objstms[i]);
                     +                pdf.objstms[i] = NULL;
                     +            }
                     +        }
                     +        free(pdf.objstms);
                     +        pdf.objstms = NULL;
                     +    }
+                    +
                     +    if (NULL != pdf.objs) {
                     +        for (i = 0; i < pdf.nobjs; i++) {
                     +            if (NULL != pdf.objs[i]) {
                     +                free(pdf.objs[i]);
                     +                pdf.objs[i] = NULL;
                     +            }
                     +        }
                     +        free(pdf.objs);
                     +        pdf.objs = NULL;
                     +    }
                     +    if (pdf.fileID) {
                     +        free(pdf.fileID);
                     +        pdf.fileID = NULL;
                     +    }
                     +    if (pdf.key) {
                     +        free(pdf.key);
                     +        pdf.key = NULL;
                     +    }
                          /* PDF hooks may abort, don't return CL_BREAK to caller! */
                     -    return rc == CL_BREAK ? CL_CLEAN : rc;
                     +    rc = (rc == CL_BREAK) ? CL_CLEAN : rc;
+                    +
                     +    cli_dbgmsg("cli_pdf: returning %d\n", rc);
                     +    return rc;
+                     }
                     -/*
                     - * Find the start of the next line
                     +/**
                     + * @brief   Skip the rest of the current line, and find the start of the next line.
                     + *
                     + * @param ptr   Current offset into buffer.
                     + * @param len   Remaining bytes in buffer.
                     + *
                     + * @return const char*  Address of next line, or NULL if no next line in buffer.
                       */
                      static const char *
                      pdf_nextlinestart(const char *ptr, size_t len)
@@ -2743,9 +3490,15 @@ pdf_nextlinestart(const char *ptr, size_t len)
                          return ptr;
+                     }
                     -/*
                     - * Return the start of the next PDF object.
                     +/**
                     + * @brief   Return the start of the next PDF object.
                     + *
                       * This assumes that we're not in a stream.
                     + *
                     + * @param ptr   Current offset into buffer.
                     + * @param len   Remaining bytes in buffer.
                     + *
                     + * @return const char*  Address of next object in the buffer, or NULL if there is none in the buffer.
                       */
                      static const char *
                      pdf_nextobject(const char *ptr, size_t len)
@@ -3078,10 +3831,13 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
                              return;
                          if (!(pdf->stats.author)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.author))
                                  return;
                     -        pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
                     +        pdf->stats.author->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
+                         }
+                     }
                      #endif
@@ -3098,10 +3854,13 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
                              return;
                          if (!(pdf->stats.creator)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.creator))
                                  return;
                     -        pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
                     +        pdf->stats.creator->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
+                         }
+                     }
                      #endif
@@ -3118,10 +3877,13 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
                              return;
                          if (!(pdf->stats.modificationdate)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.modificationdate))
                                  return;
                     -        pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
                     +        pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
+                         }
+                     }
                      #endif
@@ -3138,10 +3900,13 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
                              return;
                          if (!(pdf->stats.creationdate)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.creationdate))
                                  return;
                     -        pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
                     +        pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
+                         }
+                     }
                      #endif
@@ -3158,10 +3923,13 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
                              return;
                          if (!(pdf->stats.producer)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.producer))
                                  return;
                     -        pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
                     +        pdf->stats.producer->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
+                         }
+                     }
                      #endif
@@ -3178,10 +3946,13 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
                              return;
                          if (!(pdf->stats.title)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.title))
                                  return;
                     -        pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
                     +        pdf->stats.title->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
+                         }
+                     }
                      #endif
@@ -3198,10 +3969,13 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
                              return;
                          if (!(pdf->stats.keywords)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.keywords))
                                  return;
                     -        pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
                     +        pdf->stats.keywords->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
+                         }
+                     }
                      #endif
@@ -3218,10 +3992,13 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
                              return;
                          if (!(pdf->stats.subject)) {
                     +        const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                             : (const char *)(obj->start + pdf->map);
+                    +
                              pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
                              if (!(pdf->stats.subject))
                                  return;
                     -        pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
                     +        pdf->stats.subject->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
+                         }
+                     }
                      #endif
@@ -3269,9 +4046,10 @@ static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_a
                      static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
+                     {
                          struct pdf_array *array;
                     -    const char *objstart = (const char *)(obj->start + pdf->map);
                     +    const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                         : (const char *)(obj->start + pdf->map);
                          const char *begin;
                     -    unsigned int objsz;
                     +    unsigned int objsize;
                          unsigned long npages=0, count;
                          struct pdf_array_node *node;
                          json_object *pdfobj;
@@ -3284,19 +4062,19 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
                          if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
                              return;
                     -    objsz = obj_size(pdf, obj, 1);
                     +    objsize = obj_size(pdf, obj, 1);
                          pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
                          if (!(pdfobj))
                              return;
                     -    begin = cli_memstr(objstart, objsz, "/Kids", 5);
                     +    begin = cli_memstr(objstart, objsize, "/Kids", 5);
                          if (!(begin))
                              return;
                          begin += 5;
                     -    array = pdf_parse_array(pdf, obj, objsz, (char *)begin, NULL);
                     +    array = pdf_parse_array(pdf, obj, objsize, (char *)begin, NULL);
                          if (!(array)) {
                              cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
                              return;
@@ -3307,21 +4085,21 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
                                  if (strchr((char *)(node->data), 'R'))
                                      npages++;
                     -    begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6);
                     +    begin = cli_memstr(objstart, objsize, "/Count", 6);
                          if (!(begin)) {
                              cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
                              goto cleanup;
+                         }
                          begin += 6;
                     -    while (begin - objstart <  objsz && isspace(begin[0]))
                     +    while (begin - objstart <  objsize && isspace(begin[0]))
                              begin++;
                     -    if (begin - objstart >= objsz) {
                     +    if (begin - objstart >= objsize) {
                              goto cleanup;
+                         }
                     -    if ((CL_SUCCESS != cli_strntoul_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, &count)) ||
                     +    if ((CL_SUCCESS != cli_strntoul_wrap(begin, (size_t)(obj->start + pdf->map + objsize - begin), 0, 10, &count)) ||
                              (count != npages)) {
                              cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
+                         }
@@ -3336,8 +4114,10 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
+                     {
                          json_object *colorsobj, *pdfobj;
                          unsigned long ncolors;
                     -    char *start, *p1;
                     -    size_t objsz;
                     +    char *p1;
                     +    const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                                         : (const char *)(obj->start + pdf->map);
                     +    size_t objsize;
                          UNUSEDPARAM(act);
@@ -3347,27 +4127,25 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
                          if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
                              return;
                     -    objsz = obj_size(pdf, obj, 1);
+                    -
                     -    start = (char *)(obj->start + pdf->map);
                     +    objsize = obj_size(pdf, obj, 1);
                     -    p1 = (char *)cli_memstr(start, objsz, "/Colors", 7);
                     +    p1 = (char *)cli_memstr(objstart, objsize, "/Colors", 7);
                          if (!(p1))
                              return;
                          p1 += 7;
                          /* Ensure that we have at least one whitespace character plus at least one number */
                     -    if (objsz - (p1 - start) < 2)
                     +    if (objsize - (p1 - objstart) < 2)
                              return;
                     -    while (p1 - start < objsz && isspace(p1[0]))
                     +    while (p1 - objstart < objsize && isspace(p1[0]))
                              p1++;
                     -    if ((size_t)(p1 - start) == objsz)
                     +    if ((size_t)(p1 - objstart) == objsize)
                              return;
                     -    if (CL_SUCCESS != cli_strntoul_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, &ncolors))
                     +    if (CL_SUCCESS != cli_strntoul_wrap(p1, (size_t)((p1 - objstart) - objsize), 0, 10, &ncolors))
                              return;
                          /* We only care if the number of colors > 2**24 */
@@ -3651,14 +4429,14 @@ static void pdf_export_json(struct pdf_struct *pdf)
+                         }
                          for (i=0; i < pdf->nobjs; i++) {
                     -        if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) {
                     +        if (pdf->objs[i]->flags & (1<<OBJ_TRUNCATED)) {
                                  json_object *truncobj;
                                  truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");
                                  if (!(truncobj))
                                      continue;
                     -            cli_jsonint_array(truncobj, pdf->objs[i].id>>8);
                     +            cli_jsonint_array(truncobj, pdf->objs[i]->id >> 8);
+                             }
+                         }

libclamav/pdf.h

History View file @ 89d5207

@@ -24,13 +24,26 @@
                      #include "others.h"
                      #define PDF_FILTERLIST_MAX  64
                     +struct objstm_struct {
                     +    uint32_t first;         // offset of first obj
                     +    uint32_t current;       // offset of current obj
                     +    uint32_t current_pair;  // offset of current pair describing id, location of object
                     +    uint32_t length;        // total length of all objects (starting at first)
                     +    uint32_t n;             // number of objects that should be found in the object stream
                     +    uint32_t nobjs_found;   // number of objects actually found in the object stream
                     +    char *streambuf;        // address of stream buffer, beginning with first obj pair
                     +    size_t streambuf_len;   // length of stream buffer, includes pairs followed by actual objects
                     +};
+                    +
                      struct pdf_obj {
                          uint32_t start;
                     +    int32_t size;
                          uint32_t id;
                          uint32_t flags;
                          uint32_t statsflags;
                          uint32_t numfilters;
                          uint32_t filterlist[PDF_FILTERLIST_MAX];
                     +    struct objstm_struct *objstm;  // Should be NULL unless the obj exists in an object stream (separate buffer)
                          char *path;
                      };
@@ -124,7 +137,7 @@ enum enc_method {
                      };
                      struct pdf_struct {
                     -    struct pdf_obj *objs;
                     +    struct pdf_obj **objs;
                          unsigned nobjs;
                          unsigned flags;
                          unsigned enc_method_stream;
@@ -145,6 +158,8 @@ struct pdf_struct {
                          char *key;
                          unsigned keylen;
                          struct pdf_stats stats;
                     +    struct objstm_struct **objstms;
                     +    uint32_t nobjstms;
                      };
                      #define OBJ_FLAG_PDFNAME_NONE 0x0
@@ -156,7 +171,7 @@ struct pdf_struct {
                      int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset);
                      void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj);
                      int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags);
                     -int pdf_findobj(struct pdf_struct *pdf);
                     +cl_error_t pdf_findobj(struct pdf_struct *pdf);
                      struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
                      void pdf_handle_enc(struct pdf_struct *pdf);
@@ -166,13 +181,16 @@ enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key
                      void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag);
                      char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);
                     -char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats);
                     -struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
                     -struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
                     +char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta);
                     +struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
                     +struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
                      int is_object_reference(char *begin, char **endchar, uint32_t *id);
                      void pdf_free_dict(struct pdf_dict *dict);
                      void pdf_free_array(struct pdf_array *array);
                      void pdf_print_dict(struct pdf_dict *dict, unsigned long depth);
                      void pdf_print_array(struct pdf_array *array, unsigned long depth);
                     +cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts);
                     +cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm);
+                    +
                      #endif

libclamav/pdfdecode.c

History View file @ 89d5207

@@ -1,5 +1,5 @@
                      /*
                     - *  Copyright (C) 2016-2017 Cisco and/or its affiliates. All rights reserved.
                     + *  Copyright (C) 2016-2018 Cisco and/or its affiliates. All rights reserved.
+                      *
                       *  Author: Kevin Lin
+                      *
@@ -37,6 +37,7 @@
                      #endif
                      #include <stdio.h>
                     +#include <stddef.h>
                      #include <sys/types.h>
                      #include <sys/stat.h>
                      #include <ctype.h>
@@ -75,26 +76,57 @@ struct pdf_token {
                          uint8_t *content;  /* content stream */
                      };
                     -static  int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
                     -static  int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);
                     +static ptrdiff_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
                     +static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);
+                    +
                     +static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     +static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     +static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
                     +static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     +static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
                     +static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
+                    +
                     +/**
                     + * @brief       Wrapper function for pdf_decodestream_internal.
                     + *
                     + * Allocate a token object to store decoded filter data.
                     + * Parse/decode the filter data and scan it.
                     + *
                     + * @param pdf       Pdf context structure.
                     + * @param obj       The object we found the filter content in.
                     + * @param params    (optional) Dictionary parameters describing the filter data.
                     + * @param stream    Filter stream buffer pointer.
                     + * @param streamlen Length of filter stream buffer.
                     + * @param xref      Indicates if the stream is an /XRef stream.  Do not apply forced decryption on /XRef streams.
                     + * @param fout      File descriptor to write to to be scanned.
                     + * @param[out] rc   Return code ()
                     + * @param objstm    (optional) Object stream context structure.
                     + * @return ptrdiff_t   The number of bytes written to 'fout' to be scanned. -1 if failed out.
                     + */
                     +ptrdiff_t pdf_decodestream(
                     +    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
                     +    const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
                     +    struct objstm_struct *objstm)
                     +{
                     +    struct pdf_token *token = NULL;
                     +    ptrdiff_t bytes_scanned = -1;
                     +    cl_error_t retval = CL_SUCCESS;
                     -static  int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     -static  int filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     -static  int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
                     -static  int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
                     -static  int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode);
                     -static  int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token);
                     +    if (!status) {
                     +        /* invalid args, and no way to pass back the status code */
                     +        return -1;
                     +    }
                     -ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc)
                     -{
                     -    struct pdf_token *token;
                     -    ptrdiff_t rv;
                     +    if (!pdf || !obj) {
                     +        /* Invalid args */
                     +        retval = CL_EARG;
                     +        goto done;
                     +    }
                          if (!stream || !streamlen || fout < 0) {
                     -        cli_dbgmsg("cli_pdf: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff);
                     -        if (rc)
                     -            *rc = CL_ENULLARG;
                     -        return -1;
                     +        cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff);
                     +        retval = CL_ENULLARG;
                     +        goto done;
+                         }
                      #if 0
@@ -104,9 +136,8 @@ ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct p
                          token = cli_malloc(sizeof(struct pdf_token));
                          if (!token) {
                     -        if (rc)
                     -            *rc = CL_EMEM;
                     -        return -1;
                     +        retval = CL_EMEM;
                     +        goto done;
+                         }
                          token->flags = 0;
@@ -118,69 +149,110 @@ ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct p
                          token->content = cli_malloc(streamlen);
                          if (!token->content) {
                              free(token);
                     -        if (rc)
                     -            *rc = CL_EMEM;
                     -        return -1;
                     +        retval = CL_EMEM;
                     +        goto done;
+                         }
                          memcpy(token->content, stream, streamlen);
                          token->length = streamlen;
                     -    cli_dbgmsg("cli_pdf: detected %lu applied filters\n", (long unsigned)(obj->numfilters));
                     +    cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));
                     -    rv = (ptrdiff_t)pdf_decodestream_internal(pdf, obj, params, token);
                     -    /* return is generally ignored */
                     -    if (rc) {
                     -        if (rv == CL_VIRUS)
                     -            *rc = CL_VIRUS;
                     -        else
                     -            *rc = CL_SUCCESS;
                     -    }
+                    -
                     -    if (token->success) {
                     -        if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
                     -            if (cli_writen(fout, token->content, token->length) != token->length) {
                     -                cli_errmsg("cli_pdf: failed to write output file\n");
                     -                if (rc)
                     -                    *rc = CL_EWRITE;
                     -                return -1;
                     -            }
                     -            rv = token->length;
                     -        }
                     -    } else {  /* if no non-forced filter are decoded, return the raw stream */
                     +    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, &retval, objstm);
                     +    /*
                     +     * Pass back the return value, though we really only care
                     +     * if it is CV_VIRUS or CL_SUCCESS.
                     +     */
                     +    if (retval == CL_VIRUS)
                     +        retval = CL_VIRUS;
                     +    else
                     +        retval = CL_SUCCESS;
+                    +
                     +    if (!token->success) {
                     +        /*
                     +         * If it was successful, the internal() function calls cli_writen()
                     +         * However, in this case... no non-forced filter are decoded,
                     +         *   so return the raw stream.
                     +         */
                              if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
                     -            cli_dbgmsg("cli_pdf: no non-forced filters decoded, returning raw stream\n");
                     +            cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");
                                  if (cli_writen(fout, stream, streamlen) != streamlen) {
                     -                cli_errmsg("cli_pdf: failed to write output file\n");
                     -                if (rc)
                     -                    *rc = CL_EWRITE;
                     -                return -1;
                     +                cli_errmsg("pdf_decodestream: failed to write output file\n");
                     +                retval = CL_EWRITE;
                     +                bytes_scanned = -1;
                     +                goto done;
+                                 }
                     -            rv = streamlen;
                     +            bytes_scanned = streamlen;
+                             }
+                         }
                     -    free(token->content);
                     -    free(token);
                     -    return rv;
                     +done:
                     +    *status = retval;
+                    +
                     +    /*
                     +     * Free up the token, and token content, if any.
                     +     */
                     +    if (NULL != token)
                     +    {
                     +        if (NULL != token->content) {
                     +            free(token->content);
                     +            token->content = NULL;
                     +            token->length = 0;
                     +        }
                     +        free(token);
                     +        token = NULL;
                     +    }
+                    +
                     +    return bytes_scanned;
+                     }
                     -static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
                     +/**
                     + * @brief       Decode filter buffer data.
                     + *
                     + * Attempt to decompress, decrypt or otherwise parse it.
                     + *
                     + * @param pdf           Pdf context structure.
                     + * @param obj           The object we found the filter content in.
                     + * @param params        (optional) Dictionary parameters describing the filter data.
                     + * @param token         Pointer to and length of filter data.
                     + * @param fout          File handle to write data to to be scanned.
                     + * @param[out] status   CL_CLEAN/CL_SUCCESS or CL_VIRUS/CL_E<error>
                     + * @param objstm        (optional) Object stream context structure.
                     + * @return ptrdiff_t    The number of bytes we wrote to 'fout'. -1 if failed out.
                     + */
                     +static ptrdiff_t pdf_decodestream_internal(
                     +    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
                     +    struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm)
+                     {
                     +    cl_error_t vir = CL_CLEAN;
                     +    cl_error_t retval = CL_SUCCESS;
                     +    ptrdiff_t bytes_scanned = -1;
                          const char *filter = NULL;
                     -    int i, vir = 0, rc = CL_SUCCESS;
                     +    int i;
                     +    if (!status) {
                     +        /* invalid args, and no way to pass back the status code */
                     +        return -1;
                     +    }
+                    +
                     +    if (!pdf || !obj || !token) {
                     +        /* Invalid args */
                     +        retval = CL_EARG;
                     +        goto done;
                     +    }
+                    +
                          /*
                           * if pdf is decryptable, scan for CRYPT filter
                           * if none, force a DECRYPT filter application
                           */
                          if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) {
                              if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */
                     -            cli_dbgmsg("cli_pdf: skipping decoding => non-filter CRYPT (reason: xref)\n");
                     +            cli_dbgmsg("pdf_decodestream_internal: skipping decoding => non-filter CRYPT (reason: xref)\n");
                              else {
                     -            cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n");
                     -            if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) {
                     -                return rc;
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n");
                     +            retval = filter_decrypt(pdf, obj, params, token, 1);
                     +            if (retval != CL_SUCCESS) {
                     +                goto done;
+                                 }
+                             }
+                         }
@@ -188,33 +260,33 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
                          for (i = 0; i < obj->numfilters; i++) {
                              switch(obj->filterlist[i]) {
                              case OBJ_FILTER_A85:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => ASCII85DECODE\n", obj->filterlist[i]);
                     -            rc = filter_ascii85decode(pdf, obj, token);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => ASCII85DECODE\n", obj->filterlist[i]);
                     +            retval = filter_ascii85decode(pdf, obj, token);
                                  break;
                              case OBJ_FILTER_RL:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => RLDECODE\n", obj->filterlist[i]);
                     -            rc = filter_rldecode(pdf, obj, token);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => RLDECODE\n", obj->filterlist[i]);
                     +            retval = filter_rldecode(pdf, obj, token);
                                  break;
                              case OBJ_FILTER_FLATE:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]);
                     -            rc = filter_flatedecode(pdf, obj, params, token);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]);
                     +            retval = filter_flatedecode(pdf, obj, params, token);
                                  break;
                              case OBJ_FILTER_AH:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => ASCIIHEXDECODE\n", obj->filterlist[i]);
                     -            rc = filter_asciihexdecode(pdf, obj, token);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => ASCIIHEXDECODE\n", obj->filterlist[i]);
                     +            retval = filter_asciihexdecode(pdf, obj, token);
                                  break;
                              case OBJ_FILTER_CRYPT:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => CRYPT\n", obj->filterlist[i]);
                     -            rc = filter_decrypt(pdf, obj, params, token, 0);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => CRYPT\n", obj->filterlist[i]);
                     +            retval = filter_decrypt(pdf, obj, params, token, 0);
                                  break;
                              case OBJ_FILTER_LZW:
                     -            cli_dbgmsg("cli_pdf: decoding [%d] => LZWDECODE\n", obj->filterlist[i]);
                     -            rc = filter_lzwdecode(pdf, obj, params, token);
                     +            cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => LZWDECODE\n", obj->filterlist[i]);
                     +            retval = filter_lzwdecode(pdf, obj, params, token);
                                  break;
                              case OBJ_FILTER_JPX:
@@ -226,29 +298,29 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
                              case OBJ_FILTER_JBIG2:
                                  if (!filter) filter = "JBIG2DECODE";
                     -            cli_dbgmsg("cli_pdf: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter);
                     +            cli_dbgmsg("pdf_decodestream_internal: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter);
                                  filter = NULL;
                     -            rc = CL_BREAK;
                     +            retval = CL_BREAK;
                                  break;
                              default:
                     -            cli_dbgmsg("cli_pdf: unknown filter type [%d]\n", obj->filterlist[i]);
                     -            rc = CL_BREAK;
                     +            cli_dbgmsg("pdf_decodestream_internal: unknown filter type [%d]\n", obj->filterlist[i]);
                     +            retval = CL_BREAK;
                                  break;
+                             }
                              if (!(token->content) || !(token->length)) {
                     -            cli_dbgmsg("cli_pdf: empty content, breaking after %d (of %lu) filters\n",
                     -                       i, (long unsigned)(obj->numfilters));
                     +            cli_dbgmsg("pdf_decodestream_internal: empty content, breaking after %d (of %lu) filters\n", i, (long unsigned)(obj->numfilters));
                                  break;
+                             }
                     -        if (rc != CL_SUCCESS) {
                     -            if (rc == CL_VIRUS && pdf->ctx->options & CL_SCAN_ALLMATCHES)
                     -                vir = 1;
                     -            else {
                     -                const char *reason;
                     -                switch (rc) {
                     +        if (retval != CL_SUCCESS) {
                     +            if (retval == CL_VIRUS && pdf->ctx->options & CL_SCAN_ALLMATCHES) {
                     +                vir = CL_VIRUS;
                     +            } else {
                     +                const char* reason;
+                    +
                     +                switch (retval) {
                                      case CL_VIRUS:
                                          reason = "detection";
                                          break;
@@ -260,29 +332,89 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
                                          break;
+                                     }
                     -                cli_dbgmsg("cli_pdf: stopping after %d (of %lu) filters (reason: %s)\n",
                     -                           i, (long unsigned)(obj->numfilters), reason);
                     +                cli_dbgmsg("pdf_decodestream_internal: stopping after %d (of %lu) filters (reason: %s)\n", i, (long unsigned)(obj->numfilters), reason);
                                      break;
+                                 }
+                             }
                              token->success++;
                     +        /* Dump the stream content to a text file if keeptmp is enabled. */
                              if (pdf->ctx->engine->keeptmp) {
                     +            retval = pdf_decode_dump(pdf, obj, token, i+1);
                     +            if (retval != CL_SUCCESS) {
                     +                goto done;
                     +            }
                     +        }
                     +    }
                     -            if ((rc = pdf_decode_dump(pdf, obj, token, i+1)) != CL_SUCCESS)
                     -                return rc;
                     +    if (token->success > 0) {
                     +        /*
                     +         * Looks like we successfully decoded the stream, so lets write it out.
                     +         *   In the failure case, the caller will deal with the raw stream.
                     +         */
                     +        if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
                     +            if (cli_writen(fout, token->content, token->length) != token->length) {
                     +                cli_errmsg("pdf_decodestream_internal: failed to write output file\n");
                     +                retval = CL_EWRITE;
                     +                bytes_scanned = -1;
                     +                goto done;
                     +            }
                     +            bytes_scanned = token->length;
+                             }
+                         }
                     -    if (vir)
                     -        return CL_VIRUS;
                     -    if (rc == CL_BREAK)
                     -        return CL_SUCCESS;
                     -    return rc;
                     +    if (NULL != objstm)
                     +    {
                     +        /*
                     +         * The caller indicated that the decoded data is an object stream.
                     +         * Perform experimental object stream parsing to extract objects from the stream.
                     +         */
                     +        objstm->streambuf = (char*)token->content;
                     +        objstm->streambuf_len = (size_t)token->length;
+                    +
                     +        /* Take ownership of the malloc'd buffer */
                     +        token->content = NULL;
                     +        token->length = 0;
+                    +
                     +        int objs_found = pdf->nobjs;
                     +        if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm))
                     +        {
                     +            cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n");
                     +        }
+                    +
                     +        if (pdf->nobjs <= objs_found) {
                     +            cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm did not find any new objects!\n");
                     +        } else {
                     +            cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm found %d new objects.\n", pdf->nobjs - objs_found);
                     +        }
                     +    }
+                    +
                     +done:
+                    +
                     +    *status = retval;
+                    +
                     +    if (vir == CL_VIRUS)
                     +        *status = CL_VIRUS;
+                    +
                     +    if (*status == CL_BREAK)
                     +        *status = CL_SUCCESS;
+                    +
                     +    return bytes_scanned;
+                     }
                     -/* used only for intermediate dumping */
                     -static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl)
                     +/**
                     + * @brief   Dump PDF filter content such as stream contents to a temp file.
                     + *
                     + * Temp file is created in the pdf->dir directory.
                     + * Filename format is "pdf<pdf->files-1>_<lvl>".
                     + *
                     + * @param pdf   Pdf context structure.
                     + * @param obj   The object we found the filter content in.
                     + * @param token The struct for the filter contents.
                     + * @param lvl   A unique index to distinguish the files from each other.
                     + * @return cl_error_t
                     + */
                     +static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl)
+                     {
                          char fname[1024];
                          int ifd;
@@ -313,7 +445,7 @@ static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct p
                       * ascii85 inflation
                       * See http://www.piclist.com/techref/method/encode.htm (look for base85)
                       */
                     -static int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
                     +static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
+                     {
                          uint8_t *decoded, *dptr;
                          uint32_t declen = 0;
@@ -415,7 +547,7 @@ static int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, str
+                     }
                      /* imported from razorback */
                     -static int filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
                     +static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
+                     {
                          uint8_t *decoded, *temp;
                          uint32_t declen = 0, capacity = 0;
@@ -523,7 +655,7 @@ static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length)
                          return pt;
+                     }
                     -static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
                     +static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
+                     {
                          uint8_t *decoded, *temp;
                          uint32_t declen = 0, capacity = 0;
@@ -671,14 +803,14 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc
                          return rc;
+                     }
                     -static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
                     +static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token)
+                     {
                          uint8_t *decoded;
                          const uint8_t *content = (uint8_t *)token->content;
                          uint32_t length = token->length;
                          uint32_t i, j;
                     -    int rc = CL_SUCCESS;
                     +    cl_error_t rc = CL_SUCCESS;
                          if (!(decoded = (uint8_t *)cli_calloc(length/2 + 1, sizeof(uint8_t)))) {
                              cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
@@ -724,7 +856,7 @@ static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, st
+                     }
                      /* modes: 0 = use default/DecodeParms, 1 = use document setting */
                     -static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode)
                     +static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode)
+                     {
                          char *decrypted;
                          size_t length = (size_t)token->length;
@@ -768,7 +900,7 @@ static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pd
                          return CL_SUCCESS;
+                     }
                     -static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
                     +static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
+                     {
                          uint8_t *decoded, *temp;
                          uint32_t declen = 0, capacity = 0;

libclamav/pdfdecode.h

History View file @ 89d5207

@@ -36,6 +36,26 @@
                      #include "pdf.h"
                     -ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc);
                     +/**
                     + * @brief       Wrapper function for pdf_decodestream_internal.
                     + *
                     + * Allocate a token object to store decoded filter data.
                     + * Parse/decode the filter data and scan it.
                     + *
                     + * @param pdf       Pdf context structure.
                     + * @param obj       The object we found the filter content in.
                     + * @param params    Dictionary parameters describing the filter data.
                     + * @param stream    Filter stream buffer pointer.
                     + * @param streamlen Length of filter stream buffer.
                     + * @param xref      Indicates if the stream is an /XRef stream.  Do not apply forced decryption on /XRef streams.
                     + * @param fout      File descriptor to write to a temp file.
                     + * @param[out] rc   Return code ()
                     + * @param objstm    Object stream context structure.
                     + * @return ptrdiff_t
                     + */
                     +ptrdiff_t pdf_decodestream(
                     +    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
                     +    const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
                     +    struct objstm_struct *objstm);
                      #endif /* __PDFDECODE_H__ */

libclamav/pdfng.c

History View file @ 89d5207

@@ -377,17 +377,26 @@ char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const cha
                      char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
+                     {
                     -    const char *q = objstart, *oobj=obj->start+pdf->map;
                     +    const char *q = objstart;
                          char *p1, *p2;
                          size_t len, checklen;
                          char *res = NULL;
                          uint32_t objid;
                          size_t i;
                     -    if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) {
                     -        /* Possible attempt to exploit bb11980 */
                     -        cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n");
                     -        return NULL;
                     +    if (obj->objstm) {
                     +        if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) {
                     +            /* Possible attempt to exploit bb11980 */
                     +            cli_dbgmsg("Malformed PDF: Alleged size of obj in object stream in PDF would extend further than the object stream data.\n");
                     +            return NULL;
                     +        }
                     +    }
                     +    else {
                     +        if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) {
                     +            /* Possible attempt to exploit bb11980 */
                     +            cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n");
                     +            return NULL;
                     +        }
+                         }
                          /*
@@ -557,10 +566,10 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
                              /* Hex string */
                              p2 = p1+1;
                     -        while ((size_t)(p2 - oobj) < objsize && *p2 != '>')
                     +        while ((size_t)(p2 - objstart) < objsize && *p2 != '>')
                                  p2++;
                     -        if ((size_t)(p2 - oobj) == objsize) {
                     +        if ((size_t)(p2 - objstart) == objsize) {
                                  return NULL;
+                             }
@@ -647,7 +656,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
                          return res;
+                     }
                     -struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar)
                     +struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
+                     {
                          struct pdf_dict *res=NULL;
                          struct pdf_dict_node *node=NULL;
@@ -659,9 +668,10 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                          if (!(pdf) || !(obj) || !(begin))
                              return NULL;
                     -    objstart = (const char *)(obj->start + pdf->map);
                     +    objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                             : (const char *)(obj->start + pdf->map);
                     -    if (begin < objstart || (size_t)(begin - objstart) >= objsz - 2)
                     +    if (begin < objstart || (size_t)(begin - objstart) >= objsize - 2)
                              return NULL;
                          if (begin[0] != '<' || begin[1] != '<')
@@ -669,7 +679,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                          /* Find the end of the dictionary */
                          end = begin;
                     -    while ((size_t)(end - objstart) < objsz) {
                     +    while ((size_t)(end - objstart) < objsize) {
                              int increment=1;
                              if (in_string) {
                                  if (*end == '\\') {
@@ -689,18 +699,18 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                                      in_string=1;
                                      break;
                                  case '<':
                     -                if ((size_t)(end - objstart) <= objsz - 2 && end[1] == '<')
                     +                if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '<')
                                          ninner++;
                                      increment=2;
                                      break;
                                  case '>':
                     -                if ((size_t)(end - objstart) <= objsz - 2 && end[1] == '>')
                     +                if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '>')
                                          ninner--;
                                      increment=2;
                                      break;
+                             }
                     -        if ((size_t)(end - objstart) <= objsz - 2)
                     +        if ((size_t)(end - objstart) <= objsize - 2)
                                  if (end[0] == '>' && end[1] == '>' && ninner == 0)
                                      break;
@@ -708,7 +718,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
+                         }
                          /* More sanity checking */
                     -    if ((size_t)(end - objstart) >= objsz - 2)
                     +    if ((size_t)(end - objstart) >= objsize - 2)
                              return NULL;
                          if (end[0] != '>' || end[1] != '>')
@@ -809,7 +819,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                                      begin = p1+1;
                                      break;
                                  case '<':
                     -                if ((size_t)(begin - objstart) < objsz - 2) {
                     +                if ((size_t)(begin - objstart) < objsize - 2) {
                                          if (begin[1] == '<') {
                                              dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
                                              begin = p1+2;
@@ -912,7 +922,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                          return res;
+                     }
                     -struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar)
                     +struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
+                     {
                          struct pdf_array *res=NULL;
                          struct pdf_array_node *node=NULL;
@@ -924,9 +934,10 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
                          if (!(pdf) || !(obj) || !(begin))
                              return NULL;
                     -    objstart = obj->start + pdf->map;
                     +    objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                     +                             : (const char *)(obj->start + pdf->map);
                     -    if (begin < objstart || (size_t)(begin - objstart) >= objsz)
                     +    if (begin < objstart || (size_t)(begin - objstart) >= objsize)
                              return NULL;
                          if (begin[0] != '[')
@@ -934,7 +945,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
                          /* Find the end of the array */
                          end = begin;
                     -    while ((size_t)(end - objstart) < objsz) {
                     +    while ((size_t)(end - objstart) < objsize) {
                              if (in_string) {
                                  if (*end == '\\') {
                                      end += 2;
@@ -967,7 +978,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
+                         }
                          /* More sanity checking */
                     -    if ((size_t)(end - objstart) >= objsz)
                     +    if ((size_t)(end - objstart) >= objsize)
                              return NULL;
                          if (*end != ']')
@@ -991,7 +1002,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
                              switch (begin[0]) {
                                  case '<':
                     -                if ((size_t)(begin - objstart) < objsz - 2 && begin[1] == '<') {
                     +                if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') {
                                          dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin);
                                          begin+=2;
                                          break;

libclamav/str.c

History View file @ 89d5207

@@ -755,7 +755,7 @@ done:
                       * @return CL_SUCCESS       Success
                       * @return CL_EPARSE        Failure
                       */
                     -int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result)
                     +cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result)
+                     {
                          char *endptr = NULL;
                          long num;
@@ -798,7 +798,7 @@ int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int
                       * @return CL_SUCCESS       Success
                       * @return CL_EPARSE        Failure
                       */
                     -int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result)
                     +cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result)
+                     {
                          char *endptr = NULL;
                          long num;

libclamav/str.h

History View file @ 89d5207

@@ -28,6 +28,7 @@
                      #include <ctype.h>
                      #include <sys/types.h>
                     +#include "clamav.h"
                      #include "cltypes.h"
                      #ifdef HAVE_STRCASESTR
@@ -68,8 +69,8 @@ const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle
                      char *cli_strrcpy(char *dest, const char *source);
                      size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
                      size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip);
                     -int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result);
                     -int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result);
                     +cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result);
                     +cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result);
                      int cli_isnumber(const char *str);
                      char *cli_unescape(const char *str);
                      struct text_buffer;