libclamav/pdf.h
d056cc17
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5091689d
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
d056cc17
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
d056cc17
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
d056cc17
  */
798308de
 #ifndef __PDF_H
 #define __PDF_H
 
2023340a
 #include "others.h"
288057e9
 /* Number of slots in the fixed-size `filterlist` array of struct pdf_obj. */
 #define PDF_FILTERLIST_MAX 64
66f70f6a
 
1158b285
 /*
  * Bookkeeping for a single object stream: where its objects and their
  * (id, location) pairs sit, how many objects are expected versus actually
  * found, and the buffer that holds the stream contents.
  *
  * NOTE(review): the base that the uint32_t offsets are relative to (the file
  * map or `streambuf`) is not visible in this header — confirm in pdf.c.
  */
 struct objstm_struct {
288057e9
     uint32_t first;        // offset of first obj
     uint32_t current;      // offset of current obj
     uint32_t current_pair; // offset of current pair describing id, location of object
     uint32_t length;       // total length of all objects (starting at first)
     uint32_t n;            // number of objects that should be found in the object stream
     uint32_t nobjs_found;  // number of objects actually found in the object stream
     char *streambuf;       // address of stream buffer, beginning with first obj pair
     size_t streambuf_len;  // length of stream buffer, includes pairs followed by actual objects
1158b285
 };
 
dc200c6b
 /*
  * Description of one PDF object: its location and size, identifier, flag
  * words, the list of filters recorded for it, an optional contained stream,
  * and (when applicable) the object stream it lives in.
  */
 struct pdf_obj {
     uint32_t start; /* NOTE(review): presumably the object's starting offset in its backing buffer — confirm */
f18ae5be
     size_t size; /* NOTE(review): presumably the object's length in bytes — confirm */
dc200c6b
     uint32_t id;    /* object identifier; NOTE(review): exact encoding (object/generation number) not visible here — confirm */
     uint32_t flags; /* NOTE(review): presumably a bitmask of per-object flags — confirm */
5091689d
     uint32_t statsflags; /* NOTE(review): presumably takes OBJ_FLAG_PDFNAME_* values — confirm */
a9584bfe
     uint32_t numfilters;                     /* NOTE(review): presumably the number of used entries in filterlist — confirm */
     uint32_t filterlist[PDF_FILTERLIST_MAX]; /* fixed-capacity array of PDF_FILTERLIST_MAX filter entries */
43422579
     const char *stream;           // pointer to stream contained in object.
     size_t stream_size;           // size of stream contained in object.
288057e9
     struct objstm_struct *objstm; // Should be NULL unless the obj exists in an object stream (separate buffer)
1412b807
     char *path; /* NOTE(review): presumably the path of a file associated with this object; ownership not visible here — confirm */
dc200c6b
 };
2023340a
 
288057e9
 /*
  * Type tag carried by struct pdf_array_node in its `type` member.
  * NOTE(review): presumably tells how the node's `data` pointer is to be
  * interpreted (string, nested array, or dictionary) — confirm in the parser.
  */
 enum pdf_array_type {
     PDF_ARR_UNKNOWN = 0,
     PDF_ARR_STRING  = 1,
     PDF_ARR_ARRAY   = 2,
     PDF_ARR_DICT    = 3
 };
 /*
  * Type tag carried by struct pdf_dict_node in its `type` member.
  * NOTE(review): presumably tells how the node's `value` pointer is to be
  * interpreted (string, array, or nested dictionary) — confirm in the parser.
  */
 enum pdf_dict_type {
     PDF_DICT_UNKNOWN = 0,
     PDF_DICT_STRING  = 1,
     PDF_DICT_ARRAY   = 2,
     PDF_DICT_DICT    = 3
 };
440f1fff
 
 /*
  * One element referenced from a struct pdf_array. Each node carries an
  * untyped payload, its size, a type tag, and links to the neighbouring nodes.
  */
 struct pdf_array_node {
     void *data;               /* element payload; NOTE(review): presumably interpreted according to `type` — confirm */
     size_t datasz;            /* NOTE(review): presumably the size of `data` in bytes — confirm */
     enum pdf_array_type type; /* kind tag for this element */
 
     struct pdf_array_node *prev; /* link to the previous node */
     struct pdf_array_node *next; /* link to the next node */
 };
 
 /* Parsed array container: holds pointers into a chain of pdf_array_node entries. */
 struct pdf_array {
     struct pdf_array_node *nodes; /* NOTE(review): presumably the first node of the chain — confirm */
7a98488d
     struct pdf_array_node *tail; /* NOTE(review): presumably the last node of the chain — confirm */
440f1fff
 };
 
dd101bee
 /*
  * One key/value entry referenced from a struct pdf_dict. Each node carries a
  * key string, an untyped value with its size and type tag, and links to the
  * neighbouring nodes.
  */
 struct pdf_dict_node {
     char *key;               /* entry key; NOTE(review): ownership/termination not visible here — confirm */
     void *value;             /* entry value; NOTE(review): presumably interpreted according to `type` — confirm */
     size_t valuesz;          /* NOTE(review): presumably the size of `value` in bytes — confirm */
     enum pdf_dict_type type; /* kind tag for this entry's value */
 
     struct pdf_dict_node *prev; /* link to the previous node */
     struct pdf_dict_node *next; /* link to the next node */
 };
 
 /* Parsed dictionary container: holds pointers into a chain of pdf_dict_node entries. */
 struct pdf_dict {
     struct pdf_dict_node *nodes; /* NOTE(review): presumably the first node of the chain — confirm */
     struct pdf_dict_node *tail;  /* NOTE(review): presumably the last node of the chain — confirm */
 };
 
9d33052f
 /*
  * One textual statistic together with metadata about how it was obtained.
  * struct pdf_stats keeps pointers to entries of this type for fields such as
  * author, title and keywords.
  */
 struct pdf_stats_entry {
     char *data; /* the entry's text; NOTE(review): ownership and termination not visible here — confirm */
 
     /* populated by pdf_parse_string */
     struct pdf_stats_metadata {
         int length;          /* NOTE(review): presumably the length of `data` — confirm */
         struct pdf_obj *obj; /* NOTE(review): presumably the object the string was taken from — confirm */
         int success; /* if finalize succeeds */
     } meta;
 };
 
930b9395
 /*
  * Statistics gathered for one PDF: per-feature object counters followed by
  * pointers to the document-information entries (author, title, ...).
  * An instance is embedded in struct pdf_struct as its `stats` member.
  */
 struct pdf_stats {
288057e9
     int32_t ninvalidobjs;                     /* Number of invalid objects */
     int32_t njs;                              /* Number of javascript objects */
     int32_t nflate;                           /* Number of flate-encoded objects */
     int32_t nactivex;                         /* Number of ActiveX objects */
     int32_t nflash;                           /* Number of flash objects */
     int32_t ncolors;                          /* Number of colors */
     int32_t nasciihexdecode;                  /* Number of ASCIIHexDecode-filtered objects */
     int32_t nascii85decode;                   /* Number of ASCII85Decode-filtered objects */
     int32_t nembeddedfile;                    /* Number of embedded files */
     int32_t nimage;                           /* Number of image objects */
     int32_t nlzw;                             /* Number of LZW-filtered objects */
     int32_t nrunlengthdecode;                 /* Number of RunLengthDecode-filtered objects */
     int32_t nfaxdecode;                       /* Number of CCITT-filtered objects */
     int32_t njbig2decode;                     /* Number of JBIG2Decode-filtered objects */
     int32_t ndctdecode;                       /* Number of DCTDecode-filtered objects */
     int32_t njpxdecode;                       /* Number of JPXDecode-filtered objects */
     int32_t ncrypt;                           /* Number of Crypt-filtered objects */
     int32_t nstandard;                        /* Number of Standard-filtered objects */
     int32_t nsigned;                          /* Number of Signed objects */
     int32_t nopenaction;                      /* Number of OpenAction objects */
     int32_t nlaunch;                          /* Number of Launch objects */
     int32_t npage;                            /* Number of Page objects */
     int32_t nrichmedia;                       /* Number of RichMedia objects */
     int32_t nacroform;                        /* Number of AcroForm objects */
     int32_t nxfa;                             /* Number of XFA objects */
     struct pdf_stats_entry *author;           /* Author of the PDF */
     struct pdf_stats_entry *creator;          /* Application used to create the PDF */
     struct pdf_stats_entry *producer;         /* Application used to produce the PDF */
     struct pdf_stats_entry *creationdate;     /* Date the PDF was created */
     struct pdf_stats_entry *modificationdate; /* Date the PDF was modified */
     struct pdf_stats_entry *title;            /* Title of the PDF */
     struct pdf_stats_entry *subject;          /* Subject of the PDF */
     struct pdf_stats_entry *keywords;         /* Keywords of the PDF */
930b9395
 };
 
 /*
  * Encryption method identifiers, used as a parameter/return type by
  * decrypt_any(), get_enc_method() and parse_enc_method().
  * NOTE(review): the names presumably mirror the PDF crypt-filter methods
  * (None / Identity / V2 / AESV2 / AESV3) — confirm against the parser.
  */
 enum enc_method {
     ENC_UNKNOWN  = 0,
     ENC_NONE     = 1,
     ENC_IDENTITY = 2,
     ENC_V2       = 3,
     ENC_AESV2    = 4,
     ENC_AESV3    = 5
 };
 
 /*
  * Per-document state passed to nearly every function declared in this
  * header: the discovered objects, encryption settings, the input buffer and
  * position, the scan context, collected statistics, and any object streams.
  */
 struct pdf_struct {
1158b285
     struct pdf_obj **objs; /* array of pointers to objects; NOTE(review): presumably `nobjs` entries long — confirm */
930b9395
     unsigned nobjs;                   /* NOTE(review): presumably the number of entries in `objs` — confirm */
     unsigned flags;                   /* NOTE(review): presumably a bitmask built from enum pdf_flag values — confirm */
     unsigned enc_method_stream;       /* NOTE(review): presumably an enum enc_method value applied to streams — confirm */
     unsigned enc_method_string;       /* NOTE(review): presumably an enum enc_method value applied to strings — confirm */
     unsigned enc_method_embeddedfile; /* NOTE(review): presumably an enum enc_method value applied to embedded files — confirm */
     const char *CF;                   /* NOTE(review): presumably points at the crypt-filter (/CF) dictionary text — confirm */
     long CF_n;                        /* NOTE(review): presumably the length associated with `CF` — confirm */
     const char *map;                  /* NOTE(review): presumably the start of the in-memory PDF data — confirm */
f18ae5be
     size_t size; /* NOTE(review): presumably the number of bytes available at `map` — confirm */
930b9395
     off_t offset;       /* NOTE(review): presumably the current parse position — confirm what it is relative to */
     off_t startoff;     /* NOTE(review): presumably the offset at which the PDF data begins — confirm */
     cli_ctx *ctx;       /* scan context (type declared outside this header) */
     const char *dir;    /* NOTE(review): presumably the directory used for extracted files — confirm */
     unsigned files;     /* NOTE(review): presumably a count of files produced so far — confirm */
     uint32_t enc_objid; /* NOTE(review): presumably the id of the object holding the encryption dictionary — confirm */
     char *fileID;       /* NOTE(review): presumably the document file identifier bytes — confirm */
     unsigned fileIDlen; /* NOTE(review): presumably the length of `fileID` — confirm */
     char *key;          /* NOTE(review): presumably the derived decryption key — confirm */
     unsigned keylen;    /* NOTE(review): presumably the length of `key` — confirm */
     struct pdf_stats stats; /* statistics collected for this document (embedded by value) */
1158b285
     struct objstm_struct **objstms; /* array of pointers to object-stream descriptors */
     uint32_t nobjstms;              /* NOTE(review): presumably the number of entries in `objstms` — confirm */
930b9395
 };
 
5091689d
 /*
  * OBJ_FLAG_PDFNAME_* values.
  * NOTE(review): the variable these values are stored in is not visible in
  * this header; by name they record whether PDF-name handling for an object
  * has been done — confirm against the users in pdf.c.
  */
 #define OBJ_FLAG_PDFNAME_NONE 0x0
 #define OBJ_FLAG_PDFNAME_DONE 0x1
 
930b9395
 /*
  * PDF_EXTRACT_OBJ_* values.
  * NOTE(review): presumably the values accepted by the `flags` parameter of
  * pdf_extract_obj(), with SCAN requesting that the extracted object also be
  * scanned — confirm in pdf.c.
  */
 #define PDF_EXTRACT_OBJ_NONE 0x0
 #define PDF_EXTRACT_OBJ_SCAN 0x1
 
2d5dbc37
 /*
  * Top-level PDF handler exported by this header.
  * NOTE(review): `dir` and `offset` presumably correspond to the `dir` and
  * `offset`/`startoff` members of struct pdf_struct, and the int result is
  * presumably a scan status code — confirm against the definition in pdf.c.
  */
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset);
224d1c4d
 /*
  * Processes the single object `obj` belonging to `pdf` (by name: parses it).
  * Returns nothing, so results are presumably recorded on `obj` and/or `pdf`
  * — TODO confirm in pdf.c.
  */
 void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj);
930b9395
 /*
  * Extracts the object `obj` of `pdf`.
  * NOTE(review): `flags` presumably takes PDF_EXTRACT_OBJ_* values and the
  * int result is presumably a status code — confirm in pdf.c.
  */
 int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags);
1158b285
 /*
  * Object discovery step operating on `pdf`; reports its outcome as a
  * cl_error_t.
  * NOTE(review): presumably locates the next object and appends it to
  * pdf->objs — confirm in pdf.c, including which codes signal "no more objects".
  */
 cl_error_t pdf_findobj(struct pdf_struct *pdf);
930b9395
 /*
  * Looks up an object of `pdf` by `objid` and returns a pointer to it.
  * NOTE(review): `obj` is presumably a hint for where the search starts, and
  * NULL is presumably returned when nothing matches — confirm in pdf.c.
  */
 struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
 
e2b1880f
 /*
  * Encryption set-up for `pdf`.
  * NOTE(review): presumably fills in the enc_method_*, key and keylen members
  * of struct pdf_struct — confirm in pdf.c.
  */
 void pdf_handle_enc(struct pdf_struct *pdf);
e09d8843
 /*
  * Decrypts the buffer `in` for the object with identifier `id` using
  * `enc_method`, returning a char buffer.
  * NOTE(review): `length` is passed by pointer, so it is presumably the input
  * length on entry and the output length on return; the result is presumably
  * heap-allocated and owned by the caller, NULL on failure — confirm in pdf.c.
  */
 char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method);
e2b1880f
 /*
  * Returns the encryption method that applies to `obj` within `pdf`.
  * NOTE(review): presumably chooses among the enc_method_* members of
  * struct pdf_struct depending on the kind of object — confirm in pdf.c.
  */
 enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj);
1d0cdc67
 /*
  * Derives an encryption method from the dictionary text `dict` (of `len`
  * bytes) for the entry named `key`.
  * NOTE(review): `def` is presumably the value returned when `key` is absent
  * or cannot be interpreted — confirm in pdf.c.
  */
 enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
e2b1880f
 
1d0cdc67
 /*
  * Records `flag` for `obj` within `pdf`.
  * NOTE(review): `enum pdf_flag` is not declared in the visible part of this
  * header; confirm that its definition is in scope wherever this prototype is
  * seen, otherwise the enum is only declared at prototype scope.
  */
 void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag);
9d33052f
 /*
  * Produces the final form of the string `in` (of `len` bytes) found in `obj`.
  * NOTE(review): presumably handles decryption/decoding and returns a newly
  * allocated buffer owned by the caller, or NULL on failure — confirm in the
  * implementation.
  */
 char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);
1158b285
 /*
  * Parsers for string, array and dictionary values found inside an object.
  *
  * pdf_parse_string() additionally fills in `meta` (struct pdf_stats_metadata
  * is documented as "populated by pdf_parse_string").
  *
  * NOTE(review): for all three, `*endchar` is presumably set to the position
  * just past the parsed value, and the returned pointer is presumably
  * heap-allocated (release arrays/dicts with pdf_free_array()/pdf_free_dict())
  * and NULL on failure — confirm in the implementation.
  */
 char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta);
 struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
 struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
930b9395
 /*
  * is_object_reference(): tests whether the text at `begin` is an object
  * reference.
  * NOTE(review): presumably returns non-zero on a match, stores the referenced
  * object id in `*id` and the end of the reference in `*endchar` — confirm.
  *
  * pdf_free_dict() / pdf_free_array(): release a dictionary / array.
  * NOTE(review): presumably the counterparts of pdf_parse_dict() and
  * pdf_parse_array(); whether NULL is accepted is not visible here — confirm.
  */
 int is_object_reference(char *begin, char **endchar, uint32_t *id);
 void pdf_free_dict(struct pdf_dict *dict);
 void pdf_free_array(struct pdf_array *array);
224d1c4d
 /*
  * Print helpers for parsed dictionaries and arrays.
  * NOTE(review): `depth` is presumably the current nesting level used for
  * indentation, and the output presumably goes to the debug log — confirm in
  * the implementation.
  */
 void pdf_print_dict(struct pdf_dict *dict, unsigned long depth);
 void pdf_print_array(struct pdf_array *array, unsigned long depth);
798308de
 
1158b285
 /*
  * pdf_find_and_extract_objs(): object discovery and extraction over `pdf`,
  * reporting its outcome as a cl_error_t.
  * NOTE(review): `*alerts` is presumably incremented/set with the number of
  * detections raised along the way — confirm in pdf.c.
  *
  * pdf_find_and_parse_objs_in_objstm(): discovers and parses the objects held
  * in the object stream described by `objstm`.
  * NOTE(review): presumably advances objstm->current / current_pair and
  * updates objstm->nobjs_found — confirm in the implementation.
  */
 cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts);
 cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm);
 
798308de
 #endif