/* * Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2007-2013 Sourcefire, Inc. * * Authors: Nigel Horne * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef __PDF_H #define __PDF_H #include "others.h" #define PDF_FILTERLIST_MAX 64 struct objstm_struct { uint32_t first; // offset of first obj uint32_t current; // offset of current obj uint32_t current_pair; // offset of current pair describing id, location of object uint32_t length; // total length of all objects (starting at first) uint32_t n; // number of objects that should be found in the object stream uint32_t nobjs_found; // number of objects actually found in the object stream char *streambuf; // address of stream buffer, beginning with first obj pair size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects }; struct pdf_obj { uint32_t start; size_t size; uint32_t id; uint32_t flags; uint32_t statsflags; uint32_t numfilters; uint32_t filterlist[PDF_FILTERLIST_MAX]; const char *stream; // pointer to stream contained in object. size_t stream_size; // size of stream contained in object. struct objstm_struct *objstm; // Should be NULL unless the obj exists in an object stream (separate buffer) char *path; }; enum pdf_array_type { PDF_ARR_UNKNOWN = 0, PDF_ARR_STRING, PDF_ARR_ARRAY, PDF_ARR_DICT }; enum pdf_dict_type { PDF_DICT_UNKNOWN = 0, PDF_DICT_STRING, PDF_DICT_ARRAY, PDF_DICT_DICT }; struct pdf_array_node { void *data; size_t datasz; enum pdf_array_type type; struct pdf_array_node *prev; struct pdf_array_node *next; }; struct pdf_array { struct pdf_array_node *nodes; struct pdf_array_node *tail; }; struct pdf_dict_node { char *key; void *value; size_t valuesz; enum pdf_dict_type type; struct pdf_dict_node *prev; struct pdf_dict_node *next; }; struct pdf_dict { struct pdf_dict_node *nodes; struct pdf_dict_node *tail; }; struct pdf_stats_entry { char *data; /* populated by pdf_parse_string */ struct pdf_stats_metadata { int length; struct pdf_obj *obj; int success; /* if finalize succeeds */ } meta; }; struct pdf_stats { int32_t ninvalidobjs; /* Number of invalid objects */ int32_t njs; /* Number of javascript objects */ int32_t nflate; /* Number of flate-encoded objects */ int32_t nactivex; /* Number of ActiveX objects */ int32_t nflash; /* Number of flash objects */ int32_t ncolors; /* Number of colors */ int32_t nasciihexdecode; /* Number of ASCIIHexDecode-filtered objects */ int32_t nascii85decode; /* Number of ASCII85Decode-filtered objects */ int32_t nembeddedfile; /* Number of embedded files */ int32_t nimage; /* Number of image objects */ int32_t nlzw; /* Number of LZW-filtered objects */ int32_t nrunlengthdecode; /* Number of RunLengthDecode-filtered objects */ int32_t nfaxdecode; /* Number of CCITT-filtered objects */ int32_t njbig2decode; /* Number of JBIG2Decode-filtered objects */ int32_t ndctdecode; /* Number of DCTDecode-filtered objects */ int32_t njpxdecode; /* Number of JPXDecode-filtered objects */ int32_t ncrypt; /* Number of Crypt-filtered objects */ int32_t nstandard; /* Number of Standard-filtered objects */ int32_t nsigned; /* Number of Signed objects */ int32_t nopenaction; /* Number of OpenAction objects */ int32_t nlaunch; /* Number of Launch objects */ int32_t npage; /* Number of Page objects */ int32_t nrichmedia; /* Number of RichMedia objects */ int32_t nacroform; /* Number of AcroForm objects */ int32_t nxfa; /* Number of XFA objects */ struct pdf_stats_entry *author; /* Author of the PDF */ struct pdf_stats_entry *creator; /* Application used to create the PDF */ struct pdf_stats_entry *producer; /* Application used to produce the PDF */ struct pdf_stats_entry *creationdate; /* Date the PDF was created */ struct pdf_stats_entry *modificationdate; /* Date the PDF was modified */ struct pdf_stats_entry *title; /* Title of the PDF */ struct pdf_stats_entry *subject; /* Subject of the PDF */ struct pdf_stats_entry *keywords; /* Keywords of the PDF */ }; enum enc_method { ENC_UNKNOWN, ENC_NONE, ENC_IDENTITY, ENC_V2, ENC_AESV2, ENC_AESV3 }; struct pdf_struct { struct pdf_obj **objs; unsigned nobjs; unsigned flags; unsigned enc_method_stream; unsigned enc_method_string; unsigned enc_method_embeddedfile; const char *CF; long CF_n; const char *map; size_t size; off_t offset; off_t startoff; cli_ctx *ctx; const char *dir; unsigned files; uint32_t enc_objid; char *fileID; unsigned fileIDlen; char *key; unsigned keylen; struct pdf_stats stats; struct objstm_struct **objstms; uint32_t nobjstms; }; #define OBJ_FLAG_PDFNAME_NONE 0x0 #define OBJ_FLAG_PDFNAME_DONE 0x1 #define PDF_EXTRACT_OBJ_NONE 0x0 #define PDF_EXTRACT_OBJ_SCAN 0x1 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset); void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj); int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); cl_error_t pdf_findobj(struct pdf_struct *pdf); struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid); void pdf_handle_enc(struct pdf_struct *pdf); char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method); enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj); enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def); void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag); char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len); char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta); struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar); struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar); int is_object_reference(char *begin, char **endchar, uint32_t *id); void pdf_free_dict(struct pdf_dict *dict); void pdf_free_array(struct pdf_array *array); void pdf_print_dict(struct pdf_dict *dict, unsigned long depth); void pdf_print_array(struct pdf_array *array, unsigned long depth); cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts); cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm); #endif