/*
 *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Nigel Horne
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
798308de |
#ifndef __PDF_H
#define __PDF_H
|
2023340a |
#include "others.h" |
288057e9 |
/* Capacity of pdf_obj.filterlist: the number of filter slots stored per object. */
#define PDF_FILTERLIST_MAX 64
66f70f6a |
|
1158b285 |
/*
 * Bookkeeping for one PDF object stream: the offsets used while walking the
 * stream plus the buffer that holds the id/location pairs and the objects.
 */
struct objstm_struct {
    uint32_t first;         /* offset of first obj */
    uint32_t current;       /* offset of current obj */
    uint32_t current_pair;  /* offset of current pair describing id, location of object */
    uint32_t length;        /* total length of all objects (starting at first) */
    uint32_t n;             /* number of objects that should be found in the object stream */
    uint32_t nobjs_found;   /* number of objects actually found in the object stream */
    char *streambuf;        /* address of stream buffer, beginning with first obj pair */
    size_t streambuf_len;   /* length of stream buffer, includes pairs followed by actual objects */
};
|
dc200c6b |
struct pdf_obj {
uint32_t start; |
f18ae5be |
size_t size; |
dc200c6b |
uint32_t id;
uint32_t flags; |
5091689d |
uint32_t statsflags; |
a9584bfe |
uint32_t numfilters;
uint32_t filterlist[PDF_FILTERLIST_MAX]; |
43422579 |
const char *stream; // pointer to stream contained in object.
size_t stream_size; // size of stream contained in object. |
288057e9 |
struct objstm_struct *objstm; // Should be NULL unless the obj exists in an object stream (separate buffer) |
1412b807 |
char *path; |
dc200c6b |
}; |
2023340a |
|
288057e9 |
/* Tag stored in pdf_array_node.type to say what kind of element a node holds. */
enum pdf_array_type {
    PDF_ARR_UNKNOWN = 0,
    PDF_ARR_STRING  = 1,
    PDF_ARR_ARRAY   = 2,
    PDF_ARR_DICT    = 3
};
/* Tag stored in pdf_dict_node.type to say what kind of value a node holds. */
enum pdf_dict_type {
    PDF_DICT_UNKNOWN = 0,
    PDF_DICT_STRING,
    PDF_DICT_ARRAY,
    PDF_DICT_DICT
};
440f1fff |
/* One element of a parsed PDF array; nodes are chained through prev/next. */
struct pdf_array_node {
    void *data;                   /* element payload; interpret according to `type` */
    size_t datasz;                /* presumably the size of `data` in bytes -- TODO confirm */
    enum pdf_array_type type;     /* kind of element held in `data` */
    struct pdf_array_node *prev;  /* neighbouring node in one direction */
    struct pdf_array_node *next;  /* neighbouring node in the other direction */
};
/*
 * A parsed PDF array, held as a chain of pdf_array_node elements.
 * Returned by pdf_parse_array() and released with pdf_free_array().
 */
struct pdf_array {
    struct pdf_array_node *nodes; /* presumably the first node of the chain -- TODO confirm */
    struct pdf_array_node *tail;  /* presumably the last node of the chain -- TODO confirm */
};
|
dd101bee |
/* One key/value entry of a parsed PDF dictionary; nodes are chained through prev/next. */
struct pdf_dict_node {
    char *key;                   /* entry name */
    void *value;                 /* entry payload; interpret according to `type` */
    size_t valuesz;              /* presumably the size of `value` in bytes -- TODO confirm */
    enum pdf_dict_type type;     /* kind of value held in `value` */
    struct pdf_dict_node *prev;  /* neighbouring node in one direction */
    struct pdf_dict_node *next;  /* neighbouring node in the other direction */
};
/*
 * A parsed PDF dictionary, held as a chain of pdf_dict_node entries.
 * Returned by pdf_parse_dict() and released with pdf_free_dict().
 */
struct pdf_dict {
    struct pdf_dict_node *nodes; /* presumably the first entry of the chain -- TODO confirm */
    struct pdf_dict_node *tail;  /* presumably the last entry of the chain -- TODO confirm */
};
|
9d33052f |
/*
 * One textual statistics value (used for the metadata fields of struct
 * pdf_stats) together with the bookkeeping recorded while it was parsed.
 */
struct pdf_stats_entry {
    char *data; /* the value's text */

    /*
     * Filled in by pdf_parse_string(). The tag is declared inline here and is
     * also used on its own in pdf_parse_string()'s prototype.
     */
    struct pdf_stats_metadata {
        int length;
        struct pdf_obj *obj;
        int success; /* set if finalize succeeds */
    } meta;
};
|
930b9395 |
/*
 * Per-document statistics: counters for the kinds of objects and filters
 * encountered, followed by the document's descriptive metadata entries.
 */
struct pdf_stats {
    int32_t ninvalidobjs;                     /* Number of invalid objects */
    int32_t njs;                              /* Number of javascript objects */
    int32_t nflate;                           /* Number of flate-encoded objects */
    int32_t nactivex;                         /* Number of ActiveX objects */
    int32_t nflash;                           /* Number of flash objects */
    int32_t ncolors;                          /* Number of colors */
    int32_t nasciihexdecode;                  /* Number of ASCIIHexDecode-filtered objects */
    int32_t nascii85decode;                   /* Number of ASCII85Decode-filtered objects */
    int32_t nembeddedfile;                    /* Number of embedded files */
    int32_t nimage;                           /* Number of image objects */
    int32_t nlzw;                             /* Number of LZW-filtered objects */
    int32_t nrunlengthdecode;                 /* Number of RunLengthDecode-filtered objects */
    int32_t nfaxdecode;                       /* Number of CCITT-filtered objects */
    int32_t njbig2decode;                     /* Number of JBIG2Decode-filtered objects */
    int32_t ndctdecode;                       /* Number of DCTDecode-filtered objects */
    int32_t njpxdecode;                       /* Number of JPXDecode-filtered objects */
    int32_t ncrypt;                           /* Number of Crypt-filtered objects */
    int32_t nstandard;                        /* Number of Standard-filtered objects */
    int32_t nsigned;                          /* Number of Signed objects */
    int32_t nopenaction;                      /* Number of OpenAction objects */
    int32_t nlaunch;                          /* Number of Launch objects */
    int32_t npage;                            /* Number of Page objects */
    int32_t nrichmedia;                       /* Number of RichMedia objects */
    int32_t nacroform;                        /* Number of AcroForm objects */
    int32_t nxfa;                             /* Number of XFA objects */
    struct pdf_stats_entry *author;           /* Author of the PDF */
    struct pdf_stats_entry *creator;          /* Application used to create the PDF */
    struct pdf_stats_entry *producer;         /* Application used to produce the PDF */
    struct pdf_stats_entry *creationdate;     /* Date the PDF was created */
    struct pdf_stats_entry *modificationdate; /* Date the PDF was modified */
    struct pdf_stats_entry *title;            /* Title of the PDF */
    struct pdf_stats_entry *subject;          /* Subject of the PDF */
    struct pdf_stats_entry *keywords;         /* Keywords of the PDF */
};
/*
 * Encryption method selector, used by decrypt_any(), get_enc_method() and
 * parse_enc_method().
 * NOTE(review): the names presumably follow the PDF crypt filter method names
 * (Identity, V2, AESV2, AESV3) -- confirm against parse_enc_method().
 */
enum enc_method {
    ENC_UNKNOWN  = 0,
    ENC_NONE     = 1,
    ENC_IDENTITY = 2,
    ENC_V2       = 3,
    ENC_AESV2    = 4,
    ENC_AESV3    = 5
};
struct pdf_struct { |
1158b285 |
struct pdf_obj **objs; |
930b9395 |
unsigned nobjs;
unsigned flags;
unsigned enc_method_stream;
unsigned enc_method_string;
unsigned enc_method_embeddedfile;
const char *CF;
long CF_n;
const char *map; |
f18ae5be |
size_t size; |
930b9395 |
off_t offset;
off_t startoff;
cli_ctx *ctx;
const char *dir;
unsigned files;
uint32_t enc_objid;
char *fileID;
unsigned fileIDlen;
char *key;
unsigned keylen;
struct pdf_stats stats; |
1158b285 |
struct objstm_struct **objstms;
uint32_t nobjstms; |
930b9395 |
};
|
5091689d |
/*
 * PDF-name processing state flags.
 * NOTE(review): presumably stored in pdf_obj.statsflags -- confirm at the use sites.
 */
#define OBJ_FLAG_PDFNAME_NONE   0x0
#define OBJ_FLAG_PDFNAME_DONE   0x1
|
930b9395 |
/*
 * Object extraction flags.
 * NOTE(review): presumably values for the `flags` argument of pdf_extract_obj() -- confirm.
 */
#define PDF_EXTRACT_OBJ_NONE    0x0
#define PDF_EXTRACT_OBJ_SCAN    0x1
|
2d5dbc37 |
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset); |
224d1c4d |
void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj); |
930b9395 |
int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); |
1158b285 |
cl_error_t pdf_findobj(struct pdf_struct *pdf); |
930b9395 |
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
|
e2b1880f |
/* Process the document's encryption information; results are kept in `pdf`. */
void pdf_handle_enc(struct pdf_struct *pdf);

/*
 * Decrypt `in` for object `id` using `enc_method`.
 * `length` is in/out: presumably the input length on entry and the output
 * length on return.
 * NOTE(review): the returned buffer is presumably heap-allocated and owned by
 * the caller -- confirm in the implementation.
 */
char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method);

/* Determine which encryption method applies to `obj`. */
enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj);

/*
 * Read the encryption method stored under `key` in the dictionary text `dict`
 * (`len` bytes); `def` is presumably returned when no method can be read.
 */
enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
e2b1880f |
|
1d0cdc67 |
/*
 * Record `flag` for `obj`.
 * NOTE(review): enum pdf_flag is not declared in this header; it must be
 * visible before this prototype is compiled -- confirm where it is defined.
 */
void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag);

/*
 * Finalize the `len`-byte string `in` that belongs to `obj`.
 * NOTE(review): the result is presumably a newly allocated string owned by
 * the caller, or NULL on failure -- confirm.
 */
char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);

/*
 * Parse a string value from an object. `objstart`/`objsize` describe the
 * object's buffer, `endchar` receives a position in that buffer, and `meta`
 * is filled in with struct pdf_stats_metadata details.
 * NOTE(review): the role of `str` (presumably the key to look for) and
 * ownership of the returned string are not visible here -- confirm.
 */
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta);

/* Parse an array starting at `begin`; release the result with pdf_free_array(). */
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);

/* Parse a dictionary starting at `begin`; release the result with pdf_free_dict(). */
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);

/*
 * Test whether the text at `begin` is an object reference.
 * NOTE(review): presumably returns non-zero on a match and then stores the
 * referenced id in `*id` and the end position in `*endchar` -- confirm.
 */
int is_object_reference(char *begin, char **endchar, uint32_t *id);

/* Release a dictionary obtained from pdf_parse_dict(). */
void pdf_free_dict(struct pdf_dict *dict);

/* Release an array obtained from pdf_parse_array(). */
void pdf_free_array(struct pdf_array *array);

/* Print `dict`; `depth` is presumably the nesting level used for indentation. */
void pdf_print_dict(struct pdf_dict *dict, unsigned long depth);

/* Print `array`; `depth` is presumably the nesting level used for indentation. */
void pdf_print_array(struct pdf_array *array, unsigned long depth);
798308de |
|
1158b285 |
/*
 * Find and extract the objects of `pdf`.
 * NOTE(review): `alerts` is an output counter, presumably of detections
 * raised during extraction -- confirm in the implementation.
 */
cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf,
                                     uint32_t *alerts);

/* Find and parse the objects contained in the object stream `objstm`. */
cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf,
                                             struct objstm_struct *objstm);
|
798308de |
#endif |