...
|
...
|
@@ -71,13 +71,17 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
|
71
|
71
|
*Save the file being worked on in tmp */
|
72
|
72
|
#endif
|
73
|
73
|
|
|
74
|
+#define PDF_EXTRACT_OBJ_NONE 0x0
|
|
75
|
+#define PDF_EXTRACT_OBJ_SCAN 0x1
|
|
76
|
+
|
74
|
77
|
struct pdf_struct;
|
75
|
78
|
|
76
|
79
|
static int asciihexdecode(const char *buf, off_t len, char *output);
|
77
|
80
|
static int ascii85decode(const char *buf, off_t len, unsigned char *output);
|
78
|
81
|
static const char *pdf_nextlinestart(const char *ptr, size_t len);
|
79
|
82
|
static const char *pdf_nextobject(const char *ptr, size_t len);
|
80
|
|
-static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size_t objsize, const char *str);
|
|
83
|
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str);
|
|
84
|
+static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags);
|
81
|
85
|
|
82
|
86
|
/* PDF statistics callbacks and related */
|
83
|
87
|
struct pdf_action;
|
...
|
...
|
@@ -577,8 +581,8 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, const
|
577
|
577
|
|
578
|
578
|
static struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)
|
579
|
579
|
{
|
580
|
|
- unsigned j;
|
581
|
|
- unsigned i;
|
|
580
|
+ uint32_t j;
|
|
581
|
+ uint32_t i;
|
582
|
582
|
|
583
|
583
|
/* search starting at previous obj (if exists) */
|
584
|
584
|
i = (obj != pdf->objs) ? obj - pdf->objs : 0;
|
...
|
...
|
@@ -1003,7 +1007,7 @@ static char *pdf_readval(const char *q, int len, const char *key);
|
1003
|
1003
|
static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
|
1004
|
1004
|
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
|
1005
|
1005
|
|
1006
|
|
-static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
|
|
1006
|
+static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
|
1007
|
1007
|
{
|
1008
|
1008
|
char fullname[NAME_MAX + 1];
|
1009
|
1009
|
int fout;
|
...
|
...
|
@@ -1046,6 +1050,9 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
|
1046
|
1046
|
return CL_ETMPFILE;
|
1047
|
1047
|
}
|
1048
|
1048
|
|
|
1049
|
+ if (!(flags & PDF_EXTRACT_OBJ_SCAN))
|
|
1050
|
+ obj->path = strdup(fullname);
|
|
1051
|
+
|
1049
|
1052
|
do {
|
1050
|
1053
|
if (obj->flags & (1 << OBJ_STREAM)) {
|
1051
|
1054
|
const char *start = pdf->map + obj->start;
|
...
|
...
|
@@ -1294,7 +1301,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
|
1294
|
1294
|
|
1295
|
1295
|
cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname);
|
1296
|
1296
|
|
1297
|
|
- if (sum) {
|
|
1297
|
+ if (flags & PDF_EXTRACT_OBJ_SCAN && sum) {
|
1298
|
1298
|
int rc2;
|
1299
|
1299
|
|
1300
|
1300
|
cli_updatelimits(pdf->ctx, sum);
|
...
|
...
|
@@ -1327,7 +1334,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
|
1327
|
1327
|
free(ascii_decoded);
|
1328
|
1328
|
free(decrypted);
|
1329
|
1329
|
|
1330
|
|
- if (!pdf->ctx->engine->keeptmp)
|
|
1330
|
+ if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp)
|
1331
|
1331
|
if (cli_unlink(fullname) && rc != CL_VIRUS)
|
1332
|
1332
|
rc = CL_EUNLINK;
|
1333
|
1333
|
|
...
|
...
|
@@ -2551,7 +2558,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
|
2551
|
2551
|
/* extract PDF objs */
|
2552
|
2552
|
for (i=0;!rc && i<pdf.nobjs;i++) {
|
2553
|
2553
|
struct pdf_obj *obj = &pdf.objs[i];
|
2554
|
|
- rc = pdf_extract_obj(&pdf, obj);
|
|
2554
|
+ rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
|
2555
|
2555
|
switch (rc) {
|
2556
|
2556
|
case CL_EFORMAT:
|
2557
|
2557
|
/* Don't halt on one bad object */
|
...
|
...
|
@@ -2808,7 +2815,7 @@ pdf_nextobject(const char *ptr, size_t len)
|
2808
|
2808
|
return NULL;
|
2809
|
2809
|
}
|
2810
|
2810
|
|
2811
|
|
-static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size_t objsize, const char *str)
|
|
2811
|
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str)
|
2812
|
2812
|
{
|
2813
|
2813
|
const char *q = objstart;
|
2814
|
2814
|
char *p1, *p2;
|
...
|
...
|
@@ -2863,8 +2870,11 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
|
2863
|
2863
|
/* We should be at the start of the string, minus 1 */
|
2864
|
2864
|
|
2865
|
2865
|
if (isdigit(p1[0])) {
|
2866
|
|
- unsigned long objnum, revnum;
|
|
2866
|
+ unsigned long objnum;
|
|
2867
|
+ struct pdf_obj *newobj;
|
2867
|
2868
|
char *end;
|
|
2869
|
+ STATBUF sb;
|
|
2870
|
+ int fd;
|
2868
|
2871
|
|
2869
|
2872
|
/*
|
2870
|
2873
|
* This is kind of sketchy... This string says it points to another object.
|
...
|
...
|
@@ -2876,24 +2886,42 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
|
2876
|
2876
|
if ((end - p1) == 0)
|
2877
|
2877
|
return NULL;
|
2878
|
2878
|
|
2879
|
|
- /* Skip whitespaces */
|
2880
|
|
- p1 = end;
|
2881
|
|
- while ((p1 - q) < objsize && isspace(p1[0]))
|
2882
|
|
- p1++;
|
|
2879
|
+ newobj = find_obj(pdf, obj, objnum);
|
|
2880
|
+ if (!(newobj))
|
|
2881
|
+ return NULL;
|
2883
|
2882
|
|
2884
|
|
- if (p1 - q == objsize)
|
|
2883
|
+ if (pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE) != CL_SUCCESS)
|
2885
|
2884
|
return NULL;
|
2886
|
2885
|
|
2887
|
|
- /* Get revision number */
|
2888
|
|
- revnum = strtoul(p1, &end, 10);
|
2889
|
|
- if ((end - p1) == 0)
|
|
2886
|
+ if (!(newobj->path))
|
2890
|
2887
|
return NULL;
|
2891
|
2888
|
|
2892
|
|
- if (objnum > pdf->nobjs)
|
|
2889
|
+ fd = open(newobj->path, O_RDONLY);
|
|
2890
|
+ if (fd == -1) {
|
|
2891
|
+ cli_unlink(newobj->path);
|
|
2892
|
+ free(newobj->path);
|
2893
|
2893
|
return NULL;
|
|
2894
|
+ }
|
|
2895
|
+
|
|
2896
|
+ if (FSTAT(fd, &sb)) {
|
|
2897
|
+ close(fd);
|
|
2898
|
+ cli_unlink(newobj->path);
|
|
2899
|
+ free(newobj->path);
|
|
2900
|
+ }
|
|
2901
|
+
|
|
2902
|
+ res = cli_calloc(1, sb.st_size);
|
|
2903
|
+ if (!(res)) {
|
|
2904
|
+ close(fd);
|
|
2905
|
+ cli_unlink(newobj->path);
|
|
2906
|
+ free(newobj->path);
|
|
2907
|
+ }
|
|
2908
|
+
|
|
2909
|
+ read(fd, res, sb.st_size);
|
|
2910
|
+
|
|
2911
|
+ close(fd);
|
|
2912
|
+ cli_unlink(newobj->path);
|
|
2913
|
+ free(newobj->path);
|
2894
|
2914
|
|
2895
|
|
- /* TODO: Parse the object, decode it if necessary, and return the string */
|
2896
|
|
- res = NULL;
|
2897
|
2915
|
return res;
|
2898
|
2916
|
}
|
2899
|
2917
|
|
...
|
...
|
@@ -3143,7 +3171,7 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_ac
|
3143
|
3143
|
return;
|
3144
|
3144
|
|
3145
|
3145
|
if (!(pdf->stats.author))
|
3146
|
|
- pdf->stats.author = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author");
|
|
3146
|
+ pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author");
|
3147
|
3147
|
#endif
|
3148
|
3148
|
}
|
3149
|
3149
|
|
...
|
...
|
@@ -3154,7 +3182,7 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_a
|
3154
|
3154
|
return;
|
3155
|
3155
|
|
3156
|
3156
|
if (!(pdf->stats.creator))
|
3157
|
|
- pdf->stats.creator = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator");
|
|
3157
|
+ pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator");
|
3158
|
3158
|
#endif
|
3159
|
3159
|
}
|
3160
|
3160
|
|
...
|
...
|
@@ -3165,7 +3193,7 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
|
3165
|
3165
|
return;
|
3166
|
3166
|
|
3167
|
3167
|
if (!(pdf->stats.modificationdate))
|
3168
|
|
- pdf->stats.modificationdate = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate");
|
|
3168
|
+ pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate");
|
3169
|
3169
|
#endif
|
3170
|
3170
|
}
|
3171
|
3171
|
|
...
|
...
|
@@ -3176,7 +3204,7 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
|
3176
|
3176
|
return;
|
3177
|
3177
|
|
3178
|
3178
|
if (!(pdf->stats.creationdate))
|
3179
|
|
- pdf->stats.creationdate = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate");
|
|
3179
|
+ pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate");
|
3180
|
3180
|
#endif
|
3181
|
3181
|
}
|
3182
|
3182
|
|
...
|
...
|
@@ -3187,7 +3215,7 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_
|
3187
|
3187
|
return;
|
3188
|
3188
|
|
3189
|
3189
|
if (!(pdf->stats.producer))
|
3190
|
|
- pdf->stats.producer = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer");
|
|
3190
|
+ pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer");
|
3191
|
3191
|
#endif
|
3192
|
3192
|
}
|
3193
|
3193
|
|