Browse code

If a string value is given as an indirect object reference, extract the referenced object and use its contents as the string.

Shawn Webb authored on 2014/06/11 02:21:31
Showing 2 changed files
... ...
@@ -71,13 +71,17 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
71 71
  *Save the file being worked on in tmp */
72 72
 #endif
73 73
 
74
+#define PDF_EXTRACT_OBJ_NONE 0x0
75
+#define PDF_EXTRACT_OBJ_SCAN 0x1
76
+
74 77
 struct pdf_struct;
75 78
 
76 79
 static	int	asciihexdecode(const char *buf, off_t len, char *output);
77 80
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
78 81
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
79 82
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
80
-static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size_t objsize, const char *str);
83
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str);
84
+static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags);
81 85
 
82 86
 /* PDF statistics callbacks and related */
83 87
 struct pdf_action;
... ...
@@ -577,8 +581,8 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, const
577 577
 
578 578
 static struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)
579 579
 {
580
-    unsigned j;
581
-    unsigned i;
580
+    uint32_t j;
581
+    uint32_t i;
582 582
 
583 583
     /* search starting at previous obj (if exists) */
584 584
     i = (obj != pdf->objs) ? obj - pdf->objs : 0;
... ...
@@ -1003,7 +1007,7 @@ static char *pdf_readval(const char *q, int len, const char *key);
1003 1003
 static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
1004 1004
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
1005 1005
 
1006
-static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
1006
+static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
1007 1007
 {
1008 1008
     char fullname[NAME_MAX + 1];
1009 1009
     int fout;
... ...
@@ -1046,6 +1050,9 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
1046 1046
         return CL_ETMPFILE;
1047 1047
     }
1048 1048
 
1049
+    if (!(flags & PDF_EXTRACT_OBJ_SCAN))
1050
+        obj->path = strdup(fullname);
1051
+
1049 1052
     do {
1050 1053
         if (obj->flags & (1 << OBJ_STREAM)) {
1051 1054
             const char *start = pdf->map + obj->start;
... ...
@@ -1294,7 +1301,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
1294 1294
 
1295 1295
     cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname);
1296 1296
 
1297
-    if (sum) {
1297
+    if (flags & PDF_EXTRACT_OBJ_SCAN && sum) {
1298 1298
         int rc2;
1299 1299
 
1300 1300
         cli_updatelimits(pdf->ctx, sum);
... ...
@@ -1327,7 +1334,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
1327 1327
     free(ascii_decoded);
1328 1328
     free(decrypted);
1329 1329
 
1330
-    if (!pdf->ctx->engine->keeptmp)
1330
+    if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp)
1331 1331
         if (cli_unlink(fullname) && rc != CL_VIRUS)
1332 1332
             rc = CL_EUNLINK;
1333 1333
 
... ...
@@ -2551,7 +2558,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2551 2551
     /* extract PDF objs */
2552 2552
     for (i=0;!rc && i<pdf.nobjs;i++) {
2553 2553
         struct pdf_obj *obj = &pdf.objs[i];
2554
-        rc = pdf_extract_obj(&pdf, obj);
2554
+        rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
2555 2555
         switch (rc) {
2556 2556
             case CL_EFORMAT:
2557 2557
                 /* Don't halt on one bad object */
... ...
@@ -2808,7 +2815,7 @@ pdf_nextobject(const char *ptr, size_t len)
2808 2808
     return NULL;
2809 2809
 }
2810 2810
 
2811
-static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size_t objsize, const char *str)
2811
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str)
2812 2812
 {
2813 2813
     const char *q = objstart;
2814 2814
     char *p1, *p2;
... ...
@@ -2863,8 +2870,11 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
2863 2863
     /* We should be at the start of the string, minus 1 */
2864 2864
 
2865 2865
     if (isdigit(p1[0])) {
2866
-        unsigned long objnum, revnum;
2866
+        unsigned long objnum;
2867
+        struct pdf_obj *newobj;
2867 2868
         char *end;
2869
+        STATBUF sb;
2870
+        int fd;
2868 2871
 
2869 2872
         /*
2870 2873
          * This is kind of sketchy... This string says it points to another object.
... ...
@@ -2876,24 +2886,42 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
2876 2876
         if ((end - p1) == 0)
2877 2877
             return NULL;
2878 2878
 
2879
-        /* Skip whitespaces */
2880
-        p1 = end;
2881
-        while ((p1 - q) < objsize && isspace(p1[0]))
2882
-            p1++;
2879
+        newobj = find_obj(pdf, obj, objnum);
2880
+        if (!(newobj))
2881
+            return NULL;
2883 2882
 
2884
-        if (p1 - q == objsize)
2883
+        if (pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE) != CL_SUCCESS)
2885 2884
             return NULL;
2886 2885
 
2887
-        /* Get revision number */
2888
-        revnum = strtoul(p1, &end, 10);
2889
-        if ((end - p1) == 0)
2886
+        if (!(newobj->path))
2890 2887
             return NULL;
2891 2888
 
2892
-        if (objnum > pdf->nobjs)
2889
+        fd = open(newobj->path, O_RDONLY);
2890
+        if (fd == -1) {
2891
+            cli_unlink(newobj->path);
2892
+            free(newobj->path);
2893 2893
             return NULL;
2894
+        }
2895
+
2896
+        if (FSTAT(fd, &sb)) {
2897
+            close(fd);
2898
+            cli_unlink(newobj->path);
2899
+            free(newobj->path);
2900
+        }
2901
+
2902
+        res = cli_calloc(1, sb.st_size);
2903
+        if (!(res)) {
2904
+            close(fd);
2905
+            cli_unlink(newobj->path);
2906
+            free(newobj->path);
2907
+        }
2908
+
2909
+        read(fd, res, sb.st_size);
2910
+
2911
+        close(fd);
2912
+        cli_unlink(newobj->path);
2913
+        free(newobj->path);
2894 2914
 
2895
-        /* TODO: Parse the object, decode it if necessary, and return the string */
2896
-        res = NULL;
2897 2915
         return res;
2898 2916
     }
2899 2917
 
... ...
@@ -3143,7 +3171,7 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_ac
3143 3143
         return;
3144 3144
 
3145 3145
     if (!(pdf->stats.author))
3146
-        pdf->stats.author = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author");
3146
+        pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author");
3147 3147
 #endif
3148 3148
 }
3149 3149
 
... ...
@@ -3154,7 +3182,7 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_a
3154 3154
         return;
3155 3155
 
3156 3156
     if (!(pdf->stats.creator))
3157
-        pdf->stats.creator = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator");
3157
+        pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator");
3158 3158
 #endif
3159 3159
 }
3160 3160
 
... ...
@@ -3165,7 +3193,7 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
3165 3165
         return;
3166 3166
 
3167 3167
     if (!(pdf->stats.modificationdate))
3168
-        pdf->stats.modificationdate = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate");
3168
+        pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate");
3169 3169
 #endif
3170 3170
 }
3171 3171
 
... ...
@@ -3176,7 +3204,7 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
3176 3176
         return;
3177 3177
 
3178 3178
     if (!(pdf->stats.creationdate))
3179
-        pdf->stats.creationdate = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate");
3179
+        pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate");
3180 3180
 #endif
3181 3181
 }
3182 3182
 
... ...
@@ -3187,7 +3215,7 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_
3187 3187
         return;
3188 3188
 
3189 3189
     if (!(pdf->stats.producer))
3190
-        pdf->stats.producer = pdf_parse_string(pdf, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer");
3190
+        pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer");
3191 3191
 #endif
3192 3192
 }
3193 3193
 
... ...
@@ -25,6 +25,7 @@ struct pdf_obj {
25 25
     uint32_t start;
26 26
     uint32_t id;
27 27
     uint32_t flags;
28
+    char *path;
28 29
 };
29 30
 
30 31
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset);