... | ... |
@@ -83,6 +83,7 @@ static const char *pdf_nextobject(const char *ptr, size_t len); |
83 | 83 |
static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar); |
84 | 84 |
static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
85 | 85 |
static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
86 |
+static int is_object_reference(char *begin, char **endchar, uint32_t *id); |
|
86 | 87 |
static void pdf_free_dict(struct pdf_dict *dict); |
87 | 88 |
static void pdf_free_array(struct pdf_array *array); |
88 | 89 |
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); |
... | ... |
@@ -2901,12 +2902,23 @@ static char *pdf_convert_utf(char *begin, size_t sz) |
2901 | 2901 |
#endif |
2902 | 2902 |
} |
2903 | 2903 |
|
2904 |
-static int is_object_reference(char *begin, char **endchar) |
|
2904 |
+static int is_object_reference(char *begin, char **endchar, uint32_t *id) |
|
2905 | 2905 |
{ |
2906 | 2906 |
char *end = *endchar; |
2907 | 2907 |
char *p1=begin, *p2; |
2908 | 2908 |
unsigned long n; |
2909 |
+ uint32_t t=0; |
|
2909 | 2910 |
|
2911 |
+ /* |
|
2912 |
+ * Object references are always this format: |
|
2913 |
+ * XXXX YYYY R |
|
2914 |
+ * Where XXXX is the object ID and YYYY is the revision ID of the object. |
|
2915 |
+ * The letter R signifies that this is a reference. |
|
2916 |
+ * |
|
2917 |
+ * In between each item can be an arbitrary amount of whitespace. |
|
2918 |
+ */ |
|
2919 |
+ |
|
2920 |
+ /* Skip whitespace */ |
|
2910 | 2921 |
while (p1 < end && isspace(p1[0])) |
2911 | 2922 |
p1++; |
2912 | 2923 |
|
... | ... |
@@ -2916,6 +2928,7 @@ static int is_object_reference(char *begin, char **endchar) |
2916 | 2916 |
if (!isnumber(p1[0])) |
2917 | 2917 |
return 0; |
2918 | 2918 |
|
2919 |
+ /* Ensure strtoul() isn't going to go past our buffer */ |
|
2919 | 2920 |
p2 = p1+1; |
2920 | 2921 |
while (p2 < end && !isspace(p2[0])) |
2921 | 2922 |
p2++; |
... | ... |
@@ -2927,6 +2940,9 @@ static int is_object_reference(char *begin, char **endchar) |
2927 | 2927 |
if (n == ULONG_MAX && errno) |
2928 | 2928 |
return 0; |
2929 | 2929 |
|
2930 |
+ t = n<<8; |
|
2931 |
+ |
|
2932 |
+ /* Skip more whitespace */ |
|
2930 | 2933 |
p1 = p2; |
2931 | 2934 |
while (p1 < end && isspace(p1[0])) |
2932 | 2935 |
p1++; |
... | ... |
@@ -2937,6 +2953,7 @@ static int is_object_reference(char *begin, char **endchar) |
2937 | 2937 |
if (!isnumber(p1[0])) |
2938 | 2938 |
return 0; |
2939 | 2939 |
|
2940 |
+ /* Ensure strtoul() isn't going to go past our buffer */ |
|
2940 | 2941 |
p2 = p1+1; |
2941 | 2942 |
while (p2 < end && !isspace(p2[0])) |
2942 | 2943 |
p2++; |
... | ... |
@@ -2948,6 +2965,8 @@ static int is_object_reference(char *begin, char **endchar) |
2948 | 2948 |
if (n == ULONG_MAX && errno) |
2949 | 2949 |
return 0; |
2950 | 2950 |
|
2951 |
+ t |= (n&0xff); |
|
2952 |
+ |
|
2951 | 2953 |
p1 = p2; |
2952 | 2954 |
while (p1 < end && isspace(p1[0])) |
2953 | 2955 |
p1++; |
... | ... |
@@ -2957,6 +2976,9 @@ static int is_object_reference(char *begin, char **endchar) |
2957 | 2957 |
|
2958 | 2958 |
if (p1[0] == 'R') { |
2959 | 2959 |
*endchar = p1+1; |
2960 |
+ if (id) |
|
2961 |
+ *id = t; |
|
2962 |
+ |
|
2960 | 2963 |
return 1; |
2961 | 2964 |
} |
2962 | 2965 |
|
... | ... |
@@ -2971,6 +2993,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
2971 | 2971 |
char *buf, *outbuf, *res; |
2972 | 2972 |
int likelyutf = 0; |
2973 | 2973 |
unsigned int i; |
2974 |
+ uint32_t objid; |
|
2974 | 2975 |
|
2975 | 2976 |
/* |
2976 | 2977 |
* Yes, all of this is required to find the start and end of a potentially UTF-* string |
... | ... |
@@ -3011,45 +3034,14 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3011 | 3011 |
/* We should be at the start of the string, minus 1 */ |
3012 | 3012 |
|
3013 | 3013 |
p2 = q + objsize; |
3014 |
- if (is_object_reference(p1, &p2)) { |
|
3015 |
- unsigned long objnum, revnum; |
|
3014 |
+ if (is_object_reference(p1, &p2, &objid)) { |
|
3016 | 3015 |
struct pdf_obj *newobj; |
3017 | 3016 |
char *end, *begin; |
3018 | 3017 |
STATBUF sb; |
3019 | 3018 |
uint32_t objflags; |
3020 | 3019 |
int fd; |
3021 | 3020 |
|
3022 |
- /* |
|
3023 |
- * This is kind of sketchy... This string says it points to another object. |
|
3024 |
- * Try to get/parse the object and return the decoded value as an ASCII/UTF-8 string. |
|
3025 |
- */ |
|
3026 |
- |
|
3027 |
- /* Get the object number */ |
|
3028 |
- objnum = strtoul(p1, &end, 10); |
|
3029 |
- if ((end - p1) == 0) |
|
3030 |
- return NULL; |
|
3031 |
- |
|
3032 |
- /* Skip whitespace and get the revision number */ |
|
3033 |
- p1 = end+1; |
|
3034 |
- while (p1 - q < objsize && isspace(p1[0])) |
|
3035 |
- p1++; |
|
3036 |
- |
|
3037 |
- if (p1 - q == objsize) |
|
3038 |
- return NULL; |
|
3039 |
- |
|
3040 |
- revnum = strtoul(p1, &end, 10); |
|
3041 |
- |
|
3042 |
- p1 = end+1; |
|
3043 |
- while (p1 - q < objsize && isspace(p1[0])) |
|
3044 |
- p1++; |
|
3045 |
- |
|
3046 |
- if (p1 - q == objsize) |
|
3047 |
- return NULL; |
|
3048 |
- |
|
3049 |
- if (p1[0] != 'R') |
|
3050 |
- return NULL; |
|
3051 |
- |
|
3052 |
- newobj = find_obj(pdf, obj, (objnum<<8) | (revnum & 0xff)); |
|
3021 |
+ newobj = find_obj(pdf, obj, objid); |
|
3053 | 3022 |
if (!(newobj)) |
3054 | 3023 |
return NULL; |
3055 | 3024 |
|
... | ... |
@@ -3131,7 +3123,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3131 | 3131 |
newobj->path = NULL; |
3132 | 3132 |
|
3133 | 3133 |
if (endchar) |
3134 |
- *endchar = p1; |
|
3134 |
+ *endchar = p2; |
|
3135 | 3135 |
|
3136 | 3136 |
return res; |
3137 | 3137 |
} |
... | ... |
@@ -3368,7 +3360,7 @@ static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *o |
3368 | 3368 |
p1++; |
3369 | 3369 |
} |
3370 | 3370 |
|
3371 |
- is_object_reference(begin, &p1); |
|
3371 |
+ is_object_reference(begin, &p1, NULL); |
|
3372 | 3372 |
|
3373 | 3373 |
val = cli_calloc((p1 - begin) + 2, 1); |
3374 | 3374 |
if (!(val)) |
... | ... |
@@ -3517,7 +3509,7 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
3517 | 3517 |
default: |
3518 | 3518 |
/* This should just be a number or the letter R */ |
3519 | 3519 |
p1 = end; |
3520 |
- if (!is_object_reference(begin, &p1)) { |
|
3520 |
+ if (!is_object_reference(begin, &p1, NULL)) { |
|
3521 | 3521 |
p1 = begin+1; |
3522 | 3522 |
while (p1 < end && !isspace(p1[0])) |
3523 | 3523 |
p1++; |
... | ... |
@@ -3639,12 +3631,14 @@ static void pdf_print_dict(struct pdf_dict *dict, unsigned long depth) |
3639 | 3639 |
struct pdf_dict_node *node; |
3640 | 3640 |
|
3641 | 3641 |
for (node = dict->nodes; node != NULL; node = node->next) { |
3642 |
- if (node->type == PDF_DICT_STRING) |
|
3642 |
+ if (node->type == PDF_DICT_STRING) { |
|
3643 | 3643 |
cli_errmsg("dict[%lu][%s]: %s\n", depth, node->key, (char *)(node->value)); |
3644 |
- else if (node->type == PDF_DICT_ARRAY) |
|
3644 |
+ } else if (node->type == PDF_DICT_ARRAY) { |
|
3645 |
+ cli_errmsg("dict[%lu][%s]: Array =>\n", depth, node->key); |
|
3645 | 3646 |
pdf_print_array((struct pdf_array *)(node->value), depth); |
3646 |
- else if (node->type == PDF_DICT_DICT) |
|
3647 |
+ } else if (node->type == PDF_DICT_DICT) { |
|
3647 | 3648 |
pdf_print_dict((struct pdf_dict *)(node->value), depth+1); |
3649 |
+ } |
|
3648 | 3650 |
} |
3649 | 3651 |
} |
3650 | 3652 |
|
... | ... |
@@ -3939,8 +3933,11 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_act |
3939 | 3939 |
return; |
3940 | 3940 |
|
3941 | 3941 |
dict = pdf_parse_dict(pdf, obj, objsz, begin, NULL); |
3942 |
- if (dict) |
|
3942 |
+ if (dict) { |
|
3943 |
+ cli_errmsg("==== ==== ==== ====\n"); |
|
3944 |
+ pdf_print_dict(dict, 0); |
|
3943 | 3945 |
pdf_free_dict(dict); |
3946 |
+ } |
|
3944 | 3947 |
|
3945 | 3948 |
begin = cli_memstr(objstart, objsz, "/Kids", 5); |
3946 | 3949 |
if (!(begin)) |