... | ... |
@@ -80,8 +80,10 @@ static int asciihexdecode(const char *buf, off_t len, char *output); |
80 | 80 |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
81 | 81 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
82 | 82 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
83 |
-static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str); |
|
84 |
-static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin); |
|
83 |
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar); |
|
84 |
+static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
85 |
+static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
86 |
+static void pdf_free_dict(struct pdf_dict *dict); |
|
85 | 87 |
static void pdf_free_array(struct pdf_array *array); |
86 | 88 |
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); |
87 | 89 |
static char *pdf_convert_utf(char *begin, size_t sz); |
... | ... |
@@ -2899,7 +2901,69 @@ static char *pdf_convert_utf(char *begin, size_t sz) |
2899 | 2899 |
#endif |
2900 | 2900 |
} |
2901 | 2901 |
|
2902 |
-static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str) |
|
2902 |
/*
 * Helper for is_object_reference(): skip leading whitespace, then consume one
 * unsigned decimal number that lies entirely inside [cur, end).
 *
 * Returns a pointer to the first character after the digits, or NULL when no
 * acceptable number is found.
 */
static char *objref_skip_number(char *cur, char *end)
{
    char *stop;
    unsigned long value;

    while (cur < end && isspace((unsigned char)cur[0]))
        cur++;

    if (cur >= end)
        return NULL;

    if (!isdigit((unsigned char)cur[0]))
        return NULL;

    /*
     * Require a whitespace character before the end of the buffer. The buffer
     * is not NUL-terminated, so this guarantees that strtoul() below stops
     * on a byte that is still inside [cur, end).
     */
    stop = cur + 1;
    while (stop < end && !isspace((unsigned char)stop[0]))
        stop++;

    if (stop >= end)
        return NULL;

    /* errno must be cleared first: strtoul() only sets it, never resets it. */
    errno = 0;
    value = strtoul(cur, &stop, 10);
    if (value == ULONG_MAX && errno)
        return NULL;

    return stop;
}

/*
 * Check whether the text at `begin` is an indirect object reference, i.e.
 * two unsigned decimal numbers followed by the letter 'R' ("12 0 R").
 * Whitespace before each of the three tokens is skipped.
 *
 * begin   - start of the text to examine.
 * endchar - in/out. On entry *endchar is the exclusive end of the readable
 *           text. When a reference is found, *endchar is set to the
 *           character just after the 'R'; otherwise it is left untouched.
 *
 * Returns 1 when a reference was recognised, 0 otherwise (including when
 * either argument is NULL).
 */
static int is_object_reference(char *begin, char **endchar)
{
    char *end, *cur;

    if (!begin || !endchar || !*endchar)
        return 0;

    end = *endchar;

    /* Object number */
    cur = objref_skip_number(begin, end);
    if (!cur)
        return 0;

    /* Generation number */
    cur = objref_skip_number(cur, end);
    if (!cur)
        return 0;

    while (cur < end && isspace((unsigned char)cur[0]))
        cur++;

    if (cur >= end)
        return 0;

    if (cur[0] != 'R')
        return 0;

    *endchar = cur + 1;
    return 1;
}
|
2963 |
+ |
|
2964 |
+static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar) |
|
2903 | 2965 |
{ |
2904 | 2966 |
const char *q = objstart; |
2905 | 2967 |
char *p1, *p2; |
... | ... |
@@ -2946,7 +3010,8 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
2946 | 2946 |
|
2947 | 2947 |
/* We should be at the start of the string, minus 1 */ |
2948 | 2948 |
|
2949 |
- if (isdigit(p1[0])) { |
|
2949 |
+ p2 = q + objsize; |
|
2950 |
+ if (is_object_reference(p1, &p2)) { |
|
2950 | 2951 |
unsigned long objnum, revnum; |
2951 | 2952 |
struct pdf_obj *newobj; |
2952 | 2953 |
char *end, *begin; |
... | ... |
@@ -2974,6 +3039,16 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
2974 | 2974 |
|
2975 | 2975 |
revnum = strtoul(p1, &end, 10); |
2976 | 2976 |
|
2977 |
+ p1 = end+1; |
|
2978 |
+ while (p1 - q < objsize && isspace(p1[0])) |
|
2979 |
+ p1++; |
|
2980 |
+ |
|
2981 |
+ if (p1 - q == objsize) |
|
2982 |
+ return NULL; |
|
2983 |
+ |
|
2984 |
+ if (p1[0] != 'R') |
|
2985 |
+ return NULL; |
|
2986 |
+ |
|
2977 | 2987 |
newobj = find_obj(pdf, obj, (objnum<<8) | (revnum & 0xff)); |
2978 | 2988 |
if (!(newobj)) |
2979 | 2989 |
return NULL; |
... | ... |
@@ -3038,7 +3113,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3038 | 3038 |
switch (begin[0]) { |
3039 | 3039 |
case '(': |
3040 | 3040 |
case '<': |
3041 |
- res = pdf_parse_string(pdf, obj, begin, sb.st_size, NULL); |
|
3041 |
+ res = pdf_parse_string(pdf, obj, begin, sb.st_size, NULL, NULL); |
|
3042 | 3042 |
free(begin); |
3043 | 3043 |
break; |
3044 | 3044 |
default: |
... | ... |
@@ -3055,6 +3130,9 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3055 | 3055 |
free(newobj->path); |
3056 | 3056 |
newobj->path = NULL; |
3057 | 3057 |
|
3058 |
+ if (endchar) |
|
3059 |
+ *endchar = p1; |
|
3060 |
+ |
|
3058 | 3061 |
return res; |
3059 | 3062 |
} |
3060 | 3063 |
|
... | ... |
@@ -3076,6 +3154,8 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3076 | 3076 |
return NULL; |
3077 | 3077 |
|
3078 | 3078 |
strncpy(res, p1, (p2 - p1) + 1); |
3079 |
+ if (endchar) |
|
3080 |
+ *endchar = p2; |
|
3079 | 3081 |
|
3080 | 3082 |
return res; |
3081 | 3083 |
} |
... | ... |
@@ -3130,20 +3210,227 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
3130 | 3130 |
|
3131 | 3131 |
memcpy(res, p1, len); |
3132 | 3132 |
res[len] = '\0'; |
3133 |
+ if (endchar) |
|
3134 |
+ *endchar = p2; |
|
3135 |
+ |
|
3133 | 3136 |
return res; |
3134 | 3137 |
} |
3135 | 3138 |
|
3136 |
- res = pdf_convert_utf(p1, len); |
|
3139 |
+ res = pdf_convert_utf(p1, len); |
|
3140 |
+ |
|
3141 |
+ if (res && endchar) |
|
3142 |
+ *endchar = p2; |
|
3143 |
+ |
|
3144 |
+ return res; |
|
3145 |
+} |
|
3146 |
+ |
|
3147 |
+static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar) |
|
3148 |
+{ |
|
3149 |
+ struct pdf_dict *res=NULL; |
|
3150 |
+ struct pdf_dict_node *node=NULL; |
|
3151 |
+ const char *objstart = obj->start + pdf->map; |
|
3152 |
+ char *end; |
|
3153 |
+ unsigned int in_string=0, ninner=0; |
|
3154 |
+ |
|
3155 |
+ /* Sanity checking */ |
|
3156 |
+ if (!(pdf) || !(obj) || !(begin)) |
|
3157 |
+ return NULL; |
|
3158 |
+ |
|
3159 |
+ if (begin < objstart || begin - objstart >= objsz - 2) |
|
3160 |
+ return NULL; |
|
3161 |
+ |
|
3162 |
+ if (begin[0] != '<' || begin[1] != '<') |
|
3163 |
+ return NULL; |
|
3164 |
+ |
|
3165 |
+ /* Find the end of the dictionary */ |
|
3166 |
+ end = begin; |
|
3167 |
+ while (end - objstart < objsz) { |
|
3168 |
+ if (in_string) { |
|
3169 |
+ if (*end == ')') |
|
3170 |
+ in_string = 0; |
|
3171 |
+ |
|
3172 |
+ end++; |
|
3173 |
+ continue; |
|
3174 |
+ } |
|
3175 |
+ |
|
3176 |
+ switch (*end) { |
|
3177 |
+ case '(': |
|
3178 |
+ in_string=1; |
|
3179 |
+ break; |
|
3180 |
+ case '<': |
|
3181 |
+ if (end - objstart <= objsz - 2 && end[1] == '<') |
|
3182 |
+ ninner++; |
|
3183 |
+ break; |
|
3184 |
+ case '>': |
|
3185 |
+ if (end - objstart <= objsz - 2 && end[1] == '>') |
|
3186 |
+ ninner--; |
|
3187 |
+ break; |
|
3188 |
+ case '\\': |
|
3189 |
+ end += 2; |
|
3190 |
+ if (end - objstart >= objsz) |
|
3191 |
+ return NULL; |
|
3192 |
+ } |
|
3193 |
+ |
|
3194 |
+ if (end - objstart <= objsz - 2) |
|
3195 |
+ if (end[0] == '>' && end[1] == '>' && ninner == 0) |
|
3196 |
+ break; |
|
3197 |
+ |
|
3198 |
+ end++; |
|
3199 |
+ } |
|
3200 |
+ |
|
3201 |
+ /* More sanity checking */ |
|
3202 |
+ if (end - objstart >= objsz - 1) |
|
3203 |
+ return NULL; |
|
3204 |
+ |
|
3205 |
+ if (end[0] != '>' || end[1] != '>') |
|
3206 |
+ return NULL; |
|
3207 |
+ |
|
3208 |
+ res = cli_calloc(1, sizeof(struct pdf_dict)); |
|
3209 |
+ if (!(res)) |
|
3210 |
+ return NULL; |
|
3211 |
+ |
|
3212 |
+ /* Loop through each element of the dictionary */ |
|
3213 |
+ begin += 2; |
|
3214 |
+ while (begin < end) { |
|
3215 |
+ char *val=NULL, *key=NULL, *p1; |
|
3216 |
+ struct pdf_dict *dict=NULL; |
|
3217 |
+ struct pdf_array *arr=NULL; |
|
3218 |
+ |
|
3219 |
+ /* Skip any whitespaces */ |
|
3220 |
+ while (begin < end && isspace(begin[0])) |
|
3221 |
+ begin++; |
|
3222 |
+ |
|
3223 |
+ if (begin == end) |
|
3224 |
+ break; |
|
3225 |
+ |
|
3226 |
+ /* Get the key */ |
|
3227 |
+ p1 = begin+1; |
|
3228 |
+ while (p1 < end && isalpha(p1[0])) |
|
3229 |
+ p1++; |
|
3230 |
+ |
|
3231 |
+ if (p1 == end) |
|
3232 |
+ break; |
|
3233 |
+ |
|
3234 |
+ key = cli_calloc((p1 - begin) + 2, 1); |
|
3235 |
+ if (!(key)) |
|
3236 |
+ break; |
|
3237 |
+ |
|
3238 |
+ strncpy(key, begin, p1 - begin); |
|
3239 |
+ key[p1 - begin] = '\0'; |
|
3240 |
+ |
|
3241 |
+ /* Now for the value */ |
|
3242 |
+ begin = p1; |
|
3243 |
+ |
|
3244 |
+ /* Skip any whitespaces */ |
|
3245 |
+ while (begin < end && isspace(begin[0])) |
|
3246 |
+ begin++; |
|
3247 |
+ |
|
3248 |
+ if (begin == end) { |
|
3249 |
+ free(key); |
|
3250 |
+ break; |
|
3251 |
+ } |
|
3252 |
+ |
|
3253 |
+ switch (begin[0]) { |
|
3254 |
+ case '(': |
|
3255 |
+ val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1); |
|
3256 |
+ begin = p1+2; |
|
3257 |
+ break; |
|
3258 |
+ case '[': |
|
3259 |
+ arr = pdf_parse_array(pdf, obj, objsz, begin, &p1); |
|
3260 |
+ begin = p1+1; |
|
3261 |
+ break; |
|
3262 |
+ case '<': |
|
3263 |
+ if (begin - objstart < objsz - 2) { |
|
3264 |
+ if (begin[1] == '<') { |
|
3265 |
+ dict = pdf_parse_dict(pdf, obj, objsz, begin, &p1); |
|
3266 |
+ begin = p1+2; |
|
3267 |
+ break; |
|
3268 |
+ } |
|
3269 |
+ } |
|
3270 |
+ |
|
3271 |
+ val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1); |
|
3272 |
+ begin = p1+2; |
|
3273 |
+ break; |
|
3274 |
+ default: |
|
3275 |
+ p1 = (begin[0] == '/') ? begin+1 : begin; |
|
3276 |
+ while (p1 < end) { |
|
3277 |
+ int shouldbreak = 0; |
|
3278 |
+ switch (p1[0]) { |
|
3279 |
+ case '>': |
|
3280 |
+ case '/': |
|
3281 |
+ shouldbreak=1; |
|
3282 |
+ break; |
|
3283 |
+ } |
|
3284 |
+ |
|
3285 |
+ if (shouldbreak) |
|
3286 |
+ break; |
|
3287 |
+ |
|
3288 |
+ p1++; |
|
3289 |
+ } |
|
3290 |
+ |
|
3291 |
+ is_object_reference(begin, &p1); |
|
3292 |
+ |
|
3293 |
+ val = cli_calloc((p1 - begin) + 2, 1); |
|
3294 |
+ if (!(val)) |
|
3295 |
+ break; |
|
3296 |
+ |
|
3297 |
+ strncpy(val, begin, p1 - begin); |
|
3298 |
+ val[p1 - begin] = '\0'; |
|
3299 |
+ |
|
3300 |
+ if (p1[0] != '/') |
|
3301 |
+ begin = p1+1; |
|
3302 |
+ else |
|
3303 |
+ begin = p1; |
|
3304 |
+ |
|
3305 |
+ break; |
|
3306 |
+ } |
|
3307 |
+ |
|
3308 |
+ if (!(val) && !(dict) && !(arr)) |
|
3309 |
+ break; |
|
3310 |
+ |
|
3311 |
+ if (!(res->nodes)) { |
|
3312 |
+ res->nodes = res->tail = node = cli_calloc(1, sizeof(struct pdf_dict_node)); |
|
3313 |
+ if (!(node)) |
|
3314 |
+ break; |
|
3315 |
+ } else { |
|
3316 |
+ node = calloc(1, sizeof(struct pdf_dict_node)); |
|
3317 |
+ if (!(node)) |
|
3318 |
+ break; |
|
3319 |
+ |
|
3320 |
+ node->prev = res->tail; |
|
3321 |
+ if (res->tail) |
|
3322 |
+ res->tail->next = node; |
|
3323 |
+ res->tail = node; |
|
3324 |
+ } |
|
3325 |
+ |
|
3326 |
+ node->key = key; |
|
3327 |
+ if ((val)) { |
|
3328 |
+ node->value = val; |
|
3329 |
+ node->valuesz = strlen(val); |
|
3330 |
+ node->type = PDF_DICT_STRING; |
|
3331 |
+ } else if ((arr)) { |
|
3332 |
+ node->value = arr; |
|
3333 |
+ node->valuesz = sizeof(struct pdf_array); |
|
3334 |
+ node->type = PDF_DICT_ARRAY; |
|
3335 |
+ } else if ((dict)) { |
|
3336 |
+ node->value = dict; |
|
3337 |
+ node->valuesz = sizeof(struct pdf_dict); |
|
3338 |
+ node->type = PDF_DICT_DICT; |
|
3339 |
+ } |
|
3340 |
+ } |
|
3341 |
+ |
|
3342 |
+ if (endchar) |
|
3343 |
+ *endchar = end; |
|
3137 | 3344 |
|
3138 | 3345 |
return res; |
3139 | 3346 |
} |
3140 | 3347 |
|
3141 |
-static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin) |
|
3348 |
+static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar) |
|
3142 | 3349 |
{ |
3143 | 3350 |
struct pdf_array *res=NULL; |
3144 | 3351 |
struct pdf_array_node *node=NULL; |
3145 | 3352 |
const char *objstart = obj->start + pdf->map; |
3146 |
- char *end; |
|
3353 |
+ char *end, *tempend; |
|
3147 | 3354 |
int in_string=0, ninner=0; |
3148 | 3355 |
|
3149 | 3356 |
/* Sanity checking */ |
... | ... |
@@ -3204,6 +3491,7 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
3204 | 3204 |
while (begin < end) { |
3205 | 3205 |
char *val=NULL, *p1; |
3206 | 3206 |
struct pdf_array *arr=NULL; |
3207 |
+ struct pdf_dict *dict=NULL; |
|
3207 | 3208 |
|
3208 | 3209 |
while (begin < end && isspace(begin[0])) |
3209 | 3210 |
begin++; |
... | ... |
@@ -3214,23 +3502,26 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
3214 | 3214 |
switch (begin[0]) { |
3215 | 3215 |
case '<': |
3216 | 3216 |
if (begin - objstart < objsz - 2 && begin[1] == '<') { |
3217 |
- /* Handle dictionaries later */ |
|
3217 |
+ dict = pdf_parse_dict(pdf, obj, objsz, begin, &begin); |
|
3218 | 3218 |
break; |
3219 | 3219 |
} |
3220 | 3220 |
|
3221 | 3221 |
/* Not a dictionary. Intentionally fall through. */ |
3222 | 3222 |
case '(': |
3223 |
- val = pdf_parse_string(pdf, obj, begin, objsz, NULL); |
|
3223 |
+ val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin); |
|
3224 | 3224 |
break; |
3225 | 3225 |
case '[': |
3226 | 3226 |
/* XXX We should have a recursion counter here */ |
3227 |
- arr = pdf_parse_array(pdf, obj, objsz, begin); |
|
3227 |
+ arr = pdf_parse_array(pdf, obj, objsz, begin, &begin); |
|
3228 | 3228 |
break; |
3229 | 3229 |
default: |
3230 | 3230 |
/* This should just be a number or the letter R */ |
3231 |
- p1 = begin+1; |
|
3232 |
- while (p1 < end && !isspace(p1[0])) |
|
3233 |
- p1++; |
|
3231 |
+ p1 = end; |
|
3232 |
+ if (!is_object_reference(begin, &p1)) { |
|
3233 |
+ p1 = begin+1; |
|
3234 |
+ while (p1 < end && !isspace(p1[0])) |
|
3235 |
+ p1++; |
|
3236 |
+ } |
|
3234 | 3237 |
|
3235 | 3238 |
val = cli_calloc((p1 - begin) + 2, 1); |
3236 | 3239 |
if (!(val)) |
... | ... |
@@ -3238,12 +3529,13 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
3238 | 3238 |
|
3239 | 3239 |
strncpy(val, begin, p1 - begin); |
3240 | 3240 |
val[p1 - begin] = '\0'; |
3241 |
- |
|
3241 |
+ |
|
3242 |
+ begin = p1; |
|
3242 | 3243 |
break; |
3243 | 3244 |
} |
3244 | 3245 |
|
3245 | 3246 |
/* Parse error, just return what we could */ |
3246 |
- if (!(val) && !(arr)) |
|
3247 |
+ if (!(val) && !(arr) && !(dict)) |
|
3247 | 3248 |
break; |
3248 | 3249 |
|
3249 | 3250 |
if (!(node)) { |
... | ... |
@@ -3265,21 +3557,48 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
3265 | 3265 |
node->type = PDF_ARR_STRING; |
3266 | 3266 |
node->data = val; |
3267 | 3267 |
node->datasz = strlen(val); |
3268 |
+ } else if (dict != NULL) { |
|
3269 |
+ node->type = PDF_ARR_DICT; |
|
3270 |
+ node->data = dict; |
|
3271 |
+ node->datasz = sizeof(struct pdf_dict); |
|
3268 | 3272 |
} else { |
3269 | 3273 |
node->type = PDF_ARR_ARRAY; |
3270 | 3274 |
node->data = arr; |
3271 | 3275 |
node->datasz = sizeof(struct pdf_array); |
3272 | 3276 |
} |
3273 |
- |
|
3274 |
- begin++; |
|
3275 | 3277 |
} |
3276 | 3278 |
|
3279 |
+ if (endchar) |
|
3280 |
+ *endchar = end; |
|
3281 |
+ |
|
3277 | 3282 |
return res; |
3278 | 3283 |
} |
3279 | 3284 |
|
3285 |
+static void pdf_free_dict(struct pdf_dict *dict) |
|
3286 |
+{ |
|
3287 |
+ struct pdf_dict_node *node, *next; |
|
3288 |
+ |
|
3289 |
+ node = dict->nodes; |
|
3290 |
+ while (node != NULL) { |
|
3291 |
+ free(node->key); |
|
3292 |
+ |
|
3293 |
+ if (node->type == PDF_DICT_STRING) |
|
3294 |
+ free(node->value); |
|
3295 |
+ else if (node->type == PDF_DICT_ARRAY) |
|
3296 |
+ pdf_free_array((struct pdf_array *)(node->value)); |
|
3297 |
+ else if (node->type == PDF_DICT_DICT) |
|
3298 |
+ pdf_free_dict((struct pdf_dict *)(node->value)); |
|
3299 |
+ |
|
3300 |
+ next = node->next; |
|
3301 |
+ free(node); |
|
3302 |
+ node = next; |
|
3303 |
+ } |
|
3304 |
+ |
|
3305 |
+ free(dict); |
|
3306 |
+} |
|
3307 |
+ |
|
3280 | 3308 |
static void pdf_free_array(struct pdf_array *array) |
3281 | 3309 |
{ |
3282 |
- struct pdf_array *arr; |
|
3283 | 3310 |
struct pdf_array_node *node, *next; |
3284 | 3311 |
|
3285 | 3312 |
if (!(array)) |
... | ... |
@@ -3289,6 +3608,8 @@ static void pdf_free_array(struct pdf_array *array) |
3289 | 3289 |
while (node != NULL) { |
3290 | 3290 |
if (node->type == PDF_ARR_ARRAY) |
3291 | 3291 |
pdf_free_array((struct pdf_array *)(node->data)); |
3292 |
+ else if (node->type == PDF_ARR_DICT) |
|
3293 |
+ pdf_free_dict((struct pdf_dict *)(node->data)); |
|
3292 | 3294 |
else |
3293 | 3295 |
free(node->data); |
3294 | 3296 |
|
... | ... |
@@ -3313,6 +3634,20 @@ static void pdf_print_array(struct pdf_array *array, unsigned long depth) |
3313 | 3313 |
} |
3314 | 3314 |
} |
3315 | 3315 |
|
3316 |
+static void pdf_print_dict(struct pdf_dict *dict, unsigned long depth) |
|
3317 |
+{ |
|
3318 |
+ struct pdf_dict_node *node; |
|
3319 |
+ |
|
3320 |
+ for (node = dict->nodes; node != NULL; node = node->next) { |
|
3321 |
+ if (node->type == PDF_DICT_STRING) |
|
3322 |
+ cli_errmsg("dict[%lu][%s]: %s\n", depth, node->key, (char *)(node->value)); |
|
3323 |
+ else if (node->type == PDF_DICT_ARRAY) |
|
3324 |
+ pdf_print_array((struct pdf_array *)(node->value), depth); |
|
3325 |
+ else if (node->type == PDF_DICT_DICT) |
|
3326 |
+ pdf_print_dict((struct pdf_dict *)(node->value), depth+1); |
|
3327 |
+ } |
|
3328 |
+} |
|
3329 |
+ |
|
3316 | 3330 |
/* PDF statistics */ |
3317 | 3331 |
static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_action *act) |
3318 | 3332 |
{ |
... | ... |
@@ -3499,7 +3834,7 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_ac |
3499 | 3499 |
return; |
3500 | 3500 |
|
3501 | 3501 |
if (!(pdf->stats.author)) |
3502 |
- pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author"); |
|
3502 |
+ pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL); |
|
3503 | 3503 |
#endif |
3504 | 3504 |
} |
3505 | 3505 |
|
... | ... |
@@ -3510,7 +3845,7 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_a |
3510 | 3510 |
return; |
3511 | 3511 |
|
3512 | 3512 |
if (!(pdf->stats.creator)) |
3513 |
- pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator"); |
|
3513 |
+ pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL); |
|
3514 | 3514 |
#endif |
3515 | 3515 |
} |
3516 | 3516 |
|
... | ... |
@@ -3521,7 +3856,7 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str |
3521 | 3521 |
return; |
3522 | 3522 |
|
3523 | 3523 |
if (!(pdf->stats.modificationdate)) |
3524 |
- pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate"); |
|
3524 |
+ pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL); |
|
3525 | 3525 |
#endif |
3526 | 3526 |
} |
3527 | 3527 |
|
... | ... |
@@ -3532,7 +3867,7 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
3532 | 3532 |
return; |
3533 | 3533 |
|
3534 | 3534 |
if (!(pdf->stats.creationdate)) |
3535 |
- pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate"); |
|
3535 |
+ pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL); |
|
3536 | 3536 |
#endif |
3537 | 3537 |
} |
3538 | 3538 |
|
... | ... |
@@ -3543,7 +3878,7 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_ |
3543 | 3543 |
return; |
3544 | 3544 |
|
3545 | 3545 |
if (!(pdf->stats.producer)) |
3546 |
- pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer"); |
|
3546 |
+ pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL); |
|
3547 | 3547 |
#endif |
3548 | 3548 |
} |
3549 | 3549 |
|
... | ... |
@@ -3554,7 +3889,7 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_act |
3554 | 3554 |
return; |
3555 | 3555 |
|
3556 | 3556 |
if (!(pdf->stats.title)) |
3557 |
- pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title"); |
|
3557 |
+ pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL); |
|
3558 | 3558 |
#endif |
3559 | 3559 |
} |
3560 | 3560 |
|
... | ... |
@@ -3565,7 +3900,7 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_ |
3565 | 3565 |
return; |
3566 | 3566 |
|
3567 | 3567 |
if (!(pdf->stats.keywords)) |
3568 |
- pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords"); |
|
3568 |
+ pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL); |
|
3569 | 3569 |
#endif |
3570 | 3570 |
} |
3571 | 3571 |
|
... | ... |
@@ -3576,7 +3911,7 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_a |
3576 | 3576 |
return; |
3577 | 3577 |
|
3578 | 3578 |
if (!(pdf->stats.subject)) |
3579 |
- pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject"); |
|
3579 |
+ pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL); |
|
3580 | 3580 |
#endif |
3581 | 3581 |
} |
3582 | 3582 |
|
... | ... |
@@ -3589,6 +3924,7 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_act |
3589 | 3589 |
unsigned int objsz = obj_size(pdf, obj, 1); |
3590 | 3590 |
unsigned long npages=0, count; |
3591 | 3591 |
struct pdf_array_node *node; |
3592 |
+ struct pdf_dict *dict; |
|
3592 | 3593 |
json_object *pdfobj; |
3593 | 3594 |
|
3594 | 3595 |
if (!(pdf) || !(pdf->ctx->wrkproperty)) |
... | ... |
@@ -3598,21 +3934,29 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_act |
3598 | 3598 |
if (!(pdfobj)) |
3599 | 3599 |
return; |
3600 | 3600 |
|
3601 |
+ begin = cli_memstr(objstart, objsz, "<<", 2); |
|
3602 |
+ if (!(begin)) |
|
3603 |
+ return; |
|
3604 |
+ |
|
3605 |
+ dict = pdf_parse_dict(pdf, obj, objsz, begin, NULL); |
|
3606 |
+ if (dict) |
|
3607 |
+ pdf_free_dict(dict); |
|
3608 |
+ |
|
3601 | 3609 |
begin = cli_memstr(objstart, objsz, "/Kids", 5); |
3602 | 3610 |
if (!(begin)) |
3603 | 3611 |
return; |
3604 | 3612 |
|
3605 | 3613 |
begin += 5; |
3606 | 3614 |
|
3607 |
- array = pdf_parse_array(pdf, obj, objsz, begin); |
|
3608 |
- if (!(array)) |
|
3615 |
+ array = pdf_parse_array(pdf, obj, objsz, begin, NULL); |
|
3616 |
+ if (!(array)) { |
|
3617 |
+ cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
|
3609 | 3618 |
return; |
3610 |
- |
|
3611 |
- pdf_print_array(array, 0); |
|
3619 |
+ } |
|
3612 | 3620 |
|
3613 | 3621 |
for (node = array->nodes; node != NULL; node = node->next) |
3614 | 3622 |
if (node->datasz) |
3615 |
- if (((char *)(node->data))[0] == 'R') |
|
3623 |
+ if (strchr((char *)(node->data), 'R')) |
|
3616 | 3624 |
npages++; |
3617 | 3625 |
|
3618 | 3626 |
begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6); |
... | ... |
@@ -30,7 +30,8 @@ struct pdf_obj { |
30 | 30 |
char *path; |
31 | 31 |
}; |
32 | 32 |
|
33 |
-enum pdf_array_type { PDF_ARR_UNKNOWN=0, PDF_ARR_STRING, PDF_ARR_ARRAY }; |
|
33 |
/* Kind of payload stored in a struct pdf_array_node */
enum pdf_array_type {
    PDF_ARR_UNKNOWN = 0,
    PDF_ARR_STRING  = 1, /* data is a NUL-terminated string */
    PDF_ARR_ARRAY   = 2, /* data is a nested struct pdf_array */
    PDF_ARR_DICT    = 3  /* data is a nested struct pdf_dict */
};
|
34 |
/* Kind of payload stored in a struct pdf_dict_node */
enum pdf_dict_type {
    PDF_DICT_UNKNOWN = 0,
    PDF_DICT_STRING  = 1, /* value is a NUL-terminated string */
    PDF_DICT_ARRAY   = 2, /* value is a struct pdf_array */
    PDF_DICT_DICT    = 3  /* value is a nested struct pdf_dict */
};
|
34 | 35 |
|
35 | 36 |
struct pdf_array_node { |
36 | 37 |
void *data; |
... | ... |
@@ -46,6 +47,21 @@ struct pdf_array { |
46 | 46 |
struct pdf_array_node *tail; |
47 | 47 |
}; |
48 | 48 |
|
49 |
+struct pdf_dict_node { |
|
50 |
+ char *key; |
|
51 |
+ void *value; |
|
52 |
+ size_t valuesz; |
|
53 |
+ enum pdf_dict_type type; |
|
54 |
+ |
|
55 |
+ struct pdf_dict_node *prev; |
|
56 |
+ struct pdf_dict_node *next; |
|
57 |
+}; |
|
58 |
+ |
|
59 |
/* A parsed PDF dictionary: head and tail of its list of entries */
struct pdf_dict {
    struct pdf_dict_node *nodes; /* first entry, NULL when the dictionary is empty */
    struct pdf_dict_node *tail;  /* last entry, where new entries are appended */
};
|
63 |
+ |
|
49 | 64 |
#define OBJ_FLAG_PDFNAME_NONE 0x0 |
50 | 65 |
#define OBJ_FLAG_PDFNAME_DONE 0x1 |
51 | 66 |
|