...
|
...
|
@@ -201,7 +201,15 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef
|
201
|
201
|
return 0;
|
202
|
202
|
}
|
203
|
203
|
|
204
|
|
-/* Expected returns: 1 if success, 0 if no more objects, -1 if error */
|
|
204
|
+/**
|
|
205
|
+ * @brief Finds the next object, adds it to our list of objects, and increments nobjs.
|
|
206
|
+ *
|
|
207
|
+ * @param pdf PDF structure
|
|
208
|
+ * @return int -1 if error
|
|
209
|
+ * @return int 0 if no more objects
|
|
210
|
+ * @return int 1 if success
|
|
211
|
+ * @return int 2 if an invalid object was discovered and may be skipped.
|
|
212
|
+ */
|
205
|
213
|
int pdf_findobj(struct pdf_struct *pdf)
|
206
|
214
|
{
|
207
|
215
|
const char *start, *q, *q2, *q3, *eof;
|
...
|
...
|
@@ -245,7 +253,9 @@ int pdf_findobj(struct pdf_struct *pdf)
|
245
|
245
|
|
246
|
246
|
if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&genid)) {
|
247
|
247
|
cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs);
|
248
|
|
- return -1;
|
|
248
|
+ /* Failed to parse, probably not a real object. Skip past the "obj" thing, and continue. */
|
|
249
|
+ pdf->offset = q2 + 4 - pdf->map;
|
|
250
|
+ return 2;
|
249
|
251
|
}
|
250
|
252
|
q = findNextNonWSBack(q-1,start);
|
251
|
253
|
while (q > start && isdigit(*q))
|
...
|
...
|
@@ -260,9 +270,11 @@ int pdf_findobj(struct pdf_struct *pdf)
|
260
|
260
|
if (q - 4 > start) {
|
261
|
261
|
const char* lastfile = q - 4;
|
262
|
262
|
if (0 != strncmp(lastfile, "\%\%EOF", 5)) {
|
263
|
|
- /* Nope, wasn't %%EOF, I guess just fail out. */
|
|
263
|
+ /* Nope, wasn't %%EOF */
|
264
|
264
|
cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
|
265
|
|
- return -1;
|
|
265
|
+ /* Skip past the "obj" thing, and continue. */
|
|
266
|
+ pdf->offset = q2 + 4 - pdf->map;
|
|
267
|
+ return 2;
|
266
|
268
|
}
|
267
|
269
|
/* Yup, looks like the file continues after %%EOF.
|
268
|
270
|
* Probably another revision. Keep parsing... */
|
...
|
...
|
@@ -271,12 +283,16 @@ int pdf_findobj(struct pdf_struct *pdf)
|
271
|
271
|
} else {
|
272
|
272
|
/* Failed parsing at the very beginning */
|
273
|
273
|
cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
|
274
|
|
- return -1;
|
|
274
|
+ /* Probably not a real object. Skip past the "obj" thing, and continue. */
|
|
275
|
+ pdf->offset = q2 + 4 - pdf->map;
|
|
276
|
+ return 2;
|
275
|
277
|
}
|
276
|
278
|
/* Try again, with offset slightly adjusted */
|
277
|
279
|
if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, (long*)&objid)) {
|
278
|
280
|
cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs);
|
279
|
|
- return -1;
|
|
281
|
+ /* Still failed... Probably not a real object. Skip past the "obj" thing, and continue. */
|
|
282
|
+ pdf->offset = q2 + 4 - pdf->map;
|
|
283
|
+ return 2;
|
280
|
284
|
}
|
281
|
285
|
cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n");
|
282
|
286
|
}
|
...
|
...
|
@@ -2540,9 +2556,19 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
|
2540
|
2540
|
|
2541
|
2541
|
/* parse PDF and find obj offsets */
|
2542
|
2542
|
while ((rc = pdf_findobj(&pdf)) > 0) {
|
2543
|
|
- struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
|
|
2543
|
+ if (rc == 1) {
|
|
2544
|
+ struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
|
2544
|
2545
|
|
2545
|
|
- cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + offset));
|
|
2546
|
+ cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + offset));
|
|
2547
|
+ }
|
|
2548
|
+ else if (rc == 2) {
|
|
2549
|
+ pdf.nobjs--;
|
|
2550
|
+ cli_dbgmsg("cli_pdf: Failed to parse object, likely an oversight in parser design.\n");
|
|
2551
|
+ }
|
|
2552
|
+ else {
|
|
2553
|
+ pdf.nobjs--;
|
|
2554
|
+ cli_dbgmsg("cli_pdf: unexpected return code %d.\n", rc);
|
|
2555
|
+ }
|
2546
|
2556
|
}
|
2547
|
2557
|
|
2548
|
2558
|
if (pdf.nobjs)
|