... | ... |
@@ -94,7 +94,7 @@ int cli_bytecode_context_setparam_int(struct cli_bc_ctx *ctx, unsigned i, uint64 |
94 | 94 |
int cli_bytecode_context_setparam_ptr(struct cli_bc_ctx *ctx, unsigned i, void *data, unsigned datalen); |
95 | 95 |
int cli_bytecode_context_setfile(struct cli_bc_ctx *ctx, fmap_t *map); |
96 | 96 |
int cli_bytecode_context_setpe(struct cli_bc_ctx *ctx, const struct cli_pe_hook_data *data, const struct cli_exe_section *sections); |
97 |
-int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, unsigned nobjs, struct pdf_obj *objs, uint32_t *pdf_flags, uint32_t pdfsize, uint32_t pdfstartoff); |
|
97 |
+int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, unsigned nobjs, struct pdf_obj **objs, uint32_t *pdf_flags, uint32_t pdfsize, uint32_t pdfstartoff); |
|
98 | 98 |
int cli_bytecode_context_clear(struct cli_bc_ctx *ctx); |
99 | 99 |
/* returns file descriptor, sets tempfile. Caller takes ownership, and is |
100 | 100 |
* responsible for freeing/unlinking */ |
... | ... |
@@ -1427,7 +1427,7 @@ uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx , uint32_t a, uint32_t |
1427 | 1427 |
|
1428 | 1428 |
int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, |
1429 | 1429 |
unsigned nobjs, |
1430 |
- struct pdf_obj *objs, uint32_t *pdf_flags, |
|
1430 |
+ struct pdf_obj **objs, uint32_t *pdf_flags, |
|
1431 | 1431 |
uint32_t pdfsize, uint32_t pdfstartoff) |
1432 | 1432 |
{ |
1433 | 1433 |
ctx->pdf_nobjs = nobjs; |
... | ... |
@@ -1470,7 +1470,7 @@ int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx , uint32_t objid) |
1470 | 1470 |
if (!ctx->pdf_phase) |
1471 | 1471 |
return -1; |
1472 | 1472 |
for (i=0;i<ctx->pdf_nobjs;i++) { |
1473 |
- if (ctx->pdf_objs[i].id == objid) |
|
1473 |
+ if (ctx->pdf_objs[i]->id == objid) |
|
1474 | 1474 |
return i; |
1475 | 1475 |
} |
1476 | 1476 |
return -1; |
... | ... |
@@ -1484,8 +1484,8 @@ uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx , int32_t objidx) |
1484 | 1484 |
) |
1485 | 1485 |
return 0; |
1486 | 1486 |
if ((uint32_t)(objidx + 1) == ctx->pdf_nobjs) |
1487 |
- return ctx->pdf_size - ctx->pdf_objs[objidx].start; |
|
1488 |
- return ctx->pdf_objs[objidx+1].start - ctx->pdf_objs[objidx].start - 4; |
|
1487 |
+ return ctx->pdf_size - ctx->pdf_objs[objidx]->start; |
|
1488 |
+ return ctx->pdf_objs[objidx+1]->start - ctx->pdf_objs[objidx]->start - 4; |
|
1489 | 1489 |
} |
1490 | 1490 |
|
1491 | 1491 |
const uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t objidx, uint32_t amount) |
... | ... |
@@ -1493,7 +1493,7 @@ const uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t objidx, uin |
1493 | 1493 |
uint32_t size = cli_bcapi_pdf_getobjsize(ctx, objidx); |
1494 | 1494 |
if (amount > size) |
1495 | 1495 |
return NULL; |
1496 |
- return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx].start, amount); |
|
1496 |
+ return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx]->start, amount); |
|
1497 | 1497 |
} |
1498 | 1498 |
|
1499 | 1499 |
int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t objidx) |
... | ... |
@@ -1501,7 +1501,7 @@ int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t objidx) |
1501 | 1501 |
if (!ctx->pdf_phase || |
1502 | 1502 |
(uint32_t)objidx >= ctx->pdf_nobjs) |
1503 | 1503 |
return -1; |
1504 |
- return ctx->pdf_objs[objidx].id; |
|
1504 |
+ return ctx->pdf_objs[objidx]->id; |
|
1505 | 1505 |
} |
1506 | 1506 |
|
1507 | 1507 |
int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t objidx) |
... | ... |
@@ -1509,7 +1509,7 @@ int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t objidx) |
1509 | 1509 |
if (!ctx->pdf_phase || |
1510 | 1510 |
(uint32_t)objidx >= ctx->pdf_nobjs) |
1511 | 1511 |
return -1; |
1512 |
- return ctx->pdf_objs[objidx].flags; |
|
1512 |
+ return ctx->pdf_objs[objidx]->flags; |
|
1513 | 1513 |
} |
1514 | 1514 |
|
1515 | 1515 |
int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t objidx, int32_t flags) |
... | ... |
@@ -1518,9 +1518,9 @@ int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t objidx, int32 |
1518 | 1518 |
(uint32_t)objidx >= ctx->pdf_nobjs) |
1519 | 1519 |
return -1; |
1520 | 1520 |
cli_dbgmsg("cli_pdf: bytecode setobjflags %08x -> %08x\n", |
1521 |
- ctx->pdf_objs[objidx].flags, |
|
1521 |
+ ctx->pdf_objs[objidx]->flags, |
|
1522 | 1522 |
flags); |
1523 |
- ctx->pdf_objs[objidx].flags = flags; |
|
1523 |
+ ctx->pdf_objs[objidx]->flags = flags; |
|
1524 | 1524 |
return 0; |
1525 | 1525 |
} |
1526 | 1526 |
|
... | ... |
@@ -1529,7 +1529,7 @@ int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx , int32_t objidx) |
1529 | 1529 |
if (!ctx->pdf_phase || |
1530 | 1530 |
(uint32_t)objidx >= ctx->pdf_nobjs) |
1531 | 1531 |
return -1; |
1532 |
- return ctx->pdf_startoff + ctx->pdf_objs[objidx].start; |
|
1532 |
+ return ctx->pdf_startoff + ctx->pdf_objs[objidx]->start; |
|
1533 | 1533 |
} |
1534 | 1534 |
|
1535 | 1535 |
int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx) |
... | ... |
@@ -119,6 +119,11 @@ static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_a |
119 | 119 |
#endif |
120 | 120 |
/* End PDF statistics callbacks and related */ |
121 | 121 |
|
122 |
+static int pdf_readint(const char *q0, int len, const char *key); |
|
123 |
+static const char *pdf_getdict(const char *q0, int* len, const char *key); |
|
124 |
+static char *pdf_readval(const char *q, int len, const char *key); |
|
125 |
+static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape); |
|
126 |
+ |
|
122 | 127 |
static int xrefCheck(const char *xref, const char *eof) |
123 | 128 |
{ |
124 | 129 |
const char *q; |
... | ... |
@@ -156,6 +161,14 @@ static int xrefCheck(const char *xref, const char *eof) |
156 | 156 |
#define noisy_warnmsg(...) |
157 | 157 |
#endif |
158 | 158 |
|
159 |
+/** |
|
160 |
+ * @brief Searching BACKwards, find the next character that is not a whitespace. |
|
161 |
+ * |
|
162 |
+ * @param q Index to start from (at the end of the search space) |
|
163 |
+ * @param start Beginning of the search space. |
|
164 |
+ * |
|
165 |
+ * @return const char* Address of the final non-whitespace character OR the same address as the start. |
|
166 |
+ */ |
|
159 | 167 |
static const char *findNextNonWSBack(const char *q, const char *start) |
160 | 168 |
{ |
161 | 169 |
while (q > start && (*q == 0 || *q == 9 || *q == 0xa || *q == 0xc || *q == 0xd || *q == 0x20)) |
... | ... |
@@ -164,15 +177,56 @@ static const char *findNextNonWSBack(const char *q, const char *start) |
164 | 164 |
return q; |
165 | 165 |
} |
166 | 166 |
|
167 |
-static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream, int newline_hack) |
|
167 |
+/** |
|
168 |
+ * @brief Searching FORwards, find the next character that is not a whitespace. |
|
169 |
+ * |
|
170 |
+ * @param q Index to start from (at the beginning of the search space) |
|
171 |
+ * @param end End of the search space (exclusive). |
|
172 |
+ * |
|
173 |
+ * @return const char* Address of the first non-whitespace character OR the same address as the end. |
|
174 |
+ */ |
|
175 |
+static const char *findNextNonWS(const char *q, const char *end) |
|
176 |
+{ |
|
177 |
+ while (q < end && (*q == 0 || *q == 9 || *q == 0xa || *q == 0xc || *q == 0xd || *q == 0x20)) |
|
178 |
+ q++; |
|
179 |
+ |
|
180 |
+ return q; |
|
181 |
+} |
|
182 |
+ |
|
183 |
+/** |
|
184 |
+ * @brief Find bounds of stream. |
|
185 |
+ * |
|
186 |
+ * PDF streams are prefixed with "stream" and suffixed with "endstream". |
|
187 |
+ * Return value indicates success or failure. |
|
188 |
+ * |
|
189 |
+ * @param start start address of search space. |
|
190 |
+ * @param bytesleft size of search space for "stream" |
|
191 |
+ * @param bytesleft2 size of search space for "endstream" |
|
192 |
+ * @param[out] stream output param, address of start of stream data |
|
193 |
+ * @param[out] endstream output param, address of end of stream data |
|
194 |
+ * @param newline_hack hack to support newlines that are \r\n, and not just \n or just \r. |
|
195 |
+ * |
|
196 |
+ * @return int 1 if stream bounds were found. |
|
197 |
+ * @return int 0 if stream bounds could not be found. |
|
198 |
+ */ |
|
199 |
+static int find_stream_bounds( |
|
200 |
+ const char *start, |
|
201 |
+ off_t bytesleft, |
|
202 |
+ off_t bytesleft2, |
|
203 |
+ off_t *stream, |
|
204 |
+ off_t *endstream, |
|
205 |
+ int newline_hack) |
|
168 | 206 |
{ |
169 | 207 |
const char *q2, *q; |
208 |
+ |
|
209 |
+ /* Begin by finding the "stream" string that prefixes stream data. */ |
|
170 | 210 |
if ((q2 = cli_memstr(start, bytesleft, "stream", 6))) { |
171 | 211 |
q2 += 6; |
172 | 212 |
bytesleft -= q2 - start; |
173 | 213 |
if (bytesleft < 0) |
174 | 214 |
return 0; |
175 | 215 |
|
216 |
+ /* Skip any new line characters. */ |
|
176 | 217 |
if (bytesleft >= 2 && q2[0] == '\xd' && q2[1] == '\xa') { |
177 | 218 |
q2 += 2; |
178 | 219 |
if (newline_hack && (bytesleft > 2) && q2[0] == '\xa') |
... | ... |
@@ -182,16 +236,23 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef |
182 | 182 |
} |
183 | 183 |
|
184 | 184 |
*stream = q2 - start; |
185 |
+ |
|
185 | 186 |
bytesleft2 -= q2 - start; |
186 | 187 |
if (bytesleft2 <= 0) |
187 | 188 |
return 0; |
188 | 189 |
|
190 |
+ /* Now find the "endstream" string that suffixes stream data */ |
|
189 | 191 |
q = q2; |
190 | 192 |
q2 = cli_memstr(q, bytesleft2, "endstream", 9); |
191 |
- if (!q2) |
|
193 |
+ if (!q2) { |
|
194 |
+ /* Couldn't find "endstream", but that's ok -- |
|
195 |
+ * -- we'll just count the data we have until EOF. */ |
|
192 | 196 |
q2 = q + bytesleft2-9; /* till EOF */ |
197 |
+ } |
|
193 | 198 |
|
194 | 199 |
*endstream = q2 - start; |
200 |
+ |
|
201 |
+ /* Double-check that endstream >= stream */ |
|
195 | 202 |
if (*endstream < *stream) |
196 | 203 |
*endstream = *stream; |
197 | 204 |
|
... | ... |
@@ -202,61 +263,273 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef |
202 | 202 |
} |
203 | 203 |
|
204 | 204 |
/** |
205 |
- * @brief Finds the next obj and adds it to our list of objects, and increments nobj. |
|
206 |
- * |
|
207 |
- * @param pdf PDF structure |
|
208 |
- * @return int -1 if error |
|
209 |
- * @return int 0 if no more objects |
|
210 |
- * @return int 1 if success |
|
211 |
- * @return int 2 if an invalid object was discovered, may be skipped. |
|
205 |
+ * @brief Find the next *indirect* object in an object stream, adds it to our list of |
|
206 |
+ * objects, and increments nobj. |
|
207 |
+ * |
|
208 |
+ * Indirect objects in a stream DON'T begin with "obj" and end with "endobj". |
|
209 |
+ * Instead, they have an obj ID and an offset from the first object to point you |
|
210 |
+ * right at them. |
|
211 |
+ * |
|
212 |
+ * If found, objstm->current will be set to the offset of that object (objstm->first + its listed offset). |
|
213 |
+ * |
|
214 |
+ * All objects in an object stream are indirect and thus do not begin or end |
|
215 |
+ * with "obj" or "endobj". Instead, the object stream takes the following |
|
216 |
+ * format. |
|
217 |
+ * |
|
218 |
+ * <dictionary describing stream> objstm content endobjstm |
|
219 |
+ * |
|
220 |
+ * where content looks something like the following: |
|
221 |
+ * |
|
222 |
+ * 15 0 16 3 17 46 (ab)<</IDS 8 0 R/JavaScript 27 0 R/URLS 9 0 R>><</Names[(Test)28 0 R]>> |
|
223 |
+ * |
|
224 |
+ * In the above example, the literal string (ab) is indirect object # 15, and |
|
225 |
+ * begins at offset 0 of the set of objects. The next object, # 16 begins at |
|
226 |
+ * offset 3 and is a dictionary. The final object is also a dictionary, beginning |
|
227 |
+ * at offset 46. |
|
228 |
+ * |
|
229 |
+ * @param pdf Pdf struct that keeps track of all information found in the PDF. |
|
230 |
+ * @param objstm Object stream being parsed; its current_pair, current and nobjs_found fields are updated. |
|
231 |
+ * |
|
232 |
+ * @return CL_SUCCESS if success |
|
233 |
+ * @return CL_EPARSE if parsing error |
|
234 |
+ * @return CL_EMEM if error allocating memory |
|
235 |
+ * @return CL_EARG if invalid arguments |
|
236 |
+ */ |
|
237 |
+int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm, struct pdf_obj **obj_found) |
|
238 |
+{ |
|
239 |
+ cl_error_t status = CL_EPARSE; |
|
240 |
+ struct pdf_obj *obj = NULL; |
|
241 |
+ unsigned long objid = 0, objsize = 0, objoff = 0; |
|
242 |
+ const char *index = NULL; |
|
243 |
+ size_t bytes_remaining = 0; |
|
244 |
+ |
|
245 |
+ if (NULL == pdf || NULL == objstm) { |
|
246 |
+ cli_warnmsg("pdf_findobj_in_objstm: invalid arguments\n"); |
|
247 |
+ return CL_EARG; |
|
248 |
+ } |
|
249 |
+ |
|
250 |
+ *obj_found = NULL; |
|
251 |
+ |
|
252 |
+ index = objstm->streambuf + objstm->current_pair; |
|
253 |
+ bytes_remaining = objstm->streambuf_len - objstm->current_pair; |
|
254 |
+ |
|
255 |
+ obj = calloc(sizeof(struct pdf_obj), 1); |
|
256 |
+ if (!obj) { |
|
257 |
+ cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n"); |
|
258 |
+ status = CL_EMEM; |
|
259 |
+ goto done; |
|
260 |
+ } |
|
261 |
+ |
|
262 |
+ /* This object is in a stream, not in the regular map buffer. */ |
|
263 |
+ obj->objstm = objstm; |
|
264 |
+ |
|
265 |
+ /* objstm->current_pair points directly to the obj id */ |
|
266 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &objid)) { |
|
267 |
+ /* Failed to find objid */ |
|
268 |
+ cli_dbgmsg("pdf_findobj_in_objstm: Failed to find objid for obj in object stream\n"); |
|
269 |
+ status = CL_EPARSE; |
|
270 |
+ goto done; |
|
271 |
+ } |
|
272 |
+ |
|
273 |
+ /* Find the obj offset that appears just after the obj id*/ |
|
274 |
+ while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { |
|
275 |
+ index++; |
|
276 |
+ bytes_remaining--; |
|
277 |
+ } |
|
278 |
+ index = findNextNonWS(index, objstm->streambuf + objstm->first); |
|
279 |
+ bytes_remaining = objstm->streambuf + objstm->streambuf_len - index; |
|
280 |
+ |
|
281 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &objoff)) { |
|
282 |
+ /* Failed to find obj offset */ |
|
283 |
+ cli_dbgmsg("pdf_findobj_in_objstm: Failed to find obj offset for obj in object stream\n"); |
|
284 |
+ status = CL_EPARSE; |
|
285 |
+ goto done; |
|
286 |
+ } |
|
287 |
+ |
|
288 |
+ objstm->current = objstm->first + objoff; |
|
289 |
+ |
|
290 |
+ obj->id = (objid << 8) | (0 & 0xff); |
|
291 |
+ obj->start = objstm->current; |
|
292 |
+ obj->flags = 0; |
|
293 |
+ |
|
294 |
+ objstm->nobjs_found++; |
|
295 |
+ |
|
296 |
+ while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { |
|
297 |
+ index++; |
|
298 |
+ bytes_remaining--; |
|
299 |
+ } |
|
300 |
+ objstm->current_pair = (uint32_t)(findNextNonWS(index, objstm->streambuf + objstm->first) - objstm->streambuf); |
|
301 |
+ |
|
302 |
+ /* Update current_pair, if there are more */ |
|
303 |
+ if ((objstm->nobjs_found < objstm->n) && |
|
304 |
+ (index < objstm->streambuf + objstm->streambuf_len)) |
|
305 |
+ { |
|
306 |
+ unsigned long next_objid = 0, next_objoff = 0; |
|
307 |
+ |
|
308 |
+ /* |
|
309 |
+ * While we're at it, |
|
310 |
+ * lets record the size as running up to the next object offset. |
|
311 |
+ * |
|
312 |
+ * To do so, we will need to parse the next obj pair. |
|
313 |
+ */ |
|
314 |
+ /* objstm->current_pair points directly to the obj id */ |
|
315 |
+ index = objstm->streambuf + objstm->current_pair; |
|
316 |
+ bytes_remaining = objstm->streambuf + objstm->streambuf_len - index; |
|
317 |
+ |
|
318 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &next_objid)) { |
|
319 |
+ /* Failed to find objid for next obj */ |
|
320 |
+ cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next objid for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); |
|
321 |
+ status = CL_EPARSE; |
|
322 |
+ goto done; |
|
323 |
+ } |
|
324 |
+ |
|
325 |
+ /* Find the obj offset that appears just after the obj id*/ |
|
326 |
+ while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { |
|
327 |
+ index++; |
|
328 |
+ bytes_remaining--; |
|
329 |
+ } |
|
330 |
+ index = findNextNonWS(index, objstm->streambuf + objstm->first); |
|
331 |
+ bytes_remaining = objstm->streambuf + objstm->streambuf_len - index; |
|
332 |
+ |
|
333 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &next_objoff)) { |
|
334 |
+ /* Failed to find obj offset for next obj */ |
|
335 |
+ cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); |
|
336 |
+ status = CL_EPARSE; |
|
337 |
+ goto done; |
|
338 |
+ } |
|
339 |
+ |
|
340 |
+ obj->size = objstm->first + next_objoff - obj->start; |
|
341 |
+ } |
|
342 |
+ else |
|
343 |
+ { |
|
344 |
+ /* |
|
345 |
+ * Should be no more objects. We should verify. |
|
346 |
+ * |
|
347 |
+ * Either way... |
|
348 |
+ * obj->size should be the rest of the buffer. |
|
349 |
+ */ |
|
350 |
+ if (objstm->nobjs_found < objstm->n) { |
|
351 |
+ cli_warnmsg("pdf_findobj_in_objstm: Fewer objects found in object stream than expected!\n"); |
|
352 |
+ } |
|
353 |
+ |
|
354 |
+ obj->size = objstm->streambuf_len - obj->start; |
|
355 |
+ } |
|
356 |
+ |
|
357 |
+ /* Success! Add the object to the list of all objects found. */ |
|
358 |
+ pdf->nobjs++; |
|
359 |
+ pdf->objs = cli_realloc2(pdf->objs, sizeof(struct pdf_obj*) * pdf->nobjs); |
|
360 |
+ if (!pdf->objs) { |
|
361 |
+ cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n"); |
|
362 |
+ status = CL_EMEM; |
|
363 |
+ goto done; |
|
364 |
+ } |
|
365 |
+ pdf->objs[pdf->nobjs-1] = obj; |
|
366 |
+ |
|
367 |
+ *obj_found = obj; |
|
368 |
+ |
|
369 |
+ status = CL_SUCCESS; |
|
370 |
+ |
|
371 |
+done: |
|
372 |
+ if (CL_SUCCESS != status) { |
|
373 |
+ if (NULL != obj) { |
|
374 |
+ free(obj); |
|
375 |
+ } |
|
376 |
+ } |
|
377 |
+ return status; |
|
378 |
+} |
|
379 |
+ |
|
380 |
+/** |
|
381 |
+ * @brief Find the next *indirect* object. |
|
382 |
+ * |
|
383 |
+ * Indirect objects begin with "obj" and end with "endobj". |
|
384 |
+ * Identify objects that contain streams. |
|
385 |
+ * Identify truncated objects. |
|
386 |
+ * |
|
387 |
+ * If found, pdf->offset will be updated to just after the "endobj". |
|
388 |
+ * If truncated, pdf->offset will == pdf->size. |
|
389 |
+ * If not found, pdf->offset will not be updated. |
|
390 |
+ * |
|
391 |
+ * @param pdf Pdf context struct that keeps track of all information found in the PDF. |
|
392 |
+ * |
|
393 |
+ * @return CL_SUCCESS if success |
|
394 |
+ * @return CL_BREAK if no more objects |
|
395 |
+ * @return CL_EPARSE if parsing error |
|
396 |
+ * @return CL_EMEM if error allocating memory |
|
212 | 397 |
*/ |
213 |
-int pdf_findobj(struct pdf_struct *pdf) |
|
398 |
+cl_error_t pdf_findobj(struct pdf_struct *pdf) |
|
214 | 399 |
{ |
400 |
+ cl_error_t status = CL_EPARSE; |
|
215 | 401 |
const char *start, *q, *q2, *q3, *eof; |
216 |
- struct pdf_obj *obj; |
|
402 |
+ struct pdf_obj *obj = NULL; |
|
217 | 403 |
off_t bytesleft; |
218 | 404 |
unsigned long genid, objid; |
219 | 405 |
|
220 | 406 |
pdf->nobjs++; |
221 |
- pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs); |
|
407 |
+ pdf->objs = cli_realloc2(pdf->objs, sizeof(struct pdf_obj*) * pdf->nobjs); |
|
222 | 408 |
if (!pdf->objs) { |
223 |
- cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs); |
|
224 |
- return -1; |
|
409 |
+ status = CL_EMEM; |
|
410 |
+ goto done; |
|
411 |
+ } |
|
412 |
+ |
|
413 |
+ obj = malloc(sizeof(struct pdf_obj)); |
|
414 |
+ if (!obj) { |
|
415 |
+ status = CL_EMEM; |
|
416 |
+ goto done; |
|
225 | 417 |
} |
418 |
+ pdf->objs[pdf->nobjs-1] = obj; |
|
226 | 419 |
|
227 |
- obj = &pdf->objs[pdf->nobjs-1]; |
|
228 | 420 |
memset(obj, 0, sizeof(*obj)); |
229 |
- start = pdf->map+pdf->offset; |
|
421 |
+ |
|
422 |
+ start = pdf->map + pdf->offset; |
|
230 | 423 |
bytesleft = pdf->size - pdf->offset; |
231 |
- while (bytesleft > 0) { |
|
424 |
+ |
|
425 |
+ /* Indirect objects located outside of an object stream are prefaced with "obj" |
|
426 |
+ * and suffixed with "endobj". Find the "obj" preface. */ |
|
427 |
+ while (bytesleft > 0) |
|
428 |
+ { |
|
232 | 429 |
q2 = cli_memstr(start, bytesleft, "obj", 3); |
233 |
- if (!q2) |
|
234 |
- return 0;/* no more objs */ |
|
430 |
+ if (!q2) { |
|
431 |
+ status = CL_BREAK; /* no more objs */ |
|
432 |
+ goto done; |
|
433 |
+ } |
|
235 | 434 |
|
435 |
+ /* verify that "obj" has a whitespace before it, and is not the end of |
|
436 |
+ * a previous string like... "globj" */ |
|
236 | 437 |
q2--; |
237 | 438 |
bytesleft -= q2 - start; |
439 |
+ |
|
238 | 440 |
if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) { |
441 |
+ /* This instance of the "obj" string appears to be part of another string. |
|
442 |
+ * Skip it, and keep searching for an object. */ |
|
239 | 443 |
start = q2+4; |
240 | 444 |
bytesleft -= 4; |
241 | 445 |
continue; |
242 | 446 |
} |
243 | 447 |
|
244 |
- break; |
|
448 |
+ break; /* Found it. q2 should point to the whitespace before the "obj" string */ |
|
245 | 449 |
} |
246 | 450 |
|
247 |
- if (bytesleft <= 0) |
|
248 |
- return 0; |
|
451 |
+ if (bytesleft <= 0) { |
|
452 |
+ status = CL_BREAK; /* No "obj" found. */ |
|
453 |
+ goto done; |
|
454 |
+ } |
|
455 |
+ |
|
456 |
+ /* "obj" found! */ |
|
249 | 457 |
|
458 |
+ /* Find the generation id (genid) that appears before the "obj" */ |
|
250 | 459 |
q = findNextNonWSBack(q2-1, start); |
251 | 460 |
while (q > start && isdigit(*q)) |
252 | 461 |
q--; |
253 | 462 |
|
254 | 463 |
if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &genid)) { |
255 |
- cli_dbgmsg("cli_pdf: Failed to parse object genid (%u)\n", pdf->nobjs); |
|
464 |
+ cli_dbgmsg("pdf_findobj: Failed to parse object genid (# objects found: %u)\n", pdf->nobjs); |
|
256 | 465 |
/* Failed to parse, probably not a real object. Skip past the "obj" thing, and continue. */ |
257 | 466 |
pdf->offset = q2 + 4 - pdf->map; |
258 |
- return 2; |
|
467 |
+ status = CL_EPARSE; |
|
468 |
+ goto done; |
|
259 | 469 |
} |
470 |
+ |
|
471 |
+ /* Find the object id (objid) that appears before the genid */ |
|
260 | 472 |
q = findNextNonWSBack(q-1,start); |
261 | 473 |
while (q > start && isdigit(*q)) |
262 | 474 |
q--; |
... | ... |
@@ -271,59 +544,82 @@ int pdf_findobj(struct pdf_struct *pdf) |
271 | 271 |
const char* lastfile = q - 4; |
272 | 272 |
if (0 != strncmp(lastfile, "\%\%EOF", 5)) { |
273 | 273 |
/* Nope, wasn't %%EOF */ |
274 |
- cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
274 |
+ cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); |
|
275 | 275 |
/* Skip past the "obj" thing, and continue. */ |
276 | 276 |
pdf->offset = q2 + 4 - pdf->map; |
277 |
- return 2; |
|
277 |
+ status = CL_EPARSE; |
|
278 |
+ goto done; |
|
278 | 279 |
} |
279 | 280 |
/* Yup, looks like the file continues after %%EOF. |
280 | 281 |
* Probably another revision. Keep parsing... */ |
281 | 282 |
q++; |
282 |
- cli_dbgmsg("cli_pdf: \%\%EOF detected before end of file, at %zu\n", (size_t)q); |
|
283 |
+ cli_dbgmsg("pdf_findobj: \%\%EOF detected before end of file, at %zu\n", (size_t)q); |
|
283 | 284 |
} else { |
284 | 285 |
/* Failed parsing at the very beginning */ |
285 |
- cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
286 |
+ cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); |
|
286 | 287 |
/* Probably not a real object. Skip past the "obj" thing, and continue. */ |
287 | 288 |
pdf->offset = q2 + 4 - pdf->map; |
288 |
- return 2; |
|
289 |
+ status = CL_EPARSE; |
|
290 |
+ goto done; |
|
289 | 291 |
} |
290 | 292 |
/* Try again, with offset slightly adjusted */ |
291 | 293 |
if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)(bytesleft + (q2-q)), 0, 10, &objid)) { |
292 |
- cli_dbgmsg("cli_pdf: Failed to parse object objid (%u)\n", pdf->nobjs); |
|
294 |
+ cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); |
|
293 | 295 |
/* Still failed... Probably not a real object. Skip past the "obj" thing, and continue. */ |
294 | 296 |
pdf->offset = q2 + 4 - pdf->map; |
295 |
- return 2; |
|
297 |
+ status = CL_EPARSE; |
|
298 |
+ goto done; |
|
296 | 299 |
} |
297 |
- cli_dbgmsg("cli_pdf: There appears to be an additional revision. Continuing to parse...\n"); |
|
300 |
+ cli_dbgmsg("pdf_findobj: There appears to be an additional revision. Continuing to parse...\n"); |
|
298 | 301 |
} |
299 |
- obj->id = (objid << 8) | (genid&0xff); |
|
300 |
- obj->start = q2+4 - pdf->map; |
|
302 |
+ |
|
303 |
+ /* |
|
304 |
+ * Ok so we have the objid, genid, and "obj" string. |
|
305 |
+ * Time to store that information and then ... |
|
306 |
+ * ... investigate what kind of object this is. |
|
307 |
+ */ |
|
308 |
+ obj->id = (objid << 8) | (genid & 0xff); |
|
309 |
+ obj->start = q2+4 - pdf->map; /* obj start begins just after the "obj" string */ |
|
301 | 310 |
obj->flags = 0; |
311 |
+ |
|
302 | 312 |
bytesleft -= 4; |
303 | 313 |
eof = pdf->map + pdf->size; |
304 | 314 |
q = pdf->map + obj->start; |
305 | 315 |
|
306 |
- while (q < eof && bytesleft > 0) { |
|
316 |
+ while (q < eof && bytesleft > 0) |
|
317 |
+ { |
|
307 | 318 |
off_t p_stream, p_endstream; |
308 | 319 |
q2 = pdf_nextobject(q, bytesleft); |
309 | 320 |
if (!q2) |
310 |
- q2 = pdf->map + pdf->size; |
|
321 |
+ q2 = pdf->map + pdf->size; /* No interesting objects found, fast-forward to eof */ |
|
311 | 322 |
|
312 | 323 |
bytesleft -= q2 - q; |
313 | 324 |
if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) { |
325 |
+ /* |
|
326 |
+ * Found obj that contains a stream. |
|
327 |
+ */ |
|
314 | 328 |
obj->flags |= 1 << OBJ_STREAM; |
315 | 329 |
q2 = q-1 + p_endstream + 9; |
316 | 330 |
bytesleft -= q2 - q + 1; |
317 | 331 |
|
318 | 332 |
if (bytesleft < 0) { |
333 |
+ /* ... and the stream is truncated. Hmm... */ |
|
319 | 334 |
obj->flags |= 1 << OBJ_TRUNCATED; |
320 | 335 |
pdf->offset = pdf->size; |
321 |
- return 1;/* truncated */ |
|
336 |
+ |
|
337 |
+ status = CL_SUCCESS; |
|
338 |
+ goto done; /* Truncated file, no end to obj/stream. |
|
339 |
+ * The next call to pdf_findobj() will return no more objects. */ |
|
322 | 340 |
} |
323 | 341 |
} else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) { |
342 |
+ /* |
|
343 |
+ * obj found and offset positioned. ideal return case |
|
344 |
+ */ |
|
324 | 345 |
q2 = q3 + 6; |
325 |
- pdf->offset = q2 - pdf->map; |
|
326 |
- return 1; /* obj found and offset positioned */ |
|
346 |
+ pdf->offset = q2 - pdf->map; /* update the offset to just after the endobj */ |
|
347 |
+ |
|
348 |
+ status = CL_SUCCESS; |
|
349 |
+ goto done; |
|
327 | 350 |
} else { |
328 | 351 |
q2++; |
329 | 352 |
bytesleft--; |
... | ... |
@@ -335,7 +631,32 @@ int pdf_findobj(struct pdf_struct *pdf) |
335 | 335 |
obj->flags |= 1 << OBJ_TRUNCATED; |
336 | 336 |
pdf->offset = pdf->size; |
337 | 337 |
|
338 |
- return 1;/* truncated */ |
|
338 |
+ status = CL_SUCCESS; /* truncated file, no end to obj. */ |
|
339 |
+ |
|
340 |
+done: |
|
341 |
+ if (status == CL_SUCCESS) { |
|
342 |
+ cli_dbgmsg("pdf_findobj: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + pdf->startoff)); |
|
343 |
+ } |
|
344 |
+ else |
|
345 |
+ { |
|
346 |
+ if(status == CL_BREAK) { |
|
347 |
+ cli_dbgmsg("pdf_findobj: No more objects (# objects found: %u)\n", pdf->nobjs); |
|
348 |
+ } else if(status == CL_EMEM) { |
|
349 |
+ cli_warnmsg("pdf_findobj: Error allocating memory (# objects found: %u)\n", pdf->nobjs); |
|
350 |
+ } else { |
|
351 |
+ cli_dbgmsg("pdf_findobj: Unexpected status code %d.\n", status); |
|
352 |
+ } |
|
353 |
+ /* Remove the unused obj reference from our list of objects found */ |
|
354 |
+ /* No need to realloc pdf->objs back down. It won't leak. */ |
|
355 |
+ pdf->objs[pdf->nobjs-1] = NULL; |
|
356 |
+ pdf->nobjs--; |
|
357 |
+ |
|
358 |
+ /* Free up the obj struct. */ |
|
359 |
+ if (NULL != obj) |
|
360 |
+ free(obj); |
|
361 |
+ } |
|
362 |
+ |
|
363 |
+ return status; |
|
339 | 364 |
} |
340 | 365 |
|
341 | 366 |
static size_t filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, size_t len, size_t *sum) |
... | ... |
@@ -424,7 +745,7 @@ void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag |
424 | 424 |
break; |
425 | 425 |
} |
426 | 426 |
|
427 |
- cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
|
427 |
+ cli_dbgmsg("pdfobj_flag: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff); |
|
428 | 428 |
} |
429 | 429 |
|
430 | 430 |
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid) |
... | ... |
@@ -433,17 +754,20 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o |
433 | 433 |
uint32_t i; |
434 | 434 |
|
435 | 435 |
/* search starting at previous obj (if exists) */ |
436 |
- i = (obj != pdf->objs) ? obj - pdf->objs : 0; |
|
436 |
+ for (i = 0; i < pdf->nobjs; i++) { |
|
437 |
+ if (pdf->objs[i] == obj) |
|
438 |
+ break; |
|
439 |
+ } |
|
437 | 440 |
|
438 |
- for (j=i;j<pdf->nobjs;j++) { |
|
439 |
- obj = &pdf->objs[j]; |
|
441 |
+ for (j = i; j < pdf->nobjs; j++) { |
|
442 |
+ obj = pdf->objs[j]; |
|
440 | 443 |
if (obj->id == objid) |
441 | 444 |
return obj; |
442 | 445 |
} |
443 | 446 |
|
444 | 447 |
/* restart search from beginning if not found */ |
445 |
- for (j=0;j<i;j++) { |
|
446 |
- obj = &pdf->objs[j]; |
|
448 |
+ for (j = 0; j < i; j++) { |
|
449 |
+ obj = pdf->objs[j]; |
|
447 | 450 |
if (obj->id == objid) |
448 | 451 |
return obj; |
449 | 452 |
} |
... | ... |
@@ -451,72 +775,173 @@ struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t o |
451 | 451 |
return NULL; |
452 | 452 |
} |
453 | 453 |
|
454 |
-static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len) |
|
454 |
+/** |
|
455 |
+ * @brief Find and interpret the "/Length" dictionary key value. |
|
456 |
+ * |
|
457 |
+ * The value may be: |
|
458 |
+ * - a direct object (i.e. just a number) |
|
459 |
+ * - an indirect object, where the value is somewhere else in the document and we have to look it up. |
|
460 |
+ * indirect objects are referenced using an object id (objid), a generation id (genid), and the letter 'R'. |
|
461 |
+ * |
|
462 |
+ * Example dictionary with a single key "/Length" that relies on a direct object for the value. |
|
463 |
+ * |
|
464 |
+ * 1 0 obj |
|
465 |
+ * << /Length 534 |
|
466 |
+ * /Filter [ /ASCII85Decode /LZWDecode ] |
|
467 |
+ * >> |
|
468 |
+ * stream |
|
469 |
+ * J..)6T`?p&<!J9%_[umg"B7/Z7KNXbN'S+,*Q/&"OLT'FLIDK#!n`$"<Atdi`\Vn%b%)&'cA*VnK\CJY(sF>c!Jnl@ |
|
470 |
+ * RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d&/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb |
|
471 |
+ * Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1 |
|
472 |
+ * 'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sDS]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+ |
|
473 |
+ * ":aAa'S`ViJglLb8<W9k6Yl\\0McJQkDeLWdPN?9A'jX*al>iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL, |
|
474 |
+ * JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj88l8Q:_CZ(Gm1%X\N1&u!FKHMB~> |
|
475 |
+ * endstream |
|
476 |
+ * endobj |
|
477 |
+ * |
|
478 |
+ * Example dictionary with a single key "/Length" that relies on an indirect object for the value. |
|
479 |
+ * |
|
480 |
+ * 7 0 obj |
|
481 |
+ * << /Length 8 0 R >> % An indirect reference to object 8, with generation id 0. |
|
482 |
+ * stream |
|
483 |
+ * BT |
|
484 |
+ * /F1 12 Tf |
|
485 |
+ * 72 712 Td |
|
486 |
+ * ( A stream with an indirect length ) Tj |
|
487 |
+ * ET |
|
488 |
+ * endstream |
|
489 |
+ * endobj |
|
490 |
+ * |
|
491 |
+ * 8 0 obj |
|
492 |
+ * 77 % The length of the preceding stream |
|
493 |
+ * endobj |
|
494 |
+ * |
|
495 |
+ * @param pdf Pdf context structure. |
|
496 |
+ * @param obj Pdf object context structure. |
|
497 |
+ * @param dict_start Pointer to the start of the dictionary string. |
|
498 |
+ * @param dict_len Remaining length of the dictionary string in bytes. |
|
499 |
+ * @return size_t Unsigned integer value of the "/Length" key |
|
500 |
+ */ |
|
501 |
+static size_t find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *dict_start, size_t dict_len) |
|
455 | 502 |
{ |
456 |
- unsigned long length; |
|
457 |
- const char *q; |
|
503 |
+ size_t length = 0; |
|
504 |
+ const char *obj_start = dict_start; |
|
505 |
+ size_t bytes_remaining = dict_len; |
|
506 |
+ unsigned long length_ul = 0; |
|
507 |
+ const char *index; |
|
458 | 508 |
|
459 |
- q = cli_memstr(start, len, "/Length", 7); |
|
460 |
- if (!q) |
|
509 |
+ if (bytes_remaining < 8) { |
|
461 | 510 |
return 0; |
511 |
+ } |
|
462 | 512 |
|
463 |
- q++; |
|
464 |
- len -= q - start; |
|
465 |
- start = pdf_nextobject(q, len); |
|
466 |
- if (!start) |
|
513 |
+ /* |
|
514 |
+ * Find the "/Length" dictionary key |
|
515 |
+ */ |
|
516 |
+ index = cli_memstr(obj_start, bytes_remaining, "/Length", 7); |
|
517 |
+ if (!index) |
|
467 | 518 |
return 0; |
468 | 519 |
|
469 |
- len -= start - q; |
|
470 |
- q = start; |
|
471 |
- if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) { |
|
472 |
- cli_dbgmsg("cli_pdf: failed to parse object length\n"); |
|
520 |
+ if (bytes_remaining < 1) { |
|
473 | 521 |
return 0; |
474 | 522 |
} |
475 | 523 |
|
476 |
- while (isdigit(*q) && len > 0) { |
|
477 |
- q++; |
|
478 |
- len--; |
|
524 |
+ /* Step the index into the "/Length" string. */ |
|
525 |
+ index++; |
|
526 |
+ bytes_remaining -= index - obj_start; |
|
527 |
+ |
|
528 |
+ /* Find the start of the next direct or indirect object. |
|
529 |
+ * pdf_nextobject() assumes we started searching from within a previous object */ |
|
530 |
+ obj_start = pdf_nextobject(index, bytes_remaining); |
|
531 |
+ if (!obj_start) |
|
532 |
+ return 0; |
|
533 |
+ |
|
534 |
+ if (bytes_remaining < obj_start - index) { |
|
535 |
+ return 0; |
|
536 |
+ } |
|
537 |
+ bytes_remaining -= obj_start - index; |
|
538 |
+ index = obj_start; |
|
539 |
+ |
|
540 |
+ /* Read the value. This could either be the direct length value, |
|
541 |
+ or the object id of the indirect object that has the length */ |
|
542 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &length_ul)) { |
|
543 |
+ cli_dbgmsg("find_length: failed to parse object length\n"); |
|
544 |
+ return 0; |
|
545 |
+ } |
|
546 |
+ length = length_ul; /* length or maybe object id */ |
|
547 |
+ |
|
548 |
+ /* |
|
549 |
+ * Keep parsing, skipping past the first integer that might have been what we wanted. |
|
550 |
+ * If it's an indirect object, we'll find a Generation ID followed by the letter 'R' |
|
551 |
+ * I.e. something like " 0 R" |
|
552 |
+ */ |
|
553 |
+ while ((bytes_remaining > 0) && isdigit(*index)) { |
|
554 |
+ index++; |
|
555 |
+ bytes_remaining--; |
|
479 | 556 |
} |
480 | 557 |
|
481 |
- if (*q == ' ' && len > 0) { |
|
558 |
+ if ((bytes_remaining > 0) && (*index == ' ')) { |
|
482 | 559 |
unsigned long genid; |
483 |
- q++; |
|
484 |
- len--; |
|
485 |
- if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &genid)) { |
|
486 |
- cli_dbgmsg("cli_pdf: failed to parse object genid\n"); |
|
560 |
+ |
|
561 |
+ index++; |
|
562 |
+ bytes_remaining--; |
|
563 |
+ |
|
564 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &genid)) { |
|
565 |
+ cli_dbgmsg("find_length: failed to parse object genid\n"); |
|
487 | 566 |
return 0; |
488 | 567 |
} |
489 | 568 |
|
490 |
- while(isdigit(*q) && len > 0) { |
|
491 |
- q++; |
|
492 |
- len--; |
|
569 |
+ while((bytes_remaining > 0) && isdigit(*index)) { |
|
570 |
+ index++; |
|
571 |
+ bytes_remaining--; |
|
572 |
+ } |
|
573 |
+ |
|
574 |
+ if (bytes_remaining < 2) { |
|
575 |
+ return 0; |
|
493 | 576 |
} |
494 | 577 |
|
495 |
- if (q[0] == ' ' && q[1] == 'R') { |
|
496 |
- cli_dbgmsg("cli_pdf: length is in indirect object %lu %lu\n", length, genid); |
|
578 |
+ if (index[0] == ' ' && index[1] == 'R') { |
|
579 |
+ /* |
|
580 |
+ * Ok so we found a genid and that 'R'. Which means that first value |
|
581 |
+ * was actually the objid. |
|
582 |
+ * We can look up the indirect object using this information. |
|
583 |
+ */ |
|
584 |
+ unsigned long objid = length; |
|
585 |
+ const char* indirect_obj_start = NULL; |
|
586 |
+ |
|
587 |
+ cli_dbgmsg("find_length: length is in indirect object %lu %lu\n", objid, genid); |
|
497 | 588 |
|
498 | 589 |
obj = find_obj(pdf, obj, (length << 8) | (genid&0xff)); |
499 | 590 |
if (!obj) { |
500 |
- cli_dbgmsg("cli_pdf: indirect object not found\n"); |
|
591 |
+ cli_dbgmsg("find_length: indirect object not found\n"); |
|
501 | 592 |
return 0; |
502 | 593 |
} |
503 | 594 |
|
504 |
- q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start); |
|
505 |
- if (!q) { |
|
506 |
- cli_dbgmsg("cli_pdf: next object not found\n"); |
|
595 |
+ indirect_obj_start = pdf->map + obj->start; |
|
596 |
+ bytes_remaining = pdf->size - obj->start; |
|
597 |
+ |
|
598 |
+ /* Ok so we found the indirect object, let's read the value. */ |
|
599 |
+ index = pdf_nextobject(indirect_obj_start, bytes_remaining); |
|
600 |
+ if (!index) { |
|
601 |
+ cli_dbgmsg("find_length: next object not found\n"); |
|
507 | 602 |
return 0; |
508 | 603 |
} |
604 |
+ |
|
605 |
+ if (bytes_remaining < index - indirect_obj_start) { |
|
606 |
+ return 0; |
|
607 |
+ } |
|
608 |
+ bytes_remaining -= index - indirect_obj_start; |
|
509 | 609 |
|
510 |
- if (CL_SUCCESS != cli_strntoul_wrap(q, (size_t)len, 0, 10, &length)) { |
|
511 |
- cli_dbgmsg("cli_pdf: failed to parse object length from indirect object\n"); |
|
610 |
+ /* Found the value, so let's parse it as an unsigned long */ |
|
611 |
+ if (CL_SUCCESS != cli_strntoul_wrap(index, bytes_remaining, 0, 10, &length)) { |
|
612 |
+ cli_dbgmsg("find_length: failed to parse object length from indirect object\n"); |
|
512 | 613 |
return 0; |
513 | 614 |
} |
514 | 615 |
} |
515 | 616 |
} |
516 | 617 |
|
517 | 618 |
/* limit length */ |
518 |
- if (start - pdf->map + length+5 > pdf->size) |
|
519 |
- length = pdf->size - (start - pdf->map)-5; |
|
619 |
+ if (obj_start - pdf->map + length + 5 > pdf->size) |
|
620 |
+ length = pdf->size - (obj_start - pdf->map) - 5; |
|
520 | 621 |
|
521 | 622 |
return length; |
522 | 623 |
} |
... | ... |
@@ -525,36 +950,98 @@ static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
525 | 525 |
|
526 | 526 |
static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary) |
527 | 527 |
{ |
528 |
- unsigned i = obj - pdf->objs; |
|
528 |
+ if (0 == obj->size) |
|
529 |
+ { |
|
530 |
+ /* |
|
531 |
+ * Programmatically determine size if not already known. |
|
532 |
+ */ |
|
533 |
+ unsigned i = 0; |
|
534 |
+ |
|
535 |
+ /* Find the index of the current object */ |
|
536 |
+ for (i = 0; i < pdf->nobjs; i++) { |
|
537 |
+ if (pdf->objs[i] == obj) |
|
538 |
+ break; |
|
539 |
+ } |
|
540 |
+ |
|
541 |
+ /* Find the next object that exists in the same buffer (pdf fmap, or object stream) */ |
|
542 |
+ if (i < pdf->nobjs) { |
|
543 |
+ i++; |
|
544 |
+ } |
|
545 |
+ |
|
546 |
+ if (obj->objstm == NULL) { |
|
547 |
+ /* Current object isn't in an object stream, we want to find |
|
548 |
+ * the next object that also isn't in an object stream. */ |
|
549 |
+ for ( ; i < pdf->nobjs; i++) { |
|
550 |
+ if (pdf->objs[i]->objstm == NULL) |
|
551 |
+ break; |
|
552 |
+ } |
|
553 |
+ } else { |
|
554 |
+ /* Current object is in an object stream, we want to find |
|
555 |
+ * the next object that is in the same object stream. |
|
556 |
+ * |
|
557 |
+ * This really shouldn't happen, so throw a warning and |
|
558 |
+ * then see if we can solve it anyhow */ |
|
559 |
+ cli_warnmsg("obj_size: Encountered pdf object in an object stream that has an unknown size!!\n"); |
|
560 |
+ |
|
561 |
+ for ( ; i < pdf->nobjs; i++) { |
|
562 |
+ if (pdf->objs[i]->objstm == obj->objstm) |
|
563 |
+ break; |
|
564 |
+ } |
|
565 |
+ } |
|
566 |
+ |
|
567 |
+ /* Step backwards from the "next" object to find the end of the current object */ |
|
568 |
+ if (i < pdf->nobjs) { |
|
569 |
+ int s = pdf->objs[i]->start - obj->start - 4; |
|
570 |
+ if (s > 0) { |
|
571 |
+ if (!binary) { |
|
572 |
+ const char *p = NULL; |
|
573 |
+ const char *q = NULL; |
|
574 |
+ |
|
575 |
+ if (obj->objstm == NULL) { |
|
576 |
+ p = pdf->map + obj->start; |
|
577 |
+ } else { |
|
578 |
+ p = obj->objstm->streambuf + obj->start; |
|
579 |
+ } |
|
580 |
+ q = p + s; |
|
529 | 581 |
|
530 |
- i++; |
|
531 |
- if (i < pdf->nobjs) { |
|
532 |
- int s = pdf->objs[i].start - obj->start - 4; |
|
533 |
- if (s > 0) { |
|
534 |
- if (!binary) { |
|
535 |
- const char *p = pdf->map + obj->start; |
|
536 |
- const char *q = p + s; |
|
582 |
+ while (q > p && (isspace(*q) || isdigit(*q))) |
|
583 |
+ q--; |
|
537 | 584 |
|
538 |
- while (q > p && (isspace(*q) || isdigit(*q))) |
|
539 |
- q--; |
|
585 |
+ if (q > p+5 && !memcmp(q-5,"endobj",6)) |
|
586 |
+ q -= 6; |
|
540 | 587 |
|
541 |
- if (q > p+5 && !memcmp(q-5,"endobj",6)) |
|
542 |
- q -= 6; |
|
588 |
+ q = findNextNonWSBack(q, p); |
|
589 |
+ q++; |
|
543 | 590 |
|
544 |
- q = findNextNonWSBack(q, p); |
|
545 |
- q++; |
|
591 |
+ obj->size = q - p; |
|
592 |
+ goto done; |
|
593 |
+ } |
|
546 | 594 |
|
547 |
- return q - p; |
|
595 |
+ obj->size = s; |
|
596 |
+ goto done; |
|
548 | 597 |
} |
598 |
+ } |
|
549 | 599 |
|
550 |
- return s; |
|
600 |
+ /* If we've gotten this far, we didn't find a "next" object... so our |
|
601 |
+ * current object must be at the end of the pdf fmap or the end of the |
|
602 |
+ * object stream. */ |
|
603 |
+ if (obj->objstm == NULL) { |
|
604 |
+ /* Current object isn't in an object stream, so we can determine object |
|
605 |
+ * size based on the remaining size of the file (in theory). */ |
|
606 |
+ if (binary) |
|
607 |
+ obj->size = pdf->size - obj->start; |
|
608 |
+ else |
|
609 |
+ obj->size = pdf->offset - obj->start - 6; /* This hack I think assumes that we reached the end of the file when finding objects. */ |
|
610 |
+ } else { |
|
611 |
+ /* Current object is in an object stream, we want to find |
|
612 |
+ * the next object that is in the same object stream. */ |
|
613 |
+ obj->size = obj->objstm->streambuf_len - obj->start; |
|
551 | 614 |
} |
552 | 615 |
} |
553 | 616 |
|
554 |
- if (binary) |
|
555 |
- return pdf->size - obj->start; |
|
617 |
+done: |
|
556 | 618 |
|
557 |
- return pdf->offset - obj->start - 6; |
|
619 |
+ return obj->size; |
|
558 | 620 |
} |
559 | 621 |
|
560 | 622 |
static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid) |
... | ... |
@@ -568,7 +1055,7 @@ static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, i |
568 | 568 |
|
569 | 569 |
bc_ctx = cli_bytecode_context_alloc(); |
570 | 570 |
if (!bc_ctx) { |
571 |
- cli_errmsg("cli_pdf: can't allocate memory for bc_ctx"); |
|
571 |
+ cli_errmsg("run_pdf_hooks: can't allocate memory for bc_ctx\n"); |
|
572 | 572 |
return CL_EMEM; |
573 | 573 |
} |
574 | 574 |
|
... | ... |
@@ -576,7 +1063,7 @@ static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, i |
576 | 576 |
if (fd != -1) { |
577 | 577 |
map = fmap(fd, 0, 0); |
578 | 578 |
if (!map) { |
579 |
- cli_dbgmsg("can't mmap pdf extracted obj\n"); |
|
579 |
+ cli_dbgmsg("run_pdf_hooks: can't mmap pdf extracted obj\n"); |
|
580 | 580 |
map = *ctx->fmap; |
581 | 581 |
fd = -1; |
582 | 582 |
} |
... | ... |
@@ -603,15 +1090,15 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char * |
603 | 603 |
unsigned char pad, i; |
604 | 604 |
int nrounds; |
605 | 605 |
|
606 |
- cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %zu\n", key_n, *length); |
|
606 |
+ cli_dbgmsg("aes_decrypt: key length: %d, data length: %zu\n", key_n, *length); |
|
607 | 607 |
if (key_n > 32) { |
608 |
- cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8); |
|
608 |
+ cli_dbgmsg("aes_decrypt: key length is %d!\n", key_n*8); |
|
609 | 609 |
return; |
610 | 610 |
} |
611 | 611 |
|
612 | 612 |
if (len < 32) { |
613 |
- cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len); |
|
614 |
- noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %zu\n", len); |
|
613 |
+ cli_dbgmsg("aes_decrypt: len is <32: %zu\n", len); |
|
614 |
+ noisy_warnmsg("aes_decrypt: len is <32: %zu\n", len); |
|
615 | 615 |
return; |
616 | 616 |
} |
617 | 617 |
|
... | ... |
@@ -626,7 +1113,7 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char * |
626 | 626 |
cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n"); |
627 | 627 |
nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n*8); |
628 | 628 |
if (!nrounds) { |
629 |
- cli_dbgmsg("cli_pdf: aes_decrypt: nrounds = 0\n"); |
|
629 |
+ cli_dbgmsg("aes_decrypt: nrounds = 0\n"); |
|
630 | 630 |
return; |
631 | 631 |
} |
632 | 632 |
cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n"); |
... | ... |
@@ -649,8 +1136,8 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char * |
649 | 649 |
pad = q[-1]; |
650 | 650 |
|
651 | 651 |
if (pad > 0x10) { |
652 |
- cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16); |
|
653 |
- noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16); |
|
652 |
+ cli_dbgmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16); |
|
653 |
+ noisy_warnmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, len-16); |
|
654 | 654 |
*length -= len; |
655 | 655 |
return; |
656 | 656 |
} |
... | ... |
@@ -658,8 +1145,8 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char * |
658 | 658 |
q -= pad; |
659 | 659 |
for (i=1;i<pad;i++) { |
660 | 660 |
if (q[i] != pad) { |
661 |
- cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad); |
|
662 |
- noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad); |
|
661 |
+ cli_dbgmsg("aes_decrypt: bad pad: %x != %x\n",q[i],pad); |
|
662 |
+ noisy_warnmsg("aes_decrypt: bad pad: %x != %x\n",q[i],pad); |
|
663 | 663 |
*length -= len; |
664 | 664 |
|
665 | 665 |
return; |
... | ... |
@@ -671,7 +1158,7 @@ static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char * |
671 | 671 |
|
672 | 672 |
*length -= len; |
673 | 673 |
|
674 |
- cli_dbgmsg("cli_pdf: aes_decrypt: length is %zu\n", *length); |
|
674 |
+ cli_dbgmsg("aes_decrypt: length is %zu\n", *length); |
|
675 | 675 |
} |
676 | 676 |
|
677 | 677 |
|
... | ... |
@@ -682,7 +1169,7 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l |
682 | 682 |
struct arc4_state arc4; |
683 | 683 |
|
684 | 684 |
if (!length || !*length || !in) { |
685 |
- noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff); |
|
685 |
+ noisy_warnmsg("decrypt_any: decrypt failed for obj %u %u\n", id>>8, id&0xff); |
|
686 | 686 |
return NULL; |
687 | 687 |
} |
688 | 688 |
|
... | ... |
@@ -726,20 +1213,20 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l |
726 | 726 |
arc4_init(&arc4, result, n); |
727 | 727 |
arc4_apply(&arc4, q, (unsigned)*length); /* TODO: may truncate for very large lengths */ |
728 | 728 |
|
729 |
- noisy_msg(pdf, "decrypted ARC4 data\n"); |
|
729 |
+ noisy_msg(pdf, "decrypt_any: decrypted ARC4 data\n"); |
|
730 | 730 |
|
731 | 731 |
break; |
732 | 732 |
case ENC_AESV2: |
733 | 733 |
cli_dbgmsg("cli_pdf: enc is aesv2\n"); |
734 | 734 |
aes_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1); |
735 | 735 |
|
736 |
- noisy_msg(pdf, "decrypted AES(v2) data\n"); |
|
736 |
+ noisy_msg(pdf, "decrypt_any: decrypted AES(v2) data\n"); |
|
737 | 737 |
|
738 | 738 |
break; |
739 | 739 |
case ENC_AESV3: |
740 |
- cli_dbgmsg("cli_pdf: enc is aesv3\n"); |
|
740 |
+ cli_dbgmsg("decrypt_any: enc is aesv3\n"); |
|
741 | 741 |
if (pdf->keylen == 0) { |
742 |
- cli_dbgmsg("cli_pdf: no key\n"); |
|
742 |
+ cli_dbgmsg("decrypt_any: no key\n"); |
|
743 | 743 |
return NULL; |
744 | 744 |
} |
745 | 745 |
|
... | ... |
@@ -749,21 +1236,21 @@ char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *l |
749 | 749 |
|
750 | 750 |
break; |
751 | 751 |
case ENC_IDENTITY: |
752 |
- cli_dbgmsg("cli_pdf: enc is identity\n"); |
|
752 |
+ cli_dbgmsg("decrypt_any: enc is identity\n"); |
|
753 | 753 |
memcpy(q, in, *length); |
754 | 754 |
|
755 |
- noisy_msg(pdf, "identity encryption\n"); |
|
755 |
+ noisy_msg(pdf, "decrypt_any: identity encryption\n"); |
|
756 | 756 |
|
757 | 757 |
break; |
758 | 758 |
case ENC_NONE: |
759 |
- cli_dbgmsg("cli_pdf: enc is none\n"); |
|
759 |
+ cli_dbgmsg("decrypt_any: enc is none\n"); |
|
760 | 760 |
|
761 | 761 |
noisy_msg(pdf, "encryption is none\n"); |
762 | 762 |
|
763 | 763 |
free(q); |
764 | 764 |
return NULL; |
765 | 765 |
case ENC_UNKNOWN: |
766 |
- cli_dbgmsg("cli_pdf: enc is unknown\n"); |
|
766 |
+ cli_dbgmsg("decrypt_any: enc is unknown\n"); |
|
767 | 767 |
free(q); |
768 | 768 |
|
769 | 769 |
noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n", |
... | ... |
@@ -838,7 +1325,8 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf) |
838 | 838 |
char fullname[1024]; |
839 | 839 |
char outbuff[BUFSIZ]; |
840 | 840 |
char inbuf[BUFSIZ]; |
841 |
- int fout, n, rc; |
|
841 |
+ int fout, n; |
|
842 |
+ cl_error_t rc; |
|
842 | 843 |
enum cstate st = CSTATE_NONE; |
843 | 844 |
|
844 | 845 |
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1)); |
... | ... |
@@ -846,7 +1334,7 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf) |
846 | 846 |
if (fout < 0) { |
847 | 847 |
char err[128]; |
848 | 848 |
|
849 |
- cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
849 |
+ cli_errmsg("pdf_scan_contents: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
850 | 850 |
return CL_ETMPFILE; |
851 | 851 |
} |
852 | 852 |
|
... | ... |
@@ -872,20 +1360,19 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf) |
872 | 872 |
return rc; |
873 | 873 |
} |
874 | 874 |
|
875 |
-static const char *pdf_getdict(const char *q0, int* len, const char *key); |
|
876 |
-static char *pdf_readval(const char *q, int len, const char *key); |
|
877 |
-static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape); |
|
878 |
- |
|
879 | 875 |
int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
880 | 876 |
{ |
881 | 877 |
char fullname[NAME_MAX + 1]; |
882 | 878 |
int fout; |
883 | 879 |
ptrdiff_t sum = 0; |
884 |
- int rc = CL_SUCCESS; |
|
880 |
+ cl_error_t rc = CL_SUCCESS; |
|
885 | 881 |
int dump = 1; |
886 | 882 |
|
887 | 883 |
cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff); |
888 | 884 |
|
885 |
+ if (obj->objstm) |
|
886 |
+ cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n"); |
|
887 |
+ |
|
889 | 888 |
/* TODO: call bytecode hook here, allow override dumpability */ |
890 | 889 |
if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) { |
891 | 890 |
/* don't dump all streams */ |
... | ... |
@@ -905,13 +1392,13 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
905 | 905 |
if (!dump) |
906 | 906 |
return CL_CLEAN; |
907 | 907 |
|
908 |
- cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
908 |
+ cli_dbgmsg("pdf_extract_obj: dumping obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
909 | 909 |
|
910 | 910 |
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++); |
911 | 911 |
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
912 | 912 |
if (fout < 0) { |
913 | 913 |
char err[128]; |
914 |
- cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
914 |
+ cli_errmsg("pdf_extract_obj: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
915 | 915 |
|
916 | 916 |
return CL_ETMPFILE; |
917 | 917 |
} |
... | ... |
@@ -925,6 +1412,11 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
925 | 925 |
off_t p_stream = 0, p_endstream = 0; |
926 | 926 |
off_t length; |
927 | 927 |
|
928 |
+ if (NULL != obj->objstm) { |
|
929 |
+ cli_warnmsg("pdf_extract_obj: Object found in object stream claims to be an object stream! Skipping.\n"); |
|
930 |
+ break; |
|
931 |
+ } |
|
932 |
+ |
|
928 | 933 |
find_stream_bounds(start, pdf->size - obj->start, |
929 | 934 |
pdf->size - obj->start, |
930 | 935 |
&p_stream, &p_endstream, |
... | ... |
@@ -937,6 +1429,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
937 | 937 |
int len = p_stream; |
938 | 938 |
const char *pstr; |
939 | 939 |
struct pdf_dict *dparams = NULL; |
940 |
+ struct objstm_struct *objstm = NULL; |
|
940 | 941 |
int xref = 0; |
941 | 942 |
|
942 | 943 |
length = find_length(pdf, obj, start, p_stream); |
... | ... |
@@ -970,7 +1463,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
970 | 970 |
if (length < 0) |
971 | 971 |
length = 0; |
972 | 972 |
|
973 |
- cli_dbgmsg("cli_pdf: calculated length %lld\n", (long long)length); |
|
973 |
+ cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length); |
|
974 | 974 |
} else { |
975 | 975 |
if (size > (size_t)length+2) { |
976 | 976 |
cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n", |
... | ... |
@@ -980,7 +1473,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
980 | 980 |
} |
981 | 981 |
|
982 | 982 |
if (orig_length && size > (size_t)orig_length + 20) { |
983 |
- cli_dbgmsg("cli_pdf: orig length: %lld, length: %lld, size: %zu\n", |
|
983 |
+ cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n", |
|
984 | 984 |
(long long)orig_length, (long long)length, size); |
985 | 985 |
pdfobj_flag(pdf, obj, BAD_STREAMLEN); |
986 | 986 |
} |
... | ... |
@@ -998,12 +1491,20 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
998 | 998 |
|
999 | 999 |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
1000 | 1000 |
|
1001 |
- pstr = pdf_getdict(start, &len, "/DecodeParms"); |
|
1002 |
- if (!pstr) |
|
1003 |
- pstr = pdf_getdict(start, &len, "/DP"); |
|
1001 |
+ /* |
|
1002 |
+ * Identify the DecodeParms, if available. |
|
1003 |
+ */ |
|
1004 |
+ if (NULL != (pstr = pdf_getdict(start, &len, "/DecodeParms"))) |
|
1005 |
+ { |
|
1006 |
+ cli_dbgmsg("pdf_extract_obj: Found /DecodeParms\n"); |
|
1007 |
+ } |
|
1008 |
+ else if (NULL != (pstr = pdf_getdict(start, &len, "/DP"))) |
|
1009 |
+ { |
|
1010 |
+ cli_dbgmsg("pdf_extract_obj: Found /DP\n"); |
|
1011 |
+ } |
|
1004 | 1012 |
|
1005 | 1013 |
if (pstr) { |
1006 |
- unsigned int objsz = obj_size(pdf, obj, 1); |
|
1014 |
+ unsigned int objsize = obj_size(pdf, obj, 1); |
|
1007 | 1015 |
|
1008 | 1016 |
/* shift pstr left to "<<" for pdf_parse_dict */ |
1009 | 1017 |
while ((*pstr == '<') && (pstr > start)) { |
... | ... |
@@ -1018,12 +1519,102 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1018 | 1018 |
} |
1019 | 1019 |
|
1020 | 1020 |
if (len > 4) |
1021 |
- dparams = pdf_parse_dict(pdf, obj, objsz, (char *)pstr, NULL); |
|
1021 |
+ dparams = pdf_parse_dict(pdf, obj, objsize, (char *)pstr, NULL); |
|
1022 | 1022 |
else |
1023 |
- cli_dbgmsg("cli_pdf: failed to locate DecodeParms dictionary start\n"); |
|
1023 |
+ cli_dbgmsg("pdf_extract_obj: failed to locate DecodeParms dictionary start\n"); |
|
1024 |
+ } |
|
1025 |
+ |
|
1026 |
+ /* |
|
1027 |
+ * Identify if the stream is an object stream. If so, collect the relevant info. |
|
1028 |
+ */ |
|
1029 |
+ len = p_stream; |
|
1030 |
+ if (NULL != (pstr = pdf_getdict(start, &len, "/Type/ObjStm"))) |
|
1031 |
+ { |
|
1032 |
+ int32_t objstm_first = -1; |
|
1033 |
+ int32_t objstm_length = -1; |
|
1034 |
+ int32_t objstm_n = -1; |
|
1035 |
+ |
|
1036 |
+ cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n"); |
|
1037 |
+ |
|
1038 |
+ len = p_stream; |
|
1039 |
+ if ((-1 == (objstm_first = pdf_readint(start, len, "/First")))) |
|
1040 |
+ { |
|
1041 |
+ cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n"); |
|
1042 |
+ } |
|
1043 |
+ else if ((-1 == (objstm_length = pdf_readint(start, len, "/Length")))) |
|
1044 |
+ { |
|
1045 |
+ cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n"); |
|
1046 |
+ } |
|
1047 |
+ else if ((-1 == (objstm_n = pdf_readint(start, len, "/N")))) |
|
1048 |
+ { |
|
1049 |
+ cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n"); |
|
1050 |
+ } |
|
1051 |
+ else |
|
1052 |
+ { |
|
1053 |
+ /* Add objstm to pdf struct, so it can be freed eventually */ |
|
1054 |
+ pdf->nobjstms++; |
|
1055 |
+ pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms); |
|
1056 |
+ if (!pdf->objstms) { |
|
1057 |
+ cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms); |
|
1058 |
+ pdf_free_dict(dparams); |
|
1059 |
+ return CL_EMEM; |
|
1060 |
+ } |
|
1061 |
+ |
|
1062 |
+ objstm = malloc(sizeof(struct objstm_struct)); |
|
1063 |
+ if (!objstm) { |
|
1064 |
+ cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms); |
|
1065 |
+ pdf_free_dict(dparams); |
|
1066 |
+ return CL_EMEM; |
|
1067 |
+ } |
|
1068 |
+ pdf->objstms[pdf->nobjstms-1] = objstm; |
|
1069 |
+ |
|
1070 |
+ memset(objstm, 0, sizeof(*objstm)); |
|
1071 |
+ |
|
1072 |
+ objstm->first = (uint32_t)objstm_first; |
|
1073 |
+ objstm->current = (uint32_t)objstm_first; |
|
1074 |
+ objstm->current_pair = 0; |
|
1075 |
+ objstm->length = (uint32_t)objstm_length; |
|
1076 |
+ objstm->n = (uint32_t)objstm_n; |
|
1077 |
+ |
|
1078 |
+ cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first); |
|
1079 |
+ cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length); |
|
1080 |
+ cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n); |
|
1081 |
+ } |
|
1082 |
+ } |
|
1083 |
+ |
|
1084 |
+ sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc, objstm); |
|
1085 |
+ if (sum < 0) { |
|
1086 |
+ /* |
|
1087 |
+ * If we were expecting an objstm and there was a failure... |
|
1088 |
+ * discard the memory for last object stream. |
|
1089 |
+ */ |
|
1090 |
+ if (NULL != objstm) |
|
1091 |
+ { |
|
1092 |
+ if (NULL != pdf->objstms) { |
|
1093 |
+ if (NULL != pdf->objstms[pdf->nobjstms - 1]) { |
|
1094 |
+ pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL; |
|
1095 |
+ |
|
1096 |
+ free(pdf->objstms[pdf->nobjstms - 1]); |
|
1097 |
+ pdf->objstms[pdf->nobjstms - 1] = NULL; |
|
1098 |
+ } |
|
1099 |
+ |
|
1100 |
+ /* Pop the objstm off the end of the pdf->objstms array. */ |
|
1101 |
+ if (pdf->nobjstms > 0) { |
|
1102 |
+ pdf->nobjstms--; |
|
1103 |
+ pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms); |
|
1104 |
+ |
|
1105 |
+ if (!pdf->objstms) { |
|
1106 |
+ cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n"); |
|
1107 |
+ return CL_EMEM; |
|
1108 |
+ } |
|
1109 |
+ } else { |
|
1110 |
+ /* hm.. this shouldn't happen */ |
|
1111 |
+ cli_warnmsg("pdf_extract_obj: Failure counting objstms.\n"); |
|
1112 |
+ } |
|
1113 |
+ } |
|
1114 |
+ } |
|
1024 | 1115 |
} |
1025 | 1116 |
|
1026 |
- sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc); |
|
1027 | 1117 |
if (dparams) |
1028 | 1118 |
pdf_free_dict(dparams); |
1029 | 1119 |
|
... | ... |
@@ -1034,14 +1625,17 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1034 | 1034 |
|
1035 | 1035 |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
1036 | 1036 |
} else { |
1037 |
- noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
1037 |
+ noisy_warnmsg("pdf_extract_obj: cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
1038 | 1038 |
} |
1039 | 1039 |
|
1040 | 1040 |
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) { |
1041 | 1041 |
const char *q2; |
1042 |
- const char *q = pdf->map+obj->start; |
|
1042 |
+ const char *q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
1043 |
+ : (const char *)(obj->start + pdf->map); |
|
1044 |
+ |
|
1043 | 1045 |
/* TODO: get obj-endobj size */ |
1044 | 1046 |
off_t bytesleft = obj_size(pdf, obj, 0); |
1047 |
+ |
|
1045 | 1048 |
if (bytesleft < 0) |
1046 | 1049 |
break; |
1047 | 1050 |
|
... | ... |
@@ -1066,11 +1660,11 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1066 | 1066 |
const char *out = js; |
1067 | 1067 |
js_len = strlen(js); |
1068 | 1068 |
if (pdf->flags & (1 << DECRYPTABLE_PDF)) { |
1069 |
- cli_dbgmsg("cli_pdf: encrypted string\n"); |
|
1069 |
+ cli_dbgmsg("pdf_extract_obj: encrypted string\n"); |
|
1070 | 1070 |
decrypted = decrypt_any(pdf, obj->id, js, &js_len, pdf->enc_method_string); |
1071 | 1071 |
|
1072 | 1072 |
if (decrypted) { |
1073 |
- noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff); |
|
1073 |
+ noisy_msg(pdf, "pdf_extract_obj: decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff); |
|
1074 | 1074 |
out = decrypted; |
1075 | 1075 |
} |
1076 | 1076 |
} |
... | ... |
@@ -1083,7 +1677,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1083 | 1083 |
|
1084 | 1084 |
free(decrypted); |
1085 | 1085 |
free(js); |
1086 |
- cli_dbgmsg("bytesleft: %d\n", (int)bytesleft); |
|
1086 |
+ cli_dbgmsg("pdf_extract_obj: bytesleft: %d\n", (int)bytesleft); |
|
1087 | 1087 |
|
1088 | 1088 |
if (bytesleft > 0) { |
1089 | 1089 |
q2 = pdf_nextobject(q, bytesleft); |
... | ... |
@@ -1117,8 +1711,8 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1117 | 1117 |
} |
1118 | 1118 |
} while (0); |
1119 | 1119 |
|
1120 |
- cli_dbgmsg("cli_pdf: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff); |
|
1121 |
- cli_dbgmsg(" ... to %s\n", fullname); |
|
1120 |
+ cli_dbgmsg("pdf_extract_obj: extracted %td bytes %u %u obj\n", sum, obj->id>>8, obj->id&0xff); |
|
1121 |
+ cli_dbgmsg("pdf_extract_obj: ... to %s\n", fullname); |
|
1122 | 1122 |
|
1123 | 1123 |
if (flags & PDF_EXTRACT_OBJ_SCAN && sum) { |
1124 | 1124 |
int rc2; |
... | ... |
@@ -1132,20 +1726,25 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
1132 | 1132 |
rc = rc2; |
1133 | 1133 |
|
1134 | 1134 |
if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) { |
1135 |
- rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs); |
|
1135 |
+ unsigned int dumpid = 0; |
|
1136 |
+ for (dumpid = 0; dumpid < pdf->nobjs; dumpid++) { |
|
1137 |
+ if (pdf->objs[dumpid] == obj) |
|
1138 |
+ break; |
|
1139 |
+ } |
|
1140 |
+ rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, dumpid); |
|
1136 | 1141 |
if (rc2 == CL_VIRUS) |
1137 | 1142 |
rc = rc2; |
1138 | 1143 |
} |
1139 | 1144 |
|
1140 | 1145 |
if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) && (obj->flags & (1 << OBJ_CONTENTS))) { |
1141 | 1146 |
lseek(fout, 0, SEEK_SET); |
1142 |
- cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff); |
|
1147 |
+ cli_dbgmsg("pdf_extract_obj: dumping contents %u %u\n", obj->id>>8, obj->id&0xff); |
|
1143 | 1148 |
|
1144 | 1149 |
rc2 = pdf_scan_contents(fout, pdf); |
1145 | 1150 |
if (rc2 == CL_VIRUS) |
1146 | 1151 |
rc = rc2; |
1147 | 1152 |
|
1148 |
- noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
1153 |
+ noisy_msg(pdf, "pdf_extract_obj: extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
1149 | 1154 |
} |
1150 | 1155 |
} |
1151 | 1156 |
|
... | ... |
@@ -1291,7 +1890,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch |
1291 | 1291 |
/* these are digital signature objects, filter doesn't matter, |
1292 | 1292 |
* we don't need them anyway */ |
1293 | 1293 |
if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) { |
1294 |
- cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname); |
|
1294 |
+ cli_dbgmsg("handle_pdfname: unknown filter %s\n", pdfname); |
|
1295 | 1295 |
obj->flags |= 1 << OBJ_FILTER_UNKNOWN; |
1296 | 1296 |
} |
1297 | 1297 |
|
... | ... |
@@ -1305,7 +1904,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch |
1305 | 1305 |
if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) { |
1306 | 1306 |
/* if a commonly used PDF name is escaped that is certainly |
1307 | 1307 |
suspicious. */ |
1308 |
- cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname); |
|
1308 |
+ cli_dbgmsg("handle_pdfname: pdfname %s is escaped\n", pdfname); |
|
1309 | 1309 |
pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME); |
1310 | 1310 |
} |
1311 | 1311 |
|
... | ... |
@@ -1318,7 +1917,7 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch |
1318 | 1318 |
*state = act->to_state; |
1319 | 1319 |
|
1320 | 1320 |
if (*state == STATE_FILTER && act->set_objflag != OBJ_DICT && (obj->flags & (1 << act->set_objflag))) { |
1321 |
- cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname); |
|
1321 |
+ cli_dbgmsg("handle_pdfname: duplicate stream filter %s\n", pdfname); |
|
1322 | 1322 |
pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS); |
1323 | 1323 |
} |
1324 | 1324 |
|
... | ... |
@@ -1335,8 +1934,6 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch |
1335 | 1335 |
} |
1336 | 1336 |
} |
1337 | 1337 |
|
1338 |
-static int pdf_readint(const char *q0, int len, const char *key); |
|
1339 |
- |
|
1340 | 1338 |
static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1341 | 1339 |
{ |
1342 | 1340 |
const char *q, *q2; |
... | ... |
@@ -1361,7 +1958,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1361 | 1361 |
q = q2; |
1362 | 1362 |
|
1363 | 1363 |
if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &objid)) { |
1364 |
- cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse objid\n"); |
|
1364 |
+ cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse objid\n"); |
|
1365 | 1365 |
return; |
1366 | 1366 |
} |
1367 | 1367 |
objid = objid << 8; |
... | ... |
@@ -1372,7 +1969,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1372 | 1372 |
q = q2; |
1373 | 1373 |
|
1374 | 1374 |
if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)len, 0, 10, &genid)) { |
1375 |
- cli_dbgmsg("cli_pdf: Found Encrypt dictionary but failed to parse genid\n"); |
|
1375 |
+ cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse genid\n"); |
|
1376 | 1376 |
return; |
1377 | 1377 |
} |
1378 | 1378 |
objid |= genid & 0xff; |
... | ... |
@@ -1380,7 +1977,7 @@ static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
1380 | 1380 |
if (!q2 || *q2 != 'R') |
1381 | 1381 |
return; |
1382 | 1382 |
|
1383 |
- cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff); |
|
1383 |
+ cli_dbgmsg("pdf_parse_encrypt: Encrypt dictionary in obj %lu %lu\n", objid>>8, objid&0xff); |
|
1384 | 1384 |
|
1385 | 1385 |
pdf->enc_objid = objid; |
1386 | 1386 |
} |
... | ... |
@@ -1410,18 +2007,21 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1410 | 1410 |
char pdfname[64]; |
1411 | 1411 |
const char *q2, *q3; |
1412 | 1412 |
const char *nextobj = NULL, *nextopen = NULL, *nextclose = NULL; |
1413 |
- const char *q = obj->start + pdf->map; |
|
1414 |
- const char *dict, *enddict, *start; |
|
1415 |
- off_t dict_length, full_dict_length; |
|
1416 |
- off_t objsize = obj_size(pdf, obj, 1); |
|
1417 |
- off_t bytesleft; |
|
1418 |
- size_t i; |
|
1419 |
- unsigned filters=0, blockopens=0; |
|
1413 |
+ const char *q = NULL; |
|
1414 |
+ const char *dict = NULL, *enddict = NULL, *start = NULL; |
|
1415 |
+ off_t dict_length = 0, full_dict_length = 0, objsize = 0, bytesleft = 0; |
|
1416 |
+ size_t i = 0; |
|
1417 |
+ unsigned filters = 0, blockopens = 0; |
|
1420 | 1418 |
enum objstate objstate = STATE_NONE; |
1421 | 1419 |
#if HAVE_JSON |
1422 | 1420 |
json_object *pdfobj=NULL, *jsonobj=NULL; |
1423 | 1421 |
#endif |
1424 | 1422 |
|
1423 |
+ q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
1424 |
+ : (const char *)(obj->start + pdf->map); |
|
1425 |
+ |
|
1426 |
+ objsize = obj_size(pdf, obj, 1); |
|
1427 |
+ |
|
1425 | 1428 |
if (objsize < 0) |
1426 | 1429 |
return; |
1427 | 1430 |
|
... | ... |
@@ -1434,7 +2034,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1434 | 1434 |
bytesleft -= nextobj -q; |
1435 | 1435 |
|
1436 | 1436 |
if (!nextobj || bytesleft < 0) { |
1437 |
- cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff); |
|
1437 |
+ cli_dbgmsg("pdf_parseobj: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff); |
|
1438 | 1438 |
#if HAVE_JSON |
1439 | 1439 |
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { |
1440 | 1440 |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"); |
... | ... |
@@ -1465,7 +2065,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1465 | 1465 |
|
1466 | 1466 |
/* find end of dictionary block */ |
1467 | 1467 |
if (bytesleft < 0) { |
1468 |
- cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff); |
|
1468 |
+ cli_dbgmsg("pdf_parseobj: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff); |
|
1469 | 1469 |
#if HAVE_JSON |
1470 | 1470 |
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { |
1471 | 1471 |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"); |
... | ... |
@@ -1517,7 +2117,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1517 | 1517 |
/* Was end of dictionary found? */ |
1518 | 1518 |
if (blockopens) { |
1519 | 1519 |
/* probably truncated */ |
1520 |
- cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff); |
|
1520 |
+ cli_dbgmsg("pdf_parseobj: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff); |
|
1521 | 1521 |
#if HAVE_JSON |
1522 | 1522 |
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { |
1523 | 1523 |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"); |
... | ... |
@@ -1552,7 +2152,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1552 | 1552 |
dictionary[i] = '*'; |
1553 | 1553 |
} |
1554 | 1554 |
dictionary[dict_length] = '\0'; |
1555 |
- cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary); |
|
1555 |
+ cli_dbgmsg("pdf_parseobj: dictionary is <<%s>>\n", dictionary); |
|
1556 | 1556 |
free(dictionary); |
1557 | 1557 |
} |
1558 | 1558 |
} |
... | ... |
@@ -1617,10 +2217,10 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1617 | 1617 |
trailer = 0; |
1618 | 1618 |
|
1619 | 1619 |
q2 = pdf->map + trailer; |
1620 |
- cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end); |
|
1620 |
+ cli_dbgmsg("pdf_parseobj: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end); |
|
1621 | 1621 |
pdf_parse_trailer(pdf, q2, trailer_end - trailer); |
1622 | 1622 |
if (pdf->fileID) |
1623 |
- cli_dbgmsg("cli_pdf: found fileID\n"); |
|
1623 |
+ cli_dbgmsg("pdf_parseobj: found fileID\n"); |
|
1624 | 1624 |
} |
1625 | 1625 |
} |
1626 | 1626 |
|
... | ... |
@@ -1641,7 +2241,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1641 | 1641 |
dict_remaining -= (off_t)(q2 - q); |
1642 | 1642 |
|
1643 | 1643 |
if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &objid)) { |
1644 |
- cli_dbgmsg("cli_pdf: failed to parse object objid\n"); |
|
1644 |
+ cli_dbgmsg("pdf_parseobj: failed to parse object objid\n"); |
|
1645 | 1645 |
return; |
1646 | 1646 |
} |
1647 | 1647 |
objid = objid << 8; |
... | ... |
@@ -1654,7 +2254,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1654 | 1654 |
if (q2 && isdigit(*q2)) { |
1655 | 1655 |
dict_remaining -= (off_t)(q2 - q2_old); |
1656 | 1656 |
if (CL_SUCCESS != cli_strntoul_wrap(q2, (size_t)dict_remaining, 0, 10, &genid)) { |
1657 |
- cli_dbgmsg("cli_pdf: failed to parse object genid\n"); |
|
1657 |
+ cli_dbgmsg("pdf_parseobj: failed to parse object genid\n"); |
|
1658 | 1658 |
return; |
1659 | 1659 |
} |
1660 | 1660 |
objid |= genid & 0xff; |
... | ... |
@@ -1663,7 +2263,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1663 | 1663 |
if (q2 && *q2 == 'R') { |
1664 | 1664 |
struct pdf_obj *obj2; |
1665 | 1665 |
|
1666 |
- cli_dbgmsg("cli_pdf: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff); |
|
1666 |
+ cli_dbgmsg("pdf_parseobj: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid&0xff); |
|
1667 | 1667 |
obj2 = find_obj(pdf, obj, objid); |
1668 | 1668 |
if (obj2) { |
1669 | 1669 |
enum pdf_objflags flag = |
... | ... |
@@ -1707,7 +2307,7 @@ void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1707 | 1707 |
if (obj->flags & (1 << OBJ_FILTER_UNKNOWN)) |
1708 | 1708 |
pdfobj_flag(pdf, obj, UNKNOWN_FILTER); |
1709 | 1709 |
|
1710 |
- cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags); |
|
1710 |
+ cli_dbgmsg("pdf_parseobj: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags); |
|
1711 | 1711 |
} |
1712 | 1712 |
|
1713 | 1713 |
/** |
... | ... |
@@ -1725,7 +2325,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key) |
1725 | 1725 |
const char *q; |
1726 | 1726 |
|
1727 | 1727 |
if (*len <= 0) { |
1728 |
- cli_dbgmsg("cli_pdf: bad length %d\n", *len); |
|
1728 |
+ cli_dbgmsg("pdf_getdict: bad length %d\n", *len); |
|
1729 | 1729 |
return NULL; |
1730 | 1730 |
} |
1731 | 1731 |
|
... | ... |
@@ -1735,7 +2335,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key) |
1735 | 1735 |
/* find the key */ |
1736 | 1736 |
q = cli_memstr(q0, *len, key, strlen(key)); |
1737 | 1737 |
if (!q) { |
1738 |
- cli_dbgmsg("cli_pdf: %s not found in dict\n", key); |
|
1738 |
+ cli_dbgmsg("pdf_getdict: %s not found in dict\n", key); |
|
1739 | 1739 |
return NULL; |
1740 | 1740 |
} |
1741 | 1741 |
|
... | ... |
@@ -1745,7 +2345,7 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key) |
1745 | 1745 |
/* find the start of the value object */ |
1746 | 1746 |
q = pdf_nextobject(q0 + 1, *len - 1); |
1747 | 1747 |
if (!q) { |
1748 |
- cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key); |
|
1748 |
+ cli_dbgmsg("pdf_getdict: %s is invalid in dict\n", key); |
|
1749 | 1749 |
return NULL; |
1750 | 1750 |
} |
1751 | 1751 |
|
... | ... |
@@ -1891,12 +2491,12 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned * |
1891 | 1891 |
|
1892 | 1892 |
s = cli_malloc((q - start)/2 + 1); |
1893 | 1893 |
if (s == NULL) { /* oops, couldn't allocate memory */ |
1894 |
- cli_dbgmsg("cli_pdf: unable to allocate memory...\n"); |
|
1894 |
+ cli_dbgmsg("pdf_readstring: unable to allocate memory...\n"); |
|
1895 | 1895 |
return NULL; |
1896 | 1896 |
} |
1897 | 1897 |
|
1898 | 1898 |
if (cli_hex2str_to(start, s, q - start)) { |
1899 |
- cli_dbgmsg("cli_pdf: %s has bad hex value\n", key); |
|
1899 |
+ cli_dbgmsg("pdf_readstring: %s has bad hex value\n", key); |
|
1900 | 1900 |
free(s); |
1901 | 1901 |
return NULL; |
1902 | 1902 |
} |
... | ... |
@@ -1908,7 +2508,7 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned * |
1908 | 1908 |
return s; |
1909 | 1909 |
} |
1910 | 1910 |
|
1911 |
- cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key); |
|
1911 |
+ cli_dbgmsg("pdf_readstring: %s is invalid string in dict\n", key); |
|
1912 | 1912 |
return NULL; |
1913 | 1913 |
} |
1914 | 1914 |
|
... | ... |
@@ -1982,7 +2582,7 @@ static int pdf_readbool(const char *q0, int len, const char *key, int Default) |
1982 | 1982 |
if (!strncmp(q, "false", 5)) |
1983 | 1983 |
return 0; |
1984 | 1984 |
|
1985 |
- cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key); |
|
1985 |
+ cli_dbgmsg("pdf_readbool: invalid value for %s bool\n", key); |
|
1986 | 1986 |
|
1987 | 1987 |
return Default; |
1988 | 1988 |
} |
... | ... |
@@ -2032,8 +2632,8 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O, |
2032 | 2032 |
cl_sha256(U+40, 8, result2, NULL); |
2033 | 2033 |
UE_len = UE ? strlen(UE) : 0; |
2034 | 2034 |
if (UE_len != 32) { |
2035 |
- cli_dbgmsg("cli_pdf: UE length is not 32: %zu\n", UE_len); |
|
2036 |
- noisy_warnmsg("cli_pdf: UE length is not 32: %zu\n", UE_len); |
|
2035 |
+ cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); |
|
2036 |
+ noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); |
|
2037 | 2037 |
} else { |
2038 | 2038 |
pdf->keylen = 32; |
2039 | 2039 |
pdf->key = cli_malloc(32); |
... | ... |
@@ -2043,7 +2643,7 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O, |
2043 | 2043 |
} |
2044 | 2044 |
|
2045 | 2045 |
aes_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0); |
2046 |
- dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen); |
|
2046 |
+ dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); |
|
2047 | 2047 |
} |
2048 | 2048 |
} |
2049 | 2049 |
} else if ((R >= 2) && (R <= 4)) { |
... | ... |
@@ -2129,27 +2729,27 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O, |
2129 | 2129 |
password_empty = 1; |
2130 | 2130 |
free(d); |
2131 | 2131 |
} else { |
2132 |
- cli_dbgmsg("cli_pdf: invalid revision %d\n", R); |
|
2133 |
- noisy_warnmsg("cli_pdf: invalid revision %d\n", R); |
|
2132 |
+ cli_dbgmsg("check_user_password: invalid revision %d\n", R); |
|
2133 |
+ noisy_warnmsg("check_user_password: invalid revision %d\n", R); |
|
2134 | 2134 |
} |
2135 | 2135 |
} else { |
2136 | 2136 |
/* Supported R is in {2,3,4,5} */ |
2137 |
- cli_dbgmsg("cli_pdf: R value out of range\n"); |
|
2138 |
- noisy_warnmsg("cli_pdf: R value out of range\n"); |
|
2137 |
+ cli_dbgmsg("check_user_password: R value out of range\n"); |
|
2138 |
+ noisy_warnmsg("check_user_password: R value out of range\n"); |
|
2139 | 2139 |
|
2140 | 2140 |
return; |
2141 | 2141 |
} |
2142 | 2142 |
|
2143 | 2143 |
if (password_empty) { |
2144 |
- cli_dbgmsg("cli_pdf: user password is empty\n"); |
|
2145 |
- noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n"); |
|
2144 |
+ cli_dbgmsg("check_user_password: user password is empty\n"); |
|
2145 |
+ noisy_msg(pdf, "check_user_password: encrypted PDF found, user password is empty, will attempt to decrypt\n"); |
|
2146 | 2146 |
/* The key we computed above is the key used to encrypt the streams. |
2147 | 2147 |
* We could decrypt it now if we wanted to */ |
2148 | 2148 |
pdf->flags |= 1 << DECRYPTABLE_PDF; |
2149 | 2149 |
} else { |
2150 | 2150 |
/* the key is not valid, we would need the user or the owner password to decrypt */ |
2151 |
- cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n"); |
|
2152 |
- noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n"); |
|
2151 |
+ cli_dbgmsg("check_user_password: user/owner password would be required for decryption\n"); |
|
2152 |
+ noisy_warnmsg("check_user_password: encrypted PDF found, user password is NOT empty, cannot decrypt!\n"); |
|
2153 | 2153 |
} |
2154 | 2154 |
} |
2155 | 2155 |
|
... | ... |
@@ -2171,7 +2771,7 @@ enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key |
2171 | 2171 |
|
2172 | 2172 |
CFM = pdf_readval(q, len, "/CFM"); |
2173 | 2173 |
if (CFM) { |
2174 |
- cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM); |
|
2174 |
+ cli_dbgmsg("parse_enc_method: %s CFM: %s\n", key, CFM); |
|
2175 | 2175 |
if (!strncmp(CFM,"V2", 2)) |
2176 | 2176 |
ret = ENC_V2; |
2177 | 2177 |
else if (!strncmp(CFM,"AESV2",5)) |
... | ... |
@@ -2197,15 +2797,15 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2197 | 2197 |
if (pdf->enc_objid == ~0u) |
2198 | 2198 |
return; |
2199 | 2199 |
if (!pdf->fileID) { |
2200 |
- cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n"); |
|
2201 |
- noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n"); |
|
2200 |
+ cli_dbgmsg("pdf_handle_enc: no file ID\n"); |
|
2201 |
+ noisy_warnmsg("pdf_handle_enc: no file ID\n"); |
|
2202 | 2202 |
return; |
2203 | 2203 |
} |
2204 | 2204 |
|
2205 |
- obj = find_obj(pdf, pdf->objs, pdf->enc_objid); |
|
2205 |
+ obj = find_obj(pdf, pdf->objs[0], pdf->enc_objid); |
|
2206 | 2206 |
if (!obj) { |
2207 |
- cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff); |
|
2208 |
- noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff); |
|
2207 |
+ cli_dbgmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff); |
|
2208 |
+ noisy_warnmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff); |
|
2209 | 2209 |
return; |
2210 | 2210 |
} |
2211 | 2211 |
|
... | ... |
@@ -2220,15 +2820,15 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2220 | 2220 |
pdf->enc_method_embeddedfile = ENC_UNKNOWN; |
2221 | 2221 |
P = pdf_readint(q, len, "/P"); |
2222 | 2222 |
if (P == ~0u) { |
2223 |
- cli_dbgmsg("cli_pdf: invalid P\n"); |
|
2224 |
- noisy_warnmsg("cli_pdf: invalid P\n"); |
|
2223 |
+ cli_dbgmsg("pdf_handle_enc: invalid P\n"); |
|
2224 |
+ noisy_warnmsg("pdf_handle_enc: invalid P\n"); |
|
2225 | 2225 |
break; |
2226 | 2226 |
} |
2227 | 2227 |
|
2228 | 2228 |
q2 = cli_memstr(q, len, "/Standard", 9); |
2229 | 2229 |
if (!q2) { |
2230 |
- cli_dbgmsg("cli_pdf: /Standard not found\n"); |
|
2231 |
- noisy_warnmsg("cli_pdf: /Standard not found\n"); |
|
2230 |
+ cli_dbgmsg("pdf_handle_enc: /Standard not found\n"); |
|
2231 |
+ noisy_warnmsg("pdf_handle_enc: /Standard not found\n"); |
|
2232 | 2232 |
break; |
2233 | 2233 |
} |
2234 | 2234 |
|
... | ... |
@@ -2241,20 +2841,20 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2241 | 2241 |
length = pdf_readint(q, len, "/Length"); |
2242 | 2242 |
|
2243 | 2243 |
if (length < 40) { |
2244 |
- cli_dbgmsg("cli_pdf: invalid length: %d\n", length); |
|
2244 |
+ cli_dbgmsg("pdf_handle_enc: invalid length: %d\n", length); |
|
2245 | 2245 |
length = 40; |
2246 | 2246 |
} |
2247 | 2247 |
|
2248 | 2248 |
R = pdf_readint(q, len, "/R"); |
2249 | 2249 |
if (R == ~0u) { |
2250 |
- cli_dbgmsg("cli_pdf: invalid R\n"); |
|
2251 |
- noisy_warnmsg("cli_pdf: invalid R\n"); |
|
2250 |
+ cli_dbgmsg("pdf_handle_enc: invalid R\n"); |
|
2251 |
+ noisy_warnmsg("pdf_handle_enc: invalid R\n"); |
|
2252 | 2252 |
break; |
2253 | 2253 |
} |
2254 | 2254 |
|
2255 | 2255 |
if ((R > 5) || (R < 2)) { |
2256 |
- cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n"); |
|
2257 |
- noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n"); |
|
2256 |
+ cli_dbgmsg("pdf_handle_enc: R value outside supported range [2..5]\n"); |
|
2257 |
+ noisy_warnmsg("pdf_handle_enc: R value outside supported range [2..5]\n"); |
|
2258 | 2258 |
break; |
2259 | 2259 |
} |
2260 | 2260 |
|
... | ... |
@@ -2277,11 +2877,11 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2277 | 2277 |
pdf->CF_n = n; |
2278 | 2278 |
|
2279 | 2279 |
if (StmF) |
2280 |
- cli_dbgmsg("cli_pdf: StmF: %s\n", StmF); |
|
2280 |
+ cli_dbgmsg("pdf_handle_enc: StmF: %s\n", StmF); |
|
2281 | 2281 |
if (StrF) |
2282 |
- cli_dbgmsg("cli_pdf: StrF: %s\n", StrF); |
|
2282 |
+ cli_dbgmsg("pdf_handle_enc: StrF: %s\n", StrF); |
|
2283 | 2283 |
if (EFF) |
2284 |
- cli_dbgmsg("cli_pdf: EFF: %s\n", EFF); |
|
2284 |
+ cli_dbgmsg("pdf_handle_enc: EFF: %s\n", EFF); |
|
2285 | 2285 |
|
2286 | 2286 |
pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY); |
2287 | 2287 |
pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY); |
... | ... |
@@ -2291,7 +2891,7 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2291 | 2291 |
free(StrF); |
2292 | 2292 |
free(EFF); |
2293 | 2293 |
|
2294 |
- cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n", EM ? "true" : "false"); |
|
2294 |
+ cli_dbgmsg("pdf_handle_enc: EncryptMetadata: %s\n", EM ? "true" : "false"); |
|
2295 | 2295 |
|
2296 | 2296 |
if (R == 4) { |
2297 | 2297 |
length = 128; |
... | ... |
@@ -2308,8 +2908,8 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2308 | 2308 |
n = 0; |
2309 | 2309 |
O = pdf_readstring(q, len, "/O", &n, NULL, 0); |
2310 | 2310 |
if (!O || n < oulen) { |
2311 |
- cli_dbgmsg("cli_pdf: invalid O: %d\n", n); |
|
2312 |
- cli_dbgmsg("cli_pdf: invalid O: %d\n", n); |
|
2311 |
+ cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); |
|
2312 |
+ cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); |
|
2313 | 2313 |
if (O) |
2314 | 2314 |
dbg_printhex("invalid O", O, n); |
2315 | 2315 |
|
... | ... |
@@ -2321,8 +2921,8 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2321 | 2321 |
break; |
2322 | 2322 |
|
2323 | 2323 |
if (i != n) { |
2324 |
- dbg_printhex("too long O", O, n); |
|
2325 |
- noisy_warnmsg("too long O: %u", n); |
|
2324 |
+ dbg_printhex("pdf_handle_enc: too long O", O, n); |
|
2325 |
+ noisy_warnmsg("pdf_handle_enc: too long O: %u", n); |
|
2326 | 2326 |
break; |
2327 | 2327 |
} |
2328 | 2328 |
} |
... | ... |
@@ -2330,8 +2930,8 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2330 | 2330 |
n = 0; |
2331 | 2331 |
U = pdf_readstring(q, len, "/U", &n, NULL, 0); |
2332 | 2332 |
if (!U || n < oulen) { |
2333 |
- cli_dbgmsg("cli_pdf: invalid U: %u\n", n); |
|
2334 |
- noisy_warnmsg("cli_pdf: invalid U: %u\n", n); |
|
2333 |
+ cli_dbgmsg("pdf_handle_enc: invalid U: %u\n", n); |
|
2334 |
+ noisy_warnmsg("pdf_handle_enc: invalid U: %u\n", n); |
|
2335 | 2335 |
|
2336 | 2336 |
if (U) |
2337 | 2337 |
dbg_printhex("invalid U", U, n); |
... | ... |
@@ -2349,10 +2949,10 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2349 | 2349 |
} |
2350 | 2350 |
} |
2351 | 2351 |
|
2352 |
- cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %u\n", R, P, length); |
|
2352 |
+ cli_dbgmsg("pdf_handle_enc: Encrypt R: %d, P %x, length: %u\n", R, P, length); |
|
2353 | 2353 |
if (length % 8) { |
2354 |
- cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n"); |
|
2355 |
- noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n"); |
|
2354 |
+ cli_dbgmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); |
|
2355 |
+ noisy_warnmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); |
|
2356 | 2356 |
break; |
2357 | 2357 |
} |
2358 | 2358 |
check_user_password(pdf, R, O, U, P, EM, UE, length, oulen); |
... | ... |
@@ -2363,8 +2963,216 @@ void pdf_handle_enc(struct pdf_struct *pdf) |
2363 | 2363 |
free(UE); |
2364 | 2364 |
} |
2365 | 2365 |
|
2366 |
+/** |
|
2367 |
+ * @brief Search pdf buffer for objects. Parse each. |
|
2368 |
+ * |
|
2369 |
+ * Newly found objects will be extracted after completion when the extraction for loop continues. |
|
2370 |
+ * |
|
2371 |
+ * @param pdf Pdf struct that keeps track of all information found in the PDF. |
|
2372 |
+ * @param objstm Pointer to an object stream to parse. |
|
2373 |
+ * |
|
2374 |
+ * @return cl_error_t Error code. |
|
2375 |
+ */ |
|
2376 |
+cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm) |
|
2377 |
+{ |
|
2378 |
+ cl_error_t status = CL_EFORMAT; |
|
2379 |
+ cl_error_t retval = CL_EPARSE; |
|
2380 |
+ int32_t foundobj = 0, alerts = 0; |
|
2381 |
+ uint32_t badobjects = 0; |
|
2382 |
+ size_t i = 0; |
|
2383 |
+ |
|
2384 |
+ struct pdf_obj* obj = NULL; |
|
2385 |
+ |
|
2386 |
+ char* current_pair = objstm->streambuf; |
|
2387 |
+ char* current_obj = objstm->streambuf + objstm->first; |
|
2388 |
+ |
|
2389 |
+ if ((0 == objstm->first) || |
|
2390 |
+ (0 == objstm->streambuf_len) || |
|
2391 |
+ (0 == objstm->n)) |
|
2392 |
+ { |
|
2393 |
+ cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Empty object stream.\n"); |
|
2394 |
+ goto done; |
|
2395 |
+ } |
|
2396 |
+ |
|
2397 |
+ if (objstm->first >= objstm->streambuf_len) |
|
2398 |
+ { |
|
2399 |
+ cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Invalid objstm values. Offset of first obj greater than stream length.\n"); |
|
2400 |
+ goto done; |
|
2401 |
+ } |
|
2402 |
+ |
|
2403 |
+ /* Process each object */ |
|
2404 |
+ for (i = 0; i < objstm->n; i++) |
|
2405 |
+ { |
|
2406 |
+ obj = NULL; |
|
2407 |
+ |
|
2408 |
+ if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { |
|
2409 |
+ cli_errmsg("Timeout reached in the PDF parser while parsing object stream.\n"); |
|
2410 |
+ status = CL_ETIMEOUT; |
|
2411 |
+ goto done; |
|
2412 |
+ } |
|
2413 |
+ |
|
2414 |
+ /* Find object */ |
|
2415 |
+ retval = pdf_findobj_in_objstm(pdf, objstm, &obj); |
|
2416 |
+ |
|
2417 |
+ if (retval != CL_SUCCESS) |
|
2418 |
+ { |
|
2419 |
+ cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n", |
|
2420 |
+ objstm->nobjs_found, objstm->n); |
|
2421 |
+ badobjects++; |
|
2422 |
+ pdf->stats.ninvalidobjs++; |
|
2423 |
+ break; |
|
2424 |
+ } |
|
2425 |
+ |
|
2426 |
+ cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Found object %u %u in object stream at offset: %u\n", obj->id >> 8, obj->id & 0xff, obj->start); |
|
2427 |
+ |
|
2428 |
+ if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { |
|
2429 |
+ cli_errmsg("Timeout reached in the PDF parser while parsing object stream.\n"); |
|
2430 |
+ status = CL_ETIMEOUT; |
|
2431 |
+ goto done; |
|
2432 |
+ } |
|
2433 |
+ |
|
2434 |
+ /* Parse object */ |
|
2435 |
+ pdf_parseobj(pdf, obj); |
|
2436 |
+ } |
|
2437 |
+ |
|
2438 |
+ if (alerts) { |
|
2439 |
+ status = CL_VIRUS; |
|
2440 |
+ goto done; |
|
2441 |
+ } |
|
2442 |
+ else if (badobjects) { |
|
2443 |
+ status = CL_EFORMAT; |
|
2444 |
+ goto done; |
|
2445 |
+ } |
|
2446 |
+ |
|
2447 |
+ status = CL_SUCCESS; |
|
2448 |
+ |
|
2449 |
+done: |
|
2450 |
+ return status; |
|
2451 |
+} |
|
2452 |
+ |
|
2453 |
+/** |
|
2454 |
+ * @brief Search pdf buffer for objects. Parse each and then extract each. |
|
2455 |
+ * |
|
2456 |
+ * @param pdf Pdf struct that keeps track of all information found in the PDF. |
|
2457 |
+ * @param alerts[in/out] The number of alerts, relevant in ALLMATCH mode. |
|
2458 |
+ * |
|
2459 |
+ * @return cl_error_t Error code. |
|
2460 |
+ */ |
|
2461 |
+cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts) |
|
2462 |
+{ |
|
2463 |
+ cl_error_t status = CL_SUCCESS; |
|
2464 |
+ int32_t rv = 0; |
|
2465 |
+ int foundobj = 0; |
|
2466 |
+ unsigned int i = 0, j = 0; |
|
2467 |
+ uint32_t badobjects = 0; |
|
2468 |
+ |
|
2469 |
+ /* parse PDF and find obj offsets */ |
|
2470 |
+ while (CL_BREAK != (rv = pdf_findobj(pdf))) { |
|
2471 |
+ if (rv == CL_EMEM) { |
|
2472 |
+ break; |
|
2473 |
+ } |
|
2474 |
+ } |
|
2475 |
+ |
|
2476 |
+ if (rv == -1) |
|
2477 |
+ pdf->flags |= 1 << BAD_PDF_TOOMANYOBJS; |
|
2478 |
+ |
|
2479 |
+ /* must parse after finding all objs, so we can flag indirect objects */ |
|
2480 |
+ for (i=0; i < pdf->nobjs; i++) { |
|
2481 |
+ struct pdf_obj *obj = pdf->objs[i]; |
|
2482 |
+ |
|
2483 |
+ if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { |
|
2484 |
+ cli_errmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while parsing objects.\n"); |
|
2485 |
+ |
|
2486 |
+ status = CL_ETIMEOUT; |
|
2487 |
+ goto done; |
|
2488 |
+ } |
|
2489 |
+ |
|
2490 |
+ pdf_parseobj(pdf, obj); |
|
2491 |
+ } |
|
2492 |
+ |
|
2493 |
+ pdf_handle_enc(pdf); |
|
2494 |
+ if (pdf->flags & (1 << ENCRYPTED_PDF)) |
|
2495 |
+ cli_dbgmsg("pdf_find_and_extract_objs: encrypted pdf found, %s!\n", |
|
2496 |
+ (pdf->flags & (1 << DECRYPTABLE_PDF)) ? |
|
2497 |
+ "decryptable" : "not decryptable, stream will probably fail to decompress"); |
|
2498 |
+ |
|
2499 |
+ if ((pdf->ctx->options & CL_SCAN_BLOCKENCRYPTED) && |
|
2500 |
+ (pdf->flags & (1 << ENCRYPTED_PDF)) && |
|
2501 |
+ !(pdf->flags & (1 << DECRYPTABLE_PDF))) |
|
2502 |
+ { |
|
2503 |
+ /* It is encrypted, and a password/key needs to be supplied to decrypt. |
|
2504 |
+ * This doesn't trigger for PDFs that are encrypted but don't need |
|
2505 |
+ * a password to decrypt */ |
|
2506 |
+ status = cli_append_virus(pdf->ctx, "Heuristics.Encrypted.PDF"); |
|
2507 |
+ if (status == CL_VIRUS) { |
|
2508 |
+ alerts++; |
|
2509 |
+ if (pdf->ctx->options & CL_SCAN_ALLMATCHES) |
|
2510 |
+ status = CL_CLEAN; |
|
2511 |
+ } |
|
2512 |
+ } |
|
2513 |
+ |
|
2514 |
+ if (!status) { |
|
2515 |
+ status = run_pdf_hooks(pdf, PDF_PHASE_PARSED, -1, -1); |
|
2516 |
+ cli_dbgmsg("pdf_find_and_extract_objs: (parsed hooks) returned %d\n", status); |
|
2517 |
+ if (status == CL_VIRUS) { |
|
2518 |
+ alerts++; |
|
2519 |
+ if (pdf->ctx->options & CL_SCAN_ALLMATCHES) { |
|
2520 |
+ status = CL_CLEAN; |
|
2521 |
+ } |
|
2522 |
+ } |
|
2523 |
+ } |
|
2524 |
+ |
|
2525 |
+ /* extract PDF objs */ |
|
2526 |
+ for (i=0; !status && i < pdf->nobjs; i++) { |
|
2527 |
+ struct pdf_obj *obj = pdf->objs[i]; |
|
2528 |
+ |
|
2529 |
+ if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { |
|
2530 |
+ cli_errmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while extracting objects.\n"); |
|
2531 |
+ |
|
2532 |
+ status = CL_ETIMEOUT; |
|
2533 |
+ goto done; |
|
2534 |
+ } |
|
2535 |
+ |
|
2536 |
+ status = pdf_extract_obj(pdf, obj, PDF_EXTRACT_OBJ_SCAN); |
|
2537 |
+ switch (status) { |
|
2538 |
+ case CL_EFORMAT: |
|
2539 |
+ /* Don't halt on one bad object */ |
|
2540 |
+ cli_dbgmsg("pdf_find_and_extract_objs: Format error when extracting object, skipping to the next object.\n"); |
|
2541 |
+ badobjects++; |
|
2542 |
+ pdf->stats.ninvalidobjs++; |
|
2543 |
+ status = CL_CLEAN; |
|
2544 |
+ break; |
|
2545 |
+ case CL_VIRUS: |
|
2546 |
+ alerts++; |
|
2547 |
+ if (pdf->ctx->options & CL_SCAN_ALLMATCHES) { |
|
2548 |
+ status = CL_CLEAN; |
|
2549 |
+ } |
|
2550 |
+ break; |
|
2551 |
+ default: |
|
2552 |
+ break; |
|
2553 |
+ } |
|
2554 |
+ } |
|
2555 |
+ |
|
2556 |
+done: |
|
2557 |
+ if (!status && badobjects) { |
|
2558 |
+ status = CL_EFORMAT; |
|
2559 |
+ } |
|
2560 |
+ |
|
2561 |
+ return status; |
|
2562 |
+} |
|
2563 |
+ |
|
2564 |
+/** |
|
2565 |
+ * @brief Primary function for parsing and scanning a PDF. |
|
2566 |
+ * |
|
2567 |
+ * @param dir Filepath for temp file. |
|
2568 |
+ * @param ctx clam scan context structure. |
|
2569 |
+ * @param offset offset of pdf in ctx->fmap |
|
2570 |
+ * |
|
2571 |
+ * @return int Returns cl_error_t status value. |
|
2572 |
+ */ |
|
2366 | 2573 |
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2367 | 2574 |
{ |
2575 |
+ cl_error_t rc = CL_SUCCESS; |
|
2368 | 2576 |
struct pdf_struct pdf; |
2369 | 2577 |
fmap_t *map = *ctx->fmap; |
2370 | 2578 |
size_t size = map->len - offset; |
... | ... |
@@ -2372,8 +3180,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2372 | 2372 |
off_t map_off, bytesleft; |
2373 | 2373 |
unsigned long xref; |
2374 | 2374 |
const char *pdfver, *tmp, *start, *eofmap, *q, *eof; |
2375 |
- int rc, badobjects = 0; |
|
2376 | 2375 |
unsigned i, alerts = 0; |
2376 |
+ unsigned int objs_found = 0; |
|
2377 | 2377 |
#if HAVE_JSON |
2378 | 2378 |
json_object *pdfobj=NULL; |
2379 | 2379 |
char *begin, *end, *p1; |
... | ... |
@@ -2390,7 +3198,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2390 | 2390 |
/* Check PDF version */ |
2391 | 2391 |
if (!pdfver) { |
2392 | 2392 |
cli_errmsg("cli_pdf: mmap() failed (1)\n"); |
2393 |
- return CL_EMAP; |
|
2393 |
+ rc = CL_EMAP; |
|
2394 |
+ goto done; |
|
2394 | 2395 |
} |
2395 | 2396 |
|
2396 | 2397 |
#if HAVE_JSON |
... | ... |
@@ -2406,14 +3215,16 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2406 | 2406 |
#if HAVE_JSON |
2407 | 2407 |
pdf_export_json(&pdf); |
2408 | 2408 |
#endif |
2409 |
- return CL_SUCCESS; |
|
2409 |
+ rc = CL_SUCCESS; |
|
2410 |
+ goto done; |
|
2410 | 2411 |
} |
2411 | 2412 |
|
2412 | 2413 |
versize -= tmp - pdfver; |
2413 | 2414 |
pdfver = tmp; |
2414 | 2415 |
|
2415 | 2416 |
if (versize < 8) { |
2416 |
- return CL_EFORMAT; |
|
2417 |
+ rc = CL_EFORMAT; |
|
2418 |
+ goto done; |
|
2417 | 2419 |
} |
2418 | 2420 |
|
2419 | 2421 |
/* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */ |
... | ... |
@@ -2463,10 +3274,9 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2463 | 2463 |
eofmap = fmap_need_off_once(map, map_off, bytesleft); |
2464 | 2464 |
if (!eofmap) { |
2465 | 2465 |
cli_errmsg("cli_pdf: mmap() failed (2)\n"); |
2466 |
-#if HAVE_JSON |
|
2467 |
- pdf_export_json(&pdf); |
|
2468 |
-#endif |
|
2469 |
- return CL_EMAP; |
|
2466 |
+ |
|
2467 |
+ rc = CL_EMAP; |
|
2468 |
+ goto done; |
|
2470 | 2469 |
} |
2471 | 2470 |
|
2472 | 2471 |
eof = eofmap + bytesleft; |
... | ... |
@@ -2533,10 +3343,9 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2533 | 2533 |
pdf.map = fmap_need_off(map, offset, size); |
2534 | 2534 |
if (!pdf.map) { |
2535 | 2535 |
cli_errmsg("cli_pdf: mmap() failed (3)\n"); |
2536 |
-#if HAVE_JSON |
|
2537 |
- pdf_export_json(&pdf); |
|
2538 |
-#endif |
|
2539 |
- return CL_EMAP; |
|
2536 |
+ |
|
2537 |
+ rc = CL_EMAP; |
|
2538 |
+ goto done; |
|
2540 | 2539 |
} |
2541 | 2540 |
|
2542 | 2541 |
pdf.startoff = offset; |
... | ... |
@@ -2548,127 +3357,28 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2548 | 2548 |
rc = CL_CLEAN; |
2549 | 2549 |
} else if (rc) { |
2550 | 2550 |
cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc); |
2551 |
-#if HAVE_JSON |
|
2552 |
- pdf_export_json(&pdf); |
|
2553 |
-#endif |
|
2554 |
- return rc == CL_BREAK ? CL_CLEAN : rc; |
|
2555 |
- } |
|
2556 |
- |
|
2557 |
- /* parse PDF and find obj offsets */ |
|
2558 |
- while ((rc = pdf_findobj(&pdf)) > 0) { |
|
2559 |
- if (rc == 1) { |
|
2560 |
- struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1]; |
|
2561 |
- |
|
2562 |
- cli_dbgmsg("cli_pdf: found %d %d obj @%lld\n", obj->id >> 8, obj->id&0xff, (long long)(obj->start + offset)); |
|
2563 |
- } |
|
2564 |
- else if (rc == 2) { |
|
2565 |
- pdf.nobjs--; |
|
2566 |
- cli_dbgmsg("cli_pdf: Failed to parse object, likely an oversight in parser design.\n"); |
|
2567 |
- } |
|
2568 |
- else { |
|
2569 |
- pdf.nobjs--; |
|
2570 |
- cli_dbgmsg("cli_pdf: unexpected return code %d.\n", rc); |
|
2571 |
- } |
|
2572 |
- } |
|
2573 |
- |
|
2574 |
- if (pdf.nobjs) |
|
2575 |
- pdf.nobjs--; |
|
2576 |
- |
|
2577 |
- if (rc == -1) |
|
2578 |
- pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS; |
|
2579 |
- |
|
2580 |
- /* must parse after finding all objs, so we can flag indirect objects */ |
|
2581 |
- for (i=0;i<pdf.nobjs;i++) { |
|
2582 |
- struct pdf_obj *obj = &pdf.objs[i]; |
|
2583 | 2551 |
|
2584 |
- if (cli_checktimelimit(ctx) != CL_SUCCESS) { |
|
2585 |
- cli_errmsg("Timeout reached in the PDF parser\n"); |
|
2586 |
-#if HAVE_JSON |
|
2587 |
- pdf_export_json(&pdf); |
|
2588 |
-#endif |
|
2589 |
- free(pdf.objs); |
|
2590 |
- if (pdf.fileID) |
|
2591 |
- free(pdf.fileID); |
|
2592 |
- if (pdf.key) |
|
2593 |
- free(pdf.key); |
|
2594 |
- return CL_ETIMEOUT; |
|
2595 |
- } |
|
2596 |
- |
|
2597 |
- pdf_parseobj(&pdf, obj); |
|
2598 |
- } |
|
2599 |
- |
|
2600 |
- pdf_handle_enc(&pdf); |
|
2601 |
- if (pdf.flags & (1 << ENCRYPTED_PDF)) |
|
2602 |
- cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n", |
|
2603 |
- (pdf.flags & (1 << DECRYPTABLE_PDF)) ? |
|
2604 |
- "decryptable" : "not decryptable, stream will probably fail to decompress"); |
|
2605 |
- |
|
2606 |
- if (DETECT_ENCRYPTED && |
|
2607 |
- (pdf.flags & (1 << ENCRYPTED_PDF)) && |
|
2608 |
- !(pdf.flags & (1 << DECRYPTABLE_PDF))) { |
|
2609 |
- /* It is encrypted, and a password/key needs to be supplied to decrypt. |
|
2610 |
- * This doesn't trigger for PDFs that are encrypted but don't need |
|
2611 |
- * a password to decrypt */ |
|
2612 |
- rc = cli_append_virus(ctx, "Heuristics.Encrypted.PDF"); |
|
2613 |
- if (rc == CL_VIRUS) { |
|
2614 |
- alerts++; |
|
2615 |
- if (SCAN_ALL) |
|
2616 |
- rc = CL_CLEAN; |
|
2617 |
- } |
|
2618 |
- } |
|
2619 |
- |
|
2620 |
- if (!rc) { |
|
2621 |
- rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1); |
|
2622 |
- cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc); |
|
2623 |
- if (rc == CL_VIRUS) { |
|
2624 |
- alerts++; |
|
2625 |
- if (SCAN_ALL) { |
|
2626 |
- rc = CL_CLEAN; |
|
2627 |
- } |
|
2628 |
- } |
|
2552 |
+ rc = rc == CL_BREAK ? CL_CLEAN : rc; |
|
2553 |
+ goto done; |
|
2629 | 2554 |
} |
2630 | 2555 |
|
2631 |
- /* extract PDF objs */ |
|
2632 |
- for (i=0;!rc && i<pdf.nobjs;i++) { |
|
2633 |
- struct pdf_obj *obj = &pdf.objs[i]; |
|
2634 |
- |
|
2635 |
- if (cli_checktimelimit(ctx) != CL_SUCCESS) { |
|
2636 |
- cli_errmsg("Timeout reached in the PDF parser\n"); |
|
2637 |
-#if HAVE_JSON |
|
2638 |
- pdf_export_json(&pdf); |
|
2639 |
-#endif |
|
2640 |
- free(pdf.objs); |
|
2641 |
- if (pdf.fileID) |
|
2642 |
- free(pdf.fileID); |
|
2643 |
- if (pdf.key) |
|
2644 |
- free(pdf.key); |
|
2645 |
- return CL_ETIMEOUT; |
|
2646 |
- } |
|
2556 |
+ /* |
|
2557 |
+ * Find and extract all objects in the PDF. |
|
2558 |
+ * New experimental recursive methodology that adds objects from object streams. |
|
2559 |
+ */ |
|
2560 |
+ objs_found = pdf.nobjs; |
|
2561 |
+ rc = pdf_find_and_extract_objs(&pdf, &alerts); |
|
2647 | 2562 |
|
2648 |
- rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN); |
|
2649 |
- switch (rc) { |
|
2650 |
- case CL_EFORMAT: |
|
2651 |
- /* Don't halt on one bad object */ |
|
2652 |
- cli_dbgmsg("cli_pdf: bad format object, skipping to next\n"); |
|
2653 |
- badobjects++; |
|
2654 |
- pdf.stats.ninvalidobjs++; |
|
2655 |
- rc = CL_CLEAN; |
|
2656 |
- break; |
|
2657 |
- case CL_VIRUS: |
|
2658 |
- alerts++; |
|
2659 |
- if (SCAN_ALL) { |
|
2660 |
- rc = CL_CLEAN; |
|
2661 |
- } |
|
2662 |
- break; |
|
2663 |
- default: |
|
2664 |
- break; |
|
2665 |
- } |
|
2563 |
+ if (pdf.nobjs <= objs_found) { |
|
2564 |
+ cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs did not find any new objects!\n"); |
|
2565 |
+ } else { |
|
2566 |
+ cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs found %d new objects.\n", pdf.nobjs - objs_found); |
|
2666 | 2567 |
} |
2667 | 2568 |
|
2668 | 2569 |
if (pdf.flags & (1 << ENCRYPTED_PDF)) |
2669 | 2570 |
pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE)); |
2670 | 2571 |
|
2671 |
- if (pdf.flags && !rc) { |
|
2572 |
+ if (pdf.flags && !rc) { |
|
2672 | 2573 |
cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags); |
2673 | 2574 |
rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1); |
2674 | 2575 |
if (rc == CL_VIRUS) { |
... | ... |
@@ -2699,11 +3409,11 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2699 | 2699 |
#endif |
2700 | 2700 |
} |
2701 | 2701 |
|
2702 |
+done: |
|
2702 | 2703 |
if (alerts) { |
2703 | 2704 |
rc = CL_VIRUS; |
2704 | 2705 |
} |
2705 |
- |
|
2706 |
- else if (!rc && badobjects) { |
|
2706 |
+ else if (!rc && pdf.stats.ninvalidobjs > 0) { |
|
2707 | 2707 |
rc = CL_EFORMAT; |
2708 | 2708 |
} |
2709 | 2709 |
|
... | ... |
@@ -2711,17 +3421,54 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2711 | 2711 |
pdf_export_json(&pdf); |
2712 | 2712 |
#endif |
2713 | 2713 |
|
2714 |
- cli_dbgmsg("cli_pdf: returning %d\n", rc); |
|
2715 |
- free(pdf.objs); |
|
2716 |
- free(pdf.fileID); |
|
2717 |
- free(pdf.key); |
|
2714 |
+ if (pdf.objstms) { |
|
2715 |
+ for (i = 0; i < pdf.nobjstms; i++) { |
|
2716 |
+ if (pdf.objstms[i]) { |
|
2717 |
+ if (pdf.objstms[i]->streambuf) { |
|
2718 |
+ free(pdf.objstms[i]->streambuf); |
|
2719 |
+ pdf.objstms[i]->streambuf = NULL; |
|
2720 |
+ } |
|
2721 |
+ free(pdf.objstms[i]); |
|
2722 |
+ pdf.objstms[i] = NULL; |
|
2723 |
+ } |
|
2724 |
+ } |
|
2725 |
+ free(pdf.objstms); |
|
2726 |
+ pdf.objstms = NULL; |
|
2727 |
+ } |
|
2728 |
+ |
|
2729 |
+ if (NULL != pdf.objs) { |
|
2730 |
+ for (i = 0; i < pdf.nobjs; i++) { |
|
2731 |
+ if (NULL != pdf.objs[i]) { |
|
2732 |
+ free(pdf.objs[i]); |
|
2733 |
+ pdf.objs[i] = NULL; |
|
2734 |
+ } |
|
2735 |
+ } |
|
2736 |
+ free(pdf.objs); |
|
2737 |
+ pdf.objs = NULL; |
|
2738 |
+ } |
|
2739 |
+ if (pdf.fileID) { |
|
2740 |
+ free(pdf.fileID); |
|
2741 |
+ pdf.fileID = NULL; |
|
2742 |
+ } |
|
2743 |
+ if (pdf.key) { |
|
2744 |
+ free(pdf.key); |
|
2745 |
+ pdf.key = NULL; |
|
2746 |
+ } |
|
2718 | 2747 |
|
2719 | 2748 |
/* PDF hooks may abort, don't return CL_BREAK to caller! */ |
2720 |
- return rc == CL_BREAK ? CL_CLEAN : rc; |
|
2749 |
+ rc = (rc == CL_BREAK) ? CL_CLEAN : rc; |
|
2750 |
+ |
|
2751 |
+ cli_dbgmsg("cli_pdf: returning %d\n", rc); |
|
2752 |
+ return rc; |
|
2721 | 2753 |
} |
2722 | 2754 |
|
2723 |
-/* |
|
2724 |
- * Find the start of the next line |
|
2755 |
+/** |
|
2756 |
+ * @brief Skip the rest of the current line, and find the start of the next line. |
|
2757 |
+ * |
|
2758 |
+ * @param ptr Current offset into buffer. |
|
2759 |
+ * @param len Remaining bytes in buffer. |
|
2760 |
+ * |
|
2761 |
+ * @return const char* Address of next line, or NULL if no next line in buffer. |
|
2725 | 2762 |
*/ |
2726 | 2763 |
static const char * |
2727 | 2764 |
pdf_nextlinestart(const char *ptr, size_t len) |
... | ... |
@@ -2743,9 +3490,15 @@ pdf_nextlinestart(const char *ptr, size_t len) |
2743 | 2743 |
return ptr; |
2744 | 2744 |
} |
2745 | 2745 |
|
2746 |
-/* |
|
2747 |
- * Return the start of the next PDF object. |
|
2746 |
+/** |
|
2747 |
+ * @brief Return the start of the next PDF object. |
|
2748 |
+ * |
|
2748 | 2749 |
* This assumes that we're not in a stream. |
2750 |
+ * |
|
2751 |
+ * @param ptr Current offset into buffer. |
|
2752 |
+ * @param len Remaining bytes in buffer. |
|
2753 |
+ * |
|
2754 |
+ * @return const char* Address of next object in the buffer, or NULL if there is none in the buffer. |
|
2749 | 2755 |
*/ |
2750 | 2756 |
static const char * |
2751 | 2757 |
pdf_nextobject(const char *ptr, size_t len) |
... | ... |
@@ -3078,10 +3831,13 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam |
3078 | 3078 |
return; |
3079 | 3079 |
|
3080 | 3080 |
if (!(pdf->stats.author)) { |
3081 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3082 |
+ : (const char *)(obj->start + pdf->map); |
|
3083 |
+ |
|
3081 | 3084 |
pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3082 | 3085 |
if (!(pdf->stats.author)) |
3083 | 3086 |
return; |
3084 |
- pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta)); |
|
3087 |
+ pdf->stats.author->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta)); |
|
3085 | 3088 |
} |
3086 | 3089 |
} |
3087 | 3090 |
#endif |
... | ... |
@@ -3098,10 +3854,13 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna |
3098 | 3098 |
return; |
3099 | 3099 |
|
3100 | 3100 |
if (!(pdf->stats.creator)) { |
3101 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3102 |
+ : (const char *)(obj->start + pdf->map); |
|
3103 |
+ |
|
3101 | 3104 |
pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3102 | 3105 |
if (!(pdf->stats.creator)) |
3103 | 3106 |
return; |
3104 |
- pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta)); |
|
3107 |
+ pdf->stats.creator->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta)); |
|
3105 | 3108 |
} |
3106 | 3109 |
} |
3107 | 3110 |
#endif |
... | ... |
@@ -3118,10 +3877,13 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str |
3118 | 3118 |
return; |
3119 | 3119 |
|
3120 | 3120 |
if (!(pdf->stats.modificationdate)) { |
3121 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3122 |
+ : (const char *)(obj->start + pdf->map); |
|
3123 |
+ |
|
3121 | 3124 |
pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3122 | 3125 |
if (!(pdf->stats.modificationdate)) |
3123 | 3126 |
return; |
3124 |
- pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta)); |
|
3127 |
+ pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta)); |
|
3125 | 3128 |
} |
3126 | 3129 |
} |
3127 | 3130 |
#endif |
... | ... |
@@ -3138,10 +3900,13 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
3138 | 3138 |
return; |
3139 | 3139 |
|
3140 | 3140 |
if (!(pdf->stats.creationdate)) { |
3141 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3142 |
+ : (const char *)(obj->start + pdf->map); |
|
3143 |
+ |
|
3141 | 3144 |
pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3142 | 3145 |
if (!(pdf->stats.creationdate)) |
3143 | 3146 |
return; |
3144 |
- pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta)); |
|
3147 |
+ pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta)); |
|
3145 | 3148 |
} |
3146 | 3149 |
} |
3147 | 3150 |
#endif |
... | ... |
@@ -3158,10 +3923,13 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn |
3158 | 3158 |
return; |
3159 | 3159 |
|
3160 | 3160 |
if (!(pdf->stats.producer)) { |
3161 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3162 |
+ : (const char *)(obj->start + pdf->map); |
|
3163 |
+ |
|
3161 | 3164 |
pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3162 | 3165 |
if (!(pdf->stats.producer)) |
3163 | 3166 |
return; |
3164 |
- pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta)); |
|
3167 |
+ pdf->stats.producer->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta)); |
|
3165 | 3168 |
} |
3166 | 3169 |
} |
3167 | 3170 |
#endif |
... | ... |
@@ -3178,10 +3946,13 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname |
3178 | 3178 |
return; |
3179 | 3179 |
|
3180 | 3180 |
if (!(pdf->stats.title)) { |
3181 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3182 |
+ : (const char *)(obj->start + pdf->map); |
|
3183 |
+ |
|
3181 | 3184 |
pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3182 | 3185 |
if (!(pdf->stats.title)) |
3183 | 3186 |
return; |
3184 |
- pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta)); |
|
3187 |
+ pdf->stats.title->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta)); |
|
3185 | 3188 |
} |
3186 | 3189 |
} |
3187 | 3190 |
#endif |
... | ... |
@@ -3198,10 +3969,13 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn |
3198 | 3198 |
return; |
3199 | 3199 |
|
3200 | 3200 |
if (!(pdf->stats.keywords)) { |
3201 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3202 |
+ : (const char *)(obj->start + pdf->map); |
|
3203 |
+ |
|
3201 | 3204 |
pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3202 | 3205 |
if (!(pdf->stats.keywords)) |
3203 | 3206 |
return; |
3204 |
- pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta)); |
|
3207 |
+ pdf->stats.keywords->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta)); |
|
3205 | 3208 |
} |
3206 | 3209 |
} |
3207 | 3210 |
#endif |
... | ... |
@@ -3218,10 +3992,13 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna |
3218 | 3218 |
return; |
3219 | 3219 |
|
3220 | 3220 |
if (!(pdf->stats.subject)) { |
3221 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3222 |
+ : (const char *)(obj->start + pdf->map); |
|
3223 |
+ |
|
3221 | 3224 |
pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry)); |
3222 | 3225 |
if (!(pdf->stats.subject)) |
3223 | 3226 |
return; |
3224 |
- pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta)); |
|
3227 |
+ pdf->stats.subject->data = pdf_parse_string(pdf, obj, objstart, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta)); |
|
3225 | 3228 |
} |
3226 | 3229 |
} |
3227 | 3230 |
#endif |
... | ... |
@@ -3269,9 +4046,10 @@ static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_a |
3269 | 3269 |
static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act) |
3270 | 3270 |
{ |
3271 | 3271 |
struct pdf_array *array; |
3272 |
- const char *objstart = (const char *)(obj->start + pdf->map); |
|
3272 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3273 |
+ : (const char *)(obj->start + pdf->map); |
|
3273 | 3274 |
const char *begin; |
3274 |
- unsigned int objsz; |
|
3275 |
+ unsigned int objsize; |
|
3275 | 3276 |
unsigned long npages=0, count; |
3276 | 3277 |
struct pdf_array_node *node; |
3277 | 3278 |
json_object *pdfobj; |
... | ... |
@@ -3284,19 +4062,19 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname |
3284 | 3284 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3285 | 3285 |
return; |
3286 | 3286 |
|
3287 |
- objsz = obj_size(pdf, obj, 1); |
|
3287 |
+ objsize = obj_size(pdf, obj, 1); |
|
3288 | 3288 |
|
3289 | 3289 |
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"); |
3290 | 3290 |
if (!(pdfobj)) |
3291 | 3291 |
return; |
3292 | 3292 |
|
3293 |
- begin = cli_memstr(objstart, objsz, "/Kids", 5); |
|
3293 |
+ begin = cli_memstr(objstart, objsize, "/Kids", 5); |
|
3294 | 3294 |
if (!(begin)) |
3295 | 3295 |
return; |
3296 | 3296 |
|
3297 | 3297 |
begin += 5; |
3298 | 3298 |
|
3299 |
- array = pdf_parse_array(pdf, obj, objsz, (char *)begin, NULL); |
|
3299 |
+ array = pdf_parse_array(pdf, obj, objsize, (char *)begin, NULL); |
|
3300 | 3300 |
if (!(array)) { |
3301 | 3301 |
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
3302 | 3302 |
return; |
... | ... |
@@ -3307,21 +4085,21 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname |
3307 | 3307 |
if (strchr((char *)(node->data), 'R')) |
3308 | 3308 |
npages++; |
3309 | 3309 |
|
3310 |
- begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6); |
|
3310 |
+ begin = cli_memstr(objstart, objsize, "/Count", 6); |
|
3311 | 3311 |
if (!(begin)) { |
3312 | 3312 |
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
3313 | 3313 |
goto cleanup; |
3314 | 3314 |
} |
3315 | 3315 |
|
3316 | 3316 |
begin += 6; |
3317 |
- while (begin - objstart < objsz && isspace(begin[0])) |
|
3317 |
+ while (begin - objstart < objsize && isspace(begin[0])) |
|
3318 | 3318 |
begin++; |
3319 | 3319 |
|
3320 |
- if (begin - objstart >= objsz) { |
|
3320 |
+ if (begin - objstart >= objsize) { |
|
3321 | 3321 |
goto cleanup; |
3322 | 3322 |
} |
3323 | 3323 |
|
3324 |
- if ((CL_SUCCESS != cli_strntoul_wrap(begin, (size_t)(obj->start + pdf->map + objsz - begin), 0, 10, &count)) || |
|
3324 |
+ if ((CL_SUCCESS != cli_strntoul_wrap(begin, (size_t)(obj->start + pdf->map + objsize - begin), 0, 10, &count)) || |
|
3325 | 3325 |
(count != npages)) { |
3326 | 3326 |
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1); |
3327 | 3327 |
} |
... | ... |
@@ -3336,8 +4114,10 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam |
3336 | 3336 |
{ |
3337 | 3337 |
json_object *colorsobj, *pdfobj; |
3338 | 3338 |
unsigned long ncolors; |
3339 |
- char *start, *p1; |
|
3340 |
- size_t objsz; |
|
3339 |
+ char *p1; |
|
3340 |
+ const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
3341 |
+ : (const char *)(obj->start + pdf->map); |
|
3342 |
+ size_t objsize; |
|
3341 | 3343 |
|
3342 | 3344 |
UNUSEDPARAM(act); |
3343 | 3345 |
|
... | ... |
@@ -3347,27 +4127,25 @@ static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam |
3347 | 3347 |
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES)) |
3348 | 3348 |
return; |
3349 | 3349 |
|
3350 |
- objsz = obj_size(pdf, obj, 1); |
|
3351 |
- |
|
3352 |
- start = (char *)(obj->start + pdf->map); |
|
3350 |
+ objsize = obj_size(pdf, obj, 1); |
|
3353 | 3351 |
|
3354 |
- p1 = (char *)cli_memstr(start, objsz, "/Colors", 7); |
|
3352 |
+ p1 = (char *)cli_memstr(objstart, objsize, "/Colors", 7); |
|
3355 | 3353 |
if (!(p1)) |
3356 | 3354 |
return; |
3357 | 3355 |
|
3358 | 3356 |
p1 += 7; |
3359 | 3357 |
|
3360 | 3358 |
/* Ensure that we have at least one whitespace character plus at least one number */ |
3361 |
- if (objsz - (p1 - start) < 2) |
|
3359 |
+ if (objsize - (p1 - objstart) < 2) |
|
3362 | 3360 |
return; |
3363 | 3361 |
|
3364 |
- while (p1 - start < objsz && isspace(p1[0])) |
|
3362 |
+ while (p1 - objstart < objsize && isspace(p1[0])) |
|
3365 | 3363 |
p1++; |
3366 | 3364 |
|
3367 |
- if ((size_t)(p1 - start) == objsz) |
|
3365 |
+ if ((size_t)(p1 - objstart) == objsize) |
|
3368 | 3366 |
return; |
3369 | 3367 |
|
3370 |
- if (CL_SUCCESS != cli_strntoul_wrap(p1, (size_t)((p1 - start) - objsz), 0, 10, &ncolors)) |
|
3368 |
+ if (CL_SUCCESS != cli_strntoul_wrap(p1, (size_t)((p1 - objstart) - objsize), 0, 10, &ncolors)) |
|
3371 | 3369 |
return; |
3372 | 3370 |
|
3373 | 3371 |
/* We only care if the number of colors > 2**24 */ |
... | ... |
@@ -3651,14 +4429,14 @@ static void pdf_export_json(struct pdf_struct *pdf) |
3651 | 3651 |
} |
3652 | 3652 |
|
3653 | 3653 |
for (i=0; i < pdf->nobjs; i++) { |
3654 |
- if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) { |
|
3654 |
+ if (pdf->objs[i]->flags & (1<<OBJ_TRUNCATED)) { |
|
3655 | 3655 |
json_object *truncobj; |
3656 | 3656 |
|
3657 | 3657 |
truncobj = cli_jsonarray(pdfobj, "TruncatedObjects"); |
3658 | 3658 |
if (!(truncobj)) |
3659 | 3659 |
continue; |
3660 | 3660 |
|
3661 |
- cli_jsonint_array(truncobj, pdf->objs[i].id>>8); |
|
3661 |
+ cli_jsonint_array(truncobj, pdf->objs[i]->id >> 8); |
|
3662 | 3662 |
} |
3663 | 3663 |
} |
3664 | 3664 |
|
... | ... |
@@ -24,13 +24,26 @@ |
24 | 24 |
#include "others.h" |
25 | 25 |
#define PDF_FILTERLIST_MAX 64 |
26 | 26 |
|
27 |
+struct objstm_struct { |
|
28 |
+ uint32_t first; // offset of first obj |
|
29 |
+ uint32_t current; // offset of current obj |
|
30 |
+ uint32_t current_pair; // offset of current pair describing id, location of object |
|
31 |
+ uint32_t length; // total length of all objects (starting at first) |
|
32 |
+ uint32_t n; // number of objects that should be found in the object stream |
|
33 |
+ uint32_t nobjs_found; // number of objects actually found in the object stream |
|
34 |
+ char *streambuf; // address of stream buffer, beginning with first obj pair |
|
35 |
+ size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects |
|
36 |
+}; |
|
37 |
+ |
|
27 | 38 |
struct pdf_obj { |
28 | 39 |
uint32_t start; |
40 |
+ int32_t size; |
|
29 | 41 |
uint32_t id; |
30 | 42 |
uint32_t flags; |
31 | 43 |
uint32_t statsflags; |
32 | 44 |
uint32_t numfilters; |
33 | 45 |
uint32_t filterlist[PDF_FILTERLIST_MAX]; |
46 |
+ struct objstm_struct *objstm; // Should be NULL unless the obj exists in an object stream (separate buffer) |
|
34 | 47 |
char *path; |
35 | 48 |
}; |
36 | 49 |
|
... | ... |
@@ -124,7 +137,7 @@ enum enc_method { |
124 | 124 |
}; |
125 | 125 |
|
126 | 126 |
struct pdf_struct { |
127 |
- struct pdf_obj *objs; |
|
127 |
+ struct pdf_obj **objs; |
|
128 | 128 |
unsigned nobjs; |
129 | 129 |
unsigned flags; |
130 | 130 |
unsigned enc_method_stream; |
... | ... |
@@ -145,6 +158,8 @@ struct pdf_struct { |
145 | 145 |
char *key; |
146 | 146 |
unsigned keylen; |
147 | 147 |
struct pdf_stats stats; |
148 |
+ struct objstm_struct **objstms; |
|
149 |
+ uint32_t nobjstms; |
|
148 | 150 |
}; |
149 | 151 |
|
150 | 152 |
#define OBJ_FLAG_PDFNAME_NONE 0x0 |
... | ... |
@@ -156,7 +171,7 @@ struct pdf_struct { |
156 | 156 |
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset); |
157 | 157 |
void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj); |
158 | 158 |
int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); |
159 |
-int pdf_findobj(struct pdf_struct *pdf); |
|
159 |
+cl_error_t pdf_findobj(struct pdf_struct *pdf); |
|
160 | 160 |
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid); |
161 | 161 |
|
162 | 162 |
void pdf_handle_enc(struct pdf_struct *pdf); |
... | ... |
@@ -166,13 +181,16 @@ enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key |
166 | 166 |
|
167 | 167 |
void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag); |
168 | 168 |
char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len); |
169 |
-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats); |
|
170 |
-struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
171 |
-struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
169 |
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta); |
|
170 |
+struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar); |
|
171 |
+struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar); |
|
172 | 172 |
int is_object_reference(char *begin, char **endchar, uint32_t *id); |
173 | 173 |
void pdf_free_dict(struct pdf_dict *dict); |
174 | 174 |
void pdf_free_array(struct pdf_array *array); |
175 | 175 |
void pdf_print_dict(struct pdf_dict *dict, unsigned long depth); |
176 | 176 |
void pdf_print_array(struct pdf_array *array, unsigned long depth); |
177 | 177 |
|
178 |
+cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf, uint32_t *alerts); |
|
179 |
+cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm); |
|
180 |
+ |
|
178 | 181 |
#endif |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
/* |
2 |
- * Copyright (C) 2016-2017 Cisco and/or its affiliates. All rights reserved. |
|
2 |
+ * Copyright (C) 2016-2018 Cisco and/or its affiliates. All rights reserved. |
|
3 | 3 |
* |
4 | 4 |
* Author: Kevin Lin |
5 | 5 |
* |
... | ... |
@@ -37,6 +37,7 @@ |
37 | 37 |
#endif |
38 | 38 |
|
39 | 39 |
#include <stdio.h> |
40 |
+#include <stddef.h> |
|
40 | 41 |
#include <sys/types.h> |
41 | 42 |
#include <sys/stat.h> |
42 | 43 |
#include <ctype.h> |
... | ... |
@@ -75,26 +76,57 @@ struct pdf_token { |
75 | 75 |
uint8_t *content; /* content stream */ |
76 | 76 |
}; |
77 | 77 |
|
78 |
-static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
79 |
-static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl); |
|
78 |
+static ptrdiff_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm); |
|
79 |
+static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl); |
|
80 |
+ |
|
81 |
+static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
82 |
+static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
83 |
+static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
84 |
+static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
85 |
+static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode); |
|
86 |
+static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
87 |
+ |
|
88 |
+/** |
|
89 |
+ * @brief Wrapper function for pdf_decodestream_internal. |
|
90 |
+ * |
|
91 |
+ * Allocate a token object to store decoded filter data. |
|
92 |
+ * Parse/decode the filter data and scan it. |
|
93 |
+ * |
|
94 |
+ * @param pdf Pdf context structure. |
|
95 |
+ * @param obj The object we found the filter content in. |
|
96 |
+ * @param params (optional) Dictionary parameters describing the filter data. |
|
97 |
+ * @param stream Filter stream buffer pointer. |
|
98 |
+ * @param streamlen Length of filter stream buffer. |
|
99 |
+ * @param xref Indicates if the stream is an /XRef stream. Do not apply forced decryption on /XRef streams. |
|
100 |
+ * @param fout File descriptor that the decoded data is written to for scanning. |
|
101 |
+ * @param[out] status Status code (cl_error_t) passed back to the caller. |
|
102 |
+ * @param objstm (optional) Object stream context structure. |
|
103 |
+ * @return ptrdiff_t The number of bytes written to 'fout' to be scanned. -1 if failed out. |
|
104 |
+ */ |
|
105 |
+ptrdiff_t pdf_decodestream( |
|
106 |
+ struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, |
|
107 |
+ const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status, |
|
108 |
+ struct objstm_struct *objstm) |
|
109 |
+{ |
|
110 |
+ struct pdf_token *token = NULL; |
|
111 |
+ ptrdiff_t bytes_scanned = -1; |
|
112 |
+ cl_error_t retval = CL_SUCCESS; |
|
80 | 113 |
|
81 |
-static int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
82 |
-static int filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
83 |
-static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
84 |
-static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); |
|
85 |
-static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode); |
|
86 |
-static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token); |
|
114 |
+ if (!status) { |
|
115 |
+ /* invalid args, and no way to pass back the status code */ |
|
116 |
+ return -1; |
|
117 |
+ } |
|
87 | 118 |
|
88 |
-ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc) |
|
89 |
-{ |
|
90 |
- struct pdf_token *token; |
|
91 |
- ptrdiff_t rv; |
|
119 |
+ if (!pdf || !obj) { |
|
120 |
+ /* Invalid args */ |
|
121 |
+ retval = CL_EARG; |
|
122 |
+ goto done; |
|
123 |
+ } |
|
92 | 124 |
|
93 | 125 |
if (!stream || !streamlen || fout < 0) { |
94 |
- cli_dbgmsg("cli_pdf: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
95 |
- if (rc) |
|
96 |
- *rc = CL_ENULLARG; |
|
97 |
- return -1; |
|
126 |
+ cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff); |
|
127 |
+ retval = CL_ENULLARG; |
|
128 |
+ goto done; |
|
98 | 129 |
} |
99 | 130 |
|
100 | 131 |
#if 0 |
... | ... |
@@ -104,9 +136,8 @@ ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct p |
104 | 104 |
|
105 | 105 |
token = cli_malloc(sizeof(struct pdf_token)); |
106 | 106 |
if (!token) { |
107 |
- if (rc) |
|
108 |
- *rc = CL_EMEM; |
|
109 |
- return -1; |
|
107 |
+ retval = CL_EMEM; |
|
108 |
+ goto done; |
|
110 | 109 |
} |
111 | 110 |
|
112 | 111 |
token->flags = 0; |
... | ... |
@@ -118,69 +149,110 @@ ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct p |
118 | 118 |
token->content = cli_malloc(streamlen); |
119 | 119 |
if (!token->content) { |
120 | 120 |
free(token); |
121 |
- if (rc) |
|
122 |
- *rc = CL_EMEM; |
|
123 |
- return -1; |
|
121 |
+ retval = CL_EMEM; |
|
122 |
+ goto done; |
|
124 | 123 |
} |
125 | 124 |
memcpy(token->content, stream, streamlen); |
126 | 125 |
token->length = streamlen; |
127 | 126 |
|
128 |
- cli_dbgmsg("cli_pdf: detected %lu applied filters\n", (long unsigned)(obj->numfilters)); |
|
127 |
+ cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters)); |
|
129 | 128 |
|
130 |
- rv = (ptrdiff_t)pdf_decodestream_internal(pdf, obj, params, token); |
|
131 |
- /* return is generally ignored */ |
|
132 |
- if (rc) { |
|
133 |
- if (rv == CL_VIRUS) |
|
134 |
- *rc = CL_VIRUS; |
|
135 |
- else |
|
136 |
- *rc = CL_SUCCESS; |
|
137 |
- } |
|
138 |
- |
|
139 |
- if (token->success) { |
|
140 |
- if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) { |
|
141 |
- if (cli_writen(fout, token->content, token->length) != token->length) { |
|
142 |
- cli_errmsg("cli_pdf: failed to write output file\n"); |
|
143 |
- if (rc) |
|
144 |
- *rc = CL_EWRITE; |
|
145 |
- return -1; |
|
146 |
- } |
|
147 |
- rv = token->length; |
|
148 |
- } |
|
149 |
- } else { /* if no non-forced filter are decoded, return the raw stream */ |
|
129 |
+ bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, &retval, objstm); |
|
130 |
+ /* |
|
131 |
+ * Pass back the return value, though we really only care |
|
132 |
+ * if it is CL_VIRUS or CL_SUCCESS. |
|
133 |
+ */ |
|
134 |
+ if (retval == CL_VIRUS) |
|
135 |
+ retval = CL_VIRUS; |
|
136 |
+ else |
|
137 |
+ retval = CL_SUCCESS; |
|
138 |
+ |
|
139 |
+ if (!token->success) { |
|
140 |
+ /* |
|
141 |
+ * If it was successful, the internal() function calls cli_writen() |
|
142 |
+ * However, in this case no non-forced filters were decoded, |
|
143 |
+ * so return the raw stream. |
|
144 |
+ */ |
|
150 | 145 |
if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) { |
151 |
- cli_dbgmsg("cli_pdf: no non-forced filters decoded, returning raw stream\n"); |
|
146 |
+ cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n"); |
|
152 | 147 |
|
153 | 148 |
if (cli_writen(fout, stream, streamlen) != streamlen) { |
154 |
- cli_errmsg("cli_pdf: failed to write output file\n"); |
|
155 |
- if (rc) |
|
156 |
- *rc = CL_EWRITE; |
|
157 |
- return -1; |
|
149 |
+ cli_errmsg("pdf_decodestream: failed to write output file\n"); |
|
150 |
+ retval = CL_EWRITE; |
|
151 |
+ bytes_scanned = -1; |
|
152 |
+ goto done; |
|
158 | 153 |
} |
159 |
- rv = streamlen; |
|
154 |
+ bytes_scanned = streamlen; |
|
160 | 155 |
} |
161 | 156 |
} |
162 | 157 |
|
163 |
- free(token->content); |
|
164 |
- free(token); |
|
165 |
- return rv; |
|
158 |
+done: |
|
159 |
+ *status = retval; |
|
160 |
+ |
|
161 |
+ /* |
|
162 |
+ * Free up the token, and token content, if any. |
|
163 |
+ */ |
|
164 |
+ if (NULL != token) |
|
165 |
+ { |
|
166 |
+ if (NULL != token->content) { |
|
167 |
+ free(token->content); |
|
168 |
+ token->content = NULL; |
|
169 |
+ token->length = 0; |
|
170 |
+ } |
|
171 |
+ free(token); |
|
172 |
+ token = NULL; |
|
173 |
+ } |
|
174 |
+ |
|
175 |
+ return bytes_scanned; |
|
166 | 176 |
} |
167 | 177 |
|
168 |
-static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
178 |
+/** |
|
179 |
+ * @brief Decode filter buffer data. |
|
180 |
+ * |
|
181 |
+ * Attempt to decompress, decrypt or otherwise parse it. |
|
182 |
+ * |
|
183 |
+ * @param pdf Pdf context structure. |
|
184 |
+ * @param obj The object we found the filter content in. |
|
185 |
+ * @param params (optional) Dictionary parameters describing the filter data. |
|
186 |
+ * @param token Pointer to and length of filter data. |
|
187 |
+ * @param fout File descriptor the decoded data is written to so that it can be scanned. |
|
188 |
+ * @param[out] status CL_CLEAN/CL_SUCCESS or CL_VIRUS/CL_E<error> |
|
189 |
+ * @param objstm (optional) Object stream context structure. |
|
190 |
+ * @return ptrdiff_t The number of bytes we wrote to 'fout'. -1 on failure or if nothing was written. |
|
191 |
+ */ |
|
192 |
+static ptrdiff_t pdf_decodestream_internal( |
|
193 |
+ struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, |
|
194 |
+ struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm) |
|
169 | 195 |
{ |
196 |
+ cl_error_t vir = CL_CLEAN; |
|
197 |
+ cl_error_t retval = CL_SUCCESS; |
|
198 |
+ ptrdiff_t bytes_scanned = -1; |
|
170 | 199 |
const char *filter = NULL; |
171 |
- int i, vir = 0, rc = CL_SUCCESS; |
|
200 |
+ int i; |
|
172 | 201 |
|
202 |
+ if (!status) { |
|
203 |
+ /* invalid args, and no way to pass back the status code */ |
|
204 |
+ return -1; |
|
205 |
+ } |
|
206 |
+ |
|
207 |
+ if (!pdf || !obj || !token) { |
|
208 |
+ /* Invalid args */ |
|
209 |
+ retval = CL_EARG; |
|
210 |
+ goto done; |
|
211 |
+ } |
|
212 |
+ |
|
173 | 213 |
/* |
174 | 214 |
* if pdf is decryptable, scan for CRYPT filter |
175 | 215 |
* if none, force a DECRYPT filter application |
176 | 216 |
*/ |
177 | 217 |
if ((pdf->flags & (1 << DECRYPTABLE_PDF)) && !(obj->flags & (1 << OBJ_FILTER_CRYPT))) { |
178 | 218 |
if (token->flags & PDFTOKEN_FLAG_XREF) /* TODO: is this on all crypt filters or only the assumed one? */ |
179 |
- cli_dbgmsg("cli_pdf: skipping decoding => non-filter CRYPT (reason: xref)\n"); |
|
219 |
+ cli_dbgmsg("pdf_decodestream_internal: skipping decoding => non-filter CRYPT (reason: xref)\n"); |
|
180 | 220 |
else { |
181 |
- cli_dbgmsg("cli_pdf: decoding => non-filter CRYPT\n"); |
|
182 |
- if ((rc = filter_decrypt(pdf, obj, params, token, 1)) != CL_SUCCESS) { |
|
183 |
- return rc; |
|
221 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n"); |
|
222 |
+ retval = filter_decrypt(pdf, obj, params, token, 1); |
|
223 |
+ if (retval != CL_SUCCESS) { |
|
224 |
+ goto done; |
|
184 | 225 |
} |
185 | 226 |
} |
186 | 227 |
} |
... | ... |
@@ -188,33 +260,33 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
188 | 188 |
for (i = 0; i < obj->numfilters; i++) { |
189 | 189 |
switch(obj->filterlist[i]) { |
190 | 190 |
case OBJ_FILTER_A85: |
191 |
- cli_dbgmsg("cli_pdf: decoding [%d] => ASCII85DECODE\n", obj->filterlist[i]); |
|
192 |
- rc = filter_ascii85decode(pdf, obj, token); |
|
191 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => ASCII85DECODE\n", obj->filterlist[i]); |
|
192 |
+ retval = filter_ascii85decode(pdf, obj, token); |
|
193 | 193 |
break; |
194 | 194 |
|
195 | 195 |
case OBJ_FILTER_RL: |
196 |
- cli_dbgmsg("cli_pdf: decoding [%d] => RLDECODE\n", obj->filterlist[i]); |
|
197 |
- rc = filter_rldecode(pdf, obj, token); |
|
196 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => RLDECODE\n", obj->filterlist[i]); |
|
197 |
+ retval = filter_rldecode(pdf, obj, token); |
|
198 | 198 |
break; |
199 | 199 |
|
200 | 200 |
case OBJ_FILTER_FLATE: |
201 |
- cli_dbgmsg("cli_pdf: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]); |
|
202 |
- rc = filter_flatedecode(pdf, obj, params, token); |
|
201 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]); |
|
202 |
+ retval = filter_flatedecode(pdf, obj, params, token); |
|
203 | 203 |
break; |
204 | 204 |
|
205 | 205 |
case OBJ_FILTER_AH: |
206 |
- cli_dbgmsg("cli_pdf: decoding [%d] => ASCIIHEXDECODE\n", obj->filterlist[i]); |
|
207 |
- rc = filter_asciihexdecode(pdf, obj, token); |
|
206 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => ASCIIHEXDECODE\n", obj->filterlist[i]); |
|
207 |
+ retval = filter_asciihexdecode(pdf, obj, token); |
|
208 | 208 |
break; |
209 | 209 |
|
210 | 210 |
case OBJ_FILTER_CRYPT: |
211 |
- cli_dbgmsg("cli_pdf: decoding [%d] => CRYPT\n", obj->filterlist[i]); |
|
212 |
- rc = filter_decrypt(pdf, obj, params, token, 0); |
|
211 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => CRYPT\n", obj->filterlist[i]); |
|
212 |
+ retval = filter_decrypt(pdf, obj, params, token, 0); |
|
213 | 213 |
break; |
214 | 214 |
|
215 | 215 |
case OBJ_FILTER_LZW: |
216 |
- cli_dbgmsg("cli_pdf: decoding [%d] => LZWDECODE\n", obj->filterlist[i]); |
|
217 |
- rc = filter_lzwdecode(pdf, obj, params, token); |
|
216 |
+ cli_dbgmsg("pdf_decodestream_internal: decoding [%d] => LZWDECODE\n", obj->filterlist[i]); |
|
217 |
+ retval = filter_lzwdecode(pdf, obj, params, token); |
|
218 | 218 |
break; |
219 | 219 |
|
220 | 220 |
case OBJ_FILTER_JPX: |
... | ... |
@@ -226,29 +298,29 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
226 | 226 |
case OBJ_FILTER_JBIG2: |
227 | 227 |
if (!filter) filter = "JBIG2DECODE"; |
228 | 228 |
|
229 |
- cli_dbgmsg("cli_pdf: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter); |
|
229 |
+ cli_dbgmsg("pdf_decodestream_internal: unimplemented filter type [%d] => %s\n", obj->filterlist[i], filter); |
|
230 | 230 |
filter = NULL; |
231 |
- rc = CL_BREAK; |
|
231 |
+ retval = CL_BREAK; |
|
232 | 232 |
break; |
233 | 233 |
|
234 | 234 |
default: |
235 |
- cli_dbgmsg("cli_pdf: unknown filter type [%d]\n", obj->filterlist[i]); |
|
236 |
- rc = CL_BREAK; |
|
235 |
+ cli_dbgmsg("pdf_decodestream_internal: unknown filter type [%d]\n", obj->filterlist[i]); |
|
236 |
+ retval = CL_BREAK; |
|
237 | 237 |
break; |
238 | 238 |
} |
239 | 239 |
|
240 | 240 |
if (!(token->content) || !(token->length)) { |
241 |
- cli_dbgmsg("cli_pdf: empty content, breaking after %d (of %lu) filters\n", |
|
242 |
- i, (long unsigned)(obj->numfilters)); |
|
241 |
+ cli_dbgmsg("pdf_decodestream_internal: empty content, breaking after %d (of %lu) filters\n", i, (long unsigned)(obj->numfilters)); |
|
243 | 242 |
break; |
244 | 243 |
} |
245 | 244 |
|
246 |
- if (rc != CL_SUCCESS) { |
|
247 |
- if (rc == CL_VIRUS && pdf->ctx->options & CL_SCAN_ALLMATCHES) |
|
248 |
- vir = 1; |
|
249 |
- else { |
|
250 |
- const char *reason; |
|
251 |
- switch (rc) { |
|
245 |
+ if (retval != CL_SUCCESS) { |
|
246 |
+ if (retval == CL_VIRUS && pdf->ctx->options & CL_SCAN_ALLMATCHES) { |
|
247 |
+ vir = CL_VIRUS; |
|
248 |
+ } else { |
|
249 |
+ const char* reason; |
|
250 |
+ |
|
251 |
+ switch (retval) { |
|
252 | 252 |
case CL_VIRUS: |
253 | 253 |
reason = "detection"; |
254 | 254 |
break; |
... | ... |
@@ -260,29 +332,89 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
260 | 260 |
break; |
261 | 261 |
} |
262 | 262 |
|
263 |
- cli_dbgmsg("cli_pdf: stopping after %d (of %lu) filters (reason: %s)\n", |
|
264 |
- i, (long unsigned)(obj->numfilters), reason); |
|
263 |
+ cli_dbgmsg("pdf_decodestream_internal: stopping after %d (of %lu) filters (reason: %s)\n", i, (long unsigned)(obj->numfilters), reason); |
|
265 | 264 |
break; |
266 | 265 |
} |
267 | 266 |
} |
268 | 267 |
token->success++; |
269 | 268 |
|
269 |
+ /* Dump the stream content to a text file if keeptmp is enabled. */ |
|
270 | 270 |
if (pdf->ctx->engine->keeptmp) { |
271 |
+ retval = pdf_decode_dump(pdf, obj, token, i+1); |
|
272 |
+ if (retval != CL_SUCCESS) { |
|
273 |
+ goto done; |
|
274 |
+ } |
|
275 |
+ } |
|
276 |
+ } |
|
271 | 277 |
|
272 |
- if ((rc = pdf_decode_dump(pdf, obj, token, i+1)) != CL_SUCCESS) |
|
273 |
- return rc; |
|
278 |
+ if (token->success > 0) { |
|
279 |
+ /* |
|
280 |
+ * Looks like we successfully decoded the stream, so let's write it out. |
|
281 |
+ * In the failure case, the caller will deal with the raw stream. |
|
282 |
+ */ |
|
283 |
+ if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) { |
|
284 |
+ if (cli_writen(fout, token->content, token->length) != token->length) { |
|
285 |
+ cli_errmsg("pdf_decodestream_internal: failed to write output file\n"); |
|
286 |
+ retval = CL_EWRITE; |
|
287 |
+ bytes_scanned = -1; |
|
288 |
+ goto done; |
|
289 |
+ } |
|
290 |
+ bytes_scanned = token->length; |
|
274 | 291 |
} |
275 | 292 |
} |
276 | 293 |
|
277 |
- if (vir) |
|
278 |
- return CL_VIRUS; |
|
279 |
- if (rc == CL_BREAK) |
|
280 |
- return CL_SUCCESS; |
|
281 |
- return rc; |
|
294 |
+ if (NULL != objstm) |
|
295 |
+ { |
|
296 |
+ /* |
|
297 |
+ * The caller indicated that the decoded data is an object stream. |
|
298 |
+ * Perform experimental object stream parsing to extract objects from the stream. |
|
299 |
+ */ |
|
300 |
+ objstm->streambuf = (char*)token->content; |
|
301 |
+ objstm->streambuf_len = (size_t)token->length; |
|
302 |
+ |
|
303 |
+ /* Take ownership of the malloc'd buffer */ |
|
304 |
+ token->content = NULL; |
|
305 |
+ token->length = 0; |
|
306 |
+ |
|
307 |
+ int objs_found = pdf->nobjs; |
|
308 |
+ if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm)) |
|
309 |
+ { |
|
310 |
+ cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n"); |
|
311 |
+ } |
|
312 |
+ |
|
313 |
+ if (pdf->nobjs <= objs_found) { |
|
314 |
+ cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm did not find any new objects!\n"); |
|
315 |
+ } else { |
|
316 |
+ cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm found %d new objects.\n", pdf->nobjs - objs_found); |
|
317 |
+ } |
|
318 |
+ } |
|
319 |
+ |
|
320 |
+done: |
|
321 |
+ |
|
322 |
+ *status = retval; |
|
323 |
+ |
|
324 |
+ if (vir == CL_VIRUS) |
|
325 |
+ *status = CL_VIRUS; |
|
326 |
+ |
|
327 |
+ if (*status == CL_BREAK) |
|
328 |
+ *status = CL_SUCCESS; |
|
329 |
+ |
|
330 |
+ return bytes_scanned; |
|
282 | 331 |
} |
283 | 332 |
|
284 |
-/* used only for intermediate dumping */ |
|
285 |
-static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl) |
|
333 |
+/** |
|
334 |
+ * @brief Dump PDF filter content such as stream contents to a temp file. |
|
335 |
+ * |
|
336 |
+ * Temp file is created in the pdf->dir directory. |
|
337 |
+ * Filename format is "pdf<pdf->files-1>_<lvl>". |
|
338 |
+ * |
|
339 |
+ * @param pdf Pdf context structure. |
|
340 |
+ * @param obj The object we found the filter content in. |
|
341 |
+ * @param token The struct for the filter contents. |
|
342 |
+ * @param lvl A unique index to distinguish the files from each other. |
|
343 |
+ * @return cl_error_t |
|
344 |
+ */ |
|
345 |
+static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl) |
|
286 | 346 |
{ |
287 | 347 |
char fname[1024]; |
288 | 348 |
int ifd; |
... | ... |
@@ -313,7 +445,7 @@ static int pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct p |
313 | 313 |
* ascii85 inflation |
314 | 314 |
* See http://www.piclist.com/techref/method/encode.htm (look for base85) |
315 | 315 |
*/ |
316 |
-static int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
316 |
+static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
317 | 317 |
{ |
318 | 318 |
uint8_t *decoded, *dptr; |
319 | 319 |
uint32_t declen = 0; |
... | ... |
@@ -415,7 +547,7 @@ static int filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, str |
415 | 415 |
} |
416 | 416 |
|
417 | 417 |
/* imported from razorback */ |
418 |
-static int filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
418 |
+static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
419 | 419 |
{ |
420 | 420 |
uint8_t *decoded, *temp; |
421 | 421 |
uint32_t declen = 0, capacity = 0; |
... | ... |
@@ -523,7 +655,7 @@ static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length) |
523 | 523 |
return pt; |
524 | 524 |
} |
525 | 525 |
|
526 |
-static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
526 |
+static cl_error_t filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
527 | 527 |
{ |
528 | 528 |
uint8_t *decoded, *temp; |
529 | 529 |
uint32_t declen = 0, capacity = 0; |
... | ... |
@@ -671,14 +803,14 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struc |
671 | 671 |
return rc; |
672 | 672 |
} |
673 | 673 |
|
674 |
-static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
674 |
+static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token) |
|
675 | 675 |
{ |
676 | 676 |
uint8_t *decoded; |
677 | 677 |
|
678 | 678 |
const uint8_t *content = (uint8_t *)token->content; |
679 | 679 |
uint32_t length = token->length; |
680 | 680 |
uint32_t i, j; |
681 |
- int rc = CL_SUCCESS; |
|
681 |
+ cl_error_t rc = CL_SUCCESS; |
|
682 | 682 |
|
683 | 683 |
if (!(decoded = (uint8_t *)cli_calloc(length/2 + 1, sizeof(uint8_t)))) { |
684 | 684 |
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); |
... | ... |
@@ -724,7 +856,7 @@ static int filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *obj, st |
724 | 724 |
} |
725 | 725 |
|
726 | 726 |
/* modes: 0 = use default/DecodeParms, 1 = use document setting */ |
727 |
-static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode) |
|
727 |
+static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int mode) |
|
728 | 728 |
{ |
729 | 729 |
char *decrypted; |
730 | 730 |
size_t length = (size_t)token->length; |
... | ... |
@@ -768,7 +900,7 @@ static int filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, struct pd |
768 | 768 |
return CL_SUCCESS; |
769 | 769 |
} |
770 | 770 |
|
771 |
-static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
771 |
+static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
|
772 | 772 |
{ |
773 | 773 |
uint8_t *decoded, *temp; |
774 | 774 |
uint32_t declen = 0, capacity = 0; |
... | ... |
@@ -36,6 +36,26 @@ |
36 | 36 |
|
37 | 37 |
#include "pdf.h" |
38 | 38 |
|
39 |
-ptrdiff_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, const char *stream, uint32_t streamlen, int xref, int fout, int *rc); |
|
39 |
+/** |
|
40 |
+ * @brief Wrapper function for pdf_decodestream_internal. |
|
41 |
+ * |
|
42 |
+ * Allocate a token object to store decoded filter data. |
|
43 |
+ * Parse/decode the filter data and scan it. |
|
44 |
+ * |
|
45 |
+ * @param pdf Pdf context structure. |
|
46 |
+ * @param obj The object we found the filter content in. |
|
47 |
+ * @param params Dictionary parameters describing the filter data. |
|
48 |
+ * @param stream Filter stream buffer pointer. |
|
49 |
+ * @param streamlen Length of filter stream buffer. |
|
50 |
+ * @param xref Indicates if the stream is an /XRef stream. Do not apply forced decryption on /XRef streams. |
|
51 |
+ * @param fout File descriptor to write to a temp file. |
|
52 |
+ * @param[out] status CL_SUCCESS or CL_VIRUS, or a CL_E<error> code on failure. |
|
53 |
+ * @param objstm Object stream context structure. |
|
54 |
+ * @return ptrdiff_t The number of bytes written to 'fout'. -1 on failure or if nothing was written. |
|
55 |
+ */ |
|
56 |
+ptrdiff_t pdf_decodestream( |
|
57 |
+ struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, |
|
58 |
+ const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status, |
|
59 |
+ struct objstm_struct *objstm); |
|
40 | 60 |
|
41 | 61 |
#endif /* __PDFDECODE_H__ */ |
... | ... |
@@ -377,17 +377,26 @@ char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const cha |
377 | 377 |
|
378 | 378 |
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta) |
379 | 379 |
{ |
380 |
- const char *q = objstart, *oobj=obj->start+pdf->map; |
|
380 |
+ const char *q = objstart; |
|
381 | 381 |
char *p1, *p2; |
382 | 382 |
size_t len, checklen; |
383 | 383 |
char *res = NULL; |
384 | 384 |
uint32_t objid; |
385 | 385 |
size_t i; |
386 | 386 |
|
387 |
- if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) { |
|
388 |
- /* Possible attempt to exploit bb11980 */ |
|
389 |
- cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n"); |
|
390 |
- return NULL; |
|
387 |
+ if (obj->objstm) { |
|
388 |
+ if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) { |
|
389 |
+ /* Possible attempt to exploit bb11980 */ |
|
390 |
+ cli_dbgmsg("Malformed PDF: Alleged size of obj in object stream in PDF would extend further than the object stream data.\n"); |
|
391 |
+ return NULL; |
|
392 |
+ } |
|
393 |
+ } |
|
394 |
+ else { |
|
395 |
+ if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) { |
|
396 |
+ /* Possible attempt to exploit bb11980 */ |
|
397 |
+ cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n"); |
|
398 |
+ return NULL; |
|
399 |
+ } |
|
391 | 400 |
} |
392 | 401 |
|
393 | 402 |
/* |
... | ... |
@@ -557,10 +566,10 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
557 | 557 |
/* Hex string */ |
558 | 558 |
|
559 | 559 |
p2 = p1+1; |
560 |
- while ((size_t)(p2 - oobj) < objsize && *p2 != '>') |
|
560 |
+ while ((size_t)(p2 - objstart) < objsize && *p2 != '>') |
|
561 | 561 |
p2++; |
562 | 562 |
|
563 |
- if ((size_t)(p2 - oobj) == objsize) { |
|
563 |
+ if ((size_t)(p2 - objstart) == objsize) { |
|
564 | 564 |
return NULL; |
565 | 565 |
} |
566 | 566 |
|
... | ... |
@@ -647,7 +656,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char * |
647 | 647 |
return res; |
648 | 648 |
} |
649 | 649 |
|
650 |
-struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar) |
|
650 |
+struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar) |
|
651 | 651 |
{ |
652 | 652 |
struct pdf_dict *res=NULL; |
653 | 653 |
struct pdf_dict_node *node=NULL; |
... | ... |
@@ -659,9 +668,10 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
659 | 659 |
if (!(pdf) || !(obj) || !(begin)) |
660 | 660 |
return NULL; |
661 | 661 |
|
662 |
- objstart = (const char *)(obj->start + pdf->map); |
|
662 |
+ objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
663 |
+ : (const char *)(obj->start + pdf->map); |
|
663 | 664 |
|
664 |
- if (begin < objstart || (size_t)(begin - objstart) >= objsz - 2) |
|
665 |
+ if (begin < objstart || (size_t)(begin - objstart) >= objsize - 2) |
|
665 | 666 |
return NULL; |
666 | 667 |
|
667 | 668 |
if (begin[0] != '<' || begin[1] != '<') |
... | ... |
@@ -669,7 +679,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
669 | 669 |
|
670 | 670 |
/* Find the end of the dictionary */ |
671 | 671 |
end = begin; |
672 |
- while ((size_t)(end - objstart) < objsz) { |
|
672 |
+ while ((size_t)(end - objstart) < objsize) { |
|
673 | 673 |
int increment=1; |
674 | 674 |
if (in_string) { |
675 | 675 |
if (*end == '\\') { |
... | ... |
@@ -689,18 +699,18 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
689 | 689 |
in_string=1; |
690 | 690 |
break; |
691 | 691 |
case '<': |
692 |
- if ((size_t)(end - objstart) <= objsz - 2 && end[1] == '<') |
|
692 |
+ if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '<') |
|
693 | 693 |
ninner++; |
694 | 694 |
increment=2; |
695 | 695 |
break; |
696 | 696 |
case '>': |
697 |
- if ((size_t)(end - objstart) <= objsz - 2 && end[1] == '>') |
|
697 |
+ if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '>') |
|
698 | 698 |
ninner--; |
699 | 699 |
increment=2; |
700 | 700 |
break; |
701 | 701 |
} |
702 | 702 |
|
703 |
- if ((size_t)(end - objstart) <= objsz - 2) |
|
703 |
+ if ((size_t)(end - objstart) <= objsize - 2) |
|
704 | 704 |
if (end[0] == '>' && end[1] == '>' && ninner == 0) |
705 | 705 |
break; |
706 | 706 |
|
... | ... |
@@ -708,7 +718,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
708 | 708 |
} |
709 | 709 |
|
710 | 710 |
/* More sanity checking */ |
711 |
- if ((size_t)(end - objstart) >= objsz - 2) |
|
711 |
+ if ((size_t)(end - objstart) >= objsize - 2) |
|
712 | 712 |
return NULL; |
713 | 713 |
|
714 | 714 |
if (end[0] != '>' || end[1] != '>') |
... | ... |
@@ -809,7 +819,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
809 | 809 |
begin = p1+1; |
810 | 810 |
break; |
811 | 811 |
case '<': |
812 |
- if ((size_t)(begin - objstart) < objsz - 2) { |
|
812 |
+ if ((size_t)(begin - objstart) < objsize - 2) { |
|
813 | 813 |
if (begin[1] == '<') { |
814 | 814 |
dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1); |
815 | 815 |
begin = p1+2; |
... | ... |
@@ -912,7 +922,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz |
912 | 912 |
return res; |
913 | 913 |
} |
914 | 914 |
|
915 |
-struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar) |
|
915 |
+struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar) |
|
916 | 916 |
{ |
917 | 917 |
struct pdf_array *res=NULL; |
918 | 918 |
struct pdf_array_node *node=NULL; |
... | ... |
@@ -924,9 +934,10 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s |
924 | 924 |
if (!(pdf) || !(obj) || !(begin)) |
925 | 925 |
return NULL; |
926 | 926 |
|
927 |
- objstart = obj->start + pdf->map; |
|
927 |
+ objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf) |
|
928 |
+ : (const char *)(obj->start + pdf->map); |
|
928 | 929 |
|
929 |
- if (begin < objstart || (size_t)(begin - objstart) >= objsz) |
|
930 |
+ if (begin < objstart || (size_t)(begin - objstart) >= objsize) |
|
930 | 931 |
return NULL; |
931 | 932 |
|
932 | 933 |
if (begin[0] != '[') |
... | ... |
@@ -934,7 +945,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s |
934 | 934 |
|
935 | 935 |
/* Find the end of the array */ |
936 | 936 |
end = begin; |
937 |
- while ((size_t)(end - objstart) < objsz) { |
|
937 |
+ while ((size_t)(end - objstart) < objsize) { |
|
938 | 938 |
if (in_string) { |
939 | 939 |
if (*end == '\\') { |
940 | 940 |
end += 2; |
... | ... |
@@ -967,7 +978,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s |
967 | 967 |
} |
968 | 968 |
|
969 | 969 |
/* More sanity checking */ |
970 |
- if ((size_t)(end - objstart) >= objsz) |
|
970 |
+ if ((size_t)(end - objstart) >= objsize) |
|
971 | 971 |
return NULL; |
972 | 972 |
|
973 | 973 |
if (*end != ']') |
... | ... |
@@ -991,7 +1002,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s |
991 | 991 |
|
992 | 992 |
switch (begin[0]) { |
993 | 993 |
case '<': |
994 |
- if ((size_t)(begin - objstart) < objsz - 2 && begin[1] == '<') { |
|
994 |
+ if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') { |
|
995 | 995 |
dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin); |
996 | 996 |
begin+=2; |
997 | 997 |
break; |
... | ... |
@@ -755,7 +755,7 @@ done: |
755 | 755 |
* @return CL_SUCCESS Success |
756 | 756 |
* @return CL_EPARSE Failure |
757 | 757 |
*/ |
758 |
-int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result) |
|
758 |
+cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result) |
|
759 | 759 |
{ |
760 | 760 |
char *endptr = NULL; |
761 | 761 |
long num; |
... | ... |
@@ -798,7 +798,7 @@ int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int |
798 | 798 |
* @return CL_SUCCESS Success |
799 | 799 |
* @return CL_EPARSE Failure |
800 | 800 |
*/ |
801 |
-int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result) |
|
801 |
+cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result) |
|
802 | 802 |
{ |
803 | 803 |
char *endptr = NULL; |
804 | 804 |
long num; |
... | ... |
@@ -28,6 +28,7 @@ |
28 | 28 |
#include <ctype.h> |
29 | 29 |
#include <sys/types.h> |
30 | 30 |
|
31 |
+#include "clamav.h" |
|
31 | 32 |
#include "cltypes.h" |
32 | 33 |
|
33 | 34 |
#ifdef HAVE_STRCASESTR |
... | ... |
@@ -68,8 +69,8 @@ const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle |
68 | 68 |
char *cli_strrcpy(char *dest, const char *source); |
69 | 69 |
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens); |
70 | 70 |
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, int token_skip); |
71 |
-int cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result); |
|
72 |
-int cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result); |
|
71 |
+cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, long *result); |
|
72 |
+cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size, int fail_at_nondigit, int base, unsigned long *result); |
|
73 | 73 |
int cli_isnumber(const char *str); |
74 | 74 |
char *cli_unescape(const char *str); |
75 | 75 |
struct text_buffer; |