git-svn: trunk@2882
Nigel Horne authored on 2007/03/01 20:06:37... | ... |
@@ -1,3 +1,10 @@ |
1 |
+Thu Mar 1 09:10:04 GMT 2007 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/pdf.c: Try with both real and calculated Length fields, since |
|
4 |
+ the Length object can't always be trusted |
|
5 |
+ Improved backing out of unhandled formats (e.g. |
|
6 |
+ Predictor for images and embedded fonts) |
|
7 |
+ |
|
1 | 8 |
Thu Mar 1 02:36:40 CET 2007 (tk) |
2 | 9 |
--------------------------------- |
3 | 10 |
* libclamav/unrar/unrar.c: improve handling of multi-volume archives: do not |
... | ... |
@@ -50,7 +57,7 @@ Sun Feb 25 20:50:54 CET 2007 (tk) |
50 | 50 |
Sun Feb 25 17:00:31 CET 2007 (acab) |
51 | 51 |
----------------------------------- |
52 | 52 |
* libclamav/pe.c: fix leaks on upack return (bb#351) |
53 |
- |
|
53 |
+ |
|
54 | 54 |
Sun Feb 25 14:40:10 CET 2007 (tk) |
55 | 55 |
--------------------------------- |
56 | 56 |
* libclamav/unzip.c: fix memory leak when extracting stored files |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
/* |
2 |
- * Copyright (C) 2005 Nigel Horne <njh@bandsman.co.uk> |
|
2 |
+ * Copyright (C) 2005-2007 Nigel Horne <njh@bandsman.co.uk> |
|
3 | 3 |
* |
4 | 4 |
* This program is free software; you can redistribute it and/or modify |
5 | 5 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -14,6 +14,9 @@ |
14 | 14 |
* You should have received a copy of the GNU General Public License |
15 | 15 |
* along with this program; if not, write to the Free Software |
16 | 16 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
17 |
+ * |
|
18 |
+ * TODO: Embedded fonts |
|
19 |
+ * TODO: Predictor image handling |
|
17 | 20 |
*/ |
18 | 21 |
static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
19 | 22 |
|
... | ... |
@@ -51,6 +54,11 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
51 | 51 |
#include "mbox.h" |
52 | 52 |
#include "pdf.h" |
53 | 53 |
|
54 |
+#ifdef CL_DEBUG |
|
55 |
+/*#define SAVE_TMP /* Save the file being worked on in tmp */ |
|
56 |
+#endif |
|
57 |
+ |
|
58 |
+static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx); |
|
54 | 59 |
static int flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx); |
55 | 60 |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
56 | 61 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
... | ... |
@@ -72,6 +80,8 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
72 | 72 |
/*size_t xreflength;*/ |
73 | 73 |
int rc = CL_CLEAN; |
74 | 74 |
struct table *md5table; |
75 |
+ int printed_predictor_message; |
|
76 |
+ int printed_embedded_font_message; |
|
75 | 77 |
|
76 | 78 |
cli_dbgmsg("in cli_pdf(%s)\n", dir); |
77 | 79 |
|
... | ... |
@@ -162,6 +172,8 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
162 | 162 |
return CL_EFORMAT; |
163 | 163 |
} |
164 | 164 |
|
165 |
+ printed_predictor_message = printed_embedded_font_message = 0; |
|
166 |
+ |
|
165 | 167 |
md5table = tableCreate(); |
166 | 168 |
/* |
167 | 169 |
* not true, since edits may put data after the trailer |
... | ... |
@@ -179,7 +191,8 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
179 | 179 |
/*int object_number, generation_number;*/ |
180 | 180 |
const char *objstart, *objend, *streamstart, *streamend; |
181 | 181 |
char *md5digest; |
182 |
- size_t length, objlen, streamlen; |
|
182 |
+ size_t length, objlen, real_streamlen, calculated_streamlen; |
|
183 |
+ int is_embedded_font, predictor; |
|
183 | 184 |
char fullname[NAME_MAX + 1]; |
184 | 185 |
|
185 | 186 |
if(q == xrefstart) |
... | ... |
@@ -231,7 +244,10 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
231 | 231 |
if(streamstart == NULL) |
232 | 232 |
continue; |
233 | 233 |
|
234 |
- length = is_ascii85decode = is_flatedecode = 0; |
|
234 |
+ is_embedded_font = length = is_ascii85decode = |
|
235 |
+ is_flatedecode = 0; |
|
236 |
+ predictor = 1; |
|
237 |
+ |
|
235 | 238 |
/* |
236 | 239 |
* TODO: handle F and FFilter? |
237 | 240 |
*/ |
... | ... |
@@ -245,6 +261,14 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
245 | 245 |
while(isdigit(*q)) |
246 | 246 |
q++; |
247 | 247 |
q--; |
248 |
+ } else if(strncmp(q, "Length2 ", 8) == 0) |
|
249 |
+ is_embedded_font = 1; |
|
250 |
+ else if(strncmp(q, "Predictor ", 10) == 0) { |
|
251 |
+ q += 10; |
|
252 |
+ predictor = atoi(q); |
|
253 |
+ while(isdigit(*q)) |
|
254 |
+ q++; |
|
255 |
+ q--; |
|
248 | 256 |
} else if(strncmp(q, "FlateDecode", 11) == 0) { |
249 | 257 |
is_flatedecode = 1; |
250 | 258 |
q += 11; |
... | ... |
@@ -258,6 +282,31 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
258 | 258 |
break; |
259 | 259 |
} |
260 | 260 |
|
261 |
+ if(is_embedded_font) { |
|
262 |
+ /* |
|
263 |
+ * Need some documentation, the only one I can find a |
|
264 |
+ * reference to is not free, if some kind soul wishes |
|
265 |
+ * to donate a copy, please contact me! |
|
266 |
+ * (http://safari.adobepress.com/0321304748) |
|
267 |
+ */ |
|
268 |
+ if(!printed_embedded_font_message) { |
|
269 |
+ cli_dbgmsg("Embedded fonts not yet supported\n"); |
|
270 |
+ printed_embedded_font_message = 1; |
|
271 |
+ } |
|
272 |
+ continue; |
|
273 |
+ } |
|
274 |
+ if(predictor > 1) { |
|
275 |
+ /* |
|
276 |
+ * Needs some thought |
|
277 |
+ */ |
|
278 |
+ if(!printed_predictor_message) { |
|
279 |
+ cli_dbgmsg("Predictor %d not honoured for embedded image\n", |
|
280 |
+ predictor); |
|
281 |
+ printed_predictor_message = 1; |
|
282 |
+ } |
|
283 |
+ continue; |
|
284 |
+ } |
|
285 |
+ |
|
261 | 286 |
/* objend points to the end of the object (start of "endobj") */ |
262 | 287 |
streamstart += 6; /* go past the word "stream" */ |
263 | 288 |
len = (int)(objend - streamstart); |
... | ... |
@@ -274,16 +323,6 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
274 | 274 |
break; |
275 | 275 |
} |
276 | 276 |
} |
277 |
- /*while(strchr("\r\n", *--streamend)) |
|
278 |
- ;*/ |
|
279 |
- |
|
280 |
- streamlen = (int)(streamend - streamstart) + 1; |
|
281 |
- |
|
282 |
- if(streamlen == 0) { |
|
283 |
- cli_dbgmsg("Empty stream\n"); |
|
284 |
- continue; |
|
285 |
- } |
|
286 |
- |
|
287 | 277 |
snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir); |
288 | 278 |
#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) |
289 | 279 |
fout = mkstemp(fullname); |
... | ... |
@@ -312,8 +351,26 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
312 | 312 |
break; |
313 | 313 |
} |
314 | 314 |
|
315 |
- cli_dbgmsg("length %d, streamlen %d isFlate %d isASCII85 %d\n", |
|
316 |
- length, streamlen, is_flatedecode, is_ascii85decode); |
|
315 |
+ /* |
|
316 |
+ * Calculate the length ourselves, the Length parameter is often |
|
317 |
+ * wrong |
|
318 |
+ */ |
|
319 |
+ while(strchr("\r\n", *--streamend)) |
|
320 |
+ ; |
|
321 |
+ |
|
322 |
+ if(streamend <= streamstart) { |
|
323 |
+ cli_dbgmsg("Empty stream\n"); |
|
324 |
+ continue; |
|
325 |
+ } |
|
326 |
+ calculated_streamlen = (int)(streamend - streamstart) + 1; |
|
327 |
+ real_streamlen = length; |
|
328 |
+ |
|
329 |
+ if(calculated_streamlen != real_streamlen) |
|
330 |
+ cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n"); |
|
331 |
+ |
|
332 |
+ cli_dbgmsg("length %d, calculated_streamlen %d isFlate %d isASCII85 %d\n", |
|
333 |
+ length, calculated_streamlen, |
|
334 |
+ is_flatedecode, is_ascii85decode); |
|
317 | 335 |
|
318 | 336 |
#if 0 |
319 | 337 |
/* FIXME: this isn't right... */ |
... | ... |
@@ -323,7 +380,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
323 | 323 |
#endif |
324 | 324 |
|
325 | 325 |
if(is_ascii85decode) { |
326 |
- unsigned char *tmpbuf = cli_malloc(streamlen * 5); |
|
326 |
+ unsigned char *tmpbuf = cli_malloc(calculated_streamlen * 5); |
|
327 | 327 |
int ret; |
328 | 328 |
|
329 | 329 |
if(tmpbuf == NULL) { |
... | ... |
@@ -333,7 +390,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
333 | 333 |
continue; |
334 | 334 |
} |
335 | 335 |
|
336 |
- ret = ascii85decode(streamstart, streamlen, tmpbuf); |
|
336 |
+ ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf); |
|
337 | 337 |
|
338 | 338 |
if(ret == -1) { |
339 | 339 |
free(tmpbuf); |
... | ... |
@@ -343,31 +400,32 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
343 | 343 |
continue; |
344 | 344 |
} |
345 | 345 |
if(ret) { |
346 |
- streamlen = (size_t)ret; |
|
346 |
+ real_streamlen = (size_t)ret; |
|
347 | 347 |
/* free unused trailing bytes */ |
348 |
- tmpbuf = cli_realloc(tmpbuf, streamlen); |
|
348 |
+ tmpbuf = cli_realloc(tmpbuf, |
|
349 |
+ calculated_streamlen); |
|
349 | 350 |
/* |
350 | 351 |
* Note that it will probably be both |
351 | 352 |
* ascii85encoded and flateencoded |
352 | 353 |
*/ |
353 | 354 |
if(is_flatedecode) { |
354 |
- const int zstat = flatedecode((unsigned char *)tmpbuf, streamlen, fout, ctx); |
|
355 |
+ const int zstat = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx); |
|
355 | 356 |
|
356 | 357 |
if(zstat != Z_OK) |
357 | 358 |
rc = CL_EZIP; |
358 | 359 |
} else |
359 |
- cli_writen(fout, (const char *)streamstart, streamlen); |
|
360 |
+ cli_writen(fout, (const char *)streamstart, real_streamlen); |
|
360 | 361 |
} |
361 | 362 |
free(tmpbuf); |
362 | 363 |
} else if(is_flatedecode) { |
363 |
- const int zstat = flatedecode((unsigned char *)streamstart, streamlen, fout, ctx); |
|
364 |
+ const int zstat = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx); |
|
364 | 365 |
|
365 | 366 |
if(zstat != Z_OK) |
366 | 367 |
rc = CL_EZIP; |
367 | 368 |
} else { |
368 | 369 |
cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n", |
369 |
- (unsigned long)streamlen); |
|
370 |
- cli_writen(fout, (const char *)streamstart, streamlen); |
|
370 |
+ (unsigned long)real_streamlen); |
|
371 |
+ cli_writen(fout, (const char *)streamstart, real_streamlen); |
|
371 | 372 |
} |
372 | 373 |
|
373 | 374 |
close(fout); |
... | ... |
@@ -391,15 +449,56 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
391 | 391 |
|
392 | 392 |
/* flate inflation - returns zlib status, e.g. Z_OK */ |
393 | 393 |
static int |
394 |
+try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx) |
|
395 |
+{ |
|
396 |
+ int ret = flatedecode(buf, real_len, fout, ctx); |
|
397 |
+ |
|
398 |
+ if(ret == Z_OK) |
|
399 |
+ return Z_OK; |
|
400 |
+ |
|
401 |
+ if(real_len == calculated_len) |
|
402 |
+ return ret; |
|
403 |
+ |
|
404 |
+ return flatedecode(buf, calculated_len, fout, ctx); |
|
405 |
+} |
|
406 |
+ |
|
407 |
+static int |
|
394 | 408 |
flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx) |
395 | 409 |
{ |
396 | 410 |
int zstat; |
397 | 411 |
off_t nbytes; |
398 | 412 |
z_stream stream; |
399 | 413 |
unsigned char output[BUFSIZ]; |
414 |
+#ifdef SAVE_TMP |
|
415 |
+ char tmpfilename[16]; |
|
416 |
+ int tmpfd; |
|
417 |
+#endif |
|
400 | 418 |
|
401 | 419 |
cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len); |
402 | 420 |
|
421 |
+#ifdef SAVE_TMP |
|
422 |
+ /* |
|
423 |
+ * Copy the embedded area for debugging, so that if it falls over |
|
424 |
+ * we have a copy of the offending data. This is debugging code |
|
425 |
+ * that you shouldn't of course install in a live environment. I am |
|
426 |
+ * not interested in hearing about security issues with this section |
|
427 |
+ * of the parser. |
|
428 |
+ */ |
|
429 |
+ strcpy(tmpfilename, "/tmp/pdfXXXXXX"); |
|
430 |
+ tmpfd = mkstemp(tmpfilename); |
|
431 |
+ if(tmpfd < 0) { |
|
432 |
+ perror(tmpfilename); |
|
433 |
+ cli_errmsg("Can't make debugging file\n"); |
|
434 |
+ } else { |
|
435 |
+ FILE *tmpfp = fdopen(tmpfd, "w"); |
|
436 |
+ |
|
437 |
+ if(tmpfp) { |
|
438 |
+ fwrite(buf, sizeof(char), len, tmpfp); |
|
439 |
+ fclose(tmpfp); |
|
440 |
+ } else |
|
441 |
+ cli_errmsg("cli_pdf: can't fdopen debugging file\n"); |
|
442 |
+ } |
|
443 |
+#endif |
|
403 | 444 |
stream.zalloc = (alloc_func)Z_NULL; |
404 | 445 |
stream.zfree = (free_func)Z_NULL; |
405 | 446 |
stream.opaque = (void *)NULL; |
... | ... |
@@ -441,11 +540,11 @@ flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx) |
441 | 441 |
break; |
442 | 442 |
default: |
443 | 443 |
if(stream.msg) |
444 |
- cli_warnmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n", |
|
444 |
+ cli_dbgmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n", |
|
445 | 445 |
(unsigned long)nbytes, |
446 | 446 |
stream.msg); |
447 | 447 |
else |
448 |
- cli_warnmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n", |
|
448 |
+ cli_dbgmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n", |
|
449 | 449 |
(unsigned long)nbytes, zstat); |
450 | 450 |
inflateEnd(&stream); |
451 | 451 |
return zstat; |
... | ... |
@@ -454,7 +553,8 @@ flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx) |
454 | 454 |
} |
455 | 455 |
|
456 | 456 |
if(stream.avail_out != sizeof(output)) |
457 |
- (void)cli_writen(fout, output, sizeof(output) - stream.avail_out); |
|
457 |
+ if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) |
|
458 |
+ return Z_STREAM_ERROR; |
|
458 | 459 |
|
459 | 460 |
cli_dbgmsg("cli_pdf: flatedecode in=%lu out=%lu ratio %ld (max %d)\n", |
460 | 461 |
stream.total_in, stream.total_out, |
... | ... |
@@ -471,6 +571,9 @@ flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx) |
471 | 471 |
return Z_DATA_ERROR; |
472 | 472 |
} |
473 | 473 |
|
474 |
+#ifdef SAVE_TMP |
|
475 |
+ unlink(tmpfilename); |
|
476 |
+#endif |
|
474 | 477 |
return inflateEnd(&stream); |
475 | 478 |
} |
476 | 479 |
|
... | ... |
@@ -598,10 +701,13 @@ pdf_nextobject(const char *ptr, size_t len) |
598 | 598 |
case '[': /* Start of an array object */ |
599 | 599 |
case '\v': |
600 | 600 |
case '\f': |
601 |
+ case '<': /* Start of a dictionary object */ |
|
601 | 602 |
inobject = 0; |
602 | 603 |
ptr++; |
603 | 604 |
len--; |
604 | 605 |
break; |
606 |
+ case '/': /* Start of a name object */ |
|
607 |
+ return ptr; |
|
605 | 608 |
default: |
606 | 609 |
if(!inobject) |
607 | 610 |
/* TODO: parse and return object type */ |