... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Mon Aug 24 22:09:12 CEST 2009 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: improve handling of PDF files (bb#1682) |
|
4 |
+ |
|
1 | 5 |
Fri Aug 21 15:53:35 CEST 2009 (tk) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav: handle relative offsets with cli_ac_data; fix offset logic |
... | ... |
@@ -57,6 +57,7 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
57 | 57 |
#include "mbox.h" |
58 | 58 |
#include "pdf.h" |
59 | 59 |
#include "scanners.h" |
60 |
+#include "str.h" |
|
60 | 61 |
|
61 | 62 |
#ifndef O_BINARY |
62 | 63 |
#define O_BINARY 0 |
... | ... |
@@ -72,7 +73,6 @@ static int flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx); |
72 | 72 |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
73 | 73 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
74 | 74 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
75 |
-static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns); |
|
76 | 75 |
|
77 | 76 |
int |
78 | 77 |
cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
... | ... |
@@ -87,6 +87,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
87 | 87 |
int printed_predictor_message, printed_embedded_font_message, rc; |
88 | 88 |
unsigned int files; |
89 | 89 |
struct stat statb; |
90 |
+ int opt_failed = 0; |
|
90 | 91 |
|
91 | 92 |
cli_dbgmsg("in cli_pdf(%s)\n", dir); |
92 | 93 |
|
... | ... |
@@ -145,7 +146,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
145 | 145 |
* q points to the end of the trailer section |
146 | 146 |
*/ |
147 | 147 |
trailerlength = (long)(q - trailerstart); |
148 |
- if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) { |
|
148 |
+ if(cli_memstr(trailerstart, trailerlength, "Encrypt", 7)) { |
|
149 | 149 |
/* |
150 | 150 |
* This tends to mean that the file is, in effect, read-only |
151 | 151 |
* http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt |
... | ... |
@@ -238,7 +239,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
238 | 238 |
|
239 | 239 |
bytesleft -= (off_t)((q - p) + 3); |
240 | 240 |
objstart = p = &q[3]; |
241 |
- objend = cli_pmemstr(p, bytesleft, "endobj", 6); |
|
241 |
+ objend = cli_memstr(p, bytesleft, "endobj", 6); |
|
242 | 242 |
if(objend == NULL) { |
243 | 243 |
cli_dbgmsg("cli_pdf: No matching endobj\n"); |
244 | 244 |
break; |
... | ... |
@@ -248,7 +249,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
248 | 248 |
objlen = (unsigned long)(objend - objstart); |
249 | 249 |
|
250 | 250 |
/* Is this object a stream? */ |
251 |
- streamstart = cli_pmemstr(objstart, objlen, "stream", 6); |
|
251 |
+ streamstart = cli_memstr(objstart, objlen, "stream", 6); |
|
252 | 252 |
if(streamstart == NULL) |
253 | 253 |
continue; |
254 | 254 |
|
... | ... |
@@ -274,7 +275,6 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
274 | 274 |
*/ |
275 | 275 |
if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) { |
276 | 276 |
const char *r, *nq; |
277 |
- int opt_failed = 0; |
|
278 | 277 |
size_t len; |
279 | 278 |
char b[14]; |
280 | 279 |
|
... | ... |
@@ -286,10 +286,15 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
286 | 286 |
length = (unsigned long)strlen(b); |
287 | 287 |
/* optimization: assume objects |
288 | 288 |
* are sequential */ |
289 |
- nq = q; |
|
290 |
- len = buf + size - q; |
|
289 |
+ if(!opt_failed) { |
|
290 |
+ nq = q; |
|
291 |
+ len = buf + size - q; |
|
292 |
+ } else { |
|
293 |
+ nq = buf; |
|
294 |
+ len = q - buf; |
|
295 |
+ } |
|
291 | 296 |
do { |
292 |
- r = cli_pmemstr(nq, len, b, length); |
|
297 |
+ r = cli_memstr(nq, len, b, length); |
|
293 | 298 |
if (r > nq) { |
294 | 299 |
const char x = *(r-1); |
295 | 300 |
if (x == '\n' || x=='\r') { |
... | ... |
@@ -298,8 +303,8 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
298 | 298 |
} |
299 | 299 |
} |
300 | 300 |
if (r) { |
301 |
- len -= r+1-nq; |
|
302 |
- nq = r + 1; |
|
301 |
+ len -= r + length - nq; |
|
302 |
+ nq = r + length; |
|
303 | 303 |
} else if (!opt_failed) { |
304 | 304 |
/* we failed optimized match, |
305 | 305 |
* try matching from the beginning |
... | ... |
@@ -381,9 +386,9 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
381 | 381 |
break; |
382 | 382 |
len -= (int)(q - streamstart); |
383 | 383 |
streamstart = q; |
384 |
- streamend = cli_pmemstr(streamstart, len, "endstream\n", 10); |
|
384 |
+ streamend = cli_memstr(streamstart, len, "endstream\n", 10); |
|
385 | 385 |
if(streamend == NULL) { |
386 |
- streamend = cli_pmemstr(streamstart, len, "endstream\r", 10); |
|
386 |
+ streamend = cli_memstr(streamstart, len, "endstream\r", 10); |
|
387 | 387 |
if(streamend == NULL) { |
388 | 388 |
cli_dbgmsg("cli_pdf: No endstream\n"); |
389 | 389 |
break; |
... | ... |
@@ -531,6 +536,10 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
531 | 531 |
rc = CL_EUNLINK; |
532 | 532 |
break; |
533 | 533 |
} |
534 |
+ if(cli_updatelimits(ctx, real_streamlen) != CL_SUCCESS) { |
|
535 |
+ rc = CL_CLEAN; |
|
536 |
+ break; |
|
537 |
+ } |
|
534 | 538 |
continue; |
535 | 539 |
} else |
536 | 540 |
tableInsert(md5table, md5str, 1); |
... | ... |
@@ -713,7 +722,7 @@ ascii85decode(const char *buf, off_t len, unsigned char *output) |
713 | 713 |
int quintet = 0; |
714 | 714 |
int ret = 0; |
715 | 715 |
|
716 |
- if(cli_pmemstr(buf, len, "~>", 2) == NULL) |
|
716 |
+ if(cli_memstr(buf, len, "~>", 2) == NULL) |
|
717 | 717 |
cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n"); |
718 | 718 |
|
719 | 719 |
ptr = buf; |
... | ... |
@@ -842,45 +851,6 @@ pdf_nextobject(const char *ptr, size_t len) |
842 | 842 |
return NULL; |
843 | 843 |
} |
844 | 844 |
|
845 |
-/* |
|
846 |
- * like cli_memstr - but returns the location of the match |
|
847 |
- * FIXME: need a case insensitive version |
|
848 |
- */ |
|
849 |
-static const char * |
|
850 |
-cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns) |
|
851 |
-{ |
|
852 |
- const char *pt, *hay; |
|
853 |
- size_t n; |
|
854 |
- |
|
855 |
- if(haystack == needle) |
|
856 |
- return haystack; |
|
857 |
- |
|
858 |
- if(hs < ns) |
|
859 |
- return NULL; |
|
860 |
- |
|
861 |
- if(memcmp(haystack, needle, ns) == 0) |
|
862 |
- return haystack; |
|
863 |
- |
|
864 |
- pt = hay = haystack; |
|
865 |
- n = hs; |
|
866 |
- |
|
867 |
- while((pt = memchr(hay, needle[0], n)) != NULL) { |
|
868 |
- n -= (size_t)(pt - hay); |
|
869 |
- if(n < ns) |
|
870 |
- break; |
|
871 |
- |
|
872 |
- if(memcmp(pt, needle, ns) == 0) |
|
873 |
- return pt; |
|
874 |
- |
|
875 |
- if(hay == pt) { |
|
876 |
- n--; |
|
877 |
- hay++; |
|
878 |
- } else |
|
879 |
- hay = pt; |
|
880 |
- } |
|
881 |
- |
|
882 |
- return NULL; |
|
883 |
-} |
|
884 | 845 |
#else /*!HAVE_MMAP*/ |
885 | 846 |
|
886 | 847 |
#include "clamav.h" |
... | ... |
@@ -369,37 +369,33 @@ char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *out |
369 | 369 |
return output; |
370 | 370 |
} |
371 | 371 |
|
372 |
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns) |
|
372 |
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns) |
|
373 | 373 |
{ |
374 |
- const char *pt, *hay; |
|
375 |
- int n; |
|
374 |
+ unsigned int i, s1, s2; |
|
376 | 375 |
|
377 |
- |
|
378 |
- if(hs < ns) |
|
376 |
+ if(!hs || !ns || hs < ns) |
|
379 | 377 |
return NULL; |
380 | 378 |
|
381 |
- if(haystack == needle) |
|
382 |
- return haystack; |
|
383 |
- |
|
384 |
- if(!memcmp(haystack, needle, ns)) |
|
379 |
+ if(needle == haystack) |
|
385 | 380 |
return haystack; |
386 | 381 |
|
387 |
- pt = hay = haystack; |
|
388 |
- n = hs; |
|
382 |
+ if(ns == 1) |
|
383 |
+ return memchr(haystack, needle[0], hs); |
|
389 | 384 |
|
390 |
- while((pt = memchr(hay, needle[0], n)) != NULL) { |
|
391 |
- n -= (int) (pt - hay); |
|
392 |
- if(n < ns) |
|
393 |
- break; |
|
394 |
- |
|
395 |
- if(!memcmp(pt, needle, ns)) |
|
396 |
- return pt; |
|
397 |
- |
|
398 |
- if(hay == pt) { |
|
399 |
- n--; |
|
400 |
- hay++; |
|
385 |
+ if(needle[0] == needle[1]) { |
|
386 |
+ s1 = 2; |
|
387 |
+ s2 = 1; |
|
388 |
+ } else { |
|
389 |
+ s1 = 1; |
|
390 |
+ s2 = 2; |
|
391 |
+ } |
|
392 |
+ for(i = 0; i <= hs - ns; ) { |
|
393 |
+ if(needle[1] != haystack[i + 1]) { |
|
394 |
+ i += s1; |
|
401 | 395 |
} else { |
402 |
- hay = pt; |
|
396 |
+ if((needle[0] == haystack[i]) && !memcmp(needle + 2, haystack + i + 2, ns - 2)) |
|
397 |
+ return &haystack[i]; |
|
398 |
+ i += s2; |
|
403 | 399 |
} |
404 | 400 |
} |
405 | 401 |
|
... | ... |
@@ -42,7 +42,7 @@ int cli_hex2num(const char *hex); |
42 | 42 |
char *cli_str2hex(const char *string, unsigned int len); |
43 | 43 |
char *cli_utf16toascii(const char *str, unsigned int length); |
44 | 44 |
char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output); |
45 |
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns); |
|
45 |
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns); |
|
46 | 46 |
char *cli_strrcpy(char *dest, const char *source); |
47 | 47 |
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens); |
48 | 48 |
int cli_isnumber(const char *str); |