Browse code

libclamav: improve handling of PDF files (bb#1682)

Tomasz Kojm authored on 2009/08/25 05:09:38
Showing 4 changed files
... ...
@@ -1,3 +1,7 @@
1
+Mon Aug 24 22:09:12 CEST 2009 (tk)
2
+----------------------------------
3
+ * libclamav: improve handling of PDF files (bb#1682)
4
+
1 5
 Fri Aug 21 15:53:35 CEST 2009 (tk)
2 6
 ----------------------------------
3 7
  * libclamav: handle relative offsets with cli_ac_data; fix offset logic
... ...
@@ -57,6 +57,7 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
57 57
 #include "mbox.h"
58 58
 #include "pdf.h"
59 59
 #include "scanners.h"
60
+#include "str.h"
60 61
 
61 62
 #ifndef	O_BINARY
62 63
 #define	O_BINARY	0
... ...
@@ -72,7 +73,6 @@ static	int	flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx);
72 72
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
73 73
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
74 74
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
75
-static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
76 75
 
77 76
 int
78 77
 cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
... ...
@@ -87,6 +87,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
87 87
 	int printed_predictor_message, printed_embedded_font_message, rc;
88 88
 	unsigned int files;
89 89
 	struct stat statb;
90
+	int opt_failed = 0;
90 91
 
91 92
 	cli_dbgmsg("in cli_pdf(%s)\n", dir);
92 93
 
... ...
@@ -145,7 +146,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
145 145
 	 * q points to the end of the trailer section
146 146
 	 */
147 147
 	trailerlength = (long)(q - trailerstart);
148
-	if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
148
+	if(cli_memstr(trailerstart, trailerlength, "Encrypt", 7)) {
149 149
 		/*
150 150
 		 * This tends to mean that the file is, in effect, read-only
151 151
 		 * http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt
... ...
@@ -238,7 +239,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
238 238
 
239 239
 		bytesleft -= (off_t)((q - p) + 3);
240 240
 		objstart = p = &q[3];
241
-		objend = cli_pmemstr(p, bytesleft, "endobj", 6);
241
+		objend = cli_memstr(p, bytesleft, "endobj", 6);
242 242
 		if(objend == NULL) {
243 243
 			cli_dbgmsg("cli_pdf: No matching endobj\n");
244 244
 			break;
... ...
@@ -248,7 +249,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
248 248
 		objlen = (unsigned long)(objend - objstart);
249 249
 
250 250
 		/* Is this object a stream? */
251
-		streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
251
+		streamstart = cli_memstr(objstart, objlen, "stream", 6);
252 252
 		if(streamstart == NULL)
253 253
 			continue;
254 254
 
... ...
@@ -274,7 +275,6 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
274 274
 					 */
275 275
 					if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
276 276
 						const char *r, *nq;
277
-						int opt_failed = 0;
278 277
 						size_t len;
279 278
 						char b[14];
280 279
 
... ...
@@ -286,10 +286,15 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
286 286
 						length = (unsigned long)strlen(b);
287 287
 						/* optimization: assume objects
288 288
 						 * are sequential */
289
-						nq = q;
290
-						len = buf + size - q;
289
+						if(!opt_failed) {
290
+						    nq = q;
291
+						    len = buf + size - q;
292
+						} else {
293
+						    nq = buf;
294
+						    len = q - buf;
295
+						}
291 296
 						do {
292
-							r = cli_pmemstr(nq, len, b, length);
297
+							r = cli_memstr(nq, len, b, length);
293 298
 							if (r > nq) {
294 299
 								const char x = *(r-1);
295 300
 								if (x == '\n' || x=='\r') {
... ...
@@ -298,8 +303,8 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
298 298
 								}
299 299
 							}
300 300
 							if (r) {
301
-								len -= r+1-nq;
302
-								nq = r + 1;
301
+								len -= r + length - nq;
302
+								nq = r + length;
303 303
 							} else if (!opt_failed) {
304 304
 								/* we failed optimized match,
305 305
 								 * try matching from the beginning
... ...
@@ -381,9 +386,9 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
381 381
 			break;
382 382
 		len -= (int)(q - streamstart);
383 383
 		streamstart = q;
384
-		streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
384
+		streamend = cli_memstr(streamstart, len, "endstream\n", 10);
385 385
 		if(streamend == NULL) {
386
-			streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
386
+			streamend = cli_memstr(streamstart, len, "endstream\r", 10);
387 387
 			if(streamend == NULL) {
388 388
 				cli_dbgmsg("cli_pdf: No endstream\n");
389 389
 				break;
... ...
@@ -531,6 +536,10 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset)
531 531
 						rc = CL_EUNLINK;
532 532
 						break;
533 533
 					}
534
+					if(cli_updatelimits(ctx, real_streamlen) != CL_SUCCESS) {
535
+						rc = CL_CLEAN;
536
+						break;
537
+					}
534 538
 					continue;
535 539
 				} else
536 540
 					tableInsert(md5table, md5str, 1);
... ...
@@ -713,7 +722,7 @@ ascii85decode(const char *buf, off_t len, unsigned char *output)
713 713
 	int quintet = 0;
714 714
 	int ret = 0;
715 715
 
716
-	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
716
+	if(cli_memstr(buf, len, "~>", 2) == NULL)
717 717
 		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");
718 718
 
719 719
 	ptr = buf;
... ...
@@ -842,45 +851,6 @@ pdf_nextobject(const char *ptr, size_t len)
842 842
 	return NULL;
843 843
 }
844 844
 
845
-/*
846
- * like cli_memstr - but returns the location of the match
847
- * FIXME: need a case insensitive version
848
- */
849
-static const char *
850
-cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
851
-{
852
-	const char *pt, *hay;
853
-	size_t n;
854
-
855
-	if(haystack == needle)
856
-		return haystack;
857
-
858
-	if(hs < ns)
859
-		return NULL;
860
-
861
-	if(memcmp(haystack, needle, ns) == 0)
862
-		return haystack;
863
-
864
-	pt = hay = haystack;
865
-	n = hs;
866
-
867
-	while((pt = memchr(hay, needle[0], n)) != NULL) {
868
-		n -= (size_t)(pt - hay);
869
-		if(n < ns)
870
-			break;
871
-
872
-		if(memcmp(pt, needle, ns) == 0)
873
-			return pt;
874
-
875
-		if(hay == pt) {
876
-			n--;
877
-			hay++;
878
-		} else
879
-			hay = pt;
880
-	}
881
-
882
-	return NULL;
883
-}
884 845
 #else	/*!HAVE_MMAP*/
885 846
 
886 847
 #include "clamav.h"
... ...
@@ -369,37 +369,33 @@ char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *out
369 369
     return output;
370 370
 }
371 371
 
372
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns)
372
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns)
373 373
 {
374
-	const char *pt, *hay;
375
-	int n;
374
+	unsigned int i, s1, s2;
376 375
 
377
-
378
-    if(hs < ns)
376
+    if(!hs || !ns || hs < ns)
379 377
 	return NULL;
380 378
 
381
-    if(haystack == needle)
382
-	return haystack;
383
-
384
-    if(!memcmp(haystack, needle, ns))
379
+    if(needle == haystack)
385 380
 	return haystack;
386 381
 
387
-    pt = hay = haystack;
388
-    n = hs;
382
+    if(ns == 1)
383
+	return memchr(haystack, needle[0], hs);
389 384
 
390
-    while((pt = memchr(hay, needle[0], n)) != NULL) {
391
-	n -= (int) (pt - hay);
392
-	if(n < ns)
393
-	    break;
394
-
395
-	if(!memcmp(pt, needle, ns))
396
-	    return pt;
397
-
398
-	if(hay == pt) {
399
-	    n--;
400
-	    hay++;
385
+    if(needle[0] == needle[1]) {
386
+	s1 = 2;
387
+	s2 = 1;
388
+    } else {
389
+	s1 = 1;
390
+	s2 = 2;
391
+    }
392
+    for(i = 0; i <= hs - ns; ) {
393
+	if(needle[1] != haystack[i + 1]) {
394
+	    i += s1;
401 395
 	} else {
402
-	    hay = pt;
396
+	    if((needle[0] == haystack[i]) && !memcmp(needle + 2, haystack + i + 2, ns - 2))
397
+		return &haystack[i];
398
+	    i += s2;
403 399
 	}
404 400
     }
405 401
 
... ...
@@ -42,7 +42,7 @@ int cli_hex2num(const char *hex);
42 42
 char *cli_str2hex(const char *string, unsigned int len);
43 43
 char *cli_utf16toascii(const char *str, unsigned int length);
44 44
 char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output);
45
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns);
45
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns);
46 46
 char *cli_strrcpy(char *dest, const char *source);
47 47
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
48 48
 int cli_isnumber(const char *str);