Browse code

Don't scan files embedded more than once

git-svn: trunk@1859

Nigel Horne authored on 2006/03/12 00:56:57
Showing 2 changed files
... ...
@@ -1,3 +1,9 @@
1
+Sat Mar 11 15:54:42 GMT 2006 (njh)
2
+----------------------------------
3
+  * libclamav/pdf.c:	Handle files with consecutive 'endobj' statements,
4
+				reported by Andy Fiddaman <clam*fiddaman.net>
5
+			Scan files embedded more than once only one time.
6
+
1 7
 Sat Mar 11 15:23:20 CET 2006 (tk)
2 8
 ---------------------------------
3 9
   * libclamav/sis.c: improve support for multi-language files
... ...
@@ -15,7 +15,7 @@
15 15
  *  along with this program; if not, write to the Free Software
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  */
18
-static	char	const	rcsid[] = "$Id: pdf.c,v 1.44 2006/03/11 15:24:57 nigelhorne Exp $";
18
+static	char	const	rcsid[] = "$Id: pdf.c,v 1.45 2006/03/11 15:54:09 nigelhorne Exp $";
19 19
 
20 20
 #if HAVE_CONFIG_H
21 21
 #include "clamav-config.h"
... ...
@@ -51,6 +51,7 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.44 2006/03/11 15:24:57 nigelhorne Ex
51 51
 #include "mbox.h"
52 52
 #include "blob.h"
53 53
 #include "pdf.h"
54
+#include "md5.h"
54 55
 
55 56
 #ifndef	MIN
56 57
 #define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
... ...
@@ -76,6 +77,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
76 76
 	const char *xrefstart;	/* cross reference table */
77 77
 	/*size_t xreflength;*/
78 78
 	int rc = CL_CLEAN;
79
+	struct table *md5table;
79 80
 
80 81
 	cli_dbgmsg("in cli_pdf()\n");
81 82
 
... ...
@@ -166,6 +168,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
166 166
 		return CL_EFORMAT;
167 167
 	}
168 168
 
169
+	md5table = tableCreate();
169 170
 	/*
170 171
 	 * not true, since edits may put data after the trailer
171 172
 	xreflength = (size_t)(trailerstart - xrefstart);
... ...
@@ -179,6 +182,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
179 179
 		int is_ascii85decode, is_flatedecode, fout, len;
180 180
 		/*int object_number, generation_number;*/
181 181
 		const char *objstart, *objend, *streamstart, *streamend;
182
+		char *md5digest;
182 183
 		size_t length, objlen, streamlen;
183 184
 		char fullname[NAME_MAX + 1];
184 185
   
... ...
@@ -351,11 +355,20 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
351 351
 			cli_writen(fout, (char *)streamstart, streamlen);
352 352
 
353 353
 		close(fout);
354
+		md5digest = cli_md5file(fullname);
355
+		if(tableFind(md5table, md5digest) >= 0) {
356
+			cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
357
+			unlink(fullname);
358
+		} else
359
+			tableInsert(md5table, md5digest, 1);
360
+		free(md5digest);
354 361
 		cli_dbgmsg("cli_pdf: extracted to %s\n", fullname);
355 362
 	}
356 363
 
357 364
 	munmap(buf, size);
358 365
 
366
+	tableDestroy(md5table);
367
+
359 368
 	cli_dbgmsg("cli_pdf: returning %d\n", rc);
360 369
 	return rc;
361 370
 }