git-svn: trunk@3620
aCaB authored on 2008/02/12 09:58:49... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Tue Feb 12 01:39:03 CET 2008 (acab) |
|
2 |
+----------------------------------- |
|
3 |
+ * libclamav/pdf: on-the-fly scanning of attachments |
|
4 |
+ |
|
1 | 5 |
Mon Feb 11 23:27:47 EET 2008 (edwin) |
2 | 6 |
------------------------------------ |
3 | 7 |
* libclamav/scanners.c, htmlnorm.c: tagless version of HTML file (bb #162) |
... | ... |
@@ -72,7 +72,7 @@ static const char *cli_pmemstr(const char *haystack, size_t hs, const char *need |
72 | 72 |
* TODO: handle embedded URLs if (options&CL_SCAN_MAILURL) |
73 | 73 |
*/ |
74 | 74 |
int |
75 |
-cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
|
75 |
+cli_pdf(const char *dir, int desc, cli_ctx *ctx) |
|
76 | 76 |
{ |
77 | 77 |
off_t size; /* total number of bytes in the file */ |
78 | 78 |
off_t bytesleft, trailerlength; |
... | ... |
@@ -81,7 +81,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
81 | 81 |
const char *xrefstart; /* cross reference table */ |
82 | 82 |
/*size_t xreflength;*/ |
83 | 83 |
table_t *md5table; |
84 |
- int printed_predictor_message, printed_embedded_font_message, rc, ret; |
|
84 |
+ int printed_predictor_message, printed_embedded_font_message, ret, rc; |
|
85 | 85 |
unsigned int files; |
86 | 86 |
struct stat statb; |
87 | 87 |
|
... | ... |
@@ -194,16 +194,17 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
194 | 194 |
/* |
195 | 195 |
* The body section consists of a sequence of indirect objects |
196 | 196 |
*/ |
197 |
- while((p < xrefstart) && ((rc=cli_checklimits("cli_pdf", ctx, 0, 0, 0))==CL_CLEAN) && |
|
197 |
+ while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) && |
|
198 | 198 |
((q = pdf_nextobject(p, bytesleft)) != NULL)) { |
199 | 199 |
int is_ascii85decode, is_flatedecode, fout, len, has_cr; |
200 | 200 |
/*int object_number, generation_number;*/ |
201 | 201 |
const char *objstart, *objend, *streamstart, *streamend; |
202 |
- char *md5digest; |
|
202 |
+ unsigned char *md5digest; |
|
203 | 203 |
unsigned long length, objlen, real_streamlen, calculated_streamlen; |
204 | 204 |
int is_embedded_font, predictor; |
205 | 205 |
char fullname[NAME_MAX + 1]; |
206 | 206 |
|
207 |
+ rc=CL_CLEAN; |
|
207 | 208 |
if(q == xrefstart) |
208 | 209 |
break; |
209 | 210 |
if(memcmp(q, "xref", 4) == 0) |
... | ... |
@@ -217,13 +218,11 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
217 | 217 |
continue; |
218 | 218 |
if(!isdigit(*q)) { |
219 | 219 |
cli_dbgmsg("cli_pdf: Object number missing\n"); |
220 |
- rc = CL_CLEAN; |
|
221 | 220 |
break; |
222 | 221 |
} |
223 | 222 |
q = pdf_nextobject(p, bytesleft); |
224 | 223 |
if((q == NULL) || !isdigit(*q)) { |
225 | 224 |
cli_dbgmsg("cli_pdf: Generation number missing\n"); |
226 |
- rc = CL_CLEAN; |
|
227 | 225 |
break; |
228 | 226 |
} |
229 | 227 |
/*generation_number = atoi(q);*/ |
... | ... |
@@ -233,7 +232,6 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
233 | 233 |
q = pdf_nextobject(p, bytesleft); |
234 | 234 |
if((q == NULL) || (memcmp(q, "obj", 3) != 0)) { |
235 | 235 |
cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n"); |
236 |
- rc = CL_CLEAN; |
|
237 | 236 |
break; |
238 | 237 |
} |
239 | 238 |
|
... | ... |
@@ -430,7 +428,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
430 | 430 |
|
431 | 431 |
if(is_ascii85decode) { |
432 | 432 |
unsigned char *tmpbuf; |
433 |
- int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, 0); |
|
433 |
+ int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen); |
|
434 | 434 |
|
435 | 435 |
if(ret != CL_CLEAN) { |
436 | 436 |
close(fout); |
... | ... |
@@ -475,7 +473,7 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
475 | 475 |
if(is_flatedecode) |
476 | 476 |
rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx); |
477 | 477 |
else |
478 |
- cli_writen(fout, (const char *)streamstart, real_streamlen); |
|
478 |
+ rc = cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EIO; |
|
479 | 479 |
} |
480 | 480 |
free(tmpbuf); |
481 | 481 |
} else if(is_flatedecode) { |
... | ... |
@@ -484,19 +482,33 @@ cli_pdf(const char *dir, int desc, const cli_ctx *ctx) |
484 | 484 |
} else { |
485 | 485 |
cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n", |
486 | 486 |
(unsigned long)real_streamlen); |
487 |
- cli_writen(fout, (const char *)streamstart, real_streamlen); |
|
487 |
+ if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN) |
|
488 |
+ rc = cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EIO; |
|
488 | 489 |
} |
489 | 490 |
|
491 |
+ if (rc == CL_CLEAN) { |
|
492 |
+ cli_dbgmsg("cli_pdf: extracted file %u to %s\n", ++files, fullname); |
|
493 |
+ |
|
494 |
+ lseek(fout, 0, SEEK_SET); |
|
495 |
+ md5digest = cli_md5digest(fout); |
|
496 |
+ |
|
497 |
+ if(tableFind(md5table, md5digest) >= 0) { |
|
498 |
+ cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname); |
|
499 |
+ free(md5digest); |
|
500 |
+ close(fout); |
|
501 |
+ unlink(fullname); |
|
502 |
+ continue; |
|
503 |
+ } else |
|
504 |
+ tableInsert(md5table, md5digest, 1); |
|
505 |
+ |
|
506 |
+ free(md5digest); |
|
507 |
+ |
|
508 |
+ lseek(fout, 0, SEEK_SET); |
|
509 |
+ rc = cli_magic_scandesc(fout, ctx); |
|
510 |
+ } |
|
490 | 511 |
close(fout); |
491 |
- md5digest = cli_md5file(fullname); |
|
492 |
- if(tableFind(md5table, md5digest) >= 0) { |
|
493 |
- cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname); |
|
494 |
- unlink(fullname); |
|
495 |
- } else |
|
496 |
- tableInsert(md5table, md5digest, 1); |
|
497 |
- free(md5digest); |
|
498 |
- cli_dbgmsg("cli_pdf: extracted file %u to %s\n", ++files, |
|
499 |
- fullname); |
|
512 |
+ if(!cli_leavetemps_flag) unlink(fullname); |
|
513 |
+ if(rc != CL_CLEAN) break; |
|
500 | 514 |
} |
501 | 515 |
|
502 | 516 |
munmap(buf, size); |
... | ... |
@@ -516,7 +528,7 @@ try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fo |
516 | 516 |
int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0); |
517 | 517 |
|
518 | 518 |
if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS) |
519 |
- return CL_SUCCESS; |
|
519 |
+ return CL_CLEAN; |
|
520 | 520 |
|
521 | 521 |
if(real_len == calculated_len) { |
522 | 522 |
/* |
... | ... |
@@ -530,8 +542,8 @@ try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fo |
530 | 530 |
return CL_CLEAN; |
531 | 531 |
|
532 | 532 |
ret = flatedecode(buf, calculated_len, fout, ctx); |
533 |
- if(ret == CL_SUCCESS) |
|
534 |
- return CL_SUCCESS; |
|
533 |
+ if(ret == CL_CLEAN) |
|
534 |
+ return CL_CLEAN; |
|
535 | 535 |
|
536 | 536 |
/* i.e. the PDF file is broken :-( */ |
537 | 537 |
cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n"); |
... | ... |
@@ -1401,9 +1401,6 @@ static int cli_scanpdf(int desc, cli_ctx *ctx) |
1401 | 1401 |
|
1402 | 1402 |
ret = cli_pdf(dir, desc, ctx); |
1403 | 1403 |
|
1404 |
- if(ret == CL_CLEAN) |
|
1405 |
- ret = cli_scandir(dir, ctx, 0); |
|
1406 |
- |
|
1407 | 1404 |
if(!cli_leavetemps_flag) |
1408 | 1405 |
cli_rmdirs(dir); |
1409 | 1406 |
|
... | ... |
@@ -1887,7 +1884,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
1887 | 1887 |
ret = cli_scanjpeg(desc, ctx->virname); |
1888 | 1888 |
break; |
1889 | 1889 |
|
1890 |
- case CL_TYPE_PDF: |
|
1890 |
+ case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */ |
|
1891 | 1891 |
if(SCAN_PDF && (DCONF_DOC & DOC_CONF_PDF)) |
1892 | 1892 |
ret = cli_scanpdf(desc, ctx); |
1893 | 1893 |
break; |