... | ... |
@@ -71,6 +71,7 @@ enum pdf_flag { |
71 | 71 |
BAD_ASCIIDECODE, |
72 | 72 |
UNTERMINATED_OBJ_DICT, |
73 | 73 |
ESCAPED_COMMON_PDFNAME, |
74 |
+ HEX_JAVASCRIPT |
|
74 | 75 |
}; |
75 | 76 |
|
76 | 77 |
static int xrefCheck(const char *xref, const char *eof) |
... | ... |
@@ -138,7 +139,7 @@ static const char *findNextNonWSBack(const char *q, const char *start) |
138 | 138 |
return q; |
139 | 139 |
} |
140 | 140 |
|
141 |
-static int find_stream_bounds(const char *start, off_t bytesleft, off_t *stream, off_t *endstream) |
|
141 |
+static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream) |
|
142 | 142 |
{ |
143 | 143 |
const char *q2, *q; |
144 | 144 |
if ((q2 = cli_memstr(start, bytesleft, "stream", 6))) { |
... | ... |
@@ -148,11 +149,11 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t *stream, |
148 | 148 |
if (q2[0] == '\xa') |
149 | 149 |
q2++; |
150 | 150 |
*stream = q2 - start; |
151 |
- bytesleft -= q2 - start; |
|
151 |
+ bytesleft2 -= q2 - start; |
|
152 | 152 |
q = q2; |
153 |
- q2 = cli_memstr(q, bytesleft, "endstream", 9); |
|
153 |
+ q2 = cli_memstr(q, bytesleft2, "endstream", 9); |
|
154 | 154 |
if (!q2) |
155 |
- return;/* no more objs */ |
|
155 |
+ return 0;/* no more objs */ |
|
156 | 156 |
*endstream = q2 - start; |
157 | 157 |
return 1; |
158 | 158 |
} |
... | ... |
@@ -173,6 +174,7 @@ static int pdf_findobj(struct pdf_struct *pdf) |
173 | 173 |
return -1; |
174 | 174 |
} |
175 | 175 |
obj = &pdf->objs[pdf->nobjs-1]; |
176 |
+ memset(obj, 0, sizeof(*obj)); |
|
176 | 177 |
start = pdf->map+pdf->offset; |
177 | 178 |
bytesleft = pdf->size - pdf->offset; |
178 | 179 |
q2 = cli_memstr(start, bytesleft, " obj", 4); |
... | ... |
@@ -197,7 +199,7 @@ static int pdf_findobj(struct pdf_struct *pdf) |
197 | 197 |
if (!q2) |
198 | 198 |
return 0;/* no more objs */ |
199 | 199 |
bytesleft -= q2 - q; |
200 |
- if (find_stream_bounds(q-1, q2-q+1, &p_stream, &p_endstream)) { |
|
200 |
+ if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream)) { |
|
201 | 201 |
obj->flags |= 1 << OBJ_STREAM; |
202 | 202 |
q2 = q-1 + p_endstream + 6; |
203 | 203 |
bytesleft -= q2 - q + 1; |
... | ... |
@@ -349,30 +351,52 @@ static int find_length(struct pdf_struct *pdf, |
349 | 349 |
return length; |
350 | 350 |
} |
351 | 351 |
|
352 |
/*
 * Object flag bits that make an object worth dumping: pdf_extract_obj()
 * returns early for any object whose flags have none of these bits set.
 */
#define DUMP_MASK ((1 << OBJ_FILTER_FLATE)  | \
                   (1 << OBJ_FILTER_AH)     | \
                   (1 << OBJ_FILTER_A85)    | \
                   (1 << OBJ_EMBEDDED_FILE) | \
                   (1 << OBJ_JAVASCRIPT))
|
353 |
+ |
|
354 |
+static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj) |
|
355 |
+{ |
|
356 |
+ int i = obj - pdf->objs; |
|
357 |
+ i++; |
|
358 |
+ if (i < pdf->nobjs) { |
|
359 |
+ return pdf->objs[i].start - obj->start - 4; |
|
360 |
+ } |
|
361 |
+ return pdf->size - obj->start; |
|
362 |
+} |
|
363 |
+ |
|
352 | 364 |
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
353 | 365 |
{ |
366 |
+ char fullname[NAME_MAX + 1]; |
|
367 |
+ int fout; |
|
368 |
+ off_t sum = 0; |
|
354 | 369 |
int rc = CL_SUCCESS; |
355 |
- if (obj->flags | (1 << OBJ_STREAM)) { |
|
370 |
+ char *ascii_decoded = NULL; |
|
371 |
+ |
|
372 |
+ if (!(obj->flags & DUMP_MASK)) { |
|
373 |
+ /* don't dump all streams */ |
|
374 |
+ return CL_CLEAN; |
|
375 |
+ } |
|
376 |
+ snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++); |
|
377 |
+ fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
|
378 |
+ if (fout < 0) { |
|
379 |
+ char err[128]; |
|
380 |
+ cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
381 |
+ free(ascii_decoded); |
|
382 |
+ return CL_ETMPFILE; |
|
383 |
+ } |
|
384 |
+ |
|
385 |
+ do { |
|
386 |
+ if (obj->flags & (1 << OBJ_STREAM)) { |
|
356 | 387 |
const char *start = pdf->map + obj->start; |
357 | 388 |
off_t p_stream = 0, p_endstream = 0; |
358 | 389 |
off_t length; |
359 | 390 |
find_stream_bounds(start, pdf->size - obj->start, |
391 |
+ pdf->size - obj->start, |
|
360 | 392 |
&p_stream, &p_endstream); |
361 | 393 |
if (p_stream && p_endstream) { |
362 |
- char fullname[NAME_MAX + 1]; |
|
363 |
- int fout; |
|
364 |
- off_t sum = 0; |
|
365 | 394 |
const char *flate_in; |
366 |
- char *ascii_decoded = NULL; |
|
367 | 395 |
long ascii_decoded_size = 0; |
368 | 396 |
size_t size = p_endstream - p_stream; |
369 | 397 |
|
370 |
- if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && |
|
371 |
- !(obj->flags & (1 << OBJ_EMBEDDED_FILE)) && |
|
372 |
- !ascii_decoded) { |
|
373 |
- /* only dump encoded streams */ |
|
374 |
- return CL_CLEAN; |
|
375 |
- } |
|
376 | 398 |
length = find_length(pdf, obj, start, p_stream); |
377 | 399 |
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && !length) { |
378 | 400 |
const char *q = start + p_endstream; |
... | ... |
@@ -395,7 +419,8 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
395 | 395 |
ascii_decoded = cli_malloc(size/2 + 1); |
396 | 396 |
if (!ascii_decoded) { |
397 | 397 |
cli_errmsg("Cannot allocate memory for asciidecode\n"); |
398 |
- return CL_EMEM; |
|
398 |
+ rc = CL_EMEM; |
|
399 |
+ break; |
|
399 | 400 |
} |
400 | 401 |
ascii_decoded_size = asciihexdecode(start + p_stream, |
401 | 402 |
length, |
... | ... |
@@ -404,7 +429,8 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
404 | 404 |
ascii_decoded = cli_malloc(size*5); |
405 | 405 |
if (!ascii_decoded) { |
406 | 406 |
cli_errmsg("Cannot allocate memory for asciidecode\n"); |
407 |
- return CL_EMEM; |
|
407 |
+ rc = CL_EMEM; |
|
408 |
+ break; |
|
408 | 409 |
} |
409 | 410 |
ascii_decoded_size = ascii85decode(start+p_stream, |
410 | 411 |
length, |
... | ... |
@@ -413,40 +439,71 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
413 | 413 |
if (ascii_decoded_size < 0) { |
414 | 414 |
pdf->flags |= 1 << BAD_ASCIIDECODE; |
415 | 415 |
cli_dbgmsg("cli_pdf: failed to asciidecode in %u %u obj\n", obj->id>>8,obj->id&0xff); |
416 |
- free(ascii_decoded); |
|
417 |
- return CL_SUCCESS; |
|
416 |
+ rc = CL_CLEAN; |
|
417 |
+ break; |
|
418 | 418 |
} |
419 | 419 |
/* either direct or ascii-decoded input */ |
420 | 420 |
if (!ascii_decoded) |
421 | 421 |
ascii_decoded_size = length; |
422 | 422 |
flate_in = ascii_decoded ? ascii_decoded : start+p_stream; |
423 | 423 |
|
424 |
- snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++); |
|
425 |
- fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
|
426 |
- if (fout < 0) { |
|
427 |
- char err[128]; |
|
428 |
- cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
429 |
- free(ascii_decoded); |
|
430 |
- return CL_ETMPFILE; |
|
431 |
- } |
|
432 | 424 |
if (obj->flags & (1 << OBJ_FILTER_FLATE)) { |
433 | 425 |
rc = filter_flatedecode(pdf, obj, flate_in, ascii_decoded_size, fout, &sum); |
434 | 426 |
} else { |
435 |
- rc = filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum); |
|
427 |
+ if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size) |
|
428 |
+ rc = CL_EWRITE; |
|
436 | 429 |
} |
437 | 430 |
cli_updatelimits(pdf->ctx, sum); |
438 |
- /* invoke bytecode on this pdf obj with metainformation associated |
|
431 |
+ /* TODO: invoke bytecode on this pdf obj with metainformation associated |
|
439 | 432 |
* */ |
440 | 433 |
cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname); |
441 | 434 |
lseek(fout, 0, SEEK_SET); |
442 | 435 |
rc = cli_magic_scandesc(fout, pdf->ctx); |
443 |
- close(fout); |
|
444 |
- free(ascii_decoded); |
|
445 |
- if (!pdf->ctx->engine->keeptmp) |
|
446 |
- if (cli_unlink(fullname) && rc != CL_VIRUS) |
|
447 |
- rc = CL_EUNLINK; |
|
436 |
+ } |
|
437 |
+ } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) { |
|
438 |
+ const char *q2; |
|
439 |
+ const char *q = pdf->map+obj->start; |
|
440 |
+ /* TODO: get obj-endobj size */ |
|
441 |
+ off_t bytesleft = obj_size(pdf, obj); |
|
442 |
+ |
|
443 |
+ q2 = cli_memstr(q, bytesleft, "/JavaScript", 11); |
|
444 |
+ if (!q2) |
|
445 |
+ break; |
|
446 |
+ q2++; |
|
447 |
+ bytesleft -= q2 - q; |
|
448 |
+ q = pdf_nextobject(q2, bytesleft); |
|
449 |
+ if (!q) |
|
450 |
+ break; |
|
451 |
+ bytesleft -= q - q2; |
|
452 |
+ if (*q == '(') { |
|
453 |
+ if (filter_writen(pdf, obj, fout, q+1, bytesleft-1, &sum) != (bytesleft-1)) { |
|
454 |
+ rc = CL_EWRITE; |
|
455 |
+ break; |
|
456 |
+ } |
|
457 |
+ } else if (*q == '<') { |
|
458 |
+ char *decoded; |
|
459 |
+ q2 = memchr(q+1, '>', bytesleft); |
|
460 |
+ if (!q2) q2 = q + bytesleft; |
|
461 |
+ decoded = cli_malloc(q2 - q); |
|
462 |
+ if (!decoded) { |
|
463 |
+ rc = CL_EMEM; |
|
464 |
+ break; |
|
465 |
+ } |
|
466 |
+ cli_hex2str_to(q2, decoded, q2-q-1); |
|
467 |
+ decoded[q2-q-1] = '\0'; |
|
468 |
+ cli_dbgmsg("cli_pdf: found hexadecimal encoded javascript in %u %u obj\n", |
|
469 |
+ obj->id>>8, obj->id&0xff); |
|
470 |
+ pdf->flags |= 1 << HEX_JAVASCRIPT; |
|
471 |
+ filter_writen(pdf, obj, fout, decoded, q2-q-1, &sum); |
|
472 |
+ free(decoded); |
|
448 | 473 |
} |
449 | 474 |
} |
475 |
+ } while (0); |
|
476 |
+ close(fout); |
|
477 |
+ free(ascii_decoded); |
|
478 |
+ if (!pdf->ctx->engine->keeptmp) |
|
479 |
+ if (cli_unlink(fullname) && rc != CL_VIRUS) |
|
480 |
+ rc = CL_EUNLINK; |
|
450 | 481 |
return rc; |
451 | 482 |
} |
452 | 483 |
|
... | ... |
@@ -621,6 +678,27 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj) |
621 | 621 |
pdfname[i] = '\0'; |
622 | 622 |
|
623 | 623 |
handle_pdfname(pdf, obj, pdfname, escapes, q, &objstate); |
624 |
+ if (objstate == STATE_JAVASCRIPT) { |
|
625 |
+ const char *q2; |
|
626 |
+ q2 = pdf_nextobject(q, dict_length); |
|
627 |
+ if (q2 && isdigit(*q2)) { |
|
628 |
+ uint32_t objid = atoi(q2) << 8; |
|
629 |
+ while (isdigit(*q2)) q2++; |
|
630 |
+ q2 = pdf_nextobject(q2, dict_length); |
|
631 |
+ if (q2 && isdigit(*q2)) { |
|
632 |
+ objid |= atoi(q2) & 0xff; |
|
633 |
+ q2 = pdf_nextobject(q2, dict_length); |
|
634 |
+ if (*q2 == 'R') { |
|
635 |
+ struct pdf_obj *obj2; |
|
636 |
+ cli_dbgmsg("cli_pdf: found javascript stored in indirect object %u %u", |
|
637 |
+ objid >> 8, objid&0xff); |
|
638 |
+ obj2 = find_obj(pdf, obj, objid); |
|
639 |
+ obj2->flags |= OBJ_JAVASCRIPT; |
|
640 |
+ } |
|
641 |
+ } |
|
642 |
+ } |
|
643 |
+ objstate = STATE_NONE; |
|
644 |
+ } |
|
624 | 645 |
} |
625 | 646 |
cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags); |
626 | 647 |
} |
... | ... |
@@ -732,8 +810,14 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
732 | 732 |
break; |
733 | 733 |
} |
734 | 734 |
|
735 |
- if (pdf.flags) |
|
735 |
+ if (pdf.flags) { |
|
736 | 736 |
cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags); |
737 |
+ if (pdf.flags & ESCAPED_COMMON_PDFNAME) { |
|
738 |
+ /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */ |
|
739 |
+ *ctx->virname = "Heuristics.PDF.ObfuscatedNameObject"; |
|
740 |
+ rc = CL_VIRUS; |
|
741 |
+ } |
|
742 |
+ } |
|
737 | 743 |
cli_dbgmsg("cli_pdf: returning %d\n", rc); |
738 | 744 |
free(pdf.objs); |
739 | 745 |
return rc; |