... | ... |
@@ -827,6 +827,8 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf) |
827 | 827 |
static const char *pdf_getdict(const char *q0, int* len, const char *key); |
828 | 828 |
static char *pdf_readval(const char *q, int len, const char *key); |
829 | 829 |
static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def); |
830 |
+static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape); |
|
831 |
+ |
|
830 | 832 |
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
831 | 833 |
{ |
832 | 834 |
char fullname[NAME_MAX + 1]; |
... | ... |
@@ -1002,91 +1004,53 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj) |
1002 | 1002 |
break; |
1003 | 1003 |
|
1004 | 1004 |
do { |
1005 |
+ char *js = NULL; |
|
1006 |
+ off_t js_len = 0; |
|
1005 | 1007 |
|
1006 | 1008 |
q2 = cli_memstr(q, bytesleft, "/JavaScript", 11); |
1007 | 1009 |
if (!q2) |
1008 | 1010 |
break; |
1009 | 1011 |
bytesleft -= q2 - q; |
1010 |
- do { |
|
1011 |
- q2++; |
|
1012 |
- bytesleft--; |
|
1013 |
- q = pdf_nextobject(q2, bytesleft); |
|
1014 |
- if (!q) |
|
1015 |
- break; |
|
1016 |
- bytesleft -= q - q2; |
|
1017 |
- q2 = q; |
|
1018 |
- } while (*q == '/'); |
|
1019 |
- if (!q) |
|
1020 |
- break; |
|
1021 |
- |
|
1022 |
- if (*q == '(') { |
|
1023 |
- const char *out, *q2, *end; |
|
1024 |
- long n = bytesleft; |
|
1025 |
- q++; |
|
1026 |
- n--; |
|
1027 |
- out = q; |
|
1028 |
- end = q + n; |
|
1029 |
- |
|
1030 |
- do { |
|
1031 |
- q2 = memchr(q, ')', n); |
|
1032 |
- if (q2) { |
|
1033 |
- q2++; |
|
1034 |
- n -= q2 - q; |
|
1035 |
- q = q2; |
|
1036 |
- } |
|
1037 |
- } while (n > 0 && q2 && q2[-2] == '\\'); |
|
1038 |
- if (q2) |
|
1039 |
- end = q2-1; |
|
1040 |
- n = end - out; |
|
1041 |
- bytesleft -= q - out; |
|
1042 |
- |
|
1043 |
- if (pdf->flags & (1 << DECRYPTABLE_PDF)) { |
|
1044 |
- cli_dbgmsg("cli_pdf: encrypted string\n"); |
|
1045 |
- decrypted = decrypt_any(pdf, obj->id, out, &n, |
|
1012 |
+ q = q2 + 11; |
|
1013 |
+ |
|
1014 |
+ js = pdf_readstring(q, bytesleft, "/JS", NULL, &q2, !(pdf->flags & (1<<DECRYPTABLE_PDF))); |
|
1015 |
+ bytesleft -= q2 - q; |
|
1016 |
+ q = q2; |
|
1017 |
+ |
|
1018 |
+ if (js) { |
|
1019 |
+ const char *out = js; |
|
1020 |
+ js_len = strlen(js); |
|
1021 |
+ if (pdf->flags & (1 << DECRYPTABLE_PDF)) { |
|
1022 |
+ cli_dbgmsg("cli_pdf: encrypted string\n"); |
|
1023 |
+ decrypted = decrypt_any(pdf, obj->id, js, &js_len, |
|
1046 | 1024 |
pdf->enc_method_string); |
1047 | 1025 |
if (decrypted) { |
1048 | 1026 |
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff); |
1049 | 1027 |
out = decrypted; |
1050 | 1028 |
} |
1051 | 1029 |
} |
1052 |
- if (filter_writen(pdf, obj, fout, out, n, &sum) != n) { |
|
1030 |
+ |
|
1031 |
+ if (filter_writen(pdf, obj, fout, out, js_len, &sum) != js_len) { |
|
1053 | 1032 |
rc = CL_EWRITE; |
1033 |
+ free(js); |
|
1054 | 1034 |
break; |
1055 | 1035 |
} |
1036 |
+ free(js); |
|
1056 | 1037 |
cli_dbgmsg("bytesleft: %d\n", bytesleft); |
1057 |
- } else if (*q == '<') { |
|
1058 |
- char *decoded; |
|
1059 |
- const char *out; |
|
1060 |
- long n; |
|
1061 |
- q2 = memchr(q+1, '>', bytesleft); |
|
1062 |
- if (!q2) q2 = q + bytesleft; |
|
1063 |
- n = q2 - q; |
|
1064 |
- out = q; |
|
1065 |
- q += n; |
|
1066 |
- bytesleft -= n; |
|
1067 |
- n--; |
|
1068 |
- if (pdf->flags & (1 << DECRYPTABLE_PDF)) { |
|
1069 |
- out++; |
|
1070 |
- n--; |
|
1071 |
- decrypted = decrypt_any(pdf, obj->id, out, &n, pdf->enc_method_string); |
|
1072 |
- if (decrypted) { |
|
1073 |
- noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff); |
|
1074 |
- out = decrypted; |
|
1075 |
- } |
|
1076 |
- } |
|
1077 |
- decoded = cli_malloc(n); |
|
1078 |
- if (!decoded) { |
|
1079 |
- rc = CL_EMEM; |
|
1080 |
- break; |
|
1081 |
- } |
|
1082 |
- cli_hex2str_to(out, decoded, n-1); |
|
1083 |
- decoded[n-1] = '\0'; |
|
1084 |
- cli_dbgmsg("cli_pdf: found hexadecimal encoded javascript in %u %u obj\n", |
|
1085 |
- obj->id>>8, obj->id&0xff); |
|
1086 |
- pdfobj_flag(pdf, obj, HEX_JAVASCRIPT); |
|
1087 |
- filter_writen(pdf, obj, fout, decoded, n-1, &sum); |
|
1088 |
- free(decoded); |
|
1089 |
- } |
|
1038 |
+ |
|
1039 |
+ q2 = pdf_nextobject(q, bytesleft); |
|
1040 |
+ if (!q2) q2 = q + bytesleft - 1; |
|
1041 |
+ /* non-conforming PDFs that don't escape ) properly */ |
|
1042 |
+ const char *q3 = memchr(q, ')', bytesleft); |
|
1043 |
+ if (q3 && q3 < q2) q2 = q3; |
|
1044 |
+ while (q2 > q && q2[-1] == ' ') q2--; |
|
1045 |
+ if (q2 > q) { |
|
1046 |
+ q--; |
|
1047 |
+ filter_writen(pdf, obj, fout, q, q2 - q, &sum); |
|
1048 |
+ q++; |
|
1049 |
+ } |
|
1050 |
+ } |
|
1051 |
+ |
|
1090 | 1052 |
} while (bytesleft > 0); |
1091 | 1053 |
} else { |
1092 | 1054 |
off_t bytesleft = obj_size(pdf, obj, 0); |
... | ... |
@@ -1236,7 +1200,6 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, |
1236 | 1236 |
} |
1237 | 1237 |
} |
1238 | 1238 |
|
1239 |
-static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen); |
|
1240 | 1239 |
static int pdf_readint(const char *q0, int len, const char *key); |
1241 | 1240 |
|
1242 | 1241 |
static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len) |
... | ... |
@@ -1280,7 +1243,7 @@ static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length |
1280 | 1280 |
char *newID; |
1281 | 1281 |
pdf->flags |= 1 << ENCRYPTED_PDF; |
1282 | 1282 |
pdf_parse_encrypt(pdf, enc, s + length - enc); |
1283 |
- newID = pdf_readstring(s, length, "/ID", &pdf->fileIDlen); |
|
1283 |
+ newID = pdf_readstring(s, length, "/ID", &pdf->fileIDlen, NULL, 0); |
|
1284 | 1284 |
if (newID) { |
1285 | 1285 |
free(pdf->fileID); |
1286 | 1286 |
pdf->fileID = newID; |
... | ... |
@@ -1477,12 +1440,14 @@ static const char *pdf_getdict(const char *q0, int* len, const char *key) |
1477 | 1477 |
return q; |
1478 | 1478 |
} |
1479 | 1479 |
|
1480 |
-static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen) |
|
1480 |
+static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape) |
|
1481 | 1481 |
{ |
1482 | 1482 |
char *s, *s0; |
1483 | 1483 |
const char *start, *q, *end; |
1484 | 1484 |
if (slen) |
1485 | 1485 |
*slen = 0; |
1486 |
+ if (qend) |
|
1487 |
+ *qend = q0; |
|
1486 | 1488 |
q = pdf_getdict(q0, &len, key); |
1487 | 1489 |
if (!q) |
1488 | 1490 |
return NULL; |
... | ... |
@@ -1505,12 +1470,18 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned * |
1505 | 1505 |
break; |
1506 | 1506 |
} |
1507 | 1507 |
} |
1508 |
+ if (qend) |
|
1509 |
+ *qend = q; |
|
1508 | 1510 |
q--; |
1509 | 1511 |
len = q - start; |
1510 | 1512 |
s0 = s = cli_malloc(len + 1); |
1511 | 1513 |
if (!s) |
1512 | 1514 |
return NULL; |
1513 | 1515 |
end = start + len; |
1516 |
+ if (noescape) { |
|
1517 |
+ memcpy(s0, start, len); |
|
1518 |
+ s = s0 + len; |
|
1519 |
+ } else { |
|
1514 | 1520 |
for (q = start;q < end;q++) { |
1515 | 1521 |
if (*q != '\\') { |
1516 | 1522 |
*s++ = *q; |
... | ... |
@@ -1564,11 +1535,13 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned * |
1564 | 1564 |
break; |
1565 | 1565 |
default: |
1566 | 1566 |
/* not a recognized escape: keep the backslash in the output */ |
1567 |
- q--; |
|
1567 |
+ *s++ = '\\'; |
|
1568 |
+ q--; |
|
1568 | 1569 |
break; |
1569 | 1570 |
} |
1570 | 1571 |
} |
1571 | 1572 |
} |
1573 |
+ } |
|
1572 | 1574 |
*s++ = '\0'; |
1573 | 1575 |
if (slen) |
1574 | 1576 |
*slen = s - s0 - 1; |
... | ... |
@@ -1579,6 +1552,8 @@ static char *pdf_readstring(const char *q0, int len, const char *key, unsigned * |
1579 | 1579 |
q = memchr(q+1, '>', len); |
1580 | 1580 |
if (!q) |
1581 | 1581 |
return NULL; |
1582 |
+ if (qend) |
|
1583 |
+ *qend = q; |
|
1582 | 1584 |
s = cli_malloc((q - start)/2 + 1); |
1583 | 1585 |
cli_hex2str_to(start, s, q - start); |
1584 | 1586 |
s[(q-start)/2] = '\0'; |
... | ... |
@@ -1893,7 +1868,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf) |
1893 | 1893 |
length = 128; |
1894 | 1894 |
else { |
1895 | 1895 |
n = 0; |
1896 |
- UE = pdf_readstring(q, len, "/UE", &n); |
|
1896 |
+ UE = pdf_readstring(q, len, "/UE", &n, NULL, 0); |
|
1897 | 1897 |
length = 256; |
1898 | 1898 |
} |
1899 | 1899 |
} |
... | ... |
@@ -1901,7 +1876,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf) |
1901 | 1901 |
length = 40; |
1902 | 1902 |
|
1903 | 1903 |
n = 0; |
1904 |
- O = pdf_readstring(q, len, "/O", &n); |
|
1904 |
+ O = pdf_readstring(q, len, "/O", &n, NULL, 0); |
|
1905 | 1905 |
if (!O || n < oulen) { |
1906 | 1906 |
cli_dbgmsg("cli_pdf: invalid O: %d\n", n); |
1907 | 1907 |
cli_dbgmsg("cli_pdf: invalid O: %d\n", n); |
... | ... |
@@ -1921,7 +1896,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf) |
1921 | 1921 |
} |
1922 | 1922 |
|
1923 | 1923 |
n = 0; |
1924 |
- U = pdf_readstring(q, len, "/U", &n); |
|
1924 |
+ U = pdf_readstring(q, len, "/U", &n, NULL, 0); |
|
1925 | 1925 |
if (!U || n < oulen) { |
1926 | 1926 |
cli_dbgmsg("cli_pdf: invalid U: %d\n", n); |
1927 | 1927 |
noisy_warnmsg("cli_pdf: invalid U: %d\n", n); |