Browse code

pdf: support for AESV3, V 5 security handler, and encrypted linearized PDFs.

Török Edvin authored on 2011/12/15 20:27:31
Showing 1 changed file
... ...
@@ -93,7 +93,8 @@ enum enc_method {
93 93
     ENC_UNKNOWN,
94 94
     ENC_NONE,
95 95
     ENC_V2,
96
-    ENC_AESV2
96
+    ENC_AESV2,
97
+    ENC_AESV3
97 98
 };
98 99
 
99 100
 struct pdf_struct {
... ...
@@ -567,25 +568,29 @@ static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd,
567 567
     return ret;
568 568
 }
569 569
 
570
-static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q, char *key, unsigned key_n)
570
+static void dbg_printhex(const char *msg, const char *hex, unsigned len);
571
+static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
571 572
 {
572 573
     unsigned long rk[RKLENGTH(256)];
573 574
     unsigned char iv[16];
574 575
     unsigned len = *length;
575 576
     unsigned char pad, i;
576 577
 
577
-    cli_dbgmsg("aes_decrypt: key length: %d, data length: %d\n", key_n, *length);
578
+    cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %d\n", key_n, *length);
578 579
     if (key_n > 32) {
579
-	cli_dbgmsg("aes_decrypt: key length is %s!\n", key_n*8);
580
+	cli_dbgmsg("cli_pdf: aes_decrypt: key length is %s!\n", key_n*8);
580 581
 	return;
581 582
     }
582 583
     if (len < 32) {
583
-	cli_dbgmsg("aes_decrypt: len is <32: %d\n", len);
584
+	cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
584 585
 	return;
585 586
     }
586
-    memcpy(iv, in, 16);
587
-    in += 16;
588
-    len -= 16;
587
+    if (has_iv) {
588
+	memcpy(iv, in, 16);
589
+	in += 16;
590
+	len -= 16;
591
+    } else
592
+	memset(iv, 0, sizeof(iv));
589 593
 
590 594
     int nrounds = rijndaelSetupDecrypt(rk, key, key_n*8);
591 595
     while (len >= 16) {
... ...
@@ -598,21 +603,26 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
598 598
 	in += 16;
599 599
 	len -= 16;
600 600
     }
601
-    pad = q[-1];
602
-    if (pad > 0x10) {
603
-	cli_dbgmsg("aes_decrypt: bad pad: %x\n", pad);
604
-	return;
605
-    }
606
-    len += pad;
607
-    q -= pad;
608
-    for (i=1;i<pad;i++) {
609
-	if (q[i] != pad) {
610
-	    cli_dbgmsg("aes_decrypt: bad pad: %x != %x\n",q[i],pad);
601
+    if (has_iv) {
602
+	len += 16;
603
+	pad = q[-1];
604
+	if (pad > 0x10) {
605
+	    cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len);
606
+	    *length -= len;
611 607
 	    return;
612 608
 	}
609
+	q -= pad;
610
+	for (i=1;i<pad;i++) {
611
+	    if (q[i] != pad) {
612
+		cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
613
+		*length -= len;
614
+		return;
615
+	    }
616
+	}
617
+	len += pad;
613 618
     }
614 619
     *length -= len;
615
-    cli_dbgmsg("aes_decrypt: length is %d\n", *length);
620
+    cli_dbgmsg("cli_pdf: aes_decrypt: length is %d\n", *length);
616 621
 }
617 622
 
618 623
 
... ...
@@ -644,6 +654,8 @@ static char *decrypt_stream(struct pdf_struct *pdf, uint32_t id, const char *in,
644 644
     cli_md5_init(&md5);
645 645
     cli_md5_update(&md5, key, n);
646 646
     cli_md5_final(result, &md5);
647
+    free(key);
648
+
647 649
     n = pdf->keylen + 5;
648 650
     if (n > 16)
649 651
 	n = 16;
... ...
@@ -654,20 +666,25 @@ static char *decrypt_stream(struct pdf_struct *pdf, uint32_t id, const char *in,
654 654
 
655 655
     switch (pdf->enc_method) {
656 656
 	case ENC_V2:
657
-	    cli_dbgmsg("enc is v2\n");
657
+	    cli_dbgmsg("cli_pdf: enc is v2\n");
658 658
 	    memcpy(q, in, *length);
659 659
 	    arc4_init(&arc4, result, n);
660 660
 	    arc4_apply(&arc4, q, *length);
661 661
 	    break;
662 662
 	case ENC_AESV2:
663
-	    aes_decrypt(in, length, q, result, n);
663
+	    cli_dbgmsg("cli_pdf: enc is aesv2\n");
664
+	    aes_decrypt(in, length, q, result, n, 1);
665
+	    break;
666
+	case ENC_AESV3:
667
+	    cli_dbgmsg("cli_pdf: enc is aesv3\n");
668
+	    aes_decrypt(in, length, q, pdf->key, pdf->keylen, 1);
664 669
 	    break;
665 670
 	case ENC_NONE:
666
-	    cli_dbgmsg("enc is none\n");
671
+	    cli_dbgmsg("cli_pdf: enc is none\n");
667 672
 	    free(q);
668 673
 	    return NULL;
669 674
 	case ENC_UNKNOWN:
670
-	    cli_dbgmsg("enc is unknown\n");
675
+	    cli_dbgmsg("cli_pdf: enc is unknown\n");
671 676
 	    free(q);
672 677
 	    return NULL;
673 678
     }
... ...
@@ -1004,6 +1021,56 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj,
1004 1004
 static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen);
1005 1005
 static int pdf_readint(const char *q0, int len, const char *key);
1006 1006
 static const char *pdf_getdict(const char *q0, int* len, const char *key);
1007
+
1008
+static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1009
+{
1010
+    const char *q, *q2;
1011
+    uint32_t objid;
1012
+
1013
+    if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
1014
+	q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
1015
+	if (!q)
1016
+	    return;
1017
+	len -= q - enc;
1018
+	enc = q;
1019
+    }
1020
+    q = enc + 8;
1021
+    len -= 8;
1022
+    q2 = pdf_nextobject(q, len);
1023
+    if (!q2 || !isdigit(*q2))
1024
+	return;
1025
+    objid = atoi(q2) << 8;
1026
+    len -= q2 - q;
1027
+    q = q2;
1028
+    q2 = pdf_nextobject(q, len);
1029
+    if (!q2 || !isdigit(*q2))
1030
+	return;
1031
+    objid |= atoi(q2) & 0xff;
1032
+    len -= q2 - q;
1033
+    q = q2;
1034
+    q2 = pdf_nextobject(q, len);
1035
+    if (!q2 || *q2 != 'R')
1036
+	return;
1037
+    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff);
1038
+    pdf->enc_objid = objid;
1039
+}
1040
+
1041
+static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)
1042
+{
1043
+    const char *enc;
1044
+    enc = cli_memstr(s, length, "/Encrypt", 8);
1045
+    if (enc) {
1046
+	char *newID;
1047
+	pdf->flags |= 1 << ENCRYPTED_PDF;
1048
+	pdf_parse_encrypt(pdf, enc, s + length - enc);
1049
+	newID = pdf_readstring(s, length, "/ID", &pdf->fileIDlen);
1050
+	if (newID) {
1051
+	    free(pdf->fileID);
1052
+	    pdf->fileID = newID;
1053
+	}
1054
+    }
1055
+}
1056
+
1007 1057
 static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1008 1058
 {
1009 1059
     /* enough to hold common pdf names, we don't need all the names */
... ...
@@ -1084,13 +1151,14 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1084 1084
 	    objstate = STATE_NONE;
1085 1085
 	    trailer_end = pdf_readint(q, dict_length, "/H");
1086 1086
 	    if (trailer_end > 0 && trailer_end < pdf->size) {
1087
+		const char *enc;
1087 1088
 		trailer = trailer_end - 1024;
1088 1089
 		if (trailer < 0) trailer = 0;
1089 1090
 		q2 = pdf->map + trailer;
1090 1091
 		cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
1091
-		pdf->fileID = pdf_readstring(q2, trailer_end - trailer, "/ID", &pdf->fileIDlen);
1092
+		pdf_parse_trailer(pdf, q2, trailer_end - trailer);
1092 1093
 		if (pdf->fileID)
1093
-		    cli_dbgmsg("found fileID\n");
1094
+		    cli_dbgmsg("cli_pdf: found fileID\n");
1094 1095
 	    }
1095 1096
 	}
1096 1097
 	if (objstate == STATE_LAUNCHACTION)
... ...
@@ -1147,39 +1215,6 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1147 1147
     cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
1148 1148
 }
1149 1149
 
1150
-static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1151
-{
1152
-    const char *q, *q2;
1153
-    uint32_t objid;
1154
-
1155
-    if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
1156
-	q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
1157
-	if (!q)
1158
-	    return;
1159
-	len -= q - enc;
1160
-	enc = q;
1161
-    }
1162
-    q = enc + 8;
1163
-    len -= 8;
1164
-    q2 = pdf_nextobject(q, len);
1165
-    if (!q2 || !isdigit(*q2))
1166
-	return;
1167
-    objid = atoi(q2) << 8;
1168
-    len -= q2 - q;
1169
-    q = q2;
1170
-    q2 = pdf_nextobject(q, len);
1171
-    if (!q2 || !isdigit(*q2))
1172
-	return;
1173
-    objid |= atoi(q2) & 0xff;
1174
-    len -= q2 - q;
1175
-    q = q2;
1176
-    q2 = pdf_nextobject(q, len);
1177
-    if (!q2 || *q2 != 'R')
1178
-	return;
1179
-    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff);
1180
-    pdf->enc_objid = objid;
1181
-}
1182
-
1183 1150
 static const char *pdf_getdict(const char *q0, int* len, const char *key)
1184 1151
 {
1185 1152
     const char *q;
... ...
@@ -1380,6 +1415,7 @@ static void dbg_printhex(const char *msg, const char *hex, unsigned len)
1380 1380
 
1381 1381
 static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1382 1382
 				const char *U, int32_t P, int EM,
1383
+				const char *UE,
1383 1384
 				unsigned length, unsigned oulen)
1384 1385
 {
1385 1386
     unsigned i;
... ...
@@ -1401,8 +1437,23 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1401 1401
 	sha256_final(&sha256, result2);
1402 1402
 	dbg_printhex("Computed U", result2, 32);
1403 1403
 	if (!memcmp(result2, U, 32)) {
1404
+	    off_t n;
1404 1405
 	    password_empty = 1;
1405 1406
 	    /* Algorithm 3.2a could be used to recover encryption key */
1407
+	    sha256_init(&sha256);
1408
+	    sha256_update(&sha256, U+40, 8);
1409
+	    sha256_final(&sha256, result2);
1410
+	    n = UE ? strlen(UE) : 0;
1411
+	    if (n != 32) {
1412
+		cli_dbgmsg("cli_pdf: UE length is not 32: %d\n", n);
1413
+	    } else {
1414
+		pdf->keylen = 32;
1415
+		pdf->key = cli_malloc(32);
1416
+		if (!pdf->key)
1417
+		    return;
1418
+		aes_decrypt(UE, &n, pdf->key, result2, 32, 0);
1419
+		dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
1420
+	    }
1406 1421
 	}
1407 1422
     } else {
1408 1423
 	/* 7.6.3.3 Algorithm 2 */
... ...
@@ -1434,7 +1485,7 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1434 1434
 	if (!pdf->key)
1435 1435
 	    return;
1436 1436
 	memcpy(pdf->key, result, pdf->keylen);
1437
-	dbg_printhex("md5", result, 32);
1437
+	dbg_printhex("md5", result, 16);
1438 1438
 	dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
1439 1439
 
1440 1440
 	/* 7.6.3.3 Algorithm 6 */
... ...
@@ -1486,8 +1537,8 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1486 1486
 static void pdf_handle_enc(struct pdf_struct *pdf)
1487 1487
 {
1488 1488
     struct pdf_obj *obj;
1489
-    uint32_t len, required_flags, n, R, P, length, EM, i, oulen;
1490
-    char *O, *U;
1489
+    uint32_t len, required_flags, n, R, P, length, EM = 1, i, oulen;
1490
+    char *O, *U, *CFM, *UE;
1491 1491
     const char *q, *q2;
1492 1492
 
1493 1493
     if (pdf->enc_objid == ~0u)
... ...
@@ -1504,23 +1555,10 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
1504 1504
     len = obj_size(pdf, obj, 1);
1505 1505
     q = pdf->map + obj->start;
1506 1506
 
1507
-    O = U =  NULL;
1507
+    O = U = UE = CFM = NULL;
1508 1508
     do {
1509
-	char *CFM = NULL;
1510 1509
 
1511
-	EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
1512
-	CFM = pdf_readval(q, len, "/CFM");
1513 1510
 	pdf->enc_method = ENC_UNKNOWN;
1514
-	if (CFM) {
1515
-	    cli_dbgmsg("CFM: %s\n", CFM);
1516
-	    if (!strncmp(CFM,"V2", 2))
1517
-		pdf->enc_method = ENC_V2;
1518
-	    else if (!strncmp(CFM,"AESV2",5))
1519
-		pdf->enc_method = ENC_AESV2;
1520
-	    else if (!strncmp(CFM,"None",4))
1521
-		pdf->enc_method = ENC_NONE;
1522
-	}
1523
-	free(CFM);
1524 1511
 	P = pdf_readint(q, len, "/P");
1525 1512
 	if (P == ~0u) {
1526 1513
 	    cli_dbgmsg("cli_pdf: invalid P\n");
... ...
@@ -1539,8 +1577,6 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
1539 1539
 	length = pdf_readint(q2, len - (q2 - q), "/Length");
1540 1540
 	if (length == ~0u)
1541 1541
 	    length = pdf_readint(q, len, "/Length");
1542
-	if (length == ~0u)
1543
-	    length = 40;
1544 1542
 	if (length < 40) {
1545 1543
 	    cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
1546 1544
 	    length = 40;
... ...
@@ -1556,6 +1592,34 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
1556 1556
 	    oulen = 32;
1557 1557
 	else
1558 1558
 	    oulen = 48;
1559
+	if (R == 2 || R == 3) {
1560
+	    pdf->enc_method = ENC_V2;
1561
+	} else if (R == 4 || R == 5) {
1562
+	    CFM = pdf_readval(q, len, "/CFM");
1563
+	    EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
1564
+	    cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n",
1565
+		       EM ? "true" : "false");
1566
+	    if (CFM) {
1567
+		cli_dbgmsg("cli_pdf: CFM: %s\n", CFM);
1568
+		if (!strncmp(CFM,"V2", 2))
1569
+		    pdf->enc_method = ENC_V2;
1570
+		else if (!strncmp(CFM,"AESV2",5))
1571
+		    pdf->enc_method = ENC_AESV2;
1572
+		else if (!strncmp(CFM,"AESV3",5))
1573
+		    pdf->enc_method = ENC_AESV3;
1574
+		else if (!strncmp(CFM,"None",4))
1575
+		    pdf->enc_method = ENC_NONE;
1576
+	    }
1577
+	    if (R == 4)
1578
+		length = 128;
1579
+	    else {
1580
+		n = 0;
1581
+		UE = pdf_readstring(q, len, "/UE", &n);
1582
+		length = 256;
1583
+	    }
1584
+	}
1585
+	if (length == ~0u)
1586
+	    length = 40;
1559 1587
 
1560 1588
 	n = 0;
1561 1589
 	O = pdf_readstring(q, len, "/O", &n);
... ...
@@ -1597,10 +1661,12 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
1597 1597
 	    cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
1598 1598
 	    break;
1599 1599
 	}
1600
-	check_user_password(pdf, R, O, U, P, EM, length, oulen);
1600
+	check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
1601 1601
     } while (0);
1602 1602
     free(O);
1603 1603
     free(U);
1604
+    free(UE);
1605
+    free(CFM);
1604 1606
 }
1605 1607
 
1606 1608
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
... ...
@@ -1677,19 +1743,12 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1677 1677
 	    pdf.flags |= 1 << BAD_PDF_TRAILER;
1678 1678
 	    cli_dbgmsg("cli_pdf: startxref not found\n");
1679 1679
 	} else {
1680
-	    const char *enc;
1681 1680
 	    for (t=q;t > eofmap; t--) {
1682 1681
 		if (memcmp(t,"trailer",7) == 0)
1683 1682
 		    break;
1684 1683
 	    }
1685 1684
 
1686
-	    enc = cli_memstr(eofmap, bytesleft, "/Encrypt", 8);
1687
-	    if (enc) {
1688
-		pdf.flags |= 1 << ENCRYPTED_PDF;
1689
-		cli_dbgmsg("cli_pdf: encrypted pdf found, stream will probably fail to decompress!\n");
1690
-		pdf_parse_encrypt(&pdf, enc, eof - enc);
1691
-		pdf.fileID = pdf_readstring(eofmap, bytesleft, "/ID", &pdf.fileIDlen);
1692
-	    }
1685
+	    pdf_parse_trailer(&pdf, eofmap, eof - eofmap);
1693 1686
 	    q += 9;
1694 1687
 	    while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
1695 1688
 	    xref = atol(q);
... ...
@@ -1720,7 +1779,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1720 1720
     /* parse PDF and find obj offsets */
1721 1721
     while ((rc = pdf_findobj(&pdf)) > 0) {
1722 1722
 	struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
1723
-	cli_dbgmsg("found %d %d obj @%ld\n", obj->id >> 8, obj->id&0xff, obj->start + offset);
1723
+	cli_dbgmsg("cli_pdf: found %d %d obj @%ld\n", obj->id >> 8, obj->id&0xff, obj->start + offset);
1724 1724
     }
1725 1725
     if (pdf.nobjs)
1726 1726
 	pdf.nobjs--;
... ...
@@ -1734,6 +1793,10 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1734 1734
     }
1735 1735
 
1736 1736
     pdf_handle_enc(&pdf);
1737
+    if (pdf.flags & (1 << ENCRYPTED_PDF))
1738
+	cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
1739
+		   (pdf.flags & (1 << DECRYPTABLE_PDF)) ?
1740
+		   "decryptable" : "not decryptable, stream will probably fail to decompress");
1737 1741
 
1738 1742
     if (DETECT_ENCRYPTED &&
1739 1743
 	(pdf.flags & (1 << ENCRYPTED_PDF)) &&
... ...
@@ -2443,7 +2506,7 @@ ascii85decode(const char *buf, off_t len, unsigned char *output)
2443 2443
 			}
2444 2444
 		} else if(byte == 'z') {
2445 2445
 			if(quintet) {
2446
-				cli_dbgmsg("ascii85decode: unexpected 'z'\n");
2446
+				cli_dbgmsg("cli_pdf: ascii85decode: unexpected 'z'\n");
2447 2447
 				return -1;
2448 2448
 			}
2449 2449
 			*output++ = '\0';
... ...
@@ -2452,12 +2515,12 @@ ascii85decode(const char *buf, off_t len, unsigned char *output)
2452 2452
 			*output++ = '\0';
2453 2453
 			ret += 4;
2454 2454
 		} else if(byte == EOF) {
2455
-			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
2455
+			cli_dbgmsg("cli_pdf: ascii85decode: quintet %d\n", quintet);
2456 2456
 			if(quintet) {
2457 2457
 				int i;
2458 2458
 
2459 2459
 				if(quintet == 1) {
2460
-					cli_dbgmsg("ascii85Decode: only 1 byte in last quintet\n");
2460
+					cli_dbgmsg("cli_pdf: ascii85Decode: only 1 byte in last quintet\n");
2461 2461
 					return -1;
2462 2462
 				}
2463 2463
 				for(i = quintet; i < 5; i++)
... ...
@@ -2471,7 +2534,7 @@ ascii85decode(const char *buf, off_t len, unsigned char *output)
2471 2471
 			}
2472 2472
 			break;
2473 2473
 		} else if(!isspace(byte)) {
2474
-			cli_dbgmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
2474
+			cli_dbgmsg("cli_pdf: ascii85Decode: invalid character 0x%x, len %lu\n",
2475 2475
 				byte & 0xFF, (unsigned long)len);
2476 2476
 			return -1;
2477 2477
 		}