Browse code

Better detection for encrypted PDFs (bb #2448)

If --block-encrypted is specified then we can detect Encrypted.PDF if:
- PDF is encrypted with R 2,3,4 or 5
- PDF is not displayable without specifying a password

If PDF is encrypted, but is displayable without specifying a password, then it
is not detected as Encrypted.PDF.

Török Edvin authored on 2011/05/08 00:06:06
Showing 7 changed files
... ...
@@ -1,3 +1,7 @@
1
+Sat May  7 18:05:23 EEST 2011 (edwin)
2
+-------------------------------------
3
+ * libclamav/pdf.c: better detection for encrypted PDFs (bb #2448)
4
+
1 5
 Fri May  6 16:16:00 EEST 2011 (edwin)
2 6
 ------------------------------------
3 7
  * libclamav/c++: add support for building with external LLVM 2.9, and drop external 2.8 support
... ...
@@ -362,7 +362,9 @@ libclamav_la_SOURCES = \
362 362
 	bytecode_detect.h\
363 363
 	builtin_bytecodes.h\
364 364
 	events.c\
365
-	events.h
365
+	events.h \
366
+	arc4.c \
367
+	arc4.h
366 368
 
367 369
 if !LINK_TOMMATH
368 370
 libclamav_la_SOURCES += bignum.c \
... ...
@@ -158,7 +158,8 @@ am__libclamav_la_SOURCES_DIST = clamav.h matcher-ac.c matcher-ac.h \
158 158
 	bytecode_api_decl.c bytecode_api.h bytecode_api_impl.h \
159 159
 	bytecode_hooks.h cache.c cache.h bytecode_detect.c \
160 160
 	bytecode_detect.h builtin_bytecodes.h events.c events.h \
161
-	bignum.c bignum_class.h
161
+	arc4.c arc4.h bignum.c \
162
+	bignum_class.h
162 163
 @LINK_TOMMATH_FALSE@am__objects_1 = libclamav_la-bignum.lo
163 164
 am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
164 165
 	libclamav_la-matcher-bm.lo libclamav_la-matcher-hash.lo \
... ...
@@ -210,7 +211,7 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
210 210
 	libclamav_la-ishield.lo libclamav_la-bytecode_api.lo \
211 211
 	libclamav_la-bytecode_api_decl.lo libclamav_la-cache.lo \
212 212
 	libclamav_la-bytecode_detect.lo libclamav_la-events.lo \
213
-	$(am__objects_1)
213
+	libclamav_la-arc4.lo $(am__objects_1)
214 214
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
215 215
 AM_V_lt = $(am__v_lt_$(V))
216 216
 am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
... ...
@@ -667,7 +668,7 @@ libclamav_la_SOURCES = clamav.h matcher-ac.c matcher-ac.h matcher-bm.c \
667 667
 	bytecode_api_decl.c bytecode_api.h bytecode_api_impl.h \
668 668
 	bytecode_hooks.h cache.c cache.h bytecode_detect.c \
669 669
 	bytecode_detect.h builtin_bytecodes.h events.c events.h \
670
-	$(am__append_7)
670
+	arc4.c arc4.h $(am__append_7)
671 671
 noinst_LTLIBRARIES = libclamav_internal_utils.la libclamav_internal_utils_nothreads.la libclamav_nocxx.la
672 672
 COMMON_CLEANFILES = version.h version.h.tmp *.gcda *.gcno
673 673
 @MAINTAINER_MODE_TRUE@BUILT_SOURCES = jsparse/generated/operators.h jsparse/generated/keywords.h jsparse-keywords.gperf
... ...
@@ -801,6 +802,7 @@ distclean-compile:
801 801
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-Bra.Plo@am__quote@
802 802
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-Bra86.Plo@am__quote@
803 803
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-LzmaDec.Plo@am__quote@
804
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-arc4.Plo@am__quote@
804 805
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-aspack.Plo@am__quote@
805 806
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-autoit.Plo@am__quote@
806 807
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-bignum.Plo@am__quote@
... ...
@@ -1731,6 +1733,14 @@ libclamav_la-events.lo: events.c
1731 1731
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1732 1732
 @am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-events.lo `test -f 'events.c' || echo '$(srcdir)/'`events.c
1733 1733
 
1734
+
1735
+libclamav_la-arc4.lo: arc4.c
1736
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-arc4.lo -MD -MP -MF $(DEPDIR)/libclamav_la-arc4.Tpo -c -o libclamav_la-arc4.lo `test -f 'arc4.c' || echo '$(srcdir)/'`arc4.c
1737
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-arc4.Tpo $(DEPDIR)/libclamav_la-arc4.Plo
1738
+@am__fastdepCC_FALSE@	$(AM_V_CC) @AM_BACKSLASH@
1739
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='arc4.c' object='libclamav_la-arc4.lo' libtool=yes @AMDEPBACKSLASH@
1740
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1741
+@am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-arc4.lo `test -f 'arc4.c' || echo '$(srcdir)/'`arc4.c
1734 1742
 libclamav_la-bignum.lo: bignum.c
1735 1743
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-bignum.lo -MD -MP -MF $(DEPDIR)/libclamav_la-bignum.Tpo -c -o libclamav_la-bignum.lo `test -f 'bignum.c' || echo '$(srcdir)/'`bignum.c
1736 1744
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-bignum.Tpo $(DEPDIR)/libclamav_la-bignum.Plo
... ...
@@ -101,7 +101,8 @@ enum pdf_flag {
101 101
     HAS_OPENACTION,
102 102
     BAD_STREAMLEN,
103 103
     ENCRYPTED_PDF,
104
-    LINEARIZED_PDF /* not bad, just as flag */
104
+    LINEARIZED_PDF, /* not bad, just as flag */
105
+    DECRYPTABLE_PDF
105 106
 };
106 107
 
107 108
 /** PDF obj flags */
... ...
@@ -126,7 +127,8 @@ enum pdf_objflags {
126 126
     OBJ_SIGNED,
127 127
     OBJ_IMAGE,
128 128
     OBJ_TRUNCATED,
129
-    OBJ_FORCEDUMP
129
+    OBJ_FORCEDUMP,
130
+    OBJ_FILTER_STANDARD
130 131
 };
131 132
 
132 133
 #ifdef __CLAMBC__
... ...
@@ -52,6 +52,9 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
52 52
 #include "str.h"
53 53
 #include "bytecode.h"
54 54
 #include "bytecode_api.h"
55
+#include "md5.h"
56
+#include "arc4.h"
57
+#include "sha256.h"
55 58
 
56 59
 #ifdef	CL_DEBUG
57 60
 /*#define	SAVE_TMP	
... ...
@@ -96,6 +99,11 @@ struct pdf_struct {
96 96
     cli_ctx *ctx;
97 97
     const char *dir;
98 98
     unsigned files;
99
+    uint32_t enc_objid;
100
+    char *fileID;
101
+    unsigned fileIDlen;
102
+    char *key;
103
+    unsigned keylen;
99 104
 };
100 105
 
101 106
 static const char *findNextNonWSBack(const char *q, const char *start)
... ...
@@ -793,7 +801,7 @@ static struct pdfname_action pdfname_actions[] = {
793 793
     {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
794 794
     {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER},
795 795
     {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE},
796
-    {"Standard", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_FILTER},
796
+    {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER},
797 797
     {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE},
798 798
     {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
799 799
     {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
... ...
@@ -975,7 +983,8 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
975 975
 	if ((obj->flags & (1 << act->set_objflag)) &&
976 976
 	    act->from_state == STATE_FILTER &&
977 977
 	    act->to_state == STATE_FILTER &&
978
-	    act->set_objflag != OBJ_FILTER_CRYPT) {
978
+	    act->set_objflag != OBJ_FILTER_CRYPT &&
979
+	    act->set_objflag != OBJ_FILTER_STANDARD) {
979 980
 	    filters++;
980 981
 	}
981 982
     }
... ...
@@ -989,6 +998,409 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
989 989
     cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
990 990
 }
991 991
 
992
+static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
993
+{
994
+    const char *q, *q2;
995
+    uint32_t objid;
996
+
997
+    if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
998
+	q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
999
+	if (!q)
1000
+	    return;
1001
+	len -= q - enc;
1002
+	enc = q;
1003
+    }
1004
+    q = enc + 8;
1005
+    len -= 8;
1006
+    q2 = pdf_nextobject(q, len);
1007
+    if (!q2 || !isdigit(*q2))
1008
+	return;
1009
+    objid = atoi(q2) << 8;
1010
+    len -= q2 - q;
1011
+    q = q2;
1012
+    q2 = pdf_nextobject(q, len);
1013
+    if (!q2 || !isdigit(*q2))
1014
+	return;
1015
+    objid |= atoi(q2) & 0xff;
1016
+    len -= q2 - q;
1017
+    q = q2;
1018
+    q2 = pdf_nextobject(q, len);
1019
+    if (!q2 || *q2 != 'R')
1020
+	return;
1021
+    cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff);
1022
+    pdf->enc_objid = objid;
1023
+}
1024
+
1025
/*
 * Find `key` inside the buffer q0[0 .. *len) and return a pointer to the
 * value that follows it, or NULL when the key or its value is not found.
 *
 * On success *len is reduced so that it counts the bytes remaining from the
 * returned pointer. If the byte just before the value located by
 * pdf_nextobject() is '<', the returned pointer is moved back onto that '<'.
 */
static const char *pdf_getdict(const char *q0, int* len, const char *key)
{
    const char *hit, *value;

    hit = cli_memstr(q0, *len, key, strlen(key));
    if (hit == NULL) {
	cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
	return NULL;
    }
    *len -= hit - q0;

    value = pdf_nextobject(hit + 1, *len - 1);
    if (value == NULL) {
	cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
	return NULL;
    }
    if (value[-1] == '<')
	value--;
    *len -= value - hit;
    return value;
}
1046
+
1047
/*
 * Read the string value of `key` from the dictionary text q0[0 .. len).
 *
 * Two encodings are handled:
 *   (...)  literal string: balanced parentheses, backslash escapes decoded
 *   <...>  hex string: decoded with cli_hex2str_to()
 *
 * Returns a newly allocated, NUL-terminated buffer that the caller must
 * free(), or NULL on failure. When slen is non-NULL it receives the decoded
 * length (0 on failure); the decoded data may itself contain NUL bytes.
 */
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen)
{
    char *s, *s0;
    const char *start, *q, *end;

    if (slen)
	*slen = 0;
    q = pdf_getdict(q0, &len, key);
    if (!q || len <= 0)
	return NULL;

    if (*q == '(') {
	int paren = 1;

	start = ++q;
	len--;
	/* find the matching ')' without ever leaving the len bytes we own */
	for (; paren > 0 && len > 0; q++, len--) {
	    switch (*q) {
		case '(':
		    paren++;
		    break;
		case ')':
		    paren--;
		    break;
		case '\\':
		    /* skip the escaped byte, if there is one */
		    if (len > 1) {
			q++;
			len--;
		    }
		    break;
		default:
		    break;
	    }
	}
	if (paren > 0) {
	    cli_dbgmsg("cli_pdf: %s is an unterminated string in dict\n", key);
	    return NULL;
	}
	q--; /* back onto the closing ')' */
	len = q - start;

	/* decoding never produces more bytes than the raw string holds */
	s0 = s = cli_malloc(len + 1);
	if (!s)
	    return NULL;
	end = start + len;
	for (q = start; q < end; q++) {
	    if (*q != '\\') {
		*s++ = *q;
		continue;
	    }
	    if (++q >= end)
		break;
	    switch (*q) {
		case 'n':
		    *s++ = '\n';
		    break;
		case 'r':
		    *s++ = '\r';
		    break;
		case 't':
		    *s++ = '\t';
		    break;
		case 'b':
		    *s++ = '\b';
		    break;
		case 'f':
		    *s++ = '\f';
		    break;
		case '(':/* fall-through */
		case ')':/* fall-through */
		case '\\':
		    *s++ = *q;
		    break;
		case '\n':
		    /* escaped end-of-line: line continuation, emit nothing */
		    break;
		case '\r':
		    /* same, also swallow the LF of a CRLF pair */
		    if (q+1 < end && q[1] == '\n')
			q++;
		    break;
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7': {
		    /* octal escape: one to three octal digits */
		    unsigned v = 0, digits = 0;

		    while (digits < 3 && q < end && *q >= '0' && *q <= '7') {
			v = v * 8 + (unsigned)(*q - '0');
			q++;
			digits++;
		    }
		    q--; /* the for loop advances past the last digit */
		    *s++ = (char)(v & 0xff);
		    break;
		}
		default:
		    /* unknown escape: drop the backslash, keep the byte */
		    *s++ = *q;
		    break;
	    }
	}
	*s = '\0';
	if (slen)
	    *slen = s - s0;
	return s0;
    }

    if (*q == '<') {
	unsigned hexlen;

	start = ++q;
	len--;
	/* search only the bytes that remain after '<' */
	q = len > 0 ? memchr(start, '>', len) : NULL;
	if (!q)
	    return NULL;
	hexlen = q - start;
	s = cli_malloc(hexlen/2 + 1);
	if (!s)
	    return NULL;
	/* zero first so the buffer is defined even if decoding stops early;
	 * NOTE(review): cli_hex2str_to()'s return value is not visible here,
	 * confirm whether a failure should be propagated as NULL */
	memset(s, 0, hexlen/2 + 1);
	cli_hex2str_to(start, s, hexlen);
	s[hexlen/2] = '\0';
	if (slen)
	    *slen = hexlen/2;
	return s;
    }

    cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
    return NULL;
}
1159
+
1160
/*
 * Read the integer value of `key` from the dictionary text q0[0 .. len).
 * Returns atoi() of the value, or -1 when pdf_getdict() cannot find it;
 * a stored value of -1 is therefore indistinguishable from "missing".
 */
static int pdf_readint(const char *q0, int len, const char *key)
{
    const char *value = pdf_getdict(q0, &len, key);

    return value ? atoi(value) : -1;
}
1167
+
1168
/*
 * Read the boolean value of `key` from the dictionary text q0[0 .. len).
 * Returns 1 for "true", 0 for "false", and Default when the key is missing,
 * fewer than 5 bytes remain at the value, or the value is neither keyword.
 */
static int pdf_readbool(const char *q0, int len, const char *key, int Default)
{
    const char *value = pdf_getdict(q0, &len, key);

    if (value != NULL && len >= 5) {
	if (strncmp(value, "true", 4) == 0)
	    return 1;
	if (strncmp(value, "false", 5) == 0)
	    return 0;
	cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
    }
    return Default;
}
1180
+
1181
/*
 * 32-byte password padding string. check_user_password() feeds it to MD5 in
 * place of the password ("empty password, password == padding") and uses it
 * as the plaintext when recomputing /U. It contains embedded NUL bytes, so it
 * must always be used with an explicit length of 32, never with strlen().
 */
static const char *key_padding =
    "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
    "\x2e\x2e\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
1184
+
1185
+static void dbg_printhex(const char *msg, const char *hex, unsigned len)
1186
+{
1187
+    if (cli_debug_flag) {
1188
+	char *kh = cli_str2hex(hex, len);
1189
+	cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
1190
+	free(kh);
1191
+    }
1192
+}
1193
+
1194
+static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
1195
+				const char *U, int32_t P, int EM,
1196
+				unsigned length, unsigned oulen)
1197
+{
1198
+    unsigned i;
1199
+    uint8_t result[16];
1200
+    char data[32];
1201
+    cli_md5_ctx md5;
1202
+    struct arc4_state arc4;
1203
+    unsigned password_empty = 0;
1204
+
1205
+    dbg_printhex("U: ", U, 32);
1206
+    dbg_printhex("O: ", O, 32);
1207
+    if (R == 5) {
1208
+	uint8_t result2[32];
1209
+	SHA256_CTX sha256;
1210
+	/* supplement to ISO3200, 3.5.2 Algorithm 3.11 */
1211
+	sha256_init(&sha256);
1212
+	/* user validation salt */
1213
+	sha256_update(&sha256, U+32, 8);
1214
+	sha256_final(&sha256, result2);
1215
+	dbg_printhex("Computed U", result2, 32);
1216
+	if (!memcmp(result2, U, 32)) {
1217
+	    password_empty = 1;
1218
+	    /* Algorithm 3.2a could be used to recover encryption key */
1219
+	}
1220
+    } else {
1221
+	/* 7.6.3.3 Algorithm 2 */
1222
+	cli_md5_init(&md5);
1223
+	/* empty password, password == padding */
1224
+	cli_md5_update(&md5, key_padding, 32);
1225
+	cli_md5_update(&md5, O, 32);
1226
+	P = le32_to_host(P);
1227
+	cli_md5_update(&md5, &P, 4);
1228
+	cli_md5_update(&md5, pdf->fileID, pdf->fileIDlen);
1229
+	if (R >= 4 && !EM) {
1230
+	    uint32_t v = 0xFFFFFFFF;
1231
+	    cli_md5_update(&md5, &v, 4);
1232
+	}
1233
+	cli_md5_final(result, &md5);
1234
+	if (R >= 3) {
1235
+	    if (length > 128)
1236
+		length = 128;
1237
+	    for (i=0;i<50;i++) {
1238
+		cli_md5_init(&md5);
1239
+		cli_md5_update(&md5, result, length/8);
1240
+		cli_md5_final(result, &md5);
1241
+	    }
1242
+	}
1243
+	if (R == 2)
1244
+	    length = 40;
1245
+	pdf->keylen = length / 8;
1246
+	pdf->key = cli_malloc(pdf->keylen);
1247
+	if (!pdf->key)
1248
+	    return;
1249
+	memcpy(pdf->key, result, pdf->keylen);
1250
+	dbg_printhex("md5", result, 32);
1251
+	dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
1252
+
1253
+	/* 7.6.3.3 Algorithm 6 */
1254
+	if (R == 2) {
1255
+	    /* 7.6.3.3 Algorithm 4 */
1256
+	    memcpy(data, key_padding, 32);
1257
+	    arc4_init(&arc4, pdf->key, pdf->keylen);
1258
+	    arc4_apply(&arc4, data, 32);
1259
+	    dbg_printhex("computed U", data, 32);
1260
+	    if (!memcmp(data, U, 32))
1261
+		password_empty = 1;
1262
+	} else if (R >= 3) {
1263
+	    unsigned len = pdf->keylen;
1264
+	    /* 7.6.3.3 Algorithm 5 */
1265
+	    cli_md5_init(&md5);
1266
+	    cli_md5_update(&md5, key_padding, 32);
1267
+	    cli_md5_update(&md5, pdf->fileID, pdf->fileIDlen);
1268
+	    cli_md5_final(result, &md5);
1269
+	    memcpy(data, pdf->key, len);
1270
+	    arc4_init(&arc4, data, len);
1271
+	    arc4_apply(&arc4, result, 16);
1272
+	    for (i=1;i<=19;i++) {
1273
+		unsigned j;
1274
+		for (j=0;j<len;j++)
1275
+		    data[j] = pdf->key[j] ^ i;
1276
+		arc4_init(&arc4, data, len);
1277
+		arc4_apply(&arc4, result, 16);
1278
+	    }
1279
+	    dbg_printhex("computed U", result, 16);
1280
+	    if (!memcmp(result, U, 16))
1281
+		password_empty = 1;
1282
+	} else {
1283
+	    cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
1284
+	}
1285
+    }
1286
+    if (password_empty) {
1287
+	cli_dbgmsg("cli_pdf: user password is empty\n");
1288
+	/* The key we computed above is the key used to encrypt the streams.
1289
+	 * We could decrypt it now if we wanted to */
1290
+	pdf->flags |= 1 << DECRYPTABLE_PDF;
1291
+    } else {
1292
+	cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
1293
+	/* the key is not valid, we would need the user or the owner password to
1294
+	 * decrypt */
1295
+    }
1296
+}
1297
+
1298
+static void pdf_handle_enc(struct pdf_struct *pdf)
1299
+{
1300
+    struct pdf_obj *obj;
1301
+    uint32_t len, required_flags, n, R, P, length, EM, i, oulen;
1302
+    char *O, *U;
1303
+    const char *q, *q2;
1304
+
1305
+    if (pdf->enc_objid == ~0u || !pdf->fileID)
1306
+	return;
1307
+    obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
1308
+    required_flags = (1 << OBJ_HASFILTERS) | (1 << OBJ_FILTER_STANDARD);
1309
+    if (!(obj->flags & required_flags))
1310
+	return;
1311
+    len = obj_size(pdf, obj, 1);
1312
+    q = pdf->map + obj->start;
1313
+
1314
+    O = U = NULL;
1315
+    do {
1316
+	EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
1317
+
1318
+	q2 = cli_memstr(q, len, "/Standard", 9);
1319
+	if (!q2) {
1320
+	    cli_dbgmsg("cli_pdf: /Standard not found\n");
1321
+	    break;
1322
+	}
1323
+	len -= q2-q;
1324
+	q = q2;
1325
+
1326
+	R = pdf_readint(q, len, "/R");
1327
+	if (R == ~0u) {
1328
+	    cli_dbgmsg("cli_pdf: invalid R\n");
1329
+	    break;
1330
+	}
1331
+
1332
+	if (R < 5)
1333
+	    oulen = 32;
1334
+	else
1335
+	    oulen = 48;
1336
+
1337
+	n = 0;
1338
+	O = pdf_readstring(q, len, "/O", &n);
1339
+	if (!O || n < oulen) {
1340
+	    cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
1341
+	    if (O)
1342
+		dbg_printhex("invalid O", O, n);
1343
+	    break;
1344
+	}
1345
+	if (n > oulen) {
1346
+	    for (i=oulen;i<n;i++)
1347
+		if (O[i])
1348
+		    break;
1349
+	    if (i != n) {
1350
+		dbg_printhex("too long O", O, n);
1351
+		break;
1352
+	    }
1353
+	}
1354
+
1355
+	n = 0;
1356
+	U = pdf_readstring(q, len, "/U", &n);
1357
+	if (!U || n < oulen) {
1358
+	    cli_dbgmsg("cli_pdf: invalid U: %d\n", n);
1359
+	    if (U)
1360
+		dbg_printhex("invalid U", U, n);
1361
+	    break;
1362
+	}
1363
+	if (n > oulen) {
1364
+	    for (i=oulen;i<n;i++)
1365
+		if (U[i])
1366
+		    break;
1367
+	    if (i != n) {
1368
+		dbg_printhex("too long U", U, n);
1369
+		break;
1370
+	    }
1371
+	}
1372
+	P = pdf_readint(q, len, "/P");
1373
+	if (P == ~0u) {
1374
+	    cli_dbgmsg("cli_pdf: invalid P\n");
1375
+	    break;
1376
+	}
1377
+	length = pdf_readint(q, len, "/Length");
1378
+	if (length == ~0u)
1379
+	    length = 40;
1380
+	if (length < 40) {
1381
+	    cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
1382
+	    length = 40;
1383
+	}
1384
+	cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %d\n", R, P, length);
1385
+	if (length % 8) {
1386
+	    cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
1387
+	    break;
1388
+	}
1389
+	check_user_password(pdf, R, O, U, P, EM, length, oulen);
1390
+    } while (0);
1391
+    free(O);
1392
+    free(U);
1393
+}
1394
+
992 1395
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
993 1396
 {
994 1397
     struct pdf_struct pdf;
... ...
@@ -1005,6 +1417,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1005 1005
     memset(&pdf, 0, sizeof(pdf));
1006 1006
     pdf.ctx = ctx;
1007 1007
     pdf.dir = dir;
1008
+    pdf.enc_objid = ~0u;
1008 1009
 
1009 1010
     pdfver = start = fmap_need_off_once(map, offset, versize);
1010 1011
 
... ...
@@ -1062,15 +1475,18 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1062 1062
 	    pdf.flags |= 1 << BAD_PDF_TRAILER;
1063 1063
 	    cli_dbgmsg("cli_pdf: startxref not found\n");
1064 1064
 	} else {
1065
+	    const char *enc;
1065 1066
 	    for (t=q;t > eofmap; t--) {
1066 1067
 		if (memcmp(t,"trailer",7) == 0)
1067 1068
 		    break;
1068 1069
 	    }
1069
-	    if (t > eofmap) {
1070
-		if (cli_memstr(t, q-t, "/Encrypt", 8)) {
1071
-		    pdf.flags |= 1 << ENCRYPTED_PDF;
1072
-		    cli_dbgmsg("cli_pdf: encrypted pdf found, stream will probably fail to decompress!\n");
1073
-		}
1070
+
1071
+	    enc = cli_memstr(eofmap, bytesleft, "/Encrypt", 8);
1072
+	    if (enc) {
1073
+		pdf.flags |= 1 << ENCRYPTED_PDF;
1074
+		cli_dbgmsg("cli_pdf: encrypted pdf found, stream will probably fail to decompress!\n");
1075
+		pdf_parse_encrypt(&pdf, enc, eof - enc);
1076
+		pdf.fileID = pdf_readstring(eofmap, bytesleft, "/ID", &pdf.fileIDlen);
1074 1077
 	    }
1075 1078
 	    q += 9;
1076 1079
 	    while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
... ...
@@ -1115,7 +1531,20 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1115 1115
 	pdf_parseobj(&pdf, obj);
1116 1116
     }
1117 1117
 
1118
-    rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
1118
+    pdf_handle_enc(&pdf);
1119
+
1120
+    if (DETECT_ENCRYPTED &&
1121
+	(pdf.flags & (1 << ENCRYPTED_PDF)) &&
1122
+	!(pdf.flags & (1 << DECRYPTABLE_PDF))) {
1123
+	/* It is encrypted, and a password/key needs to be supplied to decrypt.
1124
+	 * This doesn't trigger for PDFs that are encrypted but don't need
1125
+	 * a password to decrypt */
1126
+	*ctx->virname = "Encrypted.PDF";
1127
+	rc = CL_VIRUS;
1128
+    }
1129
+
1130
+    if (!rc)
1131
+	rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
1119 1132
     /* extract PDF objs */
1120 1133
     for (i=0;!rc && i<pdf.nobjs;i++) {
1121 1134
 	struct pdf_obj *obj = &pdf.objs[i];
... ...
@@ -1152,6 +1581,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1152 1152
     }
1153 1153
     cli_dbgmsg("cli_pdf: returning %d\n", rc);
1154 1154
     free(pdf.objs);
1155
+    free(pdf.fileID);
1156
+    free(pdf.key);
1155 1157
     /* PDF hooks may abort, don't return CL_BREAK to caller! */
1156 1158
     return rc == CL_BREAK ? CL_CLEAN : rc;
1157 1159
 }
... ...
@@ -502,7 +502,7 @@
502 502
 /* #undef USE_SYSLOG */
503 503
 
504 504
 /* Version number of package */
505
-#define VERSION "devel-clamav-0.97-129-g0e3c6e3"
505
+#define VERSION "devel-clamav-0.97-133-gde8d667"
506 506
 
507 507
 /* Version suffix for package */
508 508
 #define VERSION_SUFFIX ""
... ...
@@ -166,6 +166,7 @@
166 166
     <ResourceCompile Include="res\libclamav.rc"/>
167 167
   </ItemGroup>
168 168
   <ItemGroup>
169
+    <ClCompile Include="..\libclamav\arc4.c"/>
169 170
     <ClCompile Include="..\libclamav\matcher-hash.c"/>
170 171
     <ClCompile Include="..\libclamav\sha1.c"/>
171 172
     <ClCompile Include="..\libclamav\events.c"/>