... | ... |
@@ -67,7 +67,8 @@ |
67 | 67 |
* strings SHOULD range from 9 to 12 bits. |
68 | 68 |
*/ |
69 | 69 |
#define BITS_MIN 9 /* start with 9 bits */ |
70 |
-#define BITS_MAX 13 /* max of 12 bit strings, +1 for robustness */ |
|
70 |
+#define BITS_VALID 12 /* longest valid code is 12 bits; wider codes set LZW_FLAG_BIGDICT */ |
|
71 |
+#define BITS_MAX 14 /* max of 12 bit strings, +2 for robustness */ |
|
71 | 72 |
/* predefined codes */ |
72 | 73 |
#define CODE_BASIC 256 /* last basic code + 1 */ |
73 | 74 |
#define CODE_CLEAR 256 /* code to clear string table */ |
... | ... |
@@ -94,7 +95,6 @@ struct lzw_internal_state { |
94 | 94 |
uint16_t nbits; /* # of bits/code */ |
95 | 95 |
long nextdata; /* next bits of i/o */ |
96 | 96 |
long nextbits; /* # of valid bits in lzw_nextdata */ |
97 |
- uint32_t flags; /* flags affecting decompression */ |
|
98 | 97 |
|
99 | 98 |
/* decoding-specific state */ |
100 | 99 |
long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ |
... | ... |
@@ -146,7 +146,7 @@ break; \ |
146 | 146 |
oldcodep = state->dec_codetab + code; \ |
147 | 147 |
} |
148 | 148 |
|
149 |
-int lzwInit(lzw_streamp strm, uint32_t flags) |
|
149 |
+int lzwInit(lzw_streamp strm) |
|
150 | 150 |
{ |
151 | 151 |
struct lzw_internal_state *sp; |
152 | 152 |
hcode_t code; |
... | ... |
@@ -161,7 +161,6 @@ int lzwInit(lzw_streamp strm, uint32_t flags) |
161 | 161 |
sp->nbits = BITS_MIN; |
162 | 162 |
sp->nextdata = 0; |
163 | 163 |
sp->nextbits = 0; |
164 |
- sp->flags = flags; |
|
165 | 164 |
|
166 | 165 |
/* dictionary setup */ |
167 | 166 |
sp->dec_codetab = cli_calloc(CSIZE, sizeof(code_t)); |
... | ... |
@@ -200,19 +199,22 @@ int lzwInflate(lzw_streamp strm) |
200 | 200 |
uint8_t *wp; |
201 | 201 |
hcode_t code, free_code; |
202 | 202 |
int echg, ret = LZW_OK; |
203 |
+ uint32_t flags; |
|
203 | 204 |
|
204 | 205 |
if (strm == NULL || strm->state == NULL || strm->next_out == NULL || |
205 | 206 |
(strm->next_in == NULL && strm->avail_in != 0)) |
206 | 207 |
return LZW_STREAM_ERROR; |
207 | 208 |
|
208 | 209 |
/* load state */ |
209 |
- state = strm->state; |
|
210 | 210 |
to = strm->next_out; |
211 | 211 |
out = left = strm->avail_out; |
212 | 212 |
|
213 | 213 |
from = strm->next_in; |
214 | 214 |
in = have = strm->avail_in; |
215 | 215 |
|
216 |
+ flags = strm->flags; |
|
217 |
+ state = strm->state; |
|
218 |
+ |
|
216 | 219 |
nbits = state->nbits; |
217 | 220 |
nextdata = state->nextdata; |
218 | 221 |
nextbits = state->nextbits; |
... | ... |
@@ -221,7 +223,7 @@ int lzwInflate(lzw_streamp strm) |
221 | 221 |
free_entp = state->dec_free_entp; |
222 | 222 |
maxcodep = state->dec_maxcodep; |
223 | 223 |
|
224 |
- echg = state->flags & LZW_FLAG_EARLYCHG; |
|
224 |
+ echg = flags & LZW_FLAG_EARLYCHG; |
|
225 | 225 |
free_code = free_entp - &state->dec_codetab[0]; |
226 | 226 |
|
227 | 227 |
if (oldcodep == &state->dec_codetab[CODE_EOI]) |
... | ... |
@@ -289,8 +291,11 @@ int lzwInflate(lzw_streamp strm) |
289 | 289 |
|
290 | 290 |
/* non-earlychange bit expansion */ |
291 | 291 |
if (!echg && free_entp > maxcodep) { |
292 |
- if (++nbits > BITS_MAX) /* should not happen */ |
|
293 |
- nbits = BITS_MAX; |
|
292 |
+ if (++nbits > BITS_VALID) { |
|
293 |
+ flags |= LZW_FLAG_BIGDICT; |
|
294 |
+ if (nbits > BITS_MAX) /* should not happen */ |
|
295 |
+ nbits = BITS_MAX; |
|
296 |
+ } |
|
294 | 297 |
nbitsmask = MAXCODE(nbits); |
295 | 298 |
maxcodep = state->dec_codetab + nbitsmask-1; |
296 | 299 |
} |
... | ... |
@@ -311,8 +316,11 @@ int lzwInflate(lzw_streamp strm) |
311 | 311 |
free_entp++; |
312 | 312 |
/* earlychange bit expansion */ |
313 | 313 |
if (echg && free_entp > maxcodep) { |
314 |
- if (++nbits > BITS_MAX) /* should not happen */ |
|
315 |
- nbits = BITS_MAX; |
|
314 |
+ if (++nbits > BITS_VALID) { |
|
315 |
+ flags |= LZW_FLAG_BIGDICT; |
|
316 |
+ if (nbits > BITS_MAX) /* should not happen */ |
|
317 |
+ nbits = BITS_MAX; |
|
318 |
+ } |
|
316 | 319 |
nbitsmask = MAXCODE(nbits); |
317 | 320 |
maxcodep = state->dec_codetab + nbitsmask-1; |
318 | 321 |
} |
... | ... |
@@ -366,6 +374,7 @@ inf_end: |
366 | 366 |
strm->avail_out = left; |
367 | 367 |
strm->next_in = from; |
368 | 368 |
strm->avail_in = have; |
369 |
+ strm->flags = flags; |
|
369 | 370 |
|
370 | 371 |
state->nbits = (uint16_t)nbits; |
371 | 372 |
state->nextdata = nextdata; |
... | ... |
@@ -49,6 +49,8 @@ typedef struct lzw_stream_s { |
49 | 49 |
unsigned total_out; |
50 | 50 |
|
51 | 51 |
char *msg; |
52 |
+ |
|
53 |
+ uint32_t flags; |
|
52 | 54 |
struct lzw_internal_state *state; |
53 | 55 |
} lzw_stream; |
54 | 56 |
|
... | ... |
@@ -62,10 +64,13 @@ typedef lzw_stream *lzw_streamp; |
62 | 62 |
#define LZW_BUF_ERROR (-5) |
63 | 63 |
#define LZW_DICT_ERROR (-7) |
64 | 64 |
|
65 |
-#define LZW_NOFLAGS 0 |
|
66 |
-#define LZW_FLAG_EARLYCHG 1 |
|
65 |
+/* option flags */ |
|
66 |
+#define LZW_NOFLAGS 0x0 |
|
67 |
+#define LZW_FLAG_EARLYCHG 0x1 |
|
68 |
+/* state flags */ |
|
69 |
+#define LZW_FLAG_BIGDICT 0x100 |
|
67 | 70 |
|
68 |
-int lzwInit(lzw_streamp strm, uint32_t flags); |
|
71 |
+int lzwInit(lzw_streamp strm); |
|
69 | 72 |
int lzwInflate(lzw_streamp strm); |
70 | 73 |
int lzwInflateEnd(lzw_streamp strm); |
71 | 74 |
|
... | ... |
@@ -957,8 +957,10 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags) |
957 | 957 |
if (dparams) |
958 | 958 |
pdf_free_dict(dparams); |
959 | 959 |
|
960 |
- if (sum < 0) |
|
961 |
- return rc; |
|
960 |
+ if (sum < 0 || (rc == CL_VIRUS && !(pdf->ctx->options & CL_SCAN_ALLMATCHES))) { |
|
961 |
+ sum = 0; /* prevents post-filter scan */ |
|
962 |
+ break; |
|
963 |
+ } |
|
962 | 964 |
|
963 | 965 |
cli_dbgmsg("-------------EXPERIMENTAL-------------\n"); |
964 | 966 |
|
... | ... |
@@ -117,7 +117,13 @@ off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_d |
117 | 117 |
cli_dbgmsg("cli_pdf: detected %lu applied filters\n", (long unsigned)(obj->numfilters)); |
118 | 118 |
|
119 | 119 |
rv = pdf_decodestream_internal(pdf, obj, params, token); |
120 |
- /* return is ignored so that the existing content is dumped to file */ |
|
120 |
+ /* rv does not stop the dump below; only CL_VIRUS is propagated through *rc */ |
|
121 |
+ if (rc) { |
|
122 |
+ if (rv == CL_VIRUS) |
|
123 |
+ *rc = CL_VIRUS; |
|
124 |
+ else |
|
125 |
+ *rc = CL_SUCCESS; |
|
126 |
+ } |
|
121 | 127 |
|
122 | 128 |
if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) { |
123 | 129 |
if (cli_writen(fout, token->content, token->length) != token->length) { |
... | ... |
@@ -131,15 +137,13 @@ off_t pdf_decodestream(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_d |
131 | 131 |
|
132 | 132 |
free(token->content); |
133 | 133 |
free(token); |
134 |
- if (rc) |
|
135 |
- *rc = CL_SUCCESS; |
|
136 | 134 |
return rv; |
137 | 135 |
} |
138 | 136 |
|
139 | 137 |
static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) |
140 | 138 |
{ |
141 | 139 |
const char *filter = NULL; |
142 |
- int i, rc = CL_SUCCESS; |
|
140 |
+ int i, vir = 0, rc = CL_SUCCESS; |
|
143 | 141 |
|
144 | 142 |
/* |
145 | 143 |
* if pdf is decryptable, scan for CRYPT filter |
... | ... |
@@ -211,22 +215,26 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
211 | 211 |
} |
212 | 212 |
|
213 | 213 |
if (rc != CL_SUCCESS) { |
214 |
- const char *reason; |
|
215 |
- switch (rc) { |
|
216 |
- case CL_VIRUS: |
|
217 |
- reason = "detection"; |
|
218 |
- break; |
|
219 |
- case CL_BREAK: |
|
220 |
- reason = "break decoding"; |
|
221 |
- break; |
|
222 |
- default: |
|
223 |
- reason = "error decoding"; |
|
214 |
+ if (rc == CL_VIRUS && pdf->ctx->options & CL_SCAN_ALLMATCHES) |
|
215 |
+ vir = 1; |
|
216 |
+ else { |
|
217 |
+ const char *reason; |
|
218 |
+ switch (rc) { |
|
219 |
+ case CL_VIRUS: |
|
220 |
+ reason = "detection"; |
|
221 |
+ break; |
|
222 |
+ case CL_BREAK: |
|
223 |
+ reason = "break decoding"; |
|
224 |
+ break; |
|
225 |
+ default: |
|
226 |
+ reason = "error decoding"; |
|
227 |
+ break; |
|
228 |
+ } |
|
229 |
+ |
|
230 |
+ cli_dbgmsg("cli_pdf: %s, stopping after %d (of %lu) filters\n", |
|
231 |
+ reason, i, (long unsigned)(obj->numfilters)); |
|
224 | 232 |
break; |
225 | 233 |
} |
226 |
- |
|
227 |
- cli_dbgmsg("cli_pdf: %s, stopping after %d (of %lu) filters\n", |
|
228 |
- reason, i, (long unsigned)(obj->numfilters)); |
|
229 |
- break; |
|
230 | 234 |
} |
231 | 235 |
|
232 | 236 |
if (cl_engine_get_num(pdf->ctx->engine, CL_ENGINE_FORCETODISK, NULL) && |
... | ... |
@@ -237,6 +245,8 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj |
237 | 237 |
} |
238 | 238 |
} |
239 | 239 |
|
240 |
+ if (vir) |
|
241 |
+ return CL_VIRUS; |
|
240 | 242 |
if (rc == CL_BREAK) |
241 | 243 |
return CL_SUCCESS; |
242 | 244 |
return rc; |
... | ... |
@@ -786,8 +796,10 @@ static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
786 | 786 |
stream.avail_in = length; |
787 | 787 |
stream.next_out = decoded; |
788 | 788 |
stream.avail_out = BUFSIZ; |
789 |
+ if (echg) |
|
790 |
+ stream.flags |= LZW_FLAG_EARLYCHG; |
|
789 | 791 |
|
790 |
- lzwstat = lzwInit(&stream, echg ? LZW_FLAG_EARLYCHG : LZW_NOFLAGS); |
|
792 |
+ lzwstat = lzwInit(&stream); |
|
791 | 793 |
if(lzwstat != Z_OK) { |
792 | 794 |
cli_warnmsg("cli_pdf: lzwInit failed\n"); |
793 | 795 |
free(decoded); |
... | ... |
@@ -811,7 +823,7 @@ static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
811 | 811 |
stream.next_out = (Bytef *)decoded; |
812 | 812 |
stream.avail_out = capacity; |
813 | 813 |
|
814 |
- lzwstat = lzwInit(&stream, echg ? LZW_FLAG_EARLYCHG : LZW_NOFLAGS); |
|
814 |
+ lzwstat = lzwInit(&stream); |
|
815 | 815 |
if(lzwstat != Z_OK) { |
816 | 816 |
cli_warnmsg("cli_pdf: lzwInit failed\n"); |
817 | 817 |
free(decoded); |
... | ... |
@@ -897,5 +909,11 @@ static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
897 | 897 |
free(decoded); |
898 | 898 |
} |
899 | 899 |
|
900 |
+ /* heuristic check */ |
|
901 |
+ if (stream.flags & LZW_FLAG_BIGDICT) { |
|
902 |
+ cli_append_virus(pdf->ctx, "Heuristics.PDF.LZWInvalidDictionary"); |
|
903 |
+ rc = CL_VIRUS; |
|
904 |
+ } |
|
905 |
+ |
|
900 | 906 |
return rc; |
901 | 907 |
} |