... | ... |
@@ -67,13 +67,14 @@ |
67 | 67 |
* strings SHOULD range from 9 to 12 bits. |
68 | 68 |
*/ |
69 | 69 |
#define BITS_MIN 9 /* start with 9 bits */ |
70 |
-#define BITS_VALID 12 /* max of 12 bit strings are valid, used to flag */ |
|
71 |
-#define BITS_MAX 14 /* max of 12 bit strings, +2 for robustness */ |
|
70 |
+#define BITS_VALID 12 /* 12 bit codes are the max valid */ |
|
71 |
+#define BITS_MAX 14 /* codes may grow to at most 14 bits (2-bit extension) */ |
|
72 | 72 |
/* predefined codes */ |
73 | 73 |
#define CODE_BASIC 256 /* last basic code + 1 */ |
74 | 74 |
#define CODE_CLEAR 256 /* code to clear string table */ |
75 | 75 |
#define CODE_EOI 257 /* end-of-information code */ |
76 | 76 |
#define CODE_FIRST 258 /* first free code entry */ |
77 |
+#define CODE_VALID MAXCODE(BITS_VALID) |
|
77 | 78 |
#define CODE_MAX MAXCODE(BITS_MAX) |
78 | 79 |
|
79 | 80 |
#define CSIZE (MAXCODE(BITS_MAX)+1L) |
... | ... |
@@ -198,7 +199,7 @@ int lzwInflate(lzw_streamp strm) |
198 | 198 |
|
199 | 199 |
uint8_t *wp; |
200 | 200 |
hcode_t code, free_code; |
201 |
- int echg, ret = LZW_OK; |
|
201 |
+ int echg, cext, ret = LZW_OK; |
|
202 | 202 |
uint32_t flags; |
203 | 203 |
|
204 | 204 |
if (strm == NULL || strm->state == NULL || strm->next_out == NULL || |
... | ... |
@@ -224,6 +225,7 @@ int lzwInflate(lzw_streamp strm) |
224 | 224 |
maxcodep = state->dec_maxcodep; |
225 | 225 |
|
226 | 226 |
echg = flags & LZW_FLAG_EARLYCHG; |
227 |
+ cext = flags & LZW_FLAG_EXTNCODE; |
|
227 | 228 |
free_code = free_entp - &state->dec_codetab[0]; |
228 | 229 |
|
229 | 230 |
if (oldcodep == &state->dec_codetab[CODE_EOI]) |
... | ... |
@@ -289,47 +291,56 @@ int lzwInflate(lzw_streamp strm) |
289 | 289 |
} |
290 | 290 |
codep = state->dec_codetab + code; |
291 | 291 |
|
292 |
- /* non-earlychange bit expansion */ |
|
293 |
- if (!echg && free_entp > maxcodep) { |
|
294 |
- if (++nbits > BITS_VALID) { |
|
295 |
- flags |= LZW_FLAG_BIGDICT; |
|
296 |
- if (nbits > BITS_MAX) /* should not happen */ |
|
297 |
- nbits = BITS_MAX; |
|
292 |
+ /* cap dictionary codes to the valid 12-bit range unless extended codes are enabled */ |
|
293 |
+ if (free_code < CODE_VALID+1 || cext) { |
|
294 |
+ /* non-earlychange bit expansion */ |
|
295 |
+ if (!echg && free_entp > maxcodep) { |
|
296 |
+ if (++nbits > BITS_VALID) { |
|
297 |
+ if (!cext) |
|
298 |
+ nbits = BITS_VALID; |
|
299 |
+ else if (nbits > BITS_MAX) |
|
300 |
+ nbits = BITS_MAX; |
|
301 |
+ } |
|
302 |
+ nbitsmask = MAXCODE(nbits); |
|
303 |
+ maxcodep = state->dec_codetab + nbitsmask-1; |
|
298 | 304 |
} |
299 |
- nbitsmask = MAXCODE(nbits); |
|
300 |
- maxcodep = state->dec_codetab + nbitsmask-1; |
|
301 |
- } |
|
302 |
- /* |
|
303 |
- * Add the new entry to the code table. |
|
304 |
- */ |
|
305 |
- if (&state->dec_codetab[0] > free_entp || free_entp >= &state->dec_codetab[CSIZE]) { |
|
306 |
- cli_dbgmsg("%p <= %p, %p < %p(%ld)\n", &state->dec_codetab[0], free_entp, free_entp, &state->dec_codetab[CSIZE], CSIZE); |
|
307 |
- strm->msg = "full dictionary, cannot add new entry"; |
|
308 |
- ret = LZW_DICT_ERROR; |
|
309 |
- break; |
|
310 |
- } |
|
311 |
- free_entp->next = oldcodep; |
|
312 |
- free_entp->firstchar = free_entp->next->firstchar; |
|
313 |
- free_entp->length = free_entp->next->length+1; |
|
314 |
- free_entp->value = (codep < free_entp) ? |
|
315 |
- codep->firstchar : free_entp->firstchar; |
|
316 |
- free_entp++; |
|
317 |
- /* earlychange bit expansion */ |
|
318 |
- if (echg && free_entp > maxcodep) { |
|
319 |
- if (++nbits > BITS_VALID) { |
|
320 |
- flags |= LZW_FLAG_BIGDICT; |
|
321 |
- if (nbits > BITS_MAX) /* should not happen */ |
|
322 |
- nbits = BITS_MAX; |
|
305 |
+ /* |
|
306 |
+ * Add the new entry to the code table. |
|
307 |
+ */ |
|
308 |
+ if (&state->dec_codetab[0] > free_entp || free_entp >= &state->dec_codetab[CSIZE]) { |
|
309 |
+ cli_dbgmsg("%p <= %p, %p < %p(%ld)\n", &state->dec_codetab[0], free_entp, free_entp, &state->dec_codetab[CSIZE], CSIZE); |
|
310 |
+ strm->msg = "full dictionary, cannot add new entry"; |
|
311 |
+ flags |= LZW_FLAG_FULLDICT; |
|
312 |
+ ret = LZW_DICT_ERROR; |
|
313 |
+ break; |
|
323 | 314 |
} |
324 |
- nbitsmask = MAXCODE(nbits); |
|
325 |
- maxcodep = state->dec_codetab + nbitsmask-1; |
|
326 |
- } |
|
327 |
- free_code++; |
|
328 |
- oldcodep = codep; |
|
315 |
+ free_entp->next = oldcodep; |
|
316 |
+ free_entp->firstchar = free_entp->next->firstchar; |
|
317 |
+ free_entp->length = free_entp->next->length+1; |
|
318 |
+ free_entp->value = (codep < free_entp) ? |
|
319 |
+ codep->firstchar : free_entp->firstchar; |
|
320 |
+ free_entp++; |
|
321 |
+ /* earlychange bit expansion */ |
|
322 |
+ if (echg && free_entp > maxcodep) { |
|
323 |
+ if (++nbits > BITS_VALID) { |
|
324 |
+ if (!cext) |
|
325 |
+ nbits = BITS_VALID; |
|
326 |
+ else if (nbits > BITS_MAX) |
|
327 |
+ nbits = BITS_MAX; |
|
328 |
+ } |
|
329 |
+ nbitsmask = MAXCODE(nbits); |
|
330 |
+ maxcodep = state->dec_codetab + nbitsmask-1; |
|
331 |
+ } |
|
332 |
+ if (free_code++ > CODE_VALID) |
|
333 |
+ flags |= LZW_FLAG_EXTNCODEUSE; |
|
334 |
+ oldcodep = codep; |
|
335 |
+ } else |
|
336 |
+ flags |= LZW_FLAG_FULLDICT; |
|
329 | 337 |
if (code >= CODE_BASIC) { |
330 | 338 |
/* check if code is valid */ |
331 | 339 |
if (code >= free_code) { |
332 | 340 |
strm->msg = "cannot reference unpopulated dictionary entries"; |
341 |
+ flags |= LZW_FLAG_INVALIDCODE; |
|
333 | 342 |
ret = LZW_DATA_ERROR; |
334 | 343 |
break; |
335 | 344 |
} |
... | ... |
@@ -66,9 +66,12 @@ typedef lzw_stream *lzw_streamp; |
66 | 66 |
|
67 | 67 |
/* option flags */ |
68 | 68 |
#define LZW_NOFLAGS 0x0 |
69 |
-#define LZW_FLAG_EARLYCHG 0x1 |
|
69 |
+#define LZW_FLAG_EARLYCHG 0x1 /* code width increases one code earlier */ |
|
70 |
+#define LZW_FLAG_EXTNCODE 0x2 /* allow extended codes wider than 12 bits */ |
|
70 | 71 |
/* state flags */ |
71 |
-#define LZW_FLAG_BIGDICT 0x100 |
|
72 |
+#define LZW_FLAG_FULLDICT 0x100 /* dictionary consumes all usable codes */ |
|
73 |
+#define LZW_FLAG_EXTNCODEUSE 0x200 /* dictionary grew into extended codes (wider than 12 bits) */ |
|
74 |
+#define LZW_FLAG_INVALIDCODE 0x400 /* input references invalid code entry (data error) */ |
|
72 | 75 |
|
73 | 76 |
int lzwInit(lzw_streamp strm); |
74 | 77 |
int lzwInflate(lzw_streamp strm); |
... | ... |
@@ -939,11 +939,11 @@ static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct |
939 | 939 |
free(decoded); |
940 | 940 |
} |
941 | 941 |
|
942 |
- /* heuristic check */ |
|
943 |
- if (stream.flags & LZW_FLAG_BIGDICT) { |
|
944 |
- cli_append_virus(pdf->ctx, "Heuristics.PDF.LZWInvalidDictionary"); |
|
945 |
- rc = CL_VIRUS; |
|
946 |
- } |
|
942 |
+ /* |
|
943 |
+ heuristic checks: |
|
944 |
+ - full dictionary heuristics? |
|
945 |
+ - invalid code points? |
|
946 |
+ */ |
|
947 | 947 |
|
948 | 948 |
return rc; |
949 | 949 |
} |