Browse code

lzwdec: modify dictionary max code points and change state flags

Kevin Lin authored on 2016/05/27 04:25:54
Showing 3 changed files
... ...
@@ -67,13 +67,14 @@
67 67
  * strings SHOULD range from 9 to 12 bits.
68 68
  */
69 69
 #define BITS_MIN    9       /* start with 9 bits */
70
-#define BITS_VALID  12      /* max of 12 bit strings are valid, used to flag  */
71
-#define BITS_MAX    14      /* max of 12 bit strings, +2 for robustness */
70
+#define BITS_VALID  12      /* 12 bit codes are the max valid */
71
+#define BITS_MAX    14      /* max of 14 bit codes (2 bits extension) */
72 72
 /* predefined codes */
73 73
 #define CODE_BASIC  256     /* last basic code + 1 */
74 74
 #define CODE_CLEAR  256     /* code to clear string table */
75 75
 #define CODE_EOI    257     /* end-of-information code */
76 76
 #define CODE_FIRST  258     /* first free code entry */
77
+#define CODE_VALID  MAXCODE(BITS_VALID)
77 78
 #define CODE_MAX    MAXCODE(BITS_MAX)
78 79
 
79 80
 #define CSIZE       (MAXCODE(BITS_MAX)+1L)
... ...
@@ -198,7 +199,7 @@ int lzwInflate(lzw_streamp strm)
198 198
 
199 199
     uint8_t *wp;
200 200
     hcode_t code, free_code;
201
-    int echg, ret = LZW_OK;
201
+    int echg, cext, ret = LZW_OK;
202 202
     uint32_t flags;
203 203
 
204 204
     if (strm == NULL || strm->state == NULL || strm->next_out == NULL ||
... ...
@@ -224,6 +225,7 @@ int lzwInflate(lzw_streamp strm)
224 224
     maxcodep = state->dec_maxcodep;
225 225
 
226 226
     echg = flags & LZW_FLAG_EARLYCHG;
227
+    cext = flags & LZW_FLAG_EXTNCODE;
227 228
     free_code = free_entp - &state->dec_codetab[0];
228 229
 
229 230
     if (oldcodep == &state->dec_codetab[CODE_EOI])
... ...
@@ -289,47 +291,56 @@ int lzwInflate(lzw_streamp strm)
289 289
         }
290 290
         codep = state->dec_codetab + code;
291 291
 
292
-        /* non-earlychange bit expansion */
293
-        if (!echg && free_entp > maxcodep) {
294
-            if (++nbits > BITS_VALID) {
295
-                flags |= LZW_FLAG_BIGDICT;
296
-                if (nbits > BITS_MAX)     /* should not happen */
297
-                    nbits = BITS_MAX;
292
+        /* cap dictionary codes to valid range (12-bits) */
293
+        if (free_code < CODE_VALID+1 || cext) {
294
+            /* non-earlychange bit expansion */
295
+            if (!echg && free_entp > maxcodep) {
296
+                if (++nbits > BITS_VALID) {
297
+                    if (!cext)
298
+                        nbits = BITS_VALID;
299
+                    else if (nbits > BITS_MAX)
300
+                        nbits = BITS_MAX;
301
+                }
302
+                nbitsmask = MAXCODE(nbits);
303
+                maxcodep = state->dec_codetab + nbitsmask-1;
298 304
             }
299
-            nbitsmask = MAXCODE(nbits);
300
-            maxcodep = state->dec_codetab + nbitsmask-1;
301
-        }
302
-        /*
303
-         * Add the new entry to the code table.
304
-         */
305
-        if (&state->dec_codetab[0] > free_entp || free_entp >= &state->dec_codetab[CSIZE]) {
306
-            cli_dbgmsg("%p <= %p, %p < %p(%ld)\n", &state->dec_codetab[0], free_entp, free_entp, &state->dec_codetab[CSIZE], CSIZE);
307
-            strm->msg = "full dictionary, cannot add new entry";
308
-            ret = LZW_DICT_ERROR;
309
-            break;
310
-        }
311
-        free_entp->next = oldcodep;
312
-        free_entp->firstchar = free_entp->next->firstchar;
313
-        free_entp->length = free_entp->next->length+1;
314
-        free_entp->value = (codep < free_entp) ?
315
-            codep->firstchar : free_entp->firstchar;
316
-        free_entp++;
317
-        /* earlychange bit expansion */
318
-        if (echg && free_entp > maxcodep) {
319
-            if (++nbits > BITS_VALID) {
320
-                flags |= LZW_FLAG_BIGDICT;
321
-                if (nbits > BITS_MAX)     /* should not happen */
322
-                    nbits = BITS_MAX;
305
+            /*
306
+             * Add the new entry to the code table.
307
+             */
308
+            if (&state->dec_codetab[0] > free_entp || free_entp >= &state->dec_codetab[CSIZE]) {
309
+                cli_dbgmsg("%p <= %p, %p < %p(%ld)\n", &state->dec_codetab[0], free_entp, free_entp, &state->dec_codetab[CSIZE], CSIZE);
310
+                strm->msg = "full dictionary, cannot add new entry";
311
+                flags |= LZW_FLAG_FULLDICT;
312
+                ret = LZW_DICT_ERROR;
313
+                break;
323 314
             }
324
-            nbitsmask = MAXCODE(nbits);
325
-            maxcodep = state->dec_codetab + nbitsmask-1;
326
-        }
327
-        free_code++;
328
-        oldcodep = codep;
315
+            free_entp->next = oldcodep;
316
+            free_entp->firstchar = free_entp->next->firstchar;
317
+            free_entp->length = free_entp->next->length+1;
318
+            free_entp->value = (codep < free_entp) ?
319
+                codep->firstchar : free_entp->firstchar;
320
+            free_entp++;
321
+            /* earlychange bit expansion */
322
+            if (echg && free_entp > maxcodep) {
323
+                if (++nbits > BITS_VALID) {
324
+                    if (!cext)
325
+                        nbits = BITS_VALID;
326
+                    else if (nbits > BITS_MAX)
327
+                        nbits = BITS_MAX;
328
+                }
329
+                nbitsmask = MAXCODE(nbits);
330
+                maxcodep = state->dec_codetab + nbitsmask-1;
331
+            }
332
+            if (free_code++ > CODE_VALID)
333
+                flags |= LZW_FLAG_EXTNCODEUSE;
334
+            oldcodep = codep;
335
+        } else
336
+            flags |= LZW_FLAG_FULLDICT;
329 337
         if (code >= CODE_BASIC) {
330 338
             /* check if code is valid */
331 339
             if (code >= free_code) {
332 340
                 strm->msg = "cannot reference unpopulated dictionary entries";
341
+                flags |= LZW_FLAG_INVALIDCODE;
333 342
                 ret = LZW_DATA_ERROR;
334 343
                 break;
335 344
             }
... ...
@@ -66,9 +66,12 @@ typedef lzw_stream *lzw_streamp;
66 66
 
67 67
 /* option flags */
68 68
 #define LZW_NOFLAGS        0x0
69
-#define LZW_FLAG_EARLYCHG  0x1
69
+#define LZW_FLAG_EARLYCHG  0x1 /* code width increases one code earlier */
70
+#define LZW_FLAG_EXTNCODE  0x2 /* allow extended code points (wider than 12 bits) */
70 71
 /* state flags */
71
-#define LZW_FLAG_BIGDICT   0x100
72
+#define LZW_FLAG_FULLDICT     0x100 /* dictionary consumes all usable codes */
73
+#define LZW_FLAG_EXTNCODEUSE  0x200 /* dictionary holds extended codes (wider than 12 bits) */
74
+#define LZW_FLAG_INVALIDCODE  0x400 /* input references invalid code entry (data error) */
72 75
 
73 76
 int lzwInit(lzw_streamp strm);
74 77
 int lzwInflate(lzw_streamp strm);
... ...
@@ -939,11 +939,11 @@ static int filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct
939 939
         free(decoded);
940 940
     }
941 941
 
942
-    /* heuristic check */
943
-    if (stream.flags & LZW_FLAG_BIGDICT) {
944
-        cli_append_virus(pdf->ctx, "Heuristics.PDF.LZWInvalidDictionary");
945
-        rc = CL_VIRUS;
946
-    }
942
+    /*
943
+       heuristic checks:
944
+       - full dictionary heuristics?
945
+       - invalid code points?
946
+    */
947 947
 
948 948
     return rc;
949 949
 }