Browse code

implement lzw decompression, derived from tiff

Kevin Lin authored on 2016/04/14 07:45:34
Showing 3 changed files
... ...
@@ -464,7 +464,9 @@ libclamav_la_SOURCES = \
464 464
 	tiff.c \
465 465
 	tiff.h \
466 466
 	hwp.c \
467
-	hwp.h
467
+	hwp.h \
468
+	lzw/lzwdec.c \
469
+	lzw/lzwdec.h
468 470
 
469 471
 if ENABLE_YARA
470 472
 libclamav_la_SOURCES += yara_arena.c \
471 473
new file mode 100644
... ...
@@ -0,0 +1,431 @@
0
+/*
1
+ * Copyright (c) 1988-1997 Sam Leffler
2
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
3
+ *
4
+ * Permission to use, copy, modify, distribute, and sell this software and
5
+ * its documentation for any purpose is hereby granted without fee, provided
6
+ * that (i) the above copyright notices and this permission notice appear in
7
+ * all copies of the software and related documentation, and (ii) the names of
8
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
9
+ * publicity relating to the software without the specific, prior written
10
+ * permission of Sam Leffler and Silicon Graphics.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
13
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
14
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
15
+ *
16
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
17
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
18
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
19
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
20
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
21
+ * OF THIS SOFTWARE.
22
+ */
23
+/*
24
+ *  Portions Copyright (C) 2016 Cisco and/or its affiliates. All rights reserved.
25
+ *
26
+ *  Modified by: Kevin Lin
27
+ *
28
+ *  This program is free software; you can redistribute it and/or modify
29
+ *  it under the terms of the GNU General Public License version 2 as
30
+ *  published by the Free Software Foundation.
31
+ *
32
+ *  This program is distributed in the hope that it will be useful,
33
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
35
+ *  GNU General Public License for more details.
36
+ *
37
+ *  You should have received a copy of the GNU General Public License
38
+ *  along with this program; if not, write to the Free Software
39
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
40
+ *  MA 02110-1301, USA.
41
+ *
42
+ *  In addition, as a special exception, the copyright holders give
43
+ *  permission to link the code of portions of this program with the
44
+ *  OpenSSL library under certain conditions as described in each
45
+ *  individual source file, and distribute linked combinations
46
+ *  including the two.
47
+ *  
48
+ *  You must obey the GNU General Public License in all respects
49
+ *  for all of the code used other than OpenSSL.  If you modify
50
+ *  file(s) with this exception, you may extend this exception to your
51
+ *  version of the file(s), but you are not obligated to do so.  If you
52
+ *  do not wish to do so, delete this exception statement from your
53
+ *  version.  If you delete this exception statement from all source
54
+ *  files in the program, then also delete it here.
55
+ */
56
+#include <stdio.h>
57
+
58
+#include <assert.h>
59
+#include <stdint.h>
60
+#include "lzwdec.h"
61
+#include "../others.h"
62
+
63
+#define MAXCODE(n)  ((1L<<(n))-1)
64
+/*
65
+ * The spec specifies that encoded bit
66
+ * strings SHOULD range from 9 to 12 bits.
67
+ */
68
+#define BITS_MIN    9       /* start with 9 bits */
69
+#define BITS_MAX    13      /* max of 12 bit strings, +1 for robustness */
70
+/* predefined codes */
71
+#define CODE_BASIC  256     /* last basic code + 1 */
72
+#define CODE_CLEAR  256     /* code to clear string table */
73
+#define CODE_EOI    257     /* end-of-information code */
74
+#define CODE_FIRST  258     /* first free code entry */
75
+#define CODE_MAX    MAXCODE(BITS_MAX)
76
+
77
+#define CSIZE       (MAXCODE(BITS_MAX)+1L)
78
+
79
+typedef uint16_t hcode_t;     /* codes fit in 16 bits */
80
+
81
/*
 * Dictionary entry used by the decoder.
 *
 * A string is stored as a backward-linked chain: each entry carries only the
 * final byte of its string and points at the entry describing the prefix.
 * The 256 single-byte entries terminate the chain with next == NULL.
 */
struct code_ent {
    struct code_ent *next;  /* entry for the prefix string, or NULL */
    uint16_t length;        /* number of bytes in the string, this one included */
    uint8_t  value;         /* last byte of the string */
    uint8_t  firstchar;     /* first byte of the string */
};

typedef struct code_ent code_t;
90
+
91
+struct lzw_internal_state {
92
+    /* general state */
93
+    uint16_t    nbits;      /* # of bits/code */
94
+    long        nextdata;   /* next bits of i/o */
95
+    long        nextbits;   /* # of valid bits in lzw_nextdata */
96
+    uint32_t    flags;      /* flags affecting decompression */
97
+
98
+    /* decoding-specific state */
99
+    long    dec_nbitsmask;  /* lzw_nbits 1 bits, right adjusted */
100
+    long    dec_restart;    /* restart count */
101
+    code_t *dec_codep;      /* current recognized code */
102
+    code_t *dec_oldcodep;   /* previously recognized code */
103
+    code_t *dec_free_entp;  /* next free entry */
104
+    code_t *dec_maxcodep;   /* max available entry */
105
+    code_t *dec_codetab;    /* kept separate for small machines */
106
+};
107
+
108
+static void code_print(code_t *code);
109
+static void dict_print(code_t *codetab, uint16_t start, uint16_t maxcode);
110
+
111
/*
 * Fetch the next nbits-wide code from the input into `code`.
 *
 * Works directly on the caller's locals:
 *   from / have          input cursor and number of input bytes left
 *   nextdata / nextbits  bit accumulator and its count of valid bits
 *   nbits / nbitsmask    current code width and the matching mask
 *
 * The macro must be expanded directly inside a loop (or switch): when the
 * input runs out it executes `break`, leaving `code` unchanged while keeping
 * the bits read so far in nextdata/nextbits for the next attempt.  For that
 * reason it is a plain brace block and must not be wrapped in
 * do { } while (0).
 *
 * Bits that were already handed out are masked off before every shift.  Only
 * the low `nextbits` bits are ever used again, so decoding is unaffected, but
 * the signed accumulator can no longer be shifted into its sign bit, which
 * would be undefined behaviour.
 */
#define GetNextCode(code) {                                                 \
    if (have == 0)                                                          \
        break;                                                              \
    nextdata = ((nextdata & ((1L << nextbits) - 1)) << 8) | *(from)++;      \
    have--;                                                                 \
    nextbits += 8;                                                          \
    if (nextbits < nbits) {                                                 \
        if (have == 0)                                                      \
            break;                                                          \
        nextdata = ((nextdata & ((1L << nextbits) - 1)) << 8) | *(from)++;  \
        have--;                                                             \
        nextbits += 8;                                                      \
    }                                                                       \
    code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);           \
    nextbits -= nbits;                                                      \
}
127
+
128
/*
 * Handle a CODE_CLEAR: reset the dictionary cursor and the code width, then
 * consume the first code of the new segment.
 *
 * Works on the caller's locals (state, strm, to, left, ret, nbits, nbitsmask,
 * free_code, free_entp, maxcodep, oldcodep) and expects `code` to hold
 * CODE_CLEAR on entry.
 *
 *  - Runs of consecutive clear codes are skipped.  If the input runs out
 *    while skipping, the `break` inside GetNextCode only leaves the skip
 *    loop; `code` is still CODE_CLEAR, so oldcodep ends up at the CODE_CLEAR
 *    slot and the clear is retried on the next lzwInflate() call.
 *  - A literal byte is written to the output; the caller must guarantee
 *    left > 0.
 *  - CODE_EOI sets ret to LZW_STREAM_END.
 *  - Any dictionary code is invalid right after a clear and sets ret to
 *    LZW_DATA_ERROR.
 */
#define CodeClear(code) {                                                   \
    nbits = BITS_MIN;                                                       \
    nbitsmask = MAXCODE(BITS_MIN);                                          \
    free_code = CODE_FIRST;                                                 \
    free_entp = &state->dec_codetab[CODE_FIRST];                            \
    maxcodep = &state->dec_codetab[nbitsmask - 1];                          \
    while (code == CODE_CLEAR) { /* swallow back-to-back clear codes */     \
        GetNextCode(code);                                                  \
    }                                                                       \
    if (code == CODE_EOI) {                                                 \
        ret = LZW_STREAM_END;                                               \
    } else if (code >= CODE_FIRST) {                                        \
        /* the dictionary is empty, nothing can be referenced yet */        \
        strm->msg = "cannot reference unpopulated dictionary entries";      \
        ret = LZW_DATA_ERROR;                                               \
    } else if (code < CODE_BASIC) {                                         \
        *to++ = code;                                                       \
        left--;                                                             \
    }                                                                       \
    oldcodep = &state->dec_codetab[code];                                   \
}
147
+
148
+int lzwInit(lzw_streamp strm, uint32_t flags)
149
+{
150
+    struct lzw_internal_state *sp;
151
+    hcode_t code;
152
+
153
+    sp = cli_malloc(sizeof(struct lzw_internal_state));
154
+    if (sp == NULL) {
155
+        strm->msg = "failed to allocate state";
156
+        return LZW_MEM_ERROR;
157
+    }
158
+
159
+    /* general state setup */
160
+    sp->nbits = BITS_MIN;
161
+    sp->nextdata = 0;
162
+    sp->nextbits = 0;
163
+    sp->flags = flags;
164
+
165
+    /* dictionary setup */
166
+    sp->dec_codetab = cli_calloc(CSIZE, sizeof(code_t));
167
+    if (sp->dec_codetab == NULL) {
168
+        free(sp);
169
+        strm->msg = "failed to allocate code table";
170
+        return LZW_MEM_ERROR;
171
+    }
172
+
173
+    for (code = 0; code < CODE_BASIC; code++) {
174
+        sp->dec_codetab[code].next = NULL;
175
+        sp->dec_codetab[code].length = 1;
176
+        sp->dec_codetab[code].value = code;
177
+        sp->dec_codetab[code].firstchar = code;
178
+    }
179
+
180
+    sp->dec_restart = 0;
181
+    sp->dec_nbitsmask = MAXCODE(BITS_MIN);
182
+    sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
183
+    sp->dec_oldcodep = &sp->dec_codetab[CODE_CLEAR];
184
+    sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
185
+
186
+    strm->state = sp;
187
+    return LZW_OK;
188
+}
189
+
190
+int lzwInflate(lzw_streamp strm)
191
+{
192
+    struct lzw_internal_state *state;
193
+    uint8_t *from, *to;
194
+    unsigned in, out;
195
+    unsigned have, left;
196
+    long nbits, nextbits, nextdata, nbitsmask;
197
+    code_t *codep, *free_entp, *maxcodep, *oldcodep;
198
+
199
+    uint8_t *wp;
200
+    hcode_t code, free_code;
201
+    int echg, ret = LZW_OK;
202
+
203
+    if (strm == NULL || strm->state == NULL || strm->next_out == NULL ||
204
+        (strm->next_in == NULL && strm->avail_in != 0))
205
+        return LZW_STREAM_ERROR;
206
+
207
+    /* load state */
208
+    state = strm->state;
209
+    to = strm->next_out;
210
+    out = left = strm->avail_out;
211
+
212
+    from = strm->next_in;
213
+    in = have = strm->avail_in;
214
+
215
+    nbits = state->nbits;
216
+    nextdata = state->nextdata;
217
+    nextbits = state->nextbits;
218
+    nbitsmask = state->dec_nbitsmask;
219
+    oldcodep = state->dec_oldcodep;
220
+    free_entp = state->dec_free_entp;
221
+    maxcodep = state->dec_maxcodep;
222
+
223
+    echg = state->flags & LZW_FLAG_EARLYCHG;
224
+    free_code = free_entp - &state->dec_codetab[0];
225
+
226
+    if (oldcodep == &state->dec_codetab[CODE_EOI])
227
+        return LZW_STREAM_END;
228
+
229
+    /*
230
+     * Restart interrupted output operation.
231
+     */
232
+    if (state->dec_restart) {
233
+        long residue;
234
+
235
+        codep = state->dec_codep;
236
+        residue = codep->length - state->dec_restart;
237
+        if (residue > left) {
238
+            /*
239
+             * Residue from previous decode is sufficient
240
+             * to satisfy decode request.  Skip to the
241
+             * start of the decoded string, place decoded
242
+             * values in the output buffer, and return.
243
+             */
244
+            state->dec_restart += left;
245
+            do {
246
+                codep = codep->next;
247
+            } while (--residue > left);
248
+            to = wp = to + left;
249
+            do {
250
+                *--wp = codep->value;
251
+                codep = codep->next;
252
+            } while (--left);
253
+            goto inf_end;
254
+        }
255
+        /*
256
+         * Residue satisfies only part of the decode request.
257
+         */
258
+        to += residue, left -= residue;
259
+        wp = to;
260
+        do {
261
+            *--wp = codep->value;
262
+            codep = codep->next;
263
+        } while (--residue);
264
+        state->dec_restart = 0;
265
+    }
266
+
267
+    /* guarentee valid initial state */
268
+    if (left > 0 && (oldcodep == &state->dec_codetab[CODE_CLEAR])) {
269
+        code = CODE_CLEAR;
270
+        CodeClear(code);
271
+        if (ret != LZW_OK)
272
+            goto inf_end;
273
+    }
274
+
275
+    while (left > 0) {
276
+        GetNextCode(code);
277
+        if (code == CODE_EOI) {
278
+            ret = LZW_STREAM_END;
279
+            break;
280
+        }
281
+        if (code == CODE_CLEAR) {
282
+            CodeClear(code);
283
+            if (ret != LZW_OK)
284
+                break;
285
+            continue;
286
+        }
287
+        codep = state->dec_codetab + code;
288
+
289
+        /* non-earlychange bit expansion */
290
+        if (!echg && free_entp > maxcodep) {
291
+            if (++nbits > BITS_MAX)     /* should not happen */
292
+                nbits = BITS_MAX;
293
+            nbitsmask = MAXCODE(nbits);
294
+            maxcodep = state->dec_codetab + nbitsmask-1;
295
+        }
296
+        /*
297
+         * Add the new entry to the code table.
298
+         */
299
+        if (&state->dec_codetab[0] > free_entp || free_entp >= &state->dec_codetab[CSIZE]) {
300
+            cli_dbgmsg("%p <= %p, %p < %p(%ld)\n", &state->dec_codetab[0], free_entp, free_entp, &state->dec_codetab[CSIZE], CSIZE);
301
+            strm->msg = "full dictionary, cannot add new entry";
302
+            ret = LZW_DICT_ERROR;
303
+            break;
304
+        }
305
+        free_entp->next = oldcodep;
306
+        free_entp->firstchar = free_entp->next->firstchar;
307
+        free_entp->length = free_entp->next->length+1;
308
+        free_entp->value = (codep < free_entp) ?
309
+            codep->firstchar : free_entp->firstchar;
310
+        free_entp++;
311
+        /* earlychange bit expansion */
312
+        if (echg && free_entp > maxcodep) {
313
+            if (++nbits > BITS_MAX)     /* should not happen */
314
+                nbits = BITS_MAX;
315
+            nbitsmask = MAXCODE(nbits);
316
+            maxcodep = state->dec_codetab + nbitsmask-1;
317
+        }
318
+        free_code++;
319
+        oldcodep = codep;
320
+        if (code >= CODE_BASIC) {
321
+            /* check if code is valid */
322
+            if (code >= free_code) {
323
+                strm->msg = "cannot reference unpopulated dictionary entries";
324
+                ret = LZW_DATA_ERROR;
325
+                break;
326
+            }
327
+
328
+            /*
329
+             * Code maps to a string, copy string
330
+             * value to output (written in reverse).
331
+             */
332
+            if (codep->length > left) {
333
+                /*
334
+                 * String is too long for decode buffer,
335
+                 * locate portion that will fit, copy to
336
+                 * the decode buffer, and setup restart
337
+                 * logic for the next decoding call.
338
+                 */
339
+                state->dec_codep = codep;
340
+                do {
341
+                    codep = codep->next;
342
+                } while (codep->length > left);
343
+                state->dec_restart = left;
344
+                to = wp = to + left;
345
+                do  {
346
+                    *--wp = codep->value;
347
+                    codep = codep->next;
348
+                }  while (--left);
349
+                goto inf_end;
350
+            }
351
+
352
+            to += codep->length, left -= codep->length;
353
+            wp = to;
354
+            do {
355
+                *--wp = codep->value;
356
+                codep = codep->next;
357
+            } while(codep != NULL);
358
+        } else
359
+            *to++ = code, left--;
360
+    }
361
+
362
+inf_end:
363
+    /* restore state */
364
+    strm->next_out = to;
365
+    strm->avail_out = left;
366
+    strm->next_in = from;
367
+    strm->avail_in = have;
368
+
369
+    state->nbits = (uint16_t)nbits;
370
+    state->nextdata = nextdata;
371
+    state->nextbits = nextbits;
372
+    state->dec_nbitsmask = nbitsmask;
373
+    state->dec_oldcodep = oldcodep;
374
+    state->dec_free_entp = free_entp;
375
+    state->dec_maxcodep = maxcodep;
376
+
377
+    /* update state */
378
+    in -= strm->avail_in;
379
+    out -= strm->avail_out;
380
+    strm->total_in += in;
381
+    strm->total_out += out;
382
+
383
+    if ((in == 0 && out == 0) && ret == LZW_OK) {
384
+        strm->msg = "no data was processed";
385
+        ret = LZW_BUF_ERROR;
386
+    }
387
+    return ret;
388
+}
389
+
390
+int lzwInflateEnd(lzw_streamp strm)
391
+{
392
+    free(strm->state->dec_codetab);
393
+    free(strm->state);
394
+    strm->state = NULL;
395
+    return LZW_OK;
396
+}
397
+
398
+static void code_print(code_t *code)
399
+{
400
+    code_t *cpt = code;
401
+    uint8_t *string;
402
+    int i = 0;
403
+
404
+    string = cli_calloc(code->length+1, sizeof(uint8_t));
405
+    if (!string)
406
+        return;
407
+
408
+    while (cpt && (i < code->length)) {
409
+        if (isalnum(cpt->value))
410
+            string[code->length - i - 1] = cpt->value;
411
+        else
412
+            string[code->length - i - 1] = '*';
413
+
414
+        i++;
415
+        cpt = cpt->next;
416
+    }
417
+
418
+    printf("%s\n", string);
419
+    free(string);
420
+}
421
+
422
+static void dict_print(code_t *codetab, uint16_t start, uint16_t maxcode)
423
+{
424
+    int i;
425
+
426
+    for (i = start; i < maxcode; i++) {
427
+        printf("%d: ", i);
428
+        code_print(codetab + i);
429
+    }
430
+}
0 431
new file mode 100644
... ...
@@ -0,0 +1,72 @@
0
+/*
1
+ *  Copyright (C) 2016 Cisco and/or its affiliates. All rights reserved.
2
+ *
3
+ *  Author: Kevin Lin
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ *
19
+ *  In addition, as a special exception, the copyright holders give
20
+ *  permission to link the code of portions of this program with the
21
+ *  OpenSSL library under certain conditions as described in each
22
+ *  individual source file, and distribute linked combinations
23
+ *  including the two.
24
+ *  
25
+ *  You must obey the GNU General Public License in all respects
26
+ *  for all of the code used other than OpenSSL.  If you modify
27
+ *  file(s) with this exception, you may extend this exception to your
28
+ *  version of the file(s), but you are not obligated to do so.  If you
29
+ *  do not wish to do so, delete this exception statement from your
30
+ *  version.  If you delete this exception statement from all source
31
+ *  files in the program, then also delete it here.
32
+ */
33
+
34
+#ifndef __LZWDEC_H__
35
+#define __LZWDEC_H__
36
+
37
+#include <stdint.h>
38
+
39
+struct lzw_internal_state;
40
+
41
/*
 * Stream descriptor shared between the caller and the decoder.  The caller
 * fills in the buffer pointers and sizes; lzwInflate() advances them and
 * maintains the running totals.
 */
struct lzw_stream_s {
    uint8_t *next_in;       /* next input byte to read */
    unsigned avail_in;      /* input bytes available at next_in */
    unsigned total_in;      /* running count of input bytes consumed */

    uint8_t *next_out;      /* where the next output byte goes */
    unsigned avail_out;     /* free space at next_out, in bytes */
    unsigned total_out;     /* running count of output bytes produced */

    char *msg;              /* description of the most recent error */
    struct lzw_internal_state *state;   /* private decoder state, owned by the library */
};

typedef struct lzw_stream_s lzw_stream;
typedef lzw_stream *lzw_streamp;
55
+
56
+#define LZW_OK             0
57
+#define LZW_STREAM_END     1
58
+#define LZW_STREAM_ERROR (-2)
59
+#define LZW_DATA_ERROR   (-3)
60
+#define LZW_MEM_ERROR    (-4)
61
+#define LZW_BUF_ERROR    (-5)
62
+#define LZW_DICT_ERROR   (-7)
63
+
64
+#define LZW_NOFLAGS        0
65
+#define LZW_FLAG_EARLYCHG  1
66
+
67
+int lzwInit(lzw_streamp strm, uint32_t flags);
68
+int lzwInflate(lzw_streamp strm);
69
+int lzwInflateEnd(lzw_streamp strm);
70
+
71
+#endif /* __LZWDEC_H__ */