Browse code

unzip/implode support

git-svn: trunk@3476

aCaB authored on 2008/01/02 08:09:15
Showing 6 changed files
... ...
@@ -1,3 +1,8 @@
1
+Tue Jan  1 23:30:06 CET 2008 (acab)
2
+-----------------------------------
3
+  * libclamav/unzip: add support for zip method 6 (implode) - bb#584
4
+  		     fix for zip method 12 (bzip2)
5
+
1 6
 Mon Dec 31 14:08:40 EET 2007 (edwin)
2 7
 ------------------------------------
3 8
  * configure*: add support for version scripts when using Sun's ld on Solaris.
... ...
@@ -180,7 +180,9 @@ libclamav_la_SOURCES = \
180 180
 	dconf.c \
181 181
 	dconf.h \
182 182
 	lzma_iface.c \
183
-	lzma_iface.h
183
+	lzma_iface.h \
184
+	explode.c \
185
+	explode.h
184 186
 
185 187
 libclamav_internal_utils_la_SOURCES=str.c \
186 188
 				    str.h \
... ...
@@ -88,7 +88,7 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
88 88
 	pdf.lo spin.lo yc.lo elf.lo sis.lo uuencode.lo pst.lo \
89 89
 	phishcheck.lo phish_domaincheck_db.lo phish_whitelist.lo \
90 90
 	regex_list.lo mspack.lo cab.lo entconv.lo hashtab.lo dconf.lo \
91
-	lzma_iface.lo
91
+	lzma_iface.lo explode.lo
92 92
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
93 93
 libclamav_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
94 94
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
... ...
@@ -410,7 +410,9 @@ libclamav_la_SOURCES = \
410 410
 	dconf.c \
411 411
 	dconf.h \
412 412
 	lzma_iface.c \
413
-	lzma_iface.h
413
+	lzma_iface.h \
414
+	explode.c \
415
+	explode.h
414 416
 
415 417
 libclamav_internal_utils_la_SOURCES = str.c \
416 418
 				    str.h \
... ...
@@ -515,6 +517,7 @@ distclean-compile:
515 515
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsig.Plo@am__quote@
516 516
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elf.Plo@am__quote@
517 517
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entconv.Plo@am__quote@
518
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/explode.Plo@am__quote@
518 519
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filetypes.Plo@am__quote@
519 520
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsg.Plo@am__quote@
520 521
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashtab.Plo@am__quote@
521 522
new file mode 100644
... ...
@@ -0,0 +1,315 @@
0
+/*
1
+ *  Copyright (C) 2007 Sourcefire Inc.
2
+ *  Author: aCaB <acab@clamav.net>
3
+ *
4
+ *  This program is free software; you can redistribute it and/or modify
5
+ *  it under the terms of the GNU General Public License version 2 as
6
+ *  published by the Free Software Foundation.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16
+ *  MA 02110-1301, USA.
17
+ */
18
+
19
+/* 
20
+ * Written from scratch based on specs from PKWARE:
21
+ * see www.pkware.com/documents/casestudies/APPNOTE.TXT
22
+ *
23
+ * To the best of my knowledge, it's patent free:
24
+ * http://www.unisys.com/about__unisys/lzw
25
+*/
26
+
27
+
28
+/* To Cami and Dario, the only lawyers I can stand */
29
+
30
+
31
+#if HAVE_CONFIG_H
32
+#include "clamav-config.h"
33
+#endif
34
+
35
+#if HAVE_STRING_H
36
+#include <string.h>
37
+#endif
38
+
39
+#include "explode.h"
40
+
41
+/* NOTE: sorting algo must be stable! */
42
/*
 * Stable bidirectional bubble sort of an index array.
 *
 * k        - indices into v; permuted in place so that the keys v[k[0]],
 *            v[k[1]], ... end up in non-decreasing order
 * v        - key values, read only
 * elements - index of the LAST entry of k to sort (entry count minus one),
 *            NOT the number of entries: the upward pass reads k[i+1] for
 *            every i < elements
 *
 * Only adjacent entries whose keys differ strictly are ever swapped, so
 * entries with equal keys keep their relative order (the sort is stable).
 */
static void bs(uint8_t *k, const uint8_t *v, unsigned int elements) {
  unsigned int lo=0, hi=elements, pos=0;

  for(;;) {
    int swapped=0;

    /* upward pass: the largest key of k[lo..hi] bubbles up to k[hi] */
    for(; pos<hi; pos++) {
      if(v[k[pos]]>v[k[pos+1]]) {
        uint8_t held=k[pos];
        k[pos]=k[pos+1];
        k[pos+1]=held;
        swapped=1;
      }
    }
    /* an upward pass without swaps means the range is sorted */
    if(!swapped) break;
    hi--;
    pos--;

    /* downward pass: the smallest key of k[lo..hi] sinks down to k[lo] */
    for(; pos>lo; pos--) {
      if(v[k[pos]]<v[k[pos-1]]) {
        uint8_t held=k[pos];
        k[pos]=k[pos-1];
        k[pos-1]=held;
      }
    }
    lo++;
    pos++;
  }
}
71
+
72
+
73
+static int unpack_tree(struct xplstate *X, uint32_t *tree, unsigned int expected) {
74
+  uint8_t temptree[256], order[256], *ttree=temptree;
75
+  uint8_t *cur=X->window;
76
+  uint8_t packsz;
77
+  unsigned int i;
78
+  uint16_t code=0, codeinc=0, lastlen=0;
79
+
80
+  packsz=*cur++;
81
+
82
+  for(i=0; i<expected; i++) order[i]=i;
83
+
84
+  i=expected;
85
+
86
+  do {
87
+    uint8_t values, len;
88
+    values = *cur++;
89
+    len = (values&15) + 1;
90
+    values = (values>>4) + 1;
91
+    if(values>i) return 1;
92
+    i-=values;
93
+    while(values--)
94
+      *ttree++ = len;
95
+  } while(packsz--);
96
+
97
+  if(i) return 1;
98
+
99
+  bs(order, temptree, expected-1);
100
+
101
+  i=expected-1;
102
+  do {
103
+    code=code+codeinc;
104
+    if(temptree[order[i]]!=lastlen) {
105
+      lastlen=temptree[order[i]];
106
+      codeinc=1<<(16-lastlen);
107
+    }
108
+    tree[order[i]]=code | ((uint32_t)lastlen<<16);
109
+  } while(i--);
110
+
111
+  return 0;
112
+}
113
+
114
/*
 * Linear search of a code table built by unpack_tree().
 *
 * tree - table to search, read only
 * size - number of entries in tree
 * code - candidate code, left-aligned in 16 bits
 * len  - number of bits of `code` gathered so far MINUS ONE (the callers
 *        pass their zero-based bit counter)
 *
 * Returns the index (symbol) of the entry whose code and bit length both
 * match, or -1 when there is none.  A linear scan is slow but the tables
 * hold at most 256 entries.
 */
static int lookup_tree(const uint32_t *tree, unsigned int size, uint16_t code, uint8_t len) {
  const uint32_t wanted=(((uint32_t)len+1)<<16) | code;
  unsigned int i;

  for(i=0; i<size; i++) {
    if(tree[i]==wanted)
      return (int)i;
  }
  return -1;
}
122
+
123
+int explode_init(struct xplstate *X, uint8_t flags) {
124
+  X->bits = X->cur = 0;
125
+  if(flags&2) {
126
+    X->largewin = 1;
127
+    X->mask = 0x1fff;
128
+  } else {
129
+    X->largewin = 0;
130
+    X->mask = 0xfff;
131
+  }
132
+  if(flags&4) {
133
+    X->state = GRABLITS;
134
+    X->litcodes = 1;
135
+    X->minlen=3;
136
+  } else {
137
+    X->state = GRABLENS;
138
+    X->litcodes = 0;
139
+    X->minlen=2;
140
+  }
141
+  X->got=0;
142
+  return EXPLODE_OK;
143
+}
144
+
145
/*
 * GETBIT - move the next bit of the input stream into `val`.
 *
 * Expects `X` (decoder state) and `val` in scope and may only be used
 * inside a function returning int: when the bit reservoir is empty and no
 * input is left it returns EXPLODE_EBUFF from the enclosing function.
 *
 * The reservoir is refilled four bytes at a time (one byte when fewer
 * than four remain).  The four bytes are assembled explicitly, lowest
 * stream byte in the lowest bits, so the result does not depend on host
 * byte order and no misaligned 32 bit load is performed.
 *
 * Wrapped in do/while(0) so that it behaves as one statement; always
 * follow it with a semicolon.
 */
#define GETBIT								\
  do {									\
    if(!X->bits) {							\
      if(!X->avail_in) return EXPLODE_EBUFF;				\
      if(X->avail_in>=4) {						\
        X->bitmap=(uint32_t)X->next_in[0] |				\
                  ((uint32_t)X->next_in[1]<<8) |			\
                  ((uint32_t)X->next_in[2]<<16) |			\
                  ((uint32_t)X->next_in[3]<<24);			\
        X->bits=32;							\
        X->next_in+=4;							\
        X->avail_in-=4;							\
      } else {								\
        X->bitmap=*X->next_in;						\
        X->bits=8;							\
        X->next_in++;							\
        X->avail_in--;							\
      }									\
    }									\
    X->bits--;								\
    val=X->bitmap&1;							\
    X->bitmap>>=1;							\
  } while(0)
166
+
167
+
168
/*
 * GETBITS(NUM) - move the next NUM bits (NUM <= 8 at every call site) of
 * the input stream into `val`, first stream bit in bit 0.
 *
 * Expects `X` (decoder state) and `val` in scope and may only be used
 * inside a function returning int: when reservoir plus remaining input
 * hold fewer than NUM bits it returns EXPLODE_EBUFF from the enclosing
 * function without consuming anything.
 *
 * When the reservoir runs short, its remaining bits become the low bits
 * of `val` and the missing ones are taken from a fresh refill (four bytes,
 * or one byte when fewer than four remain).  The four bytes are assembled
 * explicitly, lowest stream byte in the lowest bits, so the result does
 * not depend on host byte order and no misaligned 32 bit load is
 * performed.
 *
 * Wrapped in do/while(0) so that it behaves as one statement; always
 * follow it with a semicolon.
 */
#define GETBITS(NUM)							\
  do {									\
    if(X->bits>=(NUM)) {						\
      val=X->bitmap&((1u<<(NUM))-1);					\
      X->bitmap>>=(NUM);						\
      X->bits-=(NUM);							\
    } else {								\
      unsigned int gb_missing=(NUM)-X->bits;				\
      unsigned int gb_fresh;						\
      if(X->avail_in*8+X->bits<(NUM)) return EXPLODE_EBUFF;		\
      val=X->bitmap;							\
      if(X->avail_in>=4) {						\
        X->bitmap=(uint32_t)X->next_in[0] |				\
                  ((uint32_t)X->next_in[1]<<8) |			\
                  ((uint32_t)X->next_in[2]<<16) |			\
                  ((uint32_t)X->next_in[3]<<24);			\
        X->next_in+=4;							\
        X->avail_in-=4;							\
        gb_fresh=32;							\
      } else {								\
        X->bitmap=*X->next_in;						\
        X->next_in++;							\
        X->avail_in--;							\
        gb_fresh=8;							\
      }									\
      val|=(X->bitmap&((1u<<gb_missing)-1))<<X->bits;			\
      X->bitmap>>=gb_missing;						\
      X->bits=gb_fresh-gb_missing;					\
    }									\
  } while(0)
192
+
193
+
194
/*
 * GETCODES(CASE, WHICH, HOWMANY) - switch case that gathers one packed
 * code table from the input and unpacks it into X->WHICH.
 *
 * The packed table is staged at the start of X->window; X->got counts the
 * bytes staged so far, so the table may arrive split across several
 * calls.  Its first byte is a count and the whole table occupies that
 * count plus two bytes.
 *
 * Leaves the enclosing function with EXPLODE_EBUFF when more input is
 * needed and with EXPLODE_ESTREAM when the table is invalid.  On success
 * it resets X->got, advances X->state to the next enumerator and falls
 * through into whatever case follows.
 *
 * Expects `X` and `need` in scope.
 */
#define GETCODES(CASE, WHICH, HOWMANY)					\
  case CASE: {								\
    if(!X->avail_in) return EXPLODE_EBUFF;				\
    /* count byte: already staged, or still first in the input */	\
    need = X->got ? X->window[0] : *X->next_in;				\
    if(need > (HOWMANY) - 1) return EXPLODE_ESTREAM; /* too many codes */ \
    need = need + 2 - X->got; /* bytes still to be staged */		\
    if(need <= X->avail_in) {						\
      /* the rest of the table is here: stage it and unpack */		\
      memcpy(&X->window[X->got], X->next_in, need);			\
      X->avail_in -= need;						\
      X->next_in += need;						\
      if(unpack_tree(X, X->WHICH, HOWMANY)) return EXPLODE_ESTREAM;	\
      X->got=0;								\
      X->state++;							\
    } else {								\
      /* stage what is available and ask for more */			\
      memcpy(&X->window[X->got], X->next_in, X->avail_in);		\
      X->got += X->avail_in;						\
      X->next_in += X->avail_in;					\
      X->avail_in = 0;							\
      return EXPLODE_EBUFF;						\
    }									\
  }
218
+
219
/*
 * SETCASE(CASE) - record CASE as the state to resume from and plant the
 * matching `case` label at this very spot, so that a later call to
 * explode() re-enters the decoder exactly here.  The empty block only
 * gives the label a statement to attach to.
 */
#define SETCASE(CASE)		\
  X->state = (CASE);		\
  case (CASE):			\
    { /* label target */ }
223
+
224
+int explode(struct xplstate *X) {
225
+  unsigned int val, need;
226
+  int temp;
227
+  
228
+  switch(X->state) {
229
+    /* grab compressed coded literals, if present */
230
+    GETCODES(GRABLITS, lit_tree, 256);
231
+    /* grab compressed coded lens */
232
+    GETCODES(GRABLENS, len_tree, 64);
233
+    /* grab compressed coded dists */
234
+    GETCODES(GRABDISTS, dist_tree, 64);
235
+    
236
+  case EXPLODE:
237
+    while(X->avail_in || X->bits) {
238
+      GETBIT; /* can't fail */
239
+      if(val) {
240
+	if(X->litcodes) {
241
+	  X->backsize=0;
242
+	  X->state=EXPLODE_LITCODES;
243
+	  for(X->got=0; X->got<=15; X->got++) {
244
+	  case EXPLODE_LITCODES:
245
+	    GETBIT;
246
+	    X->backsize|=val<<(15-X->got);
247
+	    if((temp=lookup_tree(X->lit_tree, 256, X->backsize, X->got))!=-1) break;
248
+	  }
249
+	  if(temp==-1) return EXPLODE_ESTREAM;
250
+	  X->got=temp;
251
+	} else {
252
+	  SETCASE(EXPLODE_LITS);
253
+	  GETBITS(8);
254
+	  X->got=val;
255
+	}
256
+	SETCASE(EXPLODE_WBYTE);
257
+	if(!X->avail_out) return EXPLODE_EBUFF;
258
+	X->avail_out--;
259
+	*X->next_out = X->window[X->cur & X->mask] = X->got;
260
+	X->cur++;
261
+	X->next_out++;
262
+      } else {
263
+	SETCASE(EXPLODE_BASEDIST);
264
+	GETBITS(6+X->largewin);
265
+	X->backbytes=val;
266
+	X->backsize=0;
267
+	X->state=EXPLODE_DECODEDISTS;
268
+	for(X->got=0; X->got<=15; X->got++) {
269
+	case EXPLODE_DECODEDISTS:
270
+	  GETBIT;
271
+	  X->backsize|=val<<(15-X->got);
272
+	  if((temp=lookup_tree(X->dist_tree, 64, X->backsize, X->got))!=-1) break;
273
+	}
274
+	if(temp==-1) return EXPLODE_ESTREAM;
275
+	X->backbytes|=temp<<(6+X->largewin);
276
+	X->backbytes++;
277
+	X->backsize=0;
278
+	X->state=EXPLODE_DECODELENS;
279
+	for(X->got=0; X->got<=15; X->got++) {
280
+	case EXPLODE_DECODELENS:
281
+	  GETBIT;
282
+	  X->backsize|=val<<(15-X->got);
283
+	  if((temp=lookup_tree(X->len_tree, 64, X->backsize, X->got))!=-1) break;
284
+	}
285
+	if(temp==-1) {
286
+	  cli_dbgmsg("HERE3\n");
287
+	  return EXPLODE_ESTREAM;
288
+	}
289
+	if(temp==63) {
290
+	  SETCASE(EXPLODE_DECODEEXTRA);
291
+	  GETBITS(8);
292
+	  temp=63+val;
293
+	}
294
+	X->backsize=temp+X->minlen;
295
+	X->state=EXPLODE_BACKCOPY;
296
+	while(X->backsize--) {
297
+	case EXPLODE_BACKCOPY:
298
+	  if(!X->avail_out) return EXPLODE_EBUFF;
299
+	  X->avail_out--;
300
+	  if (X->cur>=X->backbytes)
301
+	    *X->next_out = X->window[X->cur & X->mask] = X->window[(X->cur-X->backbytes) & X->mask];
302
+	  else
303
+	    *X->next_out = X->window[X->cur & X->mask] = 0;
304
+	  X->cur++;
305
+	  X->next_out++;
306
+	}
307
+      }
308
+      X->state=EXPLODE;
309
+    }
310
+  }
311
+  return EXPLODE_EBUFF;
312
+}
313
+
314
/* Counterpart of explode_init(); the decoder owns no resources, so there is nothing to release. */
void explode_shutdown(void) {}
0 315
new file mode 100644
... ...
@@ -0,0 +1,73 @@
0
+/*
1
+ *  Copyright (C) 2007 Sourcefire Inc.
2
+ *  Author: aCaB <acab@clamav.net>
3
+ *
4
+ *  This program is free software; you can redistribute it and/or modify
5
+ *  it under the terms of the GNU General Public License version 2 as
6
+ *  published by the Free Software Foundation.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16
+ *  MA 02110-1301, USA.
17
+ */
18
+
19
+#ifndef __EXPLODE_H
20
+#define __EXPLODE_H
21
+
22
+#include "cltypes.h"
23
+
24
+enum {
25
+  EXPLODE_EBUFF,
26
+  EXPLODE_ESTREAM
27
+};
28
+
29
+#define EXPLODE_OK EXPLODE_EBUFF
30
+
31
+enum XPL_STATE {
32
+  GRABLITS,
33
+  GRABLENS,
34
+  GRABDISTS,
35
+  EXPLODE,
36
+  EXPLODE_LITCODES,
37
+  EXPLODE_LITS,
38
+  EXPLODE_BASEDIST,
39
+  EXPLODE_DECODEDISTS,
40
+  EXPLODE_DECODELENS,
41
+  EXPLODE_DECODEEXTRA,
42
+  EXPLODE_WBYTE,
43
+  EXPLODE_BACKCOPY
44
+};
45
+
46
+struct xplstate {
47
+  uint8_t *next_in;
48
+  uint8_t *next_out;
49
+  unsigned int got;
50
+  unsigned int minlen;
51
+  unsigned int mask;
52
+  unsigned int cur;
53
+  uint32_t lit_tree[256];
54
+  uint32_t len_tree[64];
55
+  uint32_t dist_tree[64];
56
+  uint32_t bitmap;
57
+  uint32_t avail_in;
58
+  uint32_t avail_out;
59
+  uint16_t backbytes;
60
+  uint16_t backsize;
61
+  uint8_t window[8192];
62
+  uint8_t bits;
63
+  uint8_t largewin;
64
+  uint8_t litcodes;
65
+  enum XPL_STATE state;
66
+};
67
+
68
+int explode_init(struct xplstate *, uint8_t);
69
+int explode(struct xplstate *);
70
+void explode_shutdown();
71
+
72
+#endif /* __EXPLODE_H */
... ...
@@ -45,6 +45,7 @@
45 45
 #include <bzlib.h>
46 46
 #endif
47 47
 
48
+#include "explode.h"
48 49
 #include "others.h"
49 50
 #include "clamav.h"
50 51
 #include "scanners.h"
... ...
@@ -62,7 +63,7 @@ static int wrap_inflateinit2(void *a, int b) {
62 62
   return inflateInit2(a, b);
63 63
 }
64 64
 
65
-static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, unsigned int *fu, cli_ctx *ctx, char *tmpd) {
65
+static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, uint16_t flags, unsigned int *fu, cli_ctx *ctx, char *tmpd) {
66 66
   char name[1024], obuf[BUFSIZ];
67 67
   char *tempfile = name;
68 68
   int of, ret=CL_CLEAN;
... ...
@@ -84,7 +85,7 @@ static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, un
84 84
     if(csize<usize) {
85 85
       unsigned int fake = *fu + 1;
86 86
       cli_dbgmsg("cli_unzip: attempting to inflate stored file with inconsistent size\n");
87
-      if ((ret=unz(src, csize, usize, ALG_DEFLATE, &fake, ctx, tmpd))==CL_CLEAN) {
87
+      if ((ret=unz(src, csize, usize, ALG_DEFLATE, 0, &fake, ctx, tmpd))==CL_CLEAN) {
88 88
 	(*fu)++;
89 89
 	res=fake-(*fu);
90 90
       }
... ...
@@ -206,7 +207,7 @@ static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, un
206 206
 	    break;
207 207
 	  }
208 208
 	  cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (%lu)\n", ctx->limits->maxfilesize);
209
-	  res = Z_STREAM_END;
209
+	  res = BZ_STREAM_END;
210 210
 	  break;
211 211
 	}
212 212
 	if(cli_writen(of, obuf, sizeof(obuf)-strm.avail_out) != (int)(sizeof(obuf)-strm.avail_out)) {
... ...
@@ -216,6 +217,7 @@ static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, un
216 216
 	}
217 217
 	strm.next_out = obuf;
218 218
 	strm.avail_out = sizeof(obuf);
219
+	continue;
219 220
       }
220 221
       break;
221 222
     }
... ...
@@ -227,6 +229,45 @@ static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, un
227 227
   }
228 228
 #endif /* HAVE_BZLIB_H */
229 229
 
230
+
231
+  case ALG_IMPLODE: {
232
+    struct xplstate strm;
233
+    strm.next_in = (char *)src;
234
+    strm.next_out = obuf;
235
+    strm.avail_in = csize;
236
+    strm.avail_out = sizeof(obuf);
237
+    if (explode_init(&strm, flags)!=EXPLODE_OK) {
238
+      cli_dbgmsg("cli_unzip: explode_init() failed\n");
239
+      break;
240
+    }
241
+    while((res = explode(&strm))==EXPLODE_OK) {
242
+      if(strm.avail_out!=sizeof(obuf)) {
243
+	written+=sizeof(obuf)-strm.avail_out;
244
+	if(ctx->limits && ctx->limits->maxfilesize && written > ctx->limits->maxfilesize) {
245
+	  if(BLOCKMAX) {
246
+	    *ctx->virname = "Zip.ExceededFileSize";
247
+	    ret = CL_VIRUS;
248
+	    break;
249
+	  }
250
+	  cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (%lu)\n", ctx->limits->maxfilesize);
251
+	  res = 0;
252
+	  break;
253
+	}
254
+	if(cli_writen(of, obuf, sizeof(obuf)-strm.avail_out) != (int)(sizeof(obuf)-strm.avail_out)) {
255
+	  cli_warnmsg("cli_unzip: falied to write %lu exploded bytes\n", sizeof(obuf)-strm.avail_out);
256
+	  ret = CL_EIO;
257
+	  res=1;
258
+	}
259
+	strm.next_out = obuf;
260
+	strm.avail_out = sizeof(obuf);
261
+	continue;
262
+      }
263
+      break;
264
+    }
265
+    break;
266
+  }
267
+
268
+
230 269
   case ALG_LZMA:
231 270
     /* easy but there's not a single sample in the zoo */
232 271
 
... ...
@@ -238,7 +279,6 @@ static int unz(uint8_t *src, uint32_t csize, uint32_t usize, uint16_t method, un
238 238
   case ALG_REDUCE2:
239 239
   case ALG_REDUCE3:
240 240
   case ALG_REDUCE4:
241
-  case ALG_IMPLODE:
242 241
   case ALG_TOKENZD:
243 242
   case ALG_OLDTERSE:
244 243
   case ALG_RSVD1:
... ...
@@ -365,7 +405,7 @@ static unsigned int lhdr(uint8_t *zip, uint32_t zsize, unsigned int *fu, unsigne
365 365
       *ctx->virname = "Oversized.Zip";
366 366
       *ret = CL_VIRUS;
367 367
       return 0;
368
-    } else *ret = unz(zip, csize, usize, LH_method, fu, ctx, tmpd);
368
+    } else *ret = unz(zip, csize, usize, LH_method, LH_flags, fu, ctx, tmpd);
369 369
     zip+=csize;
370 370
     zsize-=csize;
371 371
   }