Browse code

rewritten decompressor for mscompress

git-svn: trunk@3416

Tomasz Kojm authored on 2007/12/14 08:18:03
Showing 4 changed files
... ...
@@ -1,3 +1,7 @@
1
+Thu Dec 13 23:34:22 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: rewritten decompressor for mscompress - faster and more secure
4
+
1 5
 Thu Dec 13 21:47:53 CET 2007 (acab)
2 6
 -----------------------------------
3 7
   * libclamav: merge the post 0.92 code
... ...
@@ -1,15 +1,11 @@
1 1
 /*
2
- *  msexpand: Microsoft "compress.exe/expand.exe" compatible decompressor
3
- *
4
- *  Copyright (c) 2000 Martin Hinner <mhi@penguin.cz>
5
- *  Algorithm & data structures by M. Winterhoff <100326.2776@compuserve.com>
6
- *
7
- *  Corrected and adapted to ClamAV by Tomasz Kojm <tkojm@clamav.net>
2
+ *  Copyright (C) 2007 Sourcefire, Inc.
3
+ *  Author: Tomasz Kojm <tkojm@clamav.net>
4
+ *  Credits: Decompression scheme by M. Winterhoff
8 5
  *
9 6
  *  This program is free software; you can redistribute it and/or modify
10
- *  it under the terms of the GNU General Public License as published by
11
- *  the Free Software Foundation; either version 2, or (at your option)
12
- *  any later version.
7
+ *  it under the terms of the GNU General Public License version 2 as
8
+ *  published by the Free Software Foundation.
13 9
  *
14 10
  *  This program is distributed in the hope that it will be useful,
15 11
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
... ...
@@ -23,124 +19,137 @@
23 23
  */
24 24
 
25 25
 #include <stdio.h>
26
-#include <stdlib.h>
27
-#ifdef	HAVE_UNISTD_H
28
-#include <unistd.h>
29
-#endif
30
-#include <string.h>
26
+#include <stddef.h>
27
+#include <sys/types.h>
28
+#include <sys/stat.h>
29
+#include <fcntl.h>
31 30
 
32
-#if HAVE_CONFIG_H
33
-#include "clamav-config.h"
34
-#endif
31
+#include "clamav.h"
35 32
 #include "cltypes.h"
36 33
 #include "others.h"
37 34
 #include "msexpand.h"
38 35
 
39
-int cli_msexpand(FILE *in, FILE *out)
40
-{
41
-	int bits, ch, i, j, len, mask;
42
-	unsigned char *buffer;
43
-	uint32_t magic1, magic2, magic3, filesize;
44
-	uint16_t reserved;
45
-
36
+#ifndef HAVE_ATTRIB_PACKED
37
+#define __attribute__(x)
38
+#endif
46 39
 
47
-    if(fread(&magic1, sizeof(magic1), 1, in) != 1) {
48
-	return -1;
49
-    }
40
+#define EC32(x) le32_to_host(x)
41
+#define EC16(x) le16_to_host(x)
42
+
43
+#define MAGIC1	0x44445a53
44
+#define MAGIC2	0x3327f088
45
+#define MAGIC3	0x0041
46
+
47
+struct msexp_hdr {
48
+    uint32_t magic1;
49
+    uint32_t magic2;
50
+    uint16_t magic3;
51
+    uint32_t fsize;
52
+} __attribute((packed));
53
+
54
+#define BSIZE 4096
55
+#define RWBUFF 2048
56
+
57
+#define READBYTES				\
58
+    ret = cli_readn(fd, rbuff, RWBUFF);		\
59
+    if(ret == -1)				\
60
+	return CL_EIO;				\
61
+    if(!ret)					\
62
+	break;					\
63
+    rbytes = (unsigned int) ret;		\
64
+    r = 0;
65
+
66
+#define WRITEBYTES				\
67
+    ret = cli_writen(ofd, wbuff, w);		\
68
+    if(ret == -1 || (unsigned int) ret != w)	\
69
+	return CL_EIO;				\
70
+    wbytes += w;				\
71
+    if(wbytes >= hdr.fsize)			\
72
+	return CL_SUCCESS;			\
73
+    w = 0;
74
+
75
+
76
+int cli_msexpand(int fd, int ofd, cli_ctx *ctx)
77
+{
78
+	struct msexp_hdr hdr;
79
+	uint8_t i, mask, bits;
80
+	unsigned char buff[BSIZE], rbuff[RWBUFF], wbuff[RWBUFF];
81
+	unsigned int j = BSIZE - 16, k, l, r = 0, w = 0, rbytes = 0, wbytes = 0;
82
+	int ret;
50 83
 
51
-    if(magic1 == le32_to_host(0x44445A53L))
52
-    {
53
-	if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
54
-	    return -1;
55
-	}
56 84
 
57
-	if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
58
-	    return -1;
59
-	}
85
+    if(cli_readn(fd, &hdr, sizeof(hdr)) == -1)
86
+	return CL_EIO;
60 87
 
61
-	if(fread(&filesize, sizeof(filesize), 1, in) != 1) {
62
-	    return -1;
63
-	}
88
+    if(EC32(hdr.magic1) != MAGIC1 || EC32(hdr.magic2) != MAGIC2 || EC16(hdr.magic3) != MAGIC3) {
89
+	cli_dbgmsg("MSEXPAND: Not supported file format\n");
90
+	return CL_EFORMAT;
91
+    }
64 92
 
65
-	if(magic2 != le32_to_host(0x3327F088L))
66
-	{
67
-	    cli_warnmsg("msexpand: Not a MS-compressed file\n");
68
-	    return -1;
69
-	}
93
+    cli_dbgmsg("MSEXPAND: File size from header: %u\n", hdr.fsize);
70 94
 
71
-    } else
72
-    if(magic1 == le32_to_host(0x4A41574BL))
73
-    {
74
-	if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
75
-	    return -1;
76
-	}
95
+    if(ctx->limits && ctx->limits->maxfilesize && (hdr.fsize > ctx->limits->maxfilesize)) {
96
+	cli_dbgmsg("MSEXPAND: Size exceeded (%u, max: %lu)\n", hdr.fsize, ctx->limits->maxfilesize);
97
+        if(BLOCKMAX) {
98
+	    *ctx->virname = "MSEXPAND.ExceededFileSize";
99
+            return CL_VIRUS;
100
+        }
101
+	hdr.fsize = ctx->limits->maxfilesize;
102
+	cli_dbgmsg("MSEXPAND: Only extracting first %u bytes\n", hdr.fsize); /* may extract up to 2kB more */
103
+    }
77 104
 
78
-	if(fread(&magic3, sizeof(magic3), 1, in) != 1) {
79
-	    return -1;
80
-	}
105
+    while(1) {
81 106
 
82
-	if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
83
-	    return -1;
107
+	if(!rbytes || (r == rbytes)) {
108
+	    READBYTES;
84 109
 	}
85 110
 
86
-	if(magic2 != le32_to_host(0xD127F088L) || magic3 != le32_to_host(0x00120003L))
87
-	{
88
-	    cli_warnmsg("msexpand: Not a MS-compressed file\n");
89
-	    return -1;
90
-	}
111
+	bits = rbuff[r]; r++;
91 112
 
92
-	cli_warnmsg("msexpand: unsupported version 6.22\n");
93
-	return -1;
113
+	mask = 1;
114
+	for(i = 0; i < 8; i++) {
115
+	    if(bits & mask) {
116
+		if(r == rbytes) {
117
+		    READBYTES;
118
+		}
94 119
 
95
-    } else {
96
-	cli_warnmsg("msexpand: Not a MS-compressed file\n");
97
-	return -1;
98
-    }
120
+		if(w == RWBUFF) {
121
+		    WRITEBYTES;
122
+		}
99 123
 
100
-    if((buffer = (unsigned char *) cli_calloc(4096, sizeof(char))) == NULL) {
101
-	cli_errmsg("msexpand: Can't allocate memory\n");
102
-	return -1;
103
-    }
124
+		wbuff[w] = buff[j] = rbuff[r];
125
+		r++; w++;
126
+		j++; j %= BSIZE;
127
+	    } else {
128
+		if(r == rbytes) {
129
+		    READBYTES;
130
+		}
131
+		k = rbuff[r]; r++;
104 132
 
105
-    i = 4096 - 16;
106
-
107
-    while (1) {
108
-	if((bits = fgetc(in)) == EOF)
109
-	    break;
110
-
111
-	for(mask = 0x01; mask & 0xFF; mask <<= 1) {
112
-	    if(!(bits & mask)) {
113
-		if((j = fgetc(in)) == EOF)
114
-		    break;
115
-		len = fgetc(in);
116
-		j += (len & 0xF0) << 4;
117
-		len = (len & 15) + 3;
118
-		while(len--) {
119
-		    buffer[i] = buffer[j];
120
-		    if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
121
-			free(buffer);
122
-			return -1;
123
-		    }
124
-		    j++;
125
-		    j %= 4096;
126
-		    i++;
127
-		    i %= 4096;
133
+		if(r == rbytes) {
134
+		    READBYTES;
128 135
 		}
129
-	    } else {
130
-		if((ch = fgetc(in)) == EOF)
131
-		    break;
136
+		l = rbuff[r]; r++;
132 137
 
133
-		buffer[i] = ch;
134
-		if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
135
-		    free(buffer);
136
-		    return -1;
138
+		k += (l & 0xf0) << 4;
139
+		l = (l & 0x0f) + 3;
140
+		while(l--) {
141
+		    if(w == RWBUFF) {
142
+			WRITEBYTES;
143
+		    }
144
+		    wbuff[w] = buff[j] = buff[k];
145
+		    w++;
146
+		    k++; k %= BSIZE;
147
+		    j++; j %= BSIZE;
137 148
 		}
138
-		i++;
139
-		i %= 4096;
140 149
 	    }
150
+	    mask *= 2;
141 151
 	}
142 152
     }
143 153
 
144
-    free(buffer);
145
-    return 0;
154
+    if(w) {
155
+	WRITEBYTES;
156
+    }
157
+
158
+    return CL_SUCCESS;
146 159
 }
... ...
@@ -1,10 +1,11 @@
1 1
 /*
2
- *  Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *  Copyright (C) 2007 Sourcefire, Inc.
3
+ *  Author: Tomasz Kojm <tkojm@clamav.net>
4
+ *  Credits: Decompression scheme by M. Winterhoff
3 5
  *
4 6
  *  This program is free software; you can redistribute it and/or modify
5
- *  it under the terms of the GNU General Public License as published by
6
- *  the Free Software Foundation; either version 2 of the License, or
7
- *  (at your option) any later version.
7
+ *  it under the terms of the GNU General Public License version 2 as
8
+ *  published by the Free Software Foundation.
8 9
  *
9 10
  *  This program is distributed in the hope that it will be useful,
10 11
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
... ...
@@ -20,8 +21,8 @@
20 20
 #ifndef __MSEXPAND_H
21 21
 #define __MSEXPAND_H
22 22
 
23
-#include <stdio.h>
23
+#include "others.h"
24 24
 
25
-int cli_msexpand(FILE *in, FILE *out);
25
+int cli_msexpand(int fd, int ofd, cli_ctx *ctx);
26 26
 
27 27
 #endif
... ...
@@ -652,71 +652,40 @@ static int cli_scanbzip(int desc, cli_ctx *ctx)
652 652
 }
653 653
 #endif
654 654
 
655
-/*
656 655
 static int cli_scanszdd(int desc, cli_ctx *ctx)
657 656
 {
658
-	int fd, ret = CL_CLEAN, dcpy;
659
-	FILE *tmp = NULL, *in;
657
+	int ofd, ret;
660 658
 	char *tmpname;
661 659
 
662 660
 
663 661
     cli_dbgmsg("in cli_scanszdd()\n");
664 662
 
665
-    if((dcpy = dup(desc)) == -1) {
666
-	cli_dbgmsg("SZDD: Can't duplicate descriptor %d\n", desc);
667
-	return CL_EIO;
668
-    }
669
-
670
-    if((in = fdopen(dcpy, "rb")) == NULL) {
671
-	cli_dbgmsg("SZDD: Can't open descriptor %d\n", desc);
672
-	close(dcpy);
673
-	return CL_EMSCOMP;
674
-    }
675
-
676
-    if((tmpname = cli_gentempstream(NULL, &tmp)) == NULL) {
677
-	cli_dbgmsg("SZDD: Can't generate temporary file.\n");
678
-	fclose(in);
679
-	return CL_ETMPFILE;
680
-    }
681
-
682
-    if(cli_msexpand(in, tmp) == -1) {
683
-	cli_dbgmsg("SZDD: msexpand failed.\n");
684
-	fclose(in);
685
-	fclose(tmp);
686
-	if(!cli_leavetemps_flag)
687
-	    unlink(tmpname);
688
-	free(tmpname);	
689
-	return CL_EMSCOMP;
663
+    if((ret = cli_gentempfd(NULL, &tmpname, &ofd))) {
664
+	cli_dbgmsg("MSEXPAND: Can't generate temporary file/descriptor\n");
665
+	return ret;
690 666
     }
691 667
 
692
-    fclose(in);
693
-    if(fflush(tmp)) {
694
-	cli_dbgmsg("SZDD: fflush() failed.\n");
695
-	fclose(tmp);
696
-	if(!cli_leavetemps_flag)
697
-	    unlink(tmpname);
698
-	free(tmpname);	
699
-	return CL_EFSYNC;
700
-    }
668
+    lseek(desc, 0, SEEK_SET);
669
+    ret = cli_msexpand(desc, ofd, ctx);
701 670
 
702
-    fd = fileno(tmp);
703
-    lseek(fd, 0, SEEK_SET);
704
-    if((ret = cli_magic_scandesc(fd, ctx)) == CL_VIRUS) {
705
-	cli_dbgmsg("SZDD: Infected with %s\n", *ctx->virname);
706
-	fclose(tmp);
671
+    if(ret != CL_SUCCESS) { /* CL_VIRUS or some error */
672
+	close(ofd);
707 673
 	if(!cli_leavetemps_flag)
708 674
 	    unlink(tmpname);
709 675
 	free(tmpname);	
710
-	return CL_VIRUS;
676
+	return ret;
711 677
     }
712 678
 
713
-    fclose(tmp);
679
+    cli_dbgmsg("MSEXPAND: Decompressed into %s\n", tmpname);
680
+    lseek(ofd, 0, SEEK_SET);
681
+    ret = cli_magic_scandesc(ofd, ctx);
682
+    close(ofd);
714 683
     if(!cli_leavetemps_flag)
715 684
 	unlink(tmpname);
716 685
     free(tmpname);	
686
+
717 687
     return ret;
718 688
 }
719
-*/
720 689
 
721 690
 static int cli_scanmscab(int desc, cli_ctx *ctx, off_t sfx_offset)
722 691
 {
... ...
@@ -1847,12 +1816,12 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1847 1847
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT))
1848 1848
 		ret = cli_scanautoit(desc, ctx, 23);
1849 1849
 	    break;
1850
-/*
1850
+
1851 1851
 	case CL_TYPE_MSSZDD:
1852 1852
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SZDD))
1853 1853
 		ret = cli_scanszdd(desc, ctx);
1854 1854
 	    break;
1855
-*/
1855
+
1856 1856
 	case CL_TYPE_MSCAB:
1857 1857
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB))
1858 1858
 		ret = cli_scanmscab(desc, ctx, 0);