Browse code

one pass binhex

aCaB authored on 2010/02/12 20:46:57
Showing 3 changed files
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
2
+ *  Copyright (C) 2010 Sourcefire, Inc.
3 3
  *
4
- *  Authors: Nigel Horne
4
+ *  Authors: aCaB <acab@clamav.net>
5 5
  *
6 6
  *  This program is free software; you can redistribute it and/or modify
7 7
  *  it under the terms of the GNU General Public License version 2 as
... ...
@@ -17,197 +17,214 @@
17 17
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 18
  *  MA 02110-1301, USA.
19 19
  *
20
- * Change History:
21
- * $Log: binhex.c,v $
22
- * Revision 1.23  2007/02/12 20:46:08  njh
23
- * Various tidy
24
- *
25
- * Revision 1.22  2006/07/31 09:19:52  njh
26
- * Use MAP_PRIVATE
27
- *
28
- * Revision 1.21  2006/07/01 16:17:35  njh
29
- * Added destroy flag
30
- *
31
- * Revision 1.20  2006/07/01 03:47:50  njh
32
- * Don't loop if binhex runs out of memory
33
- *
34
- * Revision 1.19  2006/05/19 11:02:12  njh
35
- * Just include mbox.h
36
- *
37
- * Revision 1.18  2006/04/09 19:59:27  kojm
38
- * update GPL headers with new address for FSF
39
- *
40
- * Revision 1.17  2005/11/06 14:03:26  nigelhorne
41
- * Ensure NAME_MAX isn't redefined on BeOS
42
- *
43
- * Revision 1.16  2005/05/14 16:13:25  nigelhorne
44
- * Ensure munmap is the right size
45
- *
46
- * Revision 1.15  2005/05/13 19:30:34  nigelhorne
47
- * Clean cli_realloc call
48
- *
49
- * Revision 1.14  2005/03/10 08:51:30  nigelhorne
50
- * Tidy
51
- *
52
- * Revision 1.13  2005/01/19 05:29:41  nigelhorne
53
- * tidy
54
- *
55
- * Revision 1.12  2004/12/27 14:17:14  nigelhorne
56
- * Fix segfault if write to temporary file fails
57
- *
58
- * Revision 1.11  2004/12/17 12:03:38  nigelhorne
59
- * Tidy up for machines without MMAP
60
- *
61
- * Revision 1.10  2004/12/16 15:29:51  nigelhorne
62
- * Tidy
63
- *
64
- * Revision 1.9  2004/11/28 22:06:39  nigelhorne
65
- * Tidy space only headers code
66
- *
67
- * Revision 1.8  2004/11/28 21:05:50  nigelhorne
68
- * Handle headers with only spaces
69
- *
70
- * Revision 1.7  2004/11/23 09:05:26  nigelhorne
71
- * Fix crash in base64 encoded binhex files
72
- *
73
- * Revision 1.6  2004/11/22 15:16:53  nigelhorne
74
- * Use cli_realloc instead of many cli_mallocs
75
- *
76
- * Revision 1.5  2004/11/18 20:11:34  nigelhorne
77
- * Fix segfault
78
- *
79
- * Revision 1.4  2004/11/18 19:30:29  kojm
80
- * add support for Mac's HQX file format
81
- *
82
- * Revision 1.3  2004/11/18 18:24:45  nigelhorne
83
- * Added binhex.h
84
- *
85
- * Revision 1.2  2004/11/18 18:09:06  nigelhorne
86
- * First draft of binhex.c
87
- *
88 20
  */
89
-static	char	const	rcsid[] = "$Id: binhex.c,v 1.23 2007/02/12 20:46:08 njh Exp $";
90
-
91
-#include "clamav.h"
92 21
 
93 22
 #if HAVE_CONFIG_H
94 23
 #include "clamav-config.h"
95 24
 #endif
96 25
 
97
-#ifdef CL_THREAD_SAFE
98
-#ifndef	_REENTRANT
99
-#define	_REENTRANT	/* for Solaris 2.8 */
100
-#endif
101
-#endif
102
-
103
-#include <stdio.h>
104
-#include <memory.h>
105
-#include <sys/stat.h>
106
-#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H)
107
-#include <sys/mman.h>
108
-#endif
26
+#include <string.h>
109 27
 
28
+#include "scanners.h"
29
+#include "cltypes.h"
110 30
 #include "others.h"
111
-#include "mbox.h"
112
-#include "binhex.h"
31
+#include "clamav.h"
113 32
 #include "fmap.h"
114 33
 
115
-int
116
-cli_binhex(const char *dir, fmap_t *map)
117
-{
118
-	char *buf, *start, *line;
119
-	size_t size;
120
-	long bytesleft;
121
-	message *m;
122
-	fileblob *fb;
123
-	text *t_line;
124
-
125
-	size = (size_t)map->len;
126
-
127
-	if(size == 0)
128
-		return CL_CLEAN;
129
-
130
-	m = messageCreate();
131
-	if(m == NULL)
132
-		return CL_EMEM;
133
-
134
-	start = buf = fmap_need_off_once(map, 0, size);
135
-	if(!buf) {
136
-		messageDestroy(m);
137
-		return CL_EMAP;
138
-	}
139
-
140
-	cli_dbgmsg("mmap'ed binhex file\n");
141
-
142
-	bytesleft = (long)size;
143
-	line = NULL;
144
-
145
-	while(bytesleft > 0) {
146
-		int length = bytesleft;
147
-		char *ptr, *newline;
148 34
 
149
-		/*printf("%d: ", bytesleft);*/
150
-
151
-		for(ptr = buf; bytesleft && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
152
-			--bytesleft;
35
+static const uint8_t hqxtbl[] = { /* 6-bit decode table indexed by input byte 0x00-0x7f: a valid encoding character maps to its value 0x00-0x3f, anything else maps to 0xff, which cli_binhex reports as an invalid character */
36
+    /*           00   01   02   03   04   05   06   07   08   09   0a   0b   0c   0d   0e   0f */
37
+    /* 00-0f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
38
+    /* 10-1f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
39
+    /* 20-2f */	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
40
+    /* 30-3f */	0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
41
+    /* 40-4f */	0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
42
+    /* 50-5f */	0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
43
+    /* 60-6f */	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
44
+    /* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
45
+};
46
+
47
+#define BH_FLUSH_SZ (BUFSIZ - 256)
48
+
49
+int cli_binhex(cli_ctx *ctx) {
50
+    fmap_t *map = *ctx->fmap;
51
+    uint8_t *encoded, decoded[BUFSIZ], spare_bits, last_byte=0, this_byte, offset=0;
52
+    size_t enc_done=0, enc_todo=map->len;
53
+    unsigned int dec_done=0, chunksz = 0, chunkoff=0;
54
+    uint32_t datalen, reslen;
55
+    int in_data = 0, in_run = 0, _6bit=-1, datafd, resfd, ret = CL_CLEAN;
56
+    enum binhex_phase { IN_HEADER, IN_DATA, IN_LIMBO1, IN_LIMBO2, IN_RES } write_phase = IN_HEADER;
57
+    char *dname, *rname;
58
+
59
+    cli_dbgmsg("in cli_binhex\n");
60
+    if(!map->len) return CL_CLEAN;
61
+
62
+    if((ret = cli_gentempfd(ctx->engine->tmpdir, &dname, &datafd)) != CL_SUCCESS)
63
+	return ret;
64
+
65
+    if((ret = cli_gentempfd(ctx->engine->tmpdir, &rname, &resfd)) != CL_SUCCESS) {
66
+	close(datafd);
67
+	if(cli_unlink(dname)) ret = CL_EUNLINK;
68
+	free(dname);
69
+	return ret;
70
+    }
71
+
72
+    while(1) {
73
+	uint8_t b;
74
+	if(!enc_todo || dec_done >= BH_FLUSH_SZ) {
75
+	    if(!enc_todo && !dec_done) {
76
+		cli_dbgmsg("cli_binhex: possibly truncated file\n");
77
+		break;
78
+	    }
79
+	    if(write_phase == IN_HEADER) {
80
+		uint32_t namelen = (uint32_t)decoded[0], hdrlen = 1 + namelen + 1 + 4 + 4 + 2;
81
+		datalen = (decoded[hdrlen]<<24) | (decoded[hdrlen+1]<<16) | (decoded[hdrlen+2]<<8) | decoded[hdrlen+3];
82
+		hdrlen += 4;
83
+		reslen = (decoded[hdrlen]<<24) | (decoded[hdrlen+1]<<16) | (decoded[hdrlen+2]<<8) | decoded[hdrlen+3];
84
+		hdrlen += 4 + 2;
85
+		decoded[namelen+1] = 0;
86
+		if(dec_done <= hdrlen) {
87
+		    cli_dbgmsg("cli_binhex: possibly truncated file\n");
88
+		    break;
153 89
 		}
154
-
155
-		length -= bytesleft;
156
-		/*printf("%d: ", length);*/
157
-
158
-		newline = cli_realloc(line, (size_t)(length + 1));
159
-		if(newline == NULL)
160
-			break;
161
-
162
-		line = newline;
163
-
164
-		memcpy(line, buf, length);
165
-		line[length] = '\0';
166
-
167
-		/*puts(line);*/
168
-
169
-		if(messageAddStr(m, line) < 0)
90
+		cli_dbgmsg("cli_binhex: decoding '%s' - %u bytes of data to %s - %u bytes or resources to %s\n", decoded+1, datalen, dname, reslen, rname);
91
+		memmove(decoded, &decoded[hdrlen], dec_done - hdrlen);
92
+		dec_done -= hdrlen;
93
+		write_phase++;
94
+	    }
95
+	    if(dec_done && write_phase == IN_DATA) {
96
+		unsigned int todo = MIN(dec_done, datalen);
97
+		datalen -= todo;
98
+		dec_done -= todo;
99
+		if(cli_writen(datafd, decoded, todo)!=(int)todo) {
100
+		    ret = CL_EWRITE;
101
+		    break;
102
+		}
103
+		if(!datalen) {
104
+		    write_phase++;
105
+		    lseek(datafd, 0, SEEK_SET);
106
+		    ret = cli_magic_scandesc(datafd, ctx);
107
+		    if(ret == CL_VIRUS) break;
108
+		}
109
+		if(dec_done)
110
+		    memmove(decoded, &decoded[todo], dec_done);
111
+	    }
112
+	    if(dec_done && write_phase == IN_LIMBO1) {
113
+		if(dec_done > 1) {
114
+		    if(reslen<5) {
115
+			cli_dbgmsg("cli_binhex: skipping resources (too small)\n");
170 116
 			break;
171
-
172
-		if((bytesleft > 0) && (*ptr == '\r')) {
173
-			ptr++;
174
-			bytesleft--;
117
+		    }
118
+		    dec_done-=2;
119
+		    write_phase+=2;
120
+		    if(dec_done)
121
+			memmove(decoded, &decoded[2], dec_done);
122
+		} else {
123
+		    dec_done--;
124
+		    write_phase++;
125
+		    if(dec_done)
126
+			memmove(decoded, &decoded[1], dec_done);
127
+		}
128
+	    }
129
+	    if(dec_done && write_phase == IN_LIMBO2) {
130
+		if(reslen<5) {
131
+		    cli_dbgmsg("cli_binhex: skipping resources (too small)\n");
132
+		    break;
175 133
 		}
176
-		buf = ++ptr;
177
-		bytesleft--;
134
+		write_phase++;
135
+		if(--dec_done)
136
+		    memmove(decoded, &decoded[1], dec_done);
137
+	    }
138
+	    if(dec_done && write_phase == IN_RES) {
139
+		unsigned int todo = MIN(dec_done, reslen);
140
+		reslen -= todo;
141
+		dec_done -= todo;
142
+		if(cli_writen(resfd, decoded, todo)!=(int)todo) {
143
+		    ret = CL_EWRITE;
144
+		    break;
145
+		}
146
+		if(!reslen) {
147
+		    lseek(resfd, 0, SEEK_SET);
148
+		    ret = cli_magic_scandesc(resfd, ctx);
149
+		    break;
150
+		}
151
+	    }
178 152
 	}
179 153
 
180
-	if(line)
181
-		free(line);
182
-
183
-	if((t_line = binhexBegin(m)) == NULL) {
184
-		messageDestroy(m);
185
-		cli_dbgmsg("No binhex line found\n");
186
-		return CL_EFORMAT;
154
+	if(!chunksz) {
155
+	    chunksz = MIN(enc_todo, map->pgsz);
156
+	    encoded = fmap_need_off_once(map, enc_done, chunksz);
157
+	    if(!encoded) {
158
+		ret = CL_EREAD;
159
+		break;
160
+	    }
161
+	    chunkoff = 0;
187 162
 	}
163
+	chunksz--;
188 164
 
189
-	while(((t_line = t_line->t_next) != NULL) && (t_line->t_line == NULL));
165
+	b = encoded[chunkoff++];
166
+	enc_done++;
167
+	enc_todo--;
190 168
 
191
-	if(!t_line) {
192
-		messageDestroy(m);
193
-		cli_dbgmsg("No binhex data to parse\n");
194
-		return CL_EFORMAT;
169
+	if((char)b == '\r' || (char)b == '\n') { /* FIXME: skip eol on 8bit encoding ? */
170
+	    in_data = 1;
171
+	    continue;
172
+	}
173
+	if(!in_data) continue;
174
+	if(_6bit < 0) {
175
+	    _6bit = ((char)b == ':');
176
+	    continue;
177
+	}
178
+	if(_6bit) {
179
+	    if((char)b == ':')
180
+		continue;
181
+	    if(b > 0x7f || (b = hqxtbl[b]) == 0xff) {
182
+		cli_dbgmsg("cli_binhex: Invalid character (%02x)\n", encoded[chunkoff-1]);
183
+		break;
184
+	    }
185
+	    switch((offset++) & 3) { /* 6 bits per char */
186
+	    case 0: /* left-6h */
187
+		spare_bits = b<<2;
188
+		continue;
189
+	    case 1: /* left-2l + middle-4h */
190
+		this_byte = spare_bits | (b>>4);
191
+		spare_bits = b<<4;
192
+		break;
193
+	    case 2: /* middle-4l + right-2h */
194
+		this_byte = spare_bits | (b>>2);
195
+		spare_bits = b<<6;
196
+		break;
197
+	    case 3: /* right-6l */
198
+		this_byte = spare_bits | b;
199
+	    }
200
+	} else {
201
+	    this_byte = b;
195 202
 	}
196 203
 
197
-	/* similar to binhexMessage */
198
-	messageSetEncoding(m, "x-binhex");
199
-
200
-	fb = messageToFileblob(m, dir, 1);
201
-	if(fb) {
202
-		cli_dbgmsg("Binhex file decoded to %s\n", fileblobGetFilename(fb));
203
-		fileblobDestroy(fb);
204
-	} else
205
-		cli_errmsg("Couldn't decode binhex file to %s\n", dir);
206
-	messageDestroy(m);
207
-
208
-	if(fb)
209
-		return CL_CLEAN;	/* a lie - but it gets things going */
210
-	/* return CL_EIO; */	/* probably CL_EMEM, but we can't tell at this layer */
211
-	/* TK: CL_EMEM is too generic here and should not be reported for parsing errors */
212
-	return CL_EFORMAT;
204
+	if(in_run) {
205
+	    in_run = 0;
206
+	    if(!this_byte)
207
+		this_byte = 0x90;
208
+	    else {
209
+		while(--this_byte) 
210
+		    decoded[dec_done++] = last_byte;
211
+		continue;
212
+	    }
213
+	} else if(this_byte == 0x90) {
214
+	    in_run = 1;
215
+	    continue;
216
+	}
217
+	decoded[dec_done++] = this_byte;
218
+	last_byte = this_byte;
219
+    }
220
+
221
+    close(datafd);
222
+    close(resfd);
223
+    if(!ctx->engine->keeptmp) {
224
+	if(cli_unlink(dname) && ret != CL_VIRUS) ret = CL_EUNLINK;
225
+	if(cli_unlink(rname) && ret != CL_VIRUS) ret = CL_EUNLINK;
226
+    }
227
+    free(dname);
228
+    free(rname);
229
+    return ret;
213 230
 }
... ...
@@ -33,6 +33,7 @@
33 33
 #ifndef __BINHEX_H
34 34
 #define __BINHEX_H
35 35
 
36
-int cli_binhex(const char *dir, fmap_t *map);
36
+#include "others.h"
37
+int cli_binhex(cli_ctx *ctx);
37 38
 
38 39
 #endif
... ...
@@ -1243,36 +1243,6 @@ static int cli_scantar(int desc, cli_ctx *ctx, unsigned int posix)
1243 1243
     return ret;
1244 1244
 }
1245 1245
 
1246
-static int cli_scanbinhex(cli_ctx *ctx)
1247
-{
1248
-	char *dir;
1249
-	int ret = CL_CLEAN;
1250
-
1251
-
1252
-    cli_dbgmsg("in cli_scanbinhex()\n");
1253
-
1254
-    /* generate temporary directory */
1255
-    if(!(dir = cli_gentemp(ctx->engine->tmpdir)))
1256
-	return CL_EMEM;
1257
-
1258
-    if(mkdir(dir, 0700)) {
1259
-	cli_errmsg("Binhex: Can't create temporary directory %s\n", dir);
1260
-	free(dir);
1261
-	return CL_ETMPDIR;
1262
-    }
1263
-
1264
-    if((ret = cli_binhex(dir, *ctx->fmap)))
1265
-	cli_dbgmsg("Binhex: %s\n", cl_strerror(ret));
1266
-    else
1267
-	ret = cli_scandir(dir, ctx);
1268
-
1269
-    if(!ctx->engine->keeptmp)
1270
-	cli_rmdirs(dir);
1271
-
1272
-    free(dir);
1273
-    return ret;
1274
-}
1275
-
1276 1246
 static int cli_scanmschm(int desc, cli_ctx *ctx)
1277 1247
 {
1278 1248
 	int ret = CL_CLEAN, rc;
... ...
@@ -2135,7 +2105,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
2135 2135
 
2136 2136
 	case CL_TYPE_BINHEX:
2137 2137
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BINHEX))
2138
-		ret = cli_scanbinhex(ctx);
2138
+		ret = cli_binhex(ctx);
2139 2139
 	    break;
2140 2140
 
2141 2141
 	case CL_TYPE_SCRENC: