git-svn: trunk@200

Tomasz Kojm authored on 2004/01/23 20:17:16
Showing 14 changed files
... ...
@@ -1,3 +1,14 @@
1
+Fri Jan 23 12:12:30 CET 2004 (tk)
2
+---------------------------------
3
+  * libclamav: added support for OLE2 / VBA streams scanning (!!), based on
4
+	       code written by Trog <trog@uncon.org>. It may be enabled
5
+	       with CL_OLE2 passed in options to cli_scandesc().
6
+  * clamscan: support for OLE2 scanning is enabled by default and may be
7
+	      disabled with --no-ole2
8
+  * clamd: support for OLE2 scanning is disabled by default and may be enabled
9
+	   with ScanOLE2 in clamav.conf.
10
+  * clamd: included Darwin fix from Nigel
11
+
1 12
 Fri Jan 23 10:50:51 GMT 2004 (njh)
2 13
 ----------------------------------
3 14
   * libclamav: Fixed memory leak in handling some multipart messages
... ...
@@ -211,6 +211,7 @@ void help(void)
211 211
     mprintf("    --no-summary                         Disable summary at end of scanning\n");
212 212
     mprintf("    --mbox                -m             Treat stdin as a mailbox\n");
213 213
     mprintf("\n");
214
+    mprintf("    --no-ole2                            Disable OLE2 support\n");
214 215
     mprintf("    --no-archive                         Disable libclamav archive support\n");
215 216
     mprintf("    --max-space=#n                       Extract first #n kilobytes only\n");
216 217
     mprintf("    --max-files=#n                       Extract first #n files only\n");
... ...
@@ -349,6 +349,11 @@ int scanfile(const char *filename, struct cl_node *root, const struct passwd *us
349 349
     else
350 350
 	options |= CL_ARCHIVE;
351 351
 
352
+    if(optl(opt, "no-ole2"))
353
+	options &= ~CL_OLE2;
354
+    else
355
+	options |= CL_OLE2;
356
+
352 357
     if(optc(opt, 'm'))
353 358
 	options |= CL_MAIL;
354 359
 
... ...
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
69 69
 	    {"max-recursion", 1, 0, 0},
70 70
 	    {"disable-archive", 0, 0, 0},
71 71
 	    {"no-archive", 0, 0, 0},
72
-
72
+	    {"no-ole2", 0, 0, 0},
73 73
 	    {"mbox", 0, 0, 'm'},
74 74
 	    {"stdout", 0, 0, 0},
75 75
 	    {"unzip", 2, 0, 0},
... ...
@@ -127,6 +127,13 @@ MaxDirectoryRecursion 15
127 127
 #Debug
128 128
 
129 129
 ##
130
+## Document scanning
131
+##
132
+
133
+# This option enables scanning of Microsoft Office document macros.
134
+#ScanOLE2
135
+
136
+##
130 137
 ## Mail support
131 138
 ##
132 139
 
... ...
@@ -70,6 +70,10 @@ libclamav_la_SOURCES = \
70 70
 	table.c \
71 71
 	table.h \
72 72
 	text.c \
73
-	text.h
73
+	text.h \
74
+	ole2_extract.c \
75
+	ole2_extract.h \
76
+	vba_extract.c \
77
+	vba_extract.h
74 78
 
75 79
 lib_LTLIBRARIES = libclamav.la
... ...
@@ -169,7 +169,11 @@ libclamav_la_SOURCES = \
169 169
 	table.c \
170 170
 	table.h \
171 171
 	text.c \
172
-	text.h
172
+	text.h \
173
+	ole2_extract.c \
174
+	ole2_extract.h \
175
+	vba_extract.c \
176
+	vba_extract.h
173 177
 
174 178
 
175 179
 lib_LTLIBRARIES = libclamav.la
... ...
@@ -182,7 +186,8 @@ libclamav_la_DEPENDENCIES =
182 182
 am_libclamav_la_OBJECTS = matcher.lo md5.lo others.lo readdb.lo cvd.lo \
183 183
 	dsig.lo str.lo scanners.lo unrarlib.lo zzip-dir.lo zzip-err.lo \
184 184
 	zzip-file.lo zzip-info.lo zzip-io.lo zzip-stat.lo zzip-zip.lo \
185
-	strc.lo blob.lo mbox.lo message.lo strrcpy.lo table.lo text.lo
185
+	strc.lo blob.lo mbox.lo message.lo strrcpy.lo table.lo text.lo \
186
+	ole2_extract.lo vba_extract.lo
186 187
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
187 188
 
188 189
 DEFS = @DEFS@
... ...
@@ -195,11 +200,13 @@ am__depfiles_maybe = depfiles
195 195
 @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/blob.Plo ./$(DEPDIR)/cvd.Plo \
196 196
 @AMDEP_TRUE@	./$(DEPDIR)/dsig.Plo ./$(DEPDIR)/matcher.Plo \
197 197
 @AMDEP_TRUE@	./$(DEPDIR)/mbox.Plo ./$(DEPDIR)/md5.Plo \
198
-@AMDEP_TRUE@	./$(DEPDIR)/message.Plo ./$(DEPDIR)/others.Plo \
198
+@AMDEP_TRUE@	./$(DEPDIR)/message.Plo \
199
+@AMDEP_TRUE@	./$(DEPDIR)/ole2_extract.Plo ./$(DEPDIR)/others.Plo \
199 200
 @AMDEP_TRUE@	./$(DEPDIR)/readdb.Plo ./$(DEPDIR)/scanners.Plo \
200 201
 @AMDEP_TRUE@	./$(DEPDIR)/str.Plo ./$(DEPDIR)/strc.Plo \
201 202
 @AMDEP_TRUE@	./$(DEPDIR)/strrcpy.Plo ./$(DEPDIR)/table.Plo \
202 203
 @AMDEP_TRUE@	./$(DEPDIR)/text.Plo ./$(DEPDIR)/unrarlib.Plo \
204
+@AMDEP_TRUE@	./$(DEPDIR)/vba_extract.Plo \
203 205
 @AMDEP_TRUE@	./$(DEPDIR)/zzip-dir.Plo ./$(DEPDIR)/zzip-err.Plo \
204 206
 @AMDEP_TRUE@	./$(DEPDIR)/zzip-file.Plo ./$(DEPDIR)/zzip-info.Plo \
205 207
 @AMDEP_TRUE@	./$(DEPDIR)/zzip-io.Plo ./$(DEPDIR)/zzip-stat.Plo \
... ...
@@ -273,6 +280,7 @@ distclean-compile:
273 273
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbox.Plo@am__quote@
274 274
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@
275 275
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/message.Plo@am__quote@
276
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ole2_extract.Plo@am__quote@
276 277
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/others.Plo@am__quote@
277 278
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdb.Plo@am__quote@
278 279
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanners.Plo@am__quote@
... ...
@@ -282,6 +290,7 @@ distclean-compile:
282 282
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/table.Plo@am__quote@
283 283
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/text.Plo@am__quote@
284 284
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unrarlib.Plo@am__quote@
285
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vba_extract.Plo@am__quote@
285 286
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zzip-dir.Plo@am__quote@
286 287
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zzip-err.Plo@am__quote@
287 288
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zzip-file.Plo@am__quote@
... ...
@@ -46,6 +46,7 @@ extern "C"
46 46
 #define	CL_EMALFZIP	102 /* malformed zip */
47 47
 #define CL_EGZIP	103 /* gzip handler error */
48 48
 #define CL_EBZIP	104 /* bzip2 handler error */
49
+#define CL_EOLE2	105 /* OLE2 handler error */
49 50
 #define CL_EACCES	200 /* access denied */
50 51
 #define CL_ENULLARG	300 /* null argument error */
51 52
 
... ...
@@ -62,10 +63,11 @@ extern "C"
62 62
 #define CL_EDSIG	-11 /* digital signature verification error */
63 63
 
64 64
 /* options */
65
-#define CL_RAW		  00
66
-#define CL_ARCHIVE	  01
67
-#define CL_MAIL		0100
68
-#define CL_DISABLERAR  01000
65
+#define CL_RAW		0
66
+#define CL_ARCHIVE	1
67
+#define CL_MAIL		2
68
+#define CL_DISABLERAR	4
69
+#define CL_OLE2		8
69 70
 
70 71
 struct cli_patt {
71 72
     short int *pattern;
72 73
new file mode 100644
... ...
@@ -0,0 +1,495 @@
0
+/*
1
+ *  Extract component parts of OLE2 files (e.g. MS Office Documents)
2
+ *
3
+ *  Copyright (C) 2004 trog@uncon.org
4
+ *
5
+ *  This code is based on the OpenOffice and libgsf sources.
6
+ *                  
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License as published by
9
+ *  the Free Software Foundation; either version 2 of the License, or
10
+ *  (at your option) any later version.
11
+ *
12
+ *  This program is distributed in the hope that it will be useful,
13
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ *  GNU General Public License for more details.
16
+ *
17
+ *  You should have received a copy of the GNU General Public License
18
+ *  along with this program; if not, write to the Free Software
19
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
+ */
21
+
22
+#include <sys/types.h>
23
+#include <sys/stat.h>
24
+#include <fcntl.h>
25
+#include <stdio.h>
26
+#include <string.h>
27
+#include <unistd.h>
28
+#include <stdint.h>
29
+#include <ctype.h>
30
+#include <stdlib.h>
31
+#include <clamav.h>
32
+
33
+#define FALSE (0)
34
+#define TRUE (1)
35
+
36
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
37
+
38
+int big_block_size, small_block_size;
39
+int sbat_start=-1;
40
+
41
+typedef struct ole2_header_tag
42
+{
43
+	unsigned char magic[8];			/* should be: 0xd0cf11e0a1b11ae1 */
44
+	unsigned char clsid[16];
45
+	uint16_t minor_version;
46
+	uint16_t dll_version;
47
+	int16_t byte_order;			/* -2=intel */
48
+
49
+	uint16_t log2_big_block_size;		/* usually 9 (2^9 = 512) */
50
+	uint32_t log2_small_block_size;		/* usually 6 (2^6 = 128) */
51
+
52
+	int32_t reserved[2];
53
+	int32_t bat_count;
54
+	int32_t prop_start;
55
+
56
+	uint32_t signature;
57
+	uint32_t sbat_cutoff;			/* cutoff for files held in small blocks (4096) */
58
+
59
+	int32_t sbat_start;
60
+	int32_t sbat_block_count;
61
+	int32_t xbat_start;
62
+	int32_t xbat_count;
63
+	int32_t bat_array[109];
64
+} ole2_header_t __attribute__ ((packed));
65
+
66
+typedef struct property_tag
67
+{
68
+	unsigned char name[64];			/* in unicode */
69
+	int16_t name_size;
70
+	unsigned char type;			/* 1=dir 2=file 5=root */
71
+	unsigned char color;			/* black or red */
72
+	int32_t prev;
73
+	int32_t next;
74
+	int32_t child;
75
+
76
+	unsigned char clsid[16];
77
+	uint16_t user_flags;
78
+
79
+	uint32_t create_lowdate;
80
+	uint32_t create_highdate;
81
+	uint32_t mod_lowdate;
82
+	uint32_t mod_highdate;
83
+	int32_t start_block;
84
+	int32_t size;
85
+	unsigned char reserved[4];
86
+} property_t __attribute__ ((packed));
87
+
88
+char magic_id[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
89
+
90
+
91
/* Function: readn
 *	Read `count` bytes from `fd` into `buff`, re-issuing read() after
 *	short reads.
 *
 *	Returns `count` when the buffer was filled, the number of bytes
 *	actually read when end of file is hit first, or -1 when read()
 *	reports an error.
 */
int readn(int fd, void *buff, unsigned int count)
{
	ssize_t retval;
	unsigned int todo;
	unsigned char *current;

	todo = count;
	/* Advance through a byte pointer: arithmetic on void * is a compiler
	   extension, not standard C. */
	current = (unsigned char *) buff;

	do {
		retval = read(fd, current, todo);
		if (retval == 0) {
			/* end of file: report the partial count */
			return (count - todo);
		}
		if (retval < 0) {
			return -1;
		}
		todo -= (unsigned int) retval;
		current += retval;
	} while (todo > 0);

	return count;
}
117
+
118
/* Function: writen
 *	Write `count` bytes from `buff` to `fd`, re-issuing write() after
 *	short writes.
 *
 *	Returns `count` on success and -1 when write() reports an error or
 *	makes no progress while data is still pending.
 */
int writen(int fd, void *buff, unsigned int count)
{
	ssize_t retval;
	unsigned int todo;
	const unsigned char *current;

	todo = count;
	/* Advance through a byte pointer: arithmetic on void * is a compiler
	   extension, not standard C. */
	current = (const unsigned char *) buff;

	do {
		retval = write(fd, current, todo);
		if (retval < 0) {
			return -1;
		}
		if (retval == 0 && todo > 0) {
			/* no progress: bail out instead of spinning forever */
			return -1;
		}
		todo -= (unsigned int) retval;
		current += retval;
	} while (todo > 0);

	return count;
}
141
+
142
/* Print a property name through cli_dbgmsg(), padded with spaces to a
 * fixed column width.
 *
 * name: the stored name; only every second byte is examined
 * size: stored name length in bytes, including the 2-byte terminator
 *
 * Printable bytes are emitted as-is, anything else as _<value>_.
 */
void print_property_name(char *name, int size)
{
	int i, count=0;
	unsigned char ch;

	if (*name == 0 || size == 0) {
		cli_dbgmsg("[unused]                         ");
		return;
	}
	/* size-2 to ignore trailing NULL */
	for (i=0 ; i<size-2; i+=2) {
		/* isprint() is only defined for unsigned char values and EOF */
		ch = (unsigned char) name[i];
		if (isprint(ch)) {
			cli_dbgmsg("%c", ch);
			count++;
		} else {
			cli_dbgmsg("_%d_", name[i]);
			count += 3;
		}
	}
	for (i=0 ; i < (34-count) ; i++) {
		cli_dbgmsg(" ");
	}
}
164
+
165
/* Build a file-system friendly copy of a property name.
 *
 * name: the stored name; only every second byte is examined
 * size: stored name length in bytes, including the 2-byte terminator.
 *       The value comes from the scanned file and is not trusted.
 *
 * Printable bytes are copied, bytes 0..9 become "_<digit>_" and every
 * other byte (including '/', so the result can never name another
 * directory) becomes "_".
 *
 * Returns a NUL-terminated string allocated with cli_malloc() that the
 * caller must free(), or NULL for an unused entry, an invalid size or an
 * allocation failure.
 */
char *get_property_name(char *name, int size)
{
	int i, j, nchars;
	unsigned char ch;
	char *newname;

	if (*name == 0 || size <= 0) {
		return NULL;
	}

	/* The name field of a property is 64 bytes long; never read past it
	   even if the stored length claims more. */
	if (size > 64) {
		size = 64;
	}

	/* Number of bytes the loop below visits; each one can expand to at
	   most three output bytes ("_d_"). */
	nchars = (size > 2) ? (size - 1) / 2 : 0;

	newname = (char *) cli_malloc(3 * nchars + 1);
	if (!newname) {
		return NULL;
	}
	j=0;
	/* size-2 to ignore trailing NULL */
	for (i=0 ; i < size-2; i+=2) {
		/* isprint() is only defined for unsigned char values and EOF */
		ch = (unsigned char) name[i];
		if (isprint(ch) && ch != '/') {
			newname[j++] = (char) ch;
		} else {
			if (ch < 10) {
				newname[j++] = '_';
				newname[j++] = (char) (ch + '0');
			}
			newname[j++] = '_';
		}
	}
	newname[j] = '\0';
	return newname;
}
194
+
195
+void print_ole2_property(property_t *property)
196
+{
197
+	//print_property_name(property->name, property->name_size);
198
+	switch (property->type) {
199
+	case 2:
200
+		cli_dbgmsg(" [file]");
201
+		break;
202
+	case 1:
203
+		cli_dbgmsg(" [dir ]");
204
+		break;
205
+	case 5:
206
+		cli_dbgmsg(" [root]");
207
+		break;
208
+	default:
209
+		cli_dbgmsg(" [%d]", property->type);
210
+	}
211
+	switch (property->color) {
212
+	case 0:
213
+		cli_dbgmsg(" r");
214
+		break;
215
+	case 1:
216
+		cli_dbgmsg(" b");
217
+		break;
218
+	default:
219
+		cli_dbgmsg(" u");
220
+	}
221
+	cli_dbgmsg(" %d %x\n", property->size, property->user_flags);
222
+}
223
+
224
+void print_ole2_header(ole2_header_t *hdr)
225
+{
226
+	int i;
227
+	
228
+	if (!hdr) {
229
+		return;
230
+	}
231
+	
232
+	cli_dbgmsg("\nMagic:\t\t\t0x");
233
+	for (i=0 ; i<8; i++) {
234
+		cli_dbgmsg("%x", hdr->magic[i]);
235
+	}
236
+	cli_dbgmsg("\n");
237
+
238
+	cli_dbgmsg("CLSID:\t\t\t{");
239
+	for (i=0 ; i<16; i++) {
240
+		cli_dbgmsg("%x ", hdr->clsid[i]);
241
+	}
242
+	cli_dbgmsg("}\n");
243
+
244
+	cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
245
+	cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
246
+	cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
247
+	cli_dbgmsg("Big Block Size:\t\t%i\n", hdr->log2_big_block_size);
248
+	cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
249
+	cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
250
+	cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
251
+	cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
252
+	cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
253
+	cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
254
+	cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
255
+	cli_dbgmsg("XBat block count:\t%d\n\n", hdr->xbat_count);
256
+	return;
257
+}
258
+
259
+int ole2_read_block(int fd, ole2_header_t *hdr, void *buff, int blockno)
260
+{
261
+	int offset;
262
+
263
+	// other methods: (blockno+1) * 512 or (blockno * block_size) + 512;
264
+	offset = (blockno << hdr->log2_big_block_size) + 512;	/* 512 is header size */
265
+	if (lseek(fd, offset, SEEK_SET) != offset) {
266
+		return FALSE;
267
+	}
268
+	if (readn(fd, buff, big_block_size) != big_block_size) {
269
+		return FALSE;
270
+	}
271
+	return TRUE;
272
+}
273
+
274
+int ole2_get_next_bat_block(int fd, ole2_header_t *hdr, int current_block)
275
+{
276
+	int bat_array_index;
277
+	uint32_t bat[128];
278
+
279
+	bat_array_index = current_block / 128;
280
+	if (bat_array_index > hdr->bat_count) {
281
+		cli_dbgmsg("bat_array index error\n");
282
+		return -10;
283
+	}
284
+	ole2_read_block(fd, hdr, &bat, hdr->bat_array[bat_array_index]);
285
+	return bat[current_block-(bat_array_index * 128)];
286
+}
287
+
288
+int ole2_get_next_sbat_block(int fd, ole2_header_t *hdr, int current_block)
289
+{
290
+	int iter, current_bat_block;
291
+	uint32_t sbat[128];
292
+
293
+	current_bat_block = hdr->sbat_start;
294
+	iter = current_block / 128;
295
+	while (iter > 0) {
296
+		current_bat_block = ole2_get_next_bat_block(fd, hdr, current_bat_block);
297
+		iter--;
298
+	}
299
+	ole2_read_block(fd, hdr, &sbat, current_bat_block);
300
+	return sbat[current_block % 128];
301
+}
302
+
303
+int ole2_get_next_xbat_block(int fd, ole2_header_t *hdr, int current_block)
304
+{
305
+	int xbat_index, xbat_block_index, bat_index, bat_blockno;
306
+	uint32_t xbat[128], bat[128];
307
+
308
+	xbat_index = current_block / 128;
309
+
310
+	/* NB:	The last entry in each XBAT points to the next XBAT block.
311
+		This reduces the number of entries in each block by 1.
312
+	*/
313
+	xbat_block_index = (xbat_index - 109) / 127;
314
+	bat_blockno = (xbat_index - 109) % 127;
315
+
316
+	bat_index = current_block % 128;
317
+
318
+	ole2_read_block(fd, hdr, &xbat, hdr->xbat_start);
319
+
320
+	/* Follow the chain of XBAT blocks */
321
+	while (xbat_block_index > 0) {
322
+		ole2_read_block(fd, hdr, &xbat, xbat[127]);
323
+		xbat_block_index--;
324
+	}
325
+
326
+	ole2_read_block(fd, hdr, &bat, xbat[bat_blockno]);
327
+
328
+	return bat[bat_index];
329
+}
330
+
331
+int ole2_get_next_block_number(int fd, ole2_header_t *hdr, int current_block)
332
+{
333
+	if ((current_block / 128) > 108) {
334
+		return ole2_get_next_xbat_block(fd, hdr, current_block);
335
+	} else {
336
+		return ole2_get_next_bat_block(fd, hdr, current_block);
337
+	}
338
+}
339
+
340
+/* Retrieve the block containing the data for the given sbat index */
341
+int ole2_get_sbat_data_block(int fd, ole2_header_t *hdr, void *buff, int sbat_index)
342
+{
343
+	int block_count;
344
+	int current_block;
345
+
346
+	if (sbat_start < 0) {
347
+		cli_errmsg("No root start block\n");
348
+		return FALSE;
349
+	}
350
+
351
+	block_count = sbat_index / 8;			// 8 small blocks per big block
352
+	current_block = sbat_start;
353
+	while (block_count > 0) {
354
+		current_block = ole2_get_next_bat_block(fd, hdr, current_block);
355
+		block_count--;
356
+	}
357
+	/* current_block now contains the block number of the sbat array
358
+	   containing the entry for the required small block */
359
+
360
+	return(ole2_read_block(fd, hdr, buff, current_block));
361
+
362
+}
363
+
364
+/* Read the property tree.
365
+   It is read as just an array rather than a tree */
366
+void ole2_read_property_tree(int fd, ole2_header_t *hdr, const char *dir,
367
+				void (*handler)(int fd, ole2_header_t *hdr, property_t *prop, const char *dir))
368
+{
369
+	property_t prop_block[4];
370
+	int index, current_block;
371
+	
372
+	current_block = hdr->prop_start;
373
+
374
+	while(current_block >= 0) {
375
+		ole2_read_block(fd, hdr, prop_block, current_block);
376
+		for (index=0 ; index < 4 ; index++) {
377
+			if (prop_block[index].name[0] != 0) {
378
+				if (prop_block[index].type == 5) {
379
+					sbat_start = prop_block[index].start_block;
380
+				}
381
+				print_ole2_property(&prop_block[index]);
382
+				handler(fd, hdr, &prop_block[index], dir);
383
+			}
384
+		}
385
+		current_block = ole2_get_next_block_number(fd, hdr, current_block);
386
+	}
387
+	return;
388
+}
389
+
390
+/* Callback handlers
391
+   These are called for each entry in the container (property tree) */
392
+
393
+/* Null Handler - doesn't do anything */
394
+void handler_null(int fd, ole2_header_t *hdr, property_t *prop, const char *dir)
395
+{
396
+	return;
397
+}
398
+
399
+/* Write file Handler - write the contents of the entry to a file */
400
+void handler_writefile(int fd, ole2_header_t *hdr, property_t *prop, const char *dir)
401
+{
402
+	unsigned char buff[big_block_size];
403
+	int current_block, ofd, len, offset;
404
+	char *name, *newname;
405
+
406
+	if (prop->type != 2) {
407
+		// Not a file
408
+		return;
409
+	}
410
+
411
+	if (! (name = get_property_name(prop->name, prop->name_size))) {
412
+		return;
413
+	}
414
+
415
+	newname = (char *) cli_malloc(strlen(name) + strlen(dir) + 2);
416
+	sprintf(newname, "%s/%s", dir, name);
417
+	free(name);
418
+
419
+	ofd = open(newname, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
420
+	if (ofd < 0) {
421
+		return;
422
+	}
423
+	free(newname);
424
+	current_block = prop->start_block;
425
+	len = prop->size;
426
+
427
+	while((current_block >= 0) && (len > 0)) {
428
+		if (prop->size < hdr->sbat_cutoff) {
429
+			// Small block file
430
+			if (!ole2_get_sbat_data_block(fd, hdr, &buff, current_block)) {
431
+				cli_dbgmsg("ole2_get_sbat_data_block failed\n");
432
+				close(ofd);
433
+				return;
434
+			}
435
+			// buff now contains the block with 8 small blocks in it
436
+			offset = 64 * (current_block % 8);
437
+			if (writen(ofd, &buff[offset], MIN(len,64)) != MIN(len,64)) {
438
+				close(ofd);
439
+				return;
440
+			}
441
+
442
+			len -= MIN(len,64);
443
+			current_block = ole2_get_next_sbat_block(fd, hdr, current_block);
444
+		} else {
445
+			// Big block file
446
+			if (!ole2_read_block(fd, hdr, &buff, current_block)) {
447
+				close(ofd);
448
+				return;
449
+			}
450
+			if (writen(ofd, &buff, MIN(len,big_block_size)) != MIN(len,big_block_size)) {
451
+				close(ofd);
452
+				return;
453
+			}
454
+
455
+			current_block = ole2_get_next_block_number(fd, hdr, current_block);
456
+			len -= MIN(len,big_block_size);
457
+		}
458
+	}
459
+	close(ofd);
460
+	return;
461
+}
462
+
463
+int cli_ole2_extract(int fd, const char *dirname)
464
+{
465
+	ole2_header_t hdr;
466
+
467
+	cli_dbgmsg("in cli_ole2_extract()\n");
468
+
469
+	readn(fd, &hdr, sizeof(struct ole2_header_tag));
470
+
471
+	if (strncmp(hdr.magic, magic_id, 8) != 0) {
472
+		cli_dbgmsg("OLE2 magic failed!\n");
473
+		return CL_EOLE2;
474
+	}
475
+
476
+	if (hdr.log2_big_block_size != 9) {
477
+		cli_dbgmsg("WARNING: untested big block size - please report\n\n");
478
+	}
479
+	if (hdr.log2_small_block_size != 6) {
480
+		cli_dbgmsg("WARNING: untested small block size - please report\n\n");
481
+	}
482
+	if (hdr.sbat_cutoff != 4096) {
483
+		cli_dbgmsg("WARNING: untested sbat cutoff - please report\n\n");
484
+	}
485
+
486
+	big_block_size = 1 << hdr.log2_big_block_size;
487
+	small_block_size = 1 << hdr.log2_small_block_size;
488
+
489
+	print_ole2_header(&hdr);
490
+
491
+	ole2_read_property_tree(fd, &hdr, dirname, handler_writefile);
492
+
493
+	return 0;
494
+}
0 495
new file mode 100644
... ...
@@ -0,0 +1,28 @@
0
+/*
1
+ *  Extract component parts of OLE2 files (e.g. MS Office Documents)
2
+ *
3
+ *  Copyright (C) 2004 trog@uncon.org
4
+ *
5
+ *  This code is based on the OpenOffice and libgsf sources.
6
+ *                  
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License as published by
9
+ *  the Free Software Foundation; either version 2 of the License, or
10
+ *  (at your option) any later version.
11
+ *
12
+ *  This program is distributed in the hope that it will be useful,
13
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ *  GNU General Public License for more details.
16
+ *
17
+ *  You should have received a copy of the GNU General Public License
18
+ *  along with this program; if not, write to the Free Software
19
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
+ */
21
+
22
+#ifndef __OLE2_EXTRACT_H
23
+#define __OLE2_EXTRACT_H
24
+
25
+int cli_ole2_extract(int fd, const char *dirname);
26
+
27
+#endif
... ...
@@ -107,6 +107,8 @@ char *cl_strerror(int clerror)
107 107
 	    return "Malformed Zip detected.";
108 108
 	case CL_EGZIP:
109 109
 	    return "GZip module failure.";
110
+	case CL_EOLE2:
111
+	    return "OLE2 module failure.";
110 112
 	case CL_ETMPFILE:
111 113
 	    return "Unable to create temporary file.";
112 114
 	case CL_ETMPDIR:
... ...
@@ -37,6 +37,8 @@ int cli_scanrar_inuse = 0;
37 37
 #include "others.h"
38 38
 #include "matcher.h"
39 39
 #include "unrarlib.h"
40
+#include "ole2_extract.h"
41
+#include "vba_extract.h"
40 42
 
41 43
 #ifdef HAVE_ZLIB_H
42 44
 #include <zlib.h>
... ...
@@ -49,6 +51,7 @@ int cli_scanrar_inuse = 0;
49 49
 
50 50
 #define SCAN_ARCHIVE	(options & CL_ARCHIVE)
51 51
 #define SCAN_MAIL	(options & CL_MAIL)
52
+#define SCAN_OLE2	(options & CL_OLE2)
52 53
 #define DISABLE_RAR	(options & CL_DISABLERAR)
53 54
 
54 55
 #define MAGIC_BUFFER_SIZE 14
... ...
@@ -60,6 +63,7 @@ int cli_scanrar_inuse = 0;
60 60
 #define MAILDIR_MAGIC_STR "Return-Path: "
61 61
 #define DELIVERED_MAGIC_STR "Delivered-To: "
62 62
 #define BZIP_MAGIC_STR "BZh"
63
+#define OLE2_MAGIC_STR "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
63 64
 
64 65
 int cli_magic_scandesc(int desc, char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *reclev);
65 66
 
... ...
@@ -535,6 +539,78 @@ int cli_scanbzip(int desc, char **virname, long int *scanned, const struct cl_no
535 535
 }
536 536
 #endif
537 537
 
538
+int cli_scanole2(int desc, char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *reclev)
539
+{
540
+	const char *tmpdir;
541
+	char *dir, *fullname;
542
+	unsigned char *data;
543
+	int ret = CL_CLEAN, fd, i;
544
+	vba_project_t *vba_project;
545
+
546
+    cli_dbgmsg("in cli_scanole2()\n");
547
+
548
+    tmpdir = getenv("TMPDIR");
549
+
550
+    if(tmpdir == NULL)
551
+#ifdef P_tmpdir
552
+	tmpdir = P_tmpdir;
553
+#else
554
+	tmpdir = "/tmp";
555
+#endif
556
+
557
+	/* generate the temporary directory */
558
+	dir = cl_gentemp(tmpdir);
559
+	if(mkdir(dir, 0700)) {
560
+	    cli_errmsg("ScanOLE2 -> Can't create temporary directory %s\n", dir);
561
+	    return CL_ETMPDIR;
562
+	}
563
+
564
+	if((ret = cli_ole2_extract(desc, dir))) {
565
+	    cli_errmsg("ScanOLE2 -> %s\n", cl_strerror(ret));
566
+	    cli_rmdirs(dir);
567
+	    free(dir);
568
+	    return ret;
569
+	}
570
+
571
+	if((vba_project = (vba_project_t *) vba56_dir_read(dir))) {
572
+
573
+	    for(i = 0; i < vba_project->count; i++) {
574
+		fullname = (char *) malloc(strlen(vba_project->dir) + strlen(vba_project->name[i]) + 2);
575
+		sprintf(fullname, "%s/%s", vba_project->dir, vba_project->name[i]);
576
+		fd = open(fullname, O_RDONLY);
577
+		if(fd == -1) {
578
+			cli_errmsg("Scan->OLE2 -> Can't open file %s\n", fullname);
579
+			free(fullname);
580
+			ret = CL_EOPEN;
581
+			break;
582
+		}
583
+		free(fullname);
584
+		data = (unsigned char *) vba_decompress(fd, vba_project->offset[i]);
585
+
586
+		if(cl_scanbuff(data, strlen(data), virname, root) == CL_VIRUS) {
587
+		    free(data);
588
+		    ret = CL_VIRUS;
589
+		    break;
590
+		}
591
+
592
+		free(data);
593
+	    }
594
+
595
+	} else {
596
+	    cli_errmsg("ScanOLE2 -> Can't decode VBA streams.\n");
597
+	    ret = CL_EOLE2;
598
+	}
599
+
600
+	for(i = 0; i < vba_project->count; i++)
601
+	    free(vba_project->name[i]);
602
+	free(vba_project->name);
603
+	free(vba_project->dir);
604
+	free(vba_project->offset);
605
+
606
+	cli_rmdirs(dir);
607
+	free(dir);
608
+	return ret;
609
+}
538 610
 int cli_scandir(char *dirname, char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *reclev)
539 611
 {
540 612
 	DIR *dd;
... ...
@@ -700,6 +776,10 @@ int cli_magic_scandesc(int desc, char **virname, long int *scanned, const struct
700 700
 	    ret = cli_scanbzip(desc, virname, scanned, root, limits, options, reclev);
701 701
 	}
702 702
 #endif
703
+	else if(SCAN_OLE2 && !strncmp(magic, OLE2_MAGIC_STR, 8)) {
704
+	    cli_dbgmsg("Recognized OLE2 file.\n");
705
+	    ret = cli_scanole2(desc, virname, scanned, root, limits, options, reclev);
706
+	}
703 707
 	else if(SCAN_MAIL && !strncmp(magic, MAIL_MAGIC_STR, strlen(MAIL_MAGIC_STR))) {
704 708
 	    cli_dbgmsg("Recognized mail file.\n");
705 709
 	    ret = cli_scanmail(desc, virname, scanned, root, limits, options, reclev);
706 710
new file mode 100644
... ...
@@ -0,0 +1,602 @@
0
+/*
1
+ *  Extract VBA source code from MS Office documents
2
+ *
3
+ *  Copyright (C) 2004 trog@uncon.org
4
+ *
5
+ *  This code is based on the OpenOffice and libgsf sources.
6
+ *                  
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License as published by
9
+ *  the Free Software Foundation; either version 2 of the License, or
10
+ *  (at your option) any later version.
11
+ *
12
+ *  This program is distributed in the hope that it will be useful,
13
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ *  GNU General Public License for more details.
16
+ *
17
+ *  You should have received a copy of the GNU General Public License
18
+ *  along with this program; if not, write to the Free Software
19
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
+ */
21
+
22
+#include <stdio.h>
23
+#include <string.h>
24
+#include <unistd.h>
25
+#include <sys/types.h>
26
+#include <sys/stat.h>
27
+#include <fcntl.h>
28
+#include <stdint.h>
29
+#include <stdlib.h>
30
+#include <ctype.h>
31
+
32
+#include "vba_extract.h"
33
+
34
#define FALSE (0)
#define TRUE (1)

/* One recognised version signature and the product it identifies. */
typedef struct vba_version_tag {
	unsigned char signature[4];	/* bytes compared against the version field */
	const char *name;		/* human readable product name */
	int vba_version;		/* 5 or 6 in the table below */
	int is_mac;			/* TRUE for the MacOffice entries */
} vba_version_t;


/* Heap buffer of raw bytes; length is the number of bytes stored in data. */
typedef struct byte_array_tag {
	unsigned int length;
	unsigned char *data;
} byte_array_t;

/* Known version signatures. */
vba_version_t vba_version[] = {
	{ { 0x5e, 0x00, 0x00, 0x01 }, "Office 97",              5, FALSE },
	{ { 0x5f, 0x00, 0x00, 0x01 }, "Office 97 SR1",          5, FALSE },
	{ { 0x65, 0x00, 0x00, 0x01 }, "Office 2000 alpha?",     6, FALSE },
	{ { 0x6b, 0x00, 0x00, 0x01 }, "Office 2000 beta?",      6, FALSE },
	{ { 0x6d, 0x00, 0x00, 0x01 }, "Office 2000",            6, FALSE },
	{ { 0x70, 0x00, 0x00, 0x01 }, "Office XP beta 1/2",     6, FALSE },
	{ { 0x73, 0x00, 0x00, 0x01 }, "Office XP",              6, FALSE },
	{ { 0x60, 0x00, 0x00, 0x0e }, "MacOffice 98",           5, TRUE },
	{ { 0x62, 0x00, 0x00, 0x0e }, "MacOffice 2001",         5, TRUE },
};

/* Number of entries in vba_version[].  Derived from the table itself so the
   count cannot drift when a signature is added or removed; cast to int
   because it is compared against int loop counters. */
#define NUM_VBA_VERSIONS ((int) (sizeof(vba_version) / sizeof(vba_version[0])))
62
+
63
/*
 * Sizes, in bytes, of the leading fields as labelled by the original author:
 *    2  magic
 *    4  version
 *    2  0x00 0xff
 *   22  unknown
 */
#define VBA56_DIRENT_RECORD_COUNT (2 + 4 + 2 + 22)

/* The fields above, plus a 2 byte type1 record count and 2 unknown bytes. */
#define VBA56_DIRENT_HEADER_SIZE (VBA56_DIRENT_RECORD_COUNT + 2 + 2)
70
+
71
/* Function: vba_readn
        Try hard to read the requested number of bytes

   Calls read() repeatedly until count bytes have been stored in buff, end
   of file is reached, or read() reports an error.

   Returns count when everything was read, the number of bytes actually
   read when end of file came first, and -1 when read() failed.
*/
int vba_readn(int fd, void *buff, unsigned int count)
{
        /* Walk the buffer through a byte pointer: arithmetic on void * is a
           compiler extension, not ISO C. */
        unsigned char *current = buff;
        unsigned int todo = count;
        ssize_t retval;

        do {
                retval = read(fd, current, todo);
                if (retval == 0) {
                        /* end of file: report the short count */
                        return (count - todo);
                }
                if (retval < 0) {
                        return -1;
                }
                todo -= (unsigned int) retval;
                current += retval;
        } while (todo > 0);

        return count;
}
97
+
98
/* Function: vba_writen
        Try hard to write the specified number of bytes

   Calls write() repeatedly until all count bytes from buff have been
   accepted or write() reports an error.

   Returns count on success and -1 when write() failed.
*/
int vba_writen(int fd, void *buff, unsigned int count)
{
        /* Walk the buffer through a byte pointer: arithmetic on void * is a
           compiler extension, not ISO C. */
        unsigned char *current = buff;
        unsigned int todo = count;
        ssize_t retval;

        do {
                retval = write(fd, current, todo);
                if (retval < 0) {
                        return -1;
                }
                todo -= (unsigned int) retval;
                current += retval;
        } while (todo > 0);

        return count;
}
121
+
122
/* Function: get_unicode_name
        Build a NUL terminated name from a buffer, looking only at every
        second byte (name[0], name[2], ...).

   Printable bytes are copied unchanged.  Any other byte becomes "_", and
   is additionally preceded by "_<digit>" when its value is in 0..9.

   Returns a newly allocated string that the caller must free, or NULL when
   name is NULL, size is not positive, the first byte is zero, or the
   allocation fails.
*/
char *get_unicode_name(char *name, int size)
{
        int i, j;
        char *newname;
        unsigned char c;

        if (!name || size <= 0 || *name == 0) {
                return NULL;
        }

        /* (size + 1) / 2 bytes are inspected and each can expand to three
           output bytes ("_N_"); one more byte holds the terminator.  A
           buffer of just size bytes can be overrun by such input. */
        newname = (char *) cli_malloc(3 * (((size_t) size + 1) / 2) + 1);
        if (!newname) {
                return NULL;
        }

        j = 0;
        for (i = 0; i < size; i += 2) {
                /* <ctype.h> functions require an unsigned char value */
                c = (unsigned char) name[i];
                if (isprint(c)) {
                        newname[j++] = name[i];
                } else {
                        if (c < 10) {
                                newname[j++] = '_';
                                newname[j++] = (char) (c + '0');
                        }
                        newname[j++] = '_';
                }
        }
        newname[j] = '\0';
        return newname;
}
150
+                                                                                                                                                                       
151
+vba_project_t *vba56_dir_read(const char *dir)
152
+{
153
+	unsigned char magic[2];
154
+	unsigned char version[4];
155
+	unsigned char *buff, *name;
156
+        unsigned char vba56_signature[] = { 0xcc, 0x61 };
157
+	int16_t record_count, length;
158
+	uint16_t ooff;
159
+	uint8_t byte_count;
160
+	uint32_t offset;
161
+	uint32_t LidA;  //Language identifiers
162
+	uint32_t LidB;
163
+	uint16_t CharSet;
164
+	uint16_t LenA;
165
+	uint32_t UnknownB;
166
+	uint32_t UnknownC;
167
+	uint16_t LenB;
168
+	uint16_t LenC;
169
+	uint16_t LenD;
170
+	int i, j, fd;
171
+	vba_project_t *vba_project;
172
+	char *fullname;
173
+
174
+	unsigned char fixed_octet[8] = { 0x06, 0x02, 0x01, 0x00, 0x08, 0x02, 0x00, 0x00 };
175
+
176
+
177
+	fullname = (char *) cli_malloc(strlen(dir) + 14);
178
+	sprintf(fullname, "%s/_VBA_PROJECT", dir);
179
+        fd = open(fullname, O_RDONLY);
180
+	free(fullname);
181
+
182
+        if (fd == -1) {
183
+                cli_errmsg("Open failed\n");
184
+                return NULL;
185
+        }
186
+
187
+	if (vba_readn(fd, &magic, 2) != 2) {
188
+		return NULL;
189
+	}
190
+	if (strncmp(magic, vba56_signature, 2) != 0) {
191
+		return NULL;
192
+	}
193
+
194
+	if (vba_readn(fd, &version, 4) != 4) {
195
+		return NULL;
196
+	}
197
+	for (i=0 ; i < NUM_VBA_VERSIONS ; i++) {
198
+		if (strncmp(version, vba_version[i].signature, 4) == 0) {
199
+			break;
200
+		}
201
+	}
202
+
203
+	if (i == NUM_VBA_VERSIONS) {
204
+		cli_errmsg("Unknown VBA version signature x0%x0x%x0x%x0x%x\n",
205
+			version[0], version[1], version[2], version[3]);
206
+		return NULL;
207
+	}
208
+
209
+	cli_dbgmsg("VBA Project: %s, VBA Version=%d\n", vba_version[i].name,
210
+				vba_version[i].vba_version);
211
+
212
+
213
+	/*****************************************/
214
+
215
+	/* two bytes, should be equal to 0x00ff */
216
+	if (vba_readn(fd, &ooff, 2) != 2) {
217
+		return NULL;
218
+	}
219
+
220
+	if (vba_readn(fd, &LidA, 4) != 4) {
221
+		return NULL;
222
+	}
223
+
224
+	if (vba_readn(fd, &LidA, 4) != 4) {
225
+		return NULL;
226
+	}
227
+
228
+	if (vba_readn(fd, &CharSet, 2) != 2) {
229
+		return NULL;
230
+	}
231
+	if (vba_readn(fd, &LenA, 2) != 2) {
232
+		return NULL;
233
+	}
234
+
235
+	if (vba_readn(fd, &UnknownB, 4) != 4) {
236
+		return NULL;
237
+	}
238
+	if (vba_readn(fd, &UnknownC, 4) != 4) {
239
+		return NULL;
240
+	}
241
+
242
+	if (vba_readn(fd, &LenB, 2) != 2) {
243
+		return NULL;
244
+	}
245
+	if (vba_readn(fd, &LenC, 2) != 2) {
246
+		return NULL;
247
+	}
248
+	if (vba_readn(fd, &LenD, 2) != 2) {
249
+		return NULL;
250
+	}
251
+
252
+	cli_dbgmsg(" LidA: %d\n LidB: %d\n CharSet: %d\n", LidA, LidB, CharSet);
253
+	cli_dbgmsg(" LenA: %d\n UnknownB: %d\n UnknownC: %d\n", LenA, UnknownB, UnknownC);
254
+	cli_dbgmsg(" LenB: %d\n LenC: %d\n LenD: %d\n", LenB, LenC, LenD);
255
+
256
+	record_count = LenC;
257
+	/*******************************************/
258
+
259
+	/* REPLACED THIS CODE WITH THE CODE ABOVE */
260
+	/* read the rest of the header. most of this is unknown */
261
+/*	buff = (char *) cli_malloc(24);
262
+	if (!buff || vba_readn(fd, buff, 24) != 24) {
263
+		return NULL;
264
+	}
265
+	free(buff);
266
+
267
+	if (vba_readn(fd, &record_count, 2) != 2) {
268
+		return NULL;
269
+	}
270
+	cli_dbgmsg("Record count: %d\n", record_count); */
271
+	/* read two bytes and throw them away */
272
+/*	if (vba_readn(fd, &length, 2) != 2) {
273
+		return NULL;
274
+	}*/
275
+
276
+	for (i=0; record_count >0 ; record_count--) {
277
+
278
+		if (vba_readn(fd, &length, 2) != 2) {
279
+			return NULL;
280
+		}
281
+		cli_dbgmsg ("record: %d.%d, length: %d, ", record_count, i, length);
282
+		buff = cli_malloc(length);
283
+		if (!buff) {
284
+			cli_errmsg("cli_malloc failed\n");
285
+			return NULL;
286
+		}
287
+		if (vba_readn(fd, buff, length) != length) {
288
+			cli_errmsg("read name failed\n");
289
+			return NULL;
290
+		}
291
+		name = get_unicode_name(buff, length);
292
+		cli_dbgmsg("name: %s\n", name);
293
+		free(buff);
294
+
295
+                /* Ignore twelve bytes from entries of type 'G'.
296
+		   Type 'C' entries come in pairs, the second also
297
+		   having a 12 byte trailer */
298
+		/* TODO: Need to check if types H(same as G) and D(same as C) exist */
299
+                if (!strncmp ("*\\G", name, 3)) {
300
+			buff = cli_malloc(12);
301
+                        if (vba_readn(fd, buff, 12) != 12) {
302
+				cli_errmsg("failed to read blob\n");
303
+                                free(buff);
304
+				free(name);
305
+				return NULL;
306
+                        }
307
+			free(buff);
308
+                } else if (!strncmp("*\\C", name, 3)) {
309
+			if (i == 1) {
310
+				buff = cli_malloc(12);
311
+                        	if (vba_readn(fd, buff, 12) != 12) {
312
+					cli_errmsg("failed to read blob\n");
313
+                                	free(buff);
314
+					free(name);
315
+					return NULL;
316
+                        	}
317
+				free(buff);
318
+				i = 0;
319
+			} else {
320
+				i = 1;
321
+				record_count++;
322
+			}
323
+		} else {
324
+			cli_errmsg("unknown record type!!\n\n");
325
+		}
326
+		free(name);
327
+	}
328
+
329
+	/* TODO: may need to seek forward 20 bytes here. Bleh! */
330
+
331
+	if (vba_readn(fd, &record_count, 2) != 2) {
332
+		return NULL;
333
+	}
334
+	cli_dbgmsg("\nVBA Record count: %d\n", record_count);
335
+	/*if (record_count <= 0) {
336
+		return TRUE;
337
+	}*/
338
+
339
+	lseek(fd, 2*record_count, SEEK_CUR);
340
+	lseek(fd, 4, SEEK_CUR);
341
+
342
+	/* Read fixed octet */
343
+	buff = cli_malloc(8);
344
+	if (!buff) {
345
+		return NULL;
346
+	}
347
+	if (vba_readn(fd, buff, 8) != 8) {
348
+		free(buff);
349
+		return NULL;
350
+	}
351
+	if (!strncmp(buff, fixed_octet, 8)) {
352
+		free(buff);
353
+		return NULL;
354
+	}
355
+	free(buff);
356
+	cli_dbgmsg("Read fixed octet ok\n");
357
+
358
+	/* junk some more stuff */
359
+	do {
360
+		if (vba_readn(fd, &ooff, 2) != 2) {
361
+			return NULL;
362
+		}
363
+	} while(ooff != 0xFFFF);
364
+	
365
+	if (vba_readn(fd, &ooff, 2) != 2) {
366
+		return NULL;
367
+	}
368
+
369
+	/* no idea what this stuff is */
370
+	if (ooff != 0xFFFF) {
371
+		lseek(fd, ooff, SEEK_CUR);
372
+	}
373
+	if (vba_readn(fd, &ooff, 2) != 2) {
374
+		return NULL;
375
+	}
376
+	if (ooff != 0xFFFF) {
377
+		lseek(fd, ooff, SEEK_CUR);
378
+	}
379
+	lseek(fd, 100, SEEK_CUR);
380
+
381
+	if (vba_readn(fd, &record_count, 2) != 2) {
382
+		return NULL;
383
+	}
384
+	cli_dbgmsg("\nVBA Record count: %d\n", record_count);
385
+	
386
+	vba_project = (vba_project_t *) cli_malloc(sizeof(struct vba_project_tag));
387
+	vba_project->name = (char **) cli_malloc(sizeof(char *) * record_count);
388
+	vba_project->dir = strdup(dir);
389
+	vba_project->offset = (uint32_t *) cli_malloc (sizeof(uint32_t) *
390
+					record_count);
391
+	vba_project->count = record_count;
392
+	for (i=0 ; i < record_count ; i++) {
393
+		if (vba_readn(fd, &length, 2) != 2) {
394
+			return NULL;
395
+		}
396
+		buff = cli_malloc(length);
397
+		if (!buff) {
398
+			cli_dbgmsg("cli_malloc failed\n");
399
+			return NULL;
400
+		}
401
+		if (vba_readn(fd, buff, length) != length) {
402
+			cli_dbgmsg("read name failed\n");
403
+			return NULL;
404
+		}
405
+		vba_project->name[i] = get_unicode_name(buff, length);
406
+		cli_dbgmsg("project name: %s, ", vba_project->name[i]);
407
+		free(buff);
408
+
409
+		/* some kind of string identifier ?? */
410
+		if (vba_readn(fd, &length, 2) != 2) {
411
+			return NULL;
412
+		}
413
+		lseek(fd, length, SEEK_CUR);
414
+
415
+		/* unknown stuff */
416
+		if (vba_readn(fd, &ooff, 2) != 2) {
417
+			return NULL;
418
+		}
419
+		if (ooff == 0xFFFF) {
420
+			lseek(fd, 2, SEEK_CUR);
421
+			if (vba_readn(fd, &ooff, 2) != 2) {
422
+				return NULL;
423
+			}
424
+			lseek(fd, ooff, SEEK_CUR);
425
+		} else {
426
+			lseek(fd, 2 + ooff, SEEK_CUR);
427
+		}
428
+
429
+		lseek(fd, 8, SEEK_CUR);
430
+		if (vba_readn(fd, &byte_count, 1) != 1) {
431
+			return NULL;
432
+		}
433
+		for (j=0 ; j<byte_count; j++) {
434
+			lseek(fd, 8, SEEK_CUR);
435
+		}
436
+		lseek(fd, 6, SEEK_CUR);
437
+		if (vba_readn(fd, &offset, 4) != 4) {
438
+			return NULL;
439
+		}
440
+		vba_project->offset[i] = offset;
441
+		cli_dbgmsg("offset:%d\n", offset);
442
+		lseek(fd, 2, SEEK_CUR);
443
+	}
444
+	
445
+	
446
+	{ /* There appears to be some code in here */
447
+	
448
+	off_t foffset;
449
+
450
+		foffset = lseek(fd, 0, SEEK_CUR);
451
+		cli_dbgmsg("\nOffset: 0x%x\n", (unsigned int)foffset);
452
+	}
453
+	close(fd);
454
+	return vba_project;
455
+}
456
+
457
+#define VBA_COMPRESSION_WINDOW 4096
458
+
459
+void byte_array_append(byte_array_t *array, unsigned char *src, unsigned int len)
460
+{
461
+	if (array->length == 0) {
462
+		array->data = cli_malloc(len);
463
+		array->length = len;
464
+		strncpy(array->data, src, len);
465
+	} else {
466
+		array->data = realloc(array->data, array->length+len);
467
+		strncpy(array->data+array->length, src, len);
468
+		array->length += len;
469
+	}
470
+}
471
+
472
+unsigned char *vba_decompress(int fd, uint32_t offset)
473
+{
474
+	unsigned int i, pos=0, shift, win_pos, clean=TRUE, mask, distance;
475
+	uint8_t flag;
476
+	uint16_t token, len;
477
+	unsigned char buffer[VBA_COMPRESSION_WINDOW];
478
+	byte_array_t result;
479
+	
480
+	result.length=0;
481
+	result.data=NULL;
482
+	
483
+	lseek(fd, offset+3, SEEK_SET); /* 1byte ?? , 2byte length ?? */ 
484
+	
485
+	while (vba_readn(fd, &flag, 1) == 1) {
486
+		for (mask = 1; mask < 0x100; mask<<=1) {
487
+			if (flag & mask) {
488
+				if (vba_readn(fd, &token, 2) != 2) {
489
+					return FALSE;
490
+				}
491
+				win_pos = pos % VBA_COMPRESSION_WINDOW;
492
+				if (win_pos <= 0x80) {
493
+					if (win_pos <= 0x20) {
494
+						shift = (win_pos <= 0x10) ? 12:11;
495
+					} else {
496
+						shift = (win_pos <= 0x40) ? 10:9;
497
+					}
498
+				} else {
499
+					if (win_pos <= 0x200) {
500
+						shift = (win_pos <= 0x100) ? 8:7;
501
+					} else if (win_pos <= 0x800) {
502
+						shift = (win_pos <= 0x400) ? 6:5;
503
+					} else {
504
+						shift = 4;
505
+					}
506
+				}
507
+				len = (token & ((1 << shift) -1)) + 3;
508
+				distance = token >> shift;
509
+				clean = TRUE;
510
+				
511
+				for (i=0 ; i < len; i++) {
512
+					unsigned int srcpos;
513
+					unsigned char c;
514
+					
515
+					srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
516
+					c = buffer[srcpos];
517
+					buffer[pos++ % VBA_COMPRESSION_WINDOW]= c;
518
+				}
519
+			} else {
520
+				if ((pos != 0) &&
521
+					((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) {
522
+					
523
+					if (vba_readn(fd, &token, 2) != 2) {
524
+						return FALSE;
525
+					}
526
+					clean = FALSE;
527
+					byte_array_append(&result, buffer, VBA_COMPRESSION_WINDOW);
528
+					break;
529
+				}
530
+				if (vba_readn(fd, buffer+(pos%VBA_COMPRESSION_WINDOW), 1) == 1){
531
+					pos++;
532
+				}
533
+				clean = TRUE;
534
+			}
535
+		}
536
+	}
537
+			
538
+	if (pos % VBA_COMPRESSION_WINDOW) {
539
+		byte_array_append(&result, buffer, pos % VBA_COMPRESSION_WINDOW);
540
+	}
541
+	return result.data;
542
+
543
+}
544
+
545
+/*
546
+int vba_dump(vba_project_t *vba_project)
547
+{
548
+	int i, fd;
549
+	unsigned char *data;
550
+	char *fullname;
551
+
552
+	for (i=0 ; i<vba_project->count ; i++) {
553
+	
554
+		cli_dbgmsg("\n\n*****************************\n");
555
+		cli_dbgmsg("Deocding file: %s\n", vba_project->name[i]);
556
+		cli_dbgmsg("*****************************\n");
557
+		fullname = (char *) cli_malloc(strlen(vba_project->dir) + strlen(vba_project->name[i]) + 2);
558
+		sprintf(fullname, "%s/%s", vba_project->dir, vba_project->name[i]);
559
+		fd = open(fullname, O_RDONLY);
560
+		free(fullname);
561
+		if (fd == -1) {
562
+			cli_dbgmsg("Open failed\n");
563
+			return FALSE;
564
+		}
565
+		
566
+		data = vba_decompress(fd, vba_project->offset[i]);
567
+		cli_dbgmsg("%s\n", data);
568
+		close(fd);
569
+
570
+	}
571
+	return TRUE;
572
+}
573
+
574
+int main(int argc, char *argv[])
575
+{
576
+        int retval;
577
+	char *dirname=NULL;
578
+	vba_project_t *vba_project;
579
+	
580
+        while ((retval = getopt(argc, argv, "d:w")) != -1) {
581
+                switch (retval) {
582
+                        case 'd':
583
+                                dirname = optarg;
584
+                                break;
585
+                        case ':':
586
+                                cli_dbgmsg("missing option parameter\n");
587
+                                exit(-1);
588
+                        case '?':
589
+                                cli_dbgmsg("unknown option\n");
590
+                                break;
591
+                }
592
+        }
593
+ 
594
+	vba_project = vba56_dir_read(dirname);
595
+
596
+	if (vba_project != NULL) {
597
+		vba_dump(vba_project);
598
+	}
599
+	return TRUE;
600
+}
601
+*/
0 602
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+/*
1
+ *  Extract VBA source code from MS Office documents
2
+ *
3
+ *  Copyright (C) 2004 trog@uncon.org
4
+ *
5
+ *  This code is based on the OpenOffice and libgsf sources.
6
+ *                  
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License as published by
9
+ *  the Free Software Foundation; either version 2 of the License, or
10
+ *  (at your option) any later version.
11
+ *
12
+ *  This program is distributed in the hope that it will be useful,
13
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ *  GNU General Public License for more details.
16
+ *
17
+ *  You should have received a copy of the GNU General Public License
18
+ *  along with this program; if not, write to the Free Software
19
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
+ */
21
+
22
+#ifndef __VBA_EXTRACT_H
23
+#define __VBA_EXTRACT_H
24
+
25
+#include <unistd.h>
26
+#include <sys/types.h>
27
+#include <stdint.h>
28
+#include <stdlib.h>
29
+
30
/* Names and stream offsets collected by vba56_dir_read(). */
typedef struct vba_project_tag {
	int count;		/* number of entries in name[] and offset[] */
	char **name;		/* one heap string per entry */
	uint32_t *offset;	/* offset[i] is passed to vba_decompress() for name[i] */
	char *dir;		/* copy of the directory given to vba56_dir_read() */
} vba_project_t;

/* Both functions return pointers, so they must be declared before use: an
   undeclared call is taken to return int, which truncates the pointer on
   platforms where pointers are wider than int. */
vba_project_t *vba56_dir_read(const char *dir);
unsigned char *vba_decompress(int fd, uint32_t offset);
36
+
37
+#endif