Browse code

More tidies

git-svn: trunk@3447

Nigel Horne authored on 2007/12/21 20:17:25
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Fri Dec 21 10:39:22 GMT 2007 (njh)
2
+----------------------------------
3
+  * libclamav/vba_extract.c:	More tidies
4
+
1 5
 Thu Dec 20 22:48:35 GMT 2007 (njh)
2 6
 ----------------------------------
3 7
   * clamav-milter:	Removed calls to cli_*msg()
... ...
@@ -1,10 +1,8 @@
1 1
 /*
2 2
  *  Extract VBA source code for component MS Office Documents
3 3
  *
4
- *  Copyright (C) 2004-2005 trog@uncon.org
5
- *
6
- *  This code is based on the OpenOffice and libgsf sources.
7
- *  Libgsf is available under the LGPL.
4
+ *  Copyright (C) 2007 SourceFIRE Inc.
5
+ *  Maintained by trog@uncon.org and njh@clamav.net
8 6
  *
9 7
  *  This program is free software; you can redistribute it and/or modify
10 8
  *  it under the terms of the GNU General Public License as published by
... ...
@@ -30,8 +28,6 @@
30 30
 #ifdef	HAVE_UNISTD_H
31 31
 #include <unistd.h>
32 32
 #endif
33
-#include <sys/types.h>
34
-#include <sys/stat.h>
35 33
 #include <fcntl.h>
36 34
 #include <stdlib.h>
37 35
 #include <ctype.h>
... ...
@@ -53,6 +49,7 @@
53 53
 #define PPT_LZW_BUFFSIZE 8192
54 54
 #define VBA_COMPRESSION_WINDOW 4096
55 55
 #define	MIDDLE_SIZE	20
56
+#define	MAX_VBA_COUNT	1000	/* If there's more than 1000 macros something's up! */
56 57
 
57 58
 #ifndef HAVE_ATTRIB_PACKED
58 59
 #define __attribute__(x)
... ...
@@ -64,34 +61,34 @@
64 64
 struct vba56_header {
65 65
 	unsigned char magic[2];
66 66
 	unsigned char version[4];
67
-	unsigned	char	ignore[28];
67
+	unsigned char ignore[28];
68 68
 };
69 69
 
70 70
 typedef struct {
71 71
 	unsigned char signature[4];
72 72
 	const char *name;
73
-	int is_mac;
73
+	int	big_endian;	/* e.g. MAC Office */
74 74
 } vba_version_t;
75 75
 
76 76
 static	int	skip_past_nul(int fd);
77
-static	int	read_uint16(int fd, uint16_t *u, int is_mac);
78
-static	int	read_uint32(int fd, uint32_t *u, int is_mac);
77
+static	int	read_uint16(int fd, uint16_t *u, int big_endian);
78
+static	int	read_uint32(int fd, uint32_t *u, int big_endian);
79 79
 static	int	seekandread(int fd, off_t offset, int whence, void *data, size_t len);
80 80
 static	vba_project_t	*create_vba_project(int record_count, const char *dir);
81 81
 static	uint32_t	sigtouint32(const unsigned char *fourbytes);
82 82
 
83
-static uint16_t vba_endian_convert_16(uint16_t value, int is_mac)
83
+static uint16_t vba_endian_convert_16(uint16_t value, int big_endian)
84 84
 {
85
-	if (is_mac)
85
+	if (big_endian)
86 86
 		return (uint16_t)be16_to_host(value);
87 87
 	else
88 88
 		return le16_to_host(value);
89 89
 }
90 90
 
91 91
 /* Seems to be a duplicate of riff_endian_convert_32() */
92
-static uint32_t vba_endian_convert_32(uint32_t value, int is_mac)
92
+static uint32_t vba_endian_convert_32(uint32_t value, int big_endian)
93 93
 {
94
-	if (is_mac)
94
+	if (big_endian)
95 95
 		return be32_to_host(value);
96 96
 	else
97 97
 		return le32_to_host(value);
... ...
@@ -117,7 +114,7 @@ static const vba_version_t vba_versions[] = {
117 117
 };
118 118
 
119 119
 static char *
120
-get_unicode_name(const char *name, int size, int is_mac)
120
+get_unicode_name(const char *name, int size, int big_endian)
121 121
 {
122 122
 	int i, increment;
123 123
 	char *newname, *ret;
... ...
@@ -129,12 +126,12 @@ get_unicode_name(const char *name, int size, int is_mac)
129 129
 	if(newname == NULL)
130 130
 		return NULL;
131 131
 
132
-	if((!is_mac) && (size & 0x1)) {
132
+	if((!big_endian) && (size & 0x1)) {
133 133
 		cli_dbgmsg("get_unicode_name: odd number of bytes %d\n", size);
134 134
 		--size;
135 135
 	}
136 136
 
137
-	increment = (is_mac) ? 1 : 2;
137
+	increment = (big_endian) ? 1 : 2;
138 138
 	ret = newname;
139 139
 
140 140
 	for(i = 0; i < size; i += increment) {
... ...
@@ -193,7 +190,7 @@ static void vba56_test_middle(int fd)
193 193
 }
194 194
 
195 195
 static int
196
-vba_read_project_strings(int fd, int is_mac)
196
+vba_read_project_strings(int fd, int big_endian)
197 197
 {
198 198
 	unsigned char *buf = NULL;
199 199
 	uint16_t buflen = 0;
... ...
@@ -203,7 +200,7 @@ vba_read_project_strings(int fd, int is_mac)
203 203
 		uint16_t length;
204 204
 		char *name;
205 205
 
206
-		if(!read_uint16(fd, &length, is_mac)) {
206
+		if(!read_uint16(fd, &length, big_endian)) {
207 207
 			if(buf)
208 208
 				free(buf);
209 209
 			return FALSE;
... ...
@@ -230,12 +227,12 @@ vba_read_project_strings(int fd, int is_mac)
230 230
 			lseek(fd, offset, SEEK_SET);
231 231
 			break;
232 232
 		}
233
-		name = get_unicode_name((const char *)buf, length, is_mac);
233
+		name = get_unicode_name((const char *)buf, length, big_endian);
234 234
 		cli_dbgmsg("length: %d, name: %s\n", length, (name) ? name : "[null]");
235 235
 
236 236
 		if((name == NULL) || (memcmp("*\\", name, 2) != 0) ||
237 237
 		   (strchr("GCHD", name[2]) == NULL)) {
238
-			/* Unknown type - probably ran out of strings - rewind */
238
+			/* Not a string */
239 239
 			lseek(fd, -(length+2), SEEK_CUR);
240 240
 			if(name)
241 241
 				free(name);
... ...
@@ -243,7 +240,7 @@ vba_read_project_strings(int fd, int is_mac)
243 243
 		}
244 244
 		free(name);
245 245
 
246
-		if(!read_uint16(fd, &length, is_mac)) {
246
+		if(!read_uint16(fd, &length, big_endian)) {
247 247
 			if(buf)
248 248
 				free(buf);
249 249
 			return FALSE;
... ...
@@ -269,7 +266,7 @@ cli_vba_readdir(const char *dir)
269 269
 	const unsigned char vba56_signature[] = { 0xcc, 0x61 };
270 270
 	uint16_t record_count, buflen, ffff, byte_count;
271 271
 	uint32_t offset, sig;
272
-	int i, fd, is_mac;
272
+	int i, fd, big_endian;
273 273
 	vba_project_t *vba_project;
274 274
 	const vba_version_t *v;
275 275
 	struct vba56_header v56h;
... ...
@@ -313,11 +310,11 @@ cli_vba_readdir(const char *dir)
313 313
 		switch(v56h.version[3]) {
314 314
 			case 0x01:
315 315
 				cli_warnmsg("Guessing little-endian\n");
316
-				is_mac = FALSE;
316
+				big_endian = FALSE;
317 317
 				break;
318 318
 			case 0x0E:
319 319
 				cli_warnmsg("Guessing big-endian\n");
320
-				is_mac = TRUE;
320
+				big_endian = TRUE;
321 321
 				break;
322 322
 			default:
323 323
 				cli_warnmsg("Unable to guess VBA type\n");
... ...
@@ -326,10 +323,10 @@ cli_vba_readdir(const char *dir)
326 326
 		}
327 327
 	} else {
328 328
 		cli_dbgmsg("VBA Project: %s\n", v->name);
329
-		is_mac = v->is_mac;
329
+		big_endian = v->big_endian;
330 330
 	}
331 331
 
332
-	if (!vba_read_project_strings(fd, is_mac)) {
332
+	if (!vba_read_project_strings(fd, big_endian)) {
333 333
 		close(fd);
334 334
 		return NULL;
335 335
 	}
... ...
@@ -350,7 +347,7 @@ cli_vba_readdir(const char *dir)
350 350
 	if (ffff != 0xFFFF)
351 351
 		lseek(fd, 1, SEEK_CUR);
352 352
 
353
-	if(!read_uint16(fd, &ffff, is_mac)) {
353
+	if(!read_uint16(fd, &ffff, big_endian)) {
354 354
 		close(fd);
355 355
 		return NULL;
356 356
 	}
... ...
@@ -358,7 +355,7 @@ cli_vba_readdir(const char *dir)
358 358
 	if(ffff != 0xFFFF)
359 359
 		lseek(fd, ffff, SEEK_CUR);
360 360
 
361
-	if(!read_uint16(fd, &ffff, is_mac)) {
361
+	if(!read_uint16(fd, &ffff, big_endian)) {
362 362
 		close(fd);
363 363
 		return NULL;
364 364
 	}
... ...
@@ -368,18 +365,19 @@ cli_vba_readdir(const char *dir)
368 368
 
369 369
 	lseek(fd, ffff + 100, SEEK_CUR);
370 370
 
371
-	if(!read_uint16(fd, &record_count, is_mac)) {
371
+	if(!read_uint16(fd, &record_count, big_endian)) {
372 372
 		close(fd);
373 373
 		return NULL;
374 374
 	}
375 375
 	cli_dbgmsg("VBA Record count: %d\n", record_count);
376 376
 	if (record_count == 0) {
377
+		/* No macros, assume clean */
377 378
 		close(fd);
378 379
 		return NULL;
379 380
 	}
380
-	if (record_count > 1000) {
381
+	if (record_count > MAX_VBA_COUNT) {
381 382
 		/* Almost certainly an error */
382
-		cli_dbgmsg("VBA Record count too big");
383
+		cli_dbgmsg("VBA Record count too big\n");
383 384
 		close(fd);
384 385
 		return NULL;
385 386
 	}
... ...
@@ -394,7 +392,7 @@ cli_vba_readdir(const char *dir)
394 394
 	for(i = 0; i < record_count; i++) {
395 395
 		uint16_t length;
396 396
 
397
-		if(!read_uint16(fd, &length, is_mac))
397
+		if(!read_uint16(fd, &length, big_endian))
398 398
 			break;
399 399
 
400 400
 		if (length == 0) {
... ...
@@ -412,7 +410,7 @@ cli_vba_readdir(const char *dir)
412 412
 			cli_dbgmsg("read name failed\n");
413 413
 			break;
414 414
 		}
415
-		vba_project->name[i] = get_unicode_name((const char *)buf, length, is_mac);
415
+		vba_project->name[i] = get_unicode_name((const char *)buf, length, big_endian);
416 416
 		if (!vba_project->name[i]) {
417 417
 			offset = lseek(fd, 0, SEEK_CUR);
418 418
 			vba_project->name[i] = (char *) cli_malloc(18);
... ...
@@ -422,21 +420,19 @@ cli_vba_readdir(const char *dir)
422 422
 		}
423 423
 		cli_dbgmsg("project name: %s\n", vba_project->name[i]);
424 424
 
425
-		/* some kind of string identifier ?? */
426
-		if(!read_uint16(fd, &length, is_mac)) {
425
+		if(!read_uint16(fd, &length, big_endian)) {
427 426
 			free(vba_project->name[i]);
428 427
 			break;
429 428
 		}
430 429
 		lseek(fd, length, SEEK_CUR);
431 430
 
432
-		/* unknown stuff */
433
-		if(!read_uint16(fd, &ffff, is_mac)) {
431
+		if(!read_uint16(fd, &ffff, big_endian)) {
434 432
 			free(vba_project->name[i]);
435 433
 			break;
436 434
 		}
437 435
 		if (ffff == 0xFFFF) {
438 436
 			lseek(fd, 2, SEEK_CUR);
439
-			if(!read_uint16(fd, &ffff, is_mac)) {
437
+			if(!read_uint16(fd, &ffff, big_endian)) {
440 438
 				free(vba_project->name[i]);
441 439
 				break;
442 440
 			}
... ...
@@ -444,12 +440,12 @@ cli_vba_readdir(const char *dir)
444 444
 		} else
445 445
 			lseek(fd, ffff + 10, SEEK_CUR);
446 446
 
447
-		if(!read_uint16(fd, &byte_count, is_mac)) {
447
+		if(!read_uint16(fd, &byte_count, big_endian)) {
448 448
 			free(vba_project->name[i]);
449 449
 			break;
450 450
 		}
451 451
 		lseek(fd, (8 * byte_count) + 5, SEEK_CUR);
452
-		if(!read_uint32(fd, &offset, is_mac)) {
452
+		if(!read_uint32(fd, &offset, big_endian)) {
453 453
 			free(vba_project->name[i]);
454 454
 			break;
455 455
 		}
... ...
@@ -464,7 +460,6 @@ cli_vba_readdir(const char *dir)
464 464
 	close(fd);
465 465
 
466 466
 	if(i < record_count) {
467
-		/* above loop failed */
468 467
 		while(--i >= 0)
469 468
 			free(vba_project->name[i]);
470 469
 
... ...
@@ -583,14 +578,13 @@ ole_copy_file_data(int s, int d, uint32_t len)
583 583
 	unsigned char data[FILEBUFF];
584 584
 
585 585
 	while(len > 0) {
586
-		unsigned int todo = MIN(sizeof(data), len);
587
-		unsigned int count = cli_readn(s, data, todo);
586
+		int todo = MIN(sizeof(data), len);
588 587
 
589
-		if(count != todo)
588
+		if(cli_readn(s, data, (unsigned int)todo) != todo)
590 589
 			break;
591
-		if((unsigned int)cli_writen(d, data, count) != count)
590
+		if(cli_writen(d, data, (unsigned int)todo) != todo)
592 591
 			break;
593
-		len -= count;
592
+		len -= todo;
594 593
 	}
595 594
 }
596 595
 
... ...
@@ -620,19 +614,19 @@ cli_decode_ole_object(int fd, const char *dir)
620 620
 			return -1;
621 621
 		}
622 622
 
623
-		/* Skip attachment name */
623
+		/* Attachment name */
624 624
 		if(!skip_past_nul(fd))
625 625
 			return -1;
626 626
 
627
-		/* Skip attachment full path */
627
+		/* Attachment full path */
628 628
 		if(!skip_past_nul(fd))
629 629
 			return -1;
630 630
 
631
-		/* Skip unknown data */
631
+		/* ??? */
632 632
 		if(lseek(fd, 8, SEEK_CUR) == -1)
633 633
 			return -1;
634 634
 
635
-		/* Skip attachment full path */
635
+		/* Attachment full path */
636 636
 		if(!skip_past_nul(fd))
637 637
 			return -1;
638 638
 
... ...
@@ -640,10 +634,14 @@ cli_decode_ole_object(int fd, const char *dir)
640 640
 			return -1;
641 641
 	}
642 642
 	snprintf(fullname, sizeof(fullname) - 1, "%s/_clam_ole_object", dir);
643
-	ofd = open(fullname, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, 0600);
643
+	ofd = open(fullname, O_RDWR|O_CREAT|O_TRUNC|O_BINARY|O_EXCL,
644
+		S_IWUSR|S_IRUSR);
644 645
 	if (ofd < 0) {
646
+		cli_warnmsg("cli_decode_ole_object: can't create %s\n",
647
+			fullname);
645 648
 		return -1;
646 649
 	}
650
+
647 651
 	ole_copy_file_data(fd, ofd, object_size);
648 652
 	lseek(ofd, 0, SEEK_SET);
649 653
 	return ofd;
... ...
@@ -673,8 +671,8 @@ ppt_read_atom_header(int fd, atom_header_t *atom_header)
673 673
 		return FALSE;
674 674
 	}
675 675
 	v = vba_endian_convert_16(h.ver, FALSE);
676
-	cli_dbgmsg("\tversion: 0x%.2x\n", (uint8_t)(v & 0xF));
677
-	cli_dbgmsg("\tinstance: 0x%.2x\n", (uint16_t)(v >> 4));
676
+	cli_dbgmsg("\tversion: 0x%.2x\n", v & 0xF);
677
+	cli_dbgmsg("\tinstance: 0x%.2x\n", v >> 4);
678 678
 
679 679
 	atom_header->type = vba_endian_convert_16(h.type, FALSE);
680 680
 	cli_dbgmsg("\ttype: 0x%.4x\n", atom_header->type);
... ...
@@ -685,11 +683,13 @@ ppt_read_atom_header(int fd, atom_header_t *atom_header)
685 685
 }
686 686
 
687 687
 /*
688
- * TODO: combine shared code with flatedecode()
688
+ * TODO: combine shared code with flatedecode() or cli_unzip_single()
689
+ *	Needs cli_unzip_single to have a "length" argument
689 690
  */
690
-static int ppt_unlzw(const char *dir, int fd, uint32_t length)
691
+static int
692
+ppt_unlzw(const char *dir, int fd, uint32_t length)
691 693
 {
692
-	int ofd, retval;
694
+	int ofd;
693 695
 	uint32_t bufflen;
694 696
 	z_stream stream;
695 697
 	unsigned char inbuff[PPT_LZW_BUFFSIZE], outbuff[PPT_LZW_BUFFSIZE];
... ...
@@ -698,9 +698,10 @@ static int ppt_unlzw(const char *dir, int fd, uint32_t length)
698 698
 	snprintf(fullname, sizeof(fullname) - 1, "%s/ppt%.8lx.doc",
699 699
 		dir, (long)lseek(fd, 0L, SEEK_CUR));
700 700
 
701
-	ofd = open(fullname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
701
+	ofd = open(fullname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY|O_EXCL,
702
+		S_IWUSR|S_IRUSR);
702 703
 	if (ofd == -1) {
703
-		cli_dbgmsg("ppt_unlzw Open outfile failed\n");
704
+		cli_warnmsg("ppt_unlzw: can't create %s\n", fullname);
704 705
 		return FALSE;
705 706
 	}
706 707
 
... ...
@@ -719,9 +720,11 @@ static int ppt_unlzw(const char *dir, int fd, uint32_t length)
719 719
 	}
720 720
 	length -= stream.avail_in;
721 721
 
722
-	retval = inflateInit(&stream);
723
-	if (retval != Z_OK) {
724
-		cli_dbgmsg("ppt_unlzw !Z_OK: %d\n", retval);
722
+	if(inflateInit(&stream) != Z_OK) {
723
+		close(ofd);
724
+		unlink(fullname);
725
+		cli_warnmsg("ppt_unlzw: inflateInit failed\n");
726
+		return FALSE;
725 727
 	}
726 728
 
727 729
 	bufflen = stream.avail_in;
... ...
@@ -768,15 +771,15 @@ ppt_stream_iter(int fd, const char *dir)
768 768
 			return NULL;
769 769
 
770 770
 		if(atom_header.type == 0x1011) {
771
-			uint32_t ole_id, length;
771
+			uint32_t length;
772 772
 
773
-			if(!read_uint32(fd, &ole_id, FALSE)) {
774
-				cli_dbgmsg("read ole_id failed\n");
773
+			/* Skip over ID */
774
+			if(lseek(fd, sizeof(uint32_t), SEEK_CUR) == -1) {
775
+				cli_dbgmsg("ppt_stream_iter: seek failed\n");
775 776
 				return NULL;
776 777
 			}
777 778
 			length = atom_header.length - 4;
778
-			cli_dbgmsg("OleID: %d, length: %d\n",
779
-					(int)ole_id, (int)length);
779
+			cli_dbgmsg("length: %d\n", (int)length);
780 780
 			if (!ppt_unlzw(dir, fd, length)) {
781 781
 				cli_dbgmsg("ppt_unlzw failed\n");
782 782
 				return NULL;
... ...
@@ -1043,8 +1046,8 @@ word_skip_macro_extnames(int fd)
1043 1043
 		else
1044 1044
 			offset = (off_t)length;
1045 1045
 
1046
-		offset += sizeof(uint16_t);	/* numref */
1047
-		if(lseek(fd, offset, SEEK_CUR) == -1) {
1046
+		/* ignore numref as well */
1047
+		if(lseek(fd, offset + sizeof(uint16_t), SEEK_CUR) == -1) {
1048 1048
 			cli_dbgmsg("read macro_extnames failed to seek\n");
1049 1049
 			return FALSE;
1050 1050
 		}
... ...
@@ -1062,7 +1065,7 @@ word_skip_macro_intnames(int fd)
1062 1062
 		cli_dbgmsg("read macro_intnames failed\n");
1063 1063
 		return FALSE;
1064 1064
 	}
1065
-	cli_dbgmsg("int names count: %u\n", (unsigned int)count);
1065
+	cli_dbgmsg("intnames count: %u\n", (unsigned int)count);
1066 1066
 
1067 1067
 	while(count-- > 0) {
1068 1068
 		uint8_t length;
... ...
@@ -1127,7 +1130,7 @@ cli_wm_readdir(const char *dir)
1127 1127
 	done = FALSE;
1128 1128
 	memset(&macro_info, '\0', sizeof(macro_info));
1129 1129
 
1130
-	while ((lseek(fd, 0, SEEK_CUR) < end_offset) && !done) {
1130
+	while((lseek(fd, 0, SEEK_CUR) < end_offset) && !done) {
1131 1131
 		if (cli_readn(fd, &info_id, 1) != 1) {
1132 1132
 			cli_dbgmsg("read macro_info failed\n");
1133 1133
 			break;
... ...
@@ -1155,14 +1158,8 @@ cli_wm_readdir(const char *dir)
1155 1155
 				if(!word_skip_macro_intnames(fd))
1156 1156
 					done = TRUE;
1157 1157
 				break;
1158
-			case 0x12:
1159
-				/* No sure about these, always seems to
1160
-				come after the macros though, so finish
1161
-				*/
1162
-				done = TRUE;
1163
-				break;
1164
-			case 0x40:
1165
-				/* end marker */
1158
+			case 0x40:	/* end marker */
1159
+			case 0x12:	/* ??? */
1166 1160
 				done = TRUE;
1167 1161
 				break;
1168 1162
 			default:
... ...
@@ -1260,12 +1257,12 @@ skip_past_nul(int fd)
1260 1260
  * Read 2 bytes as a 16-bit number, host byte order. Return success or fail
1261 1261
  */
1262 1262
 static int
1263
-read_uint16(int fd, uint16_t *u, int is_mac)
1263
+read_uint16(int fd, uint16_t *u, int big_endian)
1264 1264
 {
1265 1265
 	if(cli_readn(fd, u, sizeof(uint16_t)) != sizeof(uint16_t))
1266 1266
 		return FALSE;
1267 1267
 
1268
-	*u = vba_endian_convert_16(*u, is_mac);
1268
+	*u = vba_endian_convert_16(*u, big_endian);
1269 1269
 
1270 1270
 	return TRUE;
1271 1271
 }
... ...
@@ -1274,12 +1271,12 @@ read_uint16(int fd, uint16_t *u, int is_mac)
1274 1274
  * Read 4 bytes as a 32-bit number, host byte order. Return success or fail
1275 1275
  */
1276 1276
 static int
1277
-read_uint32(int fd, uint32_t *u, int is_mac)
1277
+read_uint32(int fd, uint32_t *u, int big_endian)
1278 1278
 {
1279 1279
 	if(cli_readn(fd, u, sizeof(uint32_t)) != sizeof(uint32_t))
1280 1280
 		return FALSE;
1281 1281
 
1282
-	*u = vba_endian_convert_32(*u, is_mac);
1282
+	*u = vba_endian_convert_32(*u, big_endian);
1283 1283
 
1284 1284
 	return TRUE;
1285 1285
 }