Browse code

add support for old fashioned tar archives

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1421 77e5149b-7576-45b1-b177-96237e5ba77b

Tomasz Kojm authored on 2005/03/23 06:26:27
Showing 10 changed files
... ...
@@ -1,3 +1,7 @@
1
+Tue Mar 22 22:22:30 CET 2005 (tk)
2
+---------------------------------
3
+  * libclamav: add support for old fashioned tar archives
4
+
1 5
 Tue Mar 22 11:27:58 GMT 2005 (njh)
2 6
 ----------------------------------
3 7
   * libclamav/mbox.c:	Not all Worm.Bagle.AC were being caught
... ...
@@ -120,6 +120,8 @@ libclamav_la_SOURCES = \
120 120
 	special.c \
121 121
 	special.h \
122 122
 	binhex.c \
123
-	binhex.h
123
+	binhex.h \
124
+	is_tar.c \
125
+	is_tar.h
124 126
 
125 127
 lib_LTLIBRARIES = libclamav.la
... ...
@@ -80,7 +80,7 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
80 80
 	text.lo ole2_extract.lo vba_extract.lo msexpand.lo pe.lo \
81 81
 	cabd.lo lzxd.lo mszipd.lo qtmd.lo system.lo upx.lo htmlnorm.lo \
82 82
 	chmunpack.lo rebuildpe.lo petite.lo fsg.lo line.lo untar.lo \
83
-	special.lo binhex.lo
83
+	special.lo binhex.lo is_tar.lo
84 84
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
85 85
 DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
86 86
 depcomp = $(SHELL) $(top_srcdir)/depcomp
... ...
@@ -89,8 +89,9 @@ am__depfiles_maybe = depfiles
89 89
 @AMDEP_TRUE@	./$(DEPDIR)/cabd.Plo ./$(DEPDIR)/chmunpack.Plo \
90 90
 @AMDEP_TRUE@	./$(DEPDIR)/cvd.Plo ./$(DEPDIR)/dsig.Plo \
91 91
 @AMDEP_TRUE@	./$(DEPDIR)/filetypes.Plo ./$(DEPDIR)/fsg.Plo \
92
-@AMDEP_TRUE@	./$(DEPDIR)/htmlnorm.Plo ./$(DEPDIR)/line.Plo \
93
-@AMDEP_TRUE@	./$(DEPDIR)/lzxd.Plo ./$(DEPDIR)/matcher-ac.Plo \
92
+@AMDEP_TRUE@	./$(DEPDIR)/htmlnorm.Plo ./$(DEPDIR)/is_tar.Plo \
93
+@AMDEP_TRUE@	./$(DEPDIR)/line.Plo ./$(DEPDIR)/lzxd.Plo \
94
+@AMDEP_TRUE@	./$(DEPDIR)/matcher-ac.Plo \
94 95
 @AMDEP_TRUE@	./$(DEPDIR)/matcher-bm.Plo ./$(DEPDIR)/matcher.Plo \
95 96
 @AMDEP_TRUE@	./$(DEPDIR)/mbox.Plo ./$(DEPDIR)/md5.Plo \
96 97
 @AMDEP_TRUE@	./$(DEPDIR)/message.Plo ./$(DEPDIR)/msexpand.Plo \
... ...
@@ -331,7 +332,9 @@ libclamav_la_SOURCES = \
331 331
 	special.c \
332 332
 	special.h \
333 333
 	binhex.c \
334
-	binhex.h
334
+	binhex.h \
335
+	is_tar.c \
336
+	is_tar.h
335 337
 
336 338
 lib_LTLIBRARIES = libclamav.la
337 339
 all: all-am
... ...
@@ -412,6 +415,7 @@ distclean-compile:
412 412
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filetypes.Plo@am__quote@
413 413
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsg.Plo@am__quote@
414 414
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htmlnorm.Plo@am__quote@
415
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/is_tar.Plo@am__quote@
415 416
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/line.Plo@am__quote@
416 417
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lzxd.Plo@am__quote@
417 418
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matcher-ac.Plo@am__quote@
... ...
@@ -61,7 +61,6 @@ static const struct cli_magic_s cli_magic[] = {
61 61
     {0,	    "SZDD",			4,  "compress.exe'd",	CL_TYPE_MSSZDD},
62 62
     {0,	    "MSCF",			4,  "MS CAB",		CL_TYPE_MSCAB},
63 63
     {0,	    "ITSF",			4,  "MS CHM",           CL_TYPE_MSCHM},
64
-    {257,   "ustar",			5,  "POSIX tar",	CL_TYPE_TAR},
65 64
     {0,     "#@~^",			4,  "SCRENC",		CL_TYPE_SCRENC},
66 65
     {0,     "(This file must be converted with BinHex 4.0)",
67 66
 				       45, "BinHex",		CL_TYPE_BINHEX},
... ...
@@ -197,6 +196,48 @@ cli_file_t cli_filetype(const char *buf, size_t buflen)
197 197
     return ascii ? CL_TYPE_UNKNOWN_TEXT : CL_TYPE_UNKNOWN_DATA;
198 198
 }
199 199
 
200
+int is_tar(unsigned char *buf, int nbytes);
201
+
202
+cli_file_t cli_filetype2(int desc)
203
+{
204
+	char smallbuff[MAGIC_BUFFER_SIZE + 1];
205
+	unsigned char *bigbuff;
206
+	int bread;
207
+	cli_file_t ret = CL_TYPE_UNKNOWN_DATA;
208
+
209
+
210
+    memset(smallbuff, 0, sizeof(smallbuff));
211
+    if((bread = read(desc, smallbuff, MAGIC_BUFFER_SIZE)) > 0)
212
+	ret = cli_filetype(smallbuff, bread);
213
+
214
+    if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {
215
+
216
+	if(!(bigbuff = (unsigned char *) cli_calloc(16384 + 1, sizeof(unsigned char))))
217
+	    return ret;
218
+
219
+	lseek(desc, 0, SEEK_SET);
220
+	if((bread = read(desc, bigbuff, 16384)) > 0) {
221
+
222
+	    bigbuff[bread] = 0;
223
+
224
+	    switch(is_tar(bigbuff, bread)) {
225
+		case 1:
226
+		    ret = CL_TYPE_OLD_TAR;
227
+		    cli_dbgmsg("Recognized old fashioned tar file\n");
228
+		    break;
229
+		case 2:
230
+		    ret = CL_TYPE_POSIX_TAR;
231
+		    cli_dbgmsg("Recognized POSIX tar file\n");
232
+		    break;
233
+	    }
234
+	}
235
+
236
+	free(bigbuff);
237
+    }
238
+
239
+    return ret;
240
+}
241
+
200 242
 int cli_addtypesigs(struct cl_node *root)
201 243
 {
202 244
 	int i, ret;
... ...
@@ -20,7 +20,7 @@
20 20
 #ifndef __FILETYPES_H
21 21
 #define __FILETYPES_H
22 22
 
23
-#define MAGIC_BUFFER_SIZE 262
23
+#define MAGIC_BUFFER_SIZE 50
24 24
 #define CL_TYPENO 500
25 25
 
26 26
 typedef enum {
... ...
@@ -28,7 +28,8 @@ typedef enum {
28 28
     CL_TYPE_UNKNOWN_DATA,
29 29
     CL_TYPE_MSEXE,
30 30
     CL_TYPE_DATA,
31
-    CL_TYPE_TAR,
31
+    CL_TYPE_POSIX_TAR,
32
+    CL_TYPE_OLD_TAR,
32 33
     CL_TYPE_GZ,
33 34
     CL_TYPE_ZIP,
34 35
     CL_TYPE_BZ,
... ...
@@ -49,6 +50,7 @@ typedef enum {
49 49
 } cli_file_t;
50 50
 
51 51
 cli_file_t cli_filetype(const char *buf, size_t buflen);
52
+cli_file_t cli_filetype2(int desc);
52 53
 int cli_addtypesigs(struct cl_node *root);
53 54
 
54 55
 #endif
55 56
new file mode 100644
... ...
@@ -0,0 +1,97 @@
0
+/*
1
+ * is_tar() -- figure out whether file is a tar archive.
2
+ *
3
+ * Stolen (by the author of the file utility!) from the public domain tar program:
4
+ * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
5
+ *
6
+ * @(#)list.c 1.18 9/23/86 Public Domain - gnu
7
+ * $Id: is_tar.c,v 1.1 2005/03/22 21:26:25 kojm Exp $
8
+ *
9
+ * Comments changed and some code/comments reformatted
10
+ * for file command by Ian Darwin.
11
+ */
12
+
13
+#if HAVE_CONFIG_H
14
+#include "clamav-config.h"
15
+#endif
16
+
17
+#include <string.h>
18
+#include <ctype.h>
19
+#include <sys/types.h>
20
+#include "is_tar.h"
21
+
22
+#include "others.h"
23
+
24
+#define	isodigit(c)	( ((c) >= '0') && ((c) <= '7') )
25
+
26
+static int from_oct(int digs, char *where);
27
+
28
+/*
29
+ * Return 
30
+ *	0 if the checksum is bad (i.e., probably not a tar archive), 
31
+ *	1 for old UNIX tar file,
32
+ *	2 for Unix Std (POSIX) tar file.
33
+ */
34
+int is_tar(unsigned char *buf, int nbytes)
35
+{
36
+	union record *header = (union record *)buf;
37
+	int	i;
38
+	int	sum, recsum;
39
+	char	*p;
40
+
41
+
42
+	if (nbytes < sizeof(union record))
43
+		return 0;
44
+
45
+	recsum = from_oct(8,  header->header.chksum);
46
+
47
+	sum = 0;
48
+	p = header->charptr;
49
+	for (i = sizeof(union record); --i >= 0;) {
50
+		/*
51
+		 * We can't use unsigned char here because of old compilers,
52
+		 * e.g. V7.
53
+		 */
54
+		sum += 0xFF & *p++;
55
+	}
56
+
57
+	/* Adjust checksum to count the "chksum" field as blanks. */
58
+	for (i = sizeof(header->header.chksum); --i >= 0;)
59
+		sum -= 0xFF & header->header.chksum[i];
60
+	sum += ' '* sizeof header->header.chksum;	
61
+
62
+	if (sum != recsum)
63
+		return 0;	/* Not a tar archive */
64
+
65
+	if (0==strcmp(header->header.magic, TMAGIC))
66
+		return 2;		/* Unix Standard tar archive */
67
+
68
+	return 1;			/* Old fashioned tar archive */
69
+}
70
+
71
+
72
+/*
73
+ * Quick and dirty octal conversion.
74
+ *
75
+ * Result is -1 if the field is invalid (all blank, or nonoctal).
76
+ */
77
+static int from_oct(int digs, char *where)
78
+{
79
+	int value;
80
+
81
+	while (isspace((unsigned char)*where)) {		/* Skip spaces */
82
+		where++;
83
+		if (--digs <= 0)
84
+			return -1;		/* All blank field */
85
+	}
86
+	value = 0;
87
+	while (digs > 0 && isodigit(*where)) {	/* Scan til nonoctal */
88
+		value = (value << 3) | (*where++ - '0');
89
+		--digs;
90
+	}
91
+
92
+	if (digs > 0 && *where && !isspace((unsigned char)*where))
93
+		return -1;			/* Ended on non-space/nul */
94
+
95
+	return value;
96
+}
0 97
new file mode 100644
... ...
@@ -0,0 +1,45 @@
0
+/*
1
+ * Header file for public domain tar (tape archive) program.
2
+ *
3
+ * @(#)tar.h 1.20 86/10/29	Public Domain.
4
+ *
5
+ * Created 25 August 1985 by John Gilmore, ihnp4!hoptoad!gnu.
6
+ *
7
+ * $Id: is_tar.h,v 1.1 2005/03/22 21:26:25 kojm Exp $ # checkin only
8
+ */
9
+
10
+/*
11
+ * Header block on tape.
12
+ *
13
+ * I'm going to use traditional DP naming conventions here.
14
+ * A "block" is a big chunk of stuff that we do I/O on.
15
+ * A "record" is a piece of info that we care about.
16
+ * Typically many "record"s fit into a "block".
17
+ */
18
#ifndef __IS_TAR_H
#define __IS_TAR_H

#define	RECORDSIZE	512
#define	NAMSIZ	100
#define	TUNMLEN	32
#define	TGNMLEN	32

/*
 * One tar record. charptr gives byte-wise access to the whole RECORDSIZE
 * bytes; header overlays the leading bytes with the named fields. Every
 * member is a char or char array, so the byte offsets noted below follow
 * directly from the member sizes.
 */
union record {
	char		charptr[RECORDSIZE];
	struct header {
		char	name[NAMSIZ];		/* offset   0 */
		char	mode[8];		/* offset 100 */
		char	uid[8];			/* offset 108 */
		char	gid[8];			/* offset 116 */
		char	size[12];		/* offset 124 */
		char	mtime[12];		/* offset 136 */
		char	chksum[8];		/* offset 148, octal text */
		char	linkflag;		/* offset 156 */
		char	linkname[NAMSIZ];	/* offset 157 */
		char	magic[8];		/* offset 257, see TMAGIC */
		char	uname[TUNMLEN];		/* offset 265 */
		char	gname[TGNMLEN];		/* offset 297 */
		char	devmajor[8];		/* offset 329 */
		char	devminor[8];		/* offset 337 */
	} header;
};

/* The magic field is filled with this if uname and gname are valid. */
#define	TMAGIC		"ustar  "	/* 7 chars and a null */

/*
 * Returns 0 if buf does not hold a valid tar header (bad checksum),
 * 1 for an old UNIX tar file, 2 for a Unix Std (POSIX) tar file.
 * Declared here so that is_tar.c is checked against the same prototype
 * its callers use.
 */
int is_tar(unsigned char *buf, int nbytes);

#endif /* __IS_TAR_H */
... ...
@@ -23,9 +23,9 @@
23 23
 #include <stdio.h>
24 24
 #include <string.h>
25 25
 #include <stdlib.h>
26
-#include <unistd.h>
27 26
 #include <sys/types.h>
28 27
 #include <sys/stat.h>
28
+#include <unistd.h>
29 29
 #include <sys/param.h>
30 30
 #include <fcntl.h>
31 31
 #include <dirent.h>
... ...
@@ -1126,7 +1126,7 @@ static int cli_scanole2(int desc, const char **virname, long int *scanned, const
1126 1126
     return ret;
1127 1127
 }
1128 1128
 
1129
-static int cli_scantar(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, unsigned int arec, unsigned int mrec)
1129
+static int cli_scantar(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, unsigned int arec, unsigned int mrec, unsigned int posix)
1130 1130
 {
1131 1131
 	char *dir;
1132 1132
 	int ret = CL_CLEAN;
... ...
@@ -1141,7 +1141,7 @@ static int cli_scantar(int desc, const char **virname, long int *scanned, const
1141 1141
 	return CL_ETMPDIR;
1142 1142
     }
1143 1143
 
1144
-    if((ret = cli_untar(dir, desc)))
1144
+    if((ret = cli_untar(dir, desc, posix)))
1145 1145
 	cli_dbgmsg("Tar: %s\n", cl_strerror(ret));
1146 1146
     else
1147 1147
 	ret = cli_scandir(dir, virname, scanned, root, limits, options, arec, mrec);
... ...
@@ -1289,11 +1289,21 @@ static int cli_scanmail(int desc, const char **virname, long int *scanned, const
1289 1289
 
1290 1290
 int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, unsigned int arec, unsigned int mrec)
1291 1291
 {
1292
-	char magic[MAGIC_BUFFER_SIZE + 1];
1293 1292
 	int ret = CL_CLEAN, nret;
1294 1293
 	int bread = 0;
1295 1294
 	cli_file_t type;
1295
+	struct stat sb;
1296
+
1297
+
1298
+    if(fstat(desc, &sb) == -1) {
1299
+	cli_errmsg("Can's fstat descriptor %d\n", desc);
1300
+	return CL_EIO;
1301
+    }
1296 1302
 
1303
+    if(sb.st_size <= 5) {
1304
+	cli_dbgmsg("Small data (%d bytes)\n", sb.st_size);
1305
+	return CL_CLEAN;
1306
+    }
1297 1307
 
1298 1308
     if(!root) {
1299 1309
 	cli_errmsg("CRITICAL: root == NULL\n");
... ...
@@ -1325,18 +1335,8 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
1325 1325
 	}
1326 1326
 
1327 1327
     lseek(desc, 0, SEEK_SET);
1328
-    memset(magic, 0, sizeof(magic));
1329
-
1330
-    if((bread = read(desc, magic, MAGIC_BUFFER_SIZE)) == -1) {
1331
-	cli_dbgmsg("Can't read from descriptor %d\n");
1332
-	return CL_EIO;
1333
-    } else if(bread < 2) {
1334
-	/* short read - no need to do magic */
1335
-	return CL_CLEAN;
1336
-    }
1337
-
1328
+    type = cli_filetype2(desc);
1338 1329
     lseek(desc, 0, SEEK_SET);
1339
-    type = cli_filetype(magic, bread);
1340 1330
 
1341 1331
     type == CL_TYPE_MAIL ? mrec++ : arec++;
1342 1332
 
... ...
@@ -1388,9 +1388,14 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
1388 1388
 		ret = cli_scanole2(desc, virname, scanned, root, limits, options, arec, mrec);
1389 1389
 	    break;
1390 1390
 
1391
-	case CL_TYPE_TAR:
1391
+	case CL_TYPE_POSIX_TAR:
1392
+	    if(SCAN_ARCHIVE)
1393
+		ret = cli_scantar(desc, virname, scanned, root, limits, options, arec, mrec, 1);
1394
+	    break;
1395
+
1396
+	case CL_TYPE_OLD_TAR:
1392 1397
 	    if(SCAN_ARCHIVE)
1393
-		ret = cli_scantar(desc, virname, scanned, root, limits, options, arec, mrec);
1398
+		ret = cli_scantar(desc, virname, scanned, root, limits, options, arec, mrec, 0);
1394 1399
 	    break;
1395 1400
 
1396 1401
 	case CL_TYPE_BINHEX:
... ...
@@ -21,6 +21,9 @@
21 21
  *
22 22
  * Change History:
23 23
  * $Log: untar.c,v $
24
+ * Revision 1.25  2005/03/22 21:26:27  kojm
25
+ * add support for old fashioned tar archives
26
+ *
24 27
  * Revision 1.24  2005/03/20 18:34:18  nigelhorne
25 28
  * Minor tidy
26 29
  *
... ...
@@ -94,7 +97,7 @@
94 94
  * First draft
95 95
  *
96 96
  */
97
-static	char	const	rcsid[] = "$Id: untar.c,v 1.24 2005/03/20 18:34:18 nigelhorne Exp $";
97
+static	char	const	rcsid[] = "$Id: untar.c,v 1.25 2005/03/22 21:26:27 kojm Exp $";
98 98
 
99 99
 #include <stdio.h>
100 100
 #include <errno.h>
... ...
@@ -126,7 +129,7 @@ octal(const char *str)
126 126
 }
127 127
 
128 128
 int
129
-cli_untar(const char *dir, int desc)
129
+cli_untar(const char *dir, int desc, unsigned int posix)
130 130
 {
131 131
 	int size = 0;
132 132
 	int in_block = 0;
... ...
@@ -169,11 +172,13 @@ cli_untar(const char *dir, int desc)
169 169
 				break;
170 170
 
171 171
 			/* Notice assumption that BLOCKSIZE > 262 */
172
-			strncpy(magic, block+257, 5);
173
-			magic[5] = '\0';
174
-			if(strcmp(magic, "ustar") != 0) {
175
-				cli_dbgmsg("Incorrect magic string '%s' in tar header\n", magic);
176
-				return CL_EFORMAT;
172
+			if(posix) {
173
+				strncpy(magic, block+257, 5);
174
+				magic[5] = '\0';
175
+				if(strcmp(magic, "ustar") != 0) {
176
+					cli_dbgmsg("Incorrect magic string '%s' in tar header\n", magic);
177
+					return CL_EFORMAT;
178
+				}
177 179
 			}
178 180
 
179 181
 			type = block[156];
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: untar.h,v $
20
+ * Revision 1.3  2005/03/22 21:26:27  kojm
21
+ * add support for old fashioned tar archives
22
+ *
20 23
  * Revision 1.2  2004/09/05 18:58:22  nigelhorne
21 24
  * Extract files completed
22 25
  *
... ...
@@ -24,4 +27,4 @@
24 24
  * First draft
25 25
  *
26 26
  */
27
-int	cli_untar(const char *dir, int desc);
27
+int cli_untar(const char *dir, int desc, unsigned int posix);