Browse code

Add file type recognition for Office Open XML.

Steve Morgan authored on 2013/10/18 05:54:21
Showing 4 changed files
... ...
@@ -1,3 +1,8 @@
1
+
2
+Thu Oct 17 16:50:00 EDT 2013 (morgan)
3
+------------------------------------
4
+ * libclamav: bb #6348 - Add file type recognition for Office Open XML.
5
+
1 6
 Tue Oct 15 16:03:25 2013 EDT 2013 (dar)
2 7
 ------------------------------------
3 8
  * libclamav: bb #9154 - ELF handling re-write
... ...
@@ -6,10 +11,18 @@ Tue Oct 15 16:00:14 2013 EDT 2013 (dar)
6 6
 ------------------------------------
7 7
  * libclamav: bb #8696 - Bug reported by NIW Solutions
8 8
 
9
+Mon Oct 14 17:07:40 EDT 2013 (morgan)
10
+------------------------------------
11
+ * bb #8209 - move perror() calls so they will work. Patches supplied by Bill Parker.
12
+
9 13
 Fri Oct 11 16:50:34 2013 EDT 2013 (dar)
10 14
 ------------------------------------
11 15
  * bb #9072: clamscan message separator fix
12 16
 
17
+Tue Oct 8 17:17:44 EDT 2013 (morgan)
18
+------------------------------------
19
+ * libclamav: Add support for XZ file type recognition and decompression.
20
+
13 21
 Thu Oct 3 10:49:11 2013 EDT 2013 (dar)
14 22
 ------------------------------------
15 23
  * bb #1570: Support ADC compression in DMG
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2013 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -43,6 +43,8 @@
43 43
 #include "htmlnorm.h"
44 44
 #include "entconv.h"
45 45
 #include "mpool.h"
46
+#define UNZIP_PRIVATE
47
+#include "unzip.h"
46 48
 
47 49
 static const struct ftmap_s {
48 50
     const char *name;
... ...
@@ -106,6 +108,9 @@ static const struct ftmap_s {
106 106
     { "CL_TYPE_PART_ANY",	CL_TYPE_PART_ANY	},
107 107
     { "CL_TYPE_PART_HFSPLUS",	CL_TYPE_PART_HFSPLUS	},
108 108
     { "CL_TYPE_XZ",     	CL_TYPE_XZ      	},
109
+    { "CL_TYPE_OOXML_WORD",	CL_TYPE_OOXML_WORD     	},
110
+    { "CL_TYPE_OOXML_PPT",	CL_TYPE_OOXML_PPT     	},
111
+    { "CL_TYPE_OOXML_XL",	CL_TYPE_OOXML_XL     	},
109 112
     { NULL,			CL_TYPE_IGNORED		}
110 113
 };
111 114
 
... ...
@@ -244,7 +249,40 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
244 244
 		    cli_dbgmsg("Recognized POSIX tar file\n");
245 245
 		    return CL_TYPE_POSIX_TAR;
246 246
 	    }
247
-	}
247
+	} else if (ret == CL_TYPE_ZIP) {
248
+            const char lhdr_magic[4] = {0x50,0x4b,0x03,0x04};
249
+            const unsigned char * zbuff;
250
+            long zread, zoff = SIZEOF_LH;
251
+            int lhc = 1;
252
+
253
+            zbuff = buff + SIZEOF_LH;
254
+            zread = bread - SIZEOF_LH;
255
+
256
+            do {                
257
+                long zc = zread;
258
+                const char * zlh;
259
+                const unsigned char * zp = zbuff;
260
+               
261
+                while (zc > SIZEOF_LH && (zlh = cli_memstr(zp, zc, lhdr_magic, 4))) {
262
+                    lhc++;
263
+                    zp = zlh + SIZEOF_LH;
264
+                    zc = zp - zbuff;
265
+                    if (0 == memcmp(zp, "word/", 5)) {
266
+                        cli_dbgmsg("Recognized OOXML Word file\n");
267
+                        return CL_TYPE_OOXML_WORD;
268
+                    } else if (0 == memcmp(zp, "ppt/", 4)) {
269
+                        cli_dbgmsg("Recognized OOXML PPT file\n");
270
+                        return CL_TYPE_OOXML_PPT;                        
271
+                    } else if (0 == memcmp(zp, "xl/", 3)) {
272
+                        cli_dbgmsg("Recognized OOXML XL file\n");
273
+                        return CL_TYPE_OOXML_XL;
274
+                    }
275
+                    //cli_dbgmsg("zip file name:%.10s\n", zp); 
276
+                }
277
+                zoff += zread - SIZEOF_LH;
278
+                zread = MIN(MAGIC_BUFFER_SIZE, map->len-zoff);
279
+            } while (lhc < 3 && (zbuff = fmap_need_off_once(map, zoff, zread)));
280
+        }
248 281
     }
249 282
 
250 283
     if(ret >= CL_TYPE_TEXT_ASCII && ret <= CL_TYPE_BINARY_DATA) {
... ...
@@ -80,6 +80,9 @@ typedef enum {
80 80
     CL_TYPE_JAVA,
81 81
     CL_TYPE_XAR,
82 82
     CL_TYPE_XZ,
83
+    CL_TYPE_OOXML_WORD,
84
+    CL_TYPE_OOXML_PPT,
85
+    CL_TYPE_OOXML_XL,
83 86
 
84 87
     /* Section for partition types */
85 88
     CL_TYPE_PART_ANY, /* unknown partition type */
... ...
@@ -2540,6 +2540,9 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2540 2540
 	    }
2541 2541
 	    break;
2542 2542
 
2543
+        case CL_TYPE_OOXML_WORD:
2544
+        case CL_TYPE_OOXML_PPT:
2545
+        case CL_TYPE_OOXML_XL:
2543 2546
 	case CL_TYPE_ZIP:
2544 2547
 	    ctx->container_type = CL_TYPE_ZIP;
2545 2548
 	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP))