Browse code

hwpole2: new filetype and handler for hwp embedded ole2 files

Kevin Lin authored on 2015/12/18 01:04:00
Showing 6 changed files
... ...
@@ -124,6 +124,7 @@ static const struct ftmap_s {
124 124
     { "CL_TYPE_XML_XL",		CL_TYPE_XML_XL		},
125 125
     { "CL_TYPE_HWP3",		CL_TYPE_HWP3		},
126 126
     { "CL_TYPE_XML_HWP",	CL_TYPE_XML_HWP		},
127
+    { "CL_TYPE_HWPOLE2",	CL_TYPE_HWPOLE2		},
127 128
     { NULL,			CL_TYPE_IGNORED		}
128 129
 };
129 130
 
... ...
@@ -112,6 +112,7 @@ typedef enum {
112 112
     CL_TYPE_XML_WORD,
113 113
     CL_TYPE_XML_XL,
114 114
     CL_TYPE_XML_HWP,
115
+    CL_TYPE_HWPOLE2,
115 116
 
116 117
     CL_TYPE_OTHER, /* on-the-fly, used for target 14 (OTHER) */
117 118
     CL_TYPE_IGNORED /* please don't add anything below */
... ...
@@ -190,6 +190,7 @@ static const char *ftypes_int[] = {
190 190
   "1:0:3c3f786d6c2076657273696f6e3d22312e3022*3c??3a576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
191 191
   "0:0:49492a00:TIFF Little Endian:CL_TYPE_ANY:CL_TYPE_GRAPHICS:81",
192 192
   "0:0:4d4d:TIFF Big Endian:CL_TYPE_ANY:CL_TYPE_GRAPHICS:81",
193
+  "0:4:d0cf11e0a1b11ae1:HWP embedded OLE2:CL_TYPE_ANY:CL_TYPE_HWPOLE2",
193 194
   "0:0:48575020446F63756D656E742046696C652056332E3030201A0102030405:HWP 3.x Document:CL_TYPE_ANY:CL_TYPE_HWP3:82",
194 195
   "1:0:efbbbf3c3f786d6c2076657273696f6e3d22312e3022*3c4857504d4c:HWPML Document:CL_TYPE_ANY:CL_TYPE_XML_HWP:82",
195 196
   NULL
... ...
@@ -54,7 +54,7 @@
54 54
 
55 55
 #define HWP5_DEBUG 0
56 56
 #define HWP3_DEBUG 1
57
-#define HWPML_DEBUG 1
57
+#define HWPML_DEBUG 0
58 58
 #if HWP5_DEBUG
59 59
 #define hwp5_debug(...) cli_dbgmsg(__VA_ARGS__)
60 60
 #else
... ...
@@ -179,6 +179,28 @@ static int decompress_and_callback(cli_ctx *ctx, fmap_t *input, off_t at, size_t
179 179
     return ret;
180 180
 }
181 181
 
182
+/*** HWPOLE2 ***/
183
+/* Scan the data that follows the 4-byte size prefix in ctx's fmap; returns CL_EREAD if the prefix cannot be read, otherwise the cli_map_scandesc() result. */
184
+int cli_scanhwpole2(cli_ctx *ctx)
185
+{
186
+    fmap_t *map = *ctx->fmap;
187
+    uint32_t usize, asize;
188
+
189
+    if (fmap_readn(map, &usize, 0, sizeof(usize)) != sizeof(usize)) {
190
+        cli_errmsg("HWPOLE2: Failed to read uncompressed ole2 filesize\n");
191
+        return CL_EREAD;
192
+    }
193
+
194
+    /* NOTE(review): usize is compared as read, in host byte order - confirm the prefix never needs byte-swapping */
195
+    asize = (uint32_t)(map->len - sizeof(usize)); /* bytes actually present after the prefix */
196
+    if (usize != asize)
197
+        cli_warnmsg("HWPOLE2: Mismatched uncompressed prefix and size: %u != %u\n", usize, asize);
198
+    else
199
+        cli_dbgmsg("HWPOLE2: Matched uncompressed prefix and size: %u == %u\n", usize, asize);
200
+
201
+    return cli_map_scandesc(map, sizeof(usize), map->len - sizeof(usize), ctx, CL_TYPE_ANY); /* a size mismatch is only reported; the length excludes the skipped prefix */
202
+}
203
+
182 204
 /*** HWP5 ***/
183 205
 
184 206
 int cli_hwp5header(cli_ctx *ctx, hwp5_header_t *hwp5)
... ...
@@ -253,40 +275,16 @@ int cli_hwp5header(cli_ctx *ctx, hwp5_header_t *hwp5)
253 253
 
254 254
 static int hwp5_cb(void *cbdata, int fd, cli_ctx *ctx)
255 255
 {
256
-    int ret, ole2 = *(int *)cbdata;
256
+    int ret; /* NOTE(review): ret is never used in the new body, and cbdata is no longer read either */
257 257
 
258 258
     if (fd < 0 || !ctx)
259 259
         return CL_ENULLARG;
260 260
 
261
-    /* trim off 32-bit prefix for OLE2 streams */
262
-    if (ole2) {
263
-        STATBUF statbuf;
264
-        fmap_t *map;
265
-
266
-        if (FSTAT(fd, &statbuf) == -1) {
267
-            cli_errmsg("HWP5.x: Can't stat file descriptor\n");
268
-            return CL_ESTAT;
269
-        }
270
-
271
-        map = fmap(fd, 0, statbuf.st_size);
272
-        if (!map) {
273
-            cli_errmsg("HWP5.x: Failed to get fmap for ole2 stream\n");
274
-            return CL_EMAP;
275
-        }
276
-
277
-        ret = cli_map_scandesc(map, 4, 0, ctx, CL_TYPE_ANY);
278
-        funmap(map);
279
-    } else {
280
-        ret = cli_magic_scandesc(fd, ctx);
281
-    }
282
-
283
-    return ret;
261
+    return cli_magic_scandesc(fd, ctx); /* no OLE2 special case here any more: every stream is handed to cli_magic_scandesc() */
284 262
 }
285 263
 
286 264
 int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd)
287 265
 {
288
-    int ole2;
289
-
290 266
     hwp5_debug("HWP5.x: NAME: %s\n", name);
291 267
 
292 268
     if (fd < 0) {
... ...
@@ -299,12 +297,6 @@ int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd)
299 299
         !strncmp(name, "defaultjscript", 14) || !strncmp(name, "section", 7) ||
300 300
         !strncmp(name, "viewtext", 8) || !strncmp(name, "docinfo", 7)) {
301 301
 
302
-        ole2 = 0;
303
-        if (strstr(name, ".ole")) {
304
-            cli_dbgmsg("HWP5.x: Detected embedded OLE2 stream\n");
305
-            ole2 = 1;
306
-        }
307
-
308 302
         if (hwp5->flags & HWP5_PASSWORD) {
309 303
             cli_dbgmsg("HWP5.x: Password encrypted stream, scanning as-is\n");
310 304
             return cli_magic_scandesc(fd, ctx);
... ...
@@ -329,7 +321,7 @@ int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd)
329 329
                 cli_errmsg("HWP5.x: Failed to get fmap for input stream\n");
330 330
                 return CL_EMAP;
331 331
             }
332
-            ret = decompress_and_callback(ctx, input, 0, 0, "HWP5.x", hwp5_cb, &ole2);
332
+            ret = decompress_and_callback(ctx, input, 0, 0, "HWP5.x", hwp5_cb, NULL);
333 333
             funmap(input);
334 334
             return ret;
335 335
         }
... ...
@@ -44,6 +44,9 @@ typedef struct hwp5_header {
44 44
     /* uint8_t reserved[216] */
45 45
 } hwp5_header_t;
46 46
 
47
+/* HWP EMBEDDED OLE2 - 4-byte prefixed OLE2 */
48
+int cli_scanhwpole2(cli_ctx *ctx);
49
+
47 50
 /* HWP 5.0 - OLE2 */
48 51
 int cli_hwp5header(cli_ctx *ctx, hwp5_header_t *hwp5);
49 52
 int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd);
... ...
@@ -2685,7 +2685,8 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2685 2685
                 type == CL_TYPE_XML_WORD ||
2686 2686
                 type == CL_TYPE_XML_XL ||
2687 2687
                 type == CL_TYPE_HWP3 ||
2688
-                type == CL_TYPE_XML_HWP) {
2688
+                type == CL_TYPE_XML_HWP ||
2689
+                type == CL_TYPE_HWPOLE2) {
2689 2690
                 ctx->properties = json_object_new_object();
2690 2691
                 if (NULL == ctx->properties) {
2691 2692
                     cli_errmsg("magic_scandesc: no memory for json properties object\n");
... ...
@@ -2847,6 +2848,10 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2847 2847
         ret = cli_scanhwp3(ctx);
2848 2848
         break;
2849 2849
 
2850
+    case CL_TYPE_HWPOLE2:
2851
+        ret = cli_scanhwpole2(ctx);
2852
+        break;
2853
+
2850 2854
     case CL_TYPE_XML_WORD:
2851 2855
         ret = cli_scanmsxml(ctx);
2852 2856
         break;