Browse code

Implement xar support

Steve Morgan authored on 2013/09/14 01:22:58
Showing 3 changed files
... ...
@@ -2645,6 +2645,12 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2645 2645
 		ret = cli_scansis(ctx);
2646 2646
 	    break;
2647 2647
 
2648
+	case CL_TYPE_XAR:
2649
+	    ctx->container_type = CL_TYPE_XAR;
2650
+	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XAR))
2651
+		ret = cli_scanxar(ctx);
2652
+	    break;
2653
+
2648 2654
 	case CL_TYPE_BINARY_DATA:
2649 2655
 	case CL_TYPE_TEXT_UTF16BE:
2650 2656
 	    if(SCAN_ALGO && (DCONF_OTHER & OTHER_CONF_MYDOOMLOG))
... ...
@@ -2659,12 +2665,6 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2659 2659
 		ret = cli_scan_structured(ctx);
2660 2660
 	    break;
2661 2661
 
2662
-	case CL_TYPE_XAR:
2663
-	    ctx->container_type = CL_TYPE_XAR;
2664
-	    if(SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XAR))
2665
-		ret = cli_scanxar(ctx);
2666
-	    break;
2667
-
2668 2662
 	default:
2669 2663
 	    break;
2670 2664
     }
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  Copyright (C) 2013 Sourcefire, Inc.
3 3
  *
4
- *  Authors: David Raynor <draynor@sourcefire.com>
4
+ *  Authors: Steven Morgan <smorgan@sourcefire.com>
5 5
  *
6 6
  *  This program is free software; you can redistribute it and/or modify
7 7
  *  it under the terms of the GNU General Public License version 2 as
... ...
@@ -22,36 +22,404 @@
22 22
 #include "clamav-config.h"
23 23
 #endif
24 24
 
25
-#include "cltypes.h"
26
-#include "others.h"
25
+#include <errno.h>
27 26
 #include "xar.h"
28 27
 #include "fmap.h"
28
+#if HAVE_LIBXML2
29
+#include <libxml/xmlreader.h>
29 30
 #include "scanners.h"
31
+#include "inflate64.h"
32
+#include "7z_iface.h"
33
+
34
+/*
35
+   xar_cleanup_temp_file - cleanup after cli_gentempfd
36
+   parameters:
37
+     ctx - cli_ctx context pointer
38
+     fd  - fd to close
39
+     tmpname - name of the temp file to unlink (skipped when ctx->engine->keeptmp is set); the string itself is always freed
40
+   returns - CL_SUCCESS, or CL_EUNLINK if the file could not be unlinked
41
+ */
42
+static int xar_cleanup_temp_file(cli_ctx *ctx, int fd, char * tmpname)
43
+{
44
+    int rc = CL_SUCCESS;
45
+    close(fd);
46
+    if(!ctx->engine->keeptmp) {
47
+        if (cli_unlink(tmpname)) {
48
+            cli_errmsg("cli_scanxar: error unlinking tmpfile %s\n", tmpname); 
49
+            rc = CL_EUNLINK;
50
+        }
51
+    }
52
+    free(tmpname);
53
+    return rc;
54
+}
55
+
56
+/*
57
+   xar_get_numeric_from_xml_element - extract xml element value as numeric
58
+   parameters:
59
+     reader - xmlTextReaderPtr
60
+     value - pointer to long to contain the returned value
61
+   returns - CL_SUCCESS, or CL_EFORMAT if no text node follows the element or the value is negative
62
+ */
63
+static int xar_get_numeric_from_xml_element(xmlTextReaderPtr reader, long * value)
64
+{
65
+    if (xmlTextReaderRead(reader) == 1 && xmlTextReaderNodeType(reader) == XML_READER_TYPE_TEXT) {
66
+        *value = atol((const char *)xmlTextReaderConstValue(reader));
67
+        if (*value < 0) {
68
+            cli_errmsg("cli_scanxar: XML element value %li\n", *value);
69
+            return CL_EFORMAT;
70
+        }
71
+        return CL_SUCCESS;
72
+    }
73
+    cli_errmsg("cli_scanxar: No text for XML element\n");
74
+    return CL_EFORMAT;
75
+}
76
+
77
+/*
78
+   xar_get_toc_data_values - return the values of a <data> or <ea> xml element that represent 
79
+                             an extent of data on the heap.
80
+   parameters:
81
+     reader - xmlTextReaderPtr
82
+     length - pointer to long for returning value of the <length> element.
83
+     offset - pointer to long for returning value of the <offset> element.
84
+     size - pointer to long for returning value of the <size> element.
85
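+     encoding - pointer to int for returning the CL_TYPE_* code chosen from the <encoding> style attribute (left unchanged if no <encoding> element is seen).
86
+   returns - CL_SUCCESS, CL_EFORMAT, or CL_BREAK. CL_EFORMAT indicates that only some of <offset>/<length>/<size> were found; CL_BREAK indicates there are no more <data>/<ea> elements.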
87
+ */
88
+static int xar_get_toc_data_values(xmlTextReaderPtr reader, long *length, long *offset, long *size, int *encoding)
89
+{
90
+    const xmlChar *name;
91
+    int indata = 0, inea = 0;
92
+    int rc, gotoffset=0, gotlength=0, gotsize=0;
93
+
94
+    rc = xmlTextReaderRead(reader);
95
+    while (rc == 1) {
96
+        name = xmlTextReaderConstLocalName(reader);
97
+        if (indata || inea) {
98
+            /*  cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name); */
99
+            if (xmlStrEqual(name, (const xmlChar *)"offset") && 
100
+                xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
101
+                if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, offset))
102
+                    gotoffset=1;
103
+            } else if (xmlStrEqual(name, (const xmlChar *)"length") &&
104
+                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
105
+                if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, length))
106
+                    gotlength=1;
107
+
108
+            } else if (xmlStrEqual(name, (const xmlChar *)"size") &&
109
+                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
110
+                if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, size))
111
+                    gotsize=1;
112
+
113
+            } else if (xmlStrEqual(name, (const xmlChar *)"encoding") &&
114
+                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
115
+                xmlChar * style;
116
+                style = xmlTextReaderGetAttribute(reader, (const xmlChar *)"style");
117
+                if (style == NULL) {
118
+                    cli_errmsg("cli_scaxar: xmlTextReaderGetAttribute no style attribute "
119
+                               "for encoding element\n");
120
+                    *encoding = CL_TYPE_ANY;
121
+                } else if (xmlStrEqual(style, (const xmlChar *)"application/x-gzip")) {
122
+                    cli_dbgmsg("cli_scanxar: encoding = application/x-gzip.\n");
123
+                    *encoding = CL_TYPE_GZ; 
124
+                } else if (xmlStrEqual(style, (const xmlChar *)"application/octet-stream")) {
125
+                    cli_dbgmsg("cli_scanxar: encoding = application/octet-stream.\n");
126
+                    *encoding = CL_TYPE_ANY; 
127
+                } else if (xmlStrEqual(style, (const xmlChar *)"application/x-bzip2")) {
128
+                    cli_dbgmsg("cli_scanxar: encoding = application/x-bzip2.\n");
129
+                    *encoding = CL_TYPE_BZ;
130
+                } else if (xmlStrEqual(style, (const xmlChar *)"application/x-lzma")) {
131
+                    cli_dbgmsg("cli_scanxar: encoding = application/x-lzma.\n");
132
+                    *encoding = CL_TYPE_7Z;
133
+                 } else if (xmlStrEqual(style, (const xmlChar *)"application/x-xz")) {
134
+                    cli_dbgmsg("cli_scanxar: encoding = application/x-xz.\n");
135
+                    *encoding = CL_TYPE_7Z;
136
+                } else {
137
+                    cli_errmsg("cli_scaxar: unknown style value=%s for encoding element\n", style);
138
+                    *encoding = CL_TYPE_ANY;
139
+                }
140
+            } else if (indata && xmlStrEqual(name, (const xmlChar *)"data") &&
141
+                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) {
142
+                break;
143
+            } else if (inea && xmlStrEqual(name, (const xmlChar *)"ea") &&
144
+                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) {
145
+                break;
146
+            }
147
+        } else {
148
+            if (xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
149
+                if (xmlStrEqual(name, (const xmlChar *)"data")) {
150
+                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name);
151
+                    indata = 1;
152
+                } else if (xmlStrEqual(name, (const xmlChar *)"ea")) {
153
+                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name);
154
+                    inea = 1;
155
+                }
156
+            } else if ((xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) &&
157
+                       xmlStrEqual(name, (const xmlChar *)"xar")) {
158
+                cli_dbgmsg("cli_scanxar: finished parsing xml TOC.\n");   
159
+            }
160
+        }
161
+        rc = xmlTextReaderRead(reader);
162
+    }
163
+    
164
+    if (gotoffset && gotlength && gotsize) {
165
+        rc = CL_SUCCESS;
166
+    }
167
+    else if (0 == gotoffset + gotlength + gotsize)
168
+        rc = CL_BREAK;
169
+    else
170
+        rc = CL_EFORMAT;
171
+
172
+    return rc;
173
+}
174
+
175
+#endif
176
+/*
177
+  cli_scanxar - scan an xar archive.
178
+  Parameters:
179
+    ctx - pointer to cli_ctx.
180
+  returns - CL_SUCCESS, CL_VIRUS, or a CL_E* error code.
181
+*/
30 182
 
31 183
 int cli_scanxar(cli_ctx *ctx)
32 184
 {
185
+    int rc = CL_SUCCESS;
186
+#if HAVE_LIBXML2
187
+    int fd = -1;
33 188
     struct xar_header hdr;
34
-    char name[513];
35
-    unsigned int file = 0, trailer = 0;
36
-    uint32_t filesize, namesize, hdr_namesize;
37
-    int ret, conv;
38
-    off_t pos = 0;
39
-
40
-    if (fmap_readn(*ctx->fmap, &hdr, pos, sizeof(hdr)) != sizeof(hdr)) {
41
-        cli_dbgmsg("cli_scanxar: Invalid header, too short.\n");
189
+    fmap_t *map = *ctx->fmap;
190
+    long length, offset, size, at;
191
+    int encoding;
192
+    z_stream strm = {0};
193
+    char *toc, *tmpname;
194
+    xmlTextReaderPtr reader = NULL;
195
+
196
+    /* retrieve xar header */
197
+    if (fmap_readn(*ctx->fmap, &hdr, 0, sizeof(hdr)) != sizeof(hdr)) {
198
+        cli_errmsg("cli_scanxar: Invalid header, too short.\n");
42 199
         return CL_EFORMAT;
43 200
     }
44 201
     hdr.magic = be32_to_host(hdr.magic);
45
-    if (hdr.magic == 0x78617221) {
202
+
203
+    if (hdr.magic == XAR_HEADER_MAGIC) {
46 204
         cli_dbgmsg("cli_scanxar: Matched magic\n");
47 205
     }
48 206
     else {
49
-        cli_dbgmsg("cli_scanxar: Invalid magic\n");
207
+        cli_errmsg("cli_scanxar: Invalid magic\n");
50 208
         return CL_EFORMAT;
51 209
     }
210
+    hdr.size = be16_to_host(hdr.size);
211
+    hdr.version = be16_to_host(hdr.version);
212
+    hdr.toc_length_compressed = be64_to_host(hdr.toc_length_compressed);
213
+    hdr.toc_length_decompressed = be64_to_host(hdr.toc_length_decompressed);
214
+    hdr.chksum_alg = be32_to_host(hdr.chksum_alg);
52 215
 
53
-    /* TODO: First grab the TOC, parse that, and then unpack the rest. */
216
+    /* cli_dbgmsg("hdr.magic %x\n", hdr.magic); */
217
+    /* cli_dbgmsg("hdr.size %i\n", hdr.size); */
218
+    /* cli_dbgmsg("hdr.version %i\n", hdr.version); */
219
+    /* cli_dbgmsg("hdr.toc_length_compressed %lu\n", hdr.toc_length_compressed); */
220
+    /* cli_dbgmsg("hdr.toc_length_decompressed %lu\n", hdr.toc_length_decompressed); */
221
+    /* cli_dbgmsg("hdr.chksum_alg %i\n", hdr.chksum_alg); */
222
+ 
223
+    /* Uncompress TOC */
224
+    strm.next_in = (unsigned char *)fmap_need_off_once(*ctx->fmap, hdr.size, hdr.toc_length_compressed);
225
+    if (strm.next_in == NULL) {
226
+        cli_errmsg("cli_scanxar: fmap_need_off_once fails on TOC.\n");
227
+        return CL_EFORMAT;
228
+    }
229
+    strm.avail_in = hdr.toc_length_compressed; 
230
+    toc = cli_malloc(hdr.toc_length_decompressed+1);
231
+    if (toc == NULL) {
232
+        cli_errmsg("cli_scanxar: cli_malloc fails on TOC decompress buffer.\n");
233
+        return CL_EMEM;
234
+    }
235
+    toc[hdr.toc_length_decompressed] = '\0';
236
+    strm.avail_out = hdr.toc_length_decompressed;
237
+    strm.next_out = (unsigned char *)toc;
238
+    rc = inflateInit(&strm);
239
+    if (rc != Z_OK) {
240
+        cli_errmsg("cli_scanxar:inflateInit error %i \n", rc);
241
+        rc = CL_EFORMAT;
242
+        goto exit_toc;
243
+    }    
244
+    rc = inflate(&strm, Z_SYNC_FLUSH);
245
+    if (rc != Z_OK && rc != Z_STREAM_END) {
246
+        cli_errmsg("cli_scanxar:inflate error %i \n", rc);
247
+        rc = CL_EFORMAT;
248
+        goto exit_toc;
249
+    }
250
+    rc = inflateEnd(&strm);
251
+    if (rc != Z_OK) {
252
+        cli_errmsg("cli_scanxar:inflateEnd error %i \n", rc);
253
+        rc = CL_EFORMAT;
254
+        goto exit_toc;
255
+    }
54 256
 
55
-    return CL_CLEAN;
56
-}
257
+       /* cli_dbgmsg("cli_scanxar: TOC xml:\n%s\n", toc); */
258
+       /* printf("cli_scanxar: TOC xml:\n%s\n", toc); */
259
+       /* cli_dbgmsg("cli_scanxar: TOC end:\n"); */
260
+       /* printf("cli_scanxar: TOC end:\n"); */
261
+
262
+    /* scan the xml */
263
+    cli_dbgmsg("cli_scanxar: scanning xar TOC xml.\n"); 
264
+    rc = cli_mem_scandesc(toc, hdr.toc_length_decompressed, ctx);
265
+    if (rc != CL_SUCCESS) {
266
+        if (rc != CL_VIRUS || !SCAN_ALL)
267
+            goto exit_toc;        
268
+    }
269
+
270
+    /* when --leave-temps is in effect, write the decompressed TOC to a temp file that is left behind */
271
+    if(ctx->engine->keeptmp) {
272
+        if ((rc = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
273
+            cli_errmsg("cli_scanxar: Can't create temporary file for TOC.\n");
274
+            goto exit_toc;
275
+        }
276
+        if (cli_writen(fd, toc, hdr.toc_length_decompressed) < 0) {
277
+            cli_errmsg("cli_scanxar: cli_writen error writing TOC.\n");
278
+            rc = CL_EWRITE;
279
+            xar_cleanup_temp_file(ctx, fd, tmpname);
280
+            goto exit_toc;
281
+        }
282
+        rc = xar_cleanup_temp_file(ctx, fd, tmpname);
283
+        if (rc != CL_SUCCESS)
284
+            goto exit_toc;
285
+    }
286
+
287
+    /*TODO: subdocuments*/
288
+
289
+    reader = xmlReaderForMemory(toc, hdr.toc_length_decompressed, "noname.xml", NULL, 0);
290
+    if (reader == NULL) {
291
+        cli_errmsg("cli_scanxar: xmlReaderForMemory error for TOC\n");
292
+        goto exit_toc;
293
+    }
294
+
295
+    /* Walk the TOC XML and extract files */
296
+    fd = -1;
297
+    tmpname = NULL;
298
+    while (CL_SUCCESS == (rc = xar_get_toc_data_values(reader, &length, &offset, &size, &encoding))) {
299
+        char * blockp;
300
+
301
+        /* clean up temp file from previous loop iteration */
302
+        if (fd > -1 && tmpname) {
303
+            rc = xar_cleanup_temp_file(ctx, fd, tmpname);
304
+            if (rc != CL_SUCCESS)
305
+                goto exit_reader;
306
+        }
307
+
308
+        at = offset + hdr.toc_length_compressed + hdr.size;
309
+
310
+        if ((rc = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
311
+            cli_errmsg("cli_scanxar: Can't generate temporary file.\n");
312
+            goto exit_reader;
313
+        }
314
+
315
+        cli_dbgmsg("cli_scanxar: decompress and scanning heap offset %li len %li size %li\n",
316
+                   offset, length, size);
317
+
318
+        switch (encoding) {
319
+        case CL_TYPE_GZ:
320
+            /* inflate gzip directly because file segments do not contain magic */
321
+            memset(&strm, 0, sizeof(strm));
322
+            if ((rc = inflateInit(&strm)) != Z_OK) {
323
+                cli_errmsg("cli_scanxar: InflateInit failed: %d\n", rc);
324
+                rc = CL_EFORMAT;
325
+                goto exit_tmpfile;
326
+            }
327
+            
328
+            while (at < map->len && at < offset+hdr.toc_length_compressed+hdr.size+length) {
329
+                unsigned int bytes = MIN(map->len - at, map->pgsz);
330
+                bytes = MIN(length, bytes);
331
+                cli_dbgmsg("cli_scanxar: fmap %u bytes\n", bytes);
332
+                if(!(strm.next_in = (void*)fmap_need_off_once(map, at, bytes))) {
333
+                    cli_dbgmsg("cli_scanxar: Can't read %u bytes @ %lu.\n", bytes, (long unsigned)at);
334
+                    inflateEnd(&strm);
335
+                    rc = CL_EREAD;
336
+                    goto exit_tmpfile;
337
+                }
338
+                at += bytes;
339
+                strm.avail_in = bytes;
340
+                do {
341
+                    int inf, outsize = 0;
342
+                    unsigned char buff[FILEBUFF];
343
+                    strm.avail_out = sizeof(buff);
344
+                    strm.next_out = buff;
345
+                    cli_dbgmsg("cli_scanxar: inflating.....\n");
346
+                    inf = inflate(&strm, Z_SYNC_FLUSH);
347
+                    if (inf != Z_OK && inf != Z_STREAM_END && inf != Z_BUF_ERROR) {
348
+                        cli_errmsg("cli_scanxar: inflate error %i %s.\n", inf, strm.msg?strm.msg:"");
349
+                        at = map->len;
350
+                        rc = CL_EFORMAT;
351
+                        goto exit_tmpfile;
352
+                    }
353
+                    if (cli_writen(fd, buff, sizeof(buff) - strm.avail_out) < 0) {
354
+                        cli_errmsg("cli_scanxar: cli_writen error file %s.\n", tmpname);
355
+                        inflateEnd(&strm);
356
+                        rc = CL_EWRITE;
357
+                        goto exit_tmpfile;
358
+                    }
359
+                    outsize += sizeof(buff) - strm.avail_out;
360
+                    if (cli_checklimits("cli_scanxar", ctx, outsize, 0, 0) != CL_CLEAN) {
361
+                        break;
362
+                    }
363
+                    if (inf == Z_STREAM_END) {
364
+                        break;
365
+                    }
366
+                } while (strm.avail_out == 0);
367
+            }
368
+            
369
+            inflateEnd(&strm);
370
+            break;
371
+        case CL_TYPE_7Z:
372
+            /*LZMA: TODO*/
373
+            /* this is broken, gives SZ_ERROR_NO_ARCHIVE:  */
374
+            /* rc = cli_7unz(ctx, at); */
375
+            /* if (rc) { */
376
+            /*     cli_errmsg("cli_scanrar: cli_7unz() fails: %i\n", rc); */
377
+            /* } */
378
+        default:
379
+        case CL_TYPE_BZ:
380
+        case CL_TYPE_ANY:
381
+            /* for uncompressed, bzip2, lzma/xz (not yet decoded; falls through from above) and unknown encodings, just copy the raw data to the temp file; cli_magic_scandesc does the rest */
382
+            if (!(blockp = (void*)fmap_need_off_once(map, at, length))) {
383
+                cli_errmsg("cli_scanxar: Can't read %li bytes @ %li, errno:%s.\n", length, at, strerror(errno));
384
+                rc = CL_EREAD;
385
+                goto exit_tmpfile;
386
+            }
387
+            
388
+            if (cli_writen(fd, blockp, length) < 0) {
389
+                cli_dbgmsg("cli_scanxar: cli_writen error %li bytes @ %li.\n", length, at);
390
+                rc = CL_EWRITE;
391
+                goto exit_tmpfile;
392
+            }
393
+            /*break;*/           
394
+        }
57 395
 
396
+        rc = cli_magic_scandesc(fd, ctx);
397
+        if (rc != CL_SUCCESS) {
398
+            if (rc == CL_VIRUS) {
399
+                cli_dbgmsg("cli_scanxar: Infected with %s\n", cli_get_last_virus(ctx));
400
+                if (!SCAN_ALL)
401
+                    goto exit_tmpfile;
402
+            } else if (rc != CL_BREAK) {
403
+                cli_errmsg("cli_scanxar: cli_magic_scandesc error %i\n", rc);
404
+                goto exit_tmpfile;
405
+            }
406
+        }
407
+   }
408
+
409
+ exit_tmpfile:
410
+    xar_cleanup_temp_file(ctx, fd, tmpname);
411
+
412
+ exit_reader:
413
+    xmlFreeTextReader(reader);
414
+    xmlCleanupParser();
415
+
416
+ exit_toc:
417
+    free(toc);
418
+    if (rc == CL_BREAK)
419
+        rc = CL_SUCCESS;
420
+#else
421
+    cli_debug("cli_scanxar: can't scan xar files, need libxml2.\n");
422
+#endif
423
+
424
+    return rc;
425
+}
... ...
@@ -49,6 +49,9 @@ struct xar_header {
49 49
     uint32_t chksum_alg; /* 0 = none */
50 50
 };
51 51
 
52
+#define XAR_HEADER_MAGIC 0x78617221
53
+#define XAR_HEADER_VERSION 0
54
+
52 55
 #ifdef HAVE_PRAGMA_PACK
53 56
 #pragma pack()
54 57
 #endif