Browse code

Add LZMA decompression for xar archives

Steve Morgan authored on 2013/09/19 05:41:31
Showing 1 changed file
... ...
@@ -29,7 +29,7 @@
29 29
 #include <libxml/xmlreader.h>
30 30
 #include "scanners.h"
31 31
 #include "inflate64.h"
32
-#include "7z_iface.h"
32
+#include "lzma_iface.h"
33 33
 
34 34
 /*
35 35
    xar_cleanup_temp_file - cleanup after cli_gentempfd
... ...
@@ -147,15 +147,15 @@ static int xar_get_toc_data_values(xmlTextReaderPtr reader, long *length, long *
147 147
         } else {
148 148
             if (xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
149 149
                 if (xmlStrEqual(name, (const xmlChar *)"data")) {
150
-                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name);
150
+                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read <data>\n");
151 151
                     indata = 1;
152 152
                 } else if (xmlStrEqual(name, (const xmlChar *)"ea")) {
153
-                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name);
153
+                    cli_dbgmsg("cli_scanxar: xmlTextReaderRead read <ea>\n");
154 154
                     inea = 1;
155 155
                 }
156 156
             } else if ((xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) &&
157 157
                        xmlStrEqual(name, (const xmlChar *)"xar")) {
158
-                cli_dbgmsg("cli_scanxar: finished parsing xml TOC.\n");   
158
+                cli_dbgmsg("cli_scanxar: finished parsing xar TOC.\n");   
159 159
             }
160 160
         }
161 161
         rc = xmlTextReaderRead(reader);
... ...
@@ -254,10 +254,10 @@ int cli_scanxar(cli_ctx *ctx)
254 254
         goto exit_toc;
255 255
     }
256 256
 
257
-       /* cli_dbgmsg("cli_scanxar: TOC xml:\n%s\n", toc); */
258
-       /* printf("cli_scanxar: TOC xml:\n%s\n", toc); */
259
-       /* cli_dbgmsg("cli_scanxar: TOC end:\n"); */
260
-       /* printf("cli_scanxar: TOC end:\n"); */
257
+    /* cli_dbgmsg("cli_scanxar: TOC xml:\n%s\n", toc); */
258
+    /* printf("cli_scanxar: TOC xml:\n%s\n", toc); */
259
+    /* cli_dbgmsg("cli_scanxar: TOC end:\n"); */
260
+    /* printf("cli_scanxar: TOC end:\n"); */
261 261
 
262 262
     /* scan the xml */
263 263
     cli_dbgmsg("cli_scanxar: scanning xar TOC xml.\n"); 
... ...
@@ -312,8 +312,9 @@ int cli_scanxar(cli_ctx *ctx)
312 312
             goto exit_reader;
313 313
         }
314 314
 
315
-        cli_dbgmsg("cli_scanxar: decompress and scanning heap offset %li len %li size %li\n",
316
-                   offset, length, size);
315
+        cli_dbgmsg("cli_scanxar: decompress into temp file:\n%s, size %li,\n"
316
+                   "from xar heap offset %li length %li\n",
317
+                   tmpname, size, offset, length);
317 318
 
318 319
         switch (encoding) {
319 320
         case CL_TYPE_GZ:
... ...
@@ -369,12 +370,113 @@ int cli_scanxar(cli_ctx *ctx)
369 369
             inflateEnd(&strm);
370 370
             break;
371 371
         case CL_TYPE_7Z:
372
-            /*LZMA: TODO*/
373
-            /* this is broken, gives SZ_ERROR_NO_ARCHIVE:  */
374
-            /* rc = cli_7unz(ctx, at); */
375
-            /* if (rc) { */
376
-            /*     cli_errmsg("cli_scanrar: cli_7unz() fails: %i\n", rc); */
377
-            /* } */
372
+            {
373
+#define CLI_LZMA_OBUF_SIZE 1024*1024
374
+#define CLI_LZMA_HDR_SIZE LZMA_PROPS_SIZE+8
375
+#define CLI_LZMA_CRATIO_SHIFT 2 /* estimated compression ratio 25% */
376
+                struct CLI_LZMA lz = {0};
377
+                unsigned long in_remaining = length;
378
+                unsigned long out_size = 0;
379
+                char * buff = __lzma_wrap_alloc(NULL, CLI_LZMA_OBUF_SIZE);
380
+                
381
+                if (buff == NULL) {
382
+                    cli_errmsg("cli_scanxar: memory request for lzma decompression buffer fails.\n");
383
+                    rc = CL_EMEM;
384
+                    goto exit_tmpfile;
385
+                    
386
+                }
387
+
388
+                blockp = (void*)fmap_need_off_once(map, at, CLI_LZMA_HDR_SIZE);
389
+                if (blockp == NULL) {
390
+                    cli_errmsg("cli_scanxar: Can't read %li bytes @ %li, errno:%s.\n",
391
+                               length, at, strerror(errno));
392
+                    rc = CL_EREAD;
393
+                    __lzma_wrap_free(NULL, buff);
394
+                    goto exit_tmpfile;
395
+                }
396
+
397
+                lz.next_in = blockp;
398
+                lz.avail_in = CLI_LZMA_HDR_SIZE;
399
+
400
+                rc = cli_LzmaInit(&lz, 0);
401
+                if (rc != LZMA_RESULT_OK) {
402
+                    cli_errmsg("cli_scanxar: cli_LzmaInit() fails: %i.\n", rc);
403
+                    rc = CL_EFORMAT;
404
+                    __lzma_wrap_free(NULL, buff);
405
+                    goto exit_tmpfile;
406
+                }
407
+                
408
+                at += CLI_LZMA_HDR_SIZE;
409
+                in_remaining -= CLI_LZMA_HDR_SIZE;                
410
+                while (at < map->len && at < offset+hdr.toc_length_compressed+hdr.size+length) {
411
+                    SizeT avail_in;
412
+                    SizeT avail_out;
413
+                    unsigned long in_consumed;
414
+
415
+                    lz.next_out = buff;
416
+                    lz.avail_out = CLI_LZMA_OBUF_SIZE;
417
+                    lz.avail_in = avail_in = MIN(CLI_LZMA_OBUF_SIZE>>CLI_LZMA_CRATIO_SHIFT, in_remaining);
418
+                    lz.next_in = (void*)fmap_need_off_once(map, at, lz.avail_in);
419
+                    if (lz.next_in == NULL) {
420
+                        cli_errmsg("cli_scanxar: Can't read %li bytes @ %li, errno: %s.\n",
421
+                                   length, at, strerror(errno));
422
+                        rc = CL_EREAD;
423
+                        __lzma_wrap_free(NULL, buff);
424
+                        cli_LzmaShutdown(&lz);
425
+                        goto exit_tmpfile;
426
+                    }
427
+
428
+                    rc = cli_LzmaDecode(&lz);
429
+                    if (rc != LZMA_RESULT_OK && rc != LZMA_STREAM_END) {
430
+                        cli_errmsg("cli_scanxar: cli_LzmaDecode() fails: %i.\n", rc);
431
+                        rc = CL_EFORMAT;
432
+                        __lzma_wrap_free(NULL, buff);
433
+                        cli_LzmaShutdown(&lz);
434
+                        goto exit_tmpfile;
435
+                    }
436
+
437
+                    in_consumed = avail_in - lz.avail_in;
438
+                    in_remaining -= in_consumed;
439
+                    at += in_consumed;
440
+                    avail_out = CLI_LZMA_OBUF_SIZE - lz.avail_out;
441
+                    
442
+                    if (avail_out == 0) {
443
+                        cli_errmsg("cli_scanxar: cli_LzmaDecode() produces no output for "
444
+                                   "avail_in %lu, avail_out %lu.\n", avail_in, avail_out);
445
+                        rc = CL_EFORMAT;
446
+                        __lzma_wrap_free(NULL, buff);
447
+                        cli_LzmaShutdown(&lz);
448
+                        goto exit_tmpfile;
449
+                    }
450
+                    
451
+                    /* Write a decompressed block. */
452
+                    cli_dbgmsg("Writing %li bytes to LZMA decompress temp file, "
453
+                               "consumed %li of %li available bytes.\n",
454
+                               avail_out, in_consumed, avail_in);
455
+                    
456
+                    if (cli_writen(fd, buff, avail_out) < 0) {
457
+                        cli_dbgmsg("cli_scanxar: cli_writen error writing lzma temp file for %li bytes.\n",
458
+                                   avail_out);
459
+                        __lzma_wrap_free(NULL, buff);
460
+                        cli_LzmaShutdown(&lz);
461
+                        rc = CL_EWRITE;
462
+                        goto exit_tmpfile;
463
+                    }
464
+                    
465
+                    /* Check file size limitation. */
466
+                    out_size += avail_out;
467
+                    if (cli_checklimits("cli_scanxar", ctx, out_size, 0, 0) != CL_CLEAN) {
468
+                        break;
469
+                    }
470
+                    
471
+                    if (rc == LZMA_STREAM_END)
472
+                        break;
473
+                }
474
+
475
+                cli_LzmaShutdown(&lz);
476
+                __lzma_wrap_free(NULL, buff);
477
+            }
478
+            break; 
378 479
         default:
379 480
         case CL_TYPE_BZ:
380 481
         case CL_TYPE_ANY: