libclamav/xar.c
ca019d6d
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
ca019d6d
  *  Copyright (C) 2013 Sourcefire, Inc.
  *
5e56b827
  *  Authors: Steven Morgan <smorgan@sourcefire.com>
ca019d6d
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
5e56b827
 #include <errno.h>
ca019d6d
 #include "xar.h"
 #include "fmap.h"
694e7882
 
5e56b827
 #if HAVE_LIBXML2
 #include <libxml/xmlreader.h>
60d8d2c3
 #include "clamav.h"
e3f87685
 #include "str.h"
ca019d6d
 #include "scanners.h"
5e56b827
 #include "inflate64.h"
9b20264d
 #include "lzma_iface.h"
5e56b827
 
 /*
    xar_cleanup_temp_file - cleanup after cli_gentempfd
    parameters:
      ctx - cli_ctx context pointer
      fd  - fd to close
      tmpname - name of file to unlink, address of storage to free
    returns - CL_SUCCESS or CL_EUNLINK
  */
288057e9
 static int xar_cleanup_temp_file(cli_ctx *ctx, int fd, char *tmpname)
5e56b827
 {
     int rc = CL_SUCCESS;
fe91f308
     if (fd > -1)
         close(fd);
     if (tmpname != NULL) {
         if (!ctx->engine->keeptmp) {
             if (cli_unlink(tmpname)) {
288057e9
                 cli_dbgmsg("cli_scanxar: error unlinking tmpfile %s\n", tmpname);
fe91f308
                 rc = CL_EUNLINK;
             }
5e56b827
         }
fe91f308
         free(tmpname);
5e56b827
     }
     return rc;
 }
 
 /*
    xar_get_numeric_from_xml_element - extract xml element value as numeric
    parameters:
      reader - xmlTextReaderPtr
      value - pointer to long to contain the returned value
    returns - CL_SUCCESS or CL_EFORMAT
  */
288057e9
 static int xar_get_numeric_from_xml_element(xmlTextReaderPtr reader, size_t *value)
5e56b827
 {
288057e9
     const xmlChar *numstr;
d96a6b8b
 
5e56b827
     if (xmlTextReaderRead(reader) == 1 && xmlTextReaderNodeType(reader) == XML_READER_TYPE_TEXT) {
e3f87685
         numstr = xmlTextReaderConstValue(reader);
         if (numstr) {
ad5506c9
             long numval;
f767ab82
             char *endptr = NULL;
288057e9
             errno        = 0;
             numval       = strtol((const char *)numstr, &endptr, 10);
02840644
             if ((((numval == LONG_MAX) || (numval == LONG_MIN)) && errno) ||
                 ((const xmlChar *)endptr == numstr)) {
 
717b9c7d
                 cli_dbgmsg("cli_scanxar: XML element value invalid\n");
                 return CL_EFORMAT;
             } else if (numval < 0) {
                 cli_dbgmsg("cli_scanxar: XML element value %li\n", numval);
e3f87685
                 return CL_EFORMAT;
             }
d96a6b8b
             *value = numval;
e3f87685
             return CL_SUCCESS;
5e56b827
         }
     }
96de30ae
     cli_dbgmsg("cli_scanxar: No text for XML element\n");
5e56b827
     return CL_EFORMAT;
 }
 
 /*
e3f87685
   xar_get_checksum_values - extract checksum and hash algorithm from xml element
   parameters:
     reader - xmlTextReaderPtr
     cksum - pointer to char* for returning checksum value.
     hash - pointer to int for returning checksum algorithm.
   returns - void
  */
288057e9
 static void xar_get_checksum_values(xmlTextReaderPtr reader, unsigned char **cksum, int *hash)
e3f87685
 {
288057e9
     xmlChar *style = xmlTextReaderGetAttribute(reader, (const xmlChar *)"style");
     const xmlChar *xmlval;
e3f87685
 
     *hash = XAR_CKSUM_NONE;
     if (style == NULL) {
96de30ae
         cli_dbgmsg("cli_scaxar: xmlTextReaderGetAttribute no style attribute "
e3f87685
                    "for checksum element\n");
     } else {
288057e9
         cli_dbgmsg("cli_scanxar: checksum algorithm is %s.\n", style);
0c6aeb9e
         if (0 == xmlStrcasecmp(style, (const xmlChar *)"sha1")) {
e3f87685
             *hash = XAR_CKSUM_SHA1;
0c6aeb9e
         } else if (0 == xmlStrcasecmp(style, (const xmlChar *)"md5")) {
e3f87685
             *hash = XAR_CKSUM_MD5;
         } else {
             cli_dbgmsg("cli_scanxar: checksum algorithm %s is unsupported.\n", style);
             *hash = XAR_CKSUM_OTHER;
         }
     }
374f21dd
     if (style != NULL)
         xmlFree(style);
e3f87685
 
     if (xmlTextReaderRead(reader) == 1 && xmlTextReaderNodeType(reader) == XML_READER_TYPE_TEXT) {
5a5d38e4
         xmlval = xmlTextReaderConstValue(reader);
e3f87685
         if (xmlval) {
d96a6b8b
             cli_dbgmsg("cli_scanxar: checksum value is %s.\n", xmlval);
288057e9
             if (((*hash == XAR_CKSUM_SHA1) && (xmlStrlen(xmlval) == 2 * CLI_HASHLEN_SHA1)) ||
                 ((*hash == XAR_CKSUM_MD5) && (xmlStrlen(xmlval) == 2 * CLI_HASHLEN_MD5))) {
                 *cksum = xmlStrdup(xmlval);
             } else {
                 cli_dbgmsg("cli_scanxar: checksum type is unknown or length is invalid.\n");
                 *hash  = XAR_CKSUM_OTHER;
                 *cksum = NULL;
             }
e3f87685
         } else {
             *cksum = NULL;
288057e9
             cli_dbgmsg("cli_scanxar: xmlTextReaderConstValue() returns NULL for checksum value.\n");
e3f87685
         }
288057e9
     } else
96de30ae
         cli_dbgmsg("cli_scanxar: No text for XML checksum element.\n");
e3f87685
 }
 
 /*
6c03dc5d
    xar_get_toc_data_values - return the values of a <data> or <ea> xml element that represent
5e56b827
                              an extent of data on the heap.
    parameters:
      reader - xmlTextReaderPtr
      length - pointer to long for returning value of the <length> element.
      offset - pointer to long for returning value of the <offset> element.
      size - pointer to long for returning value of the <size> element.
      encoding - pointer to int for returning indication of the <encoding> style attribute.
e3f87685
      a_cksum - pointer to char* for return archived checksum value.
      a_hash - pointer to int for returning archived checksum algorithm.
      e_cksum - pointer to char* for return extracted checksum value.
      e_hash - pointer to int for returning extracted checksum algorithm.
5e56b827
    returns - CL_FORMAT, CL_SUCCESS, CL_BREAK. CL_BREAK indicates no more <data>/<ea> element.
  */
d96a6b8b
 static int xar_get_toc_data_values(xmlTextReaderPtr reader, size_t *length, size_t *offset, size_t *size, int *encoding,
288057e9
                                    unsigned char **a_cksum, int *a_hash, unsigned char **e_cksum, int *e_hash)
5e56b827
 {
     const xmlChar *name;
     int indata = 0, inea = 0;
288057e9
     int rc, gotoffset = 0, gotlength = 0, gotsize = 0;
5e56b827
 
288057e9
     *a_cksum  = NULL;
     *a_hash   = XAR_CKSUM_NONE;
     *e_cksum  = NULL;
     *e_hash   = XAR_CKSUM_NONE;
e3f87685
     *encoding = CL_TYPE_ANY;
 
5e56b827
     rc = xmlTextReaderRead(reader);
     while (rc == 1) {
         name = xmlTextReaderConstLocalName(reader);
         if (indata || inea) {
             /*  cli_dbgmsg("cli_scanxar: xmlTextReaderRead read %s\n", name); */
288057e9
             if (xmlStrEqual(name, (const xmlChar *)"offset") &&
5e56b827
                 xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, offset))
288057e9
                     gotoffset = 1;
e3f87685
 
5e56b827
             } else if (xmlStrEqual(name, (const xmlChar *)"length") &&
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, length))
288057e9
                     gotlength = 1;
5e56b827
 
             } else if (xmlStrEqual(name, (const xmlChar *)"size") &&
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 if (CL_SUCCESS == xar_get_numeric_from_xml_element(reader, size))
288057e9
                     gotsize = 1;
5e56b827
 
e3f87685
             } else if (xmlStrEqual(name, (const xmlChar *)"archived-checksum") &&
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 cli_dbgmsg("cli_scanxar: <archived-checksum>:\n");
                 xar_get_checksum_values(reader, a_cksum, a_hash);
288057e9
 
0965d7da
             } else if ((xmlStrEqual(name, (const xmlChar *)"extracted-checksum") ||
                         xmlStrEqual(name, (const xmlChar *)"unarchived-checksum")) &&
e3f87685
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 cli_dbgmsg("cli_scanxar: <extracted-checksum>:\n");
                 xar_get_checksum_values(reader, e_cksum, e_hash);
 
5e56b827
             } else if (xmlStrEqual(name, (const xmlChar *)"encoding") &&
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
288057e9
                 xmlChar *style = xmlTextReaderGetAttribute(reader, (const xmlChar *)"style");
5e56b827
                 if (style == NULL) {
96de30ae
                     cli_dbgmsg("cli_scaxar: xmlTextReaderGetAttribute no style attribute "
5e56b827
                                "for encoding element\n");
                     *encoding = CL_TYPE_ANY;
                 } else if (xmlStrEqual(style, (const xmlChar *)"application/x-gzip")) {
                     cli_dbgmsg("cli_scanxar: encoding = application/x-gzip.\n");
288057e9
                     *encoding = CL_TYPE_GZ;
5e56b827
                 } else if (xmlStrEqual(style, (const xmlChar *)"application/octet-stream")) {
                     cli_dbgmsg("cli_scanxar: encoding = application/octet-stream.\n");
288057e9
                     *encoding = CL_TYPE_ANY;
5e56b827
                 } else if (xmlStrEqual(style, (const xmlChar *)"application/x-bzip2")) {
                     cli_dbgmsg("cli_scanxar: encoding = application/x-bzip2.\n");
                     *encoding = CL_TYPE_BZ;
                 } else if (xmlStrEqual(style, (const xmlChar *)"application/x-lzma")) {
                     cli_dbgmsg("cli_scanxar: encoding = application/x-lzma.\n");
                     *encoding = CL_TYPE_7Z;
288057e9
                 } else if (xmlStrEqual(style, (const xmlChar *)"application/x-xz")) {
5e56b827
                     cli_dbgmsg("cli_scanxar: encoding = application/x-xz.\n");
43d7f6f6
                     *encoding = CL_TYPE_XZ;
5e56b827
                 } else {
96de30ae
                     cli_dbgmsg("cli_scaxar: unknown style value=%s for encoding element\n", style);
5e56b827
                     *encoding = CL_TYPE_ANY;
                 }
374f21dd
                 if (style != NULL)
                     xmlFree(style);
e3f87685
 
288057e9
             } else if (indata && xmlStrEqual(name, (const xmlChar *)"data") &&
5e56b827
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) {
                 break;
e3f87685
 
5e56b827
             } else if (inea && xmlStrEqual(name, (const xmlChar *)"ea") &&
                        xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) {
                 break;
             }
288057e9
 
5e56b827
         } else {
             if (xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                 if (xmlStrEqual(name, (const xmlChar *)"data")) {
9b20264d
                     cli_dbgmsg("cli_scanxar: xmlTextReaderRead read <data>\n");
5e56b827
                     indata = 1;
                 } else if (xmlStrEqual(name, (const xmlChar *)"ea")) {
9b20264d
                     cli_dbgmsg("cli_scanxar: xmlTextReaderRead read <ea>\n");
5e56b827
                     inea = 1;
                 }
             } else if ((xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) &&
                        xmlStrEqual(name, (const xmlChar *)"xar")) {
288057e9
                 cli_dbgmsg("cli_scanxar: finished parsing xar TOC.\n");
e3f87685
                 break;
5e56b827
             }
         }
         rc = xmlTextReaderRead(reader);
     }
288057e9
 
5e56b827
     if (gotoffset && gotlength && gotsize) {
         rc = CL_SUCCESS;
288057e9
     } else if (0 == gotoffset + gotlength + gotsize)
5e56b827
         rc = CL_BREAK;
     else
         rc = CL_EFORMAT;
 
     return rc;
 }
 
e2c74282
 /*
    xar_scan_subdocuments - check TOC for xml subdocuments. Each one found is
                            extracted and scanned in memory.
    Parameters:
       reader - xmlTextReaderPtr; reading stops at the <toc> element
       ctx - pointer to cli_ctx
    Returns:
       CL_SUCCESS - subdoc found and clean scan (or virus found and SCAN_ALLMATCHES), or no subdocument
       CL_EFORMAT - a node without a local name was encountered
       other - error return code from cli_mem_scandesc(); or, when the scan was
               clean and --leave-temps is in effect, the error from creating,
               writing or cleaning up the temporary copy of the subdocument
 */
 static int xar_scan_subdocuments(xmlTextReaderPtr reader, cli_ctx *ctx)
 {
     int rc = CL_SUCCESS;

     while (xmlTextReaderRead(reader) == 1) {
         const xmlChar *name = xmlTextReaderConstLocalName(reader);
         xmlChar *subdoc;
         int subdoc_len;

         if (name == NULL) {
             cli_dbgmsg("cli_scanxar: xmlTextReaderConstLocalName() no name.\n");
             rc = CL_EFORMAT;
             break;
         }

         /* Subdocuments precede the <toc>; once it starts there is nothing more to look for. */
         if (xmlStrEqual(name, (const xmlChar *)"toc") &&
             xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT)
             return CL_SUCCESS;

         if (!xmlStrEqual(name, (const xmlChar *)"subdoc") ||
             xmlTextReaderNodeType(reader) != XML_READER_TYPE_ELEMENT)
             continue;

         subdoc = xmlTextReaderReadInnerXml(reader);
         if (subdoc == NULL) {
             cli_dbgmsg("cli_scanxar: no content in subdoc element.\n");
             xmlTextReaderNext(reader);
             continue;
         }

         subdoc_len = xmlStrlen(subdoc);
         cli_dbgmsg("cli_scanxar: in-memory scan of xml subdocument, len %i.\n", subdoc_len);
         rc = cli_mem_scandesc(subdoc, subdoc_len, ctx);
         if (rc == CL_VIRUS && SCAN_ALLMATCHES)
             rc = CL_SUCCESS;

         /* make a file to leave if --leave-temps in effect */
         if (ctx->engine->keeptmp) {
             char *tmpname = NULL;
             int fd        = -1;
             /* Tracked separately so the temp-file bookkeeping can never
                overwrite a detection or error reported by the scan above. */
             int tmp_rc = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd);

             if (tmp_rc != CL_SUCCESS) {
                 cli_dbgmsg("cli_scanxar: Can't create temporary file for subdocument.\n");
             } else {
                 int cleanup_rc;

                 cli_dbgmsg("cli_scanxar: Writing subdoc to temp file %s.\n", tmpname);
                 if (cli_writen(fd, subdoc, subdoc_len) == (size_t)-1) {
                     cli_dbgmsg("cli_scanxar: cli_writen error writing subdoc temporary file.\n");
                     tmp_rc = CL_EWRITE;
                 }
                 /* Always close/free, but keep an earlier write error as the reported status. */
                 cleanup_rc = xar_cleanup_temp_file(ctx, fd, tmpname);
                 if (tmp_rc == CL_SUCCESS)
                     tmp_rc = cleanup_rc;
             }

             if (rc == CL_SUCCESS)
                 rc = tmp_rc;
         }

         xmlFree(subdoc);
         if (rc != CL_SUCCESS)
             return rc;
         xmlTextReaderNext(reader);
     }
     return rc;
 }
e3f87685
 
288057e9
 static void *xar_hash_init(int hash, void **sc, void **mc)
e3f87685
 {
     if (!sc && !mc)
         return NULL;
     switch (hash) {
288057e9
         case XAR_CKSUM_SHA1:
             *sc = cl_hash_init("sha1");
             if (!(*sc)) {
                 return NULL;
             }
f077c617
 
288057e9
             return *sc;
         case XAR_CKSUM_MD5:
             *mc = cl_hash_init("md5");
             if (!(*mc)) {
                 return NULL;
             }
f077c617
 
288057e9
             return *mc;
         case XAR_CKSUM_OTHER:
         case XAR_CKSUM_NONE:
         default:
             return NULL;
e3f87685
     }
 }
 
288057e9
 static void xar_hash_update(void *hash_ctx, void *data, unsigned long size, int hash)
e3f87685
 {
     if (!hash_ctx || !data || !size)
         return;
b2e7c931
 
e3f87685
     switch (hash) {
288057e9
         case XAR_CKSUM_NONE:
         case XAR_CKSUM_OTHER:
             return;
e3f87685
     }
b2e7c931
 
da6e06dd
     cl_update_hash(hash_ctx, data, size);
e3f87685
 }
 
288057e9
 static void xar_hash_final(void *hash_ctx, void *result, int hash)
e3f87685
 {
     if (!hash_ctx || !result)
         return;
b2e7c931
 
e3f87685
     switch (hash) {
288057e9
         case XAR_CKSUM_OTHER:
         case XAR_CKSUM_NONE:
             return;
e3f87685
     }
b2e7c931
 
da6e06dd
     cl_finish_hash(hash_ctx, result);
e3f87685
 }
 
288057e9
 static int xar_hash_check(int hash, const void *result, const void *expected)
e3f87685
 {
     int len;
 
     if (!result || !expected)
         return 1;
     switch (hash) {
288057e9
         case XAR_CKSUM_SHA1:
             len = CLI_HASHLEN_SHA1;
             break;
         case XAR_CKSUM_MD5:
             len = CLI_HASHLEN_MD5;
             break;
         case XAR_CKSUM_OTHER:
         case XAR_CKSUM_NONE:
         default:
             return 1;
e3f87685
     }
     return memcmp(result, expected, len);
 }
 
5e56b827
 #endif
e2c74282
 
5e56b827
 /*
   cli_scanxar - scan an xar archive.
   Parameters:
     ctx - pointer to cli_ctx.
   returns - CL_SUCCESS or CL_ error code.
 */
ca019d6d
 
 int cli_scanxar(cli_ctx *ctx)
 {
288057e9
     int rc                      = CL_SUCCESS;
     unsigned int cksum_fails    = 0;
7f50a91c
     unsigned int extract_errors = 0;
5e56b827
 #if HAVE_LIBXML2
     int fd = -1;
ca019d6d
     struct xar_header hdr;
5e56b827
     fmap_t *map = *ctx->fmap;
d96a6b8b
     size_t length, offset, size, at;
5e56b827
     int encoding;
cd94be7a
     z_stream strm;
50876732
     char *toc, *tmpname = NULL;
5e56b827
     xmlTextReaderPtr reader = NULL;
e3f87685
     int a_hash, e_hash;
5a5d38e4
     unsigned char *a_cksum = NULL, *e_cksum = NULL;
a6e7f612
     void *a_hash_ctx = NULL, *e_hash_ctx = NULL;
     char result[SHA1_HASH_SIZE];
5e56b827
 
cd94be7a
     memset(&strm, 0x00, sizeof(z_stream));
 
5e56b827
     /* retrieve xar header */
     if (fmap_readn(*ctx->fmap, &hdr, 0, sizeof(hdr)) != sizeof(hdr)) {
96de30ae
         cli_dbgmsg("cli_scanxar: Invalid header, too short.\n");
ca019d6d
         return CL_EFORMAT;
     }
     hdr.magic = be32_to_host(hdr.magic);
5e56b827
 
     if (hdr.magic == XAR_HEADER_MAGIC) {
ca019d6d
         cli_dbgmsg("cli_scanxar: Matched magic\n");
288057e9
     } else {
96de30ae
         cli_dbgmsg("cli_scanxar: Invalid magic\n");
ca019d6d
         return CL_EFORMAT;
     }
288057e9
     hdr.size                    = be16_to_host(hdr.size);
     hdr.version                 = be16_to_host(hdr.version);
     hdr.toc_length_compressed   = be64_to_host(hdr.toc_length_compressed);
5e56b827
     hdr.toc_length_decompressed = be64_to_host(hdr.toc_length_decompressed);
288057e9
     hdr.chksum_alg              = be32_to_host(hdr.chksum_alg);
ca019d6d
 
5e56b827
     /* cli_dbgmsg("hdr.magic %x\n", hdr.magic); */
     /* cli_dbgmsg("hdr.size %i\n", hdr.size); */
     /* cli_dbgmsg("hdr.version %i\n", hdr.version); */
     /* cli_dbgmsg("hdr.toc_length_compressed %lu\n", hdr.toc_length_compressed); */
     /* cli_dbgmsg("hdr.toc_length_decompressed %lu\n", hdr.toc_length_decompressed); */
     /* cli_dbgmsg("hdr.chksum_alg %i\n", hdr.chksum_alg); */
288057e9
 
5e56b827
     /* Uncompress TOC */
     strm.next_in = (unsigned char *)fmap_need_off_once(*ctx->fmap, hdr.size, hdr.toc_length_compressed);
     if (strm.next_in == NULL) {
96de30ae
         cli_dbgmsg("cli_scanxar: fmap_need_off_once fails on TOC.\n");
         return CL_EREAD;
5e56b827
     }
288057e9
     strm.avail_in = hdr.toc_length_compressed;
     toc           = cli_malloc(hdr.toc_length_decompressed + 1);
5e56b827
     if (toc == NULL) {
96de30ae
         cli_dbgmsg("cli_scanxar: cli_malloc fails on TOC decompress buffer.\n");
5e56b827
         return CL_EMEM;
     }
     toc[hdr.toc_length_decompressed] = '\0';
288057e9
     strm.avail_out                   = hdr.toc_length_decompressed;
     strm.next_out                    = (unsigned char *)toc;
     rc                               = inflateInit(&strm);
5e56b827
     if (rc != Z_OK) {
96de30ae
         cli_dbgmsg("cli_scanxar:inflateInit error %i \n", rc);
5e56b827
         rc = CL_EFORMAT;
         goto exit_toc;
288057e9
     }
5e56b827
     rc = inflate(&strm, Z_SYNC_FLUSH);
     if (rc != Z_OK && rc != Z_STREAM_END) {
96de30ae
         cli_dbgmsg("cli_scanxar:inflate error %i \n", rc);
5e56b827
         rc = CL_EFORMAT;
         goto exit_toc;
     }
     rc = inflateEnd(&strm);
     if (rc != Z_OK) {
96de30ae
         cli_dbgmsg("cli_scanxar:inflateEnd error %i \n", rc);
5e56b827
         rc = CL_EFORMAT;
         goto exit_toc;
     }
ca019d6d
 
d96a6b8b
     if (hdr.toc_length_decompressed != strm.total_out) {
         cli_dbgmsg("TOC decompress length %" PRIu64 " does not match amount decompressed %lu\n",
                    hdr.toc_length_decompressed, strm.total_out);
288057e9
         toc[strm.total_out]         = '\0';
d96a6b8b
         hdr.toc_length_decompressed = strm.total_out;
     }
 
9b20264d
     /* cli_dbgmsg("cli_scanxar: TOC xml:\n%s\n", toc); */
     /* printf("cli_scanxar: TOC xml:\n%s\n", toc); */
     /* cli_dbgmsg("cli_scanxar: TOC end:\n"); */
     /* printf("cli_scanxar: TOC end:\n"); */
5e56b827
 
     /* scan the xml */
288057e9
     cli_dbgmsg("cli_scanxar: scanning xar TOC xml in memory.\n");
5e56b827
     rc = cli_mem_scandesc(toc, hdr.toc_length_decompressed, ctx);
     if (rc != CL_SUCCESS) {
048a88e6
         if (rc != CL_VIRUS || !SCAN_ALLMATCHES)
288057e9
             goto exit_toc;
5e56b827
     }
 
     /* make a file to leave if --leave-temps in effect */
288057e9
     if (ctx->engine->keeptmp) {
5e56b827
         if ((rc = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
96de30ae
             cli_dbgmsg("cli_scanxar: Can't create temporary file for TOC.\n");
5e56b827
             goto exit_toc;
         }
6c03dc5d
         if (cli_writen(fd, toc, hdr.toc_length_decompressed) == (size_t)-1) {
96de30ae
             cli_dbgmsg("cli_scanxar: cli_writen error writing TOC.\n");
5e56b827
             rc = CL_EWRITE;
             xar_cleanup_temp_file(ctx, fd, tmpname);
             goto exit_toc;
         }
50876732
         rc      = xar_cleanup_temp_file(ctx, fd, tmpname);
         tmpname = NULL;
5e56b827
         if (rc != CL_SUCCESS)
             goto exit_toc;
     }
 
d3e752ad
     reader = xmlReaderForMemory(toc, hdr.toc_length_decompressed, "noname.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
5e56b827
     if (reader == NULL) {
96de30ae
         cli_dbgmsg("cli_scanxar: xmlReaderForMemory error for TOC\n");
5e56b827
         goto exit_toc;
     }
 
e2c74282
     rc = xar_scan_subdocuments(reader, ctx);
     if (rc != CL_SUCCESS) {
96de30ae
         cli_dbgmsg("xar_scan_subdocuments returns %i.\n", rc);
9d762a7a
         goto exit_reader;
e2c74282
     }
 
5e56b827
     /* Walk the TOC XML and extract files */
288057e9
     fd      = -1;
5e56b827
     tmpname = NULL;
e3f87685
     while (CL_SUCCESS == (rc = xar_get_toc_data_values(reader, &length, &offset, &size, &encoding,
                                                        &a_cksum, &a_hash, &e_cksum, &e_hash))) {
2599f40b
         int do_extract_cksum = 1;
288057e9
         unsigned char *blockp;
da6e06dd
         void *a_sc, *e_sc;
         void *a_mc, *e_mc;
288057e9
         char *expected;
5e56b827
 
         /* clean up temp file from previous loop iteration */
         if (fd > -1 && tmpname) {
50876732
             rc      = xar_cleanup_temp_file(ctx, fd, tmpname);
             tmpname = NULL;
5e56b827
             if (rc != CL_SUCCESS)
                 goto exit_reader;
         }
 
         at = offset + hdr.toc_length_compressed + hdr.size;
 
         if ((rc = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
96de30ae
             cli_dbgmsg("cli_scanxar: Can't generate temporary file.\n");
5e56b827
             goto exit_reader;
         }
 
d96a6b8b
         cli_dbgmsg("cli_scanxar: decompress into temp file:\n%s, size %zu,\n"
                    "from xar heap offset %zu length %zu\n",
9b20264d
                    tmpname, size, offset, length);
5e56b827
 
e3f87685
         a_hash_ctx = xar_hash_init(a_hash, &a_sc, &a_mc);
         e_hash_ctx = xar_hash_init(e_hash, &e_sc, &e_mc);
 
5e56b827
         switch (encoding) {
288057e9
             case CL_TYPE_GZ:
                 /* inflate gzip directly because file segments do not contain magic */
                 memset(&strm, 0, sizeof(strm));
                 if ((rc = inflateInit(&strm)) != Z_OK) {
                     cli_dbgmsg("cli_scanxar: InflateInit failed: %d\n", rc);
                     rc = CL_EFORMAT;
                     extract_errors++;
                     break;
5e56b827
                 }
e3f87685
 
288057e9
                 while ((size_t)at < map->len && (unsigned long)at < offset + hdr.toc_length_compressed + hdr.size + length) {
                     unsigned long avail_in;
                     void *next_in;
                     unsigned int bytes = MIN(map->len - at, map->pgsz);
                     bytes              = MIN(length, bytes);
                     if (!(strm.next_in = next_in = (void *)fmap_need_off_once(map, at, bytes))) {
                         cli_dbgmsg("cli_scanxar: Can't read %u bytes @ %lu.\n", bytes, (long unsigned)at);
5e56b827
                         inflateEnd(&strm);
288057e9
                         rc = CL_EREAD;
5e56b827
                         goto exit_tmpfile;
                     }
288057e9
                     at += bytes;
                     strm.avail_in = avail_in = bytes;
                     do {
                         int inf, outsize = 0;
                         unsigned char buff[FILEBUFF];
                         strm.avail_out = sizeof(buff);
                         strm.next_out  = buff;
                         inf            = inflate(&strm, Z_SYNC_FLUSH);
                         if (inf != Z_OK && inf != Z_STREAM_END && inf != Z_BUF_ERROR) {
                             cli_dbgmsg("cli_scanxar: inflate error %i %s.\n", inf, strm.msg ? strm.msg : "");
                             rc = CL_EFORMAT;
                             extract_errors++;
                             break;
                         }
 
                         bytes = sizeof(buff) - strm.avail_out;
 
                         if (e_hash_ctx != NULL)
                             xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
 
6c03dc5d
                         if (cli_writen(fd, buff, bytes) == (size_t)-1) {
288057e9
                             cli_dbgmsg("cli_scanxar: cli_writen error file %s.\n", tmpname);
                             inflateEnd(&strm);
                             rc = CL_EWRITE;
                             goto exit_tmpfile;
                         }
                         outsize += sizeof(buff) - strm.avail_out;
                         if (cli_checklimits("cli_scanxar", ctx, outsize, 0, 0) != CL_CLEAN) {
                             break;
                         }
                         if (inf == Z_STREAM_END) {
                             break;
                         }
                     } while (strm.avail_out == 0);
 
                     if (rc != CL_SUCCESS)
5e56b827
                         break;
7f50a91c
 
288057e9
                     avail_in -= strm.avail_in;
                     if (a_hash_ctx != NULL)
                         xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
                 }
7f50a91c
 
288057e9
                 inflateEnd(&strm);
                 break;
             case CL_TYPE_7Z:
 #define CLI_LZMA_OBUF_SIZE 1024 * 1024
 #define CLI_LZMA_HDR_SIZE LZMA_PROPS_SIZE + 8
 #define CLI_LZMA_IBUF_SIZE CLI_LZMA_OBUF_SIZE >> 2 /* estimated compression ratio 25% */
43d7f6f6
             {
5a5d38e4
                 struct CLI_LZMA lz;
d96a6b8b
                 unsigned long in_remaining = MIN(length, map->len - at);
288057e9
                 unsigned long out_size     = 0;
                 unsigned char *buff        = __lzma_wrap_alloc(NULL, CLI_LZMA_OBUF_SIZE);
f8708da8
                 int lret;
d96a6b8b
 
                 if (length > in_remaining)
                     length = in_remaining;
 
5a5d38e4
                 memset(&lz, 0, sizeof(lz));
9b20264d
                 if (buff == NULL) {
96de30ae
                     cli_dbgmsg("cli_scanxar: memory request for lzma decompression buffer fails.\n");
9b20264d
                     rc = CL_EMEM;
                     goto exit_tmpfile;
                 }
 
288057e9
                 blockp = (void *)fmap_need_off_once(map, at, CLI_LZMA_HDR_SIZE);
9b20264d
                 if (blockp == NULL) {
d78095c2
                     char errbuff[128];
                     cli_strerror(errno, errbuff, sizeof(errbuff));
7e64560c
                     cli_dbgmsg("cli_scanxar: Can't read %i bytes @ %zu, errno:%s.\n",
d96a6b8b
                                CLI_LZMA_HDR_SIZE, at, errbuff);
9b20264d
                     rc = CL_EREAD;
                     __lzma_wrap_free(NULL, buff);
                     goto exit_tmpfile;
                 }
 
288057e9
                 lz.next_in  = blockp;
9b20264d
                 lz.avail_in = CLI_LZMA_HDR_SIZE;
 
0965d7da
                 if (a_hash_ctx != NULL)
                     xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
e3f87685
 
f8708da8
                 lret = cli_LzmaInit(&lz, 0);
                 if (lret != LZMA_RESULT_OK) {
                     cli_dbgmsg("cli_scanxar: cli_LzmaInit() fails: %i.\n", lret);
9b20264d
                     rc = CL_EFORMAT;
                     __lzma_wrap_free(NULL, buff);
7f50a91c
                     extract_errors++;
                     break;
9b20264d
                 }
7f50a91c
 
9b20264d
                 at += CLI_LZMA_HDR_SIZE;
e3f87685
                 in_remaining -= CLI_LZMA_HDR_SIZE;
7e64560c
                 while (at < map->len && at < offset + (size_t)hdr.toc_length_compressed + (size_t)hdr.size + length) {
9b20264d
                     SizeT avail_in;
                     SizeT avail_out;
288057e9
                     void *next_in;
9b20264d
                     unsigned long in_consumed;
 
288057e9
                     lz.next_out  = buff;
9b20264d
                     lz.avail_out = CLI_LZMA_OBUF_SIZE;
43d7f6f6
                     lz.avail_in = avail_in = MIN(CLI_LZMA_IBUF_SIZE, in_remaining);
288057e9
                     lz.next_in = next_in = (void *)fmap_need_off_once(map, at, lz.avail_in);
9b20264d
                     if (lz.next_in == NULL) {
d78095c2
                         char errbuff[128];
                         cli_strerror(errno, errbuff, sizeof(errbuff));
7e64560c
                         cli_dbgmsg("cli_scanxar: Can't read %zu bytes @ %zu, errno: %s.\n",
d96a6b8b
                                    lz.avail_in, at, errbuff);
9b20264d
                         rc = CL_EREAD;
                         __lzma_wrap_free(NULL, buff);
                         cli_LzmaShutdown(&lz);
                         goto exit_tmpfile;
                     }
 
f8708da8
                     lret = cli_LzmaDecode(&lz);
                     if (lret != LZMA_RESULT_OK && lret != LZMA_STREAM_END) {
                         cli_dbgmsg("cli_scanxar: cli_LzmaDecode() fails: %i.\n", lret);
9b20264d
                         rc = CL_EFORMAT;
7f50a91c
                         extract_errors++;
                         break;
9b20264d
                     }
 
                     in_consumed = avail_in - lz.avail_in;
                     in_remaining -= in_consumed;
                     at += in_consumed;
                     avail_out = CLI_LZMA_OBUF_SIZE - lz.avail_out;
288057e9
 
3f058bd7
                     if (avail_out == 0)
                         cli_dbgmsg("cli_scanxar: cli_LzmaDecode() produces no output for "
059ca614
                                    "avail_in %llu, avail_out %llu.\n",
                                    (long long unsigned)avail_in, (long long unsigned)avail_out);
3f058bd7
 
0965d7da
                     if (a_hash_ctx != NULL)
288057e9
                         xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);
0965d7da
                     if (e_hash_ctx != NULL)
                         xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
e3f87685
 
9b20264d
                     /* Write a decompressed block. */
3f058bd7
                     /* cli_dbgmsg("Writing %li bytes to LZMA decompress temp file, " */
                     /*            "consumed %li of %li available compressed bytes.\n", */
                     /*            avail_out, in_consumed, avail_in); */
7f50a91c
 
6c03dc5d
                     if (cli_writen(fd, buff, avail_out) == (size_t)-1) {
059ca614
                         cli_dbgmsg("cli_scanxar: cli_writen error writing lzma temp file for %llu bytes.\n",
                                    (long long unsigned)avail_out);
9b20264d
                         __lzma_wrap_free(NULL, buff);
                         cli_LzmaShutdown(&lz);
                         rc = CL_EWRITE;
                         goto exit_tmpfile;
                     }
7f50a91c
 
9b20264d
                     /* Check file size limitation. */
                     out_size += avail_out;
                     if (cli_checklimits("cli_scanxar", ctx, out_size, 0, 0) != CL_CLEAN) {
                         break;
                     }
7f50a91c
 
f8708da8
                     if (lret == LZMA_STREAM_END)
9b20264d
                         break;
                 }
 
                 cli_LzmaShutdown(&lz);
                 __lzma_wrap_free(NULL, buff);
288057e9
             } break;
             case CL_TYPE_ANY:
             default:
             case CL_TYPE_BZ:
             case CL_TYPE_XZ:
                 /* for uncompressed, bzip2, xz, and unknown, just pull the file, cli_magic_scandesc does the rest */
                 do_extract_cksum = 0;
                 {
                     size_t writelen = MIN(map->len - at, length);
d96a6b8b
 
288057e9
                     if (ctx->engine->maxfilesize)
                         writelen = MIN((size_t)(ctx->engine->maxfilesize), writelen);
 
                     if (!(blockp = (void *)fmap_need_off_once(map, at, writelen))) {
                         char errbuff[128];
                         cli_strerror(errno, errbuff, sizeof(errbuff));
                         cli_dbgmsg("cli_scanxar: Can't read %zu bytes @ %zu, errno:%s.\n",
                                    writelen, at, errbuff);
                         rc = CL_EREAD;
                         goto exit_tmpfile;
                     }
 
                     if (a_hash_ctx != NULL)
                         xar_hash_update(a_hash_ctx, blockp, writelen, a_hash);
 
6c03dc5d
                     if (cli_writen(fd, blockp, writelen) == (size_t)-1) {
288057e9
                         cli_dbgmsg("cli_scanxar: cli_writen error %zu bytes @ %zu.\n", writelen, at);
                         rc = CL_EWRITE;
                         goto exit_tmpfile;
                     }
                     /*break;*/
e3f87685
                 }
d96a6b8b
         } /* end of switch */
e3f87685
 
7f50a91c
         if (rc == CL_SUCCESS) {
0965d7da
             if (a_hash_ctx != NULL) {
                 xar_hash_final(a_hash_ctx, result, a_hash);
                 a_hash_ctx = NULL;
             } else {
                 cli_dbgmsg("cli_scanxar: archived-checksum missing.\n");
                 cksum_fails++;
             }
7f50a91c
             if (a_cksum != NULL) {
                 expected = cli_hex2str((char *)a_cksum);
                 if (xar_hash_check(a_hash, result, expected) != 0) {
0965d7da
                     cli_dbgmsg("cli_scanxar: archived-checksum mismatch.\n");
2599f40b
                     cksum_fails++;
                 } else {
288057e9
                     cli_dbgmsg("cli_scanxar: archived-checksum matched.\n");
2599f40b
                 }
                 free(expected);
5e56b827
             }
0965d7da
 
             if (e_hash_ctx != NULL) {
                 xar_hash_final(e_hash_ctx, result, e_hash);
                 e_hash_ctx = NULL;
             } else {
                 cli_dbgmsg("cli_scanxar: extracted-checksum(unarchived-checksum) missing.\n");
                 cksum_fails++;
             }
7f50a91c
             if (e_cksum != NULL) {
                 if (do_extract_cksum) {
                     expected = cli_hex2str((char *)e_cksum);
                     if (xar_hash_check(e_hash, result, expected) != 0) {
0965d7da
                         cli_dbgmsg("cli_scanxar: extracted-checksum mismatch.\n");
7f50a91c
                         cksum_fails++;
                     } else {
288057e9
                         cli_dbgmsg("cli_scanxar: extracted-checksum matched.\n");
7f50a91c
                     }
                     free(expected);
                 }
             }
288057e9
 
01eebc13
             rc = cli_magic_scandesc(fd, tmpname, ctx);
7f50a91c
             if (rc != CL_SUCCESS) {
                 if (rc == CL_VIRUS) {
                     cli_dbgmsg("cli_scanxar: Infected with %s\n", cli_get_last_virus(ctx));
048a88e6
                     if (!SCAN_ALLMATCHES)
7f50a91c
                         goto exit_tmpfile;
                 } else if (rc != CL_BREAK) {
                     cli_dbgmsg("cli_scanxar: cli_magic_scandesc error %i\n", rc);
5e56b827
                     goto exit_tmpfile;
7f50a91c
                 }
5e56b827
             }
         }
288057e9
 
7f50a91c
         if (a_cksum != NULL) {
             xmlFree(a_cksum);
             a_cksum = NULL;
         }
         if (e_cksum != NULL) {
             xmlFree(e_cksum);
             e_cksum = NULL;
         }
     }
5e56b827
 
288057e9
 exit_tmpfile:
5e56b827
     xar_cleanup_temp_file(ctx, fd, tmpname);
a6e7f612
     if (a_hash_ctx != NULL)
         xar_hash_final(a_hash_ctx, result, a_hash);
     if (e_hash_ctx != NULL)
         xar_hash_final(e_hash_ctx, result, e_hash);
288057e9
 
 exit_reader:
e3f87685
     if (a_cksum != NULL)
288057e9
         xmlFree(a_cksum);
e3f87685
     if (e_cksum != NULL)
         xmlFree(e_cksum);
e2c74282
     xmlTextReaderClose(reader);
5e56b827
     xmlFreeTextReader(reader);
 
288057e9
 exit_toc:
5e56b827
     free(toc);
     if (rc == CL_BREAK)
         rc = CL_SUCCESS;
 #else
9eb30a47
     cli_dbgmsg("cli_scanxar: can't scan xar files, need libxml2.\n");
5e56b827
 #endif
7f50a91c
     if (cksum_fails + extract_errors != 0) {
d96a6b8b
         cli_dbgmsg("cli_scanxar: %u checksum errors and %u extraction errors.\n",
288057e9
                    cksum_fails, extract_errors);
7f50a91c
     }
5e56b827
 
     return rc;
 }