libclamav/untar.c
a8b7c1dd
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
6c03dc5d
  *
6289eda8
  *  Summary: Extract files compressed with TAR compression format.
6c03dc5d
  *
6289eda8
  *  Acknowledgements: ClamAV untar code is based on a public domain minitar utility
  *                    by Charles G. Waldman.
a8b7c1dd
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
a8b7c1dd
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
a8b7c1dd
  */
2023340a
 
dea34e7d
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
95fb46e5
 
 #include <errno.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
288057e9
 #ifdef HAVE_UNISTD_H
95fb46e5
 #include <unistd.h>
45b28aba
 #endif
95fb46e5
 #include <sys/stat.h>
bb3fdd1b
 #include <fcntl.h>
288057e9
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h> /* for NAME_MAX */
45b28aba
 #endif
95fb46e5
 
 #include "clamav.h"
 #include "others.h"
bb3fdd1b
 #include "untar.h"
11b50569
 #include "mbox.h"
 #include "blob.h"
96522097
 #include "scanners.h"
570b1d00
 #include "matcher.h"
95fb46e5
 
0cf813f8
 #define TARHEADERSIZE 512
 /* BLOCKSIZE must be >= TARHEADERSIZE */
 #define BLOCKSIZE TARHEADERSIZE
eeeca3bd
 #define TARSIZEOFFSET 124
 #define TARSIZELEN 12
 #define TARCHECKSUMOFFSET 148
 #define TARCHECKSUMLEN 8
 #define TARFILETYPEOFFSET 156
95fb46e5
 
bb3fdd1b
 /**
  * Parse an octal number from the start of a NUL-terminated string.
  *
  * Leading whitespace is skipped and parsing stops at the first character
  * that is not an octal digit, the same subject sequence sscanf("%o") accepts.
  * Unlike sscanf(), values that do not fit are rejected instead of invoking
  * undefined behaviour or wrapping into an arbitrary negative number.
  *
  * @param str NUL-terminated text, e.g. a copy of a tar header field
  * @return the parsed value, or -1 if no octal digits were found or the
  *         value is larger than INT_MAX
  */
 static int
 octal(const char *str)
 {
     char *end = NULL;
     unsigned long value;

     value = strtoul(str, &end, 8);
     if (end == str) {
         /* Nothing was converted: empty, blank or non-numeric field. */
         return -1;
     }
     if (value > (unsigned long)INT_MAX) {
         /*
          * Too large for the int return type. strtoul() reports its own
          * overflow as ULONG_MAX, which is caught by this test as well.
          */
         return -1;
     }
     return (int)value;
 }
 
eeeca3bd
 /**
  * Retrieve checksum values from a tar header block.
  * @param header Header data block, padded with zeroes to reach BLOCKSIZE
  * @return int value of checksum, -1 (from octal()) if bad value
  */
 static int
 getchecksum(const char *header)
 {
288057e9
     char ochecksum[TARCHECKSUMLEN + 1];
     int checksum = -1;
eeeca3bd
 
288057e9
     strncpy(ochecksum, header + TARCHECKSUMOFFSET, TARCHECKSUMLEN);
     ochecksum[TARCHECKSUMLEN] = '\0';
     checksum                  = octal(ochecksum);
     return checksum;
eeeca3bd
 }
 
 /**
  * Check a tar header block against an expected checksum.
  *
  * Every byte of the block is summed, except that the checksum field itself
  * is counted as if it were filled with ASCII spaces. The sum is computed
  * twice, once with the bytes taken as unsigned (POSIX) and once as signed
  * (some legacy tars); a match on either one is accepted.
  *
  * @param header Header data block, padded with zeroes to reach BLOCKSIZE
  * @param targetsum Check value to match (as int not octal!)
  * @return 0 if checksum matches target, -1 if not (or if targetsum is -1,
  *         the error value produced by octal())
  */
 static int
 testchecksum(const char *header, int targetsum)
 {
     const unsigned char *ubytes = (const unsigned char *)header;
     const signed char *sbytes   = (const signed char *)header;
     const int field_end         = TARCHECKSUMOFFSET + TARCHECKSUMLEN;
     /* Start from the contribution of the checksum field: one space (32) per byte. */
     int unsigned_total = 32 * TARCHECKSUMLEN;
     int signed_total   = 32 * TARCHECKSUMLEN;
     int idx;

     /* A target of -1 means the stored checksum could not be parsed. */
     if (targetsum == -1)
         return -1;

     /* Bytes in front of the checksum field. */
     for (idx = 0; idx < TARCHECKSUMOFFSET; idx++) {
         unsigned_total += ubytes[idx];
         signed_total += sbytes[idx];
     }

     /* Bytes behind the checksum field. */
     for (idx = field_end; idx < BLOCKSIZE; idx++) {
         unsigned_total += ubytes[idx];
         signed_total += sbytes[idx];
     }

     return (targetsum == unsigned_total || targetsum == signed_total) ? 0 : -1;
 }
 
6c03dc5d
 /**
  * Walk the tar archive exposed through ctx->fmap, write each file-like entry
  * to a temporary file under dir and scan it with cli_magic_scandesc().
  *
  * The archive is consumed in BLOCKSIZE chunks. A chunk is handled either as
  * a header (when no entry is currently being extracted) or as entry content.
  * Headers whose checksum does not validate are skipped one block at a time
  * until a valid header turns up.
  *
  * @param dir   Directory in which the temporary files "tarNN" are created
  * @param posix Non-zero to require the "ustar" magic string in every header
  * @param ctx   Scan context; supplies the file map, the engine settings and
  *              the scan limits
  * @return CL_VIRUS if a name match or a scanned entry reported a virus,
  *         CL_CLEAN if the archive was processed (or abandoned as truncated)
  *         without a detection, CL_EFORMAT when the "ustar" magic is required
  *         but missing, CL_EREAD, CL_EWRITE, CL_ETMPFILE or CL_EUNLINK on I/O
  *         failures, or the non-clean status returned by cli_checklimits()
  *         before a header is parsed.
  */
 cl_error_t cli_untar(const char *dir, unsigned int posix, cli_ctx *ctx)
 {
     cl_error_t ret;
     size_t size         = 0;  /* bytes of the current entry still to be consumed */
     int size_int        = 0;
     int fout            = -1; /* descriptor of the entry being extracted, -1 if none */
     int in_block        = 0;  /* non-zero while reading entry content rather than a header */
     int last_header_bad = 0;
     int limitnear       = 0;  /* non-zero when the declared entry size tripped CL_EMAXSIZE */
     unsigned int files  = 0;
     char fullname[NAME_MAX + 1];
     size_t pos      = 0; /* read offset into the file map */
     size_t currsize = 0; /* content bytes accounted for while limitnear is set */
     char zero[BLOCKSIZE];
     unsigned int num_viruses = 0; /* detections collected when SCAN_ALLMATCHES is on */

     cli_dbgmsg("In untar(%s)\n", dir);
     memset(zero, 0, sizeof(zero));

     for (;;) {
         const char *block;
         size_t nread;

         /* NOTE(review): presumably yields up to BLOCKSIZE bytes at pos and stores the count in nread - confirm against fmap.h */
         block = fmap_need_off_once_len(*ctx->fmap, pos, BLOCKSIZE, &nread);
         cli_dbgmsg("cli_untar: pos = %lu\n", (unsigned long)pos);

         /* End of input while expecting a header: nothing left to do. */
         if (!in_block && !nread)
             break;

         /* End of input in the middle of an entry: feed zeroes so the entry can be closed off below. */
         if (!nread)
             block = zero;

         if (!block) {
             if (fout >= 0)
                 close(fout);
             cli_errmsg("cli_untar: block read error\n");
             return CL_EREAD;
         }
         pos += nread;

         if (!in_block) {
             char type;
             int directory, skipEntry = 0;
             int checksum = -1;
             char magic[7], name[101], osize[TARSIZELEN + 1];
             currsize = 0;

             /* A previous entry has just been completed: scan it and clean up. */
             if (fout >= 0) {
                 lseek(fout, 0, SEEK_SET);
                 ret = cli_magic_scandesc(fout, fullname, ctx);
                 close(fout);
                 if (!ctx->engine->keeptmp)
                     if (cli_unlink(fullname)) return CL_EUNLINK;
                 if (ret == CL_VIRUS) {
                     if (!SCAN_ALLMATCHES)
                         return CL_VIRUS;
                     else
                         num_viruses++;
                 }
                 fout = -1;
             }

             if (block[0] == '\0') /* We're done */
                 break;
             if ((ret = cli_checklimits("cli_untar", ctx, 0, 0, 0)) != CL_CLEAN)
                 return ret;

             if (nread < TARHEADERSIZE) {
                 /*
                  * Truncated header: stop here. Leave through the common exit
                  * so that detections already counted in num_viruses are
                  * still reported (fout is -1 at this point).
                  */
                 break;
             }

             checksum = getchecksum(block);
             cli_dbgmsg("cli_untar: Candidate checksum = %d, [%o in octal]\n", checksum, checksum);
             if (testchecksum(block, checksum) != 0) {
                 // If checksum is bad, dump and look for next header block
                 cli_dbgmsg("cli_untar: Invalid checksum in tar header. Skip to next...\n");
                 if (last_header_bad == 0) {
                     /* Only announce the first bad header of a run. */
                     last_header_bad++;
                     cli_dbgmsg("cli_untar: Invalid checksum found inside archive!\n");
                 }
                 continue;
             } else {
                 last_header_bad = 0;
                 cli_dbgmsg("cli_untar: Checksum %d is valid.\n", checksum);
             }

             if (posix) {
                 strncpy(magic, block + 257, 5);
                 magic[5] = '\0';
                 if (strcmp(magic, "ustar") != 0) {
                     cli_dbgmsg("cli_untar: Incorrect magic string '%s' in tar header\n", magic);
                     return CL_EFORMAT;
                 }
             }

             type = block[TARFILETYPEOFFSET];

             switch (type) {
                 default:
                     cli_dbgmsg("cli_untar: unknown type flag %c\n", type);
                     /* fall through: unknown types are treated as plain files */
                 case '0':  /* plain file */
                 case '\0': /* plain file */
                 case '7':  /* contiguous file */
                 case 'M':  /* continuation of a file from another volume; might as well scan it. */
                     files++;
                     directory = 0;
                     break;
                 case '1': /* Link to already archived file */
                 case '5': /* directory */
                 case '2': /* sym link */
                 case '3': /* char device */
                 case '4': /* block device */
                 case '6': /* fifo special */
                 case 'V': /* Volume header */
                     directory = 1;
                     break;
                 case 'K':
                 case 'L':
                     /* GNU extension - ././@LongLink
                      * Discard the blocks with the extended filename,
                      * the last header will contain parts of it anyway
                      */
                 case 'N': /* Old GNU format way of storing long filenames. */
                 case 'A': /* Solaris ACL */
                 case 'E': /* Solaris Extended attribute s*/
                 case 'I': /* Inode only */
                 case 'g': /* Global extended header */
                 case 'x': /* Extended attributes */
                 case 'X': /* Extended attributes (POSIX) */
                     directory = 0;
                     skipEntry = 1;
                     break;
             }

             /* Entries handled as content-free: go straight to the next header. */
             if (directory) {
                 in_block = 0;
                 continue;
             }

             strncpy(osize, block + TARSIZEOFFSET, TARSIZELEN);
             osize[TARSIZELEN] = '\0';
             size_int          = octal(osize);
             if (size_int < 0) {
                 cli_dbgmsg("cli_untar: Invalid size in tar header\n");
                 /* Do not let the size of an earlier entry decide how far to skip. */
                 size = 0;
                 skipEntry++;
             } else {
                 size = (size_t)size_int;
                 cli_dbgmsg("cli_untar: size = %zu\n", size);
                 ret = cli_checklimits("cli_untar", ctx, size, 0, 0);
                 switch (ret) {
                     case CL_EMAXFILES: // Scan no more files
                         skipEntry++;
                         limitnear = 0;
                         break;
                     case CL_EMAXSIZE: // Either single file limit or total byte limit would be exceeded
                         cli_dbgmsg("cli_untar: would exceed limit, will try up to max\n");
                         limitnear = 1;
                         break;
                     default: // Ok based on reported content size
                         limitnear = 0;
                         break;
                 }
             }

             if (skipEntry) {
                 /* Round the size up to a whole number of blocks; a zero size skips one block. */
                 const int nskip = (size % BLOCKSIZE || !size) ? size + BLOCKSIZE - (size % BLOCKSIZE) : size;

                 if (nskip < 0) {
                     cli_dbgmsg("cli_untar: got negative skip size, giving up\n");
                     /* Common exit keeps detections counted so far (fout is -1 here). */
                     break;
                 }
                 cli_dbgmsg("cli_untar: skipping entry\n");
                 pos += nskip;
                 continue;
             }

             /* The first 100 bytes of the header hold the entry name. */
             strncpy(name, block, 100);
             name[100] = '\0';
             if (cli_matchmeta(ctx, name, size, size, 0, files, 0, NULL) == CL_VIRUS) {
                 if (!SCAN_ALLMATCHES)
                     return CL_VIRUS;
                 else
                     num_viruses++;
             }

             snprintf(fullname, sizeof(fullname) - 1, "%s" PATHSEP "tar%02u", dir, files);
             fullname[sizeof(fullname) - 1] = '\0';
             fout                           = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);

             if (fout < 0) {
                 char err[128];
                 cli_errmsg("cli_untar: Can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
                 return CL_ETMPFILE;
             }

             cli_dbgmsg("cli_untar: extracting to %s\n", fullname);

             in_block = 1;
         } else { /* write or continue writing file contents */
             size_t nbytes, nwritten;
             int skipwrite = 0;
             char err[128];

             /* Never take more than one block, nor more than was actually read. */
             nbytes = (size > BLOCKSIZE) ? BLOCKSIZE : size;
             if (nread && (nread < nbytes))
                 nbytes = nread;

             if (limitnear > 0) {
                 currsize += nbytes;
                 cli_dbgmsg("cli_untar: Approaching limit...\n");
                 if (cli_checklimits("cli_untar", ctx, (unsigned long)currsize, 0, 0) != CL_SUCCESS) {
                     // Limit would be exceeded by this file, suppress writing beyond limit
                     // Need to keep reading to get to end of file chunk
                     skipwrite++;
                 }
             }

             if (skipwrite == 0) {
                 nwritten = cli_writen(fout, block, nbytes);

                 if (nwritten != nbytes) {
                     cli_errmsg("cli_untar: only wrote %zu bytes to file %s (out of disc space?): %s\n",
                                nwritten, fullname, cli_strerror(errno, err, sizeof(err)));
                     close(fout);
                     return CL_EWRITE;
                 }
             }
             if (nbytes > size) {
                 cli_warnmsg("cli_untar: More bytes written than requested!\n");
                 size = 0;
             } else {
                 size -= nbytes;
             }
             if ((size != 0) && (nread == 0)) {
                 // Truncated tar file, so end file content like tar behavior
                 cli_dbgmsg("cli_untar: No bytes read! Forcing end of file content.\n");
                 size = 0;
             }
         }
         /* Entry fully consumed: the next block is a header again. */
         if (size == 0)
             in_block = 0;
     }

     /* Scan the entry that was still open when the loop ended. */
     if (fout >= 0) {
         lseek(fout, 0, SEEK_SET);
         ret = cli_magic_scandesc(fout, fullname, ctx);
         close(fout);
         if (!ctx->engine->keeptmp)
             if (cli_unlink(fullname)) return CL_EUNLINK;
         if (ret == CL_VIRUS)
             return CL_VIRUS;
     }
     if (num_viruses)
         return CL_VIRUS;
     return CL_CLEAN;
 }