libclamav/unzip.c
0968c257
 /*
  *  Copyright (C) 2003 - 2006 Tomasz Kojm <tkojm@clamav.net>
  *
  *  The code of this module was based on zziplib 0.12.83:
  *  (c) 1999 - 2002 Guido Draheim <guidod@gmx.de>, published under
  *  the Lesser GNU General Public License
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <zlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
75333e2a
 #ifdef HAVE_UNISTD_H
0968c257
 #include <unistd.h>
75333e2a
 #endif
0968c257
 #include <fcntl.h>
 
 #include "clamav.h"
 #include "others.h"
 #include "unzip.h"
 
 /*
  * Byte-order helpers for fields read straight from the archive.
  * EC32/EC16 forward to le32_to_host/le16_to_host, which are not defined
  * in this part of the file.
  */
 #define EC32(x) le32_to_host(x) /* Convert little endian to host */
 #define EC16(x) le16_to_host(x)
 
 /*
  * sizeof() cast to ssize_t, so it can be compared against signed
  * quantities (off_t file sizes, read counts) without the comparison
  * being carried out in unsigned arithmetic.
  */
 #define __sizeof(X) ((ssize_t)(sizeof(X)))
 
027e3a84
 /* Size in bytes of the window buffer used while searching for the disk trailer */
 #define ZIPBUFSIZ   1024
0968c257
 /* Size in bytes of the per-file input buffer (buf32k); also the largest compressed chunk read at once */
 #define ZIP32K	    32768
 
 /*
  * Repair a central-directory offset that cannot be right.
  *
  * When the recorded start of the central directory (z_rootseek) lies beyond
  * offset_of_trailer - z_rootsize, and the trailer offset is larger than the
  * directory size, z_rootseek is replaced by offset_of_trailer - z_rootsize.
  *
  * offset_of_trailer: file offset at which the trailer was found.
  * trailer:           trailer to inspect; z_rootseek may be rewritten.
  *
  * Every reader of the trailer decodes its fields with EC32(), so the
  * repaired value is passed through EC32() again before it is stored;
  * storing the host-order value directly would be decoded wrongly on a
  * big-endian host.
  * NOTE(review): this assumes le32_to_host() is its own inverse (identity
  * or a plain byte swap) -- confirm against its definition.
  */
 inline static void __fixup_rootseek(off_t offset_of_trailer, struct zip_disk_trailer *trailer)
 {
     const off_t rootsize = (off_t) EC32(trailer->z_rootsize);
     const off_t rootseek = (off_t) EC32(trailer->z_rootseek);

     if(rootseek > offset_of_trailer - rootsize && offset_of_trailer > rootsize)
         trailer->z_rootseek = EC32((uint32_t) (offset_of_trailer - rootsize));
 }
 
027e3a84
 int __zip_find_disk_trailer(int fd, off_t filesize, struct zip_disk_trailer *trailer, off_t *start)
0968c257
 {
 	char *buf, *end, *tail;
027e3a84
 	off_t offset = 0, bufsize;
 	struct zip_root_dirent dirent;
 	uint32_t u_rootseek, shift = 0;
 	int i;
0968c257
 
 
     if(!trailer) {
 	cli_errmsg("Unzip: __zip_find_disk_trailer: trailer == NULL\n");
 	return CL_ENULLARG;
     }
 
     if(filesize < __sizeof(struct zip_disk_trailer)) {
 	cli_errmsg("Unzip: __zip_find_disk_trailer: File too short\n");
 	return CL_EFORMAT;
     }
 
027e3a84
     if(!(buf = cli_malloc(ZIPBUFSIZ)))
0968c257
 	return CL_EMEM;
 
     offset = filesize;
     while(1) {
 
027e3a84
 	if(offset <= 0) {
 	     cli_dbgmsg("Unzip: __zip_find_disk_trailer: Central directory not found\n");
0968c257
 	     free(buf);
 	     return CL_EFORMAT;
 	}
 
027e3a84
 	if(offset >= ZIPBUFSIZ) {
 	    if(offset == filesize)
 		offset -= ZIPBUFSIZ;
 	    else
33c7c474
 		offset -= ZIPBUFSIZ - sizeof(struct zip_disk_trailer);
0968c257
 
027e3a84
 	    bufsize = ZIPBUFSIZ;
0968c257
 	} else {
027e3a84
 	    if(filesize < ZIPBUFSIZ)
 		bufsize = offset;
 	    else
 		bufsize = ZIPBUFSIZ;
0968c257
 
027e3a84
 	    offset = 0;
 	}
0968c257
 
         if(lseek(fd, offset, SEEK_SET) < 0) {
 	    cli_errmsg("Unzip: __zip_find_disk_trailer: Can't lseek descriptor %d\n", fd);
 	    free(buf);
 	    return CL_EIO;
 	}
 
6cbdb5c5
         if(cli_readn(fd, buf, (size_t) bufsize) < (ssize_t) bufsize) {
027e3a84
 	    cli_errmsg("Unzip: __zip_find_disk_trailer: Can't read %d bytes\n", bufsize);
0968c257
 	    free(buf);
 	    return CL_EIO;
 	}
 
 	end = buf + bufsize;
 	for(tail = end - 1; tail >= buf; tail--) {
 	    if((*tail == 'P') && (end - tail >= __sizeof(struct zip_disk_trailer) - 2) && cli_readint32(tail) == ZIP_DISK_TRAILER_MAGIC) {
 		if(end - tail >= __sizeof(struct zip_disk_trailer)) {
 		    memcpy(trailer, tail, sizeof(struct zip_disk_trailer)); 
 		} else {
 		    memcpy(trailer, tail, sizeof(struct zip_disk_trailer) - 2);
 		    trailer->z_comment = 0; 
 		}
 		__fixup_rootseek(offset + tail - buf, trailer);
027e3a84
 
 		u_rootseek = EC32(trailer->z_rootseek);
e2b5770b
 		if(u_rootseek > (uint32_t) filesize) {
027e3a84
 		    cli_dbgmsg("Unzip: __zip_find_disk_trailer: u_rootseek > filesize, continue search\n");
 		    continue;
 		}
 
 		for(i = 0; i < 2; i++) {
e2b5770b
 		    if(u_rootseek + shift + sizeof(dirent) < (uint32_t) filesize) {
027e3a84
 			if(lseek(fd, u_rootseek + shift, SEEK_SET) < 0) {
 			    cli_errmsg("Unzip: __zip_find_disk_trailer: Can't lseek descriptor %d\n", fd);
 			    free(buf);
 			    return CL_EIO;
 			}
 
6cbdb5c5
 			if(cli_readn(fd, &dirent, sizeof(dirent)) < __sizeof(dirent)) {
027e3a84
 			    cli_errmsg("Unzip: __zip_find_disk_trailer: Can't read %d bytes\n", bufsize);
 			    free(buf);
 			    return CL_EIO;
 			}
 
 			if(EC32(dirent.z_magic) == ZIP_ROOT_DIRENT_MAGIC) {
 			    cli_dbgmsg("Unzip: __zip_find_disk_trailer: found file header at %u, shift %u\n", u_rootseek + shift, shift);
 			    free(buf);
 			    *start = shift;
 			    return CL_SUCCESS;
 			}
 
 			shift = *start;
 		    }
 		}
0968c257
 	    }
 	}
     }
 
     /* this should never be reached */
     free(buf);
     return CL_EFORMAT;
 }
 
 /*
  * Read the central directory described by trailer into a packed list of
  * zip_dir_hdr records.
  *
  * One buffer of z_rootsize bytes is allocated. For every entry the
  * on-disk zip_root_dirent is read from z_rootseek + start + offset and
  * converted into a zip_dir_hdr followed by its NUL-terminated name.
  * d_reclen holds the distance to the next record; the last record has
  * d_reclen == 0.
  *
  * fd:         descriptor of the archive; its file position is changed.
  * trailer:    disk trailer (fields in on-disk byte order).
  * hdr_return: receives the buffer if at least one entry was stored; the
  *             caller then owns it and must free() it. This also happens
  *             when CL_EFORMAT is returned for a partially parsed directory.
  *             May be NULL, in which case the buffer is freed here.
  * start:      shift added to the directory offset and to every d_off.
  *
  * Returns CL_SUCCESS when all entries were consumed (or a later dirent
  * could not be read), CL_EFORMAT for an empty or malformed directory,
  * CL_EMEM on allocation failure and CL_EIO on fstat/lseek errors or when
  * the very first dirent cannot be read.
  */
 int __zip_parse_root_directory(int fd, struct zip_disk_trailer *trailer, zip_dir_hdr **hdr_return, off_t start)
 {
     struct zip_root_dirent dirent;
     zip_dir_hdr *hdr, *hdr0;
     uint16_t *p_reclen = NULL, entries;
     uint32_t offset;
     struct stat sb;
     uint16_t u_entries  = EC16(trailer->z_entries);
     uint32_t u_rootsize = EC32(trailer->z_rootsize);
     uint32_t u_rootseek = EC32(trailer->z_rootseek) + start;
     uint16_t u_extras, u_comment, u_namlen, u_flags;
     unsigned int bfcnt;
     char *pt;

     if(fstat(fd, &sb) == -1) {
         cli_errmsg("Unzip: __zip_parse_root_directory: Can't fstat file descriptor %d\n", fd);
         return CL_EIO;
     }

     if(!u_entries) {
         cli_errmsg("Unzip: __zip_parse_root_directory: File contains no entries\n");
         return CL_EFORMAT;
     }

     if(u_rootsize > (uint32_t) sb.st_size) {
         cli_errmsg("Unzip: __zip_parse_root_directory: Incorrect root size\n");
         return CL_EFORMAT;
     }

     hdr0 = (zip_dir_hdr *) cli_malloc(u_rootsize);
     if(!hdr0)
         return CL_EMEM;

     hdr = hdr0;

     for(entries = u_entries, offset = 0; entries > 0; entries--) {

         if(lseek(fd, u_rootseek + offset, SEEK_SET) < 0) {
             free(hdr0);
             cli_errmsg("Unzip: __zip_parse_root_directory: Can't lseek descriptor %d\n", fd);
             return CL_EIO;
         }

         if(cli_readn(fd, &dirent, sizeof(dirent)) < __sizeof(dirent)) {
             if(entries != u_entries) {
                 /* keep what was parsed so far and report success */
                 entries = 0;
                 break;
             } else {
                 free(hdr0);
                 /* sizeof yields size_t, not int: cast to match the conversion */
                 cli_dbgmsg("Unzip: __zip_parse_root_directory: Can't read %lu bytes\n", (unsigned long) sizeof(dirent));
                 return CL_EIO;
             }
         }

         if(offset + sizeof(struct zip_root_dirent) > u_rootsize) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Entry %d outside of root directory\n", entries);
             break;
         }

         u_extras  = EC16(dirent.z_extras);
         u_comment = EC16(dirent.z_comment);

         u_namlen  = EC16(dirent.z_namlen);
         if(u_namlen > 1024) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Entry %d name too long\n", entries);
             break;
         }

         /*
          * Make sure the in-memory record (header, name, NUL and at most
          * 3 alignment bytes) stays inside the hdr0 allocation before
          * anything is written. The checks on `offset` only bound the
          * on-disk layout.
          * NOTE(review): expected never to trigger for well-formed input as
          * long as sizeof(zip_dir_hdr) + 4 <= sizeof(struct zip_root_dirent);
          * confirm against the struct definitions.
          */
         if((size_t) ((char *) hdr - (char *) hdr0) + sizeof(zip_dir_hdr) + (size_t) u_namlen + 1 + 3 > (size_t) u_rootsize) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Entry %d does not fit into header buffer\n", entries);
             break;
         }

         u_flags = EC16(dirent.z_flags);

         hdr->d_crc32 = EC32(dirent.z_crc32);
         hdr->d_csize = EC32(dirent.z_csize);
         hdr->d_usize = EC32(dirent.z_usize);
         hdr->d_off   = EC32(dirent.z_off) + start;

         hdr->d_compr = EC16(dirent.z_compr);

         /* d_bf lists alternative methods to try; it is terminated by -1 */
         bfcnt = 0;
         if(!hdr->d_compr && hdr->d_csize != hdr->d_usize) {
             cli_warnmsg("Unzip: __zip_parse_root_directory: File claims to be stored but csize != usize\n");
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Also checking for method 'deflated'\n");
             hdr->d_bf[bfcnt] = ZIP_METHOD_DEFLATED;
             bfcnt++;
         } else if(hdr->d_compr && hdr->d_csize == hdr->d_usize) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: File claims to be deflated but csize == usize\n");
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Also checking for method 'stored'\n");
             hdr->d_bf[bfcnt] = ZIP_METHOD_STORED;
             bfcnt++;
         }
         hdr->d_bf[bfcnt] = -1;

         hdr->d_flags = u_flags;

         if(offset + sizeof(struct zip_root_dirent) + u_namlen > u_rootsize) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Name of entry %d outside of root directory\n", entries);
             break;
         }

         /* the name follows the dirent directly, so no seek is needed */
         if(cli_readn(fd, hdr->d_name, u_namlen) != u_namlen) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: Can't read name of entry %d\n", entries);
             break;
         }

         hdr->d_name[u_namlen] = '\0';
         hdr->d_namlen = u_namlen;

         offset += sizeof(struct zip_root_dirent) + u_namlen + u_extras + u_comment;

         if(offset > u_rootsize) {
             cli_dbgmsg("Unzip: __zip_parse_root_directory: End of entry %d outside of root directory\n", entries);
             break;
         }

         /* round the end of the record up to a multiple of 4 */
         pt = (char *) hdr + sizeof(zip_dir_hdr) + u_namlen + 1;
         pt += ((long) pt) & 1;
         pt += ((long) pt) & 2;
         hdr->d_reclen = (uint16_t) (pt - (char *) hdr);
         p_reclen = &hdr->d_reclen;

         hdr = (zip_dir_hdr *) ((char *) hdr + hdr->d_reclen);
     }

     if(p_reclen) {
         /* terminate the list at the last complete record */
         *p_reclen = 0;
         if(hdr_return)
             *hdr_return = hdr0;
         else
             free(hdr0); /* nobody takes ownership: do not leak the buffer */
     } else {
         free(hdr0);
     }

     return entries ? CL_EFORMAT : CL_SUCCESS;
 }
 
 /*
  * Release a zip_dir together with the buffers it owns: the directory
  * header list (hdr0) and the cached zip_file and 32K buffer.
  * dir must not be NULL. Always returns CL_SUCCESS.
  */
 int zip_dir_close(zip_dir *dir)
 {
     /* free(NULL) is a no-op, so the members need no individual guards */
     free(dir->hdr0);
     free(dir->cache.fp);
     free(dir->cache.buf32k);
     free(dir);

     return CL_SUCCESS;
 }
 
 /*
  * Find the disk trailer of dir->fd and load the central directory into
  * dir->hdr0.
  *
  * start is handed to __zip_find_disk_trailer(), which may update it; the
  * updated value is then passed on to __zip_parse_root_directory().
  *
  * Returns CL_SUCCESS, CL_EIO when fstat() fails, or the error code of
  * whichever of the two steps failed.
  */
 static int __zip_dir_parse(zip_dir *dir, off_t start)
 {
     struct stat st;
     struct zip_disk_trailer eocd;
     int rc;

     if(fstat(dir->fd, &st) == -1) {
         cli_errmsg("Unzip: __zip_dir_parse: Can't fstat file descriptor %d\n", dir->fd);
         return CL_EIO;
     }

     rc = __zip_find_disk_trailer(dir->fd, st.st_size, &eocd, &start);
     if(rc)
         return rc;

     rc = __zip_parse_root_directory(dir->fd, &eocd, &dir->hdr0, start);

     return rc ? rc : CL_SUCCESS;
 }
 
 /*
  * Create a zip_dir for the archive open on fd and load its central
  * directory.
  *
  * fd:        descriptor of the archive; it is stored in the zip_dir.
  * start:     if non-zero, fd is first positioned at this offset; the
  *            value is also passed on to the directory parser.
  * errcode_p: if non-NULL, receives CL_SUCCESS or the reason for failure.
  *
  * Returns the new zip_dir (release it with zip_dir_close()), or NULL on
  * failure. Failure codes are CL_EMEM (allocation), CL_EIO (lseek) or the
  * error returned by the directory parser.
  */
 zip_dir *zip_dir_open(int fd, off_t start, int *errcode_p)
 {
     int ret;
     zip_dir *dir;

     dir = (zip_dir *) cli_calloc(1, sizeof(zip_dir));
     if(!dir) {
         if(errcode_p)
             *errcode_p = CL_EMEM;
         return NULL;
     }

     if(start) {
         if(lseek(fd, start, SEEK_SET) == -1) {
             cli_errmsg("Unzip: zip_dir_open: Can't lseek descriptor %d\n", fd);
             /* dir owns nothing yet, but must itself be released */
             free(dir);
             if(errcode_p)
                 *errcode_p = CL_EIO;
             return NULL;
         }
     }

     dir->fd = fd;

     if((ret = __zip_dir_parse(dir, start))) {
         zip_dir_close(dir);
         /* report the real cause instead of a blanket CL_EMEM */
         if(errcode_p)
             *errcode_p = ret;
         return NULL;
     }

     /* position the read cursor used by zip_dir_read() on the first entry */
     dir->hdr = dir->hdr0;

     if(errcode_p)
         *errcode_p = CL_SUCCESS;

     return dir;
 }
 
 /*
  * Copy the entry under the directory cursor into *d and advance the
  * cursor.
  *
  * d->d_name points into the directory's own storage; it is not copied.
  * After the last entry (d_reclen == 0) the cursor becomes NULL.
  *
  * Returns 1 when an entry was delivered, 0 when dir or d is NULL or no
  * entry is left.
  */
 int zip_dir_read(zip_dir *dir, zip_dirent *d)
 {
     zip_dir_hdr *cur;

     if(!dir || !d)
         return 0;

     cur = dir->hdr;
     if(!cur)
         return 0;

     d->d_compr = cur->d_compr;
     d->d_csize = cur->d_csize;
     d->st_size = cur->d_usize;
     d->d_name  = cur->d_name;
     d->d_flags = cur->d_flags;
     d->d_off   = cur->d_off;
     d->d_crc32 = cur->d_crc32;

     dir->hdr = cur->d_reclen ? (zip_dir_hdr *) ((char *) cur + cur->d_reclen) : NULL;

     return 1;
 }
 
 /*
  * Close a zip_file obtained from zip_file_open().
  *
  * The inflate state is torn down when a compression method is set. The
  * 32K buffer and the zip_file object itself are parked in the owning
  * directory's cache if the respective slot is empty, and freed
  * otherwise. The object is zeroed before it is parked.
  *
  * Returns CL_SUCCESS, or CL_ENULLARG when fp is NULL.
  */
 int zip_file_close(zip_file *fp)
 {
     zip_dir *owner;

     if(fp == NULL) {
         cli_errmsg("Unzip: zip_file_close: fp == NULL\n");
         return CL_ENULLARG;
     }

     if(fp->method != 0)
         inflateEnd(&fp->d_stream);

     /* remember the owner now: the memset below wipes fp->dir */
     owner = fp->dir;

     if(fp->buf32k != NULL) {
         if(owner->cache.buf32k != NULL)
             free(fp->buf32k);
         else
             owner->cache.buf32k = fp->buf32k;
     }

     memset(fp, 0, sizeof(zip_file));

     if(owner->cache.fp != NULL)
         free(fp);
     else
         owner->cache.fp = fp;

     return CL_SUCCESS;
 }
 
 /*
  * Prepare fp for reading the entry described by hdr.
  *
  * The method and the number of uncompressed bytes still to deliver are
  * always recorded. For a non-zero method the zlib stream is also
  * initialised and the compressed byte count is recorded. The negative
  * window size requests a raw deflate stream (see the zlib manual for
  * inflateInit2).
  *
  * Returns CL_SUCCESS, or CL_EZIP when inflateInit2() fails.
  */
 static int __zip_inflate_init(zip_file *fp, zip_dir_hdr *hdr)
 {
     fp->method = hdr->d_compr;
     fp->restlen = hdr->d_usize;

     /* method 0: nothing to set up for zlib */
     if(!fp->method)
         return CL_SUCCESS;

     memset(&fp->d_stream, 0, sizeof(fp->d_stream));

     if(inflateInit2(&fp->d_stream, -MAX_WBITS) != Z_OK) {
         cli_errmsg("Unzip: __zip_inflate_init: inflateInit2 failed\n");
         return CL_EZIP;
     }

     fp->crestlen = hdr->d_csize;

     return CL_SUCCESS;
 }
 
 /*
  * Open the archive member called name for reading.
  *
  * dir:   directory returned by zip_dir_open().
  * name:  entry name, compared with strcmp().
  * d_off: -1 to accept any entry with that name, otherwise the entry's
  *        d_off must match as well.
  *
  * The first matching entry decides the outcome: if its compression method
  * is not stored or deflated the call fails and later entries are not
  * considered. On success the descriptor is positioned at the start of
  * the member's data (local header, name and extra field skipped).
  *
  * Returns a zip_file to be released with zip_file_close(), or NULL on
  * failure. Except for an invalid dir, the reason is stored in
  * dir->errcode:
  *   CL_ENULLARG - no directory headers loaded, or name == NULL
  *   CL_EOPEN    - no matching entry
  *   CL_ESUPPORT - known but unsupported compression method
  *   CL_EFORMAT  - unknown compression method
  *   CL_EMEM     - allocation failure
  *   CL_EIO      - lseek/read failure or inflate initialisation failure
  */
 zip_file *zip_file_open(zip_dir *dir, const char *name, int d_off)
 {
     int ret;
     zip_file *fp;
     zip_dir_hdr *hdr;
     ssize_t dataoff;
     struct zip_file_header *hp;

     if(!dir || dir->fd < 0) {
         cli_errmsg("Unzip: zip_file_open: dir == NULL || dir->fd < 0\n");
         return NULL;
     }

     /* only touch dir after it has been validated */
     hdr = dir->hdr0;
     if(!hdr) {
         cli_errmsg("Unzip: zip_file_open: hdr == NULL\n");
         dir->errcode = CL_ENULLARG;
         return NULL;
     }

     if(!name) {
         cli_errmsg("Unzip: zip_file_open: name == NULL\n");
         dir->errcode = CL_ENULLARG;
         return NULL;
     }

     /* walk the header list until name (and d_off, unless it is -1) match */
     while(strcmp(hdr->d_name, name) || (d_off != -1 && (uint32_t) d_off != hdr->d_off)) {
         if(!hdr->d_reclen) {
             dir->errcode = CL_EOPEN;
             return NULL;
         }
         hdr = (zip_dir_hdr *) ((char *) hdr + hdr->d_reclen);
     }

     switch(hdr->d_compr) {
         case ZIP_METHOD_STORED:
         case ZIP_METHOD_DEFLATED:
             break;

         case ZIP_METHOD_SHRUNK:
         case ZIP_METHOD_REDUCEDx1:
         case ZIP_METHOD_REDUCEDx2:
         case ZIP_METHOD_REDUCEDx3:
         case ZIP_METHOD_REDUCEDx4:
         case ZIP_METHOD_IMPLODED:
         case ZIP_METHOD_TOKENIZED:
         case ZIP_METHOD_DEFLATED64:
         case ZIP_METHOD_IMPLODED_DCL:
         case ZIP_METHOD_BZIP2:
         case ZIP_METHOD_AES:
             cli_dbgmsg("Unzip: zip_file_open: Not supported compression method (%d)\n", hdr->d_compr);
             dir->errcode = CL_ESUPPORT;
             return NULL;

         default:
             cli_errmsg("Unzip: zip_file_open: Unknown compression method (%d)\n", hdr->d_compr);
             dir->errcode = CL_EFORMAT;
             return NULL;
     }

     /* reuse the zip_file parked by a previous zip_file_close(), if any */
     if(dir->cache.fp) {
         fp = dir->cache.fp;
         dir->cache.fp = NULL;
     } else {
         fp = (zip_file *) cli_calloc(1, sizeof(zip_file));
         if(!fp) {
             dir->errcode = CL_EMEM;
             return NULL;
         }
     }

     /* from here on zip_file_close() can be used for cleanup */
     fp->dir = dir;

     if(dir->cache.buf32k) {
         fp->buf32k = dir->cache.buf32k;
         dir->cache.buf32k = NULL;
     } else {
         fp->buf32k = (char *) cli_malloc(ZIP32K);
         if(!fp->buf32k) {
             dir->errcode = CL_EMEM;
             zip_file_close(fp);
             return NULL;
         }
     }

     if(lseek(dir->fd, hdr->d_off, SEEK_SET) < 0) {
         cli_errmsg("Unzip: zip_file_open: Can't lseek descriptor %d\n", dir->fd);
         dir->errcode = CL_EIO;
         zip_file_close(fp);
         return NULL;
     }

     /* the input buffer doubles as scratch space for the local file header */
     hp = (struct zip_file_header *) fp->buf32k;
     dataoff = cli_readn(dir->fd, (void *) hp, sizeof(struct zip_file_header));

     if(dataoff < __sizeof(struct zip_file_header)) {
         /* ssize_t is not an int: cast to match the conversion */
         cli_errmsg("Unzip: zip_file_open: Can't read zip header (only read %ld bytes)\n", (long) dataoff);
         dir->errcode = CL_EIO;
         zip_file_close(fp);
         return NULL;
     }

     /* skip the variable-length name and extra field to reach the data */
     dataoff = EC16(hp->z_namlen) + EC16(hp->z_extras);

     if(lseek(dir->fd, dataoff, SEEK_CUR) < 0) {
         cli_errmsg("Unzip: zip_file_open: Can't lseek descriptor %d\n", dir->fd);
         dir->errcode = CL_EIO;
         zip_file_close(fp);
         return NULL;
     }

     fp->usize = hdr->d_usize;
     fp->csize = hdr->d_csize;

     fp->bf = hdr->d_bf;

     ret = __zip_inflate_init(fp, hdr);

     if(ret) {
         dir->errcode = CL_EIO;
         zip_file_close(fp);
         return NULL;
     }

     return fp;
 }
 
 /*
  * Read up to len uncompressed bytes of an open archive member into buf.
  *
  * fp:  file returned by zip_file_open().
  * buf: destination buffer of at least len bytes.
  * len: maximum number of bytes to deliver.
  *
  * The request is capped at the number of uncompressed bytes still
  * outstanding (fp->restlen). Stored members are read straight from the
  * descriptor. Deflated members are fed to zlib in chunks of at most
  * ZIP32K compressed bytes until the request is satisfied or the stream
  * ends.
  *
  * Returns the number of bytes placed in buf, 0 when nothing remains (or
  * len is 0), and -1 on error. On a read or zlib error dir->errcode is
  * set to CL_EIO or CL_EZIP. For stored members the result of the
  * underlying read is returned as is.
  */
 ssize_t zip_file_read(zip_file *fp, char *buf, size_t len)
 {
     zip_dir *dir;
     size_t l;
     ssize_t bread;

     if(!fp || !fp->dir) {
         cli_errmsg("Unzip: zip_file_read: fp == NULL || fp->dir == NULL\n");
         return -1;
     }

     dir = fp->dir;
     l = fp->restlen > len ? len : fp->restlen;

     /*
      * Nothing left, or nothing asked for. Without the len == 0 case a
      * deflated member would call inflate() with no output space and turn
      * the resulting non-Z_OK status into a bogus CL_EZIP error.
      */
     if(!l)
         return 0;

     switch(fp->method) {

         case ZIP_METHOD_STORED:
             bread = cli_readn(dir->fd, buf, l);
             if(bread > 0) {
                 fp->restlen -= bread;
             } else if(bread < 0) {
                 /* size_t is not an int: cast to match the conversion */
                 cli_errmsg("Unzip: zip_file_read: Can't read %lu bytes\n", (unsigned long) l);
                 dir->errcode = CL_EIO;
             }
             return bread;

         case ZIP_METHOD_DEFLATED:
             fp->d_stream.avail_out = l;
             fp->d_stream.next_out = (unsigned char *) buf;
             do {
                 int ret;
                 size_t startlen;

                 /* refill the input buffer once zlib has consumed it */
                 if(fp->crestlen > 0 && fp->d_stream.avail_in == 0) {
                     size_t cl = (fp->crestlen < ZIP32K ? fp->crestlen : ZIP32K);
                     ssize_t i = cli_readn(dir->fd, fp->buf32k, cl);
                     if(i <= 0) {
                         dir->errcode = CL_EIO;
                         cli_errmsg("Unzip: zip_file_read: Can't read %lu bytes (read %ld)\n", (unsigned long) cl, (long) i);
                         return -1;
                     }
                     fp->crestlen -= i;
                     fp->d_stream.avail_in = i;
                     fp->d_stream.next_in = (unsigned char *) fp->buf32k;
                 }

                 startlen = fp->d_stream.total_out;
                 ret = inflate(&fp->d_stream, Z_NO_FLUSH);

                 if(ret == Z_STREAM_END) {
                     /* end of the deflate stream: no more data, whatever the header claimed */
                     fp->restlen = 0;
                 } else {
                     if(ret == Z_OK) {
                         fp->restlen -= (fp->d_stream.total_out - startlen);
                     } else {
                         dir->errcode = CL_EZIP;
                         return -1;
                     }
                 }
             } while(fp->restlen && fp->d_stream.avail_out);

             return l - fp->d_stream.avail_out;

         default:
             /* this should not be reached because of the previous check in
              * zip_file_open
              */
             cli_errmsg("Unzip: zip_file_read: Unknown compression method (%d)\n", fp->method);
             dir->errcode = CL_EZIP;
             return -1;
     }
 }