/* * Copyright (C) 2005 Nigel Horne <njh@bandsman.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ static char const rcsid[] = "$Id: pdf.c,v 1.14 2005/05/21 22:04:22 nigelhorne Exp $"; #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include "clamav.h" #if HAVE_MMAP #if HAVE_SYS_MMAN_H #include <sys/mman.h> #else /* HAVE_SYS_MMAN_H */ #undef HAVE_MMAP #endif #endif #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <ctype.h> #include <string.h> #include <fcntl.h> #include <stdlib.h> #include <limits.h> #include <errno.h> #ifdef HAVE_ZLIB_H #include <zlib.h> #endif #include "table.h" #include "mbox.h" #include "others.h" #include "blob.h" #ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #endif static int flatedecode(const unsigned char *buf, size_t len, int fout); static int ascii85decode(const char *buf, size_t len, unsigned char *output); int cli_pdf(const char *dir, int desc) { #ifndef HAVE_MMAP cli_warnmsg("File not decoded - PDF decoding needs mmap() (for now)\n"); return CL_CLEAN; #else struct stat statb; off_t size; long bytesleft; char *buf; const char *p, *q; int rc = CL_CLEAN; cli_dbgmsg("in cli_pdf()\n"); if(fstat(desc, &statb) < 0) return CL_EOPEN; size = (size_t)statb.st_size; if(size == 0) return CL_CLEAN; p = buf = mmap(NULL, size, PROT_READ, MAP_SHARED, desc, 0); if(buf == MAP_FAILED) return CL_EMEM; bytesleft = (long)size; cli_dbgmsg("cli_pdf: scanning %lu bytes\n", bytesleft); while((q = cli_pmemstr(p, bytesleft, "obj", 3)) != NULL) { int length, is_ascii85decode, is_flatedecode, fout; const char *s, *t, *u, *obj; size_t objlen; char fullname[NAME_MAX + 1]; bytesleft -= (q - p) + 3; obj = p = &q[3]; q = cli_pmemstr(p, bytesleft, "endobj", 6); if(q == NULL) { cli_dbgmsg("No matching endobj"); break; } bytesleft -= (q - p) + 6; p = &q[6]; objlen = (size_t)(q - obj); t = cli_pmemstr(obj, objlen, "stream\n", 7); if(t == NULL) { t = cli_pmemstr(obj, objlen, "stream\r", 7); if(t == NULL) continue; } length = is_ascii85decode = is_flatedecode = 0; for(s = obj; s < t; s++) if(*s == '/') { if(strncmp(++s, "Length ", 7) == 0) { s += 7; length = atoi(s); while(isdigit(*s)) s++; } else if((strncmp(s, "FlateDecode ", 12) == 0) || (strncmp(s, "FlateDecode\n", 12) == 0)) { is_flatedecode = 1; s += 12; } else if((strncmp(s, "ASCII85Decode ", 12) == 0) || (strncmp(s, "ASCII85Decode\n", 12) == 0)) { is_ascii85decode = 1; s += 12; } } t += 7; u = cli_pmemstr(t, objlen - 7, "endstream\n", 10); if(u == NULL) { const char *v = u; u = cli_pmemstr(t, objlen - 7, "endstream\r", 10); if(u == NULL) { cli_dbgmsg("No endstream"); break; } v = u; while(strchr("\r\n", *--v)) --u; } snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir); #if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) fout = mkstemp(fullname); #else (void)mktemp(fullname); fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); #endif if(fout < 0) { cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno)); rc = CL_ETMPFILE; break; } if(is_ascii85decode) { int len = (int)(u - t); unsigned char *tmpbuf = cli_malloc(len * 2); len = ascii85decode(t, (size_t)len, tmpbuf); if(len == -1) { free(tmpbuf); rc = CL_EFORMAT; continue; } /* free unused traling bytes */ tmpbuf = cli_realloc(tmpbuf, len); /* * Note that it will probably be both ascii85encoded * and flateencoded */ if(is_flatedecode) { const int zstat = flatedecode((unsigned char *)tmpbuf, (size_t)len, fout); if(zstat != Z_OK) rc = zstat; } free(tmpbuf); } else if(is_flatedecode) { const int zstat = flatedecode((unsigned char *)t, (size_t)(u - t), fout); if(zstat != Z_OK) rc = zstat; } else write(fout, t, (size_t)(u - t)); close(fout); cli_dbgmsg("cli_pdf: extracted to %s\n", fullname); } munmap(buf, size); cli_dbgmsg("cli_pdf: returning %d\n", rc); return rc; #endif } /* flate inflation - returns zlib status, e.g. Z_OK */ static int flatedecode(const unsigned char *buf, size_t len, int fout) { int zstat; z_stream stream; unsigned char output[BUFSIZ]; cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", len); while(strchr("\r\n", *buf)) { len--; buf++; } stream.zalloc = (alloc_func)Z_NULL; stream.zfree = (free_func)Z_NULL; stream.opaque = (void *)NULL; stream.next_in = (unsigned char *)buf; stream.avail_in = len; zstat = inflateInit(&stream); if(zstat != Z_OK) { cli_warnmsg("cli_pdf: inflateInit failed"); return zstat; } stream.next_out = output; stream.avail_out = sizeof(output); for(;;) { if(stream.avail_out == 0) { cli_dbgmsg("write BUFSIZ\n"); write(fout, output, BUFSIZ); stream.next_out = output; stream.avail_out = BUFSIZ; } zstat = inflate(&stream, Z_NO_FLUSH); switch(zstat) { case Z_OK: continue; case Z_STREAM_END: break; default: cli_warnmsg("Error %d inflating PDF attachment\n", zstat); inflateEnd(&stream); return zstat; } break; } if(stream.avail_out != sizeof(output)) { cli_dbgmsg("flush %lu\n", sizeof(output) - stream.avail_out); write(fout, output, sizeof(output) - stream.avail_out); } return inflateEnd(&stream); } /* * http://cvs.gnome.org/viewcvs/sketch/Filter/ascii85filter.c?rev=1.2 */ /* ascii85 inflation, returns number of bytes in output, -1 for error */ static int ascii85decode(const char *buf, size_t len, unsigned char *output) { const char *ptr = buf; uint32_t sum = 0; int quintet = 0; int ret = 0; cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", len); for(;;) { int byte = (len--) ? *ptr++ : EOF; if((byte == '~') && (*ptr == '>')) byte = EOF; if(byte >= '!' && byte <= 'u') { sum = sum * 85 + ((unsigned long)byte - '!'); if(++quintet == 5) { *output++ = sum >> 24; *output++ = (sum >> 16) & 0xFF; *output++ = (sum >> 8) & 0xFF; *output++ = sum & 0xFF; ret += 4; quintet = 0; sum = 0; } } else if(byte == 'z') { if(quintet) { cli_warnmsg("ascii85decode: z in wrong position\n"); return -1; } *output++ = '\0'; *output++ = '\0'; *output++ = '\0'; *output++ = '\0'; ret += 4; } else if(byte == EOF) { if(quintet) { int i; if(quintet == 1) { cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n"); return -1; } for(i = 0; i < 5 - quintet; i++) sum *= 85; if(quintet > 1) sum += (0xFFFFFF >> ((quintet - 2) * 8)); ret += quintet; for(i = 0; i < quintet - 1; i++) *output++ = (sum >> (24 - 8 * i)) & 0xFF; quintet = 0; } break; } else if(!isspace(byte)) { cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n", byte, len); return -1; } } return ret; }