/* * Extract VBA source code for component MS Office Documents * * Copyright (C) 2004 trog@uncon.org * * This code is based on the OpenOffice and libgsf sources. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #include #include #include #include #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include "vba_extract.h" #define FALSE (0) #define TRUE (1) typedef struct vba_version_tag { unsigned char signature[4]; const char *name; int vba_version; int is_mac; } vba_version_t; #if WORDS_BIGENDIAN == 0 #define vba_endian_convert_16(v) (v) #else static uint16_t vba_endian_convert_16(uint16_t v) { return ((v >> 8) + (v << 8)); } #endif #if WORDS_BIGENDIAN == 0 #define vba_endian_convert_32(v) (v) #else static uint32_t vba_endian_convert_32(uint32_t v) { return ((v >> 24) | ((v & 0x00FF0000) >> 8) | ((v & 0x0000FF00) << 8) | (v << 24)); } #endif typedef struct byte_array_tag { unsigned int length; unsigned char *data; } byte_array_t; #define NUM_VBA_VERSIONS 9 vba_version_t vba_version[] = { { { 0x5e, 0x00, 0x00, 0x01 }, "Office 97", 5, FALSE}, { { 0x5f, 0x00, 0x00, 0x01 }, "Office 97 SR1", 5, FALSE }, { { 0x65, 0x00, 0x00, 0x01 }, "Office 2000 alpha?", 6, FALSE }, { { 0x6b, 0x00, 0x00, 0x01 }, "Office 2000 beta?", 6, FALSE }, { { 0x6d, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE }, { { 0x70, 0x00, 0x00, 0x01 }, "Office XP beta 1/2", 6, FALSE }, { { 0x73, 0x00, 0x00, 0x01 }, "Office XP", 6, FALSE }, { { 0x60, 0x00, 0x00, 0x0e }, "MacOffice 98", 5, TRUE }, { { 0x62, 0x00, 0x00, 0x0e }, "MacOffice 2001", 5, TRUE }, }; #define VBA56_DIRENT_RECORD_COUNT (2 + /* magic */ \ 4 + /* version */ \ 2 + /* 0x00 0xff */ \ 22) /* unknown */ #define VBA56_DIRENT_HEADER_SIZE (VBA56_DIRENT_RECORD_COUNT + \ 2 + /* type1 record count */ \ 2) /* unknown */ /* Function: vba_readn Try hard to read the requested number of bytes */ int vba_readn(int fd, void *buff, unsigned int count) { int retval; unsigned int todo; void *current; todo = count; current = buff; do { retval = read(fd, current, todo); if (retval == 0) { return (count - todo); } if (retval < 0) { return -1; } todo -= retval; current += retval; } while (todo > 0); return count; } /* Function: vba_writen Try hard to write the specified number of bytes */ int vba_writen(int fd, void *buff, unsigned int count) { int retval; unsigned int todo; void *current; todo = count; current = buff; do { retval = write(fd, current, todo); if (retval < 0) { return -1; } todo -= retval; current += retval; } while (todo > 0); return count; } char *get_unicode_name(char *name, int size) { int i, j; char *newname; if (*name == 0 || size == 0) { return NULL; } newname = (char *) cli_malloc(size*2); if (!newname) { return NULL; } j=0; for (i=0 ; i < size; i+=2) { if (isprint(name[i])) { newname[j++] = name[i]; } else { if (name[i] < 10 && name[i] >= 0) { newname[j++] = '_'; newname[j++] = name[i] + '0'; } newname[j++] = '_'; } } newname[j] = '\0'; return newname; } static void vba56_test_middle(int fd) { char test_middle[20]; static const uint8_t middle_str[20] = { 0x00, 0x00, 0xe1, 0x2e, 0x45, 0x0d, 0x8f, 0xe0, 0x1a, 0x10, 0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d, 0x0b, 0xb4, 0x00, 0x00 }; if (vba_readn(fd, &test_middle, 20) != 20) { return; } if (memcmp(test_middle, middle_str, 20) != 0) { lseek(fd, -20, SEEK_CUR); } return; } static void vba56_test_end(int fd) { char test_end[20]; static const uint8_t end_str[20] = { 0x00, 0x00, 0x2e, 0xc9, 0x27, 0x8e, 0x64, 0x12, 0x1c, 0x10, 0x8a, 0x2f, 0x04, 0x02, 0x24, 0x00, 0x9c, 0x02, 0x00, 0x00 }; if (vba_readn(fd, &test_end, 20) != 20) { return; } if (memcmp(test_end, end_str, 20) != 0) { lseek(fd, -20, SEEK_CUR); } return; } vba_project_t *vba56_dir_read(const char *dir) { unsigned char magic[2]; unsigned char version[4]; unsigned char *buff, *name; unsigned char vba56_signature[] = { 0xcc, 0x61 }; int16_t record_count, length; uint16_t ooff; uint8_t byte_count; uint32_t offset; uint32_t LidA; //Language identifiers uint32_t LidB; uint16_t CharSet; uint16_t LenA; uint32_t UnknownB; uint32_t UnknownC; uint16_t LenB; uint16_t LenC; uint16_t LenD; int i, j, fd; vba_project_t *vba_project; char *fullname; unsigned char fixed_octet[8] = { 0x06, 0x02, 0x01, 0x00, 0x08, 0x02, 0x00, 0x00 }; cli_dbgmsg("in vba56_dir_read()\n"); fullname = (char *) cli_malloc(strlen(dir) + 15); sprintf(fullname, "%s/_VBA_PROJECT", dir); fd = open(fullname, O_RDONLY); if (fd == -1) { cli_dbgmsg("Can't open %s\n", fullname); free(fullname); return NULL; } free(fullname); if (vba_readn(fd, &magic, 2) != 2) { close(fd); return NULL; } if (memcmp(magic, vba56_signature, 2) != 0) { close(fd); return NULL; } if (vba_readn(fd, &version, 4) != 4) { close(fd); return NULL; } for (i=0 ; i < NUM_VBA_VERSIONS ; i++) { if (memcmp(version, vba_version[i].signature, 4) == 0) { break; } } if (i == NUM_VBA_VERSIONS) { cli_dbgmsg("Unknown VBA version signature x0%x0x%x0x%x0x%x\n", version[0], version[1], version[2], version[3]); close(fd); return NULL; } cli_dbgmsg("VBA Project: %s, VBA Version=%d\n", vba_version[i].name, vba_version[i].vba_version); /*****************************************/ /* two bytes, should be equal to 0x00ff */ if (vba_readn(fd, &ooff, 2) != 2) { close(fd); return NULL; } if (vba_readn(fd, &LidA, 4) != 4) { close(fd); return NULL; } if (vba_readn(fd, &LidA, 4) != 4) { close(fd); return NULL; } if (vba_readn(fd, &CharSet, 2) != 2) { close(fd); return NULL; } if (vba_readn(fd, &LenA, 2) != 2) { close(fd); return NULL; } if (vba_readn(fd, &UnknownB, 4) != 4) { close(fd); return NULL; } if (vba_readn(fd, &UnknownC, 4) != 4) { close(fd); return NULL; } if (vba_readn(fd, &LenB, 2) != 2) { close(fd); return NULL; } if (vba_readn(fd, &LenC, 2) != 2) { close(fd); return NULL; } if (vba_readn(fd, &LenD, 2) != 2) { close(fd); return NULL; } LidA = vba_endian_convert_32(LidA); LidB = vba_endian_convert_32(LidB); CharSet = vba_endian_convert_16(CharSet); LenA = vba_endian_convert_16(LenA); LenB = vba_endian_convert_16(LenB); LenC = vba_endian_convert_16(LenC); LenD = vba_endian_convert_16(LenD); cli_dbgmsg(" LidA: %d\n LidB: %d\n CharSet: %d\n", LidA, LidB, CharSet); cli_dbgmsg(" LenA: %d\n UnknownB: %d\n UnknownC: %d\n", LenA, UnknownB, UnknownC); cli_dbgmsg(" LenB: %d\n LenC: %d\n LenD: %d\n", LenB, LenC, LenD); record_count = LenC; /*******************************************/ /* REPLACED THIS CODE WITH THE CODE ABOVE */ /* read the rest of the header. most of this is unknown */ /* buff = (char *) cli_malloc(24); if (!buff || vba_readn(fd, buff, 24) != 24) { close(fd); return NULL; } free(buff); if (vba_readn(fd, &record_count, 2) != 2) { close(fd); return NULL; } cli_dbgmsg("Record count: %d\n", record_count); */ /* read two bytes and throw them away */ /* if (vba_readn(fd, &length, 2) != 2) { close(fd); return NULL; }*/ for (;;) { if (vba_readn(fd, &length, 2) != 2) { return NULL; } length = vba_endian_convert_16(length); if (length < 6) { lseek(fd, -2, SEEK_CUR); break; } cli_dbgmsg ("record: %d.%d, length: %d, ", record_count, i, length); buff = (unsigned char *) cli_malloc(length); if (!buff) { cli_errmsg("cli_malloc failed\n"); close(fd); return NULL; } if (vba_readn(fd, buff, length) != length) { cli_errmsg("read name failed\n"); close(fd); return NULL; } name = get_unicode_name(buff, length); cli_dbgmsg("name: %s\n", name); free(buff); /* Ignore twelve bytes from entries of type 'G'. Type 'C' entries come in pairs, the second also having a 12 byte trailer */ /* TODO: Need to check if types H(same as G) and D(same as C) exist */ if (!strncmp ("*\\G", name, 3)) { buff = (unsigned char *) cli_malloc(12); if (vba_readn(fd, buff, 12) != 12) { cli_errmsg("failed to read blob\n"); free(buff); free(name); close(fd); return NULL; } free(buff); } else if (!strncmp("*\\C", name, 3)) { if (i == 1) { buff = (unsigned char *) cli_malloc(12); if (vba_readn(fd, buff, 12) != 12) { cli_errmsg("failed to read blob\n"); free(buff); free(name); close(fd); return NULL; } free(buff); i = 0; } else { i = 1; record_count++; } } else { /* Unknown type - probably ran out of strings - rewind */ lseek(fd, -(length+2), SEEK_CUR); free(name); break; } free(name); vba56_test_middle(fd); } /* may need to seek forward 20 bytes here. Bleh! */ vba56_test_end(fd); if (vba_readn(fd, &record_count, 2) != 2) { close(fd); return NULL; } record_count = vba_endian_convert_16(record_count); cli_dbgmsg("\nVBA Record count: %d\n", record_count); /*if (record_count <= 0) { close(fd); return TRUE; }*/ lseek(fd, 2*record_count, SEEK_CUR); lseek(fd, 4, SEEK_CUR); /* Read fixed octet */ buff = (unsigned char *) cli_malloc(8); if (!buff) { close(fd); return NULL; } if (vba_readn(fd, buff, 8) != 8) { free(buff); close(fd); return NULL; } if (!memcmp(buff, fixed_octet, 8)) { free(buff); close(fd); return NULL; } free(buff); cli_dbgmsg("Read fixed octet ok\n"); /* junk some more stuff */ do { if (vba_readn(fd, &ooff, 2) != 2) { close(fd); return NULL; } } while(ooff != 0xFFFF); if (vba_readn(fd, &ooff, 2) != 2) { close(fd); return NULL; } /* no idea what this stuff is */ if (ooff != 0xFFFF) { ooff = vba_endian_convert_16(ooff); lseek(fd, ooff, SEEK_CUR); } if (vba_readn(fd, &ooff, 2) != 2) { close(fd); return NULL; } if (ooff != 0xFFFF) { ooff = vba_endian_convert_16(ooff); lseek(fd, ooff, SEEK_CUR); } lseek(fd, 100, SEEK_CUR); if (vba_readn(fd, &record_count, 2) != 2) { close(fd); return NULL; } record_count = vba_endian_convert_16(record_count); cli_dbgmsg("\nVBA Record count: %d\n", record_count); vba_project = (vba_project_t *) cli_malloc(sizeof(struct vba_project_tag)); vba_project->name = (char **) cli_malloc(sizeof(char *) * record_count); vba_project->dir = strdup(dir); vba_project->offset = (uint32_t *) cli_malloc (sizeof(uint32_t) * record_count); vba_project->count = record_count; for (i=0 ; i < record_count ; i++) { if (vba_readn(fd, &length, 2) != 2) { goto out_error; } length = vba_endian_convert_16(length); buff = (unsigned char *) cli_malloc(length); if (!buff) { cli_dbgmsg("cli_malloc failed\n"); goto out_error; } if (vba_readn(fd, buff, length) != length) { cli_dbgmsg("read name failed\n"); free(buff); goto out_error; } vba_project->name[i] = get_unicode_name(buff, length); cli_dbgmsg("project name: %s, ", vba_project->name[i]); free(buff); /* some kind of string identifier ?? */ if (vba_readn(fd, &length, 2) != 2) { free(vba_project->name[i]); goto out_error; } length = vba_endian_convert_16(length); lseek(fd, length, SEEK_CUR); /* unknown stuff */ if (vba_readn(fd, &ooff, 2) != 2) { free(vba_project->name[i]); goto out_error; } ooff = vba_endian_convert_16(ooff); if (ooff == 0xFFFF) { lseek(fd, 2, SEEK_CUR); if (vba_readn(fd, &ooff, 2) != 2) { free(vba_project->name[i]); goto out_error; } ooff = vba_endian_convert_16(ooff); lseek(fd, ooff, SEEK_CUR); } else { lseek(fd, 2 + ooff, SEEK_CUR); } lseek(fd, 8, SEEK_CUR); if (vba_readn(fd, &byte_count, 1) != 1) { free(vba_project->name[i]); goto out_error; } for (j=0 ; jname[i]); goto out_error; } offset = vba_endian_convert_32(offset); vba_project->offset[i] = offset; cli_dbgmsg("offset:%d\n", offset); lseek(fd, 2, SEEK_CUR); } { /* There appears to be some code in here */ off_t foffset; foffset = lseek(fd, 0, SEEK_CUR); cli_dbgmsg("\nOffset: 0x%x\n", (unsigned int)foffset); } close(fd); return vba_project; out_error: /* Note: only to be called from the above loop when i == number of allocated stings */ for (j=0 ; jname[j]); } free(vba_project->name); free(vba_project->dir); free(vba_project->offset); free(vba_project); close(fd); return NULL; } #define VBA_COMPRESSION_WINDOW 4096 void byte_array_append(byte_array_t *array, unsigned char *src, unsigned int len) { if (array->length == 0) { array->data = (unsigned char *) cli_malloc(len); array->length = len; memcpy(array->data, src, len); } else { array->data = realloc(array->data, array->length+len); memcpy(array->data+array->length, src, len); array->length += len; } } unsigned char *vba_decompress(int fd, uint32_t offset) { unsigned int i, pos=0, shift, win_pos, clean=TRUE, mask, distance; uint8_t flag; uint16_t token, len; unsigned char buffer[VBA_COMPRESSION_WINDOW]; byte_array_t result; result.length=0; result.data=NULL; lseek(fd, offset+3, SEEK_SET); /* 1byte ?? , 2byte length ?? */ while (vba_readn(fd, &flag, 1) == 1) { for (mask = 1; mask < 0x100; mask<<=1) { if (flag & mask) { if (vba_readn(fd, &token, 2) != 2) { if (result.data) { free(result.data); } return NULL; } token = vba_endian_convert_16(token); win_pos = pos % VBA_COMPRESSION_WINDOW; if (win_pos <= 0x80) { if (win_pos <= 0x20) { shift = (win_pos <= 0x10) ? 12:11; } else { shift = (win_pos <= 0x40) ? 10:9; } } else { if (win_pos <= 0x200) { shift = (win_pos <= 0x100) ? 8:7; } else if (win_pos <= 0x800) { shift = (win_pos <= 0x400) ? 6:5; } else { shift = 4; } } len = (token & ((1 << shift) -1)) + 3; distance = token >> shift; clean = TRUE; for (i=0 ; i < len; i++) { unsigned int srcpos; unsigned char c; srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW; c = buffer[srcpos]; buffer[pos++ % VBA_COMPRESSION_WINDOW]= c; } } else { if ((pos != 0) && ((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) { if (vba_readn(fd, &token, 2) != 2) { if (result.data) { free(result.data); } return NULL; } clean = FALSE; byte_array_append(&result, buffer, VBA_COMPRESSION_WINDOW); break; } if (vba_readn(fd, buffer+(pos%VBA_COMPRESSION_WINDOW), 1) == 1){ pos++; } clean = TRUE; } } } if (pos % VBA_COMPRESSION_WINDOW) { byte_array_append(&result, buffer, pos % VBA_COMPRESSION_WINDOW); } byte_array_append(&result, "\0", 1); return result.data; } /* int vba_dump(vba_project_t *vba_project) { int i, fd; unsigned char *data; char *fullname; for (i=0 ; icount ; i++) { cli_dbgmsg("\n\n*****************************\n"); cli_dbgmsg("Deocding file: %s\n", vba_project->name[i]); cli_dbgmsg("*****************************\n"); fullname = (char *) cli_malloc(strlen(vba_project->dir) + strlen(vba_project->name[i]) + 2); sprintf(fullname, "%s/%s", vba_project->dir, vba_project->name[i]); fd = open(fullname, O_RDONLY); free(fullname); if (fd == -1) { cli_dbgmsg("Open failed\n"); return FALSE; } data = vba_decompress(fd, vba_project->offset[i]); cli_dbgmsg("%s\n", data); close(fd); } return TRUE; } int main(int argc, char *argv[]) { int retval; char *dirname=NULL; vba_project_t *vba_project; while ((retval = getopt(argc, argv, "d:w")) != -1) { switch (retval) { case 'd': dirname = optarg; break; case ':': cli_dbgmsg("missing option parameter\n"); exit(-1); case '?': cli_dbgmsg("unknown option\n"); break; } } vba_project = vba56_dir_read(dirname); if (vba_project != NULL) { vba_dump(vba_project); } return TRUE; } */