/*
* Extract VBA source code for component MS Office Documents
*
* Copyright (C) 2004 trog@uncon.org
*
* This code is based on the OpenOffice and libgsf sources.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <ctype.h>
#include "vba_extract.h"
#define FALSE (0)
#define TRUE (1)
/* One entry of the table of known _VBA_PROJECT version signatures. */
typedef struct vba_version_tag {
unsigned char signature[4]; /* four bytes compared against the version field read from the stream */
const char *name; /* product name, printed in debug output */
int vba_version; /* VBA version number, printed in debug output */
int is_mac; /* TRUE for the MacOffice entries of the table, FALSE otherwise */
} vba_version_t;
/* Growable byte buffer; filled through byte_array_append(). */
typedef struct byte_array_tag {
unsigned int length; /* number of bytes currently stored in data */
unsigned char *data; /* heap buffer holding the bytes (NULL while empty in vba_decompress) */
} byte_array_t;
/* Number of entries in vba_version[]; used as the loop bound when matching
   a signature, so it must be kept in sync with the table below. */
#define NUM_VBA_VERSIONS 9
/* Known version signatures: { signature bytes, product name, VBA version, is_mac } */
vba_version_t vba_version[] = {
{ { 0x5e, 0x00, 0x00, 0x01 }, "Office 97", 5, FALSE},
{ { 0x5f, 0x00, 0x00, 0x01 }, "Office 97 SR1", 5, FALSE },
{ { 0x65, 0x00, 0x00, 0x01 }, "Office 2000 alpha?", 6, FALSE },
{ { 0x6b, 0x00, 0x00, 0x01 }, "Office 2000 beta?", 6, FALSE },
{ { 0x6d, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE },
{ { 0x70, 0x00, 0x00, 0x01 }, "Office XP beta 1/2", 6, FALSE },
{ { 0x73, 0x00, 0x00, 0x01 }, "Office XP", 6, FALSE },
{ { 0x60, 0x00, 0x00, 0x0e }, "MacOffice 98", 5, TRUE },
{ { 0x62, 0x00, 0x00, 0x0e }, "MacOffice 2001", 5, TRUE },
};
/* Sum of the sizes (in bytes) of the header fields listed below: 2+4+2+22 = 30.
   NOTE(review): not referenced anywhere else in the visible part of this file. */
#define VBA56_DIRENT_RECORD_COUNT (2 + /* magic */ \
4 + /* version */ \
2 + /* 0x00 0xff */ \
22) /* unknown */
/* VBA56_DIRENT_RECORD_COUNT plus the two 2-byte fields that follow it (34 bytes). */
#define VBA56_DIRENT_HEADER_SIZE (VBA56_DIRENT_RECORD_COUNT + \
2 + /* type1 record count */ \
2) /* unknown */
/* Function: vba_readn
   Try hard to read the requested number of bytes: read() is called
   repeatedly until `count` bytes have arrived, end-of-file is reached
   or read() reports an error.

   fd    - descriptor to read from
   buff  - destination buffer, at least `count` bytes long
   count - number of bytes wanted (0 returns 0 without touching fd)

   Returns the number of bytes stored in buff (smaller than `count` only
   when end-of-file was reached first), or -1 on a read() error.
*/
int vba_readn(int fd, void *buff, unsigned int count)
{
    /* Walk the buffer through a byte pointer: arithmetic on void * is a
       compiler extension, not ISO C. */
    unsigned char *current = buff;
    unsigned int todo = count;
    ssize_t retval;

    while (todo > 0) {
        retval = read(fd, current, todo);
        if (retval == 0) {
            /* End of file: report what we managed to get. */
            return (int) (count - todo);
        }
        if (retval < 0) {
            return -1;
        }
        todo -= (unsigned int) retval;
        current += retval;
    }
    return (int) count;
}
/* Function: vba_writen
   Try hard to write the specified number of bytes: write() is called
   repeatedly until all `count` bytes have been accepted.

   fd    - descriptor to write to
   buff  - source buffer holding at least `count` bytes (not modified)
   count - number of bytes to write (0 returns 0 without touching fd)

   Returns `count` on success, or -1 if write() fails or stops making
   progress.
*/
int vba_writen(int fd, void *buff, unsigned int count)
{
    /* Byte pointer instead of void * so the pointer arithmetic is ISO C. */
    const unsigned char *current = buff;
    unsigned int todo = count;
    ssize_t retval;

    while (todo > 0) {
        retval = write(fd, current, todo);
        if (retval <= 0) {
            /* A zero return for a non-empty request would otherwise make
               this loop spin forever. */
            return -1;
        }
        todo -= (unsigned int) retval;
        current += retval;
    }
    return (int) count;
}
/* Function: get_unicode_name
   Build a printable C string from a name stored two bytes per character.
   Only every second byte (index 0, 2, 4, ...) of `name` is looked at:
     - a printable byte is copied as is;
     - a byte with value 0..9 becomes '_', the matching decimal digit, '_';
     - any other byte becomes a single '_'.

   name - raw name bytes
   size - number of bytes available in `name`

   Returns a newly allocated NUL-terminated string (release with free()),
   or NULL when `name` is NULL, `size` is not positive, the first byte is
   zero, or the allocation fails.
*/
char *get_unicode_name(char *name, int size)
{
    int i, j;
    unsigned int consumed;
    unsigned char c;
    char *newname;

    /* Check the size before touching the buffer. */
    if (name == NULL || size <= 0 || *name == 0) {
        return NULL;
    }

    /* ceil(size / 2) input bytes are consumed and each may expand to three
       output characters ("_d_"), plus the terminating NUL.  The previous
       size*2 estimate was too small for odd sizes. */
    consumed = (unsigned int) size / 2 + (unsigned int) size % 2;
    newname = (char *) cli_malloc(consumed * 3 + 1);
    if (!newname) {
        return NULL;
    }

    j = 0;
    for (i = 0; i < size; i += 2) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        c = (unsigned char) name[i];
        if (isprint(c)) {
            newname[j++] = (char) c;
        } else {
            if (c < 10) {
                newname[j++] = '_';
                newname[j++] = (char) (c + '0');
            }
            newname[j++] = '_';
        }
    }
    newname[j] = '\0';
    return newname;
}
ee5c926e |
/* Function: vba56_test_middle
   Read the next 20 bytes from fd and compare them with a fixed 20-byte
   marker.  When they match the marker stays consumed; when they differ
   the file position is moved back by 20 bytes.  If fewer than 20 bytes
   can be read the position is left wherever the short read ended.
*/
static void vba56_test_middle(int fd)
{
    unsigned char test_middle[20];
    static const uint8_t middle_str[20] = {
        0x00, 0x00, 0xe1, 0x2e, 0x45, 0x0d, 0x8f, 0xe0,
        0x1a, 0x10, 0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d,
        0x0b, 0xb4, 0x00, 0x00
    };

    if (vba_readn(fd, test_middle, sizeof(test_middle)) != (int) sizeof(test_middle)) {
        return;
    }

    /* The marker begins with 0x00, so a string comparison would stop at the
       first byte and treat any block starting with zero as a match; compare
       all 20 bytes as raw memory. */
    if (memcmp(test_middle, middle_str, sizeof(middle_str)) != 0) {
        lseek(fd, -(off_t) sizeof(test_middle), SEEK_CUR);
    }
}
/* Function: vba56_test_end
   Read the next 20 bytes from fd and compare them with a fixed 20-byte
   marker.  When they match the marker stays consumed and "End found" is
   logged; when they differ the file position is moved back by 20 bytes.
   If fewer than 20 bytes can be read the position is left wherever the
   short read ended.
*/
static void vba56_test_end(int fd)
{
    unsigned char test_end[20];
    static const uint8_t end_str[20] = {
        0x00, 0x00, 0x2e, 0xc9, 0x27, 0x8e, 0x64, 0x12,
        0x1c, 0x10, 0x8a, 0x2f, 0x04, 0x02, 0x24, 0x00,
        0x9c, 0x02, 0x00, 0x00
    };

    if (vba_readn(fd, test_end, sizeof(test_end)) != (int) sizeof(test_end)) {
        return;
    }

    /* Binary comparison: the marker starts with 0x00, which would end a
       string comparison immediately. */
    if (memcmp(test_end, end_str, sizeof(end_str)) != 0) {
        lseek(fd, -(off_t) sizeof(test_end), SEEK_CUR);
        return;
    }

    /* Only report the marker when it was really seen, and through the debug
       channel used by the rest of this file instead of stdout. */
    cli_dbgmsg("End found\n");
}
|
c561d2a3 |
vba_project_t *vba56_dir_read(const char *dir)
{
unsigned char magic[2];
unsigned char version[4];
unsigned char *buff, *name;
unsigned char vba56_signature[] = { 0xcc, 0x61 };
int16_t record_count, length;
uint16_t ooff;
uint8_t byte_count;
uint32_t offset;
uint32_t LidA; //Language identifiers
uint32_t LidB;
uint16_t CharSet;
uint16_t LenA;
uint32_t UnknownB;
uint32_t UnknownC;
uint16_t LenB;
uint16_t LenC;
uint16_t LenD;
int i, j, fd;
vba_project_t *vba_project;
char *fullname;
unsigned char fixed_octet[8] = { 0x06, 0x02, 0x01, 0x00, 0x08, 0x02, 0x00, 0x00 };
|
1f301ecc |
cli_dbgmsg("in vba56_dir_read()\n");
|
ee5c926e |
fullname = (char *) cli_malloc(strlen(dir) + 15); |
c561d2a3 |
sprintf(fullname, "%s/_VBA_PROJECT", dir);
fd = open(fullname, O_RDONLY);
if (fd == -1) { |
1f301ecc |
cli_dbgmsg("Can't open %s\n", fullname);
free(fullname); |
c561d2a3 |
return NULL;
} |
1f301ecc |
free(fullname); |
c561d2a3 |
if (vba_readn(fd, &magic, 2) != 2) {
return NULL;
}
if (strncmp(magic, vba56_signature, 2) != 0) {
return NULL;
}
if (vba_readn(fd, &version, 4) != 4) {
return NULL;
}
for (i=0 ; i < NUM_VBA_VERSIONS ; i++) {
if (strncmp(version, vba_version[i].signature, 4) == 0) {
break;
}
}
if (i == NUM_VBA_VERSIONS) { |
1f301ecc |
cli_dbgmsg("Unknown VBA version signature x0%x0x%x0x%x0x%x\n", |
c561d2a3 |
version[0], version[1], version[2], version[3]);
return NULL;
}
cli_dbgmsg("VBA Project: %s, VBA Version=%d\n", vba_version[i].name,
vba_version[i].vba_version);
/*****************************************/
/* two bytes, should be equal to 0x00ff */
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
if (vba_readn(fd, &LidA, 4) != 4) {
return NULL;
}
if (vba_readn(fd, &LidA, 4) != 4) {
return NULL;
}
if (vba_readn(fd, &CharSet, 2) != 2) {
return NULL;
}
if (vba_readn(fd, &LenA, 2) != 2) {
return NULL;
}
if (vba_readn(fd, &UnknownB, 4) != 4) {
return NULL;
}
if (vba_readn(fd, &UnknownC, 4) != 4) {
return NULL;
}
if (vba_readn(fd, &LenB, 2) != 2) {
return NULL;
}
if (vba_readn(fd, &LenC, 2) != 2) {
return NULL;
}
if (vba_readn(fd, &LenD, 2) != 2) {
return NULL;
}
cli_dbgmsg(" LidA: %d\n LidB: %d\n CharSet: %d\n", LidA, LidB, CharSet);
cli_dbgmsg(" LenA: %d\n UnknownB: %d\n UnknownC: %d\n", LenA, UnknownB, UnknownC);
cli_dbgmsg(" LenB: %d\n LenC: %d\n LenD: %d\n", LenB, LenC, LenD);
record_count = LenC;
/*******************************************/
/* REPLACED THIS CODE WITH THE CODE ABOVE */
/* read the rest of the header. most of this is unknown */
/* buff = (char *) cli_malloc(24);
if (!buff || vba_readn(fd, buff, 24) != 24) {
return NULL;
}
free(buff);
if (vba_readn(fd, &record_count, 2) != 2) {
return NULL;
}
cli_dbgmsg("Record count: %d\n", record_count); */
/* read two bytes and throw them away */
/* if (vba_readn(fd, &length, 2) != 2) {
return NULL;
}*/
|
ee5c926e |
for (;;) { |
c561d2a3 |
if (vba_readn(fd, &length, 2) != 2) {
return NULL;
} |
ee5c926e |
if (length < 6) {
lseek(fd, -2, SEEK_CUR);
break;
} |
c561d2a3 |
cli_dbgmsg ("record: %d.%d, length: %d, ", record_count, i, length); |
ee5c926e |
buff = (unsigned char *) cli_malloc(length); |
c561d2a3 |
if (!buff) {
cli_errmsg("cli_malloc failed\n");
return NULL;
}
if (vba_readn(fd, buff, length) != length) {
cli_errmsg("read name failed\n");
return NULL;
}
name = get_unicode_name(buff, length);
cli_dbgmsg("name: %s\n", name);
free(buff);
/* Ignore twelve bytes from entries of type 'G'.
Type 'C' entries come in pairs, the second also
having a 12 byte trailer */
/* TODO: Need to check if types H(same as G) and D(same as C) exist */
if (!strncmp ("*\\G", name, 3)) { |
ee5c926e |
buff = (unsigned char *) cli_malloc(12); |
c561d2a3 |
if (vba_readn(fd, buff, 12) != 12) {
cli_errmsg("failed to read blob\n");
free(buff);
free(name);
return NULL;
}
free(buff);
} else if (!strncmp("*\\C", name, 3)) {
if (i == 1) { |
ee5c926e |
buff = (unsigned char *) cli_malloc(12); |
c561d2a3 |
if (vba_readn(fd, buff, 12) != 12) {
cli_errmsg("failed to read blob\n");
free(buff);
free(name);
return NULL;
}
free(buff);
i = 0;
} else {
i = 1;
record_count++;
}
} else { |
ee5c926e |
/* Unknown type - probably ran out of strings - rewind */
lseek(fd, -(length+2), SEEK_CUR);
break; |
c561d2a3 |
}
free(name); |
ee5c926e |
vba56_test_middle(fd); |
c561d2a3 |
}
|
ee5c926e |
/* may need to seek forward 20 bytes here. Bleh! */
vba56_test_end(fd); |
c561d2a3 |
if (vba_readn(fd, &record_count, 2) != 2) {
return NULL;
}
cli_dbgmsg("\nVBA Record count: %d\n", record_count);
/*if (record_count <= 0) {
return TRUE;
}*/
lseek(fd, 2*record_count, SEEK_CUR);
lseek(fd, 4, SEEK_CUR);
/* Read fixed octet */ |
ee5c926e |
buff = (unsigned char *) cli_malloc(8); |
c561d2a3 |
if (!buff) {
return NULL;
}
if (vba_readn(fd, buff, 8) != 8) {
free(buff);
return NULL;
}
if (!strncmp(buff, fixed_octet, 8)) {
free(buff);
return NULL;
}
free(buff);
cli_dbgmsg("Read fixed octet ok\n");
/* junk some more stuff */
do {
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
} while(ooff != 0xFFFF);
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
/* no idea what this stuff is */
if (ooff != 0xFFFF) {
lseek(fd, ooff, SEEK_CUR);
}
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
if (ooff != 0xFFFF) {
lseek(fd, ooff, SEEK_CUR);
}
lseek(fd, 100, SEEK_CUR);
if (vba_readn(fd, &record_count, 2) != 2) {
return NULL;
}
cli_dbgmsg("\nVBA Record count: %d\n", record_count);
vba_project = (vba_project_t *) cli_malloc(sizeof(struct vba_project_tag));
vba_project->name = (char **) cli_malloc(sizeof(char *) * record_count);
vba_project->dir = strdup(dir);
vba_project->offset = (uint32_t *) cli_malloc (sizeof(uint32_t) *
record_count);
vba_project->count = record_count;
for (i=0 ; i < record_count ; i++) {
if (vba_readn(fd, &length, 2) != 2) {
return NULL;
} |
ee5c926e |
buff = (unsigned char *) cli_malloc(length); |
c561d2a3 |
if (!buff) {
cli_dbgmsg("cli_malloc failed\n");
return NULL;
}
if (vba_readn(fd, buff, length) != length) {
cli_dbgmsg("read name failed\n");
return NULL;
}
vba_project->name[i] = get_unicode_name(buff, length);
cli_dbgmsg("project name: %s, ", vba_project->name[i]);
free(buff);
/* some kind of string identifier ?? */
if (vba_readn(fd, &length, 2) != 2) {
return NULL;
}
lseek(fd, length, SEEK_CUR);
/* unknown stuff */
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
if (ooff == 0xFFFF) {
lseek(fd, 2, SEEK_CUR);
if (vba_readn(fd, &ooff, 2) != 2) {
return NULL;
}
lseek(fd, ooff, SEEK_CUR);
} else {
lseek(fd, 2 + ooff, SEEK_CUR);
}
lseek(fd, 8, SEEK_CUR);
if (vba_readn(fd, &byte_count, 1) != 1) {
return NULL;
}
for (j=0 ; j<byte_count; j++) {
lseek(fd, 8, SEEK_CUR);
}
lseek(fd, 6, SEEK_CUR);
if (vba_readn(fd, &offset, 4) != 4) {
return NULL;
}
vba_project->offset[i] = offset;
cli_dbgmsg("offset:%d\n", offset);
lseek(fd, 2, SEEK_CUR);
}
{ /* There appears to be some code in here */
off_t foffset;
foffset = lseek(fd, 0, SEEK_CUR);
cli_dbgmsg("\nOffset: 0x%x\n", (unsigned int)foffset);
}
close(fd);
return vba_project;
}
/* Size in bytes of the sliding window buffer used by vba_decompress(). */
#define VBA_COMPRESSION_WINDOW 4096
/* Function: byte_array_append
   Append `len` raw bytes from `src` to the end of `array`, growing its
   heap buffer.

   array - array to extend; when array->length is 0 the previous value of
           array->data is ignored and a fresh buffer is allocated
   src   - bytes to append (binary data, may contain zero bytes)
   len   - number of bytes to append; 0 is a no-op

   If memory cannot be obtained an error is logged and the array is left
   exactly as it was.
*/
void byte_array_append(byte_array_t *array, unsigned char *src, unsigned int len)
{
    unsigned char *grown;

    if (array == NULL || src == NULL || len == 0) {
        return;
    }
    if (array->length + len < array->length) {
        /* unsigned wrap-around of the total size */
        cli_errmsg("byte_array_append: array too large\n");
        return;
    }

    if (array->length == 0) {
        grown = (unsigned char *) cli_malloc(len);
    } else {
        /* Keep the old pointer until realloc is known to have succeeded. */
        grown = realloc(array->data, array->length + len);
    }
    if (!grown) {
        cli_errmsg("byte_array_append: out of memory\n");
        return;
    }

    /* memcpy, not strncpy: the payload is binary and a string copy would
       stop at the first zero byte and pad the rest with zeros. */
    memcpy(grown + array->length, src, len);
    array->data = grown;
    array->length += len;
}
/* Function: vba_decompress
   Decompress the stream that starts 3 bytes after `offset` in fd.

   The input is a sequence of flag bytes.  Each bit of a flag byte, lowest
   first, describes one item: a clear bit is a literal byte copied into the
   VBA_COMPRESSION_WINDOW-byte window; a set bit is a 16-bit token that
   encodes a copy from earlier window contents.  The split of the token
   into length and distance bits depends on the current position inside
   the window.  Window contents are appended to the result when the window
   fills up and once more at end of input.

   fd     - descriptor of the compressed module stream
   offset - offset of the compressed data in fd

   Returns a heap buffer with the decompressed bytes followed by one extra
   terminating zero byte (the interface has no way to report a length), to
   be released with free().  Returns NULL when nothing was decompressed,
   when a token is truncated, or when the terminator cannot be added.
*/
unsigned char *vba_decompress(int fd, uint32_t offset)
{
    unsigned int i, pos=0, shift, win_pos, clean=TRUE, mask, distance;
    uint8_t flag;
    uint16_t token, len;
    unsigned char buffer[VBA_COMPRESSION_WINDOW];
    byte_array_t result;

    result.length=0;
    result.data=NULL;
    /* A malformed token may point at window bytes never written; make
       those reads well defined. */
    memset(buffer, 0, sizeof(buffer));

    lseek(fd, offset+3, SEEK_SET); /* 1byte ?? , 2byte length ?? */

    while (vba_readn(fd, &flag, 1) == 1) {
        for (mask = 1; mask < 0x100; mask<<=1) {
            if (flag & mask) {
                /* copy token */
                if (vba_readn(fd, &token, 2) != 2) {
                    free(result.data);
                    return NULL;
                }
                win_pos = pos % VBA_COMPRESSION_WINDOW;
                /* number of length bits shrinks as the window fills */
                if (win_pos <= 0x80) {
                    if (win_pos <= 0x20) {
                        shift = (win_pos <= 0x10) ? 12:11;
                    } else {
                        shift = (win_pos <= 0x40) ? 10:9;
                    }
                } else {
                    if (win_pos <= 0x200) {
                        shift = (win_pos <= 0x100) ? 8:7;
                    } else if (win_pos <= 0x800) {
                        shift = (win_pos <= 0x400) ? 6:5;
                    } else {
                        shift = 4;
                    }
                }
                len = (token & ((1 << shift) -1)) + 3;
                distance = token >> shift;
                clean = TRUE;

                for (i=0 ; i < len; i++) {
                    unsigned int srcpos;
                    unsigned char c;

                    srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
                    c = buffer[srcpos];
                    buffer[pos++ % VBA_COMPRESSION_WINDOW]= c;
                }
            } else {
                /* literal byte, or a window flush when the window is full */
                if ((pos != 0) &&
                    ((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) {
                    if (vba_readn(fd, &token, 2) != 2) {
                        free(result.data);
                        return NULL;
                    }
                    clean = FALSE;
                    byte_array_append(&result, buffer, VBA_COMPRESSION_WINDOW);
                    /* the remaining bits of this flag byte are dropped */
                    break;
                }
                if (vba_readn(fd, buffer+(pos%VBA_COMPRESSION_WINDOW), 1) == 1){
                    pos++;
                }
                clean = TRUE;
            }
        }
    }

    if (pos % VBA_COMPRESSION_WINDOW) {
        byte_array_append(&result, buffer, pos % VBA_COMPRESSION_WINDOW);
    }

    if (result.data != NULL) {
        /* Terminate the output so that it can be handled as a C string. */
        unsigned char terminator = 0;
        unsigned int before = result.length;

        byte_array_append(&result, &terminator, 1);
        if (result.length != before + 1) {
            free(result.data);
            return NULL;
        }
    }
    return result.data;
}
/*
int vba_dump(vba_project_t *vba_project)
{
int i, fd;
unsigned char *data;
char *fullname;
for (i=0 ; i<vba_project->count ; i++) {
cli_dbgmsg("\n\n*****************************\n");
cli_dbgmsg("Decoding file: %s\n", vba_project->name[i]);
cli_dbgmsg("*****************************\n");
fullname = (char *) cli_malloc(strlen(vba_project->dir) + strlen(vba_project->name[i]) + 2);
sprintf(fullname, "%s/%s", vba_project->dir, vba_project->name[i]);
fd = open(fullname, O_RDONLY);
free(fullname);
if (fd == -1) {
cli_dbgmsg("Open failed\n");
return FALSE;
}
data = vba_decompress(fd, vba_project->offset[i]);
cli_dbgmsg("%s\n", data);
close(fd);
}
return TRUE;
}
int main(int argc, char *argv[])
{
int retval;
char *dirname=NULL;
vba_project_t *vba_project;
while ((retval = getopt(argc, argv, "d:w")) != -1) {
switch (retval) {
case 'd':
dirname = optarg;
break;
case ':':
cli_dbgmsg("missing option parameter\n");
exit(-1);
case '?':
cli_dbgmsg("unknown option\n");
break;
}
}
vba_project = vba56_dir_read(dirname);
if (vba_project != NULL) {
vba_dump(vba_project);
}
return TRUE;
}
*/ |