Browse code

Add code to extract OLE2 objects embedded inside PowerPoint Documents. That's where the VBA is stored (not yet activated).

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@578 77e5149b-7576-45b1-b177-96237e5ba77b

Trog authored on 2004/05/28 00:38:39
Showing 2 changed files
... ...
@@ -1,3 +1,9 @@
1
+Thu May 27 16:38:14 BST 2004 (trog)
2
+-----------------------------------
3
+  *  libclamav/vba_extract.c: Add code to extract OLE2 objects embedded
4
+	inside PowerPoint Documents. That's where the VBA is stored
5
+	(not yet activated).
6
+
1 7
 Wed May 26 16:57:21 CEST 2004 (tk)
2 8
 ----------------------------------
3 9
   * libclamav: disable (accidentally enabled yesterday) PE code
... ...
@@ -28,6 +28,9 @@
28 28
 #include <fcntl.h>
29 29
 #include <stdlib.h>
30 30
 #include <ctype.h>
31
+#include <zlib.h>
32
+
33
+#include "clamav.h"
31 34
 
32 35
 #if HAVE_CONFIG_H
33 36
 #include "clamav-config.h"
... ...
@@ -653,6 +656,407 @@ unsigned char *vba_decompress(int fd, uint32_t offset, int *size)
653 653
 }
654 654
 
655 655
 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
656
+/* Code to extract Power Point Embedded OLE2 Objects */
657
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
658
+
659
/*
 * Smaller of two values.  Each argument may be evaluated twice, so only
 * pass expressions without side effects.  Guarded because some system
 * headers (e.g. <sys/param.h>) already provide MIN.
 */
#ifndef MIN
#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
#endif
660
+
661
/*
 * Header found in front of every record ("atom") of a PowerPoint stream,
 * together with the file position it was read from.
 */
typedef struct atom_header_tag {
	off_t foffset;		/* file position at which the header starts */
	uint16_t ver_inst;	/* raw 16-bit version/instance word */
	uint8_t version;	/* low 4 bits of ver_inst */
	uint16_t instance;	/* remaining upper bits of ver_inst */
	uint16_t type;		/* record type identifier */
	uint32_t length;	/* 32-bit length field of the header */
} atom_header_t;
669
+
670
+typedef struct ppt_currentuser_tag {
671
+	atom_header_t atom_hdr;
672
+	uint32_t len;
673
+	uint32_t magic;
674
+	uint32_t current_edit_offset;
675
+} ppt_currentuser_t;
676
+
677
+typedef struct ppt_useredit_tag {
678
+	atom_header_t atom_hdr;
679
+	int32_t last_slide_id;
680
+	uint32_t version;
681
+	uint32_t last_edit_offset;
682
+	uint32_t persist_dir_offset;
683
+	uint32_t document_ref;
684
+	uint32_t max_persist;
685
+	int16_t	 last_view_type;
686
+} ppt_useredit_t;
687
+
688
+static int ppt_read_atom_header(int fd, atom_header_t *atom_header)
689
+{
690
+	atom_header->foffset = lseek(fd, 0, SEEK_CUR);
691
+	if (cli_readn(fd, &atom_header->ver_inst, 2) != 2) {
692
+		cli_dbgmsg("read ppt_current_user failed\n");
693
+		return FALSE;
694
+	}	
695
+	atom_header->version = atom_header->ver_inst & 0x000f;
696
+	atom_header->instance = atom_header->ver_inst >> 4;
697
+	if (cli_readn(fd, &atom_header->type, 2) != 2) {
698
+		cli_dbgmsg("read ppt_current_user failed\n");
699
+		return FALSE;
700
+	}
701
+	if (cli_readn(fd, &atom_header->length, 4) != 4) {
702
+		cli_dbgmsg("read ppt_current_user failed\n");
703
+		return FALSE;
704
+	}
705
+	return TRUE;
706
+}
707
+
708
+static void ppt_print_atom_header(atom_header_t *atom_header)
709
+{
710
+	cli_dbgmsg("Atom Hdr:\n");
711
+	cli_dbgmsg("  Version: 0x%.2x\n", atom_header->version);
712
+	cli_dbgmsg("  Instance: 0x%.4x\n", atom_header->instance);
713
+	cli_dbgmsg("  Type: 0x%.4x\n", atom_header->type);
714
+	cli_dbgmsg("  Length: 0x%.8x\n", atom_header->length);
715
+}
716
+
717
+static void ppt_print_useredit(ppt_useredit_t *ppt_useredit)
718
+{
719
+	ppt_print_atom_header(&ppt_useredit->atom_hdr);
720
+	cli_dbgmsg("Last Slide ID: 0x%.4x\n", ppt_useredit->last_slide_id);
721
+	cli_dbgmsg("Version: 0x%.4x\n", ppt_useredit->version);
722
+	cli_dbgmsg("Last Edit Offset: 0x%.4x\n", ppt_useredit->last_edit_offset);
723
+	cli_dbgmsg("Persist Dir Offset: 0x%.4x\n", ppt_useredit->persist_dir_offset);
724
+	cli_dbgmsg("Document Ref: 0x%.4x\n", ppt_useredit->document_ref);
725
+	cli_dbgmsg("Max Persist: 0x%.4x\n", ppt_useredit->max_persist);
726
+	cli_dbgmsg("Last view type: 0x%.4x\n\n", ppt_useredit->last_view_type);
727
+}
728
+
729
+static int ppt_read_useredit(int fd, ppt_useredit_t *ppt_useredit)
730
+{
731
+	if (!ppt_read_atom_header(fd, &ppt_useredit->atom_hdr)) {
732
+		return FALSE;
733
+	}
734
+	if (cli_readn(fd, &ppt_useredit->last_slide_id, 4) != 4) {
735
+		cli_dbgmsg("read ppt_useredit failed\n");
736
+		return FALSE;
737
+	}
738
+	if (cli_readn(fd, &ppt_useredit->version, 4) != 4) {
739
+		cli_dbgmsg("read ppt_useredit failed\n");
740
+		return FALSE;
741
+	}
742
+	if (cli_readn(fd, &ppt_useredit->last_edit_offset, 4) != 4) {
743
+		cli_dbgmsg("read ppt_useredit failed\n");
744
+		return FALSE;
745
+	}
746
+	if (cli_readn(fd, &ppt_useredit->persist_dir_offset, 4) != 4) {
747
+		cli_dbgmsg("read ppt_useredit failed\n");
748
+		return FALSE;
749
+	}
750
+	if (cli_readn(fd, &ppt_useredit->document_ref, 4) != 4) {
751
+		cli_dbgmsg("read ppt_useredit failed\n");
752
+		return FALSE;
753
+	}
754
+	if (cli_readn(fd, &ppt_useredit->max_persist, 4) != 4) {
755
+		cli_dbgmsg("read ppt_useredit failed\n");
756
+		return FALSE;
757
+	}
758
+	if (cli_readn(fd, &ppt_useredit->last_view_type, 2) != 2) {
759
+		cli_dbgmsg("read ppt_useredit failed\n");
760
+		return FALSE;
761
+	}
762
+	return TRUE;
763
+}
764
+
765
+static void ppt_print_current_user(ppt_currentuser_t *ppt_current_user)
766
+{
767
+	ppt_print_atom_header(&ppt_current_user->atom_hdr);
768
+	cli_dbgmsg("Magic: 0x%.8x\n", ppt_current_user->magic);
769
+	cli_dbgmsg("Curr Edit Offset: 0x%.8x\n", ppt_current_user->current_edit_offset);
770
+}
771
+
772
+static int ppt_read_current_user(int fd, ppt_currentuser_t *ppt_current_user)
773
+{
774
+	if (!ppt_read_atom_header(fd, &ppt_current_user->atom_hdr)) {
775
+		return FALSE;
776
+	}
777
+	if (cli_readn(fd, &ppt_current_user->len, 4) != 4) {
778
+		cli_dbgmsg("read ppt_current_user failed\n");
779
+		return FALSE;
780
+	}
781
+	
782
+	if (cli_readn(fd, &ppt_current_user->magic, 4) != 4) {
783
+		cli_dbgmsg("read ppt_current_user 1 failed\n");
784
+		return FALSE;
785
+	}
786
+	if (cli_readn(fd, &ppt_current_user->current_edit_offset, 4) != 4) {
787
+		cli_dbgmsg("read ppt_current_user 2 failed\n");
788
+		return FALSE;
789
+	}
790
+	
791
+	/* Don't need to read the rest of the Current User file in order
792
+		to extract what we need */
793
+	return TRUE;
794
+}
795
+
796
+static uint32_t *ppt_read_persist_dir(int fd, ppt_useredit_t *ppt_useredit)
797
+{
798
+	uint32_t *persist_dir, noffsets, off_index;
799
+	atom_header_t atom_header;
800
+	int size, i, off_count=0;
801
+	
802
+	if (lseek(fd, ppt_useredit->persist_dir_offset, SEEK_SET) != 
803
+			ppt_useredit->persist_dir_offset) {
804
+		return NULL;
805
+	}
806
+
807
+	if (!ppt_read_atom_header(fd, &atom_header)) {
808
+		return NULL;
809
+	}
810
+	ppt_print_atom_header(&atom_header);
811
+	
812
+	size = sizeof(uint32_t) * (ppt_useredit->max_persist+1);
813
+	persist_dir = malloc(size);
814
+	if (!persist_dir) {
815
+		return NULL;
816
+	}
817
+	memset(persist_dir, 0xFF, size);
818
+	
819
+	while ((off_count < ppt_useredit->max_persist) && 
820
+			(lseek(fd, 0, SEEK_CUR) < atom_header.foffset+atom_header.length)) {
821
+		if (cli_readn(fd, &noffsets, 4) != 4) {
822
+			cli_dbgmsg("read ppt_current_user failed\n");
823
+			free(persist_dir);
824
+			return NULL;
825
+		}
826
+		off_index = noffsets & 0x000FFFFF;
827
+		noffsets = noffsets >> 20;
828
+		cli_dbgmsg("nOffsets: %d\n", noffsets);
829
+		cli_dbgmsg("Offset index: %d\n",off_index);
830
+		for (i=0 ; i<noffsets; i++) {
831
+			if ((off_index+i-1) > ppt_useredit->max_persist)
832
+			{
833
+				cli_dbgmsg("ppt_read_persist_dir overflow\n");
834
+				free(persist_dir);
835
+				return NULL;
836
+			}
837
+			if (cli_readn(fd, &persist_dir[off_index+i-1], 4) != 4) {
838
+				cli_dbgmsg("read ppt_read_persist_dir failed\n");
839
+				free(persist_dir);
840
+				return NULL;
841
+			}
842
+			cli_dbgmsg("persist_dir[%d] = 0x%.8x\n", off_index+i-1, persist_dir[off_index+i-1]);
843
+			off_count++;
844
+		}
845
+	}
846
+	cli_dbgmsg("File offset: 0x%.8x\n\n", lseek(fd, 0, SEEK_CUR));
847
+	
848
+	return persist_dir;
849
+}
850
+
851
+#define PPT_LZW_BUFFSIZE 8192
852
+static int ppt_unlzw(const char *dir, int fd, uint32_t length)
853
+{
854
+	int ofd, retval;
855
+	unsigned char inbuff[PPT_LZW_BUFFSIZE], outbuff[PPT_LZW_BUFFSIZE];
856
+	char *fullname;
857
+	uint32_t bufflen;
858
+	z_stream stream;
859
+	
860
+	fullname = malloc(strlen(dir) + 17);
861
+	sprintf(fullname, "%s/ppt%.8x.doc", dir, lseek(fd, 0, SEEK_CUR));
862
+	
863
+	ofd = open(fullname, O_WRONLY|O_CREAT|O_TRUNC, 0600);
864
+	free(fullname);
865
+        if (ofd == -1) {
866
+                cli_dbgmsg("ppt_unlzw Open outfile failed\n");
867
+                return -1;
868
+        }
869
+	
870
+	stream.zalloc = Z_NULL;
871
+	stream.zfree = Z_NULL;
872
+	stream.opaque = (void *)0;
873
+	
874
+	stream.next_in = inbuff;
875
+	bufflen = stream.avail_in = MIN(length, PPT_LZW_BUFFSIZE);
876
+	
877
+	if (cli_readn(fd, inbuff, stream.avail_in) != stream.avail_in) {
878
+		close(ofd);
879
+		return FALSE;
880
+	}
881
+	length -= stream.avail_in;
882
+	
883
+	retval = inflateInit(&stream);
884
+	if (retval != Z_OK) {
885
+		cli_dbgmsg(" ppt_unlzw !Z_OK: %d\n", retval);
886
+	}
887
+	
888
+	stream.next_out = outbuff;
889
+	stream.avail_out = PPT_LZW_BUFFSIZE;
890
+	
891
+	do {
892
+		if (stream.avail_out == 0) {
893
+			if (cli_writen(ofd, outbuff, PPT_LZW_BUFFSIZE)
894
+						!= PPT_LZW_BUFFSIZE) {
895
+				close(ofd);
896
+				inflateEnd(&stream);
897
+				return FALSE;
898
+			}
899
+			stream.next_out = outbuff;
900
+			stream.avail_out = PPT_LZW_BUFFSIZE;
901
+		}
902
+		if (stream.avail_in == 0) {
903
+			stream.next_in = inbuff;
904
+			bufflen = stream.avail_in = MIN(length, PPT_LZW_BUFFSIZE);
905
+			if (cli_readn(fd, inbuff, stream.avail_in) != stream.avail_in) {
906
+				close(ofd);
907
+				inflateEnd(&stream);
908
+				return FALSE;
909
+			}
910
+			length -= stream.avail_in;
911
+		}
912
+		retval = inflate(&stream, Z_NO_FLUSH);
913
+	} while (retval == Z_OK);
914
+	
915
+	if (cli_writen(ofd, outbuff, bufflen) != bufflen) {
916
+		close(ofd);
917
+		inflateEnd(&stream);
918
+		return FALSE;
919
+	}
920
+	inflateEnd(&stream);
921
+	close(ofd);
922
+	return TRUE;
923
+}
924
+
925
+char *ppt_vba_read(const char *dir)
926
+{
927
+	ppt_currentuser_t ppt_current_user;
928
+	ppt_useredit_t ppt_useredit;
929
+	uint32_t *persist_dir;
930
+	char *fullname, *out_dir, *tmpdir;
931
+	int fd, i, ofd;
932
+	unsigned char *buffer;
933
+	atom_header_t atom_header;
934
+	uint32_t ole_id;
935
+	
936
+	fullname = (char *) cli_malloc(strlen(dir) + 14);
937
+	if (!fullname) {
938
+		return NULL;
939
+	}
940
+	sprintf(fullname, "%s/Current User", dir);
941
+	fd = open(fullname, O_RDONLY);
942
+	free(fullname);
943
+	if (fd == -1) {
944
+		cli_dbgmsg("Open Current User failed\n");
945
+		return NULL;
946
+	}
947
+	
948
+	if (!ppt_read_current_user(fd, &ppt_current_user)) {
949
+		close(fd);
950
+		return NULL;
951
+	}
952
+	
953
+	ppt_print_current_user(&ppt_current_user);
954
+	close(fd);
955
+
956
+	fullname = (char *) cli_malloc(strlen(dir) + 21);
957
+	if (!fullname) {
958
+		return NULL;
959
+	}
960
+	sprintf(fullname, "%s/PowerPoint Document", dir);
961
+	fd = open(fullname, O_RDONLY);
962
+	free(fullname);
963
+	if (fd == -1) {
964
+		cli_dbgmsg("Open Current User failed\n");
965
+		return NULL;
966
+	}
967
+	if (lseek(fd, ppt_current_user.current_edit_offset, SEEK_SET) !=
968
+					ppt_current_user.current_edit_offset) {
969
+		cli_dbgmsg("lseek cli_ppt_vbaread failed\n");
970
+		close(fd);
971
+		return FALSE;
972
+	}
973
+
974
+	/* Create a directory to store the extracted OLE2 objects */
975
+	tmpdir = getenv("TMPDIR");
976
+
977
+	if(tmpdir == NULL)
978
+#ifdef P_tmpdir
979
+		tmpdir = P_tmpdir;
980
+#else
981
+		tmpdir = "/tmp";
982
+#endif
983
+
984
+	/* generate the temporary directory */
985
+	out_dir = cl_gentemp(tmpdir);
986
+	if(mkdir(out_dir, 0700)) {
987
+	    printf("ScanOLE2 -> Can't create temporary directory %s\n", dir);
988
+	    close(fd);
989
+	    return NULL;
990
+	}
991
+
992
+	do {	
993
+		if (!ppt_read_useredit(fd, &ppt_useredit)) {
994
+			close(fd);
995
+			cli_rmdirs(out_dir);
996
+			free(out_dir);
997
+			return NULL;
998
+		}
999
+		ppt_print_useredit(&ppt_useredit);
1000
+		
1001
+		persist_dir = ppt_read_persist_dir(fd, &ppt_useredit);
1002
+		if (!persist_dir) {
1003
+			close(fd);
1004
+			cli_rmdirs(out_dir);
1005
+			free(out_dir);
1006
+			return NULL;
1007
+		}
1008
+		for (i=0 ; i < ppt_useredit.max_persist ; i++) {
1009
+			if (persist_dir[i] != 0xFFFFFFFF) {
1010
+				if (lseek(fd, persist_dir[i], SEEK_SET) == persist_dir[i]) {				
1011
+					if (!ppt_read_atom_header(fd, &atom_header)) {
1012
+						close(fd);
1013
+						free(persist_dir);
1014
+						cli_rmdirs(out_dir);
1015
+						free(out_dir);
1016
+						return NULL;
1017
+					}
1018
+					ppt_print_atom_header(&atom_header);
1019
+					if (atom_header.type == 0x1011) {
1020
+						if (cli_readn(fd, &ole_id, 4) != 4) {
1021
+							cli_dbgmsg("read ole_id failed\n");
1022
+							close(fd);
1023
+							free(persist_dir);
1024
+							cli_rmdirs(out_dir);
1025
+							free(out_dir);
1026
+							return NULL;
1027
+						}
1028
+						cli_dbgmsg("OleID: %d, length: %d\n",
1029
+								ole_id, atom_header.length-4);
1030
+						if (!ppt_unlzw(out_dir, fd, atom_header.length-4)) {
1031
+							cli_dbgmsg("ppt_unlzw failed\n");
1032
+							close(fd);
1033
+							free(persist_dir);
1034
+							cli_rmdirs(out_dir);
1035
+							free(out_dir);
1036
+							return NULL;
1037
+						}
1038
+							
1039
+					}	
1040
+				}
1041
+			}
1042
+		}
1043
+		free(persist_dir);
1044
+		
1045
+		if (lseek(fd, ppt_useredit.last_edit_offset, SEEK_SET) !=
1046
+					ppt_useredit.last_edit_offset) {
1047
+			cli_dbgmsg("lseek cli_ppt_vbaread failed\n");
1048
+			close(fd);
1049
+			return NULL;
1050
+		}
1051
+	} while (ppt_useredit.last_edit_offset != 0);
1052
+
1053
+	return out_dir;
1054
+}	
1055
+
1056
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
656 1057
 /* Code to extract Word6 macros
657 1058
 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
658 1059