Browse code

commit experimental code from Edvin for extracting embedded objects from RTF files

git-svn: trunk@2534

Tomasz Kojm authored on 2006/12/04 09:10:46
Showing 8 changed files
... ...
@@ -1,3 +1,8 @@
1
+Mon Dec  4 01:04:30 CET 2006 (tk)
2
+---------------------------------
3
+  * libclamav: commit experimental code from Edvin for extracting embedded
4
+	       objects from RTF files
5
+
1 6
 Sat Dec  2 17:46:31 GMT 2006 (njh)
2 7
 ----------------------------------
3 8
   * libclamav/pst.c:	Fix compilation errors
... ...
@@ -52,6 +52,8 @@ libclamav_la_SOURCES = \
52 52
 	scanners.h \
53 53
 	filetypes.c \
54 54
 	filetypes.h \
55
+	rtf.c \
56
+	rtf.h \
55 57
 	blob.c \
56 58
 	blob.h \
57 59
 	mbox.c \
... ...
@@ -79,7 +79,7 @@ LTLIBRARIES = $(lib_LTLIBRARIES)
79 79
 libclamav_la_DEPENDENCIES =
80 80
 am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher-ncore.lo \
81 81
 	matcher.lo md5.lo others.lo readdb.lo cvd.lo dsig.lo str.lo \
82
-	scanners.lo filetypes.lo blob.lo mbox.lo message.lo \
82
+	scanners.lo filetypes.lo rtf.lo blob.lo mbox.lo message.lo \
83 83
 	snprintf.lo table.lo text.lo ole2_extract.lo vba_extract.lo \
84 84
 	msexpand.lo pe.lo cabd.lo lzxd.lo mszipd.lo qtmd.lo system.lo \
85 85
 	upx.lo htmlnorm.lo chmunpack.lo rebuildpe.lo petite.lo \
... ...
@@ -253,6 +253,8 @@ libclamav_la_SOURCES = \
253 253
 	scanners.h \
254 254
 	filetypes.c \
255 255
 	filetypes.h \
256
+	rtf.c \
257
+	rtf.h \
256 258
 	blob.c \
257 259
 	blob.h \
258 260
 	mbox.c \
... ...
@@ -466,6 +468,7 @@ distclean-compile:
466 466
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdb.Plo@am__quote@
467 467
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rebuildpe.Plo@am__quote@
468 468
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_list.Plo@am__quote@
469
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rtf.Plo@am__quote@
469 470
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanners.Plo@am__quote@
470 471
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@
471 472
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sis.Plo@am__quote@
... ...
@@ -123,6 +123,9 @@ static const struct cli_magic_s cli_magic[] = {
123 123
     {0,  "\%PDF-",			 5, "PDF document", CL_TYPE_PDF},
124 124
     {0,  "\266\271\254\256\376\377\377\377", 8, "CryptFF", CL_TYPE_CRYPTFF},
125 125
 
126
+#ifdef CL_EXPERIMENTAL  
127
+    {0,  "{\\rtf",                           5, "RTF", CL_TYPE_RTF}, 
128
+#endif    
126 129
     /* Ignored types */
127 130
 
128 131
     {0,  "\000\000\001\263",             4, "MPEG video stream",  CL_TYPE_DATA},
... ...
@@ -54,7 +54,9 @@ typedef enum {
54 54
     CL_TYPE_UUENCODED,
55 55
     CL_TYPE_PST,	/* Microsoft Outlook binary email folder (.pst file) */
56 56
     CL_TYPE_HTML_UTF16,
57
-
57
+#ifdef CL_EXPERIMENTAL
58
+    CL_TYPE_RTF,
59
+#endif
58 60
     /* bigger numbers have higher priority (in o-t-f detection) */
59 61
     CL_TYPE_HTML, /* on the fly */
60 62
     CL_TYPE_MAIL,  /* magic + on the fly */
61 63
new file mode 100644
... ...
@@ -0,0 +1,661 @@
0
+/*
1

                
2
+ *
3
+ *  This program is free software; you can redistribute it and/or modify
4
+ *  it under the terms of the GNU General Public License as published by
5
+ *  the Free Software Foundation; either version 2 of the License, or
6
+ *  (at your option) any later version.
7
+ *
8
+ *  This program is distributed in the hope that it will be useful,
9
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+ *  GNU General Public License for more details.
12
+ *
13
+ *  You should have received a copy of the GNU General Public License
14
+ *  along with this program; if not, write to the Free Software
15
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16
+ *  MA 02110-1301, USA.
17
+ */
18
+
19
+#if HAVE_CONFIG_H
20
+#include "clamav-config.h"
21
+#endif
22
+
23
+#ifdef CL_EXPERIMENTAL
24
+
25
+#include <stdio.h>
26
+#include <string.h>
27
+#include <sys/types.h>
28
+#include <ctype.h>
29
+
30
+#ifdef HAVE_UNISTD_H
31
+#include <unistd.h>
32
+#endif
33
+
34
+#include "others.h"
35
+#include "rtf.h"
36
+#include "clamav.h"
37
+#include "table.h"
38
+#include "scanners.h"
39
+#include "vba_extract.h"
40
+
41
/*
 * States of the RTF tokenizer driven by cli_scanrtf().
 */
enum parse_state {
	PARSE_MAIN,                  /* plain text, '{', '}' and the start of a control */
	PARSE_CONTROL_,              /* a backslash has just been consumed */
	PARSE_CONTROL_WORD,          /* collecting the letters of a control word */
	PARSE_CONTROL_SYMBOL,        /* single non-letter character following a backslash */
	PARSE_CONTROL_WORD_PARAM,    /* reading the numeric parameter of a control word */
	PARSE_INTERPRET_CONTROLWORD  /* control word complete: look it up in the action table */
};
42
+
43
/*
 * Actions associated with recognised control words. The numeric values are
 * stored in the action table and are also used as bit positions in
 * rtf_state.encounteredTopLevel.
 */
enum rtf_action {
	RTF_OBJECT      = 0,
	RTF_OBJECT_DATA = 1
};
48
+
49
+struct rtf_state;
50
+typedef int (*rtf_callback_begin)(struct rtf_state*, cli_ctx* ctx,const char* tmpdir);
51
+typedef int (*rtf_callback_process)(struct rtf_state*, const unsigned char* data,const size_t len);
52
+typedef int (*rtf_callback_end)(struct rtf_state*, cli_ctx*);
53
+
54
+struct rtf_state {
55
+	size_t default_elements;
56
+	size_t controlword_cnt;
57
+	ssize_t controlword_param;
58
+	enum parse_state parse_state;
59
+	int  controlword_param_sign;
60
+	int  encounteredTopLevel;/* encountered top-level control words that we care about */
61
+	char controlword[33];
62
+	rtf_callback_begin cb_begin;/* must be non-null if you want cb_process, and cb_end to be called, also it must change cb_data to non-null */
63
+	rtf_callback_process cb_process;
64
+	rtf_callback_end cb_end;
65
+	void* cb_data;/* data set up by cb_begin, used by cb_process, and cleaned up by cb_end. typically state data */
66
+};
67
+
68
+static const struct rtf_state base_state = {
69
+	0,0,0,PARSE_MAIN,0,0,"                              ",NULL,NULL,NULL,NULL
70
+};
71
+
72
/*
 * Growable stack of saved group states.
 */
struct stack {
	struct rtf_state* states;  /* heap array holding the saved states */
	size_t elements;           /* nesting counter: ++ on every push, -- on every pop */
	size_t stack_cnt;          /* number of entries in use in states[] */
	size_t stack_size;         /* number of entries allocated in states[] */
};
78
+
79
+static const struct rtf_action_mapping {
80
+	const char* controlword;
81
+	const enum rtf_action action;
82
+} rtf_action_mapping [] = 
83
+{
84
+	{"object", RTF_OBJECT},
85
+	{"objdata ",RTF_OBJECT_DATA}
86
+};
87
+
88
+static const size_t rtf_action_mapping_cnt = sizeof(rtf_action_mapping)/sizeof(rtf_action_mapping[0]);
89
+
90
/*
 * States of the \objdata decoder in rtf_object_process(), in the order the
 * fields are consumed from the decoded byte stream.
 */
enum rtf_objdata_state {
	WAIT_MAGIC,      /* matching the 8 byte header below */
	WAIT_DESC_LEN,   /* 4 byte little-endian length of the description */
	WAIT_DESC,       /* the description string itself */
	WAIT_ZERO,       /* 8 bytes that are skipped */
	WAIT_DATA_SIZE,  /* 4 byte little-endian size of the embedded data */
	DUMP_DATA,       /* copying the embedded data to a temporary file */
	DUMP_DISCARD     /* ignore everything that follows */
};

/* is this a magic number, or does it mean something?
 * NOTE(review): looks like the OLE 1.0 ObjectHeader (OLEVersion 0x00000501
 * followed by FormatID 0x00000002 = embedded object) - confirm against MS-OLEDS */
static const unsigned char rtf_data_magic[] = {
	0x01, 0x05, 0x00, 0x00,
	0x02, 0x00, 0x00, 0x00
};
static const size_t rtf_data_magic_len = sizeof(rtf_data_magic);
93
+
94
+struct rtf_object_data {
95
+	char* name;
96
+	int  fd;
97
+	int partial;
98
+	int has_partial;
99
+	enum rtf_objdata_state internal_state;
100
+	char* desc_name;
101
+	const char* tmpdir;
102
+	cli_ctx*    ctx;
103
+	size_t desc_len;
104
+	size_t bread;
105
+};
106
+
107
/* size of the read buffer and of the hex-decoding scratch buffer */
#define BUFF_SIZE 8192

/*
 * Value of a hexadecimal digit, indexed by character code. Every character
 * that is not 0-9, a-f or A-F maps to 0.
 * generated by contrib/phishing/generate_tables.c
 */
static const short int hextable[256] = {
	/* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x20 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
	/* 0x40 */  0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x50 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x60 */  0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x70 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x80 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x90 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xa0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xb0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xc0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xd0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xe0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0xf0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
127
+
128
/* defined outside this file; when non-zero, temporary files are kept */
extern short cli_leavetemps_flag;
129
+
130
+static void init_rtf_state(struct rtf_state* state)
131
+{
132
+	*state = base_state;
133
+	state->parse_state = PARSE_MAIN;
134
+	state->controlword_cnt = 0;
135
+}
136
+
137
+static int compare_state(const struct rtf_state* a,const struct rtf_state* b)
138
+{
139
+	return (a->controlword_param == b->controlword_param && 
140
+			a->parse_state == b->parse_state &&
141
+			a->encounteredTopLevel == b->encounteredTopLevel &&
142
+			memcmp(a->controlword,b->controlword,33)==0 &&
143
+			a->cb_begin == b->cb_begin &&
144
+			a->cb_process == b->cb_process &&
145
+			a->cb_end == b->cb_end &&
146
+			a->cb_data == b->cb_data);
147
+}
148
+
149
+
150
+static int push_state(struct stack* stack,struct rtf_state* state)
151
+{
152
+	int toplevel;
153
+	size_t defelements;
154
+
155
+	stack->elements++;
156
+	if( compare_state(state,&base_state)) { 
157
+		state->default_elements++;
158
+		return 0;/* this is default state, don't push it, we'll know when we pop it that it was the default one,
159
+			  we store in the state how many default elements we have on the stack */
160
+	}
161
+	if(stack->stack_cnt >= stack->stack_size) {
162
+		/* grow stack */
163
+		stack->stack_size += 128;
164
+		stack->states = cli_realloc(stack->states, stack->stack_size*sizeof(*stack->states));
165
+		if(!stack->states)
166
+			return CL_EMEM;
167
+	}
168
+	stack->states[stack->stack_cnt++] = *state;
169
+	toplevel = state->encounteredTopLevel;
170
+	defelements = state->default_elements;
171
+
172
+	*state = base_state;
173
+
174
+	state->encounteredTopLevel = toplevel;
175
+	state->default_elements = defelements;
176
+	return 0; 
177
+}
178
+
179
+
180
+static int pop_state(struct stack* stack,struct rtf_state* state)
181
+{
182
+	stack->elements--;
183
+	if(state->default_elements) {
184
+		const size_t default_elements = state->default_elements-1;
185
+		const int toplevel = state->encounteredTopLevel;
186
+		*state = base_state;
187
+		state->default_elements = default_elements;
188
+		state->encounteredTopLevel = toplevel;
189
+		return 0;/* this is a default 'state'*/
190
+	}
191
+	if(!stack->stack_cnt) {
192
+		cli_dbgmsg("Warning: attempt to pop from empty stack!\n");
193
+		*state = base_state;/* lets assume we give it a base state */
194
+		return 0;
195
+	}
196
+	*state = stack->states[--stack->stack_cnt];
197
+	return 0;
198
+}
199
+
200
+
201
+static int load_actions(table_t* t)
202
+{
203
+	size_t i;
204
+	for(i=0; i<rtf_action_mapping_cnt; i++)
205
+		tableInsert(t, rtf_action_mapping[i].controlword, rtf_action_mapping[i].action);
206
+	return 0;
207
+}
208
+
209
+static int rtf_object_begin(struct rtf_state* state,cli_ctx* ctx,const char* tmpdir)
210
+{
211
+	struct rtf_object_data* data = cli_malloc(sizeof(*data));
212
+	if(!data)
213
+		return CL_EMEM;
214
+	data->fd = -1;
215
+	data->partial = 0;
216
+	data->has_partial = 0;
217
+	data->bread = 0;
218
+	data->internal_state = WAIT_MAGIC;
219
+	data->tmpdir = tmpdir;
220
+	data->ctx    = ctx;
221
+
222
+	state->cb_data = data;
223
+	return 0;
224
+}
225
+
226
+
227
+static int decode_and_scan(struct rtf_object_data* data, cli_ctx* ctx)
228
+{
229
+	int ofd, ret=0;
230
+
231
+	cli_dbgmsg("Scanning embedded object:%s\n",data->name);
232
+	if(data->bread == 1) {
233
+		cli_dbgmsg("Decoding ole object\n");
234
+		lseek(data->fd,0,SEEK_SET);
235
+		ofd = cli_decode_ole_object(data->fd,data->tmpdir);
236
+		if (ofd >= 0) {
237
+			ret = cli_magic_scandesc(ofd, ctx);
238
+			close(ofd);
239
+		}
240
+	}
241
+	else
242
+		ret = cli_magic_scandesc(data->fd,ctx);
243
+	close(data->fd);
244
+	data->fd = 0;
245
+	if(data->name) {
246
+		if(!cli_leavetemps_flag)
247
+			unlink(data->name);
248
+		free(data->name);
249
+		data->name = NULL;
250
+	}
251
+
252
+	if(ret != CL_CLEAN)
253
+		return ret;
254
+	return 0;
255
+}
256
+
257
+static int rtf_object_process(struct rtf_state* state, const unsigned char* input,const size_t len)
258
+{
259
+	struct rtf_object_data* data = state->cb_data;
260
+	unsigned char outdata[BUFF_SIZE];
261
+	const unsigned char* out_data;
262
+	size_t out_cnt = 0;
263
+	size_t i;
264
+
265
+	if(!data || !len)
266
+		return 0;
267
+
268
+	if(data->has_partial) {
269
+		outdata[out_cnt++] = data->partial | input[0];
270
+	}
271
+
272
+	data->has_partial = 0;
273
+	for(i=0;i<len;i++) {
274
+		if(isxdigit(input[i])) {
275
+				const unsigned char byte = hextable[ input[i++] ] << 4;
276
+				while(i<len && !isxdigit(input[i]))
277
+					i++;
278
+				if(i == len) {
279
+					data->partial = byte;
280
+					data->has_partial = 1;
281
+					break;
282
+				}
283
+				outdata[out_cnt++] = byte | hextable[ input[i] ];
284
+		}
285
+	}
286
+
287
+	out_data = outdata;
288
+	while(out_data && out_cnt) {
289
+		switch(data->internal_state) {
290
+			case WAIT_MAGIC: {
291
+						 for(i=0; i<out_cnt && data->bread < rtf_data_magic_len; i++, data->bread++)
292
+							 if(rtf_data_magic[data->bread] != out_data[i]) {
293
+								 cli_dbgmsg("Warning: rtf objdata magic number not matched, expected:%d, got: %d, at pos:%d\n",rtf_data_magic[i],out_data[i],data->bread);
294
+							 }
295
+						 out_cnt  -= i;
296
+						 if(data->bread == rtf_data_magic_len) {
297
+							 out_data += i;
298
+							 data->bread = 0;
299
+							 data->internal_state = WAIT_DESC_LEN;						 
300
+						 }
301
+						 break;
302
+					 }
303
+			case WAIT_DESC_LEN: {
304
+						    if(data->bread == 0)
305
+							    data->desc_len = 0;
306
+						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
307
+							    data->desc_len  |=  ((size_t)out_data[i]) << (data->bread*8);
308
+						    out_cnt  -= i;
309
+						    if(data->bread == 4) {
310
+							    out_data += i;
311
+							    data->bread=0;
312
+							    if(data->desc_len > 64) {
313
+								    cli_dbgmsg("Description length too big (%d), showing only 64 bytes of it\n",data->desc_len);
314
+								    data->desc_name = cli_malloc(65);
315
+							    }
316
+							    else
317
+								    data->desc_name = cli_malloc(data->desc_len+1);
318
+							    if(!data->desc_name) {
319
+								    return CL_EMEM;
320
+							    }
321
+							    data->internal_state = WAIT_DESC;
322
+						    }
323
+						    break;
324
+					    }
325
+			case WAIT_DESC:{
326
+					       for(i=0;i<out_cnt && data->bread < data->desc_len && data->bread < 64;i++, data->bread++)
327
+						       data->desc_name[data->bread] = out_data[i];
328
+					       /*FIXME: sanity check here, to avoid segfault */
329
+					       if(i+data->desc_len-data->bread > out_cnt) {
330
+						       cli_dbgmsg("Can't interpret length in wait_desc\n");
331
+						       return 0;/* bail out */
332
+					       }
333
+					       out_cnt  -= i + data->desc_len - data->bread;
334
+					       if(data->bread <= data->desc_len) {
335
+						       out_data += i + data->desc_len - data->bread;
336
+						       data->desc_name[data->bread] = '\0';
337
+						       data->bread = 0;
338
+						       cli_dbgmsg("Preparing to dump rtf embedded object, description:%s\n",data->desc_name);
339
+						       free(data->desc_name);
340
+						       data->desc_name = NULL;
341
+						       data->internal_state = WAIT_ZERO;
342
+					       }
343
+					       break;
344
+				       }
345
+			case WAIT_ZERO:{
346
+					       if(out_cnt < 8-data->bread) {
347
+						       out_cnt = 0;
348
+						       data->bread += out_cnt;
349
+					       }
350
+					       else {
351
+						       out_cnt  -= 8-data->bread;
352
+						       data->bread = 8;
353
+					       }
354
+					       if(data->bread == 8) {
355
+						       out_data += 8;
356
+						       data->bread = 0;
357
+						       data->internal_state = WAIT_DATA_SIZE;
358
+					       }
359
+					       break;
360
+				       }
361
+
362
+			case WAIT_DATA_SIZE: {
363
+						    if(data->bread == 0)
364
+							    data->desc_len = 0;
365
+						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
366
+							    data->desc_len  |= ((size_t)out_data[i]) << (8*data->bread);
367
+						    out_cnt  -= i;
368
+						    if(data->bread == 4) {
369
+							    out_data += i;
370
+							    data->bread=0;
371
+							    cli_dbgmsg("Dumping rtf embedded object of size:%ld\n",data->desc_len);
372
+					    		    data->name = cli_gentempdesc(data->tmpdir, &data->fd);
373
+							    if(!data->name)
374
+								    return CL_ETMPFILE;
375
+							    data->internal_state = DUMP_DATA;
376
+						    }
377
+						    break;
378
+					     }
379
+			case DUMP_DATA: {
380
+						ssize_t out_want = out_cnt < data->desc_len ? out_cnt : data->desc_len;
381
+						if(!data->bread) {
382
+							if(out_data[0] != 0xd0 || out_data[1]!=0xcf) {
383
+								/* this is not an ole2 doc, but some ole (stream?) to be
384
+								 * decoded by cli_decode_ole_object*/
385
+							    char out[4];
386
+							    data->bread = 1;/* flag to indicate this needs to be scanned with cli_decode_ole_object*/
387
+							    cli_writeint32(out,data->desc_len);
388
+							    if(cli_writen(data->fd,out,4)!=4)
389
+								    return CL_EIO; 
390
+							}
391
+							else
392
+								data->bread = 2;
393
+						}
394
+
395
+						data->desc_len -= out_want;
396
+						if(cli_writen(data->fd,out_data,out_want) != out_want) {
397
+							return CL_EIO;
398
+						}
399
+						out_data += out_want;
400
+						out_cnt  -= out_want;
401
+						if(!data->desc_len) { 
402
+							int rc;
403
+							if(( rc = decode_and_scan(data, data->ctx) ))
404
+								return rc;
405
+							data->bread=0;
406
+							data->internal_state = WAIT_MAGIC;
407
+						}
408
+						break;					
409
+					}				    
410
+			case DUMP_DISCARD:
411
+			default:
412
+					out_cnt = 0;
413
+					;
414
+		}
415
+	}
416
+	return 0;
417
+}
418
+
419
+
420
+
421
+static int rtf_object_end(struct rtf_state* state,cli_ctx* ctx)
422
+{
423
+	struct rtf_object_data* data = state->cb_data;
424
+	int rc = 0;
425
+	if(!data)
426
+		return 0;
427
+	if(data->fd) { 
428
+		rc = decode_and_scan(data, ctx);
429
+	}
430
+	if(data->name)
431
+		free(data->name);
432
+	if(data->desc_name)
433
+		free(data->desc_name);
434
+	free(data);
435
+	state->cb_data = NULL;
436
+	return rc;
437
+}
438
+
439
+
440
+static void rtf_action(struct rtf_state* state,long action, const char* tempname,cli_ctx* ctx)
441
+{
442
+	switch(action) {
443
+		case RTF_OBJECT:
444
+			state->encounteredTopLevel |= 1<<RTF_OBJECT;
445
+			break;
446
+		case RTF_OBJECT_DATA:
447
+			if(state->encounteredTopLevel & (1<<RTF_OBJECT) ) {
448
+				state->cb_begin = rtf_object_begin;
449
+				state->cb_process = rtf_object_process;
450
+				state->cb_end = rtf_object_end;
451
+			}
452
+			break;
453
+	};
454
+}
455
+
456
+static void cleanup_stack(struct stack* stack,struct rtf_state* state,cli_ctx* ctx)
457
+{
458
+	while(stack && stack->stack_cnt && state->default_elements) {
459
+		pop_state(stack,state);
460
+		if(state->cb_begin)
461
+			state->cb_end(state,ctx);
462
+	}
463
+}
464
+
465
+
466
/*
 * Release everything cli_scanrtf() owns: action table, pending states, read
 * buffer, temporary directory (unless temporaries are to be kept) and the
 * state stack. Uses the locals actiontable, stack, state, ctx, buff and
 * tempname of the calling function.
 *
 * Wrapped in do { } while(0) so that "SCAN_CLEANUP;" is a single statement
 * even under an unbraced if/else.
 */
#define SCAN_CLEANUP \
	do {\
		tableDestroy(actiontable);\
		cleanup_stack(&stack,&state,ctx);\
		free(buff);\
		if(!cli_leavetemps_flag)\
			cli_rmdirs(tempname);\
		free(tempname);\
		free(stack.states);\
	} while(0)
474
+
475
+int cli_scanrtf(int desc, cli_ctx *ctx)
476
+{
477
+	char* tempname;
478
+	const unsigned char* ptr;
479
+	const unsigned char* ptr_end;
480
+	unsigned char* buff;
481
+	int ret = CL_CLEAN;
482
+	struct rtf_state state;
483
+	struct stack stack;
484
+	size_t bread;
485
+	table_t* actiontable;
486
+	uint8_t main_symbols[256];
487
+
488
+	cli_dbgmsg("in cli_scanrtf()\n");
489
+
490
+	memset(main_symbols, 0, 256);
491
+	main_symbols['{']=1;
492
+	main_symbols['}']=1;
493
+	main_symbols['\\']=1;
494
+
495
+	stack.stack_cnt = 0;
496
+	stack.stack_size = 16;
497
+	stack.elements = 0;
498
+	stack.states = cli_malloc(stack.stack_size*sizeof(*stack.states));
499
+
500
+	if(!stack.states)
501
+		return CL_EMEM;
502
+
503
+	buff = cli_malloc(BUFF_SIZE);
504
+	if(!buff) {
505
+		free(stack.states);
506
+		return CL_EMEM;
507
+	}
508
+
509
+	tempname = cli_gentemp(NULL);
510
+
511
+	if(mkdir(tempname, 0700)) {
512
+	    	cli_dbgmsg("ScanRTF -> Can't create temporary directory %s\n", tempname);
513
+		free(stack.states);
514
+		free(buff);
515
+		free(tempname);
516
+		return CL_ETMPDIR;
517
+	}
518
+
519
+	actiontable = tableCreate();
520
+	if((ret = load_actions(actiontable))) {
521
+		cli_dbgmsg("RTF: Unable to load rtf action table\n");
522
+		free(stack.states);
523
+		free(buff);
524
+		if(!cli_leavetemps_flag)
525
+			cli_rmdirs(tempname);
526
+		free(tempname);
527
+		return ret;
528
+	}
529
+
530
+	init_rtf_state(&state);
531
+
532
+	while(( bread = cli_readn(desc, buff, BUFF_SIZE) )) {
533
+		ptr = buff;
534
+		ptr_end = buff + bread;
535
+		while(ptr < ptr_end) {
536
+			switch(state.parse_state) {
537
+				case PARSE_MAIN: 
538
+					switch(*ptr++) {
539
+						case '{':
540
+							if(( ret = push_state(&stack,&state) )) {
541
+								cli_dbgmsg("RTF:Push failure!\n");
542
+								SCAN_CLEANUP;
543
+								return ret;
544
+							}
545
+							break;
546
+						case '}':
547
+							if(state.cb_data)
548
+								if(( ret = state.cb_end(&state, ctx) )) {
549
+									SCAN_CLEANUP;
550
+									return ret;
551
+								}
552
+							if(( ret = pop_state(&stack,&state) )) {
553
+								cli_dbgmsg("RTF:pop failure!\n");
554
+								SCAN_CLEANUP;
555
+								return ret;
556
+							}
557
+							break;
558
+						case '\\':
559
+							state.parse_state = PARSE_CONTROL_;
560
+							break;
561
+						default:
562
+							ptr--;
563
+							{
564
+								size_t i;
565
+								size_t left = ptr_end - ptr;
566
+								size_t use = left;
567
+								for(i = 1;i < left; i++)
568
+									if(main_symbols[ptr[i]]) {
569
+										use = i;
570
+										break;
571
+									}
572
+								if(state.cb_begin) {
573
+									if(!state.cb_data)
574
+										 if(( ret = state.cb_begin(&state, ctx,tempname) )) {
575
+											 SCAN_CLEANUP;
576
+											 return ret;
577
+										}
578
+									if(( ret = state.cb_process(&state, ptr, use) )) {
579
+										state.cb_end(&state,ctx);
580
+										SCAN_CLEANUP;
581
+										return ret;
582
+									}
583
+								}
584
+								ptr += use;
585
+							}
586
+					}
587
+					break;
588
+				case PARSE_CONTROL_:					
589
+					if(isalpha(*ptr))  {
590
+						state.parse_state = PARSE_CONTROL_WORD;
591
+						state.controlword_cnt = 0;
592
+					}
593
+					else
594
+						state.parse_state = PARSE_CONTROL_SYMBOL;
595
+					break;
596
+				case PARSE_CONTROL_SYMBOL:
597
+					ptr++;	/* Do nothing */
598
+					state.parse_state = PARSE_MAIN;
599
+					break;
600
+				case PARSE_CONTROL_WORD:
601
+					if(state.controlword_cnt == 32) {
602
+						cli_dbgmsg("Invalid control word: maximum size exceeded:%s\n",state.controlword);
603
+						state.parse_state = PARSE_MAIN;
604
+					}
605
+					else if(isalpha(*ptr))
606
+						state.controlword[state.controlword_cnt++] = *ptr++;
607
+					else {
608
+						if(isspace(*ptr)) {
609
+							state.controlword[state.controlword_cnt++] = *ptr++;
610
+							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
611
+						}
612
+						else if (isdigit(*ptr)) {
613
+							state.parse_state = PARSE_CONTROL_WORD_PARAM;
614
+							state.controlword_param = 0;
615
+							state.controlword_param_sign = 1;
616
+						}
617
+						else if(*ptr == '-') {
618
+							ptr++;
619
+							state.parse_state = PARSE_CONTROL_WORD_PARAM;
620
+							state.controlword_param = 0;
621
+							state.controlword_param_sign = -1;
622
+						}
623
+						else {
624
+							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
625
+						}
626
+					}
627
+					break;
628
+				case PARSE_CONTROL_WORD_PARAM:
629
+					if(isdigit(*ptr)) {
630
+						state.controlword_param = state.controlword_param*10 + *ptr++ - '0';
631
+					}
632
+					else if(isalpha(*ptr)) {
633
+						ptr++;
634
+					}
635
+					else {
636
+						if(state.controlword_param_sign < 0)
637
+							state.controlword_param = -state.controlword_param;
638
+						state.parse_state = PARSE_INTERPRET_CONTROLWORD;
639
+					}
640
+					break;
641
+				case PARSE_INTERPRET_CONTROLWORD:
642
+					{
643
+						int action;
644
+
645
+						state.controlword[state.controlword_cnt] = '\0';
646
+						action = tableFind(actiontable, state.controlword);
647
+						if(action != -1)
648
+							rtf_action(&state,action, tempname, ctx);
649
+						state.parse_state = PARSE_MAIN;
650
+						break;
651
+					}
652
+			}
653
+		}
654
+	}
655
+
656
+	SCAN_CLEANUP;
657
+	return ret;
658
+}
659
+
660
+#endif
0 661
new file mode 100644
... ...
@@ -0,0 +1,11 @@
0
+#ifndef _RTF_H
1
+#define _RTF_H
2
+#include <clamav-config.h>
3
+#ifdef CL_EXPERIMENTAL
4
+
5
+
6
+int cli_scanrtf(int desc, cli_ctx *ctx);
7
+
8
+#endif
9
+
10
+#endif
... ...
@@ -80,6 +80,10 @@ extern short cli_leavetemps_flag;
80 80
 #include "pdf.h"
81 81
 #include "str.h"
82 82
 
83
+#ifdef CL_EXPERIMENTAL
84
+#include "rtf.h"
85
+#endif
86
+
83 87
 #ifdef HAVE_ZLIB_H
84 88
 #include <zlib.h>
85 89
 #include "unzip.h"
... ...
@@ -1775,6 +1779,12 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1775 1775
 		ret = cli_scanhtml_utf16(desc, ctx);
1776 1776
 	    break;
1777 1777
 
1778
+#ifdef CL_EXPERIMENTAL
1779
+	case CL_TYPE_RTF:
1780
+	    ret = cli_scanrtf(desc, ctx);
1781
+	    break;
1782
+#endif
1783
+
1778 1784
 	case CL_TYPE_MAIL:
1779 1785
 	    if(SCAN_MAIL)
1780 1786
 		ret = cli_scanmail(desc, ctx);