libclamav/rtf.c
52c2a8bd
f84c2df9
 	int    warned;
52c2a8bd
 };
 
 /* Table of the RTF control words the scanner reacts to, with the action each
  * one triggers.  load_actions() inserts these pairs into the lookup table that
  * cli_scanrtf() queries with the control word it has collected.
  * The parser appends a whitespace delimiter to the collected word, which is why
  * "objdata " is spelled with a trailing space while "object" is not. */
 static const struct rtf_action_mapping {
 	const char* controlword;	/* key, compared as a NUL-terminated string */
 	const enum rtf_action action;	/* value handed to rtf_action() */
 } rtf_action_mapping [] = 
 {
 	{"object", RTF_OBJECT},
 	{"objdata ",RTF_OBJECT_DATA}
 };
 
 /* number of entries in rtf_action_mapping[] */
 static const size_t rtf_action_mapping_cnt = sizeof(rtf_action_mapping)/sizeof(rtf_action_mapping[0]);
 
 /* States of the decoder that rtf_object_process() runs over the hex-decoded
  * \objdata bytes: header magic, description length, description, 8 skipped
  * bytes, payload size, payload dump.  DUMP_DISCARD only drops its input; no
  * code in this section ever switches to it. */
 enum rtf_objdata_state {WAIT_MAGIC, WAIT_DESC_LEN, WAIT_DESC, WAIT_ZERO, WAIT_DATA_SIZE, DUMP_DATA, DUMP_DISCARD};
 /* Bytes expected at the very start of the decoded object data; a mismatch is
  * only reported through cli_dbgmsg, decoding continues regardless.
  * NOTE(review): presumably the OLE 1.0 object header (OLEVersion 0x00000501
  * followed by FormatID 2 = embedded object, both little endian) - confirm
  * against MS-OLEDS. */
 static const unsigned char rtf_data_magic[] = {0x01, 0x05, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00};
 /* length of rtf_data_magic[] in bytes */
 static const size_t rtf_data_magic_len  = sizeof(rtf_data_magic);
 
 /* Per-object decoder context.  Allocated by rtf_object_begin(), stored in
  * rtf_state.cb_data, advanced by rtf_object_process() and released by
  * rtf_object_end(). */
 struct rtf_object_data {
 	char* name;		/* path of the temp file returned by cli_gentempdesc(), NULL when none */
 	int  fd;		/* descriptor of that temp file, -1 when none is open */
 	int partial;		/* high nibble (already shifted left by 4) of a byte whose low nibble has not arrived yet */
 	int has_partial;	/* non-zero while 'partial' holds such a pending nibble */
 	enum rtf_objdata_state internal_state;	/* current decoder state */
 	char* desc_name;	/* buffer for (at most the first 64 bytes of) the object description */
 	const char* tmpdir;	/* directory handed to cli_gentempdesc() and cli_decode_ole_object() */
 	cli_ctx*    ctx;	/* scan context used when the dumped object is scanned */
 	size_t desc_len;	/* description length; reused as the number of payload bytes still to dump */
 	size_t bread;		/* bytes consumed so far in the current state; in DUMP_DATA reused as a flag:
 				 * 0 = nothing written yet, 1 = scan via cli_decode_ole_object(), 2 = scan the file directly */
 };
 
 /* Size of the read buffer in cli_scanrtf() and of the decoded-bytes buffer in
  * rtf_object_process(). */
 #define BUFF_SIZE 8192
 /* generated by contrib/phishing/generate_tables.c */
 /* Maps a character code to the value of the hexadecimal digit it represents
  * ('0'-'9', 'A'-'F', 'a'-'f'); every other index yields 0, so callers have to
  * test with isxdigit() first to tell '0' apart from a non-digit. */
 static const short int hextable[256] = {
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
 };
 
 /* Defined elsewhere.  When non-zero, this file neither unlinks the dumped
  * object files nor removes the temporary directory it created. */
 extern int short cli_leavetemps_flag;
 
 static void init_rtf_state(struct rtf_state* state)
 {
 	*state = base_state;
 	state->parse_state = PARSE_MAIN;
 	state->controlword_cnt = 0;
 }
 
 /* Field-by-field equality test of two parser states.
  * Only the fields listed below take part; the first 33 bytes of the control
  * word buffer are compared bytewise.
  * Returns 1 when all of them match, 0 otherwise. */
 static int compare_state(const struct rtf_state* a,const struct rtf_state* b)
 {
 	if(a->controlword_param != b->controlword_param)
 		return 0;
 	if(a->parse_state != b->parse_state)
 		return 0;
 	if(a->encounteredTopLevel != b->encounteredTopLevel)
 		return 0;
 	if(memcmp(a->controlword,b->controlword,33) != 0)
 		return 0;
 	if(a->cb_begin != b->cb_begin)
 		return 0;
 	if(a->cb_process != b->cb_process)
 		return 0;
 	if(a->cb_end != b->cb_end)
 		return 0;
 	return a->cb_data == b->cb_data;
 }
 
 
 static int push_state(struct stack* stack,struct rtf_state* state)
 {
 	int toplevel;
 	size_t defelements;
 
 	stack->elements++;
 	if( compare_state(state,&base_state)) { 
 		state->default_elements++;
 		return 0;/* this is default state, don't push it, we'll know when we pop it that it was the default one,
 			  we store in the state how many default elements we have on the stack */
 	}
 	if(stack->stack_cnt >= stack->stack_size) {
 		/* grow stack */
 		stack->stack_size += 128;
84fd5a61
 		stack->states = cli_realloc2(stack->states, stack->stack_size*sizeof(*stack->states));
52c2a8bd
 		if(!stack->states)
 			return CL_EMEM;
 	}
 	stack->states[stack->stack_cnt++] = *state;
 	toplevel = state->encounteredTopLevel;
 	defelements = state->default_elements;
 
 	*state = base_state;
 
 	state->encounteredTopLevel = toplevel;
 	state->default_elements = defelements;
 	return 0; 
 }
 
 
 /* Leave an RTF group ('}'): restore the state that was active before the
  * matching push_state().
  *
  * Counted default states are consumed first; after that the topmost stored
  * state is taken from the stack.  Popping with nothing left is reported once
  * per stack through cli_dbgmsg and yields base_state.
  * Always returns 0. */
 static int pop_state(struct stack* stack,struct rtf_state* state)
 {
 	stack->elements--;
 
 	if(state->default_elements) {
 		/* this is a default 'state': it was only counted, not stored */
 		const int keep_toplevel = state->encounteredTopLevel;
 		const size_t remaining = state->default_elements-1;
 
 		*state = base_state;
 		state->encounteredTopLevel = keep_toplevel;
 		state->default_elements = remaining;
 		return 0;
 	}
 
 	if(stack->stack_cnt) {
 		stack->stack_cnt--;
 		*state = stack->states[stack->stack_cnt];
 		return 0;
 	}
 
 	/* unbalanced '}' */
 	if(!stack->warned) {
 		cli_dbgmsg("Warning: attempt to pop from empty stack!\n");
 		stack->warned = 1;
 	}
 	*state = base_state;/* lets assume we give it a base state */
 	return 0;
 }
 
 
 /* Insert every entry of rtf_action_mapping[] into table t.
  * Returns 0 on success, -1 as soon as one tableInsert() call reports -1. */
 static int load_actions(table_t* t)
 {
 	const struct rtf_action_mapping* entry = rtf_action_mapping;
 	const struct rtf_action_mapping* const stop = rtf_action_mapping + rtf_action_mapping_cnt;
 
 	while(entry != stop) {
 		if(tableInsert(t, entry->controlword, entry->action) == -1)
 			return -1;
 		entry++;
 	}
 	return 0;
 }
 
 /* cb_begin handler for \objdata: allocate the decoder context and attach it to
  * state->cb_data.
  * The context starts in WAIT_MAGIC with no temp file, no description buffer
  * and no pending hex nibble; tmpdir and ctx are stored for later use.
  * Returns 0 on success, CL_EMEM when the allocation fails. */
 static int rtf_object_begin(struct rtf_state* state,cli_ctx* ctx,const char* tmpdir)
 {
 	struct rtf_object_data* obj;
 
 	obj = cli_malloc(sizeof(*obj));
 	if(obj == NULL)
 		return CL_EMEM;
 
 	/* where and on whose behalf the object will be dumped */
 	obj->tmpdir = tmpdir;
 	obj->ctx    = ctx;
 
 	/* nothing dumped yet */
 	obj->name   = NULL;
 	obj->fd = -1;
 	obj->desc_name = NULL;
 
 	/* decoder starts from scratch */
 	obj->internal_state = WAIT_MAGIC;
 	obj->bread = 0;
 	obj->has_partial = 0;
 	obj->partial = 0;
 
 	state->cb_data = obj;
 	return 0;
 }
 
 
 /* Scan the object that has been dumped to data->fd, then dispose of the file.
  *
  * When data->bread is 1 the file is first run through cli_decode_ole_object()
  * and the descriptor it returns is scanned; otherwise the dumped file itself is
  * scanned.  Afterwards the descriptor is closed, and the temp file is unlinked
  * (unless cli_leavetemps_flag is set) and its name freed.
  *
  * Returns the scan result when it differs from CL_CLEAN, 0 otherwise
  * (also when there was no open file or decoding produced no descriptor). */
 static int decode_and_scan(struct rtf_object_data* data, cli_ctx* ctx)
 {
 	int ofd, ret=0;
 
 	cli_dbgmsg("RTF:Scanning embedded object:%s\n",data->name ? data->name : "(null)");
 	/* -1 is the "no file" marker; 0 is a valid descriptor and must be
 	 * scanned and closed like any other */
 	if(data->fd >= 0) {
 		if(data->bread == 1) {
 			cli_dbgmsg("Decoding ole object\n");
 			lseek(data->fd,0,SEEK_SET);
 			ofd = cli_decode_ole_object(data->fd,data->tmpdir);
 			if (ofd >= 0) {
 				ret = cli_magic_scandesc(ofd, ctx);
 				close(ofd);
 			}
 		}
 		else
 			ret = cli_magic_scandesc(data->fd,ctx);
 		close(data->fd);
 	}
 	data->fd = -1;
 	if(data->name) {
 		if(!cli_leavetemps_flag)
 			unlink(data->name);
 		free(data->name);
 		data->name = NULL;
 	}
 
 	if(ret != CL_CLEAN)
 		return ret;
 	return 0;
 }
 
 static int rtf_object_process(struct rtf_state* state, const unsigned char* input,const size_t len)
 {
 	struct rtf_object_data* data = state->cb_data;
 	unsigned char outdata[BUFF_SIZE];
 	const unsigned char* out_data;
 	size_t out_cnt = 0;
 	size_t i;
 
 	if(!data || !len)
 		return 0;
 
 	if(data->has_partial) {
380fb4f0
 		for(i=0;i<len && !isxdigit(input[i]);i++)
 			;
 		if(i<len) {
 			outdata[out_cnt++] = data->partial | hextable[input[i++]];
 			data->has_partial = 0;
 		}
 		else
 			return 0;
52c2a8bd
 	}
380fb4f0
 	else
 		i = 0;
52c2a8bd
 
380fb4f0
 	for(;i<len;i++) {
52c2a8bd
 		if(isxdigit(input[i])) {
 				const unsigned char byte = hextable[ input[i++] ] << 4;
 				while(i<len && !isxdigit(input[i]))
 					i++;
 				if(i == len) {
 					data->partial = byte;
 					data->has_partial = 1;
 					break;
 				}
 				outdata[out_cnt++] = byte | hextable[ input[i] ];
 		}
 	}
 
 	out_data = outdata;
 	while(out_data && out_cnt) {
 		switch(data->internal_state) {
 			case WAIT_MAGIC: {
3dc58be2
 						 cli_dbgmsg("RTF: waiting for magic\n");
52c2a8bd
 						 for(i=0; i<out_cnt && data->bread < rtf_data_magic_len; i++, data->bread++)
 							 if(rtf_data_magic[data->bread] != out_data[i]) {
3dc58be2
 								 cli_dbgmsg("Warning: rtf objdata magic number not matched, expected:%d, got: %d, at pos:%lu\n",rtf_data_magic[i],out_data[i],data->bread);
52c2a8bd
 							 }
 						 out_cnt  -= i;
 						 if(data->bread == rtf_data_magic_len) {
 							 out_data += i;
 							 data->bread = 0;
 							 data->internal_state = WAIT_DESC_LEN;						 
 						 }
 						 break;
 					 }
 			case WAIT_DESC_LEN: {
 						    if(data->bread == 0)
 							    data->desc_len = 0;
 						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
 							    data->desc_len  |=  ((size_t)out_data[i]) << (data->bread*8);
 						    out_cnt  -= i;
 						    if(data->bread == 4) {
 							    out_data += i;
 							    data->bread=0;
 							    if(data->desc_len > 64) {
3dc58be2
 								    cli_dbgmsg("Description length too big (%lu), showing only 64 bytes of it\n",data->desc_len);
52c2a8bd
 								    data->desc_name = cli_malloc(65);
 							    }
 							    else
 								    data->desc_name = cli_malloc(data->desc_len+1);
 							    if(!data->desc_name) {
 								    return CL_EMEM;
 							    }
 							    data->internal_state = WAIT_DESC;
3dc58be2
 							    cli_dbgmsg("RTF: description length:%lu\n",data->desc_len);
52c2a8bd
 						    }
 						    break;
 					    }
 			case WAIT_DESC:{
3dc58be2
 					       cli_dbgmsg("RTF: in WAIT_DESC\n");
52c2a8bd
 					       for(i=0;i<out_cnt && data->bread < data->desc_len && data->bread < 64;i++, data->bread++)
 						       data->desc_name[data->bread] = out_data[i];
3dc58be2
 					       out_cnt -= i;
 					       out_data += i;
 					       if(data->bread < data->desc_len && data->bread < 64) {
 						       cli_dbgmsg("RTF: waiting for more data(1)\n");
 						       return 0;/* wait for more data */
52c2a8bd
 					       }
 						       data->desc_name[data->bread] = '\0';
3dc58be2
 					       if(data->desc_len - data->bread > out_cnt) {
 						       data->desc_len -= out_cnt;
 						       cli_dbgmsg("RTF: waiting for more data(2)\n");
 						       return 0;/* wait for more data */
 					       }
 					       out_cnt  -= data->desc_len - data->bread;
 					       if(data->bread >= data->desc_len) {
 						       out_data += data->desc_len - data->bread;
52c2a8bd
 						       data->bread = 0;
 						       cli_dbgmsg("Preparing to dump rtf embedded object, description:%s\n",data->desc_name);
 						       free(data->desc_name);
 						       data->desc_name = NULL;
 						       data->internal_state = WAIT_ZERO;
 					       }
 					       break;
 				       }
 			case WAIT_ZERO:{
 					       if(out_cnt < 8-data->bread) {
 						       out_cnt = 0;
 						       data->bread += out_cnt;
 					       }
 					       else {
 						       out_cnt  -= 8-data->bread;
 						       data->bread = 8;
 					       }
 					       if(data->bread == 8) {
 						       out_data += 8;
 						       data->bread = 0;
3dc58be2
 						       cli_dbgmsg("RTF: next state: wait_data_size\n");
52c2a8bd
 						       data->internal_state = WAIT_DATA_SIZE;
 					       }
 					       break;
 				       }
 
 			case WAIT_DATA_SIZE: {
3dc58be2
 						     cli_dbgmsg("RTF: in WAIT_DATA_SIZE\n");
52c2a8bd
 						    if(data->bread == 0)
 							    data->desc_len = 0;
 						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
 							    data->desc_len  |= ((size_t)out_data[i]) << (8*data->bread);
 						    out_cnt  -= i;
 						    if(data->bread == 4) {
 							    out_data += i;
 							    data->bread=0;
 							    cli_dbgmsg("Dumping rtf embedded object of size:%ld\n",data->desc_len);
 					    		    data->name = cli_gentempdesc(data->tmpdir, &data->fd);
3dc58be2
 							    if(!data->name || data->fd < 0)
52c2a8bd
 								    return CL_ETMPFILE;
 							    data->internal_state = DUMP_DATA;
3dc58be2
 	    						    cli_dbgmsg("RTF: next state: DUMP_DATA\n");
52c2a8bd
 						    }
 						    break;
 					     }
 			case DUMP_DATA: {
 						ssize_t out_want = out_cnt < data->desc_len ? out_cnt : data->desc_len;
 						if(!data->bread) {
 							if(out_data[0] != 0xd0 || out_data[1]!=0xcf) {
 								/* this is not an ole2 doc, but some ole (stream?) to be
 								 * decoded by cli_decode_ole_object*/
 							    char out[4];
 							    data->bread = 1;/* flag to indicate this needs to be scanned with cli_decode_ole_object*/
 							    cli_writeint32(out,data->desc_len);
 							    if(cli_writen(data->fd,out,4)!=4)
 								    return CL_EIO; 
 							}
 							else
 								data->bread = 2;
 						}
 
 						data->desc_len -= out_want;
 						if(cli_writen(data->fd,out_data,out_want) != out_want) {
 							return CL_EIO;
 						}
 						out_data += out_want;
 						out_cnt  -= out_want;
 						if(!data->desc_len) { 
 							int rc;
 							if(( rc = decode_and_scan(data, data->ctx) ))
 								return rc;
 							data->bread=0;
 							data->internal_state = WAIT_MAGIC;
 						}
 						break;					
 					}				    
 			case DUMP_DISCARD:
 			default:
 					out_cnt = 0;
 					;
 		}
 	}
 	return 0;
 }
 
 
 
 /* cb_end handler for \objdata: finish the current embedded object.
  *
  * If a dump file is still open (the object ended before the announced number
  * of bytes arrived) it is scanned through decode_and_scan() anyway.  The
  * decoder context is then freed and state->cb_data cleared.
  *
  * Returns the result of decode_and_scan(), or 0 when there was nothing to
  * scan or no context at all. */
 static int rtf_object_end(struct rtf_state* state,cli_ctx* ctx)
 {
 	struct rtf_object_data* data = state->cb_data;
 	int rc = 0;
 	if(!data)
 		return 0;
 	/* -1 marks "no file"; descriptor 0 is a real file and must not be skipped */
 	if(data->fd >= 0) {
 		rc = decode_and_scan(data, ctx);
 	}
 	/* free(NULL) is a no-op, so no guards are needed */
 	free(data->name);
 	free(data->desc_name);
 	free(data);
 	state->cb_data = NULL;
 	return rc;
 }
 
 
bda5598b
 static void rtf_action(struct rtf_state* state,long action)
52c2a8bd
 {
 	switch(action) {
 		case RTF_OBJECT:
 			state->encounteredTopLevel |= 1<<RTF_OBJECT;
 			break;
 		case RTF_OBJECT_DATA:
 			if(state->encounteredTopLevel & (1<<RTF_OBJECT) ) {
 				state->cb_begin = rtf_object_begin;
 				state->cb_process = rtf_object_process;
 				state->cb_end = rtf_object_end;
 			}
 			break;
 	};
 }
 
 /* Unwind whatever is still on the state stack, giving every restored state
  * that carries callback data the chance to release it through its cb_end.
  * A NULL stack is accepted and does nothing. */
 static void cleanup_stack(struct stack* stack,struct rtf_state* state,cli_ctx* ctx)
 {
 	if(!stack)
 		return;
 
 	while(stack->stack_cnt) {
 		pop_state(stack,state);
 		if(!state->cb_data)
 			continue;
 		if(!state->cb_end)
 			continue;
 		state->cb_end(state,ctx);
 	}
 }
 
 
 /* Release everything cli_scanrtf() owns: finish a pending object, destroy the
  * action table, unwind the state stack, free the read buffer, remove the
  * temporary directory (unless cli_leavetemps_flag is set) and free its name
  * and the stack storage.
  * Expands inside cli_scanrtf() only - it refers to that function's locals.
  * Wrapped in do { } while(0) so that "SCAN_CLEANUP;" is a single statement
  * wherever it is written. */
 #define SCAN_CLEANUP \
 	do {\
 		if(state.cb_data && state.cb_end)\
 			state.cb_end(&state,ctx);\
 		tableDestroy(actiontable);\
 		cleanup_stack(&stack,&state,ctx);\
 		free(buff);\
 		if(!cli_leavetemps_flag)\
 			cli_rmdirs(tempname);\
 		free(tempname);\
 		free(stack.states);\
 	} while(0)
 
 /* Scan an RTF document for embedded objects.
  *
  * desc  descriptor the document is read from, BUFF_SIZE bytes at a time
  * ctx   scan context, passed on to the object callbacks
  *
  * The text is run through a small state machine that tracks groups ('{' / '}'
  * via push_state / pop_state), collects control words and looks them up in the
  * action table.  Plain text inside a group whose state has callbacks installed
  * (see rtf_action) is handed to cb_process; objects are dumped below a
  * temporary directory created here and scanned from there.
  *
  * Returns CL_CLEAN when nothing was found, CL_EMEM / CL_ETMPDIR on setup
  * failures, the value of load_actions() if that fails, or the first non-zero
  * value returned by push_state, pop_state or one of the callbacks.
  * A read failure simply ends the loop and is not reported. */
 int cli_scanrtf(int desc, cli_ctx *ctx)
 {
 	char* tempname;
 	const unsigned char* ptr;
 	const unsigned char* ptr_end;
 	unsigned char* buff;
 	int ret = CL_CLEAN;
 	struct rtf_state state;
 	struct stack stack;
 	ssize_t bread;
 	table_t* actiontable;
 	uint8_t main_symbols[256];
 
 	cli_dbgmsg("in cli_scanrtf()\n");
 
 	/* characters that end a run of plain text in PARSE_MAIN */
 	memset(main_symbols, 0, 256);
 	main_symbols['{']=1;
 	main_symbols['}']=1;
 	main_symbols['\\']=1;
 
 	stack.stack_cnt = 0;
 	stack.stack_size = 16;
 	stack.elements = 0;
 	stack.warned = 0;
 	stack.states = cli_malloc(stack.stack_size*sizeof(*stack.states));
 
 	if(!stack.states)
 		return CL_EMEM;
 
 	buff = cli_malloc(BUFF_SIZE);
 	if(!buff) {
 		free(stack.states);
 		return CL_EMEM;
 	}
 
 	tempname = cli_gentemp(NULL);
 	if(!tempname) {
 		/* NOTE(review): assumes a NULL name means out of memory - confirm */
 		free(stack.states);
 		free(buff);
 		return CL_EMEM;
 	}
 
 	if(mkdir(tempname, 0700)) {
 	    	cli_dbgmsg("ScanRTF -> Can't create temporary directory %s\n", tempname);
 		free(stack.states);
 		free(buff);
 		free(tempname);
 		return CL_ETMPDIR;
 	}
 
 	actiontable = tableCreate();
 	if(!actiontable) {
 		cli_dbgmsg("RTF: Unable to load rtf action table\n");
 		free(stack.states);
 		free(buff);
 		if(!cli_leavetemps_flag)
 			cli_rmdirs(tempname);
 		free(tempname);
 		return CL_EMEM;
 	}
 	if((ret = load_actions(actiontable))) {
 		cli_dbgmsg("RTF: Unable to load rtf action table\n");
 		free(stack.states);
 		free(buff);
 		if(!cli_leavetemps_flag)
 			cli_rmdirs(tempname);
 		free(tempname);
 		tableDestroy(actiontable);
 		return ret;
 	}
 
 	init_rtf_state(&state);
 
 	while(( bread = cli_readn(desc, buff, BUFF_SIZE) ) > 0) {
 		ptr = buff;
 		ptr_end = buff + bread;
 		while(ptr < ptr_end) {
 			switch(state.parse_state) {
 				case PARSE_MAIN: 
 					switch(*ptr++) {
 						case '{':
 							if(( ret = push_state(&stack,&state) )) {
 								cli_dbgmsg("RTF:Push failure!\n");
 								SCAN_CLEANUP;
 								return ret;
 							}
 							break;
 						case '}':
 							/* finish the object of the group being closed,
 							 * then restore the enclosing state */
 							if(state.cb_data && state.cb_end)
 								if(( ret = state.cb_end(&state, ctx) )) {
 									SCAN_CLEANUP;
 									return ret;
 								}
 							if(( ret = pop_state(&stack,&state) )) {
 								cli_dbgmsg("RTF:pop failure!\n");
 								SCAN_CLEANUP;
 								return ret;
 							}
 							break;
 						case '\\':
 							state.parse_state = PARSE_CONTROL_;
 							break;
 						default:
 							ptr--;
 							{
 								/* plain text: take everything up to the next
 								 * '{', '}' or '\\' (or the end of the buffer) */
 								size_t i;
 								size_t left = ptr_end - ptr;
 								size_t use = left;
 								for(i = 1;i < left; i++)
 									if(main_symbols[ptr[i]]) {
 										use = i;
 										break;
 									}
 								if(state.cb_begin) {
 									if(!state.cb_data)
 										 if(( ret = state.cb_begin(&state, ctx,tempname) )) {
 											 SCAN_CLEANUP;
 											 return ret;
 										}
 									if(( ret = state.cb_process(&state, ptr, use) )) {
 										/* never call through a NULL handler */
 										if(state.cb_end)
 											state.cb_end(&state,ctx);
 										SCAN_CLEANUP;
 										return ret;
 									}
 								}
 								ptr += use;
 							}
 					}
 					break;
 				case PARSE_CONTROL_:					
 					/* look at the character after '\\' without consuming it */
 					if(isalpha(*ptr))  {
 						state.parse_state = PARSE_CONTROL_WORD;
 						state.controlword_cnt = 0;
 					}
 					else
 						state.parse_state = PARSE_CONTROL_SYMBOL;
 					break;
 				case PARSE_CONTROL_SYMBOL:
 					ptr++;	/* Do nothing */
 					state.parse_state = PARSE_MAIN;
 					break;
 				case PARSE_CONTROL_WORD:
 					if(state.controlword_cnt == 32) {
 						cli_dbgmsg("Invalid control word: maximum size exceeded:%s\n",state.controlword);
 						state.parse_state = PARSE_MAIN;
 					}
 					else if(isalpha(*ptr))
 						state.controlword[state.controlword_cnt++] = *ptr++;
 					else {
 						if(isspace(*ptr)) {
 							/* the delimiter becomes part of the looked-up word */
 							state.controlword[state.controlword_cnt++] = *ptr++;
 							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 						}
 						else if (isdigit(*ptr)) {
 							state.parse_state = PARSE_CONTROL_WORD_PARAM;
 							state.controlword_param = 0;
 							state.controlword_param_sign = 1;
 						}
 						else if(*ptr == '-') {
 							ptr++;
 							state.parse_state = PARSE_CONTROL_WORD_PARAM;
 							state.controlword_param = 0;
 							state.controlword_param_sign = -1;
 						}
 						else {
 							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 						}
 					}
 					break;
 				case PARSE_CONTROL_WORD_PARAM:
 					if(isdigit(*ptr)) {
 						state.controlword_param = state.controlword_param*10 + *ptr++ - '0';
 					}
 					else if(isalpha(*ptr)) {
 						ptr++;
 					}
 					else {
 						if(state.controlword_param_sign < 0)
 							state.controlword_param = -state.controlword_param;
 						state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 					}
 					break;
 				case PARSE_INTERPRET_CONTROLWORD:
 					{
 						int action;
 
 						state.controlword[state.controlword_cnt] = '\0';
 						action = tableFind(actiontable, state.controlword);
 						if(action != -1) {
 							if(state.cb_data && state.cb_end) {/* premature end of previous block */
 								state.cb_end(&state,ctx);
 								/* Detach the begin handler as well.  Left in
 								 * place it would start a new object on the next
 								 * text run that, with cb_end cleared, is never
 								 * finished, and a cb_process failure would then
 								 * call through the NULL cb_end. */
 								state.cb_begin = NULL;
 								state.cb_end = NULL;
 								state.cb_data = NULL;
 							}
 							rtf_action(&state,action);
 						}
 						state.parse_state = PARSE_MAIN;
 						break;
 					}
 			}
 		}
 	}
 
 	SCAN_CLEANUP;
 	return ret;
 }