libclamav/rtf.c
f9d258e2
b0651a64
 		for(i=0;i<len && !isxdigit(input[i]);i++)
 			;
 		if(i<len) {
 			outdata[out_cnt++] = data->partial | hextable[input[i++]];
 			data->has_partial = 0;
 		}
 		else
 			return 0;
f9d258e2
 	}
b0651a64
 	else
 		i = 0;
f9d258e2
 
b0651a64
 	for(;i<len;i++) {
f9d258e2
 		if(isxdigit(input[i])) {
 				const unsigned char byte = hextable[ input[i++] ] << 4;
 				while(i<len && !isxdigit(input[i]))
 					i++;
 				if(i == len) {
 					data->partial = byte;
 					data->has_partial = 1;
 					break;
 				}
 				outdata[out_cnt++] = byte | hextable[ input[i] ];
 		}
 	}
 
 	out_data = outdata;
 	while(out_data && out_cnt) {
 		switch(data->internal_state) {
 			case WAIT_MAGIC: {
 						 for(i=0; i<out_cnt && data->bread < rtf_data_magic_len; i++, data->bread++)
 							 if(rtf_data_magic[data->bread] != out_data[i]) {
 								 cli_dbgmsg("Warning: rtf objdata magic number not matched, expected:%d, got: %d, at pos:%d\n",rtf_data_magic[i],out_data[i],data->bread);
 							 }
 						 out_cnt  -= i;
 						 if(data->bread == rtf_data_magic_len) {
 							 out_data += i;
 							 data->bread = 0;
 							 data->internal_state = WAIT_DESC_LEN;						 
 						 }
 						 break;
 					 }
 			case WAIT_DESC_LEN: {
 						    if(data->bread == 0)
 							    data->desc_len = 0;
 						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
 							    data->desc_len  |=  ((size_t)out_data[i]) << (data->bread*8);
 						    out_cnt  -= i;
 						    if(data->bread == 4) {
 							    out_data += i;
 							    data->bread=0;
 							    if(data->desc_len > 64) {
 								    cli_dbgmsg("Description length too big (%d), showing only 64 bytes of it\n",data->desc_len);
 								    data->desc_name = cli_malloc(65);
 							    }
 							    else
 								    data->desc_name = cli_malloc(data->desc_len+1);
 							    if(!data->desc_name) {
 								    return CL_EMEM;
 							    }
 							    data->internal_state = WAIT_DESC;
 						    }
 						    break;
 					    }
 			case WAIT_DESC:{
 					       for(i=0;i<out_cnt && data->bread < data->desc_len && data->bread < 64;i++, data->bread++)
 						       data->desc_name[data->bread] = out_data[i];
 					       /*FIXME: sanity check here, to avoid segfault */
 					       if(i+data->desc_len-data->bread > out_cnt) {
 						       cli_dbgmsg("Can't interpret length in wait_desc\n");
 						       return 0;/* bail out */
 					       }
 					       out_cnt  -= i + data->desc_len - data->bread;
 					       if(data->bread <= data->desc_len) {
 						       out_data += i + data->desc_len - data->bread;
 						       data->desc_name[data->bread] = '\0';
 						       data->bread = 0;
 						       cli_dbgmsg("Preparing to dump rtf embedded object, description:%s\n",data->desc_name);
 						       free(data->desc_name);
 						       data->desc_name = NULL;
 						       data->internal_state = WAIT_ZERO;
 					       }
 					       break;
 				       }
 			case WAIT_ZERO:{
 					       if(out_cnt < 8-data->bread) {
 						       out_cnt = 0;
 						       data->bread += out_cnt;
 					       }
 					       else {
 						       out_cnt  -= 8-data->bread;
 						       data->bread = 8;
 					       }
 					       if(data->bread == 8) {
 						       out_data += 8;
 						       data->bread = 0;
 						       data->internal_state = WAIT_DATA_SIZE;
 					       }
 					       break;
 				       }
 
 			case WAIT_DATA_SIZE: {
 						    if(data->bread == 0)
 							    data->desc_len = 0;
 						    for(i=0; i<out_cnt && data->bread < 4; i++,data->bread++)
 							    data->desc_len  |= ((size_t)out_data[i]) << (8*data->bread);
 						    out_cnt  -= i;
 						    if(data->bread == 4) {
 							    out_data += i;
 							    data->bread=0;
 							    cli_dbgmsg("Dumping rtf embedded object of size:%ld\n",data->desc_len);
 					    		    data->name = cli_gentempdesc(data->tmpdir, &data->fd);
 							    if(!data->name)
 								    return CL_ETMPFILE;
 							    data->internal_state = DUMP_DATA;
 						    }
 						    break;
 					     }
 			case DUMP_DATA: {
 						ssize_t out_want = out_cnt < data->desc_len ? out_cnt : data->desc_len;
 						if(!data->bread) {
 							if(out_data[0] != 0xd0 || out_data[1]!=0xcf) {
 								/* this is not an ole2 doc, but some ole (stream?) to be
 								 * decoded by cli_decode_ole_object*/
 							    char out[4];
 							    data->bread = 1;/* flag to indicate this needs to be scanned with cli_decode_ole_object*/
 							    cli_writeint32(out,data->desc_len);
 							    if(cli_writen(data->fd,out,4)!=4)
 								    return CL_EIO; 
 							}
 							else
 								data->bread = 2;
 						}
 
 						data->desc_len -= out_want;
 						if(cli_writen(data->fd,out_data,out_want) != out_want) {
 							return CL_EIO;
 						}
 						out_data += out_want;
 						out_cnt  -= out_want;
 						if(!data->desc_len) { 
 							int rc;
 							if(( rc = decode_and_scan(data, data->ctx) ))
 								return rc;
 							data->bread=0;
 							data->internal_state = WAIT_MAGIC;
 						}
 						break;					
 					}				    
 			case DUMP_DISCARD:
 			default:
 					out_cnt = 0;
 					;
 		}
 	}
 	return 0;
 }
 
 
 
 /*
  * End-of-group callback for an embedded object.
  *
  * Takes the object record stored in state->cb_data.  If the record's fd
  * field is non-zero the record is handed to decode_and_scan(); after that
  * the record's name, its description string and the record itself are
  * freed and state->cb_data is reset to NULL.
  *
  * Returns 0 when there is no record or when decode_and_scan() was not
  * called, otherwise whatever decode_and_scan() returned.
  */
 static int rtf_object_end(struct rtf_state* state,cli_ctx* ctx)
 {
 	struct rtf_object_data* obj = state->cb_data;
 	int status;
 
 	if(obj == NULL)
 		return 0;
 
 	status = obj->fd ? decode_and_scan(obj, ctx) : 0;
 
 	/* free() accepts NULL, so the members need no individual checks */
 	free(obj->name);
 	free(obj->desc_name);
 	free(obj);
 	state->cb_data = NULL;
 
 	return status;
 }
 
 
066b62d0
 static void rtf_action(struct rtf_state* state,long action)
f9d258e2
 {
 	switch(action) {
 		case RTF_OBJECT:
 			state->encounteredTopLevel |= 1<<RTF_OBJECT;
 			break;
 		case RTF_OBJECT_DATA:
 			if(state->encounteredTopLevel & (1<<RTF_OBJECT) ) {
 				state->cb_begin = rtf_object_begin;
 				state->cb_process = rtf_object_process;
 				state->cb_end = rtf_object_end;
 			}
 			break;
 	};
 }
 
 /*
  * Unwind the parser state stack during clean-up.
  *
  * While the stack pointer is non-NULL, stack->stack_cnt is non-zero and
  * state->default_elements is non-zero, pop_state() is called and, if the
  * resulting state has callbacks, its end callback is run.
  *
  * The return values of pop_state() and of the end callback are ignored;
  * this is best-effort clean-up.
  */
 static void cleanup_stack(struct stack* stack,struct rtf_state* state,cli_ctx* ctx)
 {
 	while(stack && stack->stack_cnt && state->default_elements) {
 		pop_state(stack,state);
 		/* cb_end is the pointer that gets called, so it has to be
 		 * checked as well; testing cb_begin alone would allow a call
 		 * through a NULL cb_end. */
 		if(state->cb_begin && state->cb_end)
 			state->cb_end(state,ctx);
 	}
 }
 
 
 /*
  * Release the resources held by cli_scanrtf().  Expands in a scope that has
  * actiontable, stack, state, ctx, buff and tempname: destroys the action
  * table, unwinds the state stack, frees the read buffer, removes the
  * temporary directory unless cli_leavetemps_flag is set, and frees the
  * directory name and the stack storage.
  *
  * Wrapped in do { } while(0) so that "SCAN_CLEANUP;" is exactly one
  * statement and stays correct under an unbraced if/else.
  */
 #define SCAN_CLEANUP \
 	do {\
 		tableDestroy(actiontable);\
 		cleanup_stack(&stack,&state,ctx);\
 		free(buff);\
 		if(!cli_leavetemps_flag)\
 			cli_rmdirs(tempname);\
 		free(tempname);\
 		free(stack.states);\
 	} while(0)
 
 /*
  * Scan an RTF document read from a descriptor.
  *
  * The input is read in BUFF_SIZE chunks and fed through a state machine
  * (state.parse_state, which persists across chunks):
  *   - '{' pushes the current state, '}' runs the end callback (when the
  *     state carries callback data) and pops the state;
  *   - '\' starts a control word or control symbol; a completed control
  *     word is looked up in the action table and passed to rtf_action();
  *   - any other run of bytes is handed to the state's process callback
  *     when callbacks are installed, creating the callback data through the
  *     begin callback first if needed.
  * The begin callback receives the name of a temporary directory created
  * here; the directory is removed on exit unless cli_leavetemps_flag is set.
  *
  * desc  descriptor the document is read from
  * ctx   scan context, passed through to the callbacks
  *
  * Returns CL_EMEM or CL_ETMPDIR when set-up fails, the first non-zero code
  * reported by load_actions(), push_state(), pop_state() or a callback, and
  * 0 once the input has been consumed without such an error.
  */
 int cli_scanrtf(int desc, cli_ctx *ctx)
 {
 	char* tempname;
 	const unsigned char* ptr;
 	const unsigned char* ptr_end;
 	unsigned char* buff;
 	int ret = CL_CLEAN;
 	struct rtf_state state;
 	struct stack stack;
 	int nread;
 	table_t* actiontable;
 	uint8_t main_symbols[256];
 
 	cli_dbgmsg("in cli_scanrtf()\n");
 
 	/* bytes that end a run of plain text in PARSE_MAIN */
 	memset(main_symbols, 0, sizeof(main_symbols));
 	main_symbols['{']=1;
 	main_symbols['}']=1;
 	main_symbols['\\']=1;
 
 	stack.stack_cnt = 0;
 	stack.stack_size = 16;
 	stack.elements = 0;
 	stack.states = cli_malloc(stack.stack_size*sizeof(*stack.states));
 
 	if(!stack.states)
 		return CL_EMEM;
 
 	buff = cli_malloc(BUFF_SIZE);
 	if(!buff) {
 		free(stack.states);
 		return CL_EMEM;
 	}
 
 	tempname = cli_gentemp(NULL);
 	if(!tempname) {
 		/* never hand a NULL path to mkdir().
 		 * NOTE(review): cli_gentemp() is defined elsewhere; CL_EMEM is
 		 * assumed to be the right code for its failure - confirm. */
 		free(stack.states);
 		free(buff);
 		return CL_EMEM;
 	}
 
 	if(mkdir(tempname, 0700)) {
 	    	cli_dbgmsg("ScanRTF -> Can't create temporary directory %s\n", tempname);
 		free(stack.states);
 		free(buff);
 		free(tempname);
 		return CL_ETMPDIR;
 	}
 
 	actiontable = tableCreate();
 	if(!actiontable) {
 		cli_dbgmsg("RTF: Unable to create rtf action table\n");
 		free(stack.states);
 		free(buff);
 		if(!cli_leavetemps_flag)
 			cli_rmdirs(tempname);
 		free(tempname);
 		return CL_EMEM;
 	}
 	if((ret = load_actions(actiontable))) {
 		cli_dbgmsg("RTF: Unable to load rtf action table\n");
 		/* The table exists and must be destroyed too.  stack.stack_cnt is
 		 * still 0 here, so the stack unwinding done by the common
 		 * clean-up has nothing to do. */
 		goto done;
 	}
 
 	init_rtf_state(&state);
 
 	/* cli_readn() is declared elsewhere; its result is kept signed so that
 	 * an error return (assumed to be negative - confirm) ends the loop
 	 * instead of being used as a byte count. */
 	while(( nread = cli_readn(desc, buff, BUFF_SIZE) ) > 0) {
 		ptr = buff;
 		ptr_end = buff + nread;
 		while(ptr < ptr_end) {
 			switch(state.parse_state) {
 				case PARSE_MAIN: 
 					switch(*ptr++) {
 						case '{':
 							if(( ret = push_state(&stack,&state) )) {
 								cli_dbgmsg("RTF:Push failure!\n");
 								goto done;
 							}
 							break;
 						case '}':
 							if(state.cb_data && ( ret = state.cb_end(&state, ctx) ))
 								goto done;
 							if(( ret = pop_state(&stack,&state) )) {
 								cli_dbgmsg("RTF:pop failure!\n");
 								goto done;
 							}
 							break;
 						case '\\':
 							state.parse_state = PARSE_CONTROL_;
 							break;
 						default: {
 							/* Plain text: everything up to the next
 							 * '{', '}' or '\' in this chunk is one run.
 							 * The byte just consumed is part of it. */
 							size_t i;
 							size_t left;
 							size_t use;
 
 							ptr--;
 							left = ptr_end - ptr;
 							use = left;
 							for(i = 1;i < left; i++)
 								if(main_symbols[ptr[i]]) {
 									use = i;
 									break;
 								}
 							if(state.cb_begin) {
 								if(!state.cb_data && ( ret = state.cb_begin(&state, ctx,tempname) ))
 									goto done;
 								if(( ret = state.cb_process(&state, ptr, use) )) {
 									state.cb_end(&state,ctx);
 									goto done;
 								}
 							}
 							ptr += use;
 							break;
 						}
 					}
 					break;
 				case PARSE_CONTROL_:					
 					/* decide on the byte after '\' without consuming it */
 					if(isalpha(*ptr))  {
 						state.parse_state = PARSE_CONTROL_WORD;
 						state.controlword_cnt = 0;
 					}
 					else
 						state.parse_state = PARSE_CONTROL_SYMBOL;
 					break;
 				case PARSE_CONTROL_SYMBOL:
 					ptr++;	/* Do nothing */
 					state.parse_state = PARSE_MAIN;
 					break;
 				case PARSE_CONTROL_WORD:
 					if(state.controlword_cnt == 32) {
 						/* Terminate before printing with %s.  Index 32
 						 * is the slot PARSE_INTERPRET_CONTROLWORD already
 						 * writes when 31 letters are followed by
 						 * whitespace. */
 						state.controlword[state.controlword_cnt] = '\0';
 						cli_dbgmsg("Invalid control word: maximum size exceeded:%s\n",state.controlword);
 						state.parse_state = PARSE_MAIN;
 					}
 					else if(isalpha(*ptr))
 						state.controlword[state.controlword_cnt++] = *ptr++;
 					else {
 						if(isspace(*ptr)) {
 							/* the delimiting whitespace is stored as part
 							 * of the word that gets looked up */
 							state.controlword[state.controlword_cnt++] = *ptr++;
 							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 						}
 						else if (isdigit(*ptr)) {
 							state.parse_state = PARSE_CONTROL_WORD_PARAM;
 							state.controlword_param = 0;
 							state.controlword_param_sign = 1;
 						}
 						else if(*ptr == '-') {
 							ptr++;
 							state.parse_state = PARSE_CONTROL_WORD_PARAM;
 							state.controlword_param = 0;
 							state.controlword_param_sign = -1;
 						}
 						else {
 							state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 						}
 					}
 					break;
 				case PARSE_CONTROL_WORD_PARAM:
 					if(isdigit(*ptr)) {
 						state.controlword_param = state.controlword_param*10 + *ptr++ - '0';
 					}
 					else if(isalpha(*ptr)) {
 						ptr++;
 					}
 					else {
 						if(state.controlword_param_sign < 0)
 							state.controlword_param = -state.controlword_param;
 						state.parse_state = PARSE_INTERPRET_CONTROLWORD;
 					}
 					break;
 				case PARSE_INTERPRET_CONTROLWORD:
 					{
 						int action;
 
 						state.controlword[state.controlword_cnt] = '\0';
 						action = tableFind(actiontable, state.controlword);
 						if(action != -1)
 							rtf_action(&state,action);
 						state.parse_state = PARSE_MAIN;
 						break;
 					}
 			}
 		}
 	}
 
 	if(nread < 0)
 		cli_dbgmsg("RTF: read error, scan stopped early\n");
 
 done:
 	SCAN_CLEANUP;
 	return ret;
 }