libclamav/str.c
e3aaff8e
 /*
2023340a
  *  Copyright (C) 2007-2008 Sourcefire, Inc.
  *
  *  Authors: Tomasz Kojm, Nigel Horne, Török Edvin
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
bb34cb31
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
e3aaff8e
  */
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
8515ab9e
 #include "str.h"
 
e3aaff8e
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
c7029064
 #ifdef HAVE_STRINGS_H
 #include <strings.h>
 #endif
e3aaff8e
 #include <ctype.h>
e4e8366f
 #include <sys/types.h>
e3aaff8e
 
 #include "clamav.h"
 #include "others.h"
bedc58de
 #include "matcher.h"
 #include "cltypes.h"
eb290151
 #include "jsparse/textbuf.h"
e3aaff8e
 
a72b7d2e
 /*
  * Lookup table indexed by byte value.  Entries for the characters
  * '0'-'9' hold 0-9, entries for 'A'-'F' and 'a'-'f' hold 10-15, and
  * every other entry is -1 (not a hexadecimal digit).
  */
 static const int hex_chars[256] = {
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 };
 
 static inline int cli_hex2int(const char c)
e3aaff8e
 {
a72b7d2e
 	return hex_chars[(const unsigned char)c];
e3aaff8e
 }
 
 
38e881e3
 int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len) {
 	uint16_t val;
 	unsigned int i;
 	int c;
e3aaff8e
 
     for(i = 0; i < len; i += 2) {
bedc58de
 	val = 0;
 
 	if(hex[i] == '?' && hex[i + 1] == '?') {
 	    val |= CLI_MATCH_IGNORE;
 
 	} else if(hex[i + 1] == '?') {
 	    if((c = cli_hex2int(hex[i])) >= 0) {
 		val = c << 4;
 	    } else {
38e881e3
 		return 0;
bedc58de
 	    }
 	    val |= CLI_MATCH_NIBBLE_HIGH;
 
 	} else if(hex[i] == '?') {
 	    if((c = cli_hex2int(hex[i + 1])) >= 0) {
 		val = c;
 	    } else {
38e881e3
 		return 0;
bedc58de
 	    }
 	    val |= CLI_MATCH_NIBBLE_LOW;
 
fbcef1b0
 	} else if(hex[i] == '(') {
bedc58de
 	    val |= CLI_MATCH_ALTERNATIVE;
 
e3aaff8e
 	} else {
 	    if((c = cli_hex2int(hex[i])) >= 0) {
 		val = c;
 		if((c = cli_hex2int(hex[i+1])) >= 0) {
 		    val = (val << 4) + c;
8f0f9d56
 		} else {
38e881e3
 		    return 0;
e3aaff8e
 		}
 	    } else {
38e881e3
 		return 0;
e3aaff8e
 	    }
 	}
bedc58de
 
e3aaff8e
 	*ptr++ = val;
     }
38e881e3
     return 1;
 }
 
 /*
  * Convert the NUL-terminated hex signature string <hex> into a newly
  * allocated array of 16-bit values (encoding done by cli_realhex2ui()).
  * The array has one extra, zeroed element after the converted values.
  *
  * Returns NULL if the string length is odd, if the allocation fails or
  * if cli_realhex2ui() rejects the input.  On success the caller owns
  * the returned buffer and releases it with free().
  */
 uint16_t *cli_hex2ui(const char *hex)
 {
     uint16_t *str;
     unsigned int len;

     len = strlen(hex);

     if(len % 2 != 0) {
         /* the message now names this function (it used to say cli_hex2si) */
         cli_errmsg("cli_hex2ui(): Malformed hexstring: %s (length: %u)\n", hex, len);
         return NULL;
     }

     str = cli_calloc((len / 2) + 1, sizeof(uint16_t));
     if(!str)
         return NULL;

     if(cli_realhex2ui(hex, str, len))
         return str;

     free(str);
     return NULL;
 }
 
4048c4f6
 /*
  * Decode the NUL-terminated hex string <hex> into a newly allocated
  * byte buffer of strlen(hex) / 2 bytes followed by a zero byte.
  *
  * Returns NULL if the length is odd, the allocation fails or
  * cli_hex2str_to() reports an invalid digit.  On success the caller
  * owns the buffer and releases it with free().
  */
 char *cli_hex2str(const char *hex)
 {
     unsigned char *str;
     size_t len;

     len = strlen(hex);

     if(len % 2 != 0) {
         cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex, (unsigned)len);
         return NULL;
     }

     str = cli_calloc((len / 2) + 1, sizeof(char));
     if(!str)
         return NULL;

     if (cli_hex2str_to(hex, str, len) == -1) {
         free(str);
         return NULL;
     }

     /* unsigned char * and char * are distinct pointer types: convert explicitly */
     return (char *)str;
 }
 
 /*
  * Decode <len> hex characters from <hex> into bytes written to <ptr>
  * (one output byte per pair of input characters).
  *
  * Returns 0 on success, -1 at the first character that is not a hex
  * digit; bytes decoded before that point have already been stored.
  */
 int cli_hex2str_to(const char *hex, unsigned char *ptr, size_t len)
 {
     size_t pos = 0;

     while (pos < len) {
         const int hi = cli_hex2int(hex[pos]);
         int lo;

         if (hi < 0)
             return -1;

         lo = cli_hex2int(hex[pos + 1]);
         if (lo < 0)
             return -1;

         *ptr++ = (unsigned char)((hi << 4) + lo);
         pos += 2;
     }

     return 0;
 }
 
e5916a51
 /*
  * Parse the NUL-terminated hex string <hex> into an int, most
  * significant digit first.
  *
  * Returns -1 (after logging) when the string length is odd.  Parsing
  * stops silently at the first non-hex character and the value
  * accumulated so far is returned.
  */
 int cli_hex2num(const char *hex)
 {
     int digit, pos, result = 0;
     const int len = strlen(hex);

     if (len % 2 != 0) {
         cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex, len);
         return -1;
     }

     pos = 0;
     while (pos < len && (digit = cli_hex2int(hex[pos])) >= 0) {
         result = (result << 4) | digit;
         pos++;
     }

     return result;
 }
 
8000d078
 /*
  * Return a newly allocated, NUL-terminated lower-case hex dump of the
  * first <len> bytes of <string> (two output characters per byte).
  *
  * Returns NULL if the allocation fails or if 2 * len + 1 does not fit
  * in an unsigned int.  The caller owns the returned buffer.
  */
 char *cli_str2hex(const char *string, unsigned int len)
 {
     static const char digits[] = "0123456789abcdef";
     char *hexstr;
     unsigned int i;
     size_t j;

     /* 2 * len + 1 would wrap around and under-allocate the buffer */
     if(len > ((unsigned int) -1 - 1) / 2)
         return NULL;

     if((hexstr = (char *) cli_calloc(2 * len + 1, sizeof(char))) == NULL)
         return NULL;

     for(i = 0, j = 0; i < len; i++, j += 2) {
         /* work on the unsigned byte so the shift never sees a negative value */
         const unsigned char byte = (unsigned char) string[i];

         hexstr[j] = digits[byte >> 4];
         hexstr[j + 1] = digits[byte & 0xf];
     }

     return hexstr;
 }
 
bd988961
 /*
  * Fold every pair of input bytes into one output character and return
  * the result as a newly allocated, NUL-terminated string.  An odd
  * <length> is rounded down to the previous even value.
  *
  * Each output character is computed as (second byte << 4) + first byte.
  * NOTE(review): a shift of 4 (rather than 8) looks unusual for UTF-16;
  * the arithmetic is kept exactly as it was - confirm intent.
  *
  * Returns NULL when <length> is below 2 or the allocation fails.
  */
 char *cli_utf16toascii(const char *str, unsigned int length)
 {
     char *out;
     unsigned int src, dst = 0;

     if (length < 2) {
         cli_dbgmsg("cli_utf16toascii: length < 2\n");
         return NULL;
     }

     /* drop a trailing unpaired byte */
     length -= length % 2;

     out = cli_calloc(length / 2 + 1, sizeof(char));
     if (!out)
         return NULL;

     for (src = 0; src < length; src += 2) {
         out[dst] = str[src + 1] << 4;
         out[dst] += str[src];
         dst++;
     }

     return out;
 }
 
e3aaff8e
 /*
  * Case-insensitive suffix test.
  *
  * Returns 1 when <haystack> ends with <needle> (compared with
  * strcasecmp()), 0 otherwise, including when <needle> is longer than
  * <haystack>.  An empty <needle> matches every haystack.
  */
 int cli_strbcasestr(const char *haystack, const char *needle)
 {
     /* size_t avoids truncating the lengths of very long strings to int */
     const size_t hlen = strlen(haystack);
     const size_t nlen = strlen(needle);

     if (hlen < nlen)
         return 0;

     return strcasecmp(haystack + (hlen - nlen), needle) == 0;
 }
 
8f0f9d56
 /*
  * Remove trailing NL and CR characters from the end of the given string
  * by overwriting them with NUL bytes.
  * Returns the new length of the string (as strlen would), or -1 when
  * <string> is NULL.
  */
 int
 cli_chomp(char *string)
 {
     size_t l;

     if(string == NULL)
         return -1;

     /* scan with size_t so the index is never a truncated/negative int */
     l = strlen(string);

     while((l > 0) && ((string[l - 1] == '\n') || (string[l - 1] == '\r')))
         string[--l] = '\0';

     return (int)l;
 }
486fa0d3
 
2d70a403
 /*
  * char *cli_strtok(const char *line, int fieldno, const char *delim)
  * Return a newly allocated copy of field number <fieldno> (counted
  * from 0) of <line>.  Fields are separated by any character found in
  * <delim>; a run of consecutive delimiters counts as one separator.
  * Returns NULL if <line> has no such field, if the field is empty, or
  * if no memory is available.
  * The caller has to free() the result afterwards.
  */
 char *cli_strtok(const char *line, int fieldno, const char *delim)
 {
     int seen = 0, start = 0;
     size_t flen;
     char *copy;

     /* advance to the first character of field <fieldno> */
     while (line[start] && seen != fieldno) {
         if (strchr(delim, line[start])) {
             seen++;
             /* swallow the rest of a run of delimiters */
             while (line[start + 1] && strchr(delim, line[start + 1]))
                 start++;
         }
         start++;
     }

     if (line[start] == '\0') {
         /* the line ended before the requested field */
         return NULL;
     }

     /* the field runs up to the next delimiter or the end of the line */
     flen = strcspn(line + start, delim);
     if (flen == 0)
         return NULL;

     copy = cli_malloc(flen + 1);
     if (!copy)
         return NULL;

     memcpy(copy, line + start, flen);
     copy[flen] = '\0';

     return copy;
 }
908ac3b2
 
 /*
  * Same field extraction as cli_strtok, except that the field is copied
  * into the caller-supplied buffer <output> instead of freshly
  * allocated memory.  <output> must be large enough for the field plus
  * its terminating NUL; no size is checked here.
  * Returns <output> on success, or NULL when the field does not exist
  * or is empty.
  * njh@bandsman.co.uk
  */
 char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output)
 {
     int seen = 0, start = 0;
     size_t flen;

     /* advance to the first character of field <fieldno> */
     while (input[start] && seen != fieldno) {
         if (strchr(delim, input[start])) {
             seen++;
             /* swallow the rest of a run of delimiters */
             while (input[start + 1] && strchr(delim, input[start + 1]))
                 start++;
         }
         start++;
     }

     if (!input[start]) {
         /* the input ended before the requested field */
         return NULL;
     }

     /* the field runs up to the next delimiter or the end of the input */
     flen = strcspn(input + start, delim);
     if (flen == 0)
         return NULL;

     memcpy(output, input + start, flen);
     output[flen] = '\0';

     return output;
 }
8f84357e
 
b03230c2
 /*
  * Find the first occurrence of the <ns>-byte block <needle> inside the
  * <hs>-byte block <haystack>.  Both blocks are raw memory and may
  * contain NUL bytes.
  *
  * Returns a pointer to the match inside <haystack>, or NULL if there
  * is none, if <ns> is negative or if <hs> is smaller than <ns>.
  * A zero-length needle, or needle == haystack, matches at <haystack>.
  */
 const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns)
 {
     const char *cur, *hit;
     size_t left, nlen;

     /* a negative length would turn into a huge size_t further down */
     if(ns < 0 || hs < ns)
         return NULL;

     if(haystack == needle || ns == 0)
         return haystack;

     nlen = (size_t) ns;
     left = (size_t) hs;
     cur = haystack;

     /* only look for the first byte where a full match can still fit */
     while(left >= nlen && (hit = memchr(cur, needle[0], left - nlen + 1)) != NULL) {
         if(!memcmp(hit, needle, nlen))
             return hit;

         /* resume right after the rejected candidate */
         left -= (size_t) (hit - cur) + 1;
         cur = hit + 1;
     }

     return NULL;
 }
9b133473
 
 /*
  * Copy the NUL-terminated string <source> to <dest>, terminator
  * included, and return a pointer to the terminating NUL inside <dest>
  * (handy for appending further text).  Logs an error and returns NULL
  * when either argument is NULL.  (by NJH)
  */
 char *cli_strrcpy(char *dest, const char *source) /* by NJH */
 {
     if (dest == NULL || source == NULL) {
         cli_errmsg("cli_strrcpy: NULL argument\n");
         return NULL;
     }

     for (;;) {
         const char ch = *source++;

         *dest = ch;
         if (ch == '\0')
             return dest;
         dest++;
     }
 }
e4e8366f
 
ff75dedb
 #ifndef HAVE_STRCASESTR
 /*
  * Fallback used when the platform has no strcasestr(): find the first
  * case-insensitive occurrence of string <b> inside string <a>.
  *
  * Returns a pointer into <a> at the start of the match, <a> itself
  * when <b> is empty (as strcasestr() does), or NULL when there is no
  * match.
  */
 const char* cli_strcasestr(const char* a, const char *b)
 {
     size_t l;
     char f[3];
     const size_t strlen_a = strlen(a);
     const size_t strlen_b = strlen(b);

     /* an empty needle matches at the very start of the haystack */
     if (strlen_b == 0)
         return(a);

     /* <ctype.h> functions require a value representable as unsigned char */
     f[0] = (char)tolower((unsigned char)*b);
     f[1] = (char)toupper((unsigned char)*b);
     f[2] = '\0';

     /* jump from one occurrence of the needle's first letter to the next */
     for (l = strcspn(a, f); l != strlen_a; l += strcspn(a + l + 1, f) + 1)
         if (strncasecmp(a + l, b, strlen_b) == 0)
             return(a + l);
     return(NULL);
 }
 #endif
 
72fb25ea
 /*
  * Split <buffer> in place at every occurrence of <delim>, storing a
  * pointer to the start of each piece in <tokens>.  At most
  * <token_count> pieces are recorded; each delimiter that ends a
  * recorded piece is overwritten with NUL.  Unused slots of <tokens>
  * are set to NULL when the buffer runs out of delimiters first.
  *
  * Returns the number of tokens stored.
  */
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens)
 {
     size_t found = 0, rest;
     char *cursor = buffer;

     while (found < token_count) {
         tokens[found++] = cursor;

         cursor = strchr(cursor, delim);
         if (cursor == NULL) {
             /* last token reached: blank out the remaining slots */
             for (rest = found; rest < token_count; rest++)
                 tokens[rest] = NULL;
             break;
         }

         /* terminate this token and step to the start of the next one */
         *cursor++ = '\0';
     }

     return found;
 }
a3fe2c5b
 
 /*
  * Return 1 if every character of the NUL-terminated string <str> is a
  * decimal digit ('0'-'9'), 0 otherwise.  An empty string yields 1.
  */
 int cli_isnumber(const char *str)
 {
     /*
      * Test the current character before advancing; advancing first
      * would leave the first character of the string unchecked.
      */
     while(*str)
         if(!strchr("0123456789", *str++))
             return 0;

     return 1;
 }
eb290151
 
 /*
  * Encode the 16-bit code unit <u> as UTF-8 into <dst> and return the
  * number of bytes written (1 to 3).  A zero code unit is stored as the
  * single byte 0x01 so that the output never contains a NUL byte.
  * Values in the surrogate range 0xd800 - 0xdfff are not valid
  * characters, but they are encoded like any other value without a check.
  */
 static inline size_t output_utf8(uint16_t u, unsigned char* dst)
 {
     size_t written;

     if (u == 0) {
         /* don't add \0, add \1 instead */
         dst[0] = 0x1;
         written = 1;
     } else if (u < 0x80) {
         /* plain ASCII */
         dst[0] = u & 0xff;
         written = 1;
     } else if (u < 0x800) {
         dst[0] = 0xc0 | (u >> 6);          /* 110yyyyy */
         dst[1] = 0x80 | (u & 0x3f);        /* 10zzzzzz */
         written = 2;
     } else {
         /* u < 0x10000 always holds for a 16-bit input */
         dst[0] = 0xe0 | (u >> 12);         /* 1110xxxx */
         dst[1] = 0x80 | ((u >> 6) & 0x3f); /* 10yyyyyy */
         dst[2] = 0x80 | (u & 0x3f);        /* 10zzzzzz */
         written = 3;
     }

     return written;
 }
 
 /*
  * javascript-like unescape() function.
  *
  * Returns a newly allocated, NUL-terminated copy of <str> in which
  *   - "%uXXXX" (four hex digits) is replaced by the UTF-8 encoding of
  *     that 16-bit value, and
  *   - "%XX" (two hex digits) is replaced by the corresponding byte.
  * Any other text, including a '%' that starts neither form, is copied
  * unchanged.  A decoded zero is stored as 0x01 so the result contains
  * no embedded NUL.  Returns NULL if the initial allocation fails; the
  * final value is whatever cli_realloc2() returns.
  */
 char *cli_unescape(const char *str)
 {
     char *R;
     size_t k, i=0;
     const size_t len = strlen(str);
     /* unescaped string is at most as long as original,
      * it will usually be shorter */
     R = cli_malloc(len + 1);
     if(!R)
         return NULL;
     for(k=0;k < len;k++) {
         unsigned char c = str[k];
         if (str[k] == '%') {
             /* isxdigit() needs unsigned char values: input bytes may be >= 0x80 */
             if(k+5 < len && str[k+1] == 'u'
                     && isxdigit((unsigned char)str[k+2]) && isxdigit((unsigned char)str[k+3])
                     && isxdigit((unsigned char)str[k+4]) && isxdigit((unsigned char)str[k+5])) {
                 uint16_t u = (cli_hex2int(str[k+2])<<12) | (cli_hex2int(str[k+3])<<8) |
                     (cli_hex2int(str[k+4])<<4) | cli_hex2int(str[k+5]);
                 i += output_utf8(u, (unsigned char*)&R[i]);
                 k += 5;
                 continue;
             }
             if(k+2 < len && isxdigit((unsigned char)str[k+1]) && isxdigit((unsigned char)str[k+2])) {
                 c = (cli_hex2int(str[k+1])<<4) | cli_hex2int(str[k+2]);
                 k += 2;
             }
         }
         if(!c) c = 1; /* don't add \0 */
         R[i++] = c;
     }
     R[i++] = '\0';
     /* shrink the buffer to the size actually used */
     R = cli_realloc2(R, i);
     return R;
 }
 
 /*
  * Append <len> bytes of <str> to <buf>, decoding javascript string
  * escape sequences on the way:
  *   \0 \b \t \n \v \f \r  -> the corresponding control character
  *   \xHH                  -> the byte built from the two characters
  *   \uHHHH                -> UTF-8 encoding of the 16-bit value
  *   \<other>              -> <other> itself
  * A backslash that is the last byte of the input is copied as is.
  * A resulting zero byte is stored as 0x01 (\0 is never inserted).
  * The characters after \x and \u are passed to cli_hex2int() without
  * being checked for validity first.
  *
  * Returns 0 on success, -1 if the buffer could not be grown or
  * written to.
  */
 int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str, size_t len)
 {
     size_t pos;

     for (pos = 0; pos < len; pos++) {
         char out = str[pos];

         if (out == '\\' && pos + 1 < len) {
             const char esc = str[++pos];

             if (esc == 'u') {
                 if (pos + 4 < len) {
                     uint16_t u = (cli_hex2int(str[pos+1])<<12) | (cli_hex2int(str[pos+2])<<8) |
                         (cli_hex2int(str[pos+3])<<4) | cli_hex2int(str[pos+4]);

                     /* written straight into the buffer */
                     if (textbuffer_ensure_capacity(buf, 4) == -1)
                         return -1;
                     buf->pos += output_utf8(u, (unsigned char*)&buf->data[buf->pos]);
                     pos += 4;
                     continue;
                 }
                 /* not enough input left: out is still the backslash */
             } else if (esc == 'x') {
                 if (pos + 2 < len)
                     out = (cli_hex2int(str[pos+1])<<4)|cli_hex2int(str[pos+2]);
                 /* the two characters are skipped whether or not they were decoded */
                 pos += 2;
             } else {
                 switch (esc) {
                     case '0': out = 0;  break;
                     case 'b': out = 8;  break;
                     case 't': out = 9;  break;
                     case 'n': out = 10; break;
                     case 'v': out = 11; break;
                     case 'f': out = 12; break;
                     case 'r': out = 13; break;
                     default:
                         /* unknown escape: keep the escaped character itself */
                         out = esc;
                         break;
                 }
             }
         }

         if (out == 0)
             out = 1; /* we don't insert \0 */
         if (textbuffer_putc(buf, out) == -1)
             return -1;
     }

     return 0;
 }