/* * Copyright (C) 2007-2008 Sourcefire, Inc. * * Authors: Tomasz Kojm, Nigel Horne, Török Edvin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include "str.h" #include #include #include #ifdef HAVE_STRINGS_H #include #endif #include #include #include "clamav.h" #include "others.h" #include "matcher.h" #include "cltypes.h" #include "jsparse/textbuf.h" static const int hex_chars[256] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, }; static inline int cli_hex2int(const char c) { return hex_chars[(const unsigned char)c]; } int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len) { uint16_t val; unsigned int i; int c; for(i = 0; i < len; i += 2) { val = 0; if(hex[i] == '?' && hex[i + 1] == '?') { val |= CLI_MATCH_IGNORE; } else if(hex[i + 1] == '?') { if((c = cli_hex2int(hex[i])) >= 0) { val = c << 4; } else { return 0; } val |= CLI_MATCH_NIBBLE_HIGH; } else if(hex[i] == '?') { if((c = cli_hex2int(hex[i + 1])) >= 0) { val = c; } else { return 0; } val |= CLI_MATCH_NIBBLE_LOW; } else if(hex[i] == '(') { val |= CLI_MATCH_ALTERNATIVE; } else { if((c = cli_hex2int(hex[i])) >= 0) { val = c; if((c = cli_hex2int(hex[i+1])) >= 0) { val = (val << 4) + c; } else { return 0; } } else { return 0; } } *ptr++ = val; } return 1; } uint16_t *cli_hex2ui(const char *hex) { uint16_t *str; unsigned int len; len = strlen(hex); if(len % 2 != 0) { cli_errmsg("cli_hex2si(): Malformed hexstring: %s (length: %u)\n", hex, len); return NULL; } str = cli_calloc((len / 2) + 1, sizeof(uint16_t)); if(!str) return NULL; if(cli_realhex2ui(hex, str, len)) return str; free(str); return NULL; } char *cli_hex2str(const char *hex) { unsigned char *str; size_t len; len = strlen(hex); if(len % 2 != 0) { cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex, (unsigned)len); return NULL; } str = cli_calloc((len / 2) + 1, sizeof(char)); if(!str) return NULL; if (cli_hex2str_to(hex, str, len) == -1) { free(str); return NULL; } return str; } int cli_hex2str_to(const char *hex, unsigned char *ptr, size_t len) { size_t i; int c; unsigned char val; for(i = 0; i < len; i += 2) { if((c = cli_hex2int(hex[i])) >= 0) { val = c; if((c = cli_hex2int(hex[i+1])) >= 0) { val = (val << 4) + c; } else { return -1; } } else { return -1; } *ptr++ = val; } return 0; } int cli_hex2num(const char *hex) { int hexval, ret = 0, len, i; len = strlen(hex); if(len % 2 != 0) { cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex, len); return -1; } for(i = 0; i < len; i++) { if((hexval = cli_hex2int(hex[i])) < 0) break; ret = (ret << 4) | hexval; } return ret; } char *cli_str2hex(const char *string, unsigned int len) { char *hexstr; char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; unsigned int i, j; if((hexstr = (char *) cli_calloc(2 * len + 1, sizeof(char))) == NULL) return NULL; for(i = 0, j = 0; i < len; i++, j += 2) { hexstr[j] = HEX[(string[i] >> 4) & 0xf]; hexstr[j + 1] = HEX[string[i] & 0xf]; } return hexstr; } char *cli_utf16toascii(const char *str, unsigned int length) { char *decoded; unsigned int i, j; if(length < 2) { cli_dbgmsg("cli_utf16toascii: length < 2\n"); return NULL; } if(length % 2) length--; if(!(decoded = cli_calloc(length / 2 + 1, sizeof(char)))) return NULL; for(i = 0, j = 0; i < length; i += 2, j++) { decoded[j] = str[i + 1] << 4; decoded[j] += str[i]; } return decoded; } int cli_strbcasestr(const char *haystack, const char *needle) { const char *pt = haystack; int i, j; i = strlen(haystack); j = strlen(needle); if(i < j) return 0; pt += i - j; return !strcasecmp(pt, needle); } /* * Remove trailing NL and CR characters from the end of the given string. * Return the new length of the string (ala strlen) */ int cli_chomp(char *string) { int l; if(string == NULL) return -1; l = strlen(string); if(l == 0) return 0; --l; while((l >= 0) && ((string[l] == '\n') || (string[l] == '\r'))) string[l--] = '\0'; return l + 1; } /* * char *cli_strok(const char *line, int fieldno, char *delim) * Return a copy of field from the string , where * fields are delimited by any char from , or NULL if * doesn't have fields or not enough memory is available. * The caller has to free() the result afterwards. */ char *cli_strtok(const char *line, int fieldno, const char *delim) { int counter = 0, i, j; char *buffer = NULL; /* step to arg # */ for (i=0; line[i] && counter != fieldno; i++) { if (strchr(delim, line[i])) { counter++; while(line[i+1] && strchr(delim, line[i+1])) { i++; } } } if (!line[i]) { /* end of buffer before field reached */ return NULL; } for (j=i; line[j]; j++) { if (strchr(delim, line[j])) { break; } } if (i == j) { return NULL; } buffer = cli_malloc(j-i+1); if(!buffer) return NULL; strncpy(buffer, line+i, j-i); buffer[j-i] = '\0'; return buffer; } /* * Like cli_strtok, but this puts the output into a given argument, rather * than allocating fresh memory * Returns NULL for error, or a pointer to output * njh@bandsman.co.uk */ char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output) { int counter = 0, i, j; /* step to arg # */ for (i=0; input[i] && counter != fieldno; i++) { if (strchr(delim, input[i])) { counter++; while(input[i+1] && strchr(delim, input[i+1])) { i++; } } } if (input[i] == '\0') { /* end of buffer before field reached */ return NULL; } for (j=i; input[j]; j++) { if (strchr(delim, input[j])) { break; } } if (i == j) { return NULL; } strncpy(output, input+i, j-i); output[j-i] = '\0'; return output; } const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns) { const char *pt, *hay; int n; if(hs < ns) return NULL; if(haystack == needle) return haystack; if(!memcmp(haystack, needle, ns)) return haystack; pt = hay = haystack; n = hs; while((pt = memchr(hay, needle[0], n)) != NULL) { n -= (int) (pt - hay); if(n < ns) break; if(!memcmp(pt, needle, ns)) return pt; if(hay == pt) { n--; hay++; } else { hay = pt; } } return NULL; } char *cli_strrcpy(char *dest, const char *source) /* by NJH */ { if(!dest || !source) { cli_errmsg("cli_strrcpy: NULL argument\n"); return NULL; } while((*dest++ = *source++)); return --dest; } #ifndef HAVE_STRCASESTR const char* cli_strcasestr(const char* a, const char *b) { size_t l; char f[3]; const size_t strlen_a = strlen(a); const size_t strlen_b = strlen(b); f[0] = tolower(*b); f[1] = toupper(*b); f[2] = '\0'; for (l = strcspn(a, f); l != strlen_a; l += strcspn(a + l + 1, f) + 1) if (strncasecmp(a + l, b, strlen_b) == 0) return(a + l); return(NULL); } #endif size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens) { size_t tokens_found, i; for(tokens_found = 0; tokens_found < token_count; ) { tokens[tokens_found++] = buffer; buffer = strchr(buffer, delim); if(buffer) { *buffer++ = '\0'; } else { i = tokens_found; while(i < token_count) tokens[i++] = NULL; return tokens_found; } } return tokens_found; } int cli_isnumber(const char *str) { while(*str++) if(!strchr("0123456789", *str)) return 0; return 1; } /* encodes the unicode character as utf-8 */ static inline size_t output_utf8(uint16_t u, unsigned char* dst) { if(!u) { *dst = 0x1; /* don't add \0, add \1 instead */ return 1; } if(u < 0x80) { *dst = u&0xff; return 1; } if(u < 0x800) { *dst++ = 0xc0 | (u>>6); /* 110yyyyy */ *dst = 0x80 | (u & 0x3f); /* 10zzzzzz */ return 2; } /* u < 0x10000 because we only handle utf-16, * values in range 0xd800 - 0xdfff aren't valid, but we don't check for * that*/ *dst++ = 0xe0 | (u>>12); /* 1110xxxx */ *dst++ = 0x80 | ((u>>6)&0x3f); /* 10yyyyyy */ *dst = 0x80 | (u & 0x3f); /* 10zzzzzz */ return 3; } /* javascript-like unescape() function */ char *cli_unescape(const char *str) { char *R; size_t k, i=0; const size_t len = strlen(str); /* unescaped string is at most as long as original, * it will usually be shorter */ R = cli_malloc(len + 1); if(!R) return NULL; for(k=0;k < len;k++) { unsigned char c = str[k]; if (str[k] == '%') { if(k+5 >= len || str[k+1] != 'u' || !isxdigit(str[k+2]) || !isxdigit(str[k+3]) || !isxdigit(str[k+4]) || !isxdigit(str[k+5])) { if(k+2 < len && isxdigit(str[k+1]) && isxdigit(str[k+2])) { c = (cli_hex2int(str[k+1])<<4) | cli_hex2int(str[k+2]); k += 2; } } else { uint16_t u = (cli_hex2int(str[k+2])<<12) | (cli_hex2int(str[k+3])<<8) | (cli_hex2int(str[k+4])<<4) | cli_hex2int(str[k+5]); i += output_utf8(u, (unsigned char*)&R[i]); k += 5; continue; } } if(!c) c = 1; /* don't add \0 */ R[i++] = c; } R[i++] = '\0'; R = cli_realloc2(R, i); return R; } /* handle javascript's escape sequences inside strings */ int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str, size_t len) { size_t i; for(i=0;i < len;i++) { char c = str[i]; if (c == '\\' && i+1 < len) { i++; switch (str[i]) { case '0': c = 0; break; case 'b': c = 8; break; case 't': c = 9; break; case 'n': c = 10; break; case 'v': c = 11; break; case 'f': c = 12; break; case 'r': c=13; break; case 'x': if(i+2 < len) c = (cli_hex2int(str[i+1])<<4)|cli_hex2int(str[i+2]); i += 2; break; case 'u': if(i+4 < len) { uint16_t u = (cli_hex2int(str[i+1])<<12) | (cli_hex2int(str[i+2])<<8) | (cli_hex2int(str[i+3])<<4) | cli_hex2int(str[i+4]); if(textbuffer_ensure_capacity(buf, 4) == -1) return -1; buf->pos += output_utf8(u, (unsigned char*)&buf->data[buf->pos]); i += 4; continue; } break; default: c = str[i]; break; } } if(!c) c = 1; /* we don't insert \0 */ if(textbuffer_putc(buf, c) == -1) return -1; } return 0; }