libclamav/matcher-hash.c
c802edd5
 /*
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2010-2013 Sourcefire, Inc.
c802edd5
  *
  *  Authors: aCaB
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
b2e7c931
 #include <string.h>
 #include <stdlib.h>
 
c802edd5
 #include "matcher.h"
 #include "others.h"
 #include "str.h"
 
 
7cb86faf
 int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname) {
c802edd5
     enum CLI_HASH_TYPE type;
e37613ad
     char binhash[CLI_HASHLEN_MAX];
7cb86faf
     int hlen;
c802edd5
 
7cb86faf
     if(!root || !strhash) {
 	cli_errmsg("hm_addhash_str: NULL root or hash\n");
c802edd5
 	return CL_ENULLARG;
     }
 
e37613ad
     /* size 0 here is now a wildcard size match */
     if(size == (uint32_t)-1) {
7cb86faf
 	cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);
c802edd5
 	return CL_EARG;
     }
 
7cb86faf
     hlen = strlen(strhash);
     switch(hlen) {
c802edd5
     case 32:
 	type = CLI_HASH_MD5;
 	break;
     case 40:
 	type = CLI_HASH_SHA1;
 	break;
     case 64:
 	type = CLI_HASH_SHA256;
 	break;
     default:
7cb86faf
 	cli_errmsg("hm_addhash_str: invalid hash %s -- FIXME!\n", strhash);
c802edd5
 	return CL_EARG;
     }
7cb86faf
     if(cli_hex2str_to(strhash, (char *)binhash, hlen)) {
 	cli_errmsg("hm_addhash_str: invalid hash %s\n", strhash);
c802edd5
 	return CL_EARG;
     }
 
7cb86faf
     return hm_addhash_bin(root, binhash, type, size, virusname);
 }
 
8cb85148
 const unsigned int hashlen[] = {
e37613ad
     CLI_HASHLEN_MD5,
     CLI_HASHLEN_SHA1,
     CLI_HASHLEN_SHA256
7cb86faf
 };
 
 int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname) {
     const unsigned int hlen = hashlen[type];
     const struct cli_htu32_element *item;
     struct cli_sz_hash *szh;
     struct cli_htu32 *ht;
     int i;
 
e37613ad
     if (size) {
         /* size non-zero, find sz_hash element in size-driven hashtable  */
         ht = &root->hm.sizehashes[type];
         if(!root->hm.sizehashes[type].capacity) {
             i = cli_htu32_init(ht, 64, root->mempool);
             if(i) return i;
         }
 
         item = cli_htu32_find(ht, size);
         if(!item) {
 	    struct cli_htu32_element htitem;
 	    szh = mpool_calloc(root->mempool, 1, sizeof(*szh));
 	    if(!szh) {
 	        cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
 	        return CL_EMEM;
 	    }
 
 	    htitem.key = size;
 	    htitem.data.as_ptr = szh;
 	    i = cli_htu32_insert(ht, &htitem, root->mempool);
 	    if(i) {
 	        cli_errmsg("hm_addhash_bin: failed to add item to hashtab");
 	        mpool_free(root->mempool, szh);
 	        return i;
 	    }
         } else
 	    szh = (struct cli_sz_hash *)item->data.as_ptr;
     }
     else {
         /* size 0 = wildcard */
         szh = &root->hwild.hashes[type];
a08646d2
     }
38d54c92
     szh->items++;
c802edd5
 
7cb86faf
     szh->hash_array = mpool_realloc2(root->mempool, szh->hash_array, hlen * szh->items);
38d54c92
     if(!szh->hash_array) {
7cb86faf
 	cli_errmsg("hm_addhash_bin: failed to grow hash array to %u entries\n", szh->items);
38d54c92
 	szh->items=0;
 	mpool_free(root->mempool, szh->virusnames);
3faa9783
 	szh->virusnames = NULL;
38d54c92
 	return CL_EMEM;
c802edd5
     }
 
38d54c92
     szh->virusnames = mpool_realloc2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);
     if(!szh->virusnames) {
7cb86faf
 	cli_errmsg("hm_addhash_bin: failed to grow virusname array to %u entries\n", szh->items);
38d54c92
 	szh->items=0;
 	mpool_free(root->mempool, szh->hash_array);
3faa9783
 	szh->hash_array = NULL;
38d54c92
 	return CL_EMEM;
     }
 
7cb86faf
     memcpy(&szh->hash_array[(szh->items-1) * hlen], binhash, hlen);
38d54c92
     szh->virusnames[(szh->items-1)] = virusname;
a08646d2
     
c802edd5
     return 0;
 }
bb2f6b0b
 
096cea46
 /*
  * Three-way comparison of two digests of keylen bytes each.
  *
  * Returns 0 when the digests are equal and a non-zero value otherwise.
  * When WORDS_BIGENDIAN is 0 (or undefined) the first four bytes are compared
  * as one native 32-bit word and the result is +1 if itm's word is the
  * smaller one, -1 if it is the larger one; the remaining keylen - 4 bytes
  * are compared with memcmp().  That order differs from plain memcmp(), but
  * hm_sort() and hm_scan() both go through this function, so the ordering
  * they rely on is self-consistent.
  *
  * keylen must be at least 4 on that path.
  */
 static inline int hm_cmp(const uint8_t *itm, const uint8_t *ref, unsigned int keylen) {
 #if WORDS_BIGENDIAN == 0
     uint32_t i, r;

     /* Load through memcpy rather than a uint32_t pointer cast: the inputs
      * are byte buffers with no alignment guarantee, and the cast would also
      * discard const and break the aliasing rules. */
     memcpy(&i, itm, sizeof(i));
     memcpy(&r, ref, sizeof(r));
     if (i != r)
         return (i < r) * 2 - 1;
     return memcmp(&itm[4], &ref[4], keylen - 4);
 #else
     return memcmp(itm, ref, keylen);
 #endif
 }
 
b33354e5
 static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen) {
e37613ad
     uint8_t piv[CLI_HASHLEN_MAX], tmph[CLI_HASHLEN_MAX];
bb2f6b0b
     size_t l1, r1;
 
     const char *tmpv;
 
125827cf
     if(l + 1 >= r)
bb2f6b0b
 	return;
 
     l1 = l+1, r1 = r;
 
     memcpy(piv, &szh->hash_array[keylen * l], keylen);
     while(l1 < r1) {
2296ab0f
 	if(hm_cmp(&szh->hash_array[keylen * l1], piv, keylen) > 0) {
bb2f6b0b
 	    r1--;
656dfd0b
 	    if(l1 == r1) break;
bb2f6b0b
 	    memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
 	    tmpv = szh->virusnames[l1];
 	    memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * r1], keylen);
 	    szh->virusnames[l1] = szh->virusnames[r1];
 	    memcpy(&szh->hash_array[keylen * r1], tmph, keylen);
 	    szh->virusnames[r1] = tmpv;
 	} else
 	    l1++;
     }
 
     l1--;
125827cf
     if(l1!=l) {
 	memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
 	tmpv = szh->virusnames[l1];
 	memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * l], keylen);
 	szh->virusnames[l1] = szh->virusnames[l];
 	memcpy(&szh->hash_array[keylen * l], tmph, keylen);
 	szh->virusnames[l] = tmpv;
     }
bb2f6b0b
 
     hm_sort(szh, l, l1, keylen);
     hm_sort(szh, r1, r, keylen);
 }
 
e37613ad
 /* flush both size-specific and agnostic hash sets */
bb2f6b0b
 void hm_flush(struct cli_matcher *root) {
     enum CLI_HASH_TYPE type;
a996bd01
     unsigned int keylen;
     struct cli_sz_hash *szh;
bb2f6b0b
 
     if(!root)
 	return;
 
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
a08646d2
 	struct cli_htu32 *ht = &root->hm.sizehashes[type];
 	const struct cli_htu32_element *item = NULL;
a996bd01
 	szh = NULL;
a08646d2
 
3faa9783
 	if(!root->hm.sizehashes[type].capacity)
a08646d2
 	    continue;
 
 	while((item = cli_htu32_next(ht, item))) {
e37613ad
 	    szh = (struct cli_sz_hash *)item->data.as_ptr;
a996bd01
 	    keylen = hashlen[type];
bb2f6b0b
 
 	    if(szh->items > 1)
 		hm_sort(szh, 0, szh->items, keylen);
 	}
     }
e37613ad
 
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
a996bd01
 	szh = &root->hwild.hashes[type];
 	keylen = hashlen[type];
e37613ad
 
 	if(szh->items > 1)
 	    hm_sort(szh, 0, szh->items, keylen);
     }
bb2f6b0b
 }
 
096cea46
 
2296ab0f
 int cli_hm_have_size(const struct cli_matcher *root, enum CLI_HASH_TYPE type, uint32_t size) {
3faa9783
     return (size && size != 0xffffffff && root && root->hm.sizehashes[type].capacity && cli_htu32_find(&root->hm.sizehashes[type], size));
2296ab0f
 }
 
e37613ad
 /*
  * Tell whether the size-agnostic (wildcard) bucket of the given hash type
  * contains any entries.  Returns 1 if so, 0 otherwise (including for a
  * NULL root).
  */
 int cli_hm_have_wild(const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
     if (!root)
         return 0;

     return root->hwild.hashes[type].items ? 1 : 0;
 }
 
211edda0
 /*
  * Tell whether the matcher has anything at all for the given hash type:
  * either wildcard entries or an initialised size table.  Returns 1 if so,
  * 0 otherwise (including for a NULL root).
  */
 int cli_hm_have_any(const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
     if (!root)
         return 0;

     if (root->hwild.hashes[type].items)
         return 1;

     return root->hm.sizehashes[type].capacity ? 1 : 0;
 }
 
e37613ad
 /* cli_hm_scan will scan only size-specific hashes, if any */
 static int hm_scan(const unsigned char *digest, const char **virname, const struct cli_sz_hash *szh, enum CLI_HASH_TYPE type) {
096cea46
     unsigned int keylen;
     size_t l, r;
 
e37613ad
     if(!digest || !szh || !szh->items)
096cea46
 	return CL_CLEAN;
 
     keylen = hashlen[type];
 
     l = 0;
4842733e
     r = szh->items - 1;
125827cf
     while(l <= r) {
096cea46
 	size_t c = (l + r) / 2;
a08646d2
 	int res = hm_cmp(digest, &szh->hash_array[keylen * c], keylen);
096cea46
 
125827cf
 	if(res < 0) {
 	    if(!c)
 		break;
 	    r = c - 1;
 	} else if(res > 0)
 	    l = c + 1;
096cea46
 	else {
 	    if(virname)
 		*virname = szh->virusnames[c];
 	    return CL_VIRUS;
 	}
     }
     return CL_CLEAN;
 }
3faa9783
 
e37613ad
 /*
  * Look a digest up among the hashes registered for one specific size.
  * Wildcard (size-agnostic) hashes are not consulted here.
  *
  * Returns CL_VIRUS and, if virname is non-NULL, the virus name on a match.
  * Returns CL_CLEAN when nothing matches, when digest or root is NULL, when
  * size is 0 or 0xffffffff, or when no bucket exists for that size.
  */
 int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
     const struct cli_htu32_element *entry;

     if (!digest)
         return CL_CLEAN;

     if (!size || size == 0xffffffff)
         return CL_CLEAN;

     if (!root || !root->hm.sizehashes[type].capacity)
         return CL_CLEAN;

     entry = cli_htu32_find(&root->hm.sizehashes[type], size);
     if (!entry)
         return CL_CLEAN;

     return hm_scan(digest, virname, (struct cli_sz_hash *)entry->data.as_ptr, type);
 }
 
 /* cli_hm_scan_wild will scan only size-agnostic hashes, if any */
 int cli_hm_scan_wild(const unsigned char *digest, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
     if(!digest || !root || !root->hwild.hashes[type].items)
 	return CL_CLEAN;
 
     return hm_scan(digest, virname, &root->hwild.hashes[type], type);
 }
 
 /* free both size-specific and agnostic hash sets */
3faa9783
 void hm_free(struct cli_matcher *root) {
     enum CLI_HASH_TYPE type;
 
     if(!root)
 	return;
 
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
 	struct cli_htu32 *ht = &root->hm.sizehashes[type];
 	const struct cli_htu32_element *item = NULL;
 
 	if(!root->hm.sizehashes[type].capacity)
 	    continue;
 
 	while((item = cli_htu32_next(ht, item))) {
 	    struct cli_sz_hash *szh = (struct cli_sz_hash *)item->data.as_ptr;
 
 	    mpool_free(root->mempool, szh->hash_array);
e010aa7a
 	    while(szh->items)
e10286bd
 		mpool_free(root->mempool, (void *)szh->virusnames[--szh->items]);
3faa9783
 	    mpool_free(root->mempool, szh->virusnames);
 	    mpool_free(root->mempool, szh);
 	}
 	cli_htu32_free(ht, root->mempool);
     }
e37613ad
 
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
 	struct cli_sz_hash *szh = &root->hwild.hashes[type];
 
 	if(!szh->items)
 	    continue;
 
 	mpool_free(root->mempool, szh->hash_array);
 	while(szh->items)
 	    mpool_free(root->mempool, (void *)szh->virusnames[--szh->items]);
 	mpool_free(root->mempool, szh->virusnames);
     }
3faa9783
 }