libclamav/matcher-hash.c
c802edd5
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2010-2013 Sourcefire, Inc.
c802edd5
  *
  *  Authors: aCaB
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
b2e7c931
 #include <string.h>
 #include <stdlib.h>
 
c802edd5
 #include "matcher.h"
 #include "others.h"
 #include "str.h"
 
288057e9
 int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname)
 {
c802edd5
     enum CLI_HASH_TYPE type;
e37613ad
     char binhash[CLI_HASHLEN_MAX];
7cb86faf
     int hlen;
c802edd5
 
288057e9
     if (!root || !strhash) {
         cli_errmsg("hm_addhash_str: NULL root or hash\n");
         return CL_ENULLARG;
c802edd5
     }
 
e37613ad
     /* size 0 here is now a wildcard size match */
288057e9
     if (size == (uint32_t)-1) {
         cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);
         return CL_EARG;
c802edd5
     }
 
7cb86faf
     hlen = strlen(strhash);
288057e9
     switch (hlen) {
         case 32:
             type = CLI_HASH_MD5;
             break;
         case 40:
             type = CLI_HASH_SHA1;
             break;
         case 64:
             type = CLI_HASH_SHA256;
             break;
         default:
             cli_errmsg("hm_addhash_str: invalid hash %s -- FIXME!\n", strhash);
             return CL_EARG;
c802edd5
     }
288057e9
     if (cli_hex2str_to(strhash, (char *)binhash, hlen)) {
         cli_errmsg("hm_addhash_str: invalid hash %s\n", strhash);
         return CL_EARG;
c802edd5
     }
 
7cb86faf
     return hm_addhash_bin(root, binhash, type, size, virusname);
 }
 
8cb85148
 const unsigned int hashlen[] = {
e37613ad
     CLI_HASHLEN_MD5,
     CLI_HASHLEN_SHA1,
288057e9
     CLI_HASHLEN_SHA256};
7cb86faf
 
288057e9
 int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname)
 {
7cb86faf
     const unsigned int hlen = hashlen[type];
     const struct cli_htu32_element *item;
     struct cli_sz_hash *szh;
     struct cli_htu32 *ht;
     int i;
 
e37613ad
     if (size) {
         /* size non-zero, find sz_hash element in size-driven hashtable  */
         ht = &root->hm.sizehashes[type];
288057e9
         if (!root->hm.sizehashes[type].capacity) {
e37613ad
             i = cli_htu32_init(ht, 64, root->mempool);
288057e9
             if (i) return i;
e37613ad
         }
 
         item = cli_htu32_find(ht, size);
288057e9
         if (!item) {
             struct cli_htu32_element htitem;
544fa973
             szh = MPOOL_CALLOC(root->mempool, 1, sizeof(*szh));
288057e9
             if (!szh) {
                 cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
                 return CL_EMEM;
             }
 
             htitem.key         = size;
             htitem.data.as_ptr = szh;
             i                  = cli_htu32_insert(ht, &htitem, root->mempool);
             if (i) {
                 cli_errmsg("hm_addhash_bin: failed to add item to hashtab");
544fa973
                 MPOOL_FREE(root->mempool, szh);
288057e9
                 return i;
             }
e37613ad
         } else
288057e9
             szh = (struct cli_sz_hash *)item->data.as_ptr;
     } else {
e37613ad
         /* size 0 = wildcard */
         szh = &root->hwild.hashes[type];
a08646d2
     }
38d54c92
     szh->items++;
c802edd5
 
544fa973
     szh->hash_array = MPOOL_REALLOC2(root->mempool, szh->hash_array, hlen * szh->items);
288057e9
     if (!szh->hash_array) {
         cli_errmsg("hm_addhash_bin: failed to grow hash array to %u entries\n", szh->items);
         szh->items = 0;
544fa973
         MPOOL_FREE(root->mempool, szh->virusnames);
288057e9
         szh->virusnames = NULL;
         return CL_EMEM;
c802edd5
     }
 
544fa973
     szh->virusnames = MPOOL_REALLOC2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);
288057e9
     if (!szh->virusnames) {
         cli_errmsg("hm_addhash_bin: failed to grow virusname array to %u entries\n", szh->items);
         szh->items = 0;
544fa973
         MPOOL_FREE(root->mempool, szh->hash_array);
288057e9
         szh->hash_array = NULL;
         return CL_EMEM;
38d54c92
     }
 
288057e9
     memcpy(&szh->hash_array[(szh->items - 1) * hlen], binhash, hlen);
     szh->virusnames[(szh->items - 1)] = virusname;
 
c802edd5
     return 0;
 }
bb2f6b0b
 
288057e9
 /*
  * Three-way comparison of two binary digests of keylen bytes.
  *
  * This defines the ordering used both when sorting (hm_sort) and when
  * searching (hm_scan), so the only requirement is that it is a consistent
  * total order. On the WORDS_BIGENDIAN == 0 path the first four bytes are
  * compared as one native 32-bit word: the result is 1 when itm's word is
  * smaller than ref's and -1 when it is larger (note the sign), and only
  * when the words are equal do the remaining keylen - 4 bytes go through
  * memcmp(). Otherwise the whole key is compared with memcmp().
  *
  * keylen must be at least 4 on the word-compare path.
  */
 static inline int hm_cmp(const uint8_t *itm, const uint8_t *ref, unsigned int keylen)
 {
 #if WORDS_BIGENDIAN == 0
     uint32_t i, r;
 
     /*
      * Load the leading word with memcpy rather than through a casted
      * pointer: the inputs are plain byte buffers with no alignment
      * guarantee, and a uint32_t lvalue access to them is undefined.
      */
     memcpy(&i, itm, sizeof(i));
     memcpy(&r, ref, sizeof(r));
     if (i != r)
         return (i < r) * 2 - 1;
     return memcmp(&itm[4], &ref[4], keylen - 4);
 #else
     return memcmp(itm, ref, keylen);
 #endif
 }
 
288057e9
 static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen)
 {
e37613ad
     uint8_t piv[CLI_HASHLEN_MAX], tmph[CLI_HASHLEN_MAX];
bb2f6b0b
     size_t l1, r1;
 
     const char *tmpv;
 
288057e9
     if (l + 1 >= r)
         return;
bb2f6b0b
 
288057e9
     l1 = l + 1, r1 = r;
bb2f6b0b
 
     memcpy(piv, &szh->hash_array[keylen * l], keylen);
288057e9
     while (l1 < r1) {
         if (hm_cmp(&szh->hash_array[keylen * l1], piv, keylen) > 0) {
             r1--;
             if (l1 == r1) break;
             memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
             tmpv = szh->virusnames[l1];
             memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * r1], keylen);
             szh->virusnames[l1] = szh->virusnames[r1];
             memcpy(&szh->hash_array[keylen * r1], tmph, keylen);
             szh->virusnames[r1] = tmpv;
         } else
             l1++;
bb2f6b0b
     }
 
     l1--;
288057e9
     if (l1 != l) {
         memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
         tmpv = szh->virusnames[l1];
         memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * l], keylen);
         szh->virusnames[l1] = szh->virusnames[l];
         memcpy(&szh->hash_array[keylen * l], tmph, keylen);
         szh->virusnames[l] = tmpv;
125827cf
     }
bb2f6b0b
 
     hm_sort(szh, l, l1, keylen);
     hm_sort(szh, r1, r, keylen);
 }
 
e37613ad
 /* flush both size-specific and agnostic hash sets */
288057e9
 void hm_flush(struct cli_matcher *root)
 {
bb2f6b0b
     enum CLI_HASH_TYPE type;
a996bd01
     unsigned int keylen;
     struct cli_sz_hash *szh;
bb2f6b0b
 
288057e9
     if (!root)
         return;
bb2f6b0b
 
288057e9
     for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
         struct cli_htu32 *ht                 = &root->hm.sizehashes[type];
         const struct cli_htu32_element *item = NULL;
         szh                                  = NULL;
a08646d2
 
288057e9
         if (!root->hm.sizehashes[type].capacity)
             continue;
a08646d2
 
288057e9
         while ((item = cli_htu32_next(ht, item))) {
             szh    = (struct cli_sz_hash *)item->data.as_ptr;
             keylen = hashlen[type];
bb2f6b0b
 
288057e9
             if (szh->items > 1)
                 hm_sort(szh, 0, szh->items, keylen);
         }
bb2f6b0b
     }
e37613ad
 
288057e9
     for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
         szh    = &root->hwild.hashes[type];
         keylen = hashlen[type];
e37613ad
 
288057e9
         if (szh->items > 1)
             hm_sort(szh, 0, szh->items, keylen);
e37613ad
     }
bb2f6b0b
 }
 
288057e9
 int cli_hm_have_size(const struct cli_matcher *root, enum CLI_HASH_TYPE type, uint32_t size)
 {
3faa9783
     return (size && size != 0xffffffff && root && root->hm.sizehashes[type].capacity && cli_htu32_find(&root->hm.sizehashes[type], size));
2296ab0f
 }
 
288057e9
 int cli_hm_have_wild(const struct cli_matcher *root, enum CLI_HASH_TYPE type)
 {
e37613ad
     return (root && root->hwild.hashes[type].items);
 }
 
288057e9
 int cli_hm_have_any(const struct cli_matcher *root, enum CLI_HASH_TYPE type)
 {
211edda0
     return (root && (root->hwild.hashes[type].items || root->hm.sizehashes[type].capacity));
 }
 
e37613ad
 /* cli_hm_scan will scan only size-specific hashes, if any */
288057e9
 static int hm_scan(const unsigned char *digest, const char **virname, const struct cli_sz_hash *szh, enum CLI_HASH_TYPE type)
 {
096cea46
     unsigned int keylen;
     size_t l, r;
 
288057e9
     if (!digest || !szh || !szh->items)
         return CL_CLEAN;
096cea46
 
     keylen = hashlen[type];
 
     l = 0;
4842733e
     r = szh->items - 1;
288057e9
     while (l <= r) {
         size_t c = (l + r) / 2;
         int res  = hm_cmp(digest, &szh->hash_array[keylen * c], keylen);
 
         if (res < 0) {
             if (!c)
                 break;
             r = c - 1;
         } else if (res > 0)
             l = c + 1;
         else {
             if (virname)
                 *virname = szh->virusnames[c];
             return CL_VIRUS;
         }
096cea46
     }
     return CL_CLEAN;
 }
3faa9783
 
e37613ad
 /* cli_hm_scan will scan only size-specific hashes, if any */
288057e9
 int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type)
 {
e37613ad
     const struct cli_htu32_element *item;
     struct cli_sz_hash *szh;
 
288057e9
     if (!digest || !size || size == 0xffffffff || !root || !root->hm.sizehashes[type].capacity)
         return CL_CLEAN;
e37613ad
 
     item = cli_htu32_find(&root->hm.sizehashes[type], size);
288057e9
     if (!item)
         return CL_CLEAN;
e37613ad
 
     szh = (struct cli_sz_hash *)item->data.as_ptr;
 
     return hm_scan(digest, virname, szh, type);
 }
 
 /* cli_hm_scan_wild will scan only size-agnostic hashes, if any */
288057e9
 int cli_hm_scan_wild(const unsigned char *digest, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type)
 {
     if (!digest || !root || !root->hwild.hashes[type].items)
         return CL_CLEAN;
e37613ad
 
     return hm_scan(digest, virname, &root->hwild.hashes[type], type);
 }
 
 /* free both size-specific and agnostic hash sets */
288057e9
 void hm_free(struct cli_matcher *root)
 {
3faa9783
     enum CLI_HASH_TYPE type;
 
288057e9
     if (!root)
         return;
3faa9783
 
288057e9
     for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
         struct cli_htu32 *ht                 = &root->hm.sizehashes[type];
         const struct cli_htu32_element *item = NULL;
3faa9783
 
288057e9
         if (!root->hm.sizehashes[type].capacity)
             continue;
3faa9783
 
288057e9
         while ((item = cli_htu32_next(ht, item))) {
             struct cli_sz_hash *szh = (struct cli_sz_hash *)item->data.as_ptr;
3faa9783
 
544fa973
             MPOOL_FREE(root->mempool, szh->hash_array);
288057e9
             while (szh->items)
544fa973
                 MPOOL_FREE(root->mempool, (void *)szh->virusnames[--szh->items]);
             MPOOL_FREE(root->mempool, szh->virusnames);
             MPOOL_FREE(root->mempool, szh);
288057e9
         }
         cli_htu32_free(ht, root->mempool);
3faa9783
     }
e37613ad
 
288057e9
     for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
         struct cli_sz_hash *szh = &root->hwild.hashes[type];
e37613ad
 
288057e9
         if (!szh->items)
             continue;
e37613ad
 
544fa973
         MPOOL_FREE(root->mempool, szh->hash_array);
288057e9
         while (szh->items)
544fa973
             MPOOL_FREE(root->mempool, (void *)szh->virusnames[--szh->items]);
         MPOOL_FREE(root->mempool, szh->virusnames);
e37613ad
     }
3faa9783
 }