libclamav/uniq.c
937ade08
 /*
  *  md5 based hashtab
  *
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2008-2013 Sourcefire, Inc.
937ade08
  *
  *  Authors: aCaB <acab@clamav.net>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
53ed2cb7
 #include <stdlib.h>
 #if HAVE_STRING_H
 #include <string.h>
 #endif
 
60d8d2c3
 #include "clamav.h"
937ade08
 #include "uniq.h"
53ed2cb7
 #include "others.h"
937ade08
 
288057e9
 /**
  * Allocate a uniq set able to store up to `count` distinct items.
  *
  * The set structure itself comes from cli_calloc(); the md5s entry array is
  * allocated separately with room for `count` entries.
  *
  * @param count  Maximum number of unique items the set may hold; must be
  *               non-zero.
  * @return A new set to be released with uniq_free(), or NULL when `count`
  *         is 0, when the byte size of the entry array is not representable
  *         in size_t, or when an allocation fails.
  */
 struct uniq *uniq_init(uint32_t count)
 {
     struct uniq *U;

     if (!count) {
         return NULL;
     }

     /*
      * Refuse counts whose byte size would wrap size_t (possible where size_t
      * is 32 bits wide). A wrapped product would produce an undersized md5s
      * array while max_unique_items still recorded the full count.
      */
     if ((size_t)count > (size_t)-1 / sizeof(*U->md5s)) {
         return NULL;
     }

     U = cli_calloc(1, sizeof(*U));
     if (!U) {
         return NULL;
     }

     U->md5s = cli_malloc((size_t)count * sizeof(*U->md5s));
     if (!U->md5s) {
         uniq_free(U);
         return NULL;
     }
     U->max_unique_items = count;

     return U;
 }
 
288057e9
 /**
  * Release a set created by uniq_init(), including its md5s entry array.
  *
  * @param U  Set to free. NULL is accepted and ignored, mirroring free().
  */
 void uniq_free(struct uniq *U)
 {
     if (!U) {
         return;
     }

     free(U->md5s);
     free(U);
 }
 
49df8ea7
 cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
288057e9
 {
49df8ea7
     cl_error_t status = CL_EARG;
288057e9
     unsigned int i;
     uint8_t digest[16];
     struct UNIQMD5 *m = NULL;
 
49df8ea7
     if (!U) {
         /* Invalid args */
         goto done;
     }
 
     /* Uniq adds are limited by the maximum allocated in uniq_init(). */
     if (U->cur_unique_items >= U->max_unique_items) {
         /* Attempted to add more uniq items than may be stored. */
         status = CL_EMAXSIZE;
         goto done;
     }
 
     /* Make a hash of the item string */
     if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
         /* Failed to create hash of item. */
         status = CL_EFORMAT;
         goto done;
     }
937ade08
 
49df8ea7
     /* Check for md5 digest match in md5 collection */
288057e9
     if (U->items && U->md5s[U->idx[*digest]].md5[0] == *digest)
         for (m = &U->md5s[U->idx[*digest]]; m; m = m->next)
             if (!memcmp(&digest[1], &m->md5[1], 15)) break;
937ade08
 
288057e9
     if (!m) {
49df8ea7
         /* No match. Add new md5 to list */
288057e9
         const char HEX[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
53ed2cb7
 
288057e9
         m        = &U->md5s[U->items];
         m->count = 0;
53ed2cb7
 
288057e9
         if (U->items && U->md5s[U->idx[*digest]].md5[0] == *digest)
             m->next = &U->md5s[U->idx[*digest]];
         else
             m->next = NULL;
53ed2cb7
 
288057e9
         U->idx[*digest] = U->items;
53ed2cb7
 
288057e9
         for (i = 0; i < 16; i++) {
             m->name[i * 2]     = HEX[digest[i] >> 4 & 0xf];
             m->name[i * 2 + 1] = HEX[digest[i] & 0xf];
             m->md5[i]          = digest[i];
         }
         m->name[32] = '\0';
49df8ea7
 
         /* Increment # of unique items. */
         U->cur_unique_items++;
937ade08
     }
53ed2cb7
 
49df8ea7
     /* Increment total # of items. */
288057e9
     U->items++;
49df8ea7
 
     /* Increment # items matching this md5 digest (probably just this 1). */
     m->count++;
 
     /* Pass back the ascii hash, if requested. */
288057e9
     if (rhash) *rhash = m->name;
49df8ea7
 
     /* Pass back the count, if requested. */
     if (count) *count = m->count;
 
     status = CL_SUCCESS;
 
 done:
     return status;
937ade08
 }
 
49df8ea7
 cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
288057e9
 {
49df8ea7
     cl_error_t status = CL_EARG;
288057e9
     uint8_t digest[16];
     struct UNIQMD5 *m = NULL;
49df8ea7
     uint32_t idx      = 0;
937ade08
 
49df8ea7
     if (!U || !count) {
         /* Invalid args */
         goto done;
     }
937ade08
 
49df8ea7
     *count = 0;
53ed2cb7
 
49df8ea7
     if (!U->items) {
         goto not_found;
288057e9
     }
937ade08
 
49df8ea7
     /* Make a hash of the item string */
     if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
         /* Failed to create hash of item. */
         status = CL_EFORMAT;
         goto done;
     }
 
     /* Get the md5s array index for the bucket list head. */
     idx = U->idx[*digest];
     m   = &U->md5s[idx];
 
     if (m->md5[0] != *digest) {
         /*
          * If the first two bytes in the digest doesn't actually match,
          * then the item has never been added.
          * This is a common scenario because the idx table is initialized
          * to 0's.
          */
         goto not_found;
     }
 
     do {
         if (0 == memcmp(&digest[1], &m->md5[1], 15)) {
             /* The item-hash matched.
              * Pass back the ascii hash value (if requested).
              * Return the count of matching items (will be 1+).
              */
             if (rhash)
                 *rhash = m->name;
             *count = m->count;
             break;
         }
         m = m->next;
     } while (NULL != m);
 
 not_found:
     status = CL_SUCCESS;
 
 done:
     return status;
937ade08
 }