libclamav/cache.c
6e11bf61
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2010-2013 Sourcefire, Inc.
6e11bf61
  *
60790424
  *  Authors: aCaB <acab@clamav.net>, Török Edvin <edwin@clamav.net>
6e11bf61
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
60b21251
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
a99a6040
 #include <string.h>
 #include <stdlib.h>
 #include <pthread.h>
 #include <assert.h>
60b21251
 
511c2e79
 #include "mpool.h"
 #include "clamav.h"
 #include "cache.h"
ea6f1300
 #include "fmap.h"
511c2e79
 
b0e47a6d
 // #define USE_LRUHASHCACHE /* The replacement policy algorithm to use */
 #define USE_SPLAY
 
 #if defined(CL_THREAD_SAFE) && defined(USE_LRUHASHCACHE)
91505254
 static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
52f52931
 
544fa973
 /* The number of root trees and the chooser function
52f52931
    Each tree is protected by a mutex against concurrent access */
 /* #define TREES 1 */
 /* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
 #define TREES 256
288057e9
 static inline unsigned int getkey(uint8_t *hash)
 {
     return *hash;
 }
52f52931
 /* #define TREES 4096 */
 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
 /* #define TREES 65536 */
 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
 
 /* The number of nodes in each tree */
6223810b
 #define NODES 256
5b95022b
 
52f52931
 /* LRUHASHCACHE --------------------------------------------------------------------- */
5b95022b
 #ifdef USE_LRUHASHCACHE
a6f7b467
 struct cache_key {
402f4b19
     int64_t digest[2];
a6f7b467
     uint32_t size; /* 0 is used to mark an empty hash slot! */
     struct cache_key *lru_next, *lru_prev;
 };
 
 struct cache_set {
     struct cache_key *data;
     size_t maxelements; /* considering load factor */
402f4b19
     size_t maxdeleted;
a6f7b467
     size_t elements;
402f4b19
     size_t deleted;
a6f7b467
     struct cache_key *lru_head, *lru_tail;
 };
 
 #define CACHE_KEY_DELETED ~0u
 #define CACHE_KEY_EMPTY 0
 
 static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
 {
     while (howmany--) {
288057e9
         struct cache_key *old;
         assert(map->lru_head);
         /* Remove a key from the head of the list */
         old = map->lru_head;
         assert(!old->lru_prev);
         map->lru_head = old->lru_next;
         old->size     = CACHE_KEY_DELETED;
         /* This slot is now deleted, it is not empty,
a6f7b467
 	 * because previously we could have inserted a key that has seen this
 	 * slot as occupied, to find that key we need to ensure that all keys
 	 * that were occupied when the key was inserted, are seen as occupied
 	 * when searching too.
 	 * Of course when inserting a new value, we treat deleted slots as
 	 * empty.
 	 * We only replace old values with new values, but there is no guarantee
 	 * that the newly inserted value would hash to same place as the value
 	 * we remove due to LRU! */
288057e9
         if (old == map->lru_tail)
             map->lru_tail = 0;
         map->elements--;
         map->deleted++;
a6f7b467
     }
 }
 
402f4b19
 static inline int cacheset_lookup_internal(struct cache_set *map,
288057e9
                                            const char *md5, size_t size,
                                            uint32_t *insert_pos, int deletedok)
a6f7b467
 {
288057e9
     const struct cache_key *data = map->data;
     uint32_t capmask             = NODES - 1;
402f4b19
     const struct cache_key *k;
     uint32_t idx, tries = 0;
     uint64_t md5_0, md5_1;
     uint64_t md5a[2];
 
     memcpy(&md5a, md5, 16);
     md5_0 = md5a[0];
     md5_1 = md5a[1];
288057e9
     idx   = md5_1 & capmask;
     k     = &data[idx];
402f4b19
     while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
288057e9
         if (k->digest[0] == md5_0 &&
             k->digest[1] == md5_1 &&
             k->size == size) {
             /* found key */
             *insert_pos = idx;
             return 1;
         }
         if (deletedok && k->size == CACHE_KEY_DELETED) {
             /* treat deleted slot as empty */
             *insert_pos = idx;
             return 0;
         }
         idx = (idx + tries++) & capmask;
         k   = &data[idx];
a6f7b467
     }
     /* found empty pos */
     *insert_pos = idx;
     return 0;
 }
 
 static inline void lru_remove(struct cache_set *map, struct cache_key *newkey)
 {
     if (newkey->lru_next)
288057e9
         newkey->lru_next->lru_prev = newkey->lru_prev;
a6f7b467
     if (newkey->lru_prev)
288057e9
         newkey->lru_prev->lru_next = newkey->lru_next;
a6f7b467
     if (newkey == map->lru_head)
288057e9
         map->lru_head = newkey->lru_next;
a6f7b467
 }
 
 static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
 {
     if (!map->lru_head)
288057e9
         map->lru_head = newkey;
a6f7b467
     if (map->lru_tail)
288057e9
         map->lru_tail->lru_next = newkey;
a6f7b467
     newkey->lru_next = NULL;
     newkey->lru_prev = map->lru_tail;
288057e9
     map->lru_tail    = newkey;
a6f7b467
 }
 
6223810b
 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool);
 static int cacheset_init(struct cache_set *map, mpool_t *mempool);
402f4b19
 
6223810b
 static void cacheset_rehash(struct cache_set *map, mpool_t *mempool)
402f4b19
 {
     unsigned i;
     int ret;
     struct cache_set tmp_set;
     struct cache_key *key;
b0e47a6d
 #ifdef CL_THREAD_SAFE
402f4b19
     pthread_mutex_lock(&pool_mutex);
b0e47a6d
 #endif
6223810b
     ret = cacheset_init(&tmp_set, mempool);
b0e47a6d
 #ifdef CL_THREAD_SAFE
402f4b19
     pthread_mutex_unlock(&pool_mutex);
b0e47a6d
 #endif
402f4b19
     if (ret)
288057e9
         return;
402f4b19
 
     key = map->lru_head;
288057e9
     for (i = 0; key && i < tmp_set.maxelements / 2; i++) {
         cacheset_add(&tmp_set, (unsigned char *)&key->digest, key->size, mempool);
         key = key->lru_next;
402f4b19
     }
b0e47a6d
 #ifdef CL_THREAD_SAFE
402f4b19
     pthread_mutex_lock(&pool_mutex);
b0e47a6d
 #endif
544fa973
     MPOOL_FREE(mempool, map->data);
b0e47a6d
 #ifdef CL_THREAD_SAFE
402f4b19
     pthread_mutex_unlock(&pool_mutex);
b0e47a6d
 #endif
402f4b19
     memcpy(map, &tmp_set, sizeof(tmp_set));
 }
 
6223810b
 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
a6f7b467
 {
     int ret;
     uint32_t pos;
     struct cache_key *newkey;
402f4b19
 
     if (map->elements >= map->maxelements) {
288057e9
         cacheset_lru_remove(map, 1);
         if (map->deleted >= map->maxdeleted) {
             cacheset_rehash(map, mempool);
         }
402f4b19
     }
a6f7b467
     assert(map->elements < map->maxelements);
 
288057e9
     ret    = cacheset_lookup_internal(map, md5, size, &pos, 1);
a6f7b467
     newkey = &map->data[pos];
402f4b19
     if (newkey->size == CACHE_KEY_DELETED)
288057e9
         map->deleted--;
a6f7b467
     if (ret) {
288057e9
         /* was already added, remove from LRU list */
         lru_remove(map, newkey);
a6f7b467
     }
     /* add new key to tail of LRU list */
5b95022b
     memcpy(&map->data[pos].digest, md5, sizeof(map->data[pos].digest));
     map->data[pos].size = size;
a6f7b467
     lru_addtail(map, newkey);
 
     map->elements++;
 
     assert(pos < map->maxelements);
 }
 
59d02e32
 static void cacheset_remove(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
 {
     int ret;
     uint32_t pos;
     struct cache_key *newkey;
288057e9
     ret    = cacheset_lookup_internal(map, md5, size, &pos, 1);
59d02e32
     newkey = &map->data[pos];
     if (!ret || (newkey->size == CACHE_KEY_DELETED)) {
         /* already deleted */
         return;
     }
     /* remove from list */
     lru_remove(map, newkey);
     newkey->size = CACHE_KEY_DELETED;
     map->deleted++;
     map->elements--;
     if (map->deleted >= map->maxdeleted) {
         cacheset_rehash(map, mempool);
     }
 }
 
5b95022b
 static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t size)
a6f7b467
 {
     struct cache_key *newkey;
     int ret;
     uint32_t pos;
402f4b19
 
5b95022b
     ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
a6f7b467
     if (!ret)
288057e9
         return 0;
a6f7b467
     newkey = &map->data[pos];
     /* update LRU position: move to tail */
     lru_remove(map, newkey);
     lru_addtail(map, newkey);
6223810b
     return 1;
a6f7b467
 }
 
288057e9
 static int cacheset_init(struct cache_set *map, mpool_t *mempool)
 {
544fa973
     map->data = MPOOL_CALLOC(mempool, NODES, sizeof(*map->data));
5b95022b
     if (!map->data)
288057e9
         return CL_EMEM;
6223810b
     map->maxelements = 80 * NODES / 100;
288057e9
     map->maxdeleted  = NODES - map->maxelements - 1;
     map->elements    = 0;
5b95022b
     map->lru_head = map->lru_tail = NULL;
     return 0;
 }
6223810b
 
288057e9
 static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
 {
544fa973
     MPOOL_FREE(mempool, cs->data);
6223810b
     cs->data = NULL;
 }
 
a2c45a6b
 #endif /* USE_LRUHASHCACHE */
5b95022b
 
52f52931
 /* SPLAY --------------------------------------------------------------------- */
5b95022b
 #ifdef USE_SPLAY
52f52931
 
 struct node { /* a node */
a2c45a6b
     int64_t digest[2];
5b95022b
     struct node *left;
     struct node *right;
5bf3c763
     struct node *up;
08605d2f
     struct node *next;
     struct node *prev;
1e4d5656
     uint32_t size;
2b70f02a
     uint32_t minrec;
5b95022b
 };
 
52f52931
 struct cache_set { /* a tree */
5b95022b
     struct node *data;
     struct node *root;
08605d2f
     struct node *first;
     struct node *last;
5b95022b
 };
 
52f52931
 /* Allocates all the nodes and sets up the replacement chain */
288057e9
 static int cacheset_init(struct cache_set *cs, mpool_t *mempool)
 {
08605d2f
     unsigned int i;
b0e47a6d
 
 #ifndef USE_MPOOL
     UNUSEDPARAM(mempool);
 #endif
 
544fa973
     cs->data = MPOOL_CALLOC(mempool, NODES, sizeof(*cs->data));
08605d2f
     cs->root = NULL;
5b95022b
 
288057e9
     if (!cs->data)
         return 1;
08605d2f
 
288057e9
     for (i = 1; i < NODES; i++) {
         cs->data[i - 1].next = &cs->data[i];
         cs->data[i].prev     = &cs->data[i - 1];
08605d2f
     }
 
     cs->first = cs->data;
288057e9
     cs->last  = &cs->data[NODES - 1];
08605d2f
 
5b95022b
     return 0;
 }
 
52f52931
 /* Frees all the nodes */
288057e9
 static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
 {
b0e47a6d
 #ifndef USE_MPOOL
     UNUSEDPARAM(mempool);
 #endif
 
544fa973
     MPOOL_FREE(mempool, cs->data);
6223810b
     cs->data = NULL;
 }
 
52f52931
 /* The left/right cooser for the splay tree */
288057e9
 static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb)
 {
     if (a[1] < b[1]) return -1;
     if (a[1] > b[1]) return 1;
     if (a[0] < b[0]) return -1;
     if (a[0] > b[0]) return 1;
     if (sa < sb) return -1;
     if (sa > sb) return 1;
52f52931
     return 0;
a2c45a6b
 }
5b95022b
 
52f52931
 /* #define PRINT_TREE */
402f4b19
 #ifdef PRINT_TREE
 #define ptree printf
 #else
52f52931
 #define ptree(...)
402f4b19
 #endif
 
52f52931
 /* Debug function to print the tree and check its consistency */
 /* #define CHECK_TREE */
402f4b19
 #ifdef CHECK_TREE
288057e9
 static int printtree(struct cache_set *cs, struct node *n, int d)
 {
402f4b19
     int i;
     int ab = 0;
f2e50833
     if ((n == NULL) || (cs == NULL) || (cs->data == NULL)) return 0;
288057e9
     if (n == cs->root) {
         ptree("--------------------------\n");
     }
     ab |= printtree(cs, n->right, d + 1);
     if (n->right) {
         if (cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) {
             for (i = 0; i < d; i++) ptree("        ");
             ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]);
             ab = 1;
         }
     }
     for (i = 0; i < d; i++) ptree("        ");
     ptree("%08x(%02u)\n", n->digest[1] >> 48, n - cs->data);
     if (n->left) {
         if (cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) {
             for (i = 0; i < d; i++) ptree("        ");
             ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]);
             ab = 1;
         }
     }
     if (d) {
         if (!n->up) {
             printf("no parent, [node %02u]!\n", n - cs->data);
             ab = 1;
         } else {
             if (n->up->left != n && n->up->right != n) {
                 printf("broken parent [node %02u, parent node %02u]\n", n - cs->data, n->up - cs->data);
                 ab = 1;
             }
         }
402f4b19
     } else {
288057e9
         if (n->up) {
             printf("root with a parent, [node %02u]!\n", n - cs->data);
             ab = 1;
         }
402f4b19
     }
288057e9
     ab |= printtree(cs, n->left, d + 1);
402f4b19
     return ab;
 }
 #else
288057e9
 #define printtree(a, b, c) (0)
402f4b19
 #endif
 
f2e50833
 /* For troubleshooting only; prints out one specific node */
 /* #define PRINT_NODE */
 #ifdef PRINT_NODE
288057e9
 static void printnode(const char *prefix, struct cache_set *cs, struct node *n)
 {
f2e50833
     if (!prefix || !cs || !cs->data) {
         printf("bad args!\n");
         return;
     }
     if (!n) {
         printf("no node!\n");
         return;
     }
     printf("%s node [%02u]:", prefix, n - cs->data);
     printf(" size=%lu digest=%llx,%llx\n", (unsigned long)(n->size), n->digest[0], n->digest[1]);
     printf("\tleft=");
288057e9
     if (n->left)
f2e50833
         printf("%02u ", n->left - cs->data);
     else
         printf("NULL ");
     printf("right=");
288057e9
     if (n->right)
f2e50833
         printf("%02u ", n->right - cs->data);
     else
         printf("NULL ");
     printf("up=");
288057e9
     if (n->up)
f2e50833
         printf("%02u ", n->up - cs->data);
     else
         printf("NULL ");
 
     printf("\tprev=");
288057e9
     if (n->prev)
f2e50833
         printf("%02u ", n->prev - cs->data);
     else
         printf("NULL ");
     printf("next=");
288057e9
     if (n->next)
f2e50833
         printf("%02u\n", n->next - cs->data);
     else
         printf("NULL\n");
 }
 #else
288057e9
 #define printnode(a, b, c) (0)
f2e50833
 #endif
 
 /* #define PRINT_CHAINS */
 #ifdef PRINT_CHAINS
 /* For troubleshooting only, print the chain forwards and back */
288057e9
 static inline void printchain(const char *prefix, struct cache_set *cs)
 {
f2e50833
     if (!cs || !cs->data) return;
     if (prefix) printf("%s: ", prefix);
     printf("chain by next: ");
     {
         unsigned int i = 0;
         struct node *x = cs->first;
288057e9
         while (x) {
f2e50833
             printf("%02d,", x - cs->data);
288057e9
             x = x->next;
f2e50833
             i++;
         }
         printf(" [count=%u]\nchain by prev: ", i);
288057e9
         x = cs->last;
         i = 0;
         while (x) {
f2e50833
             printf("%02d,", x - cs->data);
288057e9
             x = x->prev;
f2e50833
             i++;
         }
         printf(" [count=%u]\n", i);
     }
 }
 #else
288057e9
 #define printchain(a, b) (0)
f2e50833
 #endif
 
52f52931
 /* Looks up a node and splays it up to the root of the tree */
288057e9
 static int splay(int64_t *md5, size_t len, struct cache_set *cs)
 {
2b70f02a
     struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
402f4b19
     int comp, found = 0;
5bf3c763
 
288057e9
     if (!root)
         return 0;
5b95022b
 
288057e9
     while (1) {
         comp = cmp(md5, len, root->digest, root->size);
         if (comp < 0) {
             if (!root->left) break;
             if (cmp(md5, len, root->left->digest, root->left->size) < 0) {
                 temp       = root->left;
5b95022b
                 root->left = temp->right;
288057e9
                 if (temp->right) temp->right->up = root;
5b95022b
                 temp->right = root;
288057e9
                 root->up    = temp;
                 root        = temp;
                 if (!root->left) break;
             }
5b95022b
             right->left = root;
288057e9
             root->up    = right;
             right       = root;
             root        = root->left;
         } else if (comp > 0) {
             if (!root->right) break;
             if (cmp(md5, len, root->right->digest, root->right->size) > 0) {
                 temp        = root->right;
5b95022b
                 root->right = temp->left;
288057e9
                 if (temp->left) temp->left->up = root;
5b95022b
                 temp->left = root;
288057e9
                 root->up   = temp;
                 root       = temp;
                 if (!root->right) break;
             }
             left->right = root;
             root->up    = left;
             left        = root;
             root        = root->right;
         } else {
             found = 1;
             break;
         }
5b95022b
     }
a2c45a6b
 
5b95022b
     left->right = root->left;
288057e9
     if (root->left) root->left->up = left;
5b95022b
     right->left = root->right;
288057e9
     if (root->right) root->right->up = right;
5b95022b
     root->left = next.right;
288057e9
     if (next.right) next.right->up = root;
5b95022b
     root->right = next.left;
288057e9
     if (next.left) next.left->up = root;
211fc872
     root->up = NULL;
5b95022b
     cs->root = root;
402f4b19
     return found;
5b95022b
 }
 
52f52931
 /* Looks up an hash in the tree and maintains the replacement chain */
288057e9
 static inline int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t reclevel)
 {
a2c45a6b
     int64_t hash[2];
5b95022b
 
     memcpy(hash, md5, 16);
288057e9
     if (splay(hash, size, cs)) {
         struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
402f4b19
 #ifdef PRINT_CHAINS
288057e9
         printf("promoting %02d\n", p - cs->data);
         printchain("before", cs);
402f4b19
 #endif
288057e9
         if (q) {
             if (o)
                 o->next = q;
             else
                 cs->first = q;
             q->prev        = o;
             cs->last->next = p;
             p->prev        = cs->last;
             p->next        = NULL;
             cs->last       = p;
         }
402f4b19
 #ifdef PRINT_CHAINS
288057e9
         printchain("after", cs);
402f4b19
 #endif
288057e9
         if (reclevel >= p->minrec)
             return 1;
402f4b19
     }
     return 0;
5b95022b
 }
 
52f52931
 /* If the hash is present nothing happens.
    Otherwise a new node is created for the hash picking one from the begin of the chain.
    Used nodes are moved to the end of the chain */
288057e9
 static inline void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t reclevel)
 {
5b95022b
     struct node *newnode;
a2c45a6b
     int64_t hash[2];
5b95022b
 
     memcpy(hash, md5, 16);
288057e9
     if (splay(hash, size, cs)) {
         if (cs->root->minrec > reclevel)
             cs->root->minrec = reclevel;
         return; /* Already there */
2b70f02a
     }
08605d2f
 
402f4b19
     ptree("1:\n");
288057e9
     if (printtree(cs, cs->root, 0)) {
         cli_errmsg("cacheset_add: inconsistent tree before choosing newnode, good luck\n");
         return;
402f4b19
     }
 
08605d2f
     newnode = cs->first;
288057e9
     while (newnode) {
         if (!newnode->right && !newnode->left)
b89dc676
             break;
288057e9
         if (newnode->next) {
             if (newnode == newnode->next) {
f2e50833
                 cli_errmsg("cacheset_add: cache chain in a bad state\n");
                 return;
             }
             newnode = newnode->next;
288057e9
         } else {
             cli_warnmsg("cacheset_add: end of chain reached\n");
             return;
b89dc676
         }
08605d2f
     }
288057e9
     if (!newnode) {
         cli_errmsg("cacheset_add: tree has got no end nodes\n");
         return;
     }
     if (newnode->up) {
         if (newnode->up->left == newnode)
             newnode->up->left = NULL;
         else
             newnode->up->right = NULL;
     }
     if (newnode->prev)
         newnode->prev->next = newnode->next;
     if (newnode->next)
         newnode->next->prev = newnode->prev;
     if (cs->first == newnode)
         cs->first = newnode->next;
 
     newnode->prev  = cs->last;
     newnode->next  = NULL;
08605d2f
     cs->last->next = newnode;
288057e9
     cs->last       = newnode;
402f4b19
 
     ptree("2:\n");
288057e9
     if (printtree(cs, cs->root, 0)) {
         cli_errmsg("cacheset_add: inconsistent tree before adding newnode, good luck\n");
         return;
402f4b19
     }
5b95022b
 
288057e9
     if (!cs->root) {
         newnode->left  = NULL;
         newnode->right = NULL;
5b95022b
     } else {
288057e9
         if (cmp(hash, size, cs->root->digest, cs->root->size) < 0) {
             newnode->left  = cs->root->left;
             newnode->right = cs->root;
             cs->root->left = NULL;
         } else {
             newnode->right  = cs->root->right;
             newnode->left   = cs->root;
             cs->root->right = NULL;
         }
         if (newnode->left) newnode->left->up = newnode;
         if (newnode->right) newnode->right->up = newnode;
5b95022b
     }
     newnode->digest[0] = hash[0];
     newnode->digest[1] = hash[1];
288057e9
     newnode->up        = NULL;
     newnode->size      = size;
     newnode->minrec    = reclevel;
     cs->root           = newnode;
402f4b19
 
     ptree("3: %lld\n", hash[1]);
288057e9
     if (printtree(cs, cs->root, 0)) {
         cli_errmsg("cacheset_add: inconsistent tree after adding newnode, good luck\n");
         return;
402f4b19
     }
f2e50833
     printnode("newnode", cs, newnode);
5b95022b
 }
f2e50833
 
59d02e32
 /* If the hash is not present nothing happens other than splaying the tree.
544fa973
    Otherwise the identified node is removed from the tree and then placed back at
59d02e32
    the front of the chain. */
288057e9
 static inline void cacheset_remove(struct cache_set *cs, unsigned char *md5, size_t size)
 {
59d02e32
     struct node *targetnode;
     struct node *reattachnode;
     int64_t hash[2];
 
     memcpy(hash, md5, 16);
288057e9
     if (splay(hash, size, cs) != 1) {
         cli_dbgmsg("cacheset_remove: node not found in tree\n");
         return; /* No op */
59d02e32
     }
 
     ptree("cacheset_remove: node found and splayed to root\n");
     targetnode = cs->root;
f2e50833
     printnode("targetnode", cs, targetnode);
59d02e32
 
     /* First fix the tree */
288057e9
     if (targetnode->left == NULL) {
59d02e32
         /* At left edge so prune */
         cs->root = targetnode->right;
288057e9
         if (cs->root)
59d02e32
             cs->root->up = NULL;
288057e9
     } else {
59d02e32
         /* new root will come from leftside tree */
288057e9
         cs->root     = targetnode->left;
59d02e32
         cs->root->up = NULL;
         /* splay tree, expecting not found, bringing rightmost member to root */
         splay(hash, size, cs);
f2e50833
 
59d02e32
         if (targetnode->right) {
             /* reattach right tree to clean right-side attach point */
             reattachnode = cs->root;
288057e9
             while (reattachnode->right)
59d02e32
                 reattachnode = reattachnode->right; /* shouldn't happen, but safer in case of dupe */
288057e9
             reattachnode->right   = targetnode->right;
f2e50833
             targetnode->right->up = reattachnode;
59d02e32
         }
     }
288057e9
     targetnode->size      = (size_t)0;
f2e50833
     targetnode->digest[0] = 0;
     targetnode->digest[1] = 0;
288057e9
     targetnode->up        = NULL;
     targetnode->left      = NULL;
     targetnode->right     = NULL;
59d02e32
 
     /* Tree is fixed, so now fix chain around targetnode */
288057e9
     if (targetnode->prev)
59d02e32
         targetnode->prev->next = targetnode->next;
288057e9
     if (targetnode->next)
59d02e32
         targetnode->next->prev = targetnode->prev;
288057e9
     if (cs->last == targetnode)
59d02e32
         cs->last = targetnode->prev;
 
b89dc676
     /* Put targetnode at front of chain, if not there already */
288057e9
     if (cs->first != targetnode) {
b89dc676
         targetnode->next = cs->first;
288057e9
         if (cs->first)
b89dc676
             cs->first->prev = targetnode;
         cs->first = targetnode;
59d02e32
     }
     targetnode->prev = NULL;
f2e50833
 
     printnode("root", cs, cs->root);
     printnode("first", cs, cs->first);
     printnode("last", cs, cs->last);
 
     printchain("remove (after)", cs);
59d02e32
 }
5b95022b
 #endif /* USE_SPLAY */
64e6c97f
 
52f52931
 /* COMMON STUFF --------------------------------------------------------------------- */
64e6c97f
 
6223810b
 struct CACHE {
5b95022b
     struct cache_set cacheset;
fa9612e6
 #ifdef CL_THREAD_SAFE
5b95022b
     pthread_mutex_t mutex;
fa9612e6
 #endif
6223810b
 };
08605d2f
 
52f52931
 /* Allocates the trees for the engine cache */
288057e9
 int cli_cache_init(struct cl_engine *engine)
 {
a7cf187a
     struct CACHE *cache;
6223810b
     unsigned int i, j;
5bf3c763
 
288057e9
     if (!engine) {
         cli_errmsg("cli_cache_init: mpool malloc fail\n");
         return 1;
64e6c97f
     }
6223810b
 
af34d981
     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
         cli_dbgmsg("cli_cache_init: Caching disabled.\n");
         return 0;
     }
 
544fa973
     if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * TREES))) {
288057e9
         cli_errmsg("cli_cache_init: mpool malloc fail\n");
         return 1;
64e6c97f
     }
 
288057e9
     for (i = 0; i < TREES; i++) {
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
         if (pthread_mutex_init(&cache[i].mutex, NULL)) {
             cli_errmsg("cli_cache_init: mutex init fail\n");
             for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
             for (j = 0; j < i; j++) pthread_mutex_destroy(&cache[j].mutex);
544fa973
             MPOOL_FREE(engine->mempool, cache);
288057e9
             return 1;
         }
b0e47a6d
 #endif
288057e9
         if (cacheset_init(&cache[i].cacheset, engine->mempool)) {
             for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
             for (j = 0; j <= i; j++) pthread_mutex_destroy(&cache[j].mutex);
b0e47a6d
 #endif
544fa973
             MPOOL_FREE(engine->mempool, cache);
288057e9
             return 1;
         }
64e6c97f
     }
6223810b
     engine->cache = cache;
64e6c97f
     return 0;
 }
 
52f52931
 /* Frees the engine cache */
288057e9
 void cli_cache_destroy(struct cl_engine *engine)
 {
a7cf187a
     struct CACHE *cache;
6223810b
     unsigned int i;
5b95022b
 
288057e9
     if (!engine || !(cache = engine->cache))
         return;
af34d981
 
34e9acb0
     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
         return;
     }
 
288057e9
     for (i = 0; i < TREES; i++) {
         cacheset_destroy(&cache[i].cacheset, engine->mempool);
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
         pthread_mutex_destroy(&cache[i].mutex);
b0e47a6d
 #endif
6223810b
     }
544fa973
     MPOOL_FREE(engine->mempool, cache);
6223810b
 }
 
52f52931
 /* Looks up an hash in the proper tree */
288057e9
 static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t reclevel)
 {
64e6c97f
     unsigned int key = getkey(md5);
288057e9
     int ret          = CL_VIRUS;
64e6c97f
     struct CACHE *c;
 
     c = &cache[key];
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
     if (pthread_mutex_lock(&c->mutex)) {
         cli_errmsg("cache_lookup_hash: cache_lookup_hash: mutex lock fail\n");
         return ret;
64e6c97f
     }
b0e47a6d
 #endif
5b95022b
 
f2e50833
     /* cli_warnmsg("cache_lookup_hash: key is %u\n", key); */
 
2b70f02a
     ret = (cacheset_lookup(&c->cacheset, md5, len, reclevel)) ? CL_CLEAN : CL_VIRUS;
b0e47a6d
 #ifdef CL_THREAD_SAFE
5b95022b
     pthread_mutex_unlock(&c->mutex);
b0e47a6d
     // if(ret == CL_CLEAN) cli_warnmsg("cached\n");
 #endif
64e6c97f
     return ret;
 }
 
52f52931
 /* Adds an hash to the cache */
288057e9
 void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
 {
64e6c97f
     unsigned int key = getkey(md5);
2b70f02a
     uint32_t level;
64e6c97f
     struct CACHE *c;
 
288057e9
     if (!ctx || !ctx->engine || !ctx->engine->cache)
         return;
af34d981
 
34e9acb0
     if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
         cli_dbgmsg("cache_add: Caching disabled. Not adding sample to cache.\n");
         return;
     }
 
288057e9
     level = (*ctx->fmap && (*ctx->fmap)->dont_cache_flag) ? ctx->recursion : 0;
65cc5979
     if (ctx->found_possibly_unwanted && (level || !ctx->recursion))
288057e9
         return;
048a88e6
     if (SCAN_ALLMATCHES && (ctx->num_viruses > 0)) {
288057e9
         cli_dbgmsg("cache_add: alert found within same topfile, skipping cache\n");
         return;
a3dcc429
     }
6223810b
     c = &ctx->engine->cache[key];
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
     if (pthread_mutex_lock(&c->mutex)) {
         cli_errmsg("cli_add: mutex lock fail\n");
         return;
64e6c97f
     }
b0e47a6d
 #endif
5b95022b
 
f2e50833
     /* cli_warnmsg("cache_add: key is %u\n", key); */
 
6223810b
 #ifdef USE_LRUHASHCACHE
52f52931
     cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool);
6223810b
 #else
 #ifdef USE_SPLAY
2b70f02a
     cacheset_add(&c->cacheset, md5, size, level);
6223810b
 #else
 #error #define USE_SPLAY or USE_LRUHASHCACHE
 #endif
 #endif
5b95022b
 
b0e47a6d
 #ifdef CL_THREAD_SAFE
64e6c97f
     pthread_mutex_unlock(&c->mutex);
b0e47a6d
 #endif
2b70f02a
     cli_dbgmsg("cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
64e6c97f
     return;
 }
 
59d02e32
 /* Removes a hash from the cache */
288057e9
 void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine)
 {
59d02e32
     unsigned int key = getkey(md5);
     struct CACHE *c;
 
288057e9
     if (!engine || !engine->cache)
         return;
af34d981
 
34e9acb0
     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
         cli_dbgmsg("cache_remove: Caching disabled.\n");
         return;
     }
 
f2e50833
     /* cli_warnmsg("cache_remove: key is %u\n", key); */
 
f129bc1f
     c = &engine->cache[key];
b0e47a6d
 #ifdef CL_THREAD_SAFE
288057e9
     if (pthread_mutex_lock(&c->mutex)) {
         cli_errmsg("cli_add: mutex lock fail\n");
         return;
59d02e32
     }
b0e47a6d
 #endif
59d02e32
 
 #ifdef USE_LRUHASHCACHE
f129bc1f
     cacheset_remove(&c->cacheset, md5, size, engine->mempool);
59d02e32
 #else
 #ifdef USE_SPLAY
     cacheset_remove(&c->cacheset, md5, size);
 #else
 #error #define USE_SPLAY or USE_LRUHASHCACHE
 #endif
 #endif
 
b0e47a6d
 #ifdef CL_THREAD_SAFE
59d02e32
     pthread_mutex_unlock(&c->mutex);
b0e47a6d
 #endif
59d02e32
     cli_dbgmsg("cache_remove: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15]);
     return;
 }
 
7d4213a7
 int cache_get_MD5(unsigned char *hash, cli_ctx *ctx)
 {
f6798708
     fmap_t *map;
     size_t todo, at = 0;
da6e06dd
     void *hashctx;
34e9acb0
 
288057e9
     map  = *ctx->fmap;
f6798708
     todo = map->len;
 
da6e06dd
     hashctx = cl_hash_init("md5");
f077c617
     if (!(hashctx))
         return CL_VIRUS;
 
288057e9
     while (todo) {
b2e7c931
         const void *buf;
         size_t readme = todo < FILEBUFF ? todo : FILEBUFF;
 
288057e9
         if (!(buf = fmap_need_off_once(map, at, readme))) {
da6e06dd
             cl_hash_destroy(hashctx);
b2e7c931
             return CL_EREAD;
da6e06dd
         }
b2e7c931
 
         todo -= readme;
         at += readme;
 
cd94be7a
         if (cl_update_hash(hashctx, (void *)buf, readme)) {
da6e06dd
             cl_hash_destroy(hashctx);
b2e7c931
             cli_errmsg("cache_check: error reading while generating hash!\n");
             return CL_EREAD;
         }
64e6c97f
     }
b2e7c931
 
da6e06dd
     cl_finish_hash(hashctx, hash);
b2e7c931
 
7d4213a7
     return CL_CLEAN;
 }
 
 /* Hashes a file onto the provided buffer and looks it up the cache.
    Returns CL_VIRUS if found, CL_CLEAN if not FIXME or a recoverable error,
    and returns CL_EREAD if unrecoverable */
288057e9
 int cache_check(unsigned char *hash, cli_ctx *ctx)
 {
7d4213a7
     fmap_t *map;
     int ret;
 
288057e9
     if (!ctx || !ctx->engine || !ctx->engine->cache)
         return CL_VIRUS;
7d4213a7
 
     if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
         cli_dbgmsg("cache_check: Caching disabled. Returning CL_VIRUS.\n");
         return CL_VIRUS;
     }
 
     ret = cache_get_MD5(hash, ctx);
     if (ret != CL_CLEAN)
         return ret;
288057e9
 
7d4213a7
     map = *ctx->fmap;
2b70f02a
     ret = cache_lookup_hash(hash, map->len, ctx->engine->cache, ctx->recursion);
aef8d4ac
     cli_dbgmsg("cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], (ret == CL_VIRUS) ? "negative" : "positive");
     return ret;
64e6c97f
 }