Browse code

A fixed-size, LRU hash-based cache.

Török Edvin authored on 2010/01/07 02:32:34
Showing 1 changed files
... ...
@@ -1,10 +1,10 @@
1
-#include <string.h>
2
-#include <stdlib.h>
3
-#include <pthread.h>
4
-
5 1
 #if HAVE_CONFIG_H
6 2
 #include "clamav-config.h"
7 3
 #endif
4
+#include <string.h>
5
+#include <stdlib.h>
6
+#include <pthread.h>
7
+#include <assert.h>
8 8
 
9 9
 #include "md5.h"
10 10
 #include "mpool.h"
... ...
@@ -14,7 +14,6 @@
14 14
 
15 15
 #define CACHE_PERTURB 10
16 16
 /* 1/10th */
17
-
18 17
 static mpool_t *mempool = NULL;
19 18
 static struct CACHE {
20 19
     struct CACHE_ENTRY {
... ...
@@ -183,3 +182,278 @@ int cache_check(unsigned char *hash, cli_ctx *ctx) {
183 183
     cli_md5_final(hash, &md5);
184 184
     return cache_lookup_hash(hash, ctx);
185 185
 }
186
+
187
/*
 * One slot of the open-addressed hash table.  Occupied slots are also
 * chained into a doubly linked LRU list through lru_next / lru_prev.
 * The member order is significant: digest must stay the first member.
 */
struct cache_key {
    char digest[16];            /* 16 raw digest bytes identifying the entry */
    uint32_t size;              /* 0 is used to mark an empty hash slot! */
    struct cache_key *lru_next; /* next entry towards the LRU tail */
    struct cache_key *lru_prev; /* previous entry towards the LRU head */
};
192
+
193
/*
 * A fixed-size hash set of cache keys with LRU ordering.
 * All fields are protected by `mutex`; callers bracket accesses with
 * cacheset_acquire() / cacheset_release().
 */
struct cache_set {
    struct cache_key *data;     /* slot array holding `capacity` entries */
    size_t capacity;            /* number of slots; must be a power of 2 */
    size_t maxelements;         /* considering load factor */
    size_t elements;            /* number of entries currently stored */
    size_t version;             /* version the stored entries belong to */
    struct cache_key *lru_head; /* least recently used entry */
    struct cache_key *lru_tail; /* most recently used entry */
    pthread_mutex_t mutex;
};
202
+
203
/*
 * Sentinel version: stored in a freshly initialised set and returned by
 * cacheset_lookup() on a miss.  Parenthesized so the expansion stays a single
 * operand wherever the macro is used.
 */
#define CACHE_INVALID_VERSION (~0u)
204
+
205
+/* size must be power of 2! */
206
+static int cacheset_init(struct cache_set* map, size_t maxsize, uint8_t loadfactor)
207
+{
208
+    map->data = cli_calloc(maxsize, sizeof(*map->data));
209
+    if (!map->data)
210
+	return CL_EMEM;
211
+    map->capacity = maxsize;
212
+    map->maxelements = loadfactor*maxsize / 100;
213
+    map->elements = 0;
214
+    map->version = CACHE_INVALID_VERSION;
215
+    map->lru_head = map->lru_tail = NULL;
216
+    if (pthread_mutex_init(&map->mutex, NULL)) {
217
+	cli_errmsg("mutex init fail\n");
218
+	return CL_EMEM;
219
+    }
220
+}
221
+
222
+static void cacheset_destroy(struct cache_set *map)
223
+{
224
+    pthread_mutex_destroy(&map->mutex);
225
+    free(map->data);
226
+}
227
+
228
+static void cacheset_acquire(struct cache_set *map)
229
+{
230
+    pthread_mutex_lock(&map->mutex);
231
+}
232
+
233
+static void cache_setversion(struct cache_set* map, uint32_t version)
234
+{
235
+    unsigned i;
236
+    if (map->version == version)
237
+	return;
238
+    map->version = version;
239
+    map->elements = 0;/* all elements have expired now */
240
+    for (i=0;i<map->capacity;i++)
241
+	map->data[i].size = 0;
242
+    map->lru_head = map->lru_tail = NULL;
243
+}
244
+
245
+static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
246
+{
247
+    while (howmany--) {
248
+	struct cache_key *old;
249
+	assert(map->lru_head);
250
+	assert(!old->lru_prev);
251
+	// Remove a key from the head of the list
252
+	old = map->lru_head;
253
+	map->lru_head = old->lru_next;
254
+	old->size = 0; /* this slot is now empty */
255
+	if (old == map->lru_tail)
256
+	    map->lru_tail = 0;
257
+    }
258
+}
259
+
260
/*
 * 32-bit integer mixing function built from shifts, adds and xors.
 * All arithmetic wraps modulo 2^32.
 */
static inline uint32_t hash32shift(uint32_t key)
{
    key = (key << 15) + ~key;
    key ^= key >> 12;
    key += key << 2;
    key ^= key >> 4;
    key += (key << 3) + (key << 11);
    key ^= key >> 16;
    return key;
}
270
+
271
/*
 * Hash `len` bytes starting at `k` into a slot index in [0, SIZE).
 * Every byte is added to the running value, which is then passed through
 * hash32shift().  SIZE must be a power of 2 because the result is reduced
 * with a bit mask.
 */
static inline size_t hash(const unsigned char* k,const size_t len,const size_t SIZE)
{
    size_t remaining = len;
    size_t acc = 1;

    while (remaining--) {
        /* a plain add suffices; the mixing step spreads the bits */
        acc += *k++;
        acc = hash32shift(acc);
    }
    /* SIZE is power of 2 */
    return acc & (SIZE - 1);
}
284
+
285
+int cacheset_lookup_internal(struct cache_set *map, const struct cache_key *key,
286
+			     uint32_t *insert_pos)
287
+{
288
+    uint32_t idx = hash((const unsigned char*)key, sizeof(*key), map->capacity);
289
+    uint32_t tries = 0;
290
+    struct cache_key *k = &map->data[idx];
291
+    while (k->size) {
292
+	if (k->size == key->size &&
293
+	    !memcmp(k->digest, key, 16)) {
294
+	    /* found key */
295
+	    *insert_pos = idx;
296
+	    return 1;
297
+	}
298
+	idx = (idx + tries++)&(map->capacity-1);
299
+	k = &map->data[idx];
300
+    }
301
+    /* found empty pos */
302
+    *insert_pos = idx;
303
+    return 0;
304
+}
305
+
306
+static inline void lru_remove(struct cache_set *map, struct cache_key *newkey)
307
+{
308
+    if (newkey->lru_next)
309
+	newkey->lru_next->lru_prev = newkey->lru_prev;
310
+    if (newkey->lru_prev)
311
+	newkey->lru_prev->lru_next = newkey->lru_next;
312
+    if (newkey == map->lru_head)
313
+	map->lru_head = newkey->lru_next;
314
+}
315
+
316
+static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
317
+{
318
+    if (!map->lru_head)
319
+	map->lru_head = newkey;
320
+    if (map->lru_tail)
321
+	map->lru_tail->lru_next = newkey;
322
+    newkey->lru_next = NULL;
323
+    newkey->lru_prev = map->lru_tail;
324
+    map->lru_tail = newkey;
325
+}
326
+
327
+static void cacheset_add(struct cache_set *map, const struct cache_key *key)
328
+{
329
+    int ret;
330
+    uint32_t pos;
331
+    struct cache_key *newkey;
332
+    if (map->elements >= map->maxelements)
333
+	cacheset_lru_remove(map, 1);
334
+    assert(map->elements < map->maxelements);
335
+
336
+    ret = cacheset_lookup_internal(map, key, &pos);
337
+    newkey = &map->data[pos];
338
+    if (ret) {
339
+	/* was already added, remove from LRU list */
340
+	lru_remove(map, newkey);
341
+    }
342
+    /* add new key to tail of LRU list */
343
+    lru_addtail(map, newkey);
344
+
345
+    map->elements++;
346
+
347
+    assert(pos < map->maxelements);
348
+
349
+    memcpy(&map->data[pos], key, sizeof(*key));
350
+}
351
+
352
+static int cacheset_lookup(struct cache_set *map, const struct cache_key *key)
353
+{
354
+    struct cache_key *newkey;
355
+    int ret;
356
+    uint32_t pos;
357
+    ret = cacheset_lookup_internal(map, key, &pos);
358
+    if (!ret)
359
+	return CACHE_INVALID_VERSION;
360
+    newkey = &map->data[pos];
361
+    /* update LRU position: move to tail */
362
+    lru_remove(map, newkey);
363
+    lru_addtail(map, newkey);
364
+
365
+    return map->version;
366
+}
367
+
368
+static void cacheset_release(struct cache_set *map)
369
+{
370
+    pthread_mutex_unlock(&map->mutex);
371
+}
372
+
373
#if 0
/*
 * Disabled self-test for the cache set.  It exercises insertion, hits and
 * misses, version changes (which expire every entry) and LRU eviction on a
 * small table.  Any failed expectation calls abort().
 */
int main(int argc, char **argv)
{
    struct cache_key key;
    struct cache_set map;

    (void)argc;
    (void)argv;

    /* start from a fully defined key: no indeterminate bytes may reach the
     * cache code */
    memset(&key, 0, sizeof(key));

    cacheset_init(&map, 256, 80);
    cacheset_acquire(&map);
    cache_setversion(&map, 10);

    key.size = 1024;
    memcpy(key.digest, "1234567890123456", 16);
    cacheset_add(&map, &key);
    memcpy(key.digest, "1234567890123457", 16);
    cacheset_add(&map, &key);
    memcpy(key.digest, "0123456789012345", 16);
    cacheset_add(&map, &key);

    /* every added key must hit, repeated lookups included */
    key.size = 1024;
    memcpy(key.digest, "1234567890123456", 16);
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    memcpy(key.digest, "1234567890123456", 16);
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    memcpy(key.digest, "1234567890123457", 16);
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    memcpy(key.digest, "0123456789012345", 16);
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    /* a digest that was never added must miss */
    memcpy(key.digest, "0123456789012346", 16);
    if (cacheset_lookup(&map, &key) == 10)
        abort();

    /* changing the version expires everything */
    cache_setversion(&map, 1);
    memcpy(key.digest, "1234567890123456", 16);
    if (cacheset_lookup(&map, &key) != CACHE_INVALID_VERSION)
        abort();
    memcpy(key.digest, "1234567890123456", 16);
    if (cacheset_lookup(&map, &key) != CACHE_INVALID_VERSION)
        abort();
    memcpy(key.digest, "1234567890123457", 16);
    if (cacheset_lookup(&map, &key) != CACHE_INVALID_VERSION)
        abort();
    memcpy(key.digest, "0123456789012345", 16);
    if (cacheset_lookup(&map, &key) != CACHE_INVALID_VERSION)
        abort();

    cacheset_release(&map);

    cacheset_destroy(&map);

    /* small table: 8 slots at 50% load holds at most 4 entries */
    cacheset_init(&map, 8, 50);
    cacheset_acquire(&map);
    cache_setversion(&map, 10);

    key.size = 416;
    memcpy(key.digest, "1234567890123456", 16);
    cacheset_add(&map, &key);
    memcpy(key.digest, "1234567890123457", 16);
    cacheset_add(&map, &key);
    memcpy(key.digest, "1234567890123459", 16);
    cacheset_add(&map, &key);
    key.size = 400;
    memcpy(key.digest, "1234567890123450", 16);
    cacheset_add(&map, &key);

    /* touching the first key makes "...457" the least recently used entry */
    key.size = 416;
    memcpy(key.digest, "1234567890123456", 16);
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    if (cacheset_lookup(&map, &key) != 10)
        abort();
    if (cacheset_lookup(&map, &key) != 10)
        abort();

    /* a fifth entry forces an eviction */
    key.size = 500;
    cacheset_add(&map, &key);

    /* the evicted entry was stored with size 416; query it with that size,
     * otherwise the lookup misses regardless of whether eviction happened */
    key.size = 416;
    memcpy(key.digest, "1234567890123457", 16);
    if (cacheset_lookup(&map, &key) == 10)
        abort();

    cacheset_release(&map);
    cacheset_destroy(&map);

    return 0;
}
#endif