aCaB authored on 2010/01/09 10:19:25
Showing 1 changed file
@@ -33,7 +33,12 @@
 #include "cache.h"
 #include "fmap.h"
 
+static mpool_t *mempool = NULL;
+
+//#define USE_LRUHASHCACHE
+#define USE_SPLAY
 
+#ifdef USE_LRUHASHCACHE
 struct cache_key {
     char digest[16];
     uint32_t size; /* 0 is used to mark an empty hash slot! */
@@ -90,40 +95,14 @@ static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
     }
 }
 
-static inline uint32_t hash32shift(uint32_t key)
-{
-  key = ~key + (key << 15);
-  key = key ^ (key >> 12);
-  key = key + (key << 2);
-  key = key ^ (key >> 4);
-  key = (key + (key << 3)) + (key << 11);
-  key = key ^ (key >> 16);
-  return key;
-}
-
-static inline size_t hash(const unsigned char* k,const size_t len,const size_t SIZE)
-{
-    size_t Hash = 1;
-    size_t i;
-    for(i=0;i<len;i++) {
-	/* a simple add is good, because we use the mixing function below */
-	Hash +=  k[i];
-	/* mixing function */
-	Hash = hash32shift(Hash);
-    }
-    /* SIZE is power of 2 */
-    return Hash & (SIZE - 1);
-}
-
-int cacheset_lookup_internal(struct cache_set *map, const struct cache_key *key,
-			     uint32_t *insert_pos, int deletedok)
+int cacheset_lookup_internal(struct cache_set *map, unsigned char *md5, size_t size, uint32_t *insert_pos, int deletedok)
 {
-    uint32_t idx = hash((const unsigned char*)key, sizeof(*key), map->capacity);
+    uint32_t idx = cli_readint32(md5+8) & (map->capacity -1);
     uint32_t tries = 0;
     struct cache_key *k = &map->data[idx];
     while (k->size != CACHE_KEY_EMPTY) {
-	if (k->size == key->size &&
-	    !memcmp(k->digest, key, 16)) {
+	if (k->size == size &&
+	    !memcmp(k->digest, md5, 16)) {
 	    /* found key */
 	    *insert_pos = idx;
 	    return 1;
@@ -162,7 +141,7 @@ static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
     map->lru_tail = newkey;
 }
 
-static void cacheset_add(struct cache_set *map, const struct cache_key *key)
+static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size)
 {
     int ret;
     uint32_t pos;
@@ -171,28 +150,28 @@ static void cacheset_add(struct cache_set *map, const struct cache_key *key)
 	cacheset_lru_remove(map, 1);
     assert(map->elements < map->maxelements);
 
-    ret = cacheset_lookup_internal(map, key, &pos, 1);
+    ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
     newkey = &map->data[pos];
     if (ret) {
 	/* was already added, remove from LRU list */
 	lru_remove(map, newkey);
     }
     /* add new key to tail of LRU list */
-    memcpy(&map->data[pos], key, sizeof(*key));
+    memcpy(&map->data[pos].digest, md5, sizeof(map->data[pos].digest));
+    map->data[pos].size = size;
     lru_addtail(map, newkey);
 
     map->elements++;
 
     assert(pos < map->maxelements);
-
 }
 
-static int cacheset_lookup(struct cache_set *map, const struct cache_key *key)
+static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t size)
 {
     struct cache_key *newkey;
     int ret;
     uint32_t pos;
-    ret = cacheset_lookup_internal(map, key, &pos, 0);
+    ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
     if (!ret)
 	return CACHE_INVALID_VERSION;
     newkey = &map->data[pos];
@@ -203,13 +182,137 @@ static int cacheset_lookup(struct cache_set *map, const struct cache_key *key)
     return map->version;
 }
 
-static mpool_t *mempool = NULL;
-static struct CACHE {
-    struct cache_set cacheset;
-    pthread_mutex_t mutex;
-    uint32_t lastdb;
-} *cache = NULL;
-static unsigned int cache_entries = 0;
+
+static int cacheset_init(struct cache_set *map, unsigned int entries) {
+    map->data = mpool_calloc(mempool, 256, sizeof(*map->data));
+    if (!map->data)
+	return CL_EMEM;
+    map->capacity = entries;
+    map->maxelements = 80*entries / 100;
+    map->elements = 0;
+    map->version = CACHE_INVALID_VERSION;
+    map->lru_head = map->lru_tail = NULL;
+    map->version = 1337;
+    return 0;
+}
+
+#else
+#ifdef USE_SPLAY
+struct node {
+    uint64_t digest[2];
+    struct node *left;
+    struct node *right;
+    uint32_t size; /* 0 is used to mark an empty hash slot! */
+};
+
+struct cache_set {
+    struct node *data;
+    struct node *root;
+    unsigned int used;
+    unsigned int total;
+};
+
+static int cacheset_init(struct cache_set *map, unsigned int entries) {
+    map->data = mpool_calloc(mempool, entries, sizeof(*map->data));
+    map->root = NULL;
+
+    if(!map->data)
+	return CL_EMEM;
+    map->used = 0;
+    map->total = entries;
+    return 0;
+}
+
+void splay(uint64_t *md5, struct cache_set *cs) {
+    struct node next = {{0, 0}, NULL, NULL, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
+
+    if(!root)
+	return;
+
+    while(1) {
+	if(md5[1] < root->digest[1] || md5[1] == root->digest[1] && md5[0] < root->digest[0]) {
+	    if(!root->left) break;
+	    if(md5[1] < root->left->digest[1] || md5[1] == root->left->digest[1] && md5[0] < root->left->digest[0]) {
+		temp = root->left;
+		root->left = temp->right;
+		temp->right = root;
+		root = temp;
+		if (!root->left) break;
+	    }
+	    right->left = root;
+	    right = root;
+	    root = root->left;
+	} else if(md5[1] > root->digest[1] || md5[1] == root->digest[1] && md5[0] > root->digest[0]) {
+	    if(!root->right) break;
+	    if(md5[1] > root->right->digest[1] || md5[1] == root->right->digest[1] && md5[0] > root->right->digest[0]) {
+		temp = root->right;
+		root->right = temp->left;
+		temp->left = root;
+		root = temp;
+		if(!root->right) break;
+	    }
+	    left->right = root;
+	    left = root;
+	    root = root->right;
+	} else break;
+    }
+    left->right = root->left;
+    right->left = root->right;
+    root->left = next.right;
+    root->right = next.left;
+    cs->root = root;
+}
+
+
+static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size) {
+    uint64_t hash[2];
+
+    memcpy(hash, md5, 16);
+    splay(hash, cs);
+    if(!cs->root || cs->root->digest[1] != hash[1] || cs->root->digest[0] != hash[0])
+	return 0;
+    return 1337;
+}
+
+
+static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) {
+    uint64_t hash[2];
+    struct node *newnode;
+
+    memcpy(hash, md5, 16);
+    splay(hash, cs);
+    if(cs->root && cs->root->digest[1] == hash[1] && cs->root->digest[0] == hash[0])
+	return; /* Already there */
+
+    if(cs->used == cs->total) {
+	/* FIXME: drop something */
+	cli_errmsg("FULL!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+	return;
+    } else {
+	newnode = &cs->data[cs->used++];
+    }
+
+    if(!cs->root) {
+	newnode->left = NULL;
+	newnode->right = NULL;
+    } else if(hash[1] < cs->root->digest[1] || hash[1] == cs->root->digest[1] && hash[0] < cs->root->digest[0]) {
+	newnode->left = cs->root->left;
+	newnode->right = cs->root;
+	cs->root->left = NULL;
+    } else {
+	newnode->right = cs->root->right;
+	newnode->left = cs->root;
+	cs->root->right = NULL;
+    }
+    newnode->digest[0] = hash[0];
+    newnode->digest[1] = hash[1];
+    cs->root = newnode;
+}
+
+
+#endif /* USE_SPLAY */
+#endif /* USE_LRUHASHCACHE */
+
 
 #define TREES 256
 static inline unsigned int getkey(uint8_t *hash) { return *hash; }
@@ -220,11 +323,17 @@ static inline unsigned int getkey(uint8_t *hash) { return *hash; }
 /* #define TREES 65536 */
 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
 
+static struct CACHE {
+    struct cache_set cacheset;
+    pthread_mutex_t mutex;
+    uint32_t lastdb;
+} *cache = NULL;
+
 
 int cl_cache_init(unsigned int entries) {
     unsigned int i;
+    int ret;
 
-    entries = MAX(entries / (TREES / 256), 10);
     if(!(mempool = mpool_create())) {
 	cli_errmsg("mpool init fail\n");
 	return 1;
@@ -244,23 +353,19 @@ int cl_cache_init(unsigned int entries) {
 	    cache = NULL;
 	    return 1;
 	}
-
-	cache[i].cacheset.data = mpool_calloc(mempool, 256, sizeof(*cache[i].cacheset.data));
-	if (!cache[i].cacheset.data)
-	    return CL_EMEM;
-	cache_setversion(&cache[i].cacheset, 1337);
-	cache[i].cacheset.capacity = 256;
-	cache[i].cacheset.maxelements = 80*256 / 100;
-	cache[i].cacheset.elements = 0;
-	cache[i].cacheset.version = CACHE_INVALID_VERSION;
-	cache[i].cacheset.lru_head = cache[i].cacheset.lru_tail = NULL;
+	ret = cacheset_init(&cache[i].cacheset, entries);
+	if(ret) {
+	    mpool_destroy(mempool);
+	    mempool = NULL;
+	    cache = NULL;
+	    return 1;
+	}
     }
-    cache_entries = entries;
     return 0;
 }
 
+
 static int cache_lookup_hash(unsigned char *md5, cli_ctx *ctx) {
-    struct cache_key entry;
     int ret = CL_VIRUS;
     unsigned int key = getkey(md5);
     struct CACHE *c;
@@ -272,16 +377,14 @@ static int cache_lookup_hash(unsigned char *md5, cli_ctx *ctx) {
 	cli_errmsg("mutex lock fail\n");
 	return ret;
     }
-    entry.size = 1024;
-    memcpy(entry.digest, md5, 16);
-    ret = (cacheset_lookup(&c->cacheset, &entry) == 1337) ? CL_CLEAN : CL_VIRUS;
-    pthread_mutex_unlock(&c->mutex);
+
+    ret = (cacheset_lookup(&c->cacheset, md5, 1024) == 1337) ? CL_CLEAN : CL_VIRUS;
     if(ret == CL_CLEAN) cli_warnmsg("cached\n");
+    pthread_mutex_unlock(&c->mutex);
     return ret;
 }
 
 void cache_add(unsigned char *md5, cli_ctx *ctx) {
-    struct cache_key entry;
     unsigned int key = getkey(md5);
     struct CACHE *c;
 
@@ -292,9 +395,9 @@ void cache_add(unsigned char *md5, cli_ctx *ctx) {
 	cli_errmsg("mutex lock fail\n");
 	return;
     }
-    entry.size = 1024;
-    memcpy(entry.digest, md5, 16);
-    cacheset_add(&c->cacheset, &entry);
+
+    cacheset_add(&c->cacheset, md5, 1024);
+
    pthread_mutex_unlock(&c->mutex);
     return;
 }
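
For reference, the splay-tree path added above is the classic top-down splay (rotate, link, reassemble), ordered on the two 64-bit halves of the MD5 digest, with nodes drawn from a preallocated mpool array. The sketch below is not part of the commit: it is a minimal standalone rendering of the same technique, assuming plain malloc in place of ClamAV's mpool allocator and a toy_cmp() helper in place of the inlined digest comparisons; every toy_* name is illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_node {
    uint64_t digest[2];               /* 128-bit digest, same memcpy layout as the diff */
    struct toy_node *left, *right;
};

/* total order on (digest[1], digest[0]), mirroring the comparisons in the diff */
static int toy_cmp(const uint64_t *a, const uint64_t *b) {
    if (a[1] != b[1]) return a[1] < b[1] ? -1 : 1;
    if (a[0] != b[0]) return a[0] < b[0] ? -1 : 1;
    return 0;
}

/* top-down splay: afterwards the node nearest to key is the root */
static struct toy_node *toy_splay(struct toy_node *root, const uint64_t *key) {
    struct toy_node hdr = {{0, 0}, NULL, NULL}, *left = &hdr, *right = &hdr, *tmp;
    if (!root) return NULL;
    for (;;) {
        int c = toy_cmp(key, root->digest);
        if (c < 0) {
            if (!root->left) break;
            if (toy_cmp(key, root->left->digest) < 0) {      /* rotate right */
                tmp = root->left; root->left = tmp->right; tmp->right = root; root = tmp;
                if (!root->left) break;
            }
            right->left = root; right = root; root = root->left;   /* link right */
        } else if (c > 0) {
            if (!root->right) break;
            if (toy_cmp(key, root->right->digest) > 0) {     /* rotate left */
                tmp = root->right; root->right = tmp->left; tmp->left = root; root = tmp;
                if (!root->right) break;
            }
            left->right = root; left = root; root = root->right;   /* link left */
        } else break;
    }
    left->right = root->left;      /* reassemble the three trees */
    right->left = root->right;
    root->left = hdr.right;
    root->right = hdr.left;
    return root;
}

/* insert (or refresh) a digest; the new or found node becomes the root */
static struct toy_node *toy_add(struct toy_node *root, const uint64_t *key) {
    struct toy_node *n;
    root = toy_splay(root, key);
    if (root && !toy_cmp(key, root->digest)) return root;    /* already cached */
    n = calloc(1, sizeof(*n));
    if (!n) return root;                                      /* allocation failed: leave tree unchanged */
    n->digest[0] = key[0]; n->digest[1] = key[1];
    if (root) {
        if (toy_cmp(key, root->digest) < 0) { n->left = root->left;   n->right = root; root->left = NULL; }
        else                                { n->right = root->right; n->left = root;  root->right = NULL; }
    }
    return n;
}

/* 1 if the 16-byte digest is cached, 0 otherwise; splays on every probe */
static int toy_lookup(struct toy_node **rootp, const unsigned char *md5) {
    uint64_t key[2];
    memcpy(key, md5, 16);
    *rootp = toy_splay(*rootp, key);
    return *rootp && !toy_cmp(key, (*rootp)->digest);
}

int main(void) {
    unsigned char d1[16], d2[16];
    uint64_t k1[2];
    struct toy_node *root = NULL;

    memset(d1, 0xaa, sizeof(d1));
    memset(d2, 0x55, sizeof(d2));
    memcpy(k1, d1, 16);
    root = toy_add(root, k1);
    printf("d1 cached: %d\n", toy_lookup(&root, d1));   /* prints 1 */
    printf("d2 cached: %d\n", toy_lookup(&root, d2));   /* prints 0 */
    return 0;
}

Splaying on every probe keeps recently seen digests near the root, which is what makes the amortized cost attractive for a scan cache; note that the committed cacheset_add() still has no eviction policy (the FIXME above fires once the preallocated node array is full), whereas this toy simply keeps allocating.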