Browse code

caching final

aCaB authored on 2010/01/14 12:38:31
Showing 3 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,33 @@
#!/usr/bin/perl
#
# Replays a recorded stream of cache operations against a plain Perl hash
# and prints summary statistics.
#
# Input (STDIN): a sequence of fixed-size 17-byte records. The first byte is
# the operation ("A" = add the key, "C" = check whether the key is present),
# the remaining 16 bytes are the key. Processing stops at end of input or at
# the first incomplete record; any other operation byte is fatal.

use strict;
use warnings;

# Size of one record: 1 operation byte + 16 key bytes.
my $RECORD_LEN = 17;

# Records are raw bytes; keep any I/O layer from translating them.
binmode(STDIN);

my %h = ();
my $added = 0;
my $found = 0;
my $notfound = 0;

while(1) {
	my $record = '';
	my $got = read(STDIN, $record, $RECORD_LEN);
	# read() returns undef on error; report it rather than comparing undef.
	die "read error: $!\n" unless defined $got;
	last if($got != $RECORD_LEN);
	my $op = substr($record, 0, 1);
	my $key = substr($record, 1);
	if($op eq "A") {
		$h{$key} = 1;
		$added++;
	} elsif ($op eq "C") {
		if(exists($h{$key})) {
			$found++;
		} else {
			$notfound++;
		}
	} else {
		die "bad command $op\n";
	}
}

my $lookups = $found + $notfound;
print "added: $added\nlooked up: $lookups (found $found, not found $notfound)\n";
# print, not printf: the text is already complete and is not a format string.
print "items in the hash: ".(scalar keys %h)."\n";

0 33
new file mode 100644
1 34
Binary files /dev/null and b/hashes.lzma differ
... ...
@@ -46,7 +46,7 @@ static mpool_t *mempool = NULL;
46 46
 
47 47
 #ifdef USE_LRUHASHCACHE
/*
 * One slot of the hash table backing a cache_set. Slots are additionally
 * chained into a doubly linked LRU list through lru_next / lru_prev.
 */
struct cache_key {
    int64_t digest[2]; /* 16-byte digest held as two 64-bit words */
    uint32_t size; /* 0 is used to mark an empty hash slot! */
    struct cache_key *lru_next, *lru_prev; /* neighbours in the LRU list */
};
... ...
@@ -55,7 +55,9 @@ struct cache_set {
55 55
     struct cache_key *data;
56 56
     size_t capacity;
57 57
     size_t maxelements; /* considering load factor */
58
+    size_t maxdeleted;
58 59
     size_t elements;
60
+    size_t deleted;
59 61
     size_t version;
60 62
     struct cache_key *lru_head, *lru_tail;
61 63
 };
... ...
@@ -99,28 +101,41 @@ static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
99 99
 	if (old == map->lru_tail)
100 100
 	    map->lru_tail = 0;
101 101
 	map->elements--;
102
+	map->deleted++;
102 103
     }
103 104
 }
104 105
 
105
-int cacheset_lookup_internal(struct cache_set *map, unsigned char *md5, size_t size, uint32_t *insert_pos, int deletedok)
106
+static inline int cacheset_lookup_internal(struct cache_set *map,
107
+					   const char *md5,  size_t size,
108
+					   uint32_t *insert_pos, int deletedok)
106 109
 {
107
-    uint32_t idx = cli_readint32(md5+8) & (map->capacity -1);
108
-    uint32_t tries = 0;
109
-    struct cache_key *k = &map->data[idx];
110
-    while (k->size != CACHE_KEY_EMPTY && tries < map->capacity) {
111
-	if (k->size == size &&
112
-	    !memcmp(k->digest, md5, 16)) {
110
+    const struct cache_key*data = map->data;
111
+    uint32_t capmask = map->capacity - 1;
112
+    const struct cache_key *k;
113
+    uint32_t idx, tries = 0;
114
+    uint64_t md5_0, md5_1;
115
+    uint64_t md5a[2];
116
+
117
+    memcpy(&md5a, md5, 16);
118
+    md5_0 = md5a[0];
119
+    md5_1 = md5a[1];
120
+    idx = md5_1 & capmask;
121
+    k = &data[idx];
122
+    while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
123
+	if (k->digest[0] == md5_0 &&
124
+	    k->digest[1] == md5_1 &&
125
+	    k->size == size) {
113 126
 	    /* found key */
114 127
 	    *insert_pos = idx;
115 128
 	    return 1;
116 129
 	}
117
-       if (deletedok && k->size == CACHE_KEY_DELETED) {
130
+	if (deletedok && k->size == CACHE_KEY_DELETED) {
118 131
            /* treat deleted slot as empty */
119 132
            *insert_pos = idx;
120 133
            return 0;
121
-       }
122
-	idx = (idx + tries++)&(map->capacity-1);
123
-	k = &map->data[idx];
134
+	}
135
+	idx = (idx + tries++) & capmask;
136
+	k = &data[idx];
124 137
     }
125 138
     /* found empty pos */
126 139
     *insert_pos = idx;
... ...
@@ -148,17 +163,52 @@ static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
148 148
     map->lru_tail = newkey;
149 149
 }
150 150
 
151
+static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
152
+
153
+static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size);
154
+static int cacheset_init(struct cache_set *map, unsigned int entries);
155
+
156
+static void cacheset_rehash(struct cache_set *map)
157
+{
158
+    unsigned i;
159
+    int ret;
160
+    struct cache_set tmp_set;
161
+    struct cache_key *key;
162
+    pthread_mutex_lock(&pool_mutex);
163
+    ret = cacheset_init(&tmp_set, map->capacity);
164
+    pthread_mutex_unlock(&pool_mutex);
165
+    if (ret)
166
+	return;
167
+
168
+    key = map->lru_head;
169
+    for (i=0;key && i < tmp_set.maxelements/2;i++) {
170
+	cacheset_add(&tmp_set, (unsigned char*)&key->digest, key->size);
171
+	key = key->lru_next;
172
+    }
173
+    pthread_mutex_lock(&pool_mutex);
174
+    mpool_free(mempool, map->data);
175
+    pthread_mutex_unlock(&pool_mutex);
176
+    memcpy(map, &tmp_set, sizeof(tmp_set));
177
+}
178
+
151 179
 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size)
152 180
 {
153 181
     int ret;
154 182
     uint32_t pos;
155 183
     struct cache_key *newkey;
156
-    if (map->elements >= map->maxelements)
184
+
185
+    if (map->elements >= map->maxelements) {
157 186
 	cacheset_lru_remove(map, 1);
187
+	if (map->deleted >= map->maxdeleted) {
188
+	    cacheset_rehash(map);
189
+	}
190
+    }
158 191
     assert(map->elements < map->maxelements);
159 192
 
160 193
     ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
161 194
     newkey = &map->data[pos];
195
+    if (newkey->size == CACHE_KEY_DELETED)
196
+	map->deleted--;
162 197
     if (ret) {
163 198
 	/* was already added, remove from LRU list */
164 199
 	lru_remove(map, newkey);
... ...
@@ -178,6 +228,7 @@ static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t siz
178 178
     struct cache_key *newkey;
179 179
     int ret;
180 180
     uint32_t pos;
181
+
181 182
     ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
182 183
     if (!ret)
183 184
 	return CACHE_INVALID_VERSION;
... ...
@@ -185,17 +236,16 @@ static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t siz
185 185
     /* update LRU position: move to tail */
186 186
     lru_remove(map, newkey);
187 187
     lru_addtail(map, newkey);
188
-
189 188
     return map->version;
190 189
 }
191 190
 
192
-
193 191
 static int cacheset_init(struct cache_set *map, unsigned int entries) {
194 192
     map->data = mpool_calloc(mempool, entries, sizeof(*map->data));
195 193
     if (!map->data)
196 194
 	return CL_EMEM;
197 195
     map->capacity = entries;
198 196
     map->maxelements = 80*entries / 100;
197
+    map->maxdeleted = map->capacity - map->maxelements - 1;
199 198
     map->elements = 0;
200 199
     map->version = CACHE_INVALID_VERSION;
201 200
     map->lru_head = map->lru_tail = NULL;
... ...
@@ -241,21 +291,86 @@ static int cacheset_init(struct cache_set *cs, unsigned int entries) {
241 241
     return 0;
242 242
 }
243 243
 
/*
 * Three-way comparison of two 128-bit keys stored as int64_t[2].
 * Word [1] decides the order; word [0] only breaks ties.
 *
 * Returns -1 if a < b, 0 if equal, 1 if a > b.
 *
 * The words are compared rather than subtracted: a 64-bit difference can
 * overflow, and narrowing it to int can lose the sign or turn a non-zero
 * difference into 0.
 */
static inline int cmp(const int64_t *a, const int64_t *b) {
    if(a[1] != b[1])
	return a[1] < b[1] ? -1 : 1;
    if(a[0] != b[0])
	return a[0] < b[0] ? -1 : 1;
    return 0;
}
249 257
 
258
+
/*
 * Debug tracing. With PRINT_TREE defined, ptree(...) is printf(...).
 * Otherwise ptree(...) expands to a (void)-cast parenthesised expression:
 * nothing is printed, but the arguments are still evaluated.
 */
//#define PRINT_TREE
#ifdef PRINT_TREE
#define ptree printf
#else
#define ptree (void)
#endif

/*
 * printtree(cs, n, d): consistency check (and optional dump) of the subtree
 * rooted at `n`, where `d` is the depth of `n` (0 for the root).
 *
 * With CHECK_TREE defined the subtree is walked right child first, then the
 * node itself, then the left child, each line indented by depth. The walk
 * reports:
 *   - a right child that does not compare greater than its parent,
 *   - a left child that does not compare smaller than its parent,
 *   - a non-root node with no parent, or whose parent does not link back,
 *   - a depth-0 node that has a parent.
 * Returns non-zero if any such problem was found, 0 otherwise.
 *
 * Without CHECK_TREE the function is a stub that always returns 0.
 */
//#define CHECK_TREE
#ifdef CHECK_TREE
static int printtree(struct cache_set *cs, struct node *n, int d) {
    int i;
    int ab = 0;
    if (n == NULL) return 0;
    if(n == cs->root) ptree("--------------------------\n");
    ab |= printtree(cs, n->right, d+1);
    if(n->right) {
	if(cmp(n->digest, n->right->digest) >= 0) {
	    for (i=0; i<d; i++) ptree("        ");
	    /* casts make the arguments match the conversions; cmp() returns int */
	    ptree("^^^^ %lld >= %lld - %d\n", (long long)n->digest[1], (long long)n->right->digest[1], cmp(n->digest, n->right->digest));
	    ab = 1;
	}
    }
    for (i=0; i<d; i++) ptree("        ");
    /* top 16 bits of digest[1], followed by the node's index in cs->data */
    ptree("%08x(%02u)\n", (unsigned int)((uint64_t)n->digest[1] >> 48), (unsigned int)(n - cs->data));
    if(n->left) {
	if(cmp(n->digest, n->left->digest) <= 0) {
	    for (i=0; i<d; i++) ptree("        ");
	    ptree("vvvv %lld <= %lld - %d\n", (long long)n->digest[1], (long long)n->left->digest[1], cmp(n->digest, n->left->digest));
	    ab = 1;
	}
    }
    if(d){
	if(!n->up) {
	    ptree("no parent!\n");
	    ab = 1;
	} else {
	    if(n->up->left != n && n->up->right != n) {
		ptree("broken parent\n");
		ab = 1;
	    }
	}
    } else {
	if(n->up) {
	    ptree("root with a parent!\n");
	    ab = 1;
	}
    }
    ab |= printtree(cs, n->left, d+1);
    return ab;
}
#else
static inline int printtree(struct cache_set *cs, struct node *n, int d) {
    /* checking disabled: parameters are intentionally unused */
    (void)cs;
    (void)n;
    (void)d;
    return 0;
}
#endif
314
+
250 315
 static int splay(int64_t *md5, struct cache_set *cs) {
251 316
     struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
252
-    int ret = 0;
317
+    int comp, found = 0;
253 318
 
254 319
     if(!root)
255 320
 	return 0;
256 321
 
257 322
     while(1) {
258
-	int comp = cmp(md5, root->digest);
323
+	comp = cmp(md5, root->digest);
259 324
 	if(comp < 0) {
260 325
 	    if(!root->left) break;
261 326
 	    if(cmp(md5, root->left->digest) < 0) {
... ...
@@ -287,7 +402,7 @@ static int splay(int64_t *md5, struct cache_set *cs) {
287 287
             left = root;
288 288
             root = root->right;
289 289
 	} else {
290
-	    ret = 1;
290
+	    found = 1;
291 291
 	    break;
292 292
 	}
293 293
     }
... ...
@@ -302,16 +417,78 @@ static int splay(int64_t *md5, struct cache_set *cs) {
302 302
     if(next.left) next.left->up = root;
303 303
     root->up = NULL;
304 304
     cs->root = root;
305
-
306
-    return ret;
305
+    return found;
307 306
 }
308 307
 
309
-
310 308
 static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size) {
311 309
     int64_t hash[2];
312 310
 
313 311
     memcpy(hash, md5, 16);
314
-    return splay(hash, cs) * 1337;
312
+    if(splay(hash, cs)) {
313
+	struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
314
+#ifdef PRINT_CHAINS
315
+	printf("promoting %02d\n", p - cs->data);
316
+	{
317
+	    struct node *x = cs->first;
318
+	    printf("before: ");
319
+	    while(x) {
320
+		printf("%02d,", x - cs->data);
321
+		x=x->next;
322
+	    }
323
+	    printf(" --- ");
324
+	    x=cs->last;
325
+	    while(x) {
326
+		printf("%02d,", x - cs->data);
327
+		x=x->prev;
328
+	    }
329
+	    printf("\n");
330
+	}
331
+#endif
332
+#define TO_END_OF_CHAIN
333
+#ifdef TO_END_OF_CHAIN
334
+    	if(q) {
335
+	    if(o)
336
+		o->next = q;
337
+	    else
338
+		cs->first = q;
339
+	    q->prev = o;
340
+	    cs->last->next = p;
341
+	    p->prev = cs->last;
342
+	    p->next = NULL;
343
+	    cs->last = p;
344
+	}
345
+#else
346
+	if(cs->last != p) {
347
+	    if(cs->last == q) cs->last = p;
348
+	    if(o) o->next = q;
349
+	    else cs->first = q;
350
+	    p->next = q->next;
351
+	    if(q->next) q->next->prev = p;
352
+	    q->next = p;
353
+	    q->prev = o;
354
+	    p->prev = q;
355
+	}
356
+#endif
357
+#ifdef PRINT_CHAINS
358
+	{
359
+	    struct node *x = cs->first;
360
+	    printf("after : ");
361
+	    while(x) {
362
+		printf("%02d,", x - cs->data);
363
+		x=x->next;
364
+	    }
365
+	    printf(" --- ");
366
+	    x=cs->last;
367
+	    while(x) {
368
+		printf("%02d,", x - cs->data);
369
+		x=x->prev;
370
+	    }
371
+	    printf("\n");
372
+	}
373
+#endif
374
+	return 1337;
375
+    }
376
+    return 0;
315 377
 }
316 378
 
317 379
 static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) {
... ...
@@ -322,39 +499,90 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
322 322
     if(splay(hash, cs))
323 323
 	return; /* Already there */
324 324
 
325
+    ptree("1:\n");
326
+    if(printtree(cs, cs->root, 0)) {
327
+	abort();
328
+    }
329
+
325 330
     newnode = cs->first;
331
+    //#define TAKE_FIRST
332
+#ifdef TAKE_FIRST
333
+    if((newnode->left || newnode->right || newnode->up)) {
334
+	if(!splay(newnode->digest, cs)) {
335
+	    cli_errmsg("WTF\n");
336
+	    abort();
337
+	}
338
+	if(!newnode->left) {
339
+	    cs->root = newnode->right;
340
+	    newnode->right->up = NULL;
341
+	} else if(!newnode->right) {
342
+	    cs->root = newnode->left;
343
+	    newnode->left->up = NULL;
344
+	} else {
345
+	    cs->root = newnode->left;
346
+	    newnode->left->up = NULL;
347
+	    if(splay(newnode->digest, cs)) {
348
+		cli_errmsg("WTF #2\n");
349
+		abort();
350
+	    }
351
+	    cs->root->up = NULL;
352
+	    cs->root->right = newnode->right;
353
+	    if(newnode->right) newnode->right->up = cs->root;
354
+	}
355
+	newnode->up = NULL;
356
+	newnode->right = NULL;
357
+	newnode->left = NULL;
358
+	if(splay(hash, cs)) {
359
+	    cli_errmsg("WTF #3\n");
360
+	    abort();
361
+	}
362
+    }
363
+    newnode->prev = cs->last;
364
+    cs->last->next = newnode;
365
+    cs->last = newnode;
366
+    newnode->next->prev = NULL;
367
+    cs->first = newnode->next;
368
+    newnode->next = NULL;
369
+
370
+#else
326 371
     while(newnode) {
327
-	if(!newnode->right && !newnode->left)
328
-	    break;
329
-	newnode = newnode->next;
372
+    	if(!newnode->right && !newnode->left)
373
+    	    break;
374
+    	newnode = newnode->next;
330 375
     }
331 376
     if(!newnode) {
332
-	cli_errmsg("NO NEWNODE!\n");
333
-	abort();
377
+    	cli_errmsg("NO NEWNODE!\n");
378
+    	abort();
334 379
     }
335 380
     if(newnode->up) {
336
-	if(newnode->up->left == newnode)
337
-	    newnode->up->left = NULL;
338
-	else
339
-	    newnode->up->right = NULL;
381
+    	if(newnode->up->left == newnode)
382
+    	    newnode->up->left = NULL;
383
+    	else
384
+    	    newnode->up->right = NULL;
340 385
     }
341 386
     if(newnode->prev)
342
-	newnode->prev->next = newnode->next;
387
+    	newnode->prev->next = newnode->next;
343 388
     if(newnode->next)
344
-	newnode->next->prev = newnode->prev;
389
+    	newnode->next->prev = newnode->prev;
345 390
     if(cs->first == newnode)
346
-	cs->first = newnode->next;
391
+    	cs->first = newnode->next;
347 392
 
348 393
     newnode->prev = cs->last;
349 394
     newnode->next = NULL;
350 395
     cs->last->next = newnode;
351 396
     cs->last = newnode;
397
+#endif
398
+
399
+    ptree("2:\n");
400
+    if(printtree(cs, cs->root, 0)) {
401
+	abort();
402
+    }
352 403
 
353 404
     if(!cs->root) {
354 405
 	newnode->left = NULL;
355 406
 	newnode->right = NULL;
356 407
     } else {
357
-	if(cmp(hash, cs->root->digest)) {
408
+	if(cmp(hash, cs->root->digest) < 0) {
358 409
 	    newnode->left = cs->root->left;
359 410
 	    newnode->right = cs->root;
360 411
 	    cs->root->left = NULL;
... ...
@@ -370,14 +598,19 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
370 370
     newnode->digest[1] = hash[1];
371 371
     newnode->up = NULL;
372 372
     cs->root = newnode;
373
+
374
+    ptree("3: %lld\n", hash[1]);
375
+    if(printtree(cs, cs->root, 0)) {
376
+	abort();
377
+    }
373 378
 }
374 379
 #endif /* USE_SPLAY */
375 380
 
/*
 * Active configuration: a single tree, so every digest maps to index 0.
 * Alternative TREES/getkey pairs are kept commented out; enable exactly one
 * pair at a time.
 */
#define TREES 1
static inline unsigned int getkey(uint8_t *hash) { (void)hash; /* unused with one tree */ return 0; }

/* #define TREES 256 */
/* static inline unsigned int getkey(uint8_t *hash) { return *hash; } */
381 386
 
382 387
 /* #define TREES 4096 */
383 388
 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */