Browse code

TEST: drop the per-size hashtable and the size prefilter, which doesn't help at all; arrange hashes in 4 groups. Currently on the glibc allocator, as mpool needs retuning.

aCaB authored on 2011/01/10 07:45:37
Showing 2 changed files
... ...
@@ -31,7 +31,7 @@ int hm_addhash(struct cli_matcher *root, const char *hash, uint32_t size, const
31 31
     struct cli_sz_hash *szh;
32 32
     struct cli_htu32 *ht;
33 33
     enum CLI_HASH_TYPE type;
34
-    uint8_t binhash[32];
34
+    uint8_t binhash[32 + 4];
35 35
     int hashlen, i;
36 36
 
37 37
     if(!root || !hash) {
... ...
@@ -65,32 +65,9 @@ int hm_addhash(struct cli_matcher *root, const char *hash, uint32_t size, const
65 65
     }
66 66
 
67 67
     hashlen /= 2;
68
-    ht = &root->hm.sizehashes[type];
69
-    if(!root->hm.htinint[type]) {
70
-	i = cli_htu32_init(ht, 64, root->mempool);
71
-	if(i) return i;
72
-	root->hm.htinint[type] = 1;
73
-    }
74
-
75
-    item = cli_htu32_find(ht, size);
76
-    if(!item) {
77
-	struct cli_htu32_element htitem;
78
-	szh = mpool_calloc(root->mempool, 1, sizeof(*szh));
79
-	if(!szh) {
80
-	    cli_errmsg("hm_addhash: failed to allocate size hash\n");
81
-	    return CL_EMEM;
82
-	}
83
-
84
-	htitem.key = size;
85
-	htitem.data.as_ptr = szh;
86
-	i = cli_htu32_insert(ht, &htitem, root->mempool);
87
-	if(i) {
88
-	    cli_errmsg("ht_addhash: failed to add item to hashtab");
89
-	    mpool_free(root->mempool, szh);
90
-	    return i;
91
-	}
92
-    } else
93
-	szh = (struct cli_sz_hash *)item->data.as_ptr;
68
+    *(uint32_t *)&binhash[hashlen] = size;
69
+    hashlen += 4;
70
+    szh = &root->hm.sizehashes[type][*binhash % HM_NUM_ENTRIES];
94 71
 
95 72
     if(szh->items == szh->max) {
96 73
 	if(!szh->max)
... ...
@@ -98,13 +75,15 @@ int hm_addhash(struct cli_matcher *root, const char *hash, uint32_t size, const
98 98
 	else
99 99
 	    szh->max += 1 + szh->max / 2;
100 100
 
101
-	szh->hash_array = mpool_realloc2(root->mempool, szh->hash_array, hashlen * szh->max);
101
+	//szh->hash_array = mpool_realloc2(root->mempool, szh->hash_array, hashlen * szh->max);
102
+	szh->hash_array = realloc(szh->hash_array, hashlen * szh->max);
102 103
 	if(!szh->hash_array) {
103 104
 	    cli_errmsg("ht_add: failed to grow hash array to %u entries\n", szh->max);
104 105
 	    return CL_EMEM;
105 106
 	}
106 107
 
107
-	szh->virusnames = mpool_realloc2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->max);
108
+	//szh->virusnames = mpool_realloc2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->max);
109
+	szh->virusnames = realloc(szh->virusnames, sizeof(*szh->virusnames) * szh->max);
108 110
 	if(!szh->virusnames) {
109 111
 	    cli_errmsg("ht_add: failed to grow virusname array to %u entries\n", szh->max);
110 112
 	    return CL_EMEM;
... ...
@@ -114,16 +93,16 @@ int hm_addhash(struct cli_matcher *root, const char *hash, uint32_t size, const
114 114
     memcpy(&szh->hash_array[szh->items * hashlen], binhash, hashlen);
115 115
     szh->virusnames[szh->items] = virusname;
116 116
     szh->items++;
117
-    
117
+
118 118
     return 0;
119 119
 }
120 120
 
121 121
 
122 122
 
123 123
 static const unsigned int hashlen[] = {
124
-    16, /* CLI_HASH_MD5 */
125
-    20, /* CLI_HASH_SHA1 */
126
-    32, /* CLI_HASH_SHA256 */
124
+    16 + 4, /* CLI_HASH_MD5 */
125
+    20 + 4, /* CLI_HASH_SHA1 */
126
+    32 + 4, /* CLI_HASH_SHA256 */
127 127
 };
128 128
 
129 129
 
... ...
@@ -135,7 +114,7 @@ static inline int hm_cmp(const uint8_t *itm, const uint8_t *ref, unsigned int ke
135 135
 }
136 136
 
137 137
 void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen) {
138
-    uint8_t piv[32], tmph[32];
138
+    uint8_t piv[32 + 4], tmph[32 + 4];
139 139
     size_t l1, r1;
140 140
 
141 141
     const char *tmpv;
... ...
@@ -181,21 +160,19 @@ void hm_flush(struct cli_matcher *root) {
181 181
 	return;
182 182
 
183 183
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
184
-	struct cli_htu32 *ht = &root->hm.sizehashes[type];
185
-	const struct cli_htu32_element *item = NULL;
186
-
187
-	if(!root->hm.htinint[type])
188
-	    continue;
189
-
190
-	while((item = cli_htu32_next(ht, item))) {
191
-	    struct cli_sz_hash *szh = (struct cli_sz_hash *)item->data.as_ptr;
184
+	int i;
185
+	for(i=0; i<HM_NUM_ENTRIES; i++) {
186
+	    struct cli_sz_hash *szh = szh = &root->hm.sizehashes[type][i];
192 187
 	    unsigned int keylen = hashlen[type];
188
+	    //cli_errmsg("type %u - entry %u => %u items\n", type, i, szh->items);
193 189
 
194 190
 	    if(szh->items != szh->max) {
195 191
 		void *p;
196
-		p = mpool_realloc(root->mempool, szh->hash_array, keylen * szh->items);
192
+		//p = mpool_realloc(root->mempool, szh->hash_array, keylen * szh->items);
193
+		p = realloc(szh->hash_array, keylen * szh->items);
197 194
 		if(p) szh->hash_array = p;
198
-		p = mpool_realloc(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);
195
+		//p = mpool_realloc(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);
196
+		p = realloc(szh->virusnames, sizeof(*szh->virusnames) * szh->items);
199 197
 		if(p) szh->virusnames = p;
200 198
 		szh->max = szh->items;
201 199
 	    }
... ...
@@ -207,30 +184,31 @@ void hm_flush(struct cli_matcher *root) {
207 207
 
208 208
 
209 209
 int cli_hm_have_size(const struct cli_matcher *root, enum CLI_HASH_TYPE type, uint32_t size) {
210
-    return (size && size != 0xffffffff && root && root->hm.htinint[type] && cli_htu32_find(&root->hm.sizehashes[type], size));
210
+    return 1;
211 211
 }
212 212
 
213 213
 int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
214
-    const struct cli_htu32_element *item;
215 214
     unsigned int keylen;
216 215
     struct cli_sz_hash *szh;
216
+    uint8_t tmph[32 + 4];
217 217
     size_t l, r;
218 218
 
219
-    if(!digest || !size || size == 0xffffffff || !root || !root->hm.htinint[type])
219
+    if(!digest || !size || size == 0xffffffff || !root)
220 220
 	return CL_CLEAN;
221 221
 
222
-    item = cli_htu32_find(&root->hm.sizehashes[type], size);
223
-    if(!item)
222
+    szh = &root->hm.sizehashes[type][*digest % HM_NUM_ENTRIES];
223
+    if(!szh->items)
224 224
 	return CL_CLEAN;
225 225
 
226
-    szh = (struct cli_sz_hash *)item->data.as_ptr;
227 226
     keylen = hashlen[type];
227
+    memcpy(tmph, digest, keylen - 4);
228
+    *(uint32_t *)&tmph[keylen - 4] = size;
228 229
 
229 230
     l = 0;
230 231
     r = szh->items;
231 232
     while(l <= r) {
232 233
 	size_t c = (l + r) / 2;
233
-	int res = hm_cmp(digest, &szh->hash_array[keylen * c], keylen);
234
+	int res = hm_cmp(tmph, &szh->hash_array[keylen * c], keylen);
234 235
 
235 236
 	if(res < 0) {
236 237
 	    if(!c)
... ...
@@ -38,16 +38,17 @@ enum CLI_HASH_TYPE {
38 38
 };
39 39
 
40 40
 struct cli_sz_hash {
41
-    uint8_t *hash_array; /* FIXME: make 256 entries? */
41
+    uint8_t *hash_array;
42 42
     const char **virusnames;
43 43
     uint32_t items;
44 44
     uint32_t max;
45 45
 };
46 46
 
47 47
 
48
+#define HM_NUM_ENTRIES 4
49
+
48 50
 struct cli_hash_patt {
49
-    struct cli_htu32 sizehashes[CLI_HASH_AVAIL_TYPES];
50
-    int htinint[CLI_HASH_AVAIL_TYPES];
51
+    struct cli_sz_hash sizehashes[CLI_HASH_AVAIL_TYPES][HM_NUM_ENTRIES];
51 52
 };
52 53
 
53 54