Browse code

Merge branch 'cache'

aCaB authored on 2010/01/05 22:56:33
Showing 11 changed files
... ...
@@ -306,6 +306,8 @@ int main(int argc, char **argv)
306 306
 	break;
307 307
     }
308 308
 
309
+    cache_init(256);
310
+
309 311
     if(!(engine = cl_engine_new())) {
310 312
 	logg("!Can't initialize antivirus engine\n");
311 313
 	ret = 1;
... ...
@@ -144,6 +144,8 @@ int main(int argc, char **argv)
144 144
 
145 145
     gettimeofday(&t1, NULL);
146 146
 
147
+    cache_init(256);
148
+
147 149
     ret = scanmanager(opts);
148 150
 
149 151
     if(!optget(opts, "no-summary")->enabled) {
... ...
@@ -346,6 +346,8 @@ libclamav_la_SOURCES = \
346 346
 	bytecode_api.h \
347 347
 	bytecode_api_impl.h \
348 348
 	bytecode_hooks.h
349
+	cache.c \
350
+	cache.h
349 351
 
350 352
 if !LINK_TOMMATH
351 353
 libclamav_la_SOURCES += bignum.c \
... ...
@@ -2231,6 +2231,8 @@ uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES
2231 2231
 	tags tags-recursive uninstall uninstall-am \
2232 2232
 	uninstall-includeHEADERS uninstall-libLTLIBRARIES
2233 2233
 
2234
+	cache.c \
2235
+	cache.h
2234 2236
 
2235 2237
 .PHONY: version.h.tmp
2236 2238
 version.c: version.h
2237 2239
new file mode 100644
... ...
@@ -0,0 +1,187 @@
0
+#include <string.h>
1
+#include <stdlib.h>
2
+#include <pthread.h>
3
+
4
+#include "md5.h"
5
+#include "mpool.h"
6
+#include "clamav.h"
7
+#include "cache.h"
8
+#include "fmap.h"
9
+
10
+#if HAVE_CONFIG_H
11
+#include "clamav-config.h"
12
+#endif
13
+
14
+#define CACHE_PERTURB 10
15
+/* 1/10th */
16
+
17
+static mpool_t *mempool = NULL;
18
+static struct CACHE {
19
+    struct CACHE_ENTRY {
20
+	unsigned char hash[15];
21
+	uint32_t dbver;
22
+	uint32_t hits;
23
+    } *items;
24
+    pthread_mutex_t mutex;
25
+    uint32_t lastdb;
26
+} *cache = NULL;
27
+static unsigned int cache_entries = 0;
28
+
29
+int cl_cache_init(unsigned int entries) {
30
+    unsigned int i;
31
+
32
+    if(!(mempool = mpool_create())) {
33
+	cli_errmsg("mpool init fail\n");
34
+	return 1;
35
+    }
36
+    if(!(cache = mpool_malloc(mempool, sizeof(struct CACHE) * 256))) {
37
+	cli_errmsg("mpool malloc fail\n");
38
+	mpool_destroy(mempool);
39
+	return 1;
40
+    }
41
+
42
+    for(i=0; i<256; i++) {
43
+	struct CACHE_ENTRY *e = mpool_calloc(mempool, sizeof(struct CACHE_ENTRY), entries);
44
+	if(!e) {
45
+	    cli_errmsg("mpool calloc fail\n");
46
+	    mpool_destroy(mempool);
47
+	    return 1;
48
+	}
49
+	cache[i].items = e;
50
+	cache[i].lastdb = 0;
51
+	if(pthread_mutex_init(&cache[i].mutex, NULL)) {
52
+	    cli_errmsg("mutex init fail\n");
53
+	    mpool_destroy(mempool);
54
+	    return 1;
55
+	}
56
+    }
57
+    cache_entries = entries;
58
+    return 0;
59
+}
60
+
61
+void cache_swap(struct CACHE_ENTRY *e, unsigned int a) {
62
+    struct CACHE_ENTRY t;
63
+    unsigned int b = a-1;
64
+
65
+    if(!a || e[a].hits <= e[b].hits)
66
+	return;
67
+
68
+    do {
69
+	if(e[a].hits > e[b].hits)
70
+	    continue;
71
+	break;
72
+    } while(b--);
73
+    b++;
74
+
75
+    memcpy(&t, &e[a], sizeof(t));
76
+    memcpy(&e[a], &e[b], sizeof(t));
77
+    memcpy(&e[b], &t, sizeof(t));
78
+}
79
+
80
+static void updb(uint32_t db, unsigned int skip) {
81
+    unsigned int i;
82
+    for(i=0; i<256; i++) {
83
+	if(i==skip) continue;
84
+	if(pthread_mutex_lock(&cache[i].mutex)) {
85
+	    cli_errmsg("mutex lock fail\n");
86
+	    continue;
87
+	}
88
+	cache[i].lastdb = db;
89
+	pthread_mutex_unlock(&cache[i].mutex);	
90
+    }
91
+}
92
+
93
+static int cache_lookup_hash(unsigned char *md5, cli_ctx *ctx) {
94
+    unsigned int i;
95
+    int ret = CL_VIRUS;
96
+    struct CACHE_ENTRY *e;
97
+    struct CACHE *c;
98
+
99
+    if(!cache) return ret;
100
+
101
+    c = &cache[*md5];
102
+    e = c->items;
103
+    if(pthread_mutex_lock(&c->mutex)) {
104
+	cli_errmsg("mutex lock fail\n");
105
+	return ret;
106
+    }
107
+    if(c->lastdb <= ctx->engine->dbversion[0]) {
108
+	if(c->lastdb < ctx->engine->dbversion[0]) {
109
+	    c->lastdb = ctx->engine->dbversion[0];
110
+	    updb(c->lastdb, *md5);
111
+	} else {
112
+	    for(i=0; i<cache_entries; i++) {
113
+		if(!e[i].hits) break;
114
+		if(e[i].dbver == c->lastdb && !memcmp(e[i].hash, md5 + 1, 15)) {
115
+		    e[i].hits++;
116
+		    cache_swap(e, i);
117
+		    ret = CL_CLEAN;
118
+		    cli_warnmsg("cached\n");
119
+		    break;
120
+		}
121
+	    }
122
+	}
123
+    }
124
+    pthread_mutex_unlock(&c->mutex);
125
+    return ret;
126
+}
127
+
128
+void cache_add(unsigned char *md5, cli_ctx *ctx) {
129
+    unsigned int i, replace;
130
+    struct CACHE_ENTRY *e;
131
+    struct CACHE *c;
132
+
133
+    if(!cache) return;
134
+
135
+    c = &cache[*md5];
136
+    e = c->items;
137
+    if(pthread_mutex_lock(&c->mutex)) {
138
+	cli_errmsg("mutex lock fail\n");
139
+	return;
140
+    }
141
+    if(c->lastdb == ctx->engine->dbversion[0]) {
142
+	replace = cache_entries;
143
+	for(i=0; i<cache_entries; i++) {
144
+	    if(!e[i].hits) break;
145
+	    if(replace == cache_entries && e[i].dbver < c->lastdb) {
146
+		replace = i;
147
+	    } else if(e[i].hits && !memcmp(e[i].hash, md5 + 1, 15)) {
148
+		e[i].hits++;
149
+		cache_swap(e, i);
150
+		pthread_mutex_unlock(&c->mutex);
151
+		return;
152
+	    }
153
+	}
154
+	if(replace == cache_entries)
155
+	    replace = cache_entries - 1 - (rand() % (cache_entries / CACHE_PERTURB));
156
+	e[replace].hits = 1;
157
+	e[replace].dbver = c->lastdb;
158
+	memcpy(e[replace].hash, md5 + 1, 15);
159
+	cache_swap(e, replace);
160
+    }
161
+    pthread_mutex_unlock(&c->mutex);
162
+    return;
163
+}
164
+
165
+int cache_check(unsigned char *hash, cli_ctx *ctx) {
166
+    fmap_t *map = *ctx->fmap;
167
+    size_t todo = map->len, at = 0;;
168
+    cli_md5_ctx md5;
169
+
170
+    if(!cache) return CL_VIRUS;
171
+
172
+    cli_md5_init(&md5);
173
+    while(todo) {
174
+	void *buf;
175
+	size_t readme = todo < FILEBUFF ? todo : FILEBUFF;
176
+	if(!(buf = fmap_need_off_once(map, at, readme)) != readme) {
177
+	    lseek(desc, seekback, SEEK_SET);
178
+	    return CL_VIRUS;
179
+	}
180
+	todo -= readme;
181
+	at += readme;
182
+	cli_md5_update(&md5, buf, readme);
183
+    }
184
+    cli_md5_final(hash, &md5);
185
+    return cache_lookup_hash(hash, ctx);
186
+}
0 187
new file mode 100644
... ...
@@ -0,0 +1,30 @@
0
+/*
1
+ *  Copyright (C) 2010 Sourcefire, Inc.
2
+ *
3
+ *  Authors: aCaB <acab@clamav.net>
4
+ *
5
+ *  This program is free software; you can redistribute it and/or modify
6
+ *  it under the terms of the GNU General Public License version 2 as
7
+ *  published by the Free Software Foundation.
8
+ *
9
+ *  This program is distributed in the hope that it will be useful,
10
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+ *  GNU General Public License for more details.
13
+ *
14
+ *  You should have received a copy of the GNU General Public License
15
+ *  along with this program; if not, write to the Free Software
16
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
+ *  MA 02110-1301, USA.
18
+ */
19
+
20
+#ifndef __CACHE_H
21
+#define __CACHE_H
22
+
23
+#include "clamav.h"
24
+#include "others.h"
25
+
26
+void cache_add(unsigned char *md5, cli_ctx *ctx);
27
+int cache_check(unsigned char *hash, cli_ctx *ctx);
28
+
29
+#endif
... ...
@@ -208,6 +208,7 @@ extern const char *cl_retver(void);
208 208
 
209 209
 /* others */
210 210
 extern const char *cl_strerror(int clerror);
211
+extern int cl_cache_init(unsigned int entries);
211 212
 
212 213
 #ifdef __cplusplus
213 214
 }
... ...
@@ -27,6 +27,7 @@ CLAMAV_PUBLIC {
27 27
     cl_statfree;
28 28
     cl_statinidir;
29 29
     cl_strerror;
30
+    cl_cache_init;
30 31
   local:
31 32
     *;
32 33
 };
... ...
@@ -344,7 +344,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struc
344 344
     fmap_t *map = *ctx->fmap;
345 345
 
346 346
     if((*ctx->fmap = fmap(desc, 0, 0))) {
347
-	ret = cli_fmap_scandesc(ctx, ftype, ftonly, ftoffset, acmode);
347
+	ret = cli_fmap_scandesc(ctx, ftype, ftonly, ftoffset, acmode, NULL);
348 348
 	funmap(*ctx->fmap);
349 349
     }
350 350
     *ctx->fmap = map;
... ...
@@ -352,7 +352,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struc
352 352
 }
353 353
 
354 354
 
355
-int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode)
355
+int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode, unsigned char *refhash)
356 356
 {
357 357
  	unsigned char *buff;
358 358
 	int ret = CL_CLEAN, type = CL_CLEAN, bytes;
... ...
@@ -420,7 +420,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
420 420
 	}
421 421
     }
422 422
 
423
-    if(!ftonly && ctx->engine->md5_hdb)
423
+    if(!refhash && !ftonly && ctx->engine->md5_hdb)
424 424
 	cli_md5_init(&md5ctx);
425 425
 
426 426
     while(offset < map->len) {
... ...
@@ -459,7 +459,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
459 459
 		    type = ret;
460 460
 	    }
461 461
 
462
-	    if(ctx->engine->md5_hdb)
462
+	    if(!refhash && ctx->engine->md5_hdb)
463 463
 		cli_md5_update(&md5ctx, buff + maxpatlen * (offset!=0), bytes - maxpatlen * (offset!=0));
464 464
 	}
465 465
 
... ...
@@ -534,8 +534,11 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
534 534
 
535 535
     if(!ftonly && ctx->engine->md5_hdb) {
536 536
 	    const struct cli_bm_patt *patt;
537
-	cli_md5_final(digest, &md5ctx);
538
-	if(cli_bm_scanbuff(digest, 16, ctx->virname, &patt, ctx->engine->md5_hdb, 0, NULL, NULL) == CL_VIRUS && patt->filesize == map->len && (cli_bm_scanbuff(digest, 16, NULL, &patt, ctx->engine->md5_fp, 0, NULL, NULL) != CL_VIRUS || patt->filesize != map->len))
537
+	if(!refhash) {
538
+	    cli_md5_final(digest, &md5ctx);
539
+	    refhash = digest;
540
+	}
541
+	if(cli_bm_scanbuff(refhash, 16, ctx->virname, &patt, ctx->engine->md5_hdb, 0, NULL, NULL) == CL_VIRUS && patt->filesize == map->len && (cli_bm_scanbuff(refhash, 16, NULL, &patt, ctx->engine->md5_fp, 0, NULL, NULL) != CL_VIRUS || patt->filesize != map->len))
539 542
 	    return CL_VIRUS;
540 543
     }
541 544
 
... ...
@@ -146,8 +146,7 @@ struct cli_target_info {
146 146
 int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset, cli_ctx *ctx, cli_file_t ftype, struct cli_ac_data **acdata);
147 147
 
148 148
 int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode);
149
-int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode);
150
-
149
+int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset, unsigned int acmode, unsigned char *digest);
151 150
 int cli_caloff(const char *offstr, struct cli_target_info *info, fmap_t *map, unsigned int target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max);
152 151
 
153 152
 int cli_checkfp(int fd, cli_ctx *ctx);
... ...
@@ -85,6 +85,7 @@
85 85
 #include "ishield.h"
86 86
 #include "7z.h"
87 87
 #include "fmap.h"
88
+#include "cache.h"
88 89
 
89 90
 #ifdef HAVE_BZLIB_H
90 91
 #include <bzlib.h>
... ...
@@ -1691,7 +1692,7 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
1691 1691
     if(typercg)
1692 1692
 	acmode |= AC_SCAN_FT;
1693 1693
 
1694
-    ret = cli_fmap_scandesc(ctx, type == CL_TYPE_TEXT_ASCII ? 0 : type, 0, &ftoffset, acmode);
1694
+    ret = cli_fmap_scandesc(ctx, type == CL_TYPE_TEXT_ASCII ? 0 : type, 0, &ftoffset, acmode, NULL);
1695 1695
 
1696 1696
     if(ret >= CL_TYPENO) {
1697 1697
 	ctx->recursion++;
... ...
@@ -1840,6 +1841,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1840 1840
 	struct stat sb;
1841 1841
 	uint8_t typercg = 1;
1842 1842
 	cli_file_t current_container = ctx->container_type; /* TODO: container tracking code TBD - bb#1293 */
1843
+	unsigned char hash[16];
1843 1844
 
1844 1845
     if(ctx->engine->maxreclevel && ctx->recursion > ctx->engine->maxreclevel) {
1845 1846
         cli_dbgmsg("cli_magic_scandesc: Archive recursion limit exceeded (%u, max: %u)\n", ctx->recursion, ctx->engine->maxreclevel);
... ...
@@ -1875,15 +1877,23 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
1875 1875
 	return CL_EMEM;
1876 1876
     }
1877 1877
 
1878
+    if(cache_check(hash, ctx) == CL_CLEAN)
1879
+	return CL_CLEAN;
1880
+    
1878 1881
     if(!ctx->options || (ctx->recursion == ctx->engine->maxreclevel)) { /* raw mode (stdin, etc.) or last level of recursion */
1879 1882
 	if(ctx->recursion == ctx->engine->maxreclevel)
1880 1883
 	    cli_dbgmsg("cli_magic_scandesc: Hit recursion limit, only scanning raw file\n");
1881 1884
 	else
1882 1885
 	    cli_dbgmsg("Raw mode: No support for special files\n");
1883
-	if((ret = cli_fmap_scandesc(ctx, 0, 0, NULL, AC_SCAN_VIR)) == CL_VIRUS)
1886
+
1887
+	if((ret = cli_fmap_scandesc(ctx, 0, 0, NULL, AC_SCAN_VIR, hash)) == CL_VIRUS)
1884 1888
 	    cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
1889
+	else
1890
+	    cache_add(hash, ctx);
1891
+
1885 1892
 	funmap(*ctx->fmap);
1886 1893
 	ctx->fmap--; 
1894
+
1887 1895
 	return ret;
1888 1896
     }
1889 1897
 
... ...
@@ -2167,8 +2177,10 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
2167 2167
 	case CL_EMAXSIZE:
2168 2168
 	case CL_EMAXFILES:
2169 2169
 	    cli_dbgmsg("Descriptor[%d]: %s\n", desc, cl_strerror(ret));
2170
+	    cache_add(hash, ctx);
2170 2171
 	    return CL_CLEAN;
2171 2172
 	default:
2173
+	    if(ret == CL_CLEAN) cache_add(hash, ctx);
2172 2174
 	    return ret;
2173 2175
     }
2174 2176
 }