Browse code

libclamav: improve loading speed of compressed databases (bb#1105)

Tomasz Kojm authored on 2009/07/29 03:23:31
Showing 5 changed files
... ...
@@ -1,3 +1,7 @@
1
+Tue Jul 28 20:19:08 CEST 2009 (tk)
2
+----------------------------------
3
+ * libclamav: improve loading speed of compressed databases (bb#1105)
4
+
1 5
 Mon Jul 27 13:53:15 CEST 2009 (tk)
2 6
 ----------------------------------
3 7
  * libclamav/macho.c: improve detection of Universal Binaries
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2009 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -41,6 +41,7 @@
41 41
 #include "str.h"
42 42
 #include "cvd.h"
43 43
 #include "readdb.h"
44
+#include "default.h"
44 45
 
45 46
 #define TAR_BLOCKSIZE 512
46 47
 
... ...
@@ -227,6 +228,17 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
227 227
 	dbio.gzs = NULL;
228 228
     }
229 229
 
230
+    dbio.bufsize = CLI_DEFAULT_DBIO_BUFSIZE;
231
+    dbio.buf = cli_malloc(dbio.bufsize);
232
+    if(!dbio.buf) {
233
+	cli_errmsg("cli_tgzload: Can't allocate memory for dbio.buf\n");
234
+	CLOSE_DBIO;
235
+	return CL_EMALFDB;
236
+    }
237
+    dbio.bufpt = NULL;
238
+    dbio.usebuf = 1;
239
+    dbio.readpt = dbio.buf;
240
+
230 241
     while(1) {
231 242
 
232 243
 	if(compr)
... ...
@@ -239,6 +251,7 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
239 239
 
240 240
 	if(nread != TAR_BLOCKSIZE) {
241 241
 	    cli_errmsg("cli_tgzload: Incomplete block read\n");
242
+	    free(dbio.buf);
242 243
 	    CLOSE_DBIO;
243 244
 	    return CL_EMALFDB;
244 245
 	}
... ...
@@ -251,6 +264,7 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
251 251
 
252 252
 	if(strchr(name, '/')) {
253 253
 	    cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");
254
+	    free(dbio.buf);
254 255
 	    CLOSE_DBIO;
255 256
 	    return CL_EMALFDB;
256 257
 	}
... ...
@@ -263,10 +277,12 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
263 263
 		break;
264 264
 	    case '5':
265 265
 		cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");
266
+		free(dbio.buf);
266 267
 		CLOSE_DBIO;
267 268
 		return CL_EMALFDB;
268 269
 	    default:
269 270
 		cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", type);
271
+		free(dbio.buf);
270 272
 		CLOSE_DBIO;
271 273
 		return CL_EMALFDB;
272 274
 	}
... ...
@@ -276,10 +292,14 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
276 276
 
277 277
 	if((sscanf(osize, "%o", &size)) == 0) {
278 278
 	    cli_errmsg("cli_tgzload: Invalid size in header\n");
279
+	    free(dbio.buf);
279 280
 	    CLOSE_DBIO;
280 281
 	    return CL_EMALFDB;
281 282
 	}
282 283
 	dbio.size = size;
284
+	dbio.readsize = dbio.size < dbio.bufsize ? dbio.size : dbio.bufsize - 1;
285
+	dbio.bufpt = NULL;
286
+	dbio.readpt = dbio.buf;
283 287
 
284 288
 	/* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */
285 289
 	if(compr)
... ...
@@ -291,6 +311,7 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
291 291
 	    ret = cli_load(name, engine, signo, options, &dbio);
292 292
 	    if(ret) {
293 293
 		cli_errmsg("cli_tgzload: Can't load %s\n", name);
294
+		free(dbio.buf);
294 295
 		CLOSE_DBIO;
295 296
 		return CL_EMALFDB;
296 297
 	    }
... ...
@@ -309,6 +330,7 @@ static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, un
309 309
 	}
310 310
     }
311 311
 
312
+    free(dbio.buf);
312 313
     CLOSE_DBIO;
313 314
     return CL_SUCCESS;
314 315
 }
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2009 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -29,6 +29,9 @@ struct cli_dbio {
29 29
     gzFile *gzs;
30 30
     FILE *fs;
31 31
     unsigned int size;
32
+    char *buf, *bufpt, *readpt;
33
+    unsigned int usebuf, bufsize, readsize;
34
+
32 35
 };
33 36
 
34 37
 int cli_cvdload(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int daily, unsigned int options, unsigned int cld);
... ...
@@ -26,6 +26,7 @@
26 26
 #define CLI_DEFAULT_AC_TRACKLEN	    8
27 27
 
28 28
 #define CLI_DEFAULT_LSIG_BUFSIZE    32768
29
+#define CLI_DEFAULT_DBIO_BUFSIZE    (CLI_DEFAULT_LSIG_BUFSIZE + 1) /* parenthesized so the macro is safe inside larger expressions; the extra byte leaves room for the NUL terminator cli_dbgets writes after each read */
29 30
 
30 31
 #define CLI_DEFAULT_MAXSCANSIZE	    104857600
31 32
 #define CLI_DEFAULT_MAXFILESIZE	    26214400
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2009 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -340,10 +340,68 @@ static int cli_initroots(struct cl_engine *engine, unsigned int options)
340 340
 
341 341
 char *cli_dbgets(char *buff, unsigned int size, FILE *fs, struct cli_dbio *dbio)
342 342
 {
343
-    if(fs) {
343
+    if(fs)
344 344
 	return fgets(buff, size, fs);
345 345
 
346
-    } else {
346
+    if(dbio->usebuf) {
347
+	    int bread;
348
+	    char *nl;
349
+
350
+	while(1) {
351
+	    if(!dbio->bufpt) {
352
+		if(!dbio->size)
353
+		    return NULL;
354
+
355
+		if(dbio->gzs) {
356
+		    bread = gzread(dbio->gzs, dbio->readpt, dbio->readsize);
357
+		    if(bread == -1) {
358
+			cli_errmsg("cli_dbgets: gzread() failed\n");
359
+			return NULL;
360
+		    }
361
+		} else {
362
+		    bread = fread(dbio->readpt, 1, dbio->readsize, dbio->fs);
363
+		    if(!bread && ferror(dbio->fs)) {
364
+			cli_errmsg("cli_dbgets: gzread() failed\n");
365
+			return NULL;
366
+		    }
367
+		}
368
+		if(!bread)
369
+		    return NULL;
370
+		dbio->readpt[bread] = 0;
371
+		dbio->bufpt = dbio->buf;
372
+		dbio->size -= bread;
373
+	    }
374
+	    nl = strchr(dbio->bufpt, '\n');
375
+	    if(nl) {
376
+		if(nl - dbio->bufpt >= size) {
377
+		    cli_errmsg("cli_dbgets: Line too long for provided buffer\n");
378
+		    return NULL;
379
+		}
380
+		strncpy(buff, dbio->bufpt, nl - dbio->bufpt);
381
+		buff[nl - dbio->bufpt] = 0;
382
+		if(nl < dbio->buf + dbio->bufsize) {
383
+		    dbio->bufpt = ++nl;
384
+		} else {
385
+		    dbio->bufpt = NULL;
386
+		    dbio->readpt = dbio->buf;
387
+		    dbio->readsize = dbio->size < dbio->bufsize ? dbio->size : dbio->bufsize - 1;
388
+		}
389
+		return buff;
390
+	    } else {
391
+		    unsigned int remain = dbio->buf + dbio->bufsize - 1 - dbio->bufpt;
392
+
393
+		if(dbio->bufpt == dbio->buf) {
394
+		    cli_errmsg("cli_dbgets: Invalid data or internal buffer too small\n");
395
+		    return NULL;
396
+		}
397
+		memmove(dbio->buf, dbio->bufpt, remain);
398
+		dbio->readpt = dbio->buf + remain;
399
+		dbio->readsize = dbio->bufsize - remain;
400
+		dbio->readsize = dbio->size < dbio->bufsize - remain ? dbio->size : dbio->bufsize - remain - 1;
401
+		dbio->bufpt = NULL;
402
+	    }
403
+	}
404
+    } else { /* use gzgets/fgets */
347 405
 	    char *pt;
348 406
 	    unsigned int bs;
349 407