Browse code

faster loading of uncompressed .cld files

git-svn: trunk@3854

Tomasz Kojm authored on 2008/05/19 06:32:27
Showing 9 changed files
... ...
@@ -1,3 +1,7 @@
1
+Sun May 18 22:39:00 CEST 2008 (tk)
2
+----------------------------------
3
+  * libclamav: faster loading of uncompressed .cld files
4
+
1 5
 Fri May 16 10:23:01 EEST 2008 (edwin)
2 6
 ------------------------------------
3 7
   * libclamav/readdb.c, str.c: improve DB load time
... ...
@@ -184,9 +184,14 @@ static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, u
184 184
 	char block[TAR_BLOCKSIZE];
185 185
 	int nread, fdd, ret;
186 186
 	unsigned int type, size, pad, compr = 1;
187
-	gzFile *infile;
188
-	z_off_t off;
187
+	off_t off;
188
+	struct cli_dbio dbio;
189 189
 
190
+#define CLOSE_DBIO	    \
191
+    if(compr)		    \
192
+	gzclose(dbio.gzs);  \
193
+    else		    \
194
+	fclose(dbio.fs)
190 195
 
191 196
     cli_dbgmsg("in cli_tgzload()\n");
192 197
 
... ...
@@ -197,31 +202,38 @@ static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, u
197 197
     if(!strncmp(block, "COPYING", 7))
198 198
 	compr = 0;
199 199
 
200
+    lseek(fd, 512, SEEK_SET);
201
+
200 202
     if((fdd = dup(fd)) == -1) {
201 203
 	cli_errmsg("cli_tgzload: Can't duplicate descriptor %d\n", fd);
202 204
 	return CL_EIO;
203 205
     }
204 206
 
205
-    lseek(fdd, 512, SEEK_SET);
206
-
207
-    if((infile = gzdopen(fdd, "rb")) == NULL) {
208
-	cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
209
-	return CL_EIO;
207
+    if(compr) {
208
+	if((dbio.gzs = gzdopen(fdd, "rb")) == NULL) {
209
+	    cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
210
+	    return CL_EIO;
211
+	}
212
+    } else {
213
+	if((dbio.fs = fdopen(fdd, "rb")) == NULL) {
214
+	    cli_errmsg("cli_tgzload: Can't fdopen() descriptor %d, errno = %d\n", fdd, errno);
215
+	    return CL_EIO;
216
+	}
210 217
     }
211 218
 
212
-    if(!compr)
213
-	gzseek(infile, 512, SEEK_SET);
214
-
215 219
     while(1) {
216 220
 
217
-	nread = gzread(infile, block, TAR_BLOCKSIZE);
221
+	if(compr)
222
+	    nread = gzread(dbio.gzs, block, TAR_BLOCKSIZE);
223
+	else
224
+	    nread = fread(block, 1, TAR_BLOCKSIZE, dbio.fs);
218 225
 
219 226
 	if(!nread)
220 227
 	    break;
221 228
 
222 229
 	if(nread != TAR_BLOCKSIZE) {
223 230
 	    cli_errmsg("cli_tgzload: Incomplete block read\n");
224
-	    gzclose(infile);
231
+	    CLOSE_DBIO;
225 232
 	    return CL_EMALFDB;
226 233
 	}
227 234
 
... ...
@@ -233,7 +245,7 @@ static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, u
233 233
 
234 234
 	if(strchr(name, '/')) {
235 235
 	    cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");
236
-	    gzclose(infile);
236
+	    CLOSE_DBIO;
237 237
 	    return CL_EMALFDB;
238 238
 	}
239 239
 
... ...
@@ -245,11 +257,11 @@ static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, u
245 245
 		break;
246 246
 	    case '5':
247 247
 		cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");
248
-	        gzclose(infile);
248
+		CLOSE_DBIO;
249 249
 		return CL_EMALFDB;
250 250
 	    default:
251 251
 		cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", type);
252
-	        gzclose(infile);
252
+		CLOSE_DBIO;
253 253
 		return CL_EMALFDB;
254 254
 	}
255 255
 
... ...
@@ -258,30 +270,41 @@ static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, u
258 258
 
259 259
 	if((sscanf(osize, "%o", &size)) == 0) {
260 260
 	    cli_errmsg("cli_tgzload: Invalid size in header\n");
261
-	    gzclose(infile);
261
+	    CLOSE_DBIO;
262 262
 	    return CL_EMALFDB;
263 263
 	}
264
+	dbio.size = size;
264 265
 
265 266
 	/* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */
266
-	off = gzseek(infile, 0, SEEK_CUR);
267
+	if(compr)
268
+	    off = (off_t) gzseek(dbio.gzs, 0, SEEK_CUR);
269
+	else
270
+	    off = ftell(dbio.fs);
271
+
267 272
 	if(CLI_DBEXT(name)) {
268
-	    ret = cli_load(name, engine, signo, options, infile, size);
273
+	    ret = cli_load(name, engine, signo, options, &dbio);
269 274
 	    if(ret) {
270 275
 		cli_errmsg("cli_tgzload: Invalid size in header\n");
271
-		gzclose(infile);
276
+		CLOSE_DBIO;
272 277
 		return CL_EMALFDB;
273 278
 	    }
274 279
 	}
275 280
 	pad = size % TAR_BLOCKSIZE ? (TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE)) : 0;
276
-	if(off == gzseek(infile, 0, SEEK_CUR))
277
-	    gzseek(infile, size + pad, SEEK_CUR);
278
-	else if(pad)
279
-	    gzseek(infile, pad, SEEK_CUR);
280
-
281
+	if(compr) {
282
+	    if(off == gzseek(dbio.gzs, 0, SEEK_CUR))
283
+		gzseek(dbio.gzs, size + pad, SEEK_CUR);
284
+	    else if(pad)
285
+		gzseek(dbio.gzs, pad, SEEK_CUR);
286
+	} else {
287
+	    if(off == ftell(dbio.fs))
288
+		fseek(dbio.fs, size + pad, SEEK_CUR);
289
+	    else if(pad)
290
+		fseek(dbio.fs, pad, SEEK_CUR);
291
+	}
281 292
     }
282 293
 
283
-    gzclose(infile);
284
-    return 0;
294
+    CLOSE_DBIO;
295
+    return CL_SUCCESS;
285 296
 }
286 297
 
287 298
 struct cl_cvd *cl_cvdparse(const char *head)
... ...
@@ -22,8 +22,15 @@
22 22
 #define __CVD_H
23 23
 
24 24
 #include <stdio.h>
25
+#include <zlib.h>
25 26
 #include "clamav.h"
26 27
 
28
+struct cli_dbio {
29
+    gzFile *gzs;
30
+    FILE *fs;
31
+    unsigned int size;
32
+};
33
+
27 34
 int cli_cvdload(FILE *fs, struct cl_engine **engine, unsigned int *signo, short warn, unsigned int options, unsigned int cld);
28 35
 int cli_untgz(int fd, const char *destdir);
29 36
 
... ...
@@ -264,7 +264,7 @@ static int chkflevel(const char *entry, int field)
264 264
     return 1;
265 265
 }
266 266
 
267
-int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
267
+int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, struct cli_dbio *dbio)
268 268
 {
269 269
 	char buffer[FILEBUFF];
270 270
 	unsigned int line = 0;
... ...
@@ -280,7 +280,7 @@ int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, gz
280 280
 
281 281
     dconf = (struct cli_dconf *) (*engine)->dconf;
282 282
 
283
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
283
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
284 284
 	line++;
285 285
 	cli_chomp(buffer);
286 286
 
... ...
@@ -27,6 +27,7 @@
27 27
 
28 28
 #include "clamav.h"
29 29
 #include "cltypes.h"
30
+#include "cvd.h"
30 31
 
31 32
 struct cli_dconf {
32 33
     uint32_t pe;
... ...
@@ -97,5 +98,5 @@ struct cli_dconf {
97 97
 
98 98
 struct cli_dconf *cli_dconf_init(void);
99 99
 void cli_dconf_print(struct cli_dconf *dconf);
100
-int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize);
100
+int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, struct cli_dbio *dbio);
101 101
 #endif
... ...
@@ -367,7 +367,7 @@ static int cli_initroots(struct cl_engine *engine, unsigned int options)
367 367
     return CL_SUCCESS;
368 368
 }
369 369
 
370
-char *cli_dbgets(char *buff, unsigned int size, FILE *fs, gzFile *gzs, unsigned int *gzrsize)
370
+char *cli_dbgets(char *buff, unsigned int size, FILE *fs, struct cli_dbio *dbio)
371 371
 {
372 372
     if(fs) {
373 373
 	return fgets(buff, size, fs);
... ...
@@ -376,12 +376,16 @@ char *cli_dbgets(char *buff, unsigned int size, FILE *fs, gzFile *gzs, unsigned
376 376
 	    char *pt;
377 377
 	    unsigned int bs;
378 378
 
379
-	if(!*gzrsize)
379
+	if(!dbio->size)
380 380
 	    return NULL;
381 381
 
382
-	bs = *gzrsize < size ? *gzrsize + 1 : size;
383
-	pt = gzgets(gzs, buff, bs);
384
-	*gzrsize -= strlen(buff);
382
+	bs = dbio->size < size ? dbio->size + 1 : size;
383
+	if(dbio->gzs)
384
+	    pt = gzgets(dbio->gzs, buff, bs);
385
+	else
386
+	    pt = fgets(buff, bs, dbio->fs);
387
+
388
+	dbio->size -= strlen(buff);
385 389
 	if(!pt)
386 390
 	    cli_errmsg("cli_dbgets: Preliminary end of data\n");
387 391
 	return pt;
... ...
@@ -409,7 +413,7 @@ static int cli_chkign(const struct cli_ignored *ignored, const char *dbname, uns
409 409
     return 0;
410 410
 }
411 411
 
412
-static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize, const char *dbname)
412
+static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio, const char *dbname)
413 413
 {
414 414
 	char buffer[FILEBUFF], *pt, *start;
415 415
 	unsigned int line = 0, sigs = 0;
... ...
@@ -429,7 +433,7 @@ static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
429 429
 
430 430
     root = (*engine)->root[0];
431 431
 
432
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
432
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
433 433
 	line++;
434 434
 	cli_chomp(buffer);
435 435
 
... ...
@@ -473,7 +477,7 @@ static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
473 473
     return CL_SUCCESS;
474 474
 }
475 475
 
476
-static int cli_loadwdb(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
476
+static int cli_loadwdb(FILE *fs, struct cl_engine **engine, unsigned int options, struct cli_dbio *dbio)
477 477
 {
478 478
 	int ret = 0;
479 479
 
... ...
@@ -494,7 +498,7 @@ static int cli_loadwdb(FILE *fs, struct cl_engine **engine, unsigned int options
494 494
 	}
495 495
     }
496 496
 
497
-    if((ret = load_regex_matcher((*engine)->whitelist_matcher, fs, options, 1, gzs, gzrsize))) {
497
+    if((ret = load_regex_matcher((*engine)->whitelist_matcher, fs, options, 1, dbio))) {
498 498
 	phishing_done(*engine);
499 499
 	cl_free(*engine);
500 500
 	return ret;
... ...
@@ -503,7 +507,7 @@ static int cli_loadwdb(FILE *fs, struct cl_engine **engine, unsigned int options
503 503
     return CL_SUCCESS;
504 504
 }
505 505
 
506
-static int cli_loadpdb(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
506
+static int cli_loadpdb(FILE *fs, struct cl_engine **engine, unsigned int options, struct cli_dbio *dbio)
507 507
 {
508 508
 	int ret = 0;
509 509
 
... ...
@@ -524,7 +528,7 @@ static int cli_loadpdb(FILE *fs, struct cl_engine **engine, unsigned int options
524 524
 	}
525 525
     }
526 526
 
527
-    if((ret = load_regex_matcher((*engine)->domainlist_matcher, fs, options, 0, gzs, gzrsize))) {
527
+    if((ret = load_regex_matcher((*engine)->domainlist_matcher, fs, options, 0, dbio))) {
528 528
 	phishing_done(*engine);
529 529
 	cl_free(*engine);
530 530
 	return ret;
... ...
@@ -534,7 +538,7 @@ static int cli_loadpdb(FILE *fs, struct cl_engine **engine, unsigned int options
534 534
 }
535 535
 
536 536
 #define NDB_TOKENS 6
537
-static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options, gzFile *gzs, unsigned int gzrsize, const char *dbname)
537
+static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options, struct cli_dbio *dbio, const char *dbname)
538 538
 {
539 539
 	const char *tokens[NDB_TOKENS];
540 540
 	char buffer[FILEBUFF];
... ...
@@ -555,7 +559,7 @@ static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
555 555
 	return ret;
556 556
     }
557 557
 
558
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
558
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
559 559
 	line++;
560 560
 
561 561
 	if(!strncmp(buffer, "Exploit.JPEG.Comment", 20)) /* temporary */
... ...
@@ -658,7 +662,7 @@ static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
658 658
 }
659 659
 
660 660
 #define FTM_TOKENS 8	
661
-static int cli_loadftm(FILE *fs, struct cl_engine **engine, unsigned int options, unsigned int internal, gzFile *gzs, unsigned int gzrsize)
661
+static int cli_loadftm(FILE *fs, struct cl_engine **engine, unsigned int options, unsigned int internal, struct cli_dbio *dbio)
662 662
 {
663 663
 	const char *tokens[FTM_TOKENS], *pt;
664 664
 	char buffer[FILEBUFF];
... ...
@@ -684,7 +688,7 @@ static int cli_loadftm(FILE *fs, struct cl_engine **engine, unsigned int options
684 684
 		break;
685 685
 	    strncpy(buffer, ftypes_int[line], sizeof(buffer));
686 686
 	} else {
687
-	    if(!cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize))
687
+	    if(!cli_dbgets(buffer, FILEBUFF, fs, dbio))
688 688
 		break;
689 689
 	    cli_chomp(buffer);
690 690
 	}
... ...
@@ -775,7 +779,7 @@ static int cli_loadftm(FILE *fs, struct cl_engine **engine, unsigned int options
775 775
     return CL_SUCCESS;
776 776
 }
777 777
 
778
-static int cli_loadign(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
778
+static int cli_loadign(FILE *fs, struct cl_engine **engine, unsigned int options, struct cli_dbio *dbio)
779 779
 {
780 780
 	char buffer[FILEBUFF], *pt;
781 781
 	unsigned int line = 0;
... ...
@@ -797,7 +801,7 @@ static int cli_loadign(FILE *fs, struct cl_engine **engine, unsigned int options
797 797
 	}
798 798
     }
799 799
 
800
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
800
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
801 801
 	line++;
802 802
 	cli_chomp(buffer);
803 803
 
... ...
@@ -912,7 +916,7 @@ static int cli_md5db_init(struct cl_engine **engine, unsigned int mode)
912 912
 	db = (*engine)->md5_fp;
913 913
 
914 914
 #define MD5_TOKENS 3
915
-static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int mode, unsigned int options, gzFile *gzs, unsigned int gzrsize, const char *dbname)
915
+static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int mode, unsigned int options, struct cli_dbio *dbio, const char *dbname)
916 916
 {
917 917
 	const char *tokens[MD5_TOKENS];
918 918
 	char buffer[FILEBUFF];
... ...
@@ -933,7 +937,7 @@ static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo,
933 933
 	md5_field = 1;
934 934
     }
935 935
 
936
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
936
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
937 937
 	line++;
938 938
 	cli_chomp(buffer);
939 939
 
... ...
@@ -1027,7 +1031,7 @@ static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo,
1027 1027
     return CL_SUCCESS;
1028 1028
 }
1029 1029
 
1030
-static int cli_loadmd(FILE *fs, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options, gzFile *gzs, unsigned int gzrsize, const char *dbname)
1030
+static int cli_loadmd(FILE *fs, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options, struct cli_dbio *dbio, const char *dbname)
1031 1031
 {
1032 1032
 	char buffer[FILEBUFF], *pt;
1033 1033
 	unsigned int line = 0, sigs = 0;
... ...
@@ -1040,7 +1044,7 @@ static int cli_loadmd(FILE *fs, struct cl_engine **engine, unsigned int *signo,
1040 1040
 	return ret;
1041 1041
     }
1042 1042
 
1043
-    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
1043
+    while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
1044 1044
 	line++;
1045 1045
 	if(buffer[0] == '#')
1046 1046
 	    continue;
... ...
@@ -1208,7 +1212,7 @@ static int cli_loadmd(FILE *fs, struct cl_engine **engine, unsigned int *signo,
1208 1208
 
1209 1209
 static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options);
1210 1210
 
1211
-int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize)
1211
+int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
1212 1212
 {
1213 1213
 	FILE *fs = NULL;
1214 1214
 	int ret = CL_SUCCESS;
... ...
@@ -1216,7 +1220,7 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1216 1216
 	const char *dbname;
1217 1217
 
1218 1218
 
1219
-    if(!gzs && (fs = fopen(filename, "rb")) == NULL) {
1219
+    if(!dbio && (fs = fopen(filename, "rb")) == NULL) {
1220 1220
 	cli_errmsg("cli_load(): Can't open file %s\n", filename);
1221 1221
 	return CL_EOPEN;
1222 1222
     }
... ...
@@ -1233,7 +1237,7 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1233 1233
 	dbname = filename;
1234 1234
 
1235 1235
     if(cli_strbcasestr(dbname, ".db")) {
1236
-	ret = cli_loaddb(fs, engine, signo, options, gzs, gzrsize, dbname);
1236
+	ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);
1237 1237
 
1238 1238
     } else if(cli_strbcasestr(dbname, ".cvd")) {
1239 1239
 	    int warn = 0;
... ...
@@ -1252,66 +1256,66 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1252 1252
 	ret = cli_cvdload(fs, engine, signo, warn, options | CL_DB_CVDNOTMP, 1);
1253 1253
 
1254 1254
     } else if(cli_strbcasestr(dbname, ".hdb")) {
1255
-	ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, gzs, gzrsize, dbname);
1255
+	ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, dbio, dbname);
1256 1256
 
1257 1257
     } else if(cli_strbcasestr(dbname, ".hdu")) {
1258 1258
 	if(options & CL_DB_PUA)
1259
-	    ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, gzs, gzrsize, dbname);
1259
+	    ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, dbio, dbname);
1260 1260
 	else
1261 1261
 	    skipped = 1;
1262 1262
 
1263 1263
     } else if(cli_strbcasestr(dbname, ".fp")) {
1264
-	ret = cli_loadmd5(fs, engine, signo, MD5_FP, options, gzs, gzrsize, dbname);
1264
+	ret = cli_loadmd5(fs, engine, signo, MD5_FP, options, dbio, dbname);
1265 1265
 
1266 1266
     } else if(cli_strbcasestr(dbname, ".mdb")) {
1267
-	ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, gzs, gzrsize, dbname);
1267
+	ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, dbio, dbname);
1268 1268
 
1269 1269
     } else if(cli_strbcasestr(dbname, ".mdu")) {
1270 1270
 	if(options & CL_DB_PUA)
1271
-	    ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, gzs, gzrsize, dbname);
1271
+	    ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, dbio, dbname);
1272 1272
 	else
1273 1273
 	    skipped = 1;
1274 1274
 
1275 1275
     } else if(cli_strbcasestr(dbname, ".ndb")) {
1276
-	ret = cli_loadndb(fs, engine, signo, 0, options, gzs, gzrsize, dbname);
1276
+	ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);
1277 1277
 
1278 1278
     } else if(cli_strbcasestr(dbname, ".ndu")) {
1279 1279
 	if(!(options & CL_DB_PUA))
1280 1280
 	    skipped = 1;
1281 1281
 	else
1282
-	    ret = cli_loadndb(fs, engine, signo, 0, options, gzs, gzrsize, dbname);
1282
+	    ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);
1283 1283
 
1284 1284
     } else if(cli_strbcasestr(dbname, ".sdb")) {
1285
-	ret = cli_loadndb(fs, engine, signo, 1, options, gzs, gzrsize, dbname);
1285
+	ret = cli_loadndb(fs, engine, signo, 1, options, dbio, dbname);
1286 1286
 
1287 1287
     } else if(cli_strbcasestr(dbname, ".zmd")) {
1288
-	ret = cli_loadmd(fs, engine, signo, 1, options, gzs, gzrsize, dbname);
1288
+	ret = cli_loadmd(fs, engine, signo, 1, options, dbio, dbname);
1289 1289
 
1290 1290
     } else if(cli_strbcasestr(dbname, ".rmd")) {
1291
-	ret = cli_loadmd(fs, engine, signo, 2, options, gzs, gzrsize, dbname);
1291
+	ret = cli_loadmd(fs, engine, signo, 2, options, dbio, dbname);
1292 1292
 
1293 1293
     } else if(cli_strbcasestr(dbname, ".cfg")) {
1294
-	ret = cli_dconf_load(fs, engine, options, gzs, gzrsize);
1294
+	ret = cli_dconf_load(fs, engine, options, dbio);
1295 1295
 
1296 1296
     } else if(cli_strbcasestr(dbname, ".wdb")) {
1297 1297
 	if(options & CL_DB_PHISHING_URLS) {
1298
-	    ret = cli_loadwdb(fs, engine, options, gzs, gzrsize);
1298
+	    ret = cli_loadwdb(fs, engine, options, dbio);
1299 1299
 	} else
1300 1300
 	    skipped = 1;
1301 1301
     } else if(cli_strbcasestr(dbname, ".pdb")) {
1302 1302
 	if(options & CL_DB_PHISHING_URLS) {
1303
-	    ret = cli_loadpdb(fs, engine, options, gzs, gzrsize);
1303
+	    ret = cli_loadpdb(fs, engine, options, dbio);
1304 1304
 	} else
1305 1305
 	    skipped = 1;
1306 1306
     } else if(cli_strbcasestr(dbname, ".ftm")) {
1307
-	ret = cli_loadftm(fs, engine, options, 0, gzs, gzrsize);
1307
+	ret = cli_loadftm(fs, engine, options, 0, dbio);
1308 1308
 
1309 1309
     } else if(cli_strbcasestr(dbname, ".ign")) {
1310
-	ret = cli_loadign(fs, engine, options, gzs, gzrsize);
1310
+	ret = cli_loadign(fs, engine, options, dbio);
1311 1311
 
1312 1312
     } else {
1313 1313
 	cli_dbgmsg("cli_load: unknown extension - assuming old database format\n");
1314
-	ret = cli_loaddb(fs, engine, signo, options, gzs, gzrsize, dbname);
1314
+	ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);
1315 1315
     }
1316 1316
 
1317 1317
     if(ret) {
... ...
@@ -1330,7 +1334,7 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1330 1330
 }
1331 1331
 
1332 1332
 int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo) {
1333
-    return cli_load(filename, engine, signo, CL_DB_STDOPT, NULL, 0);
1333
+    return cli_load(filename, engine, signo, CL_DB_STDOPT, NULL);
1334 1334
 }
1335 1335
 
1336 1336
 static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
... ...
@@ -1354,7 +1358,7 @@ static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigne
1354 1354
 
1355 1355
     /* try to load local.ign and daily.cvd/daily.ign first */
1356 1356
     sprintf(dbfile, "%s/local.ign", dirname);
1357
-    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL, 0))) {
1357
+    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
1358 1358
 	free(dbfile);
1359 1359
 	return ret;
1360 1360
     }
... ...
@@ -1362,20 +1366,20 @@ static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigne
1362 1362
     sprintf(dbfile, "%s/daily.cld", dirname);
1363 1363
     if(access(dbfile, R_OK))
1364 1364
 	sprintf(dbfile, "%s/daily.cvd", dirname);
1365
-    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL, 0))) {
1365
+    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
1366 1366
 	free(dbfile);
1367 1367
 	return ret;
1368 1368
     }
1369 1369
 
1370 1370
     sprintf(dbfile, "%s/daily.ign", dirname);
1371
-    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL, 0))) {
1371
+    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
1372 1372
 	free(dbfile);
1373 1373
 	return ret;
1374 1374
     }
1375 1375
 
1376 1376
     /* check for and load daily.cfg */
1377 1377
     sprintf(dbfile, "%s/daily.cfg", dirname);
1378
-    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL, 0))) {
1378
+    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
1379 1379
 	free(dbfile);
1380 1380
 	return ret;
1381 1381
     }
... ...
@@ -1407,7 +1411,7 @@ static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigne
1407 1407
 		    return CL_EMEM;
1408 1408
 		}
1409 1409
 		sprintf(dbfile, "%s/%s", dirname, dent->d_name);
1410
-		ret = cli_load(dbfile, engine, signo, options, NULL, 0);
1410
+		ret = cli_load(dbfile, engine, signo, options, NULL);
1411 1411
 
1412 1412
 		if(ret) {
1413 1413
 		    cli_dbgmsg("cli_loaddbdir(): error loading database %s\n", dbfile);
... ...
@@ -1451,7 +1455,7 @@ int cl_load(const char *path, struct cl_engine **engine, unsigned int *signo, un
1451 1451
 
1452 1452
     switch(sb.st_mode & S_IFMT) {
1453 1453
 	case S_IFREG: 
1454
-	    ret = cli_load(path, engine, signo, options, NULL, 0);
1454
+	    ret = cli_load(path, engine, signo, options, NULL);
1455 1455
 	    break;
1456 1456
 
1457 1457
 	case S_IFDIR:
... ...
@@ -1783,7 +1787,7 @@ int cl_build(struct cl_engine *engine)
1783 1783
 	return CL_ENULLARG;
1784 1784
 
1785 1785
     if(!engine->ftypes)
1786
-	if((ret = cli_loadftm(NULL, &engine, 0, 1, NULL, 0)))
1786
+	if((ret = cli_loadftm(NULL, &engine, 0, 1, NULL)))
1787 1787
 	    return ret;
1788 1788
 
1789 1789
     for(i = 0; i < CLI_MTARGETS; i++) {
... ...
@@ -21,12 +21,11 @@
21 21
 #ifndef __READDB_H
22 22
 #define __READDB_H
23 23
 
24
-#include <zlib.h>
25
-
26 24
 #include "clamav.h"
27 25
 #include "matcher.h"
28 26
 #include "str.h"
29 27
 #include "cltypes.h"
28
+#include "cvd.h"
30 29
 
31 30
 #define CLI_DBEXT(ext)				\
32 31
     (						\
... ...
@@ -52,13 +51,12 @@
52 52
 	cli_strbcasestr(ext, ".cld")		\
53 53
     )
54 54
 
55
-
56 55
 int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target);
57 56
 
58 57
 int cli_initengine(struct cl_engine **engine, unsigned int options);
59 58
 
60
-int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize);
59
+int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio);
61 60
 
62
-char *cli_dbgets(char *buff, unsigned int size, FILE *fs, gzFile *gzs, unsigned int *gzrsize);
61
+char *cli_dbgets(char *buff, unsigned int size, FILE *fs, struct cli_dbio *dbio);
63 62
 
64 63
 #endif
... ...
@@ -522,7 +522,7 @@ static int functionality_level_check(char* line)
522 522
 
523 523
 
524 524
 /* Load patterns/regexes from file */
525
-int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,gzFile *gzs,unsigned int gzrsize)
525
+int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,struct cli_dbio *dbio)
526 526
 {
527 527
 	int rc,line=0;
528 528
 	char buffer[FILEBUFF];
... ...
@@ -535,7 +535,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
535 535
 		cli_warnmsg("Regex list has already been loaded, ignoring further requests for load\n");
536 536
 		return CL_SUCCESS;
537 537
 	}*/
538
-	if(!fd && !gzs) {
538
+	if(!fd && !dbio) {
539 539
 		cli_errmsg("Unable to load regex list (null file)\n");
540 540
 		return CL_EIO;
541 541
 	}
... ...
@@ -570,7 +570,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
570 570
 	 * If a line in the file doesn't conform to this format, loading fails
571 571
 	 * 
572 572
 	 */
573
-	while(cli_dbgets(buffer, FILEBUFF, fd, gzs, &gzrsize)) {
573
+	while(cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
574 574
 		char* pattern;
575 575
 		char* flags;
576 576
 		cli_chomp(buffer);
... ...
@@ -35,6 +35,8 @@
35 35
 #endif
36 36
 
37 37
 #include "phishcheck.h"
38
+#include "readdb.h"
39
+#include "matcher.h"
38 40
 #include <zlib.h> /* for gzFile */
39 41
 struct node_stack {
40 42
 	struct tree_node** data;
... ...
@@ -56,7 +58,7 @@ struct regex_matcher {
56 56
 
57 57
 int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char** info,int is_whitelist);
58 58
 int init_regex_list(struct regex_matcher* matcher);
59
-int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,gzFile *gzs,unsigned int gzrsize);
59
+int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,struct cli_dbio *dbio);
60 60
 void regex_list_cleanup(struct regex_matcher* matcher);
61 61
 void regex_list_done(struct regex_matcher* matcher);
62 62
 int is_regex_ok(struct regex_matcher* matcher);