Browse code

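Add initial support for loading CVD files directly, without first extracting them to a temporary directory (enabled by passing CL_DB_CVDNOTMP to cl_load())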

git-svn: trunk@3435

Tomasz Kojm authored on 2007/12/19 04:23:56
Showing 9 changed files
... ...
@@ -1,3 +1,8 @@
1
+Tue Dec 18 19:43:04 CET 2007 (tk)
2
+---------------------------------
3
+  * libclamav: add (initial) support for direct loading of CVD files (without
4
+	       extracting to /tmp); requires CL_DB_CVDNOTMP passed to cl_load()
5
+
1 6
 Tue Dec 18 16:25:22 GMT 2007 (njh)
2 7
 ----------------------------------
3 8
   * libclamav/mbox.c:	Fix warnings with various compilers
... ...
@@ -69,6 +69,7 @@ extern "C"
69 69
 #define CL_DB_ACONLY	    0x4 /* WARNING: only for developers */
70 70
 #define CL_DB_PHISHING_URLS 0x8
71 71
 #define CL_DB_PUA	    0x10
72
+#define CL_DB_CVDNOTMP	    0x20
72 73
 
73 74
 /* recommended db settings */
74 75
 #define CL_DB_STDOPT	    (CL_DB_PHISHING | CL_DB_PHISHING_URLS)
... ...
@@ -40,6 +40,7 @@
40 40
 #include "dsig.h"
41 41
 #include "str.h"
42 42
 #include "cvd.h"
43
+#include "readdb.h"
43 44
 
44 45
 #define TAR_BLOCKSIZE 512
45 46
 
... ...
@@ -173,6 +174,99 @@ int cli_untgz(int fd, const char *destdir)
173 173
     return 0;
174 174
 }
175 175
 
176
+static int cli_tgzload(int fd, struct cl_engine **engine, unsigned int *signo, unsigned int options)
177
+{
178
+	char osize[13], name[101];
179
+	char block[TAR_BLOCKSIZE];
180
+	int nread, fdd, ret;
181
+	unsigned int type, size;
182
+	gzFile *infile;
183
+	z_off_t off;
184
+
185
+
186
+    cli_dbgmsg("in cli_untgz()\n");
187
+
188
+    if((fdd = dup(fd)) == -1) {
189
+	cli_errmsg("cli_tgzload: Can't duplicate descriptor %d\n", fd);
190
+	return CL_EIO;
191
+    }
192
+
193
+    if((infile = gzdopen(fdd, "rb")) == NULL) {
194
+	cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
195
+	return CL_EIO;
196
+    }
197
+
198
+    while(1) {
199
+
200
+	nread = gzread(infile, block, TAR_BLOCKSIZE);
201
+
202
+	if(!nread)
203
+	    break;
204
+
205
+	if(nread != TAR_BLOCKSIZE) {
206
+	    cli_errmsg("cli_tgzload: Incomplete block read\n");
207
+	    gzclose(infile);
208
+	    return CL_EMALFDB;
209
+	}
210
+
211
+	if(block[0] == '\0')  /* We're done */
212
+	    break;
213
+
214
+	strncpy(name, block, 100);
215
+	name[100] = '\0';
216
+
217
+	if(strchr(name, '/')) {
218
+	    cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");
219
+	    gzclose(infile);
220
+	    return CL_EMALFDB;
221
+	}
222
+
223
+	type = block[156];
224
+
225
+	switch(type) {
226
+	    case '0':
227
+	    case '\0':
228
+		break;
229
+	    case '5':
230
+		cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");
231
+	        gzclose(infile);
232
+		return CL_EMALFDB;
233
+	    default:
234
+		cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", type);
235
+	        gzclose(infile);
236
+		return CL_EMALFDB;
237
+	}
238
+
239
+	strncpy(osize, block + 124, 12);
240
+	osize[12] = '\0';
241
+
242
+	if((sscanf(osize, "%o", &size)) == 0) {
243
+	    cli_errmsg("cli_tgzload: Invalid size in header\n");
244
+	    gzclose(infile);
245
+	    return CL_EMALFDB;
246
+	}
247
+
248
+	/* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */
249
+	off = gzseek(infile, 0, SEEK_CUR);
250
+	if(CLI_DBEXT(name)) {
251
+	    ret = cli_load(name, engine, signo, options, infile, size);
252
+	    if(ret) {
253
+		cli_errmsg("cli_tgzload: Invalid size in header\n");
254
+		gzclose(infile);
255
+		return CL_EMALFDB;
256
+	    }
257
+	}
258
+	if(off == gzseek(infile, 0, SEEK_CUR))
259
+	    gzseek(infile, size + TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE), SEEK_CUR);
260
+	else
261
+	    gzseek(infile, TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE), SEEK_CUR);
262
+
263
+    }
264
+
265
+    gzclose(infile);
266
+    return 0;
267
+}
268
+
176 269
 struct cl_cvd *cl_cvdparse(const char *head)
177 270
 {
178 271
 	struct cl_cvd *cvd;
... ...
@@ -391,37 +485,42 @@ int cli_cvdload(FILE *fs, struct cl_engine **engine, unsigned int *signo, short
391 391
 	cli_warnmsg("***********************************************************\n");
392 392
     }
393 393
 
394
-    dir = cli_gentemp(NULL);
395
-    if(mkdir(dir, 0700)) {
396
-	cli_errmsg("cli_cvdload(): Can't create temporary directory %s\n", dir);
397
-	free(dir);
398
-	return CL_ETMPDIR;
399
-    }
400
-
401 394
     cfd = fileno(fs);
402
-
403 395
     /* use only operations on file descriptors, and not on the FILE* from here on 
404 396
      * if we seek the FILE*, the underlying descriptor may not seek as expected
405 397
      * (for example on OpenBSD, cygwin, etc.).
406 398
      * So seek the descriptor directly.
407 399
      */ 
408
-
409 400
     if(lseek(cfd, 512, SEEK_SET) == -1) {
410 401
 	cli_errmsg("cli_cvdload(): lseek(fs, 512, SEEK_SET) failed\n");
411 402
 	return CL_EIO;
412 403
     }
413 404
 
414
-    if(cli_untgz(cfd, dir)) {
415
-	cli_errmsg("cli_cvdload(): Can't unpack CVD file.\n");
416
-	free(dir);
417
-	return CL_ECVDEXTR;
418
-    }
405
+    if(options & CL_DB_CVDNOTMP) {
419 406
 
420
-    /* load extracted directory */
421
-    ret = cl_load(dir, engine, signo, options);
407
+	return cli_tgzload(cfd, engine, signo, options);
422 408
 
423
-    cli_rmdirs(dir);
424
-    free(dir);
409
+    } else {
425 410
 
426
-    return ret;
411
+	dir = cli_gentemp(NULL);
412
+	if(mkdir(dir, 0700)) {
413
+	    cli_errmsg("cli_cvdload(): Can't create temporary directory %s\n", dir);
414
+	    free(dir);
415
+	    return CL_ETMPDIR;
416
+	}
417
+
418
+	if(cli_untgz(cfd, dir)) {
419
+	    cli_errmsg("cli_cvdload(): Can't unpack CVD file.\n");
420
+	    free(dir);
421
+	    return CL_ECVDEXTR;
422
+	}
423
+
424
+	/* load extracted directory */
425
+	ret = cl_load(dir, engine, signo, options);
426
+
427
+	cli_rmdirs(dir);
428
+	free(dir);
429
+
430
+	return ret;
431
+    }
427 432
 }
... ...
@@ -24,6 +24,7 @@
24 24
 #include <string.h>
25 25
 #include <stdlib.h>
26 26
 #include <ctype.h>
27
+#include <zlib.h>
27 28
 
28 29
 #include "clamav.h"
29 30
 #include "cltypes.h"
... ...
@@ -259,7 +260,7 @@ static int chkflevel(const char *entry, int field)
259 259
     return 1;
260 260
 }
261 261
 
262
-int cli_dconf_load(FILE *fd, struct cl_engine **engine, unsigned int options)
262
+int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
263 263
 {
264 264
 	char buffer[FILEBUFF];
265 265
 	unsigned int line = 0;
... ...
@@ -275,7 +276,7 @@ int cli_dconf_load(FILE *fd, struct cl_engine **engine, unsigned int options)
275 275
 
276 276
     dconf = (struct cli_dconf *) (*engine)->dconf;
277 277
 
278
-    while(fgets(buffer, FILEBUFF, fd)) {
278
+    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
279 279
 	line++;
280 280
 	cli_chomp(buffer);
281 281
 
... ...
@@ -20,6 +20,7 @@
20 20
 #define __DCONF_H
21 21
 
22 22
 #include <stdio.h>
23
+#include <zlib.h>
23 24
 
24 25
 #include "clamav.h"
25 26
 #include "cltypes.h"
... ...
@@ -91,6 +92,5 @@ struct cli_dconf {
91 91
 
92 92
 struct cli_dconf *cli_dconf_init(void);
93 93
 void cli_dconf_print(struct cli_dconf *dconf);
94
-int cli_dconf_load(FILE *fd, struct cl_engine **engine, unsigned int options);
95
-
94
+int cli_dconf_load(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize);
96 95
 #endif
... ...
@@ -40,6 +40,7 @@
40 40
 #include <sys/param.h>
41 41
 #endif
42 42
 #include <fcntl.h>
43
+#include <zlib.h>
43 44
 
44 45
 #include "clamav.h"
45 46
 #include "cvd.h"
... ...
@@ -349,7 +350,28 @@ static int cli_initroots(struct cl_engine *engine, unsigned int options)
349 349
     return CL_SUCCESS;
350 350
 }
351 351
 
352
-static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned int options)
352
+char *cli_dbgets(char *buff, unsigned int size, FILE *fs, gzFile *gzs, unsigned int *gzrsize)
353
+{
354
+    if(fs) {
355
+	return fgets(buff, size, fs);
356
+
357
+    } else {
358
+	    char *pt;
359
+	    unsigned int bs;
360
+
361
+	if(!*gzrsize)
362
+	    return NULL;
363
+
364
+	bs = *gzrsize < size ? *gzrsize + 1 : size;
365
+	pt = gzgets(gzs, buff, bs);
366
+	*gzrsize -= strlen(buff);
367
+	if(!pt)
368
+	    cli_errmsg("cli_dbgets: Preliminary end of data\n");
369
+	return pt;
370
+    }
371
+}
372
+
373
+static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize)
353 374
 {
354 375
 	char buffer[FILEBUFF], *pt, *start;
355 376
 	int line = 0, ret = 0;
... ...
@@ -368,7 +390,7 @@ static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
368 368
 
369 369
     root = (*engine)->root[0];
370 370
 
371
-    while(fgets(buffer, FILEBUFF, fd)) {
371
+    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
372 372
 	line++;
373 373
 	cli_chomp(buffer);
374 374
 
... ...
@@ -409,7 +431,7 @@ static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
409 409
     return CL_SUCCESS;
410 410
 }
411 411
 
412
-static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options)
412
+static int cli_loadwdb(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
413 413
 {
414 414
 	int ret = 0;
415 415
 
... ...
@@ -430,7 +452,7 @@ static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options
430 430
 	}
431 431
     }
432 432
 
433
-    if((ret = load_regex_matcher((*engine)->whitelist_matcher, fd, options, 1))) {
433
+    if((ret = load_regex_matcher((*engine)->whitelist_matcher, fs, options, 1, gzs, gzrsize))) {
434 434
 	phishing_done(*engine);
435 435
 	cl_free(*engine);
436 436
 	return ret;
... ...
@@ -439,7 +461,7 @@ static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options
439 439
     return CL_SUCCESS;
440 440
 }
441 441
 
442
-static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options)
442
+static int cli_loadpdb(FILE *fs, struct cl_engine **engine, unsigned int options, gzFile *gzs, unsigned int gzrsize)
443 443
 {
444 444
 	int ret = 0;
445 445
 
... ...
@@ -460,7 +482,7 @@ static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options
460 460
 	}
461 461
     }
462 462
 
463
-    if((ret = load_regex_matcher((*engine)->domainlist_matcher, fd, options, 0))) {
463
+    if((ret = load_regex_matcher((*engine)->domainlist_matcher, fs, options, 0, gzs, gzrsize))) {
464 464
 	phishing_done(*engine);
465 465
 	cl_free(*engine);
466 466
 	return ret;
... ...
@@ -470,7 +492,7 @@ static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options
470 470
 }
471 471
 
472 472
 #define NDB_TOKENS 6
473
-static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options)
473
+static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options, gzFile *gzs, unsigned int gzrsize)
474 474
 {
475 475
 	const char *tokens[NDB_TOKENS];
476 476
 	char buffer[FILEBUFF];
... ...
@@ -491,7 +513,7 @@ static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
491 491
 	return ret;
492 492
     }
493 493
 
494
-    while(fgets(buffer, FILEBUFF, fd)) {
494
+    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
495 495
 	line++;
496 496
 
497 497
 	if(!strncmp(buffer, "Exploit.JPEG.Comment", 20)) /* temporary */
... ...
@@ -596,7 +618,7 @@ static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
596 596
 }
597 597
 
598 598
 #define FT_TOKENS 4
599
-static int cli_loadft(FILE *fd, struct cl_engine **engine, unsigned int options, unsigned int internal)
599
+static int cli_loadft(FILE *fs, struct cl_engine **engine, unsigned int options, unsigned int internal, gzFile *gzs, unsigned int gzrsize)
600 600
 {
601 601
 	const char *tokens[FT_TOKENS];
602 602
 	char buffer[FILEBUFF];
... ...
@@ -622,7 +644,7 @@ static int cli_loadft(FILE *fd, struct cl_engine **engine, unsigned int options,
622 622
 		break;
623 623
 	    strncpy(buffer, ftypes_int[line], sizeof(buffer));
624 624
 	} else {
625
-	    if(!fgets(buffer, FILEBUFF, fd))
625
+	    if(!cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize))
626 626
 		break;
627 627
 	    cli_chomp(buffer);
628 628
 	}
... ...
@@ -730,7 +752,7 @@ static int cli_md5db_init(struct cl_engine **engine, unsigned int mode)
730 730
     else			    \
731 731
 	db = (*engine)->md5_fp;
732 732
 
733
-static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned int mode, unsigned int options)
733
+static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int mode, unsigned int options, gzFile *gzs, unsigned int gzrsize)
734 734
 {
735 735
 	char buffer[FILEBUFF], *pt;
736 736
 	int ret = CL_SUCCESS;
... ...
@@ -750,7 +772,7 @@ static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo,
750 750
 	md5_field = 1;
751 751
     }
752 752
 
753
-    while(fgets(buffer, FILEBUFF, fd)) {
753
+    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
754 754
 	line++;
755 755
 	cli_chomp(buffer);
756 756
 
... ...
@@ -852,7 +874,7 @@ static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo,
852 852
     return CL_SUCCESS;
853 853
 }
854 854
 
855
-static int cli_loadmd(FILE *fd, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options)
855
+static int cli_loadmd(FILE *fs, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options, gzFile *gzs, unsigned int gzrsize)
856 856
 {
857 857
 	char buffer[FILEBUFF], *pt;
858 858
 	int line = 0, comments = 0, ret = 0, crc32;
... ...
@@ -864,7 +886,7 @@ static int cli_loadmd(FILE *fd, struct cl_engine **engine, unsigned int *signo,
864 864
 	return ret;
865 865
     }
866 866
 
867
-    while(fgets(buffer, FILEBUFF, fd)) {
867
+    while(cli_dbgets(buffer, FILEBUFF, fs, gzs, &gzrsize)) {
868 868
 	line++;
869 869
 	if(buffer[0] == '#') {
870 870
 	    comments++;
... ...
@@ -1026,20 +1048,20 @@ static int cli_loadmd(FILE *fd, struct cl_engine **engine, unsigned int *signo,
1026 1026
 
1027 1027
 static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options);
1028 1028
 
1029
-static int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options)
1029
+int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize)
1030 1030
 {
1031
-	FILE *fd;
1031
+	FILE *fs = NULL;
1032 1032
 	int ret = CL_SUCCESS;
1033 1033
 	uint8_t skipped = 0;
1034 1034
 
1035 1035
 
1036
-    if((fd = fopen(filename, "rb")) == NULL) {
1036
+    if(!gzs && (fs = fopen(filename, "rb")) == NULL) {
1037 1037
 	cli_errmsg("cli_load(): Can't open file %s\n", filename);
1038 1038
 	return CL_EOPEN;
1039 1039
     }
1040 1040
 
1041 1041
     if(cli_strbcasestr(filename, ".db")) {
1042
-	ret = cli_loaddb(fd, engine, signo, options);
1042
+	ret = cli_loaddb(fs, engine, signo, options, gzs, gzrsize);
1043 1043
 
1044 1044
     } else if(cli_strbcasestr(filename, ".cvd")) {
1045 1045
 	    int warn = 0;
... ...
@@ -1047,66 +1069,66 @@ static int cli_load(const char *filename, struct cl_engine **engine, unsigned in
1047 1047
 	if(strstr(filename, "daily.cvd"))
1048 1048
 	    warn = 1;
1049 1049
 
1050
-	ret = cli_cvdload(fd, engine, signo, warn, options);
1050
+	ret = cli_cvdload(fs, engine, signo, warn, options);
1051 1051
 
1052 1052
     } else if(cli_strbcasestr(filename, ".hdb")) {
1053
-	ret = cli_loadmd5(fd, engine, signo, MD5_HDB, options);
1053
+	ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, gzs, gzrsize);
1054 1054
 
1055 1055
     } else if(cli_strbcasestr(filename, ".hdu")) {
1056 1056
 	if(options & CL_DB_PUA)
1057
-	    ret = cli_loadmd5(fd, engine, signo, MD5_HDB, options);
1057
+	    ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, gzs, gzrsize);
1058 1058
 	else
1059 1059
 	    skipped = 1;
1060 1060
 
1061 1061
     } else if(cli_strbcasestr(filename, ".fp")) {
1062
-	ret = cli_loadmd5(fd, engine, signo, MD5_FP, options);
1062
+	ret = cli_loadmd5(fs, engine, signo, MD5_FP, options, gzs, gzrsize);
1063 1063
 
1064 1064
     } else if(cli_strbcasestr(filename, ".mdb")) {
1065
-	ret = cli_loadmd5(fd, engine, signo, MD5_MDB, options);
1065
+	ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, gzs, gzrsize);
1066 1066
 
1067 1067
     } else if(cli_strbcasestr(filename, ".mdu")) {
1068 1068
 	if(options & CL_DB_PUA)
1069
-	    ret = cli_loadmd5(fd, engine, signo, MD5_MDB, options);
1069
+	    ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, gzs, gzrsize);
1070 1070
 	else
1071 1071
 	    skipped = 1;
1072 1072
 
1073 1073
     } else if(cli_strbcasestr(filename, ".ndb")) {
1074
-	ret = cli_loadndb(fd, engine, signo, 0, options);
1074
+	ret = cli_loadndb(fs, engine, signo, 0, options, gzs, gzrsize);
1075 1075
 
1076 1076
     } else if(cli_strbcasestr(filename, ".ndu")) {
1077 1077
 	if(!(options & CL_DB_PUA))
1078 1078
 	    skipped = 1;
1079 1079
 	else
1080
-	    ret = cli_loadndb(fd, engine, signo, 0, options);
1080
+	    ret = cli_loadndb(fs, engine, signo, 0, options, gzs, gzrsize);
1081 1081
 
1082 1082
     } else if(cli_strbcasestr(filename, ".sdb")) {
1083
-	ret = cli_loadndb(fd, engine, signo, 1, options);
1083
+	ret = cli_loadndb(fs, engine, signo, 1, options, gzs, gzrsize);
1084 1084
 
1085 1085
     } else if(cli_strbcasestr(filename, ".zmd")) {
1086
-	ret = cli_loadmd(fd, engine, signo, 1, options);
1086
+	ret = cli_loadmd(fs, engine, signo, 1, options, gzs, gzrsize);
1087 1087
 
1088 1088
     } else if(cli_strbcasestr(filename, ".rmd")) {
1089
-	ret = cli_loadmd(fd, engine, signo, 2, options);
1089
+	ret = cli_loadmd(fs, engine, signo, 2, options, gzs, gzrsize);
1090 1090
 
1091 1091
     } else if(cli_strbcasestr(filename, ".cfg")) {
1092
-	ret = cli_dconf_load(fd, engine, options);
1092
+	ret = cli_dconf_load(fs, engine, options, gzs, gzrsize);
1093 1093
 
1094 1094
     } else if(cli_strbcasestr(filename, ".wdb")) {
1095
-	if(options & CL_DB_PHISHING_URLS)
1096
-	    ret = cli_loadwdb(fd, engine, options);
1097
-	else
1095
+	if(options & CL_DB_PHISHING_URLS) {
1096
+	    ret = cli_loadwdb(fs, engine, options, gzs, gzrsize);
1097
+	} else
1098 1098
 	    skipped = 1;
1099 1099
     } else if(cli_strbcasestr(filename, ".pdb")) {
1100
-	if(options & CL_DB_PHISHING_URLS)
1101
-	    ret = cli_loadpdb(fd, engine, options);
1102
-	else
1100
+	if(options & CL_DB_PHISHING_URLS) {
1101
+	    ret = cli_loadpdb(fs, engine, options, gzs, gzrsize);
1102
+	} else
1103 1103
 	    skipped = 1;
1104 1104
     } else if(cli_strbcasestr(filename, ".ft")) {
1105
-	ret = cli_loadft(fd, engine, options, 0);
1105
+	ret = cli_loadft(fs, engine, options, 0, gzs, gzrsize);
1106 1106
 
1107 1107
     } else {
1108 1108
 	cli_dbgmsg("cli_load: unknown extension - assuming old database format\n");
1109
-	ret = cli_loaddb(fd, engine, signo, options);
1109
+	ret = cli_loaddb(fs, engine, signo, options, gzs, gzrsize);
1110 1110
     }
1111 1111
 
1112 1112
     if(ret) {
... ...
@@ -1118,36 +1140,16 @@ static int cli_load(const char *filename, struct cl_engine **engine, unsigned in
1118 1118
 	    cli_dbgmsg("%s loaded\n", filename);
1119 1119
     }
1120 1120
 
1121
-    fclose(fd);
1121
+    if(fs)
1122
+	fclose(fs);
1123
+
1122 1124
     return ret;
1123 1125
 }
1124 1126
 
1125 1127
 int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo) {
1126
-    return cli_load(filename, engine, signo, CL_DB_STDOPT);
1128
+    return cli_load(filename, engine, signo, CL_DB_STDOPT, NULL, 0);
1127 1129
 }
1128 1130
 
1129
-#define CLI_DBEXT(ext)				\
1130
-    (						\
1131
-	cli_strbcasestr(ext, ".db")    ||	\
1132
-	cli_strbcasestr(ext, ".db2")   ||	\
1133
-	cli_strbcasestr(ext, ".db3")   ||	\
1134
-	cli_strbcasestr(ext, ".hdb")   ||	\
1135
-	cli_strbcasestr(ext, ".hdu")   ||	\
1136
-	cli_strbcasestr(ext, ".fp")    ||	\
1137
-	cli_strbcasestr(ext, ".mdb")   ||	\
1138
-	cli_strbcasestr(ext, ".mdu")   ||	\
1139
-	cli_strbcasestr(ext, ".ndb")   ||	\
1140
-	cli_strbcasestr(ext, ".ndu")   ||	\
1141
-	cli_strbcasestr(ext, ".sdb")   ||	\
1142
-	cli_strbcasestr(ext, ".zmd")   ||	\
1143
-	cli_strbcasestr(ext, ".rmd")   ||	\
1144
-	cli_strbcasestr(ext, ".pdb")   ||	\
1145
-	cli_strbcasestr(ext, ".wdb")   ||	\
1146
-	cli_strbcasestr(ext, ".ft")    ||	\
1147
-	cli_strbcasestr(ext, ".inc")   ||	\
1148
-	cli_strbcasestr(ext, ".cvd")		\
1149
-    )
1150
-
1151 1131
 static int cli_loaddbdir_l(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
1152 1132
 {
1153 1133
 	DIR *dd;
... ...
@@ -1171,7 +1173,7 @@ static int cli_loaddbdir_l(const char *dirname, struct cl_engine **engine, unsig
1171 1171
 	return CL_EMEM;
1172 1172
     sprintf(dbfile, "%s/daily.cfg", dirname);
1173 1173
     if(stat(dbfile, &sb) != -1) {
1174
-	if((ret = cli_load(dbfile, engine, signo, options))) {
1174
+	if((ret = cli_load(dbfile, engine, signo, options, NULL, 0))) {
1175 1175
 	    free(dbfile);
1176 1176
 	    return ret;
1177 1177
 	}
... ...
@@ -1208,7 +1210,7 @@ static int cli_loaddbdir_l(const char *dirname, struct cl_engine **engine, unsig
1208 1208
 		if(cli_strbcasestr(dbfile, ".inc"))
1209 1209
 		    ret = cli_loaddbdir(dbfile, engine, signo, options);
1210 1210
 		else
1211
-		    ret = cli_load(dbfile, engine, signo, options);
1211
+		    ret = cli_load(dbfile, engine, signo, options, NULL, 0);
1212 1212
 
1213 1213
 		if(ret) {
1214 1214
 		    cli_dbgmsg("cli_loaddbdir(): error loading database %s\n", dbfile);
... ...
@@ -1277,7 +1279,7 @@ int cl_load(const char *path, struct cl_engine **engine, unsigned int *signo, un
1277 1277
 
1278 1278
     switch(sb.st_mode & S_IFMT) {
1279 1279
 	case S_IFREG: 
1280
-	    ret = cli_load(path, engine, signo, options);
1280
+	    ret = cli_load(path, engine, signo, options, NULL, 0);
1281 1281
 	    break;
1282 1282
 
1283 1283
 	case S_IFDIR:
... ...
@@ -1603,7 +1605,7 @@ int cl_build(struct cl_engine *engine)
1603 1603
 	return CL_ENULLARG;
1604 1604
 
1605 1605
     if(!engine->ftypes)
1606
-	if((ret = cli_loadft(NULL, &engine, 0, 1)))
1606
+	if((ret = cli_loadft(NULL, &engine, 0, 1, NULL, 0)))
1607 1607
 	    return ret;
1608 1608
 
1609 1609
     for(i = 0; i < CL_TARGET_TABLE_SIZE; i++)
... ...
@@ -19,11 +19,41 @@
19 19
 #ifndef __READDB_H
20 20
 #define __READDB_H
21 21
 
22
+#include <zlib.h>
23
+
22 24
 #include "clamav.h"
23 25
 #include "matcher.h"
26
+#include "str.h"
27
+
28
+#define CLI_DBEXT(ext)				\
29
+    (						\
30
+	cli_strbcasestr(ext, ".db")    ||	\
31
+	cli_strbcasestr(ext, ".db2")   ||	\
32
+	cli_strbcasestr(ext, ".db3")   ||	\
33
+	cli_strbcasestr(ext, ".hdb")   ||	\
34
+	cli_strbcasestr(ext, ".hdu")   ||	\
35
+	cli_strbcasestr(ext, ".fp")    ||	\
36
+	cli_strbcasestr(ext, ".mdb")   ||	\
37
+	cli_strbcasestr(ext, ".mdu")   ||	\
38
+	cli_strbcasestr(ext, ".ndb")   ||	\
39
+	cli_strbcasestr(ext, ".ndu")   ||	\
40
+	cli_strbcasestr(ext, ".sdb")   ||	\
41
+	cli_strbcasestr(ext, ".zmd")   ||	\
42
+	cli_strbcasestr(ext, ".rmd")   ||	\
43
+	cli_strbcasestr(ext, ".pdb")   ||	\
44
+	cli_strbcasestr(ext, ".wdb")   ||	\
45
+	cli_strbcasestr(ext, ".ft")    ||	\
46
+	cli_strbcasestr(ext, ".inc")   ||	\
47
+	cli_strbcasestr(ext, ".cvd")		\
48
+    )
49
+
24 50
 
25 51
 int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target);
26 52
 
27 53
 int cli_initengine(struct cl_engine **engine, unsigned int options);
28 54
 
55
+int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options, gzFile *gzs, unsigned int gzrsize);
56
+
57
+char *cli_dbgets(char *buff, unsigned int size, FILE *fs, gzFile *gzs, unsigned int *gzrsize);
58
+
29 59
 #endif
... ...
@@ -48,6 +48,7 @@
48 48
 #include <stdlib.h>
49 49
 #include <string.h>
50 50
 #include <ctype.h>
51
+#include <zlib.h>
51 52
 
52 53
 #include <limits.h>
53 54
 #include <sys/types.h>
... ...
@@ -60,6 +61,7 @@
60 60
 #include "regex_list.h"
61 61
 #include "matcher-ac.h"
62 62
 #include "str.h"
63
+#include "readdb.h"
63 64
 
64 65
 /*Tree*/
65 66
 enum token_op_t {OP_CHAR,OP_STDCLASS,OP_CUSTOMCLASS,OP_DOT,OP_LEAF,OP_ROOT,OP_PARCLOSE};
... ...
@@ -513,13 +515,12 @@ static int functionality_level_check(char* line)
513 513
 
514 514
 
515 515
 /* Load patterns/regexes from file */
516
-int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist)
516
+int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,gzFile *gzs,unsigned int gzrsize)
517 517
 {
518 518
 	int rc,line=0;
519 519
 	char buffer[FILEBUFF];
520 520
 
521 521
 	massert(matcher);
522
-	massert(fd);
523 522
 
524 523
 	if(matcher->list_inited==-1)
525 524
 		return CL_EMALFDB; /* already failed to load */
... ...
@@ -527,7 +528,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
527 527
 		cli_warnmsg("Regex list has already been loaded, ignoring further requests for load\n");
528 528
 		return CL_SUCCESS;
529 529
 	}*/
530
-	if(!fd) {
530
+	if(!fd && !gzs) {
531 531
 		cli_errmsg("Unable to load regex list (null file)\n");
532 532
 		return CL_EIO;
533 533
 	}
... ...
@@ -562,7 +563,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
562 562
 	 * If a line in the file doesn't conform to this format, loading fails
563 563
 	 * 
564 564
 	 */
565
-	while(fgets(buffer,FILEBUFF,fd)) {
565
+	while(cli_dbgets(buffer, FILEBUFF, fd, gzs, &gzrsize)) {
566 566
 		char* pattern;
567 567
 		char* flags;
568 568
 		cli_chomp(buffer);
... ...
@@ -34,6 +34,7 @@
34 34
 #endif
35 35
 
36 36
 #include "phishcheck.h"
37
+#include <zlib.h> /* for gzFile */
37 38
 struct node_stack {
38 39
 	struct tree_node** data;
39 40
 	size_t capacity;
... ...
@@ -54,7 +55,7 @@ struct regex_matcher {
54 54
 
55 55
 int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char** info,int is_whitelist);
56 56
 int init_regex_list(struct regex_matcher* matcher);
57
-int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist);
57
+int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,gzFile *gzs,unsigned int gzrsize);
58 58
 void regex_list_cleanup(struct regex_matcher* matcher);
59 59
 void regex_list_done(struct regex_matcher* matcher);
60 60
 int is_regex_ok(struct regex_matcher* matcher);