Browse code

libclamav: add support for loading specific PUA categories (bb#939); clamd: new options ExcludePUA and IncludePUA; clamscan: new switches --exclude-pua and --include-pua

git-svn: trunk@4046

Tomasz Kojm authored on 2008/08/01 01:26:50
Showing 9 changed files
... ...
@@ -1,3 +1,9 @@
1
+Thu Jul 31 18:09:58 CEST 2008 (tk)
2
+----------------------------------
3
+  * libclamav: add support for loading specific PUA categories (bb#939)
4
+  * clamd: new options ExcludePUA and IncludePUA
5
+  * clamscan: new switches --exclude-pua and --include-pua
6
+
1 7
 Thu Jul 31 13:35:11 EEST 2008 (edwin)
2 8
 -------------------------------------
3 9
   * clamd, clamscan, libclamav: new option HeuristicScanPrecedence (bb #649)
... ...
@@ -97,7 +97,8 @@ int main(int argc, char **argv)
97 97
 	time_t currtime;
98 98
 	struct cl_engine *engine = NULL;
99 99
 	const char *dbdir, *cfgfile;
100
-	int ret, tcpsock = 0, localsock = 0;
100
+	char *pua_cats = NULL;
101
+	int ret, tcpsock = 0, localsock = 0, i;
101 102
 	unsigned int sigs = 0;
102 103
 	int lsockets[2], nlsockets = 0;
103 104
 	unsigned int dboptions = 0;
... ...
@@ -302,10 +303,74 @@ int main(int argc, char **argv)
302 302
     dbdir = cfgopt(copt, "DatabaseDirectory")->strarg;
303 303
     logg("#Reading databases from %s\n", dbdir);
304 304
 
305
-    if(cfgopt(copt, "DetectPUA")->enabled)
305
+    if(cfgopt(copt, "DetectPUA")->enabled) {
306 306
 	dboptions |= CL_DB_PUA;
307
-    else
307
+
308
+	if((cpt = cfgopt(copt, "ExcludePUA"))->enabled) {
309
+	    dboptions |= CL_DB_PUA_EXCLUDE;
310
+	    i = 0;
311
+	    logg("#Excluded PUA categories:");
312
+	    while(cpt) {
313
+		if(!(pua_cats = realloc(pua_cats, i + strlen(cpt->strarg) + 3))) {
314
+		    logg("!Can't allocate memory for pua_cats\n");
315
+		    logg_close();
316
+		    freecfg(copt);
317
+		    return 1;
318
+		}
319
+		logg("# %s", cpt->strarg);
320
+		sprintf(pua_cats + i, ".%s", cpt->strarg);
321
+		i += strlen(cpt->strarg) + 1;
322
+		pua_cats[i] = 0;
323
+		cpt = cpt->nextarg;
324
+	    }
325
+	    logg("#\n");
326
+	    pua_cats[i] = '.';
327
+	    pua_cats[i + 1] = 0;
328
+	}
329
+
330
+	if((cpt = cfgopt(copt, "IncludePUA"))->enabled) {
331
+	    if(pua_cats) {
332
+		logg("!ExcludePUA and IncludePUA cannot be used at the same time\n");
333
+		logg_close();
334
+		freecfg(copt);
335
+		free(pua_cats);
336
+		return 1;
337
+	    }
338
+	    dboptions |= CL_DB_PUA_INCLUDE;
339
+	    i = 0;
340
+	    logg("#Included PUA categories:");
341
+	    while(cpt) {
342
+		if(!(pua_cats = realloc(pua_cats, i + strlen(cpt->strarg) + 3))) {
343
+		    logg("!Can't allocate memory for pua_cats\n");
344
+		    logg_close();
345
+		    freecfg(copt);
346
+		    return 1;
347
+		}
348
+		logg("# %s", cpt->strarg);
349
+		sprintf(pua_cats + i, ".%s", cpt->strarg);
350
+		i += strlen(cpt->strarg) + 1;
351
+		pua_cats[i] = 0;
352
+		cpt = cpt->nextarg;
353
+	    }
354
+	    logg("#\n");
355
+	    pua_cats[i] = '.';
356
+	    pua_cats[i + 1] = 0;
357
+	}
358
+
359
+	if(pua_cats) {
360
+	    /* FIXME with the new API */
361
+	    if((ret = cli_initengine(&engine, dboptions))) {
362
+		logg("!cli_initengine() failed: %s\n", cl_strerror(ret));
363
+		logg_close();
364
+		freecfg(copt);
365
+		free(pua_cats);
366
+		return 1;
367
+	    }
368
+	    engine->pua_cats = pua_cats;
369
+	}
370
+    } else {
308 371
 	logg("#Not loading PUA signatures.\n");
372
+    }
309 373
 
310 374
     if(cfgopt(copt, "PhishingSignatures")->enabled)
311 375
 	dboptions |= CL_DB_PHISHING;
... ...
@@ -194,6 +194,7 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
194 194
 	const char *dbdir;
195 195
 	int retval;
196 196
 	unsigned int sigs = 0;
197
+	char *pua_cats = NULL;
197 198
 
198 199
     *ret = 0;
199 200
     if(do_check) {
... ...
@@ -211,12 +212,6 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
211 211
 	}
212 212
     }
213 213
 
214
-    /* release old structure */
215
-    if(engine) {
216
-	cl_free(engine);
217
-	engine = NULL;
218
-    }
219
-
220 214
     dbdir = cfgopt(copt, "DatabaseDirectory")->strarg;
221 215
     logg("Reading databases from %s\n", dbdir);
222 216
 
... ...
@@ -238,6 +233,26 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
238 238
 	return NULL;
239 239
     }
240 240
 
241
+    /* release old structure */
242
+    if(engine) {
243
+	if(engine->pua_cats)
244
+	    if(!(pua_cats = strdup(engine->pua_cats)))
245
+		logg("^Can't make a copy of pua_cats\n");
246
+
247
+	cl_free(engine);
248
+	engine = NULL;
249
+    }
250
+
251
+    if(pua_cats) {
252
+	if((retval = cli_initengine(&engine, dboptions))) {
253
+	    logg("!cli_initengine() failed: %s\n", cl_strerror(retval));
254
+	    *ret = 1;
255
+	    free(pua_cats);
256
+	    return NULL;
257
+	}
258
+	engine->pua_cats = pua_cats;
259
+    }
260
+
241 261
     if((retval = cl_load(dbdir, &engine, &sigs, dboptions))) {
242 262
 	logg("!reload db failed: %s\n", cl_strerror(retval));
243 263
 	*ret = 1;
... ...
@@ -315,6 +315,8 @@ void help(void)
315 315
 #endif
316 316
     mprintf("\n");
317 317
     mprintf("    --detect-pua                         Detect Possibly Unwanted Applications\n");
318
+    mprintf("    --exclude-pua=CAT                    Skip PUA sigs of category CAT\n");
319
+    mprintf("    --include-pua=CAT                    Load PUA sigs of category CAT\n");
318 320
     mprintf("    --detect-structured                  Detect structured data (SSN, Credit Card)\n");
319 321
     mprintf("    --structured-ssn-format=X            SSN format (0=normal,1=stripped,2=both)\n");
320 322
     mprintf("    --structured-ssn-count=N             Min SSN count to generate a detect\n");
... ...
@@ -59,6 +59,8 @@ static struct option clamscan_longopt[] = {
59 59
     {"max-recursion", 1, 0, 0},
60 60
     {"max-dir-recursion", 1, 0, 0},
61 61
     {"detect-pua", 0, 0, 0},
62
+    {"exclude-pua", 1, 0, 0},
63
+    {"include-pua", 1, 0, 0},
62 64
     {"detect-structured", 0, 0, 0},
63 65
     {"structured-ssn-format", 1, 0, 0},
64 66
     {"structured-ssn-count", 1, 0, 0},
... ...
@@ -344,7 +344,8 @@ int scanmanager(const struct optstruct *opt)
344 344
 	struct cl_engine *engine = NULL;
345 345
 	struct cl_limits limits;
346 346
 	struct stat sb;
347
-	char *file, cwd[1024];
347
+	char *file, cwd[1024], *pua_cats = NULL, *argument;
348
+	const struct optnode *optnode;
348 349
 #ifndef C_WINDOWS
349 350
 	struct rlimit rlim;
350 351
 #endif
... ...
@@ -371,9 +372,61 @@ int scanmanager(const struct optstruct *opt)
371 371
     if(opt_check(opt, "dev-ac-depth"))
372 372
 	cli_ac_setdepth(AC_DEFAULT_MIN_DEPTH, atoi(opt_arg(opt, "dev-ac-depth")));
373 373
 
374
-    if(opt_check(opt, "detect-pua"))
374
+    if(opt_check(opt, "detect-pua")) {
375 375
 	dboptions |= CL_DB_PUA;
376 376
 
377
+	if(opt_check(opt, "exclude-pua")) {
378
+	    dboptions |= CL_DB_PUA_EXCLUDE;
379
+	    argument = opt_firstarg(opt, "exclude-pua", &optnode);
380
+	    i = 0;
381
+	    while(argument) {
382
+		if(!(pua_cats = realloc(pua_cats, i + strlen(argument) + 3))) {
383
+		    logg("!Can't allocate memory for pua_cats\n");
384
+		    return 70;
385
+		}
386
+		sprintf(pua_cats + i, ".%s", argument);
387
+		i += strlen(argument) + 1;
388
+		pua_cats[i] = 0;
389
+		argument = opt_nextarg(&optnode, "exclude-pua");
390
+	    }
391
+	    pua_cats[i] = '.';
392
+	    pua_cats[i + 1] = 0;
393
+	}
394
+
395
+	if(opt_check(opt, "include-pua")) {
396
+	    if(pua_cats) {
397
+		logg("!--exclude-pua and --include-pua cannot be used at the same time\n");
398
+		free(pua_cats);
399
+		return 40;
400
+	    }
401
+	    dboptions |= CL_DB_PUA_INCLUDE;
402
+	    argument = opt_firstarg(opt, "include-pua", &optnode);
403
+	    i = 0;
404
+	    while(argument) {
405
+		if(!(pua_cats = realloc(pua_cats, i + strlen(argument) + 3))) {
406
+		    logg("!Can't allocate memory for pua_cats\n");
407
+		    return 70;
408
+		}
409
+		sprintf(pua_cats + i, ".%s", argument);
410
+		i += strlen(argument) + 1;
411
+		pua_cats[i] = 0;
412
+		argument = opt_nextarg(&optnode, "include-pua");
413
+	    }
414
+	    pua_cats[i] = '.';
415
+	    pua_cats[i + 1] = 0;
416
+	}
417
+
418
+	if(pua_cats) {
419
+	    /* FIXME with the new API */
420
+	    if((ret = cli_initengine(&engine, dboptions))) {
421
+		logg("!cli_initengine() failed: %s\n", cl_strerror(ret));
422
+		free(pua_cats);
423
+		return 50;
424
+	    }
425
+	    engine->pua_cats = pua_cats;
426
+	}
427
+    }
428
+
377 429
     if(opt_check(opt, "database")) {
378 430
 	if((ret = cl_load(opt_arg(opt, "database"), &engine, &info.sigs, dboptions))) {
379 431
 	    logg("!%s\n", cl_strerror(ret));
... ...
@@ -72,6 +72,9 @@ extern "C"
72 72
 #define CL_DB_PUA	    0x10
73 73
 #define CL_DB_CVDNOTMP	    0x20
74 74
 #define CL_DB_OFFICIAL	    0x40
75
+#define CL_DB_PUA_MODE	    0x80
76
+#define CL_DB_PUA_INCLUDE   0x100
77
+#define CL_DB_PUA_EXCLUDE   0x200
75 78
 
76 79
 /* recommended db settings */
77 80
 #define CL_DB_STDOPT	    (CL_DB_PHISHING | CL_DB_PHISHING_URLS)
... ...
@@ -146,6 +149,9 @@ struct cl_engine {
146 146
 
147 147
     /* Ignored signatures */
148 148
     void *ignored;
149
+
150
+    /* PUA categories (to be included or excluded) */
151
+    char *pua_cats;
149 152
 };
150 153
 
151 154
 struct cl_limits {
... ...
@@ -448,6 +448,42 @@ static int cli_chkign(const struct cli_ignored *ignored, const char *dbname, uns
448 448
     return 0;
449 449
 }
450 450
 
451
+static int cli_chkpua(const char *signame, const char *pua_cats, unsigned int options)
452
+{
453
+	char cat[32], *pt;
454
+	const char *sig;
455
+	int ret;
456
+
457
+    if(strncmp(signame, "PUA.", 4)) {
458
+	cli_dbgmsg("Skipping signature %s - no PUA prefix\n", signame);
459
+	return 1;
460
+    }
461
+    sig = signame + 3;
462
+    if(!(pt = strchr(sig + 1, '.'))) {
463
+	cli_dbgmsg("Skipping signature %s - bad syntax\n", signame);
464
+	return 1;
465
+    }
466
+
467
+    if(pt - sig + 2 > sizeof(cat)) {
468
+	cli_dbgmsg("Skipping signature %s - too long category name\n", signame);
469
+	return 1;
470
+    }
471
+
472
+    strncpy(cat, sig, pt - signame + 1);
473
+    cat[pt - sig + 1] = 0;
474
+    pt = strstr(pua_cats, cat);
475
+
476
+    if(options & CL_DB_PUA_INCLUDE)
477
+	ret = pt ? 0 : 1;
478
+    else
479
+	ret = pt ? 1 : 0;
480
+
481
+    if(ret)
482
+	cli_dbgmsg("Skipping PUA signature %s - excluded category\n", signame);
483
+
484
+    return ret;
485
+}
486
+
451 487
 static int cli_loaddb(FILE *fs, struct cl_engine **engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio, const char *dbname)
452 488
 {
453 489
 	char buffer[FILEBUFF], *pt, *start;
... ...
@@ -613,6 +649,10 @@ static int cli_loadndb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
613 613
 	    break;
614 614
 	}
615 615
 
616
+	if((*engine)->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))
617
+	    if(cli_chkpua(virname, (*engine)->pua_cats, options))
618
+		continue;
619
+
616 620
 	if((*engine)->ignored && cli_chkign((*engine)->ignored, dbname, line, virname))
617 621
 	    continue;
618 622
 
... ...
@@ -891,6 +931,10 @@ static int cli_loadldb(FILE *fs, struct cl_engine **engine, unsigned int *signo,
891 891
 	    break;
892 892
 	}
893 893
 
894
+	if((*engine)->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))
895
+	    if(cli_chkpua(virname, (*engine)->pua_cats, options))
896
+		continue;
897
+
894 898
 	if((*engine)->ignored && cli_chkign((*engine)->ignored, dbname, line, virname))
895 899
 	    continue;
896 900
 
... ...
@@ -1307,9 +1351,21 @@ static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo,
1307 1307
     while(cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
1308 1308
 	line++;
1309 1309
 	cli_chomp(buffer);
1310
-
1311 1310
 	cli_strtokenize(buffer, ':', MD5_TOKENS, tokens);
1312 1311
 
1312
+	if(!(pt = tokens[2])) { /* virname */
1313
+	    ret = CL_EMALFDB;
1314
+	    break;
1315
+	}
1316
+
1317
+	if((*engine)->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))
1318
+	    if(cli_chkpua(pt, (*engine)->pua_cats, options))
1319
+		continue;
1320
+
1321
+	if((*engine)->ignored && cli_chkign((*engine)->ignored, dbname, line, pt))
1322
+	    continue;
1323
+
1324
+
1313 1325
 	new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
1314 1326
 	if(!new) {
1315 1327
 	    ret = CL_EMEM;
... ...
@@ -1345,13 +1401,6 @@ static int cli_loadmd5(FILE *fs, struct cl_engine **engine, unsigned int *signo,
1345 1345
 	    break;
1346 1346
 	}
1347 1347
 
1348
-	if((*engine)->ignored && cli_chkign((*engine)->ignored, dbname, line, new->virname)) {
1349
-	    free(new->virname);
1350
-	    free(new->pattern);
1351
-	    free(new);
1352
-	    continue;
1353
-	}
1354
-
1355 1348
 	MD5_DB;
1356 1349
 	if(!db && (ret = cli_md5db_init(engine, mode))) {
1357 1350
 	    free(new->pattern);
... ...
@@ -1627,7 +1676,7 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1627 1627
 
1628 1628
     } else if(cli_strbcasestr(dbname, ".hdu")) {
1629 1629
 	if(options & CL_DB_PUA)
1630
-	    ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options, dbio, dbname);
1630
+	    ret = cli_loadmd5(fs, engine, signo, MD5_HDB, options | CL_DB_PUA_MODE, dbio, dbname);
1631 1631
 	else
1632 1632
 	    skipped = 1;
1633 1633
 
... ...
@@ -1639,7 +1688,7 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1639 1639
 
1640 1640
     } else if(cli_strbcasestr(dbname, ".mdu")) {
1641 1641
 	if(options & CL_DB_PUA)
1642
-	    ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options, dbio, dbname);
1642
+	    ret = cli_loadmd5(fs, engine, signo, MD5_MDB, options | CL_DB_PUA_MODE, dbio, dbname);
1643 1643
 	else
1644 1644
 	    skipped = 1;
1645 1645
 
... ...
@@ -1650,14 +1699,14 @@ int cli_load(const char *filename, struct cl_engine **engine, unsigned int *sign
1650 1650
 	if(!(options & CL_DB_PUA))
1651 1651
 	    skipped = 1;
1652 1652
 	else
1653
-	    ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);
1653
+	    ret = cli_loadndb(fs, engine, signo, 0, options | CL_DB_PUA_MODE, dbio, dbname);
1654 1654
 
1655 1655
     } else if(cli_strbcasestr(filename, ".ldb")) {
1656 1656
        ret = cli_loadldb(fs, engine, signo, options, dbio, dbname);
1657 1657
 
1658 1658
     } else if(cli_strbcasestr(filename, ".ldu")) {
1659 1659
 	if(options & CL_DB_PUA)
1660
-	    ret = cli_loadldb(fs, engine, signo, options, dbio, dbname);
1660
+	    ret = cli_loadldb(fs, engine, signo, options | CL_DB_PUA_MODE, dbio, dbname);
1661 1661
 	else
1662 1662
 	    skipped = 1;
1663 1663
 
... ...
@@ -2145,6 +2194,9 @@ void cl_free(struct cl_engine *engine)
2145 2145
     if(engine->dconf)
2146 2146
 	free(engine->dconf);
2147 2147
 
2148
+    if(engine->pua_cats)
2149
+	free(engine->pua_cats);
2150
+
2148 2151
     cli_ftfree(engine->ftypes);
2149 2152
     cli_freeign(engine);
2150 2153
     free(engine);
... ...
@@ -56,6 +56,8 @@ struct cfgoption cfg_options[] = {
56 56
     {"HeuristicScanPrecedence", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
57 57
     /* end of FP prone options */
58 58
     {"DetectPUA", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
59
+    {"ExcludePUA", OPT_QUOTESTR, -1, NULL, 1, OPT_CLAMD},
60
+    {"IncludePUA", OPT_QUOTESTR, -1, NULL, 1, OPT_CLAMD},
59 61
     {"StructuredDataDetection", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
60 62
     {"StructuredMinCreditCardCount", OPT_NUM, 1, NULL, 0, OPT_CLAMD},
61 63
     {"StructuredMinSSNCount", OPT_NUM, 1, NULL, 0, OPT_CLAMD},