Browse code

local.gdb whitelisting of safebrowsing entries (bb #1482).

git-svn: trunk@5015

Török Edvin authored on 2009/04/03 05:36:27
Showing 6 changed files
... ...
@@ -1,3 +1,9 @@
1
+Thu Apr  2 23:35:36 EEST 2009 (edwin)
2
+-------------------------------------
3
+ * docs/phishsigs_howto.tex, libclamav/phishcheck.c,
4
+ libclamav/readdb.c, libclamav/regex_list.c: local.gdb whitelisting
5
+ of safebrowsing entries (bb #1482).
6
+
1 7
 Thu Apr  2 22:59:30 EEST 2009 (edwin)
2 8
 -------------------------------------
3 9
  * libclamav/htmlnorm.c, libclamav/htmlnorm.h, libclamav/mbox.c,
4 10
Binary files a/docs/phishsigs_howto.pdf and b/docs/phishsigs_howto.pdf differ
... ...
@@ -57,6 +57,7 @@ S1:P:HostPrefix[:FuncLevelSpec]
57 57
 S1:F:Sha256hash[:FuncLevelSpec]
58 58
 S2:P:HostPrefix[:FuncLevelSpec]
59 59
 S2:F:Sha256hash[:FuncLevelSpec]
60
+S:W:Sha256hash[:FuncLevelSpec]
60 61
 \end{verbatim}
61 62
 
62 63
 \begin{description}
... ...
@@ -67,6 +68,8 @@ S2:F:Sha256hash[:FuncLevelSpec]
67 67
  \item [{S1:}]
68 68
 	Hashes for blacklisting phishing sites.
69 69
 	Virus name: Phishing.URL.Blacklisted
70
+ \item [{S:W}]
71
+	Locally whitelisted hashes.
70 72
  \item [{HostPrefix}]
71 73
 	4-byte prefix of the sha256 hash of the last 2 or 3 components of the hostname.
72 74
 If prefix doesn't match, no further lookups are performed.
... ...
@@ -76,7 +79,8 @@ If prefix doesn't match, no further lookups are performed.
76 76
 
77 77
 To see which hash/URL matched, look at the \verb+clamscan --debug+ output, and look for the following strings:
78 78
 \verb+Looking up hash+, \verb+prefix matched+, and \verb+Hash matched+.
79
-Local whitelisting of .gdb entries can be done by creating .wdb entries.
79
+Local whitelisting of .gdb entries can be done by creating a local.gdb file and
80
+adding a line of the form \verb+S:W:<HASH>+, where \verb+<HASH>+ is the sha256 hash to whitelist.
80 81
 
81 82
 \subsection{WDB format}
82 83
 This file contains whitelisted url pairs
... ...
@@ -1206,6 +1206,9 @@ static int hash_match(const struct regex_matcher *rlist, const char *host, size_
1206 1206
 	    }
1207 1207
 	    if (cli_bm_scanbuff(sha256_dig, 32, &virname, &rlist->sha256_hashes,0,0,-1) == CL_VIRUS) {
1208 1208
 		switch(*virname) {
1209
+		    case 'W':
1210
+			cli_dbgmsg("Hash is whitelisted, skipping\n");
1211
+			break;
1209 1212
 		    case '1':
1210 1213
 			return CL_PHISH_HASH1;
1211 1214
 		    case '2':
... ...
@@ -1413,7 +1416,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1413 1413
 {
1414 1414
 	struct url_check host_url;
1415 1415
 	int rc = CL_PHISH_NODECISION;
1416
-	int phishy=0, blacklisted=0;
1416
+	int phishy=0;
1417 1417
 	const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck;
1418 1418
 
1419 1419
 	if(!urls->realLink.data)
... ...
@@ -1436,10 +1439,14 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1436 1436
 		return CL_PHISH_CLEAN;
1437 1437
 	    } else {
1438 1438
 		cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data);
1439
-		blacklisted = rc;
1439
+		return rc;
1440 1440
 	    }
1441 1441
 	}
1442 1442
 
1443
+	if (urls->displayLink.data[0] == '\0') {
1444
+	    return CL_PHISH_CLEAN;
1445
+	}
1446
+
1443 1447
 	if((rc = cleanupURLs(urls))) {
1444 1448
 		/* it can only return an error, or say it's clean;
1445 1449
 		 * it is not allowed to decide it is phishing */
... ...
@@ -1453,20 +1460,12 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1453 1453
 			( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
1454 1454
 			  !(phishy&PHISHY_NUMERIC_IP))) {
1455 1455
 		cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
1456
-		if (!blacklisted)
1457
-		    return CL_PHISH_CLEAN;
1456
+		return CL_PHISH_CLEAN;
1458 1457
 	}
1459 1458
 
1460 1459
 	if(whitelist_check(engine, urls, 0))
1461 1460
 		return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */
1462 1461
 
1463
-	if (blacklisted)
1464
-	    return blacklisted;
1465
-
1466
-	if (urls->displayLink.data[0] == '\0') {
1467
-	    return CL_PHISH_CLEAN;
1468
-	}
1469
-
1470 1462
 	url_check_init(&host_url);
1471 1463
 
1472 1464
 	if((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) {
... ...
@@ -1585,6 +1585,13 @@ static int cli_loaddbdir(const char *dirname, struct cl_engine *engine, unsigned
1585 1585
 	return ret;
1586 1586
     }
1587 1587
 
1588
+    /* try to load local.gdb next */
1589
+    sprintf(dbfile, "%s/local.gdb", dirname);
1590
+    if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
1591
+	free(dbfile);
1592
+	return ret;
1593
+    }
1594
+
1588 1595
     /* check for and load daily.cfg */
1589 1596
     sprintf(dbfile, "%s/daily.cfg", dirname);
1590 1597
     if(!access(dbfile, R_OK) && (ret = cli_load(dbfile, engine, signo, options, NULL))) {
... ...
@@ -432,18 +432,13 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl,
432 432
 	int rc;
433 433
 	struct cli_bm_patt *pat = mpool_calloc(matcher->mempool, 1, sizeof(*pat));
434 434
 	struct cli_matcher *bm;
435
+	const char *vname = NULL;
435 436
 	if(!pat)
436 437
 		return CL_EMEM;
437 438
 	pat->pattern = (unsigned char*)cli_mpool_hex2str(matcher->mempool, pattern);
438 439
 	if(!pat->pattern)
439 440
 		return CL_EMALFDB;
440 441
 	pat->length = 32;
441
-	pat->virname = mpool_malloc(matcher->mempool, 1);
442
-	if(!pat->virname) {
443
-		free(pat);
444
-		return CL_EMEM;
445
-	}
446
-	*pat->virname = fl;
447 442
 	if (is_prefix) {
448 443
 	    pat->length=4;
449 444
 	    bm = &matcher->hostkey_prefix;
... ...
@@ -451,6 +446,23 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl,
451 451
 	    bm = &matcher->sha256_hashes;
452 452
 	}
453 453
 
454
+	if (fl != 'W' && pat->length == 32 &&
455
+	    cli_bm_scanbuff(pat->pattern, 32, &vname, &matcher->sha256_hashes,0,0,-1) == CL_VIRUS) {
456
+	    if (*vname == 'W') {
457
+		/* hash is whitelisted in local.gdb */
458
+		cli_dbgmsg("Skipping hash %s\n", pattern);
459
+		mpool_free(matcher->mempool, pat->pattern);
460
+		mpool_free(matcher->mempool, pat);
461
+		return CL_SUCCESS;
462
+	    }
463
+	}
464
+	pat->virname = mpool_malloc(matcher->mempool, 1);
465
+	if(!pat->virname) {
466
+		free(pat);
467
+		return CL_EMEM;
468
+	}
469
+	*pat->virname = fl;
470
+
454 471
 	if((rc = cli_bm_addpatt(bm, pat))) {
455 472
 		cli_errmsg("add_hash: failed to add BM pattern\n");
456 473
 		free(pat->pattern);
... ...
@@ -550,9 +562,11 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int *sign
550 550
 			/*matches displayed host*/
551 551
 			if (( rc = add_static_pattern(matcher, pattern) ))
552 552
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
553
-		} else if (buffer[0] == 'S' && !is_whitelist) {
553
+		} else if (buffer[0] == 'S' && (!is_whitelist || pattern[0]=='W')) {
554 554
 			pattern[pattern_len] = '\0';
555
-			if((pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') {
555
+			if (pattern[0]=='W')
556
+			    flags[0]='W';
557
+			if((pattern[0]=='W' || pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') {
556 558
 			    pattern += 2;
557 559
 			    if (( rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P') )) {
558 560
 				cli_errmsg("Error loading at line: %d\n", line);