git-svn: trunk@4929
Török Edvin authored on 2009/03/12 05:06:35... | ... |
@@ -1,3 +1,9 @@ |
1 |
+Wed Mar 11 22:06:30 EET 2009 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/phishcheck.c, libclamav/regex_list.c, |
|
4 |
+ libclamav/regex_list.h, unit_tests/check_regex.c, |
|
5 |
+ unit_tests/input/daily.gdb: make use of hostkey prefix entries |
|
6 |
+ |
|
1 | 7 |
Wed Mar 11 21:27:32 EET 2009 (edwin) |
2 | 8 |
------------------------------------ |
3 | 9 |
* clamd/others.c, sigtool/Makefile.in: fix previous commit |
... | ... |
@@ -1172,7 +1172,7 @@ static int whitelist_check(const struct cl_engine* engine,struct url_check* urls |
1172 | 1172 |
return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly); |
1173 | 1173 |
} |
1174 | 1174 |
|
1175 |
-static int hash_match(const struct regex_matcher *rlist, const char *host, size_t hlen, const char *path, size_t plen) |
|
1175 |
+static int hash_match(const struct regex_matcher *rlist, const char *host, size_t hlen, const char *path, size_t plen, int *prefix_matched) |
|
1176 | 1176 |
{ |
1177 | 1177 |
const char *virname; |
1178 | 1178 |
#if 0 |
... | ... |
@@ -1198,9 +1198,15 @@ static int hash_match(const struct regex_matcher *rlist, const char *host, size_ |
1198 | 1198 |
h[2*i+1] = hexchars[sha256_dig[i]&0xf]; |
1199 | 1199 |
} |
1200 | 1200 |
h[64]='\0'; |
1201 |
- cli_dbgmsg("Looking up hash %s for %s%s\n", h, host, path); |
|
1202 |
- if(SO_search(&rlist->sha256_filter, sha256_dig, 32) != -1 && |
|
1203 |
- cli_bm_scanbuff(sha256_dig, 32, &virname, &rlist->sha256_hashes,0,0,-1) == CL_VIRUS) { |
|
1201 |
+ cli_dbgmsg("Looking up hash %s for %s(%u)%s(%u)\n", h, host, hlen, path, plen); |
|
1202 |
+ if (prefix_matched) { |
|
1203 |
+ if (cli_bm_scanbuff(sha256_dig, 4, &virname, &rlist->hostkey_prefix,0,0,-1) == CL_VIRUS) { |
|
1204 |
+ cli_dbgmsg("prefix matched\n", virname); |
|
1205 |
+ *prefix_matched = 1; |
|
1206 |
+ } else |
|
1207 |
+ return CL_SUCCESS; |
|
1208 |
+ } |
|
1209 |
+ if (cli_bm_scanbuff(sha256_dig, 32, &virname, &rlist->sha256_hashes,0,0,-1) == CL_VIRUS) { |
|
1204 | 1210 |
switch(*virname) { |
1205 | 1211 |
case '1': |
1206 | 1212 |
return CL_PHISH_HASH1; |
... | ... |
@@ -1316,10 +1322,11 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl, |
1316 | 1316 |
size_t path_len; |
1317 | 1317 |
size_t host_len; |
1318 | 1318 |
char *p; |
1319 |
- int rc; |
|
1319 |
+ int rc, prefix_matched=0; |
|
1320 | 1320 |
const char *lp[COMPONENTS+1]; |
1321 | 1321 |
size_t pp[COMPONENTS+2]; |
1322 | 1322 |
char urlbuff[URL_MAX_LEN+3];/* htmlnorm truncates at 1024 bytes + terminating null + slash + host end null */ |
1323 |
+ unsigned count; |
|
1323 | 1324 |
|
1324 | 1325 |
if(!rlist || !rlist->sha256_hashes.bm_patterns) { |
1325 | 1326 |
return CL_SUCCESS; |
... | ... |
@@ -1358,15 +1365,27 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl, |
1358 | 1358 |
} |
1359 | 1359 |
} else |
1360 | 1360 |
k = 1; |
1361 |
- |
|
1362 |
- for(ji=j;ji < COMPONENTS+1; ji++) { |
|
1363 |
- for(ki=0;ki < k; ki++) { |
|
1364 |
- assert(pp[ki] <= path_len); |
|
1365 |
- rc = hash_match(rlist, lp[ji], host_begin + host_len - lp[ji] + 1, path_begin, pp[ki]); |
|
1366 |
- if(rc) { |
|
1367 |
- return rc; |
|
1368 |
- } |
|
1361 |
+ count = 0; |
|
1362 |
+ for(ki=k;ki > 0;) { |
|
1363 |
+ --ki; |
|
1364 |
+ for(ji=COMPONENTS+1;ji > j;) { |
|
1365 |
+ /* lookup last 2 and 3 components of host, as hostkey prefix, |
|
1366 |
+ * if not matched, shortcircuit lookups */ |
|
1367 |
+ int need_prefixmatch = (count<2 && !prefix_matched) && |
|
1368 |
+ rlist->hostkey_prefix.bm_patterns; |
|
1369 |
+ --ji; |
|
1370 |
+ assert(pp[ki] <= path_len); |
|
1371 |
+ rc = hash_match(rlist, lp[ji], host_begin + host_len - lp[ji] + 1, path_begin, pp[ki], |
|
1372 |
+ need_prefixmatch ? &prefix_matched : NULL); |
|
1373 |
+ if(rc) { |
|
1374 |
+ return rc; |
|
1375 |
+ } |
|
1376 |
+ count++; |
|
1377 |
+ if (count == 2 && !prefix_matched && rlist->hostkey_prefix.bm_patterns) { |
|
1378 |
+ cli_dbgmsg("hostkey prefix not matched, short-circuiting lookups\n"); |
|
1379 |
+ return CL_SUCCESS; |
|
1369 | 1380 |
} |
1381 |
+ } |
|
1370 | 1382 |
} |
1371 | 1383 |
return CL_SUCCESS; |
1372 | 1384 |
} |
... | ... |
@@ -1394,8 +1413,11 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1394 | 1394 |
} |
1395 | 1395 |
|
1396 | 1396 |
if(( rc = url_hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) )) { |
1397 |
+ if (rc == CL_PHISH_CLEAN) |
|
1398 |
+ cli_dbgmsg("not analyzing, not a real url: %s\n", urls->realLink.data); |
|
1399 |
+ else |
|
1397 | 1400 |
cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); |
1398 |
- return rc; |
|
1401 |
+ return rc; |
|
1399 | 1402 |
} |
1400 | 1403 |
|
1401 | 1404 |
if((rc = cleanupURLs(urls))) { |
... | ... |
@@ -372,12 +372,15 @@ int init_regex_list(struct regex_matcher* matcher) |
372 | 372 |
} |
373 | 373 |
#ifdef USE_MPOOL |
374 | 374 |
matcher->sha256_hashes.mempool = mp; |
375 |
+ matcher->hostkey_prefix.mempool = mp; |
|
375 | 376 |
#endif |
376 | 377 |
if((rc = cli_bm_init(&matcher->sha256_hashes))) { |
377 | 378 |
return rc; |
378 | 379 |
} |
380 |
+ if((rc = cli_bm_init(&matcher->hostkey_prefix))) { |
|
381 |
+ return rc; |
|
382 |
+ } |
|
379 | 383 |
SO_init(&matcher->filter); |
380 |
- SO_init(&matcher->sha256_filter); |
|
381 | 384 |
return CL_SUCCESS; |
382 | 385 |
} |
383 | 386 |
|
... | ... |
@@ -424,10 +427,11 @@ static int functionality_level_check(char* line) |
424 | 424 |
} |
425 | 425 |
} |
426 | 426 |
|
427 |
-static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl) |
|
427 |
+static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl, int is_prefix) |
|
428 | 428 |
{ |
429 | 429 |
int rc; |
430 | 430 |
struct cli_bm_patt *pat = mpool_calloc(matcher->mempool, 1, sizeof(*pat)); |
431 |
+ struct cli_matcher *bm; |
|
431 | 432 |
if(!pat) |
432 | 433 |
return CL_EMEM; |
433 | 434 |
pat->pattern = (unsigned char*)cli_mpool_hex2str(matcher->mempool, pattern); |
... | ... |
@@ -440,8 +444,14 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl) |
440 | 440 |
return CL_EMEM; |
441 | 441 |
} |
442 | 442 |
*pat->virname = fl; |
443 |
- SO_preprocess_add(&matcher->sha256_filter, pat->pattern, pat->length); |
|
444 |
- if((rc = cli_bm_addpatt(&matcher->sha256_hashes, pat))) { |
|
443 |
+ if (is_prefix) { |
|
444 |
+ pat->length=4; |
|
445 |
+ bm = &matcher->hostkey_prefix; |
|
446 |
+ } else { |
|
447 |
+ bm = &matcher->sha256_hashes; |
|
448 |
+ } |
|
449 |
+ |
|
450 |
+ if((rc = cli_bm_addpatt(bm, pat))) { |
|
445 | 451 |
cli_errmsg("add_hash: failed to add BM pattern\n"); |
446 | 452 |
free(pat->pattern); |
447 | 453 |
free(pat->virname); |
... | ... |
@@ -542,15 +552,12 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int *sign |
542 | 542 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
543 | 543 |
} else if (buffer[0] == 'S' && !is_whitelist) { |
544 | 544 |
pattern[pattern_len] = '\0'; |
545 |
- if(*pattern=='F' && pattern[1]==':') { |
|
545 |
+ if((pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') { |
|
546 | 546 |
pattern += 2; |
547 |
- if (( rc = add_hash(matcher, pattern, flags[0]) )) { |
|
547 |
+ if (( rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P') )) { |
|
548 | 548 |
cli_errmsg("Error loading at line: %d\n", line); |
549 | 549 |
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB; |
550 | 550 |
} |
551 |
- } else if (*pattern=='P' && pattern[1]==':') { |
|
552 |
- pattern += 2; |
|
553 |
- /* TODO: hostkey prefix */ |
|
554 | 551 |
} else { |
555 | 552 |
cli_errmsg("Error loading line: %d, %c\n", line, *pattern); |
556 | 553 |
return CL_EMALFDB; |
... | ... |
@@ -617,6 +624,7 @@ void regex_list_done(struct regex_matcher* matcher) |
617 | 617 |
} |
618 | 618 |
hashtab_free(&matcher->suffix_hash); |
619 | 619 |
cli_bm_free(&matcher->sha256_hashes); |
620 |
+ cli_bm_free(&matcher->hostkey_prefix); |
|
620 | 621 |
} |
621 | 622 |
} |
622 | 623 |
|
... | ... |
@@ -313,7 +313,7 @@ static void psetup_impl(int load2) |
313 | 313 |
fail_unless(rc == 0, "load_regex_matcher"); |
314 | 314 |
fclose(f); |
315 | 315 |
|
316 |
- fail_unless_fmt(signo == 2, "Incorrect number of signatures: %u, expected %u", signo, 2); |
|
316 |
+ fail_unless_fmt(signo == 4, "Incorrect number of signatures: %u, expected %u", signo, 4); |
|
317 | 317 |
} |
318 | 318 |
loaded_2 = load2; |
319 | 319 |
|