git-svn: trunk@4539
Török Edvin authored on 2008/12/06 23:49:00... | ... |
@@ -1,3 +1,10 @@ |
1 |
+Sat Dec 6 16:54:43 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/pe.c, libclamav/special.c, libclamav/special.h: Improve |
|
4 |
+ Trojan.Swizzor.Gen detection: do per file statistics in addition to |
|
5 |
+ per string. It is amazing how a much simpler rule can do the same |
|
6 |
+ job better. |
|
7 |
+ |
|
1 | 8 |
Thu Dec 4 17:43:01 CET 2008 (acab) |
2 | 9 |
----------------------------------- |
3 | 10 |
* clamav-milter: r4519:r4536 merge new clamav milter |
... | ... |
@@ -1215,15 +1215,21 @@ int cli_scanpe(int desc, cli_ctx *ctx) |
1215 | 1215 |
|
1216 | 1216 |
/* Trojan.Swizzor.Gen */ |
1217 | 1217 |
if (SCAN_ALGO && (DCONF & PE_CONF_SWIZZOR) && nsections > 1 && fsize > 64*1024 && fsize < 4*1024*1024) { |
1218 |
- int ret = CL_CLEAN; |
|
1219 | 1218 |
if(dirs[2].Size) { |
1220 |
- struct swizz_stats stats; |
|
1221 |
- unsigned int m = 10000; |
|
1222 |
- memset(&stats, 0, sizeof(stats)); |
|
1223 |
- cli_parseres_special(EC32(dirs[2].VirtualAddress), EC32(dirs[2].VirtualAddress), desc, exe_sections, nsections, fsize, hdr_size, 0, 0, &m, &stats); |
|
1224 |
- if (cli_detect_swizz(&stats) == CL_VIRUS) { |
|
1225 |
- *ctx->virname = "Trojan.Swizzor.Gen"; |
|
1226 |
- ret = CL_VIRUS; |
|
1219 |
+ struct swizz_stats *stats = cli_calloc(1, sizeof(*stats)); |
|
1220 |
+ unsigned int m = 1000; |
|
1221 |
+ int ret = CL_CLEAN; |
|
1222 |
+ |
|
1223 |
+ if (!stats) |
|
1224 |
+ ret = CL_EMEM; |
|
1225 |
+ else { |
|
1226 |
+ cli_parseres_special(EC32(dirs[2].VirtualAddress), EC32(dirs[2].VirtualAddress), desc, exe_sections, nsections, fsize, hdr_size, 0, 0, &m, stats); |
|
1227 |
+ if ((ret = cli_detect_swizz(stats)) == CL_VIRUS) { |
|
1228 |
+ *ctx->virname = "Trojan.Swizzor.Gen"; |
|
1229 |
+ } |
|
1230 |
+ free(stats); |
|
1231 |
+ } |
|
1232 |
+ if (ret != CL_CLEAN) { |
|
1227 | 1233 |
free(exe_sections); |
1228 | 1234 |
return ret; |
1229 | 1235 |
} |
... | ... |
@@ -360,46 +360,12 @@ static inline int swizz_j48(const uint16_t n[]) |
360 | 360 |
{ |
361 | 361 |
cli_dbgmsg("swizz_j48: %u, %u, %u\n",n[0],n[1],n[2]); |
362 | 362 |
/* rules based on J48 tree */ |
363 |
- if (n[0] <= 924) |
|
364 |
- return CL_CLEAN; |
|
365 |
- if (n[0] <= 940) { |
|
366 |
- return (n[2] > 1 && n[2] <= 8) ? CL_VIRUS : CL_CLEAN; |
|
367 |
- } |
|
368 |
- if (n[2] <= 14) { |
|
369 |
- if (n[2] <= 0) { |
|
370 |
- if (n[0] <= 999) |
|
371 |
- return CL_CLEAN; |
|
372 |
- if (n[0] <= 1012) { |
|
373 |
- if (n[1] <= 23) { |
|
374 |
- if (n[0] <= 1003) |
|
375 |
- return CL_CLEAN; |
|
376 |
- return (n[1] <= 19 && n[0] > 1007 && n[1] > 15) || (n[1] > 19) ? CL_VIRUS : CL_CLEAN; |
|
377 |
- } |
|
378 |
- return CL_VIRUS; |
|
379 |
- } |
|
380 |
- return n[1] == 0 ? CL_CLEAN : CL_VIRUS; |
|
381 |
- } |
|
382 |
- if (n[2] <= 8) |
|
383 |
- return CL_VIRUS; |
|
384 |
- if (n[0] <= 954) |
|
385 |
- return CL_CLEAN; |
|
386 |
- if (n[2] <= 10) |
|
387 |
- return CL_VIRUS; |
|
388 |
- if (n[2] <= 12) { |
|
389 |
- if (n[0] <= 1011) { |
|
390 |
- if (n[1] <=32) |
|
391 |
- return CL_VIRUS; |
|
392 |
- return (n[2] <= 11 || n[1] > 51) ? CL_VIRUS : CL_CLEAN; |
|
393 |
- } |
|
394 |
- return CL_CLEAN; |
|
395 |
- } |
|
396 |
- if (n[1] <= 52) { |
|
397 |
- return (n[1] <= 43 && n[1] > 6 && |
|
398 |
- (n[2] <= 13 || n[1] <= 30 || n[1] > 40)) |
|
399 |
- ? CL_CLEAN : CL_VIRUS; |
|
400 |
- } |
|
401 |
- } |
|
402 |
- return CL_CLEAN; |
|
363 |
+ if (n[0] <= 945 || !n[1]) |
|
364 |
+ return 0; |
|
365 |
+ if (n[0] <= 1006) |
|
366 |
+ return (n[2] > 0 && n[2] <= 6); |
|
367 |
+ else |
|
368 |
+ return n[1] <= 10; |
|
403 | 369 |
} |
404 | 370 |
|
405 | 371 |
void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_stats *stats, int blob) |
... | ... |
@@ -440,8 +406,10 @@ void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_s |
440 | 440 |
for(i=0;i<j-2;i++) { |
441 | 441 |
if (stri[i] != ' ' && stri[i+1] != ' ' && stri[i+2] != ' ') { |
442 | 442 |
uint16_t idx = (stri[i] - 'a')*676 + (stri[i+1] - 'a')*26 + (stri[i+2] - 'a'); |
443 |
- if (idx < sizeof(ngrams)) |
|
443 |
+ if (idx < sizeof(ngrams)) { |
|
444 | 444 |
ngrams[idx]++; |
445 |
+ stats->gngrams[idx]++; |
|
446 |
+ } |
|
445 | 447 |
} else if (stri[i] == ' ') |
446 | 448 |
words++; |
447 | 449 |
} |
... | ... |
@@ -461,21 +429,72 @@ void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_s |
461 | 461 |
uint32_t v = ngram_cnts[i]; |
462 | 462 |
ngram_cnts[i] = (v<<10)/all; |
463 | 463 |
} |
464 |
- ret = swizz_j48(ngram_cnts); |
|
464 |
+ ret = swizz_j48(ngram_cnts) ? CL_VIRUS : CL_CLEAN; |
|
465 | 465 |
cli_dbgmsg("cli_detect_swizz_str: %s, %u words\n", ret == CL_VIRUS ? "suspicious" : "ok", words); |
466 | 466 |
if (ret == CL_VIRUS) |
467 | 467 |
stats->suspicious += j; |
468 | 468 |
stats->total += j; |
469 | 469 |
} |
470 | 470 |
|
471 |
/*
 * Per-file Swizzor classifier: hard-coded decision tree over the
 * normalized trigram-frequency histogram built by cli_detect_swizz().
 *
 * gn must point to at least 10 values; indices 0, 1, 3, 4, 5, 6, 8 and 9
 * are consulted.
 *
 * Returns 1 when the histogram is classified as suspicious, 0 otherwise.
 * (The return type is spelled out: implicit int is not valid since C99.)
 */
static inline int swizz_j48_global(const uint32_t gn[])
{
    /* Low gn[0]: only a narrow band combined with a small, non-zero gn[8] matches. */
    if (gn[0] <= 24185)
        return gn[0] > 22980 && gn[8] > 0 && gn[8] <= 97;

    /* High gn[0] and any gn[8] at all: suspicious. */
    if (gn[8])
        return 1;

    if (gn[4] > 311)
        return 0;

    if (!gn[4]) {
        return gn[1] > 0 &&
               ((gn[0] <= 26579 && gn[3] > 0) ||
                (gn[0] > 28672 && gn[0] <= 30506));
    }

    if (gn[5] > 616)
        return 0;

    if (gn[6] <= 104)
        return gn[9] <= 167;

    return gn[6] <= 286;
}
|
494 |
+ |
|
471 | 495 |
int cli_detect_swizz(struct swizz_stats *stats) |
472 | 496 |
{ |
497 |
+ uint32_t gn[10]; |
|
498 |
+ uint32_t all = 0; |
|
499 |
+ unsigned i; |
|
500 |
+ int global_swizz = CL_CLEAN; |
|
501 |
+ |
|
473 | 502 |
cli_dbgmsg("cli_detect_swizz: %lu/%lu, version:%d, manifest: %d \n", |
474 | 503 |
(unsigned long)stats->suspicious, (unsigned long)stats->total, |
475 | 504 |
stats->has_version, stats->has_manifest); |
476 |
- /* not all have version/manifest */ |
|
477 |
- if (stats->total > 128 && stats->suspicious > 2*stats->total/10) { |
|
478 |
- return CL_VIRUS; |
|
505 |
+ memset(gn, 0, sizeof(gn)); |
|
506 |
+ for(i=0;i<17576;i++) { |
|
507 |
+ uint8_t v = stats->gngrams[i]; |
|
508 |
+ if (v > 10) v = 10; |
|
509 |
+ if (v) { |
|
510 |
+ gn[v-1]++; |
|
511 |
+ all++; |
|
512 |
+ } |
|
513 |
+ } |
|
514 |
+ if (all) { |
|
515 |
+ /* normalize */ |
|
516 |
+ cli_dbgmsg("cli_detect_swizz: gn: "); |
|
517 |
+ for(i=0;i<sizeof(gn)/sizeof(gn[0]);i++) { |
|
518 |
+ uint32_t v = gn[i]; |
|
519 |
+ gn[i] = (v<<15)/all; |
|
520 |
+ if (cli_debug_flag) |
|
521 |
+ cli_dbgmsg("%lu, ", (unsigned long)gn[i]); |
|
522 |
+ } |
|
523 |
+ global_swizz = swizz_j48_global(gn) ? CL_VIRUS : CL_CLEAN; |
|
524 |
+ cli_dbgmsg("\ncli_detect_swizz: global: %s\n", global_swizz ? "suspicious" : "clean"); |
|
479 | 525 |
} |
480 |
- return CL_CLEAN; |
|
526 |
+ |
|
527 |
+ if (stats->total <= 337) |
|
528 |
+ return CL_CLEAN; |
|
529 |
+ if (stats->suspicious<<10 > 20*stats->total) |
|
530 |
+ return CL_VIRUS; |
|
531 |
+ return global_swizz; |
|
481 | 532 |
} |