Browse code

Heuristic detection of Trojan.Swizzor.Gen (bb #1310)

git-svn: trunk@4511

Török Edvin authored on 2008/12/03 04:55:57
Showing 6 changed files
... ...
@@ -1,3 +1,9 @@
1
+Tue Dec  2 22:00:10 EET 2008 (edwin)
2
+------------------------------------
3
+ * libclamav/dconf.c, libclamav/dconf.h, libclamav/pe.c,
4
+ libclamav/special.c, libclamav/special.h: Heuristic detection of
5
+ Trojan.Swizzor.Gen (bb #1310)
6
+
1 7
 Mon Dec  1 19:51:52 CET 2008 (tk)
2 8
 ---------------------------------
3 9
  * libclamav/matcher-ac.c: fix parsing of lsig modifiers
... ...
@@ -59,6 +59,7 @@ static struct dconf_module modules[] = {
59 59
     { "PE",	    "MD5SECT",	    PE_CONF_MD5SECT,	    1 },
60 60
     { "PE",	    "UPX",	    PE_CONF_UPX,	    1 },
61 61
     { "PE",	    "FSG",	    PE_CONF_FSG,	    1 },
62
+    { "PE",         "SWIZZOR",      PE_CONF_SWIZZOR,        1 },
62 63
 
63 64
     { "PE",	    "PETITE",	    PE_CONF_PETITE,	    1 },
64 65
     { "PE",	    "PESPIN",	    PE_CONF_PESPIN,	    1 },
... ...
@@ -48,7 +48,7 @@ struct cli_dconf {
48 48
 #define PE_CONF_MD5SECT	    0x10
49 49
 #define PE_CONF_UPX	    0x20
50 50
 #define PE_CONF_FSG	    0x40
51
-/*#define PE_CONF_REUSEME	    0x80 */
51
+#define PE_CONF_SWIZZOR     0x80
52 52
 #define PE_CONF_PETITE	    0x100
53 53
 #define PE_CONF_PESPIN	    0x200
54 54
 #define PE_CONF_YC	    0x400
... ...
@@ -21,7 +21,7 @@
21 21
 #if HAVE_CONFIG_H
22 22
 #include "clamav-config.h"
23 23
 #endif
24
-
24
+#define _XOPEN_SOURCE 500
25 25
 #include <stdio.h>
26 26
 #if HAVE_STRING_H
27 27
 #include <string.h>
... ...
@@ -56,6 +56,7 @@
56 56
 #include "matcher.h"
57 57
 #include "matcher-bm.h"
58 58
 #include "disasm.h"
59
+#include "special.h"
59 60
 
60 61
 #ifndef	O_BINARY
61 62
 #define	O_BINARY	0
... ...
@@ -316,6 +317,102 @@ static unsigned int cli_md5sect(int fd, struct cli_exe_section *s, unsigned char
316 316
     return 1;
317 317
 }
318 318
 
319
+static void cli_parseres_special(uint32_t base, uint32_t rva, int srcfd, struct cli_exe_section *exe_sections, uint16_t nsections, size_t fsize, uint32_t hdr_size, unsigned int level, uint32_t type, unsigned int *maxres, struct swizz_stats *stats) {
320
+    unsigned int err = 0, i;
321
+    uint8_t resdir[16];
322
+    uint8_t *entry, *oentry;
323
+    uint16_t named, unnamed;
324
+    uint32_t rawaddr = cli_rawaddr(rva, exe_sections, nsections, &err, fsize, hdr_size);
325
+    uint32_t entries;
326
+
327
+    if(level>2 || !*maxres) return;
328
+    *maxres-=1;
329
+    if(err || (pread(srcfd,resdir, sizeof(resdir), rawaddr) != sizeof(resdir)))
330
+	    return;
331
+    named = (uint16_t)cli_readint16(resdir+12);
332
+    unnamed = (uint16_t)cli_readint16(resdir+14);
333
+
334
+    entries = /*named+*/unnamed;
335
+    if (!entries)
336
+	    return;
337
+    oentry = entry = cli_malloc(entries*8);
338
+    rawaddr += named*8; /* skip named */
339
+    /* this is just used in a heuristic detection, so don't give error on failure */
340
+    if (!entry) {
341
+	    cli_dbgmsg("cli_parseres_special: failed to allocate memory for resource directory:%lu\n", (unsigned long)entries);
342
+	    return;
343
+    }
344
+    if (pread(srcfd, entry, entries*8, rawaddr+16) != entries*8) {
345
+	    cli_dbgmsg("cli_parseres_special: failed to read resource directory at:%lu\n", (unsigned long)rawaddr+16);
346
+	    free(oentry);
347
+	    return;
348
+    }
349
+    /*for (i=0; i<named; i++) {
350
+	uint32_t id, offs;
351
+	id = cli_readint32(entry);
352
+	offs = cli_readint32(entry+4);
353
+	if(offs>>31)
354
+	    cli_parseres( base, base + (offs&0x7fffffff), srcfd, exe_sections, nsections, fsize, hdr_size, level+1, type, maxres, stats);
355
+	entry+=8;
356
+    }*/
357
+    for (i=0; i<unnamed; i++) {
358
+	uint32_t id, offs;
359
+	id = cli_readint32(entry)&0x7fffffff;
360
+	if(level==0) {
361
+		switch(id) {
362
+			case 4: /* menu */
363
+			case 5: /* dialog */
364
+			case 6: /* string */
365
+			case 11:/* msgtable */
366
+				type = id;
367
+				break;
368
+			case 16:
369
+				/* 14: version */
370
+				stats->has_version = 1;
371
+				break;
372
+			case 24: /* manifest */
373
+				stats->has_manifest = 1;
374
+				break;
375
+			/* otherwise keep it 0, we don't want it */
376
+		}
377
+	} else if (!type) {
378
+		/* if we are not interested in this type, skip */
379
+		continue;
380
+	}
381
+	offs = cli_readint32(entry+4);
382
+	if(offs>>31)
383
+		cli_parseres_special(base, base + (offs&0x7fffffff), srcfd, exe_sections, nsections, fsize, hdr_size, level+1, type, maxres, stats);
384
+	else {
385
+		if (type == 4 || type == 5 || type == 6 || type ==11) {
386
+			offs = cli_readint32(entry+4);
387
+			rawaddr = cli_rawaddr(base + offs, exe_sections, nsections, &err, fsize, hdr_size);
388
+			if (!err && pread(srcfd, resdir, sizeof(resdir), rawaddr) == sizeof(resdir)) {
389
+				uint32_t isz = cli_readint32(resdir+4);
390
+				char *str;
391
+				rawaddr = cli_rawaddr(cli_readint32(resdir), exe_sections, nsections, &err, fsize, hdr_size);
392
+				if (err || !isz || rawaddr+isz >= fsize) {
393
+					cli_dbgmsg("cli_parseres_special: invalid resource table entry: %lu + %lu\n", 
394
+							(unsigned long)rawaddr, 
395
+							(unsigned long)isz);
396
+					continue;
397
+				}
398
+				str = cli_malloc(isz);
399
+				if (!str) {
400
+					cli_dbgmsg("cli_parseres_special: failed to allocate string mem: %lu\n", (unsigned long)isz);
401
+					continue;
402
+				}
403
+				if(pread(srcfd, str, isz, rawaddr) == isz) {
404
+					cli_detect_swizz_str(str, isz, stats, type);
405
+				}
406
+				free (str);
407
+			}
408
+		}
409
+	}
410
+	entry+=8;
411
+    }
412
+    free (oentry);
413
+}
414
+
319 415
 int cli_scanpe(int desc, cli_ctx *ctx)
320 416
 {
321 417
 	uint16_t e_magic; /* DOS signature ("MZ") */
... ...
@@ -344,6 +441,7 @@ int cli_scanpe(int desc, cli_ctx *ctx)
344 344
 	struct cli_exe_section *exe_sections;
345 345
 	struct cli_matcher *md5_sect;
346 346
 	char timestr[32];
347
+	struct pe_image_data_dir *dirs;
347 348
 
348 349
 
349 350
     if(!ctx) {
... ...
@@ -597,6 +695,7 @@ int cli_scanpe(int desc, cli_ctx *ctx)
597 597
 	cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr32.SizeOfImage));
598 598
 	cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
599 599
 	cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr32.NumberOfRvaAndSizes));
600
+	dirs = optional_hdr32.DataDirectory;
600 601
 
601 602
     } else { /* PE+ */
602 603
         /* read the remaining part of the header */
... ...
@@ -628,6 +727,7 @@ int cli_scanpe(int desc, cli_ctx *ctx)
628 628
 	cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr64.SizeOfImage));
629 629
 	cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
630 630
 	cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr64.NumberOfRvaAndSizes));
631
+	dirs = optional_hdr64.DataDirectory;
631 632
     }
632 633
 
633 634
 
... ...
@@ -1113,6 +1213,22 @@ int cli_scanpe(int desc, cli_ctx *ctx)
1113 1113
 	break;
1114 1114
     }
1115 1115
 
1116
+    /* Trojan.Swizzor.Gen */
1117
+    if (SCAN_ALGO && (DCONF & PE_CONF_SWIZZOR) && nsections > 1 && fsize > 64*1024 && fsize < 4*1024*1024) {
1118
+	    int ret = CL_CLEAN;
1119
+	    if(dirs[2].Size) {
1120
+		    struct swizz_stats stats;
1121
+		    unsigned int m = 10000;
1122
+		    memset(&stats, 0, sizeof(stats));
1123
+		    cli_parseres_special(EC32(dirs[2].VirtualAddress), EC32(dirs[2].VirtualAddress), desc, exe_sections, nsections, fsize, hdr_size, 0, 0, &m, &stats);
1124
+		    if (cli_detect_swizz(&stats) == CL_VIRUS) {
1125
+			    *ctx->virname = "Trojan.Swizzor.Gen";
1126
+			    ret = CL_VIRUS;
1127
+			    free(exe_sections);
1128
+			    return ret;
1129
+		    }
1130
+	    }
1131
+    }
1116 1132
 
1117 1133
     /* UPX, FSG, MEW support */
1118 1134
 
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  Copyright (C) 2007-2008 Sourcefire, Inc.
3 3
  *
4
- *  Authors: Trog
4
+ *  Authors: Trog, Török Edvin
5 5
  *
6 6
  *  This program is free software; you can redistribute it and/or modify
7 7
  *  it under the terms of the GNU General Public License version 2 as
... ...
@@ -34,7 +34,7 @@
34 34
 #include <netinet/in.h>
35 35
 #endif
36 36
 #include <string.h>
37
-
37
+#include <ctype.h>
38 38
 #include "clamav.h"
39 39
 #include "others.h"
40 40
 #include "cltypes.h"
... ...
@@ -355,3 +355,97 @@ int cli_check_riff_exploit(int fd)
355 355
 	}
356 356
 	return retval;
357 357
 }
358
+
359
+static inline int swizz_j48(const uint16_t n[])
360
+{
361
+	cli_dbgmsg("swizz_j48: %u, %u, %u\n",n[0],n[1],n[2]);
362
+	/* rules based on J48 tree */
363
+	if (n[0] <= 951 || n[1] == 0)
364
+		return CL_CLEAN;
365
+	if (n[2] == 0) {
366
+		if (n[0] <= 984)
367
+			return CL_CLEAN;
368
+		if (n[1] <= 15)
369
+			return n[0] <= 1008 ? CL_CLEAN : CL_VIRUS;
370
+		return CL_CLEAN;
371
+	}
372
+	return n[2] <= 7 ? CL_VIRUS : CL_CLEAN;
373
+}
374
+
375
+void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_stats *stats, int blob)
376
+{
377
+	unsigned char stri[4096];
378
+        uint32_t i, j = 0;
379
+	int bad = 0;
380
+	int lastalnum = 0;
381
+	uint8_t ngrams[17576];
382
+	uint16_t all=0;
383
+	uint16_t ngram_cnts[3];
384
+	uint16_t words = 0;
385
+	int ret;
386
+
387
+	for(i=0;i<len-1 && j < sizeof(stri)-2;i += 2) {
388
+		unsigned char c = str[i];
389
+		if (str[i+1] || !c) {
390
+			bad++;
391
+			continue;
392
+		}
393
+		if (!isalnum(c)) {
394
+			if (!lastalnum)
395
+				continue;
396
+			lastalnum = 0;
397
+			c = ' ';
398
+		} else {
399
+			lastalnum = 1;
400
+			if (isdigit(c))
401
+				continue;
402
+		}
403
+		stri[j++] = tolower(c);
404
+	}
405
+	stri[j++] = '\0';
406
+	if ((!blob && (bad >= 8)) || j < 4)
407
+		return;
408
+	memset(ngrams, 0, sizeof(ngrams));
409
+	memset(ngram_cnts, 0, sizeof(ngram_cnts));
410
+	for(i=0;i<j-2;i++) {
411
+		if (stri[i] != ' ' && stri[i+1] != ' ' && stri[i+2] != ' ') {
412
+			uint16_t idx = (stri[i] - 'a')*676 + (stri[i+1] - 'a')*26 + (stri[i+2] - 'a');
413
+			if (idx < sizeof(ngrams))
414
+				ngrams[idx]++;
415
+		} else if (stri[i] == ' ')
416
+			words++;
417
+	}
418
+	for(i=0;i<sizeof(ngrams);i++) {
419
+		uint8_t v = ngrams[i];
420
+		if (v > 3) v = 3;
421
+		if (v) {
422
+			ngram_cnts[v-1]++;
423
+			all++;
424
+		}
425
+	}
426
+	if (!all)
427
+		return;
428
+	cli_dbgmsg("cli_detect_swizz_str: %u, %u, %u\n",ngram_cnts[0],ngram_cnts[1],ngram_cnts[2]);
429
+	/* normalize */
430
+	for(i=0;i<sizeof(ngram_cnts)/sizeof(ngram_cnts[0]);i++) {
431
+		uint32_t v = ngram_cnts[i];
432
+		ngram_cnts[i] = (v<<10)/all;
433
+	}
434
+	ret = swizz_j48(ngram_cnts);
435
+	cli_dbgmsg("cli_detect_swizz_str: %s, %u words\n", ret == CL_VIRUS ? "suspicious" : "ok", words);
436
+	if (ret == CL_VIRUS)
437
+		stats->suspicious += j;
438
+	stats->total += j;
439
+}
440
+
441
+int cli_detect_swizz(struct swizz_stats *stats)
442
+{
443
+	cli_dbgmsg("cli_detect_swizz: %lu/%lu, version:%d, manifest: %d \n",
444
+			(unsigned long)stats->suspicious, (unsigned long)stats->total,
445
+			stats->has_version, stats->has_manifest);
446
+	/* not all have version/manifest */
447
+	if (stats->total > 128 && stats->suspicious > 3*stats->total/10) {
448
+		return CL_VIRUS;
449
+	}
450
+	return CL_CLEAN;
451
+}
... ...
@@ -22,9 +22,17 @@
22 22
 #define __SPECIAL_H
23 23
 
24 24
 #include "others.h"
25
+struct swizz_stats { /* counters gathered from PE resources for the Trojan.Swizzor.Gen heuristic */
26
+	uint32_t total; /* summed normalized length of every string classified by cli_detect_swizz_str() */
27
+	uint32_t suspicious; /* part of total contributed by strings classified as suspicious */
28
+	int has_version; /* set to 1 when a top-level resource with id 16 is seen */
29
+	int has_manifest; /* set to 1 when a top-level resource with id 24 is seen */
30
+};
25 31
 
26 32
 int cli_check_mydoom_log(int desc, const char **virname);
27 33
 int cli_check_jpeg_exploit(int fd, cli_ctx *ctx);
28 34
 int cli_check_riff_exploit(int fd);
35
+void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_stats *stats, int blob);
36
+int cli_detect_swizz(struct swizz_stats *stats);
29 37
 
30 38
 #endif