Browse code

Add bytecode performance statistics

Steve Morgan authored on 2012/12/06 08:48:52
Showing 13 changed files
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  ClamAV bytecode handler tool.
3 3
  *
4
- *  Copyright (C) 2009 Sourcefire, Inc.
4
+ *  Copyright (C) 2009-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -306,7 +306,8 @@ int main(int argc, char *argv[])
306 306
     bcs.all_bcs = bc;
307 307
     bcs.count = 1;
308 308
 
309
-    rc = cli_bytecode_load(bc, f, NULL, optget(opts, "trust-bytecode")->enabled);
309
+    rc = cli_bytecode_load(bc, f, NULL, optget(opts, "trust-bytecode")->enabled, 
310
+			   optget(opts, "bytecode-statistics")->enabled);
310 311
     if (rc != CL_SUCCESS) {
311 312
 	fprintf(stderr,"Unable to load bytecode: %s\n", cl_strerror(rc));
312 313
 	optfree(opts);
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2009 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2012 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -231,6 +231,7 @@ void help(void)
231 231
     mprintf("    --bytecode[=yes(*)/no]               Load bytecode from the database\n");
232 232
     mprintf("    --bytecode-unsigned[=yes/no(*)]      Load unsigned bytecode\n");
233 233
     mprintf("    --bytecode-timeout=N                 Set bytecode timeout (in milliseconds)\n");
234
+    mprintf("    --bytecode-statistics[=yes/no(*)]    Collect and print bytecode statistics\n");
234 235
     mprintf("    --detect-pua[=yes/no(*)]             Detect Possibly Unwanted Applications\n");
235 236
     mprintf("    --exclude-pua=CAT                    Skip PUA sigs of category CAT\n");
236 237
     mprintf("    --include-pua=CAT                    Load PUA sigs of category CAT\n");
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2009 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2012 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -635,6 +635,9 @@ int scanmanager(const struct optstruct *opts)
635 635
     if(optget(opts, "bytecode-unsigned")->enabled)
636 636
 	dboptions |= CL_DB_BYTECODE_UNSIGNED;
637 637
 
638
+    if(optget(opts, "bytecode-statistics")->enabled)
639
+	dboptions |= CL_DB_BYTECODE_STATS;
640
+
638 641
     if((opt = optget(opts,"bytecode-timeout"))->enabled)
639 642
 	cl_engine_set_num(engine, CL_ENGINE_BYTECODE_TIMEOUT, opt->numarg);
640 643
     if((opt = optget(opts,"bytecode-mode"))->enabled) {
... ...
@@ -938,6 +941,11 @@ int scanmanager(const struct optstruct *opts)
938 938
 	}
939 939
     }
940 940
 
941
+    if(optget(opts, "bytecode-statistics")->enabled) {
942
+	cli_sigperf_print();
943
+	cli_sigperf_events_destroy();
944
+    }
945
+
941 946
     /* free the engine */
942 947
     cl_engine_free(engine);
943 948
 
... ...
@@ -96,6 +96,9 @@ Allow loading bytecode from outside digitally signed .c[lv]d files.
96 96
 \fB\-\-bytecode\-timeout=N\fR
97 97
 Set bytecode timeout in milliseconds (default: 60000 = 60s)
98 98
 .TP 
99
+\fB\-\-bytecode\-statistics[=yes/no(*)]\fR
100
+Collect and print bytecode statistics.
101
+.TP 
99 102
 \fB\-\-detect\-pua[=yes/no(*)]\fR
100 103
 Detect Possibly Unwanted Applications.
101 104
 .TP 
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  Load, and verify ClamAV bytecode.
3 3
  *
4
- *  Copyright (C) 2009-2010 Sourcefire, Inc.
4
+ *  Copyright (C) 2009-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -40,6 +40,13 @@
40 40
 #include "builtin_bytecodes.h"
41 41
 #include <string.h>
42 42
 
43
+#define MAX_BC 64
44
+#define BC_EVENTS_PER_SIG 2
45
+#define MAX_BC_SIGEVENT_ID MAX_BC*BC_EVENTS_PER_SIG
46
+
47
+cli_events_t * g_sigevents = NULL;
48
+unsigned int g_sigid;
49
+
43 50
 /* dummy values */
44 51
 static const uint32_t nomatch[64] = {
45 52
     0xdeadbeef, 0xdeaddead, 0xbeefdead, 0xdeaddead, 0xdeadbeef, 0, 0, 0,
... ...
@@ -1409,7 +1416,112 @@ enum parse_state {
1409 1409
     PARSE_SKIP
1410 1410
 };
1411 1411
 
1412
-int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int trust)
1412
+struct sigperf_elem {
1413
+    const char * bc_name;
1414
+    uint64_t usecs;
1415
+    unsigned long run_count;
1416
+    unsigned long match_count;
1417
+};
1418
+
1419
+static int sigelem_comp(const void * a, const void * b)
1420
+{
1421
+    const struct sigperf_elem *ela = a;
1422
+    const struct sigperf_elem *elb = b;
1423
+    return elb->usecs/elb->run_count - ela->usecs/ela->run_count;
1424
+}
1425
+
1426
+void cli_sigperf_print()
1427
+{
1428
+    struct sigperf_elem stats[MAX_BC], *elem = stats;
1429
+    int i, elems = 0, max_name_len = 0, name_len;
1430
+
1431
+    memset(stats, 0, sizeof(stats));
1432
+    for (i=0;i<MAX_BC;i++) {
1433
+	union ev_val val;
1434
+	uint32_t count;
1435
+	const char * name = cli_event_get_name(g_sigevents, i*BC_EVENTS_PER_SIG);
1436
+	cli_event_get(g_sigevents, i*BC_EVENTS_PER_SIG, &val, &count);
1437
+	if (!count) {
1438
+	    if (name)
1439
+		cli_dbgmsg("No event triggered for %s\n", name);
1440
+	    continue;
1441
+	}
1442
+	name_len = strlen(name);
1443
+	if (name_len > max_name_len)
1444
+	    max_name_len = name_len;
1445
+	elem->bc_name = name?name:"\"noname\"";
1446
+	elem->usecs = val.v_int;
1447
+	elem->run_count = count;
1448
+	cli_event_get(g_sigevents, i*BC_EVENTS_PER_SIG+1, &val, &count);
1449
+	elem->match_count = count;
1450
+	elem++, elems++;
1451
+    }
1452
+
1453
+    qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
1454
+
1455
+    elem = stats;
1456
+    /* name runs matches microsecs avg */
1457
+    cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "Bytecode name",
1458
+	    8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg");
1459
+    cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "=============",
1460
+	    8, "=====", 8, "========", 12, "===========", 9, "=========");
1461
+    while (elem->run_count) {
1462
+	cli_infomsg (NULL, "%-*s %*lu %*lu %*lu %*.2f\n", max_name_len, elem->bc_name,
1463
+		     8, elem->run_count, 8, elem->match_count, 
1464
+		12, elem->usecs, 9, (double)elem->usecs/elem->run_count);
1465
+	elem++;
1466
+    }
1467
+}
1468
+
1469
+static void sigperf_events_init(struct cli_bc *bc)
1470
+{
1471
+    int ret;
1472
+
1473
+    if (!g_sigevents)
1474
+	g_sigevents = cli_events_new(MAX_BC_SIGEVENT_ID);
1475
+
1476
+    if (!g_sigevents) {
1477
+	cli_errmsg("No memory for events table\n");
1478
+	return;
1479
+    }
1480
+
1481
+    if (g_sigid > MAX_BC_SIGEVENT_ID - BC_EVENTS_PER_SIG - 1) {
1482
+	cli_errmsg("sigperf_events_init: events table full. Increase MAX_BC\n");
1483
+	return;
1484
+    }
1485
+
1486
+    cli_dbgmsg("sigperf_events_init(): adding sig ids starting %u for %s\n", g_sigid, bc->lsig);
1487
+
1488
+    if (!bc->lsig) {
1489
+	cli_dbgmsg("cli_event_define error for time event id %d\n", bc->sigtime_id);
1490
+	return;
1491
+    }
1492
+
1493
+    /* register time event */
1494
+    bc->sigtime_id = g_sigid;
1495
+    ret = cli_event_define(g_sigevents, g_sigid++, bc->lsig, ev_time, multiple_sum);
1496
+    if (ret) {
1497
+	cli_errmsg("sigperf_events_init: cli_event_define() error for time event id %d\n", bc->sigtime_id);
1498
+	bc->sigtime_id = MAX_BC_SIGEVENT_ID+1;
1499
+	return;
1500
+    }
1501
+
1502
+    /* register match count */
1503
+    bc->sigmatch_id = g_sigid;
1504
+    ret = cli_event_define(g_sigevents, g_sigid++, bc->lsig, ev_int, multiple_sum);
1505
+    if (ret) {
1506
+	cli_errmsg("sigperf_events_init: cli_event_define() error for matches event id %d\n", bc->sigmatch_id);
1507
+	bc->sigtime_id = MAX_BC_SIGEVENT_ID+1;
1508
+	return;
1509
+    }
1510
+}
1511
+
1512
+void cli_sigperf_events_destroy()
1513
+{
1514
+    cli_events_free(g_sigevents);
1515
+}
1516
+
1517
+int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int trust, int sigperf)
1413 1518
 {
1414 1519
     unsigned row = 0, current_func = 0, bb=0;
1415 1520
     char *buffer;
... ...
@@ -1565,6 +1677,8 @@ int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int tru
1565 1565
     }
1566 1566
     free(buffer);
1567 1567
     cli_dbgmsg("Parsed %d functions\n", current_func);
1568
+    if (sigperf)
1569
+	sigperf_events_init(bc);
1568 1570
     if (current_func != bc->num_func && bc->state != bc_skip) {
1569 1571
 	cli_errmsg("Loaded less functions than declared: %u vs. %u\n",
1570 1572
 		   current_func, bc->num_func);
... ...
@@ -1652,6 +1766,7 @@ int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, stru
1652 1652
 	    return CL_EBYTECODE_TESTFAIL;
1653 1653
 	}
1654 1654
     }
1655
+    cli_event_time_start(g_sigevents, bc->sigtime_id);
1655 1656
     if (bc->state == bc_interp || test_mode) {
1656 1657
 	ctx->bc_events = interp_ev;
1657 1658
 	memset(&func, 0, sizeof(func));
... ...
@@ -1705,6 +1820,9 @@ int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, stru
1705 1705
 	if (ctx->outfd)
1706 1706
 	    cli_bcapi_extract_new(ctx, -1);
1707 1707
     }
1708
+    cli_event_time_stop(g_sigevents, bc->sigtime_id);
1709
+    if (ctx->virname)
1710
+	cli_event_count(g_sigevents, bc->sigmatch_id);
1708 1711
 
1709 1712
     if (test_mode) {
1710 1713
 	unsigned interp_errors = cli_event_errors(interp_ev);
... ...
@@ -1738,7 +1856,8 @@ int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, stru
1738 1738
 	    ok = 0;
1739 1739
 	}
1740 1740
 	/*cli_event_debug(jit_ev, BCEV_EXEC_TIME);
1741
-	cli_event_debug(interp_ev, BCEV_EXEC_TIME);*/
1741
+        cli_event_debug(interp_ev, BCEV_EXEC_TIME);
1742
+	cli_event_debug(g_sigevents, bc->sigtime_id);*/
1742 1743
 	if (!ok) {
1743 1744
 	    cli_events_free(jit_ev);
1744 1745
 	    cli_events_free(interp_ev);
... ...
@@ -2398,7 +2517,7 @@ static int run_builtin_or_loaded(struct cli_all_bc *bcs, uint8_t kind, const cha
2398 2398
 	    return CL_EMALFDB;
2399 2399
 	}
2400 2400
 
2401
-	rc = cli_bytecode_load(bc, NULL, &dbio, 1);
2401
+	rc = cli_bytecode_load(bc, NULL, &dbio, 1, 0);
2402 2402
 	if (rc) {
2403 2403
 	    cli_errmsg("Failed to load builtin %s bytecode\n", desc);
2404 2404
 	    free(bc);
... ...
@@ -2692,7 +2811,7 @@ int cli_bytecode_runhook(cli_ctx *cctx, const struct cl_engine *engine, struct c
2692 2692
 	    continue;
2693 2693
 	}
2694 2694
 	if (ctx->virname) {
2695
-	    cli_dbgmsg("Bytecode found virus: %s\n", ctx->virname);
2695
+	    cli_dbgmsg("Bytecode runhook found virus: %s\n", ctx->virname);
2696 2696
 	    cli_append_virus(cctx, ctx->virname);
2697 2697
 	    if (!(cctx->options & CL_SCAN_ALLMATCHES)) {
2698 2698
 		cli_bytecode_context_clear(ctx);
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  Load, verify and execute ClamAV bytecode.
3 3
  *
4
- *  Copyright (C) 2009-2010 Sourcefire, Inc.
4
+ *  Copyright (C) 2009-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -47,29 +47,30 @@ enum bc_state {
47 47
 };
48 48
 
49 49
 struct cli_bc {
50
-  struct bytecode_metadata metadata;
51
-  unsigned id;
52
-  unsigned kind;
53
-  unsigned num_types;
54
-  unsigned num_func;
55
-  struct cli_bc_func *funcs;
56
-  struct cli_bc_type *types;
57
-  uint64_t **globals;
58
-  uint16_t *globaltys;
59
-  size_t num_globals;
60
-  enum bc_state state;
61
-  struct bitset_tag *uses_apis;
62
-  char *lsig;
63
-  char *vnameprefix;
64
-  char **vnames;
65
-  unsigned vnames_cnt;
66
-  uint16_t start_tid;
67
-  struct cli_bc_dbgnode *dbgnodes;
68
-  unsigned dbgnode_cnt;
69
-  unsigned hook_lsig_id;
70
-  unsigned trusted;
71
-  uint32_t numGlobalBytes;
72
-  uint8_t *globalBytes;
50
+    struct bytecode_metadata metadata;
51
+    unsigned id;
52
+    unsigned kind;
53
+    unsigned num_types;
54
+    unsigned num_func;
55
+    struct cli_bc_func *funcs;
56
+    struct cli_bc_type *types;
57
+    uint64_t **globals;
58
+    uint16_t *globaltys;
59
+    size_t num_globals;
60
+    enum bc_state state;
61
+    struct bitset_tag *uses_apis;
62
+    char *lsig;
63
+    char *vnameprefix;
64
+    char **vnames;
65
+    unsigned vnames_cnt;
66
+    uint16_t start_tid;
67
+    struct cli_bc_dbgnode *dbgnodes;
68
+    unsigned dbgnode_cnt;
69
+    unsigned hook_lsig_id;
70
+    unsigned trusted;
71
+    uint32_t numGlobalBytes;
72
+    uint8_t *globalBytes;
73
+    uint32_t sigtime_id, sigmatch_id;
73 74
 };
74 75
 
75 76
 struct cli_all_bc {
... ...
@@ -107,7 +108,7 @@ extern int have_clamjit;
107 107
 }
108 108
 #endif
109 109
 int cli_bytecode_init(struct cli_all_bc *allbc);
110
-int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int security);
110
+int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int security, int sigperf);
111 111
 int cli_bytecode_prepare2(struct cl_engine *engine, struct cli_all_bc *allbc, unsigned dconfmask);
112 112
 int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, struct cli_bc_ctx *ctx);
113 113
 void cli_bytecode_destroy(struct cli_bc *bc);
... ...
@@ -141,7 +142,8 @@ void cli_bytecode_context_set_trace(struct cli_bc_ctx*, unsigned level,
141 141
 				    bc_dbg_callback_trace_op,
142 142
 				    bc_dbg_callback_trace_val,
143 143
 				    bc_dbg_callback_trace_ptr);
144
-
144
+void cli_sigperf_print(void);
145
+void cli_sigperf_events_destroy(void);
145 146
 #ifdef __cplusplus
146 147
 }
147 148
 #endif
... ...
@@ -120,6 +120,7 @@ typedef enum {
120 120
 #define CL_DB_SIGNED	    0x4000  /* internal */
121 121
 #define CL_DB_BYTECODE_UNSIGNED	0x8000
122 122
 #define CL_DB_UNSIGNED	    0x10000 /* internal */
123
+#define CL_DB_BYTECODE_STATS 0x20000
123 124
 
124 125
 /* recommended db settings */
125 126
 #define CL_DB_STDOPT	    (CL_DB_PHISHING | CL_DB_PHISHING_URLS | CL_DB_BYTECODE)
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  (bytecode) events
3 3
  *
4
- *  Copyright (C) 2010 Sourcefire, Inc.
4
+ *  Copyright (C) 2010-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -138,6 +138,14 @@ static inline void ev_chain(cli_events_t *ctx, struct cli_event *ev, union ev_va
138 138
     ev->count++;
139 139
 }
140 140
 
141
+const char * cli_event_get_name(cli_events_t *ctx, unsigned id)
142
+{
143
+    struct cli_event *ev = get_event(ctx, id);
144
+    if (!ev)
145
+	return NULL;
146
+    return ev->name;
147
+}
148
+
141 149
 void cli_event_int(cli_events_t *ctx, unsigned id, uint64_t arg)
142 150
 {
143 151
     struct cli_event *ev = get_event(ctx, id);
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  (bytecode) events
3 3
  *
4
- *  Copyright (C) 2010 Sourcefire, Inc.
4
+ *  Copyright (C) 2010-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -83,6 +83,8 @@ void cli_event_count(cli_events_t *ctx, unsigned id);
83 83
 
84 84
 void cli_event_get(cli_events_t* ctx, unsigned id, union ev_val *val, uint32_t *count);
85 85
 
86
+const char * cli_event_get_name(cli_events_t* ctx, unsigned id);
87
+
86 88
 /* print all recorded events */
87 89
 void cli_event_debug_all(cli_events_t *ctx);
88 90
 
... ...
@@ -44,6 +44,9 @@ CLAMAV_PUBLIC {
44 44
 };
45 45
 CLAMAV_PRIVATE {
46 46
   global:
47
+    cli_sigperf_print; 
48
+    cli_sigperf_events_destroy; 
49
+
47 50
     cli_gettmpdir;
48 51
     cli_strtok;
49 52
     cli_strtokenize;
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2007-2010 Sourcefire, Inc.
2
+ *  Copyright (C) 2007-2012 Sourcefire, Inc.
3 3
  *
4 4
  *  Authors: Tomasz Kojm
5 5
  *
... ...
@@ -1487,7 +1487,7 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1487 1487
 	    security_trust = 0;
1488 1488
     }
1489 1489
 
1490
-    rc = cli_bytecode_load(bc, fs, dbio, security_trust);
1490
+    rc = cli_bytecode_load(bc, fs, dbio, security_trust, options&CL_DB_BYTECODE_STATS);
1491 1491
     /* read remainder of DB, needed because cvd.c checks that we read the entire
1492 1492
      * file */
1493 1493
     while (cli_dbgets(buf, sizeof(buf), fs, dbio)) {}
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2008-2009 Sourcefire, Inc.
2
+ *  Copyright (C) 2008-2012 Sourcefire, Inc.
3 3
  *
4 4
  *  Author: Tomasz Kojm <tkojm@clamav.net>
5 5
  *
... ...
@@ -276,7 +276,9 @@ const struct clam_option __clam_options[] = {
276 276
     { "BytecodeMode", "bytecode-mode", 0, TYPE_STRING, "^(Auto|ForceJIT|ForceInterpreter|Test)$", -1, "Auto", FLAG_REQUIRED, OPT_CLAMD | OPT_CLAMSCAN,
277 277
 	"Set bytecode execution mode.\nPossible values:\n\tAuto - automatically choose JIT if possible, fallback to interpreter\nForceJIT - always choose JIT, fail if not possible\nForceIntepreter - always choose interpreter\nTest - run with both JIT and interpreter and compare results. Make all failures fatal.","Auto"},
278 278
 
279
-    { "DetectPUA", "detect-pua", 0, TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Detect Potentially Unwanted Applications.", "yes" },
279
+    { "BytecodeStatistics", "bytecode-statistics", 0, TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMSCAN, "Collect and print bytecode execution statistics.", "no" },
280
+
281
+   { "DetectPUA", "detect-pua", 0, TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Detect Potentially Unwanted Applications.", "yes" },
280 282
 
281 283
     { "ExcludePUA", "exclude-pua", 0, TYPE_STRING, NULL, -1, NULL, FLAG_MULTIPLE, OPT_CLAMD | OPT_CLAMSCAN, "Exclude a specific PUA category. This directive can be used multiple times.\nSee http://www.clamav.net/support/pua for the complete list of PUA\ncategories.", "NetTool\nPWTool" },
282 284
 
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  Unit tests for bytecode functions. 
3 3
  *
4
- *  Copyright (C) 2009 Sourcefire, Inc.
4
+ *  Copyright (C) 2009-2012 Sourcefire, Inc.
5 5
  *
6 6
  *  Authors: Török Edvin
7 7
  *
... ...
@@ -87,7 +87,7 @@ static void runtest(const char *file, uint64_t expected, int fail, int nojit,
87 87
     bcs.all_bcs = &bc;
88 88
     bcs.count = 1;
89 89
 
90
-    rc = cli_bytecode_load(&bc, f, NULL, 1);
90
+    rc = cli_bytecode_load(&bc, f, NULL, 1, 0);
91 91
     fail_unless(rc == CL_SUCCESS, "cli_bytecode_load failed");
92 92
     fclose(f);
93 93