Browse code

Introduce BC_STARTUP bytecode (bb #2101, #2078).

This bytecode will be run in interpreter mode on startup:
it can disable the JIT, or disable all further bytecodes.
There will be a builtin copy of it that is loaded if
no BC_STARTUP bytecodes were loaded (like filetypes_int.h and daily.ftm).
Only one BC_STARTUP bytecode is accepted, so as soon as bytecode.cvd
contains one, it can no longer be overridden!

This bytecode will replace all the JIT checks (CPU, selinux, pax, etc.),
and allows disabling the JIT on just specific OS/arch/compiler/etc.
combinations. There are too many combinations to have a dconf flag for each.

Also fix the bytecode dconf so that the individual JIT_* flags actually work
(previously we could disable the entire JIT, or none at all).

Also introduce preliminary support for bytecode test mode (we already have
auto, jit and interpreter mode, introducing another mode here is easiest).
The test mode doesn't actually compare the outputs yet, but it does fail if
the JIT is disabled / falls back to interpreter.

Török Edvin authored on 2010/07/29 19:32:15
Showing 4 changed files
... ...
@@ -1532,6 +1532,11 @@ int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, stru
1532 1532
 	cli_errmsg("bytecode has to be prepared either for interpreter or JIT!\n");
1533 1533
 	return CL_EARG;
1534 1534
     }
1535
+    if (bc->state == bc_disabled) {
1536
+	cli_dbgmsg("bytecode triggered but running bytecodes is disabled\n");
1537
+	return CL_SUCCESS;
1538
+    }
1539
+    ctx->env = &bcs->env;
1535 1540
     context_safe(ctx);
1536 1541
     if (bc->state == bc_interp) {
1537 1542
 	memset(&func, 0, sizeof(func));
... ...
@@ -2090,38 +2095,220 @@ static int selfcheck(int jit, struct cli_bcengine *engine)
2090 2090
     return rc;
2091 2091
 }
2092 2092
 
2093
+static int set_mode(struct cl_engine *engine, enum bytecode_mode mode)
2094
+{
2095
+    if (engine->bytecode_mode == mode)
2096
+	return 0;
2097
+    if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2098
+	cli_errmsg("bytecode: already turned off, can't turn it on again!\n");
2099
+	return -1;
2100
+    }
2101
+    cli_dbgmsg("Bytecode: mode changed to %d\n", mode);
2102
+    if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST) {
2103
+	cli_errmsg("bytecode: in test mode but JIT/bytecode is about to be disabled: %d\n", mode);
2104
+	engine->bytecode_mode = mode;
2105
+	return -1;
2106
+    }
2107
+    if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2108
+	cli_errmsg("bytecode: in JIT mode but JIT is about to be disabled: %d\n", mode);
2109
+	engine->bytecode_mode = mode;
2110
+	return -1;
2111
+    }
2112
+    engine->bytecode_mode = mode;
2113
+    return 0;
2114
+}
2115
+
2116
+/* runs the first bytecode of the specified kind, or the builtin one if no
2117
+ * bytecode of that kind is loaded */
2118
+static int run_builtin_or_loaded(struct cli_all_bc *bcs, uint8_t kind, const char* builtin_cbc, struct cli_bc_ctx *ctx, const char *desc)
2119
+{
2120
+    unsigned i, builtin = 0, rc = 0;
2121
+    struct cli_bc *bc = NULL;
2122
+
2123
+    for (i=0;i<bcs->count;i++) {
2124
+	bc = &bcs->all_bcs[i];
2125
+	if (bc->kind == kind)
2126
+	    break;
2127
+    }
2128
+    if (i == bcs->count)
2129
+	bc = NULL;
2130
+    if (!bc) {
2131
+	/* no loaded bytecode found, load the builtin one! */
2132
+	struct cli_dbio dbio;
2133
+	bc = cli_calloc(1, sizeof(*bc));
2134
+	if (!bc) {
2135
+	    cli_errmsg("Out of memory allocating bytecode\n");
2136
+	    return CL_EMEM;
2137
+	}
2138
+	builtin = 1;
2139
+
2140
+	memset(&dbio, 0, sizeof(dbio));
2141
+	dbio.usebuf = 1;
2142
+	dbio.bufpt = dbio.buf = (char*)builtin_cbc;
2143
+	dbio.bufsize = strlen(builtin_cbc)+1;
2144
+	if (!dbio.bufsize || dbio.bufpt[dbio.bufsize-2] != '\n') {
2145
+	    cli_errmsg("Invalid builtin bytecode: missing terminator\n");
2146
+	    free(bc);
2147
+	    return CL_EMALFDB;
2148
+	}
2149
+
2150
+	rc = cli_bytecode_load(bc, NULL, &dbio, 1);
2151
+	if (rc) {
2152
+	    cli_errmsg("Failed to load builtin %s bytecode\n", desc);
2153
+	    free(bc);
2154
+	    return rc;
2155
+	}
2156
+    }
2157
+    rc = cli_bytecode_prepare_interpreter(bc);
2158
+    if (rc) {
2159
+	cli_errmsg("Failed to prepare %s %s bytecode for interpreter: %s\n",
2160
+		   builtin ? "builtin" : "loaded", desc, cl_strerror(rc));
2161
+    }
2162
+    if (bc->state != bc_interp) {
2163
+	cli_errmsg("Failed to prepare %s %s bytecode for interpreter\n",
2164
+		   builtin ? "builtin" : "loaded", desc);
2165
+	rc = CL_EMALFDB;
2166
+    }
2167
+    if (!rc) {
2168
+	cli_bytecode_context_setfuncid(ctx, bc, 0);
2169
+	cli_dbgmsg("Bytecode: %s running (%s)\n", desc,
2170
+		   builtin ? "builtin" : "loaded");
2171
+	rc = cli_bytecode_run(bcs, bc, ctx);
2172
+    }
2173
+    if (rc) {
2174
+	cli_errmsg("Failed to execute %s %s bytecode: %s\n",builtin ? "builtin":"loaded",
2175
+		   desc, cl_strerror(rc));
2176
+    }
2177
+    if (builtin) {
2178
+	cli_bytecode_destroy(bc);
2179
+	free(bc);
2180
+    }
2181
+    return rc;
2182
+}
2183
+
2093 2184
 int cli_bytecode_prepare(struct cl_engine *engine, struct cli_all_bc *bcs, unsigned dconfmask)
2094 2185
 {
2095
-    unsigned i, interp = 0;
2186
+    unsigned i, interp = 0, jitok = 0, jitcount=0;
2096 2187
     int rc1, rc2, rc;
2188
+    struct cli_bc_ctx *ctx;
2097 2189
 
2098
-    /* run both selfchecks */
2099
-    rc1 = selfcheck(0, bcs->engine);
2100
-    rc2 = selfcheck(1, bcs->engine);
2101
-    if (rc1)
2102
-	return rc1;
2103
-    if (rc2)
2104
-	return rc2;
2190
+    cli_detect_environment(&bcs->env);
2191
+    switch (bcs->env.arch) {
2192
+	case arch_i386:
2193
+	case arch_x86_64:
2194
+	    if (!(dconfmask & BYTECODE_JIT_X86)) {
2195
+		cli_dbgmsg("Bytecode: disabled on X86 via DCONF\n");
2196
+		if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2197
+		    return CL_EBYTECODE_TESTFAIL;
2198
+	    }
2199
+	    break;
2200
+	case arch_ppc32:
2201
+	case arch_ppc64:
2202
+	    if (!(dconfmask & BYTECODE_JIT_PPC)) {
2203
+		cli_dbgmsg("Bytecode: disabled on PPC via DCONF\n");
2204
+		if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2205
+		    return CL_EBYTECODE_TESTFAIL;
2206
+	    }
2207
+	    break;
2208
+	case arch_arm:
2209
+	    if (!(dconfmask & BYTECODE_JIT_ARM)) {
2210
+		cli_dbgmsg("Bytecode: disabled on ARM via DCONF\n");
2211
+		if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2212
+		    return CL_EBYTECODE_TESTFAIL;
2213
+	    }
2214
+	    break;
2215
+	default:
2216
+	    cli_dbgmsg("Bytecode: JIT not supported on this architecture, falling back\n");
2217
+	    if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2218
+		return CL_EBYTECODE_TESTFAIL;
2219
+	    break;
2220
+    }
2221
+    cli_dbgmsg("Bytecode: mode is %d\n", engine->bytecode_mode);
2105 2222
 
2106
-    if (cli_bytecode_prepare_jit(bcs) == CL_SUCCESS) {
2107
-	cli_dbgmsg("Bytecode: %u bytecode prepared with JIT\n", bcs->count);
2223
+    ctx = cli_bytecode_context_alloc();
2224
+    if (!ctx) {
2225
+	cli_errmsg("Bytecode: failed to allocate bytecode context\n");
2226
+	return CL_EMEM;
2227
+    }
2228
+    rc = run_builtin_or_loaded(bcs, BC_STARTUP, "", ctx, "BC_STARTUP");
2229
+    if (rc != CL_SUCCESS) {
2230
+	cli_warnmsg("Bytecode: BC_STARTUP failed to run, disabling ALL bytecodes! Please report to http://bugs.clamav.net\n");
2231
+	ctx->bytecode_disable_status = 2;
2232
+    } else {
2233
+	cli_dbgmsg("Bytecode: disable status is %d\n", ctx->bytecode_disable_status);
2234
+	rc = cli_bytecode_context_getresult_int(ctx);
2235
+	if (rc) {
2236
+	    cli_warnmsg("Bytecode: selftest failed with code %d. Please report to http://bugs.clamav.net\n",
2237
+			rc);
2238
+	    if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
2239
+		return CL_EBYTECODE_TESTFAIL;
2240
+	}
2241
+    }
2242
+    switch (ctx->bytecode_disable_status) {
2243
+	case 1:
2244
+	    if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2245
+		return CL_EBYTECODE_TESTFAIL;
2246
+	    break;
2247
+	case 2:
2248
+	    if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2249
+		return CL_EBYTECODE_TESTFAIL;
2250
+	    break;
2251
+	default:
2252
+	    break;
2253
+    }
2254
+    cli_bytecode_context_destroy(ctx);
2255
+
2256
+    if (engine->bytecode_mode != CL_BYTECODE_MODE_INTERPRETER &&
2257
+	engine->bytecode_mode != CL_BYTECODE_MODE_OFF) {
2258
+	rc = cli_bytecode_prepare_jit(bcs);
2259
+	if (rc == CL_SUCCESS) {
2260
+	    jitok = 1;
2261
+	    cli_dbgmsg("Bytecode: %u bytecode prepared with JIT\n", bcs->count);
2262
+	    if (engine->bytecode_mode != CL_BYTECODE_MODE_TEST)
2263
+		return CL_SUCCESS;
2264
+	}
2265
+	if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2266
+	    cli_errmsg("Bytecode: JIT required, but not all bytecodes could be prepared with JIT\n");
2267
+	    return CL_EMALFDB;
2268
+	}
2269
+    } else {
2270
+	cli_bytecode_done_jit(bcs, 0);
2271
+    }
2272
+
2273
+    if (!(dconfmask & BYTECODE_INTERPRETER)) {
2274
+	cli_dbgmsg("Bytecode: needs interpreter, but interpreter is disabled\n");
2275
+	if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2276
+	    return CL_EBYTECODE_TESTFAIL;
2277
+    }
2278
+
2279
+    if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2280
+	for (i=0;i<bcs->count;i++)
2281
+	    bcs->all_bcs[i].state = bc_disabled;
2282
+	cli_dbgmsg("Bytecode: ALL bytecodes disabled\n");
2108 2283
 	return CL_SUCCESS;
2109 2284
     }
2285
+
2110 2286
     for (i=0;i<bcs->count;i++) {
2111 2287
 	struct cli_bc *bc = &bcs->all_bcs[i];
2112
-	if (bc->state == bc_interp || bc->state == bc_jit)
2288
+	if (bc->state == bc_jit) {
2289
+	    jitcount++;
2113 2290
 	    continue;
2114
-	if (!(dconfmask & BYTECODE_INTERPRETER)) {
2115
-	    cli_warnmsg("Bytecode needs interpreter, but interpreter is disabled\n");
2291
+	}
2292
+	if (bc->state == bc_interp) {
2293
+	    interp++;
2116 2294
 	    continue;
2117 2295
 	}
2118 2296
 	rc = cli_bytecode_prepare_interpreter(bc);
2119
-	interp++;
2120
-	if (rc != CL_SUCCESS)
2297
+	if (rc != CL_SUCCESS) {
2298
+	    bc->state = bc_disabled;
2299
+	    cli_warnmsg("Bytecode: %d failed to prepare for interpreter mode\n", bc->id);
2121 2300
 	    return rc;
2301
+	}
2302
+	interp++;
2122 2303
     }
2123 2304
     cli_dbgmsg("Bytecode: %u bytecode prepared with JIT, "
2124
-	       "%u prepared with interpreter\n", bcs->count-interp, interp);
2305
+	       "%u prepared with interpreter, %u failed\n", jitcount, interp,
2306
+	       bcs->count - jitcount - interp);
2125 2307
     return CL_SUCCESS;
2126 2308
 }
2127 2309
 
... ...
@@ -2294,7 +2481,8 @@ void cli_bytecode_describe(const struct cli_bc *bc)
2294 2294
     printf("Bytecode format functionality level: %u\n", bc->metadata.formatlevel);
2295 2295
     printf("Bytecode metadata:\n\tcompiler version: %s\n",
2296 2296
 	   bc->metadata.compiler ? bc->metadata.compiler : "N/A");
2297
-    printf("\tcompiled on: %s",
2297
+    printf("\tcompiled on: (%d) %s",
2298
+	   stamp,
2298 2299
 	   cli_ctime(&stamp, buf, sizeof(buf)));
2299 2300
     printf("\tcompiled by: %s\n", bc->metadata.sigmaker ? bc->metadata.sigmaker : "N/A");
2300 2301
     /*TODO: parse and display arch name, also take it into account when
... ...
@@ -2305,6 +2493,9 @@ void cli_bytecode_describe(const struct cli_bc *bc)
2305 2305
 	case BC_GENERIC:
2306 2306
 	    puts("generic, not loadable by clamscan/clamd");
2307 2307
 	    break;
2308
+	case BC_STARTUP:
2309
+	    puts("run on startup (unique)");
2310
+	    break;
2308 2311
 	case BC_LOGICAL:
2309 2312
 	    puts("logical only");
2310 2313
 	    break;
... ...
@@ -44,6 +44,7 @@ struct DISASM_RESULT;
44 44
 enum BytecodeKind {
45 45
     /** generic bytecode, not tied a specific hook */
46 46
     BC_GENERIC=0,
47
+    BC_STARTUP=1,
47 48
     _BC_START_HOOKS=256,
48 49
     /** triggered by a logical signature */
49 50
     BC_LOGICAL=256,
... ...
@@ -188,6 +188,8 @@ struct cli_bc_ctx {
188 188
     unsigned nmaps;
189 189
     unsigned containertype;
190 190
     unsigned extracted_file_input;
191
+    const struct cli_environment *env;
192
+    unsigned bytecode_disable_status;
191 193
 };
192 194
 struct cli_all_bc;
193 195
 int cli_vm_execute(const struct cli_bc *bc, struct cli_bc_ctx *ctx, const struct cli_bc_func *func, const struct cli_bc_inst *inst);
... ...
@@ -1407,6 +1407,7 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1407 1407
     struct cli_bc *bc;
1408 1408
     unsigned sigs = 0;
1409 1409
     unsigned security_trust = 0;
1410
+    unsigned i;
1410 1411
 
1411 1412
 
1412 1413
     /* TODO: virusname have a common prefix, and whitelist by that */
... ...
@@ -1497,6 +1498,20 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1497 1497
 	    }
1498 1498
 	    engine->hooks[hook][cnt-1] = bcs->count-1;
1499 1499
 	} else switch (bc->kind) {
1500
+	    case BC_STARTUP:
1501
+		for (i=0;i<bcs->count-1;i++)
1502
+		    if (bcs->all_bcs[i].kind == BC_STARTUP) {
1503
+			struct cli_bc *bc0 = &bcs->all_bcs[i];
1504
+			cli_errmsg("Can only load 1 BC_STARTUP bytecode, attempted to load 2nd!\n");
1505
+			cli_warnmsg("Previous BC_STARTUP: %d %d by %s\n",
1506
+				    bc0->id, bc0->metadata.timestamp,
1507
+				    bc0->metadata.sigmaker ? bc0->metadata.sigmaker : "N/A");
1508
+			cli_warnmsg("Conflicting BC_STARTUP: %d %d by %s\n",
1509
+				    bc->id, bc->metadata.timestamp,
1510
+				    bc->metadata.sigmaker ? bc->metadata.sigmaker : "N/A");
1511
+			return CL_EMALFDB;
1512
+		    }
1513
+		break;
1500 1514
 	    default:
1501 1515
 		cli_errmsg("Bytecode: unhandled bytecode kind %u\n", bc->kind);
1502 1516
 		return CL_EMALFDB;