Browse code

min/max functionality level support.

Török Edvin authored on 2010/04/27 00:19:28
Showing 6 changed files
... ...
@@ -1,3 +1,7 @@
1
+Mon Apr 26 18:18:47 EEST 2010 (edwin)
2
+-------------------------------------
3
+ * bytecode: min/max functionality level support.
4
+
1 5
 Mon Apr 26 16:12:50 CEST 2010 (tk)
2 6
 ----------------------------------
3 7
  * libclamav/others.c: bump f-level
... ...
@@ -309,8 +309,12 @@ int main(int argc, char *argv[])
309 309
 	exit(4);
310 310
     }
311 311
     fclose(f);
312
+    if (bc->state == bc_skip) {
313
+	fprintf(stderr,"bytecode load skipped\n");
314
+	exit(0);
315
+    }
312 316
     if (cli_debug_flag)
313
-    printf("[clambc] Bytecode loaded\n");
317
+	printf("[clambc] Bytecode loaded\n");
314 318
     if (optget(opts, "info")->enabled) {
315 319
 	cli_bytecode_describe(bc);
316 320
     } else if (optget(opts, "printsrc")->enabled) {
... ...
@@ -34,6 +34,7 @@
34 34
 #include "bytecode_priv.h"
35 35
 #include "readdb.h"
36 36
 #include "scanners.h"
37
+#include "bytecode_api.h"
37 38
 #include "bytecode_api_impl.h"
38 39
 #include <string.h>
39 40
 
... ...
@@ -486,19 +487,23 @@ static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linel
486 486
     char ok = 1;
487 487
     unsigned offset, len, flevel;
488 488
     char *pos;
489
+
489 490
     if (strncmp((const char*)buffer, BC_HEADER, sizeof(BC_HEADER)-1)) {
490 491
 	cli_errmsg("Missing file magic in bytecode");
491 492
 	return CL_EMALFDB;
492 493
     }
493 494
     offset = sizeof(BC_HEADER)-1;
494 495
     len = strlen((const char*)buffer);
495
-    flevel = readNumber(buffer, &offset, len, &ok);
496
+    bc->metadata.formatlevel = readNumber(buffer, &offset, len, &ok);
496 497
     if (!ok) {
497
-	cli_errmsg("Unable to parse functionality level in bytecode header\n");
498
+	cli_errmsg("Unable to parse (format) functionality level in bytecode header\n");
498 499
 	return CL_EMALFDB;
499 500
     }
500
-    if (flevel != BC_FUNC_LEVEL) {
501
-	cli_dbgmsg("Skipping bytecode with functionality level: %u (current %u)\n", flevel, BC_FUNC_LEVEL);
501
+    /* we support 2 bytecode formats */
502
+    if (bc->metadata.formatlevel != BC_FORMAT_096 &&
503
+	bc->metadata.formatlevel != BC_FORMAT_LEVEL) {
504
+	cli_dbgmsg("Skipping bytecode with (format) functionality level: %u (current %u)\n", 
505
+		   bc->metadata.formatlevel, BC_FORMAT_LEVEL);
502 506
 	return CL_BREAK;
503 507
     }
504 508
     /* Optimistic parsing, check for error only at the end.*/
... ...
@@ -506,9 +511,22 @@ static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linel
506 506
     bc->metadata.sigmaker = readString(buffer, &offset, len, &ok);
507 507
     bc->metadata.targetExclude = readNumber(buffer, &offset, len, &ok);
508 508
     bc->kind = readNumber(buffer, &offset, len, &ok);
509
-    bc->metadata.maxStack = readNumber(buffer, &offset, len, &ok);
510
-    bc->metadata.maxMem = readNumber(buffer, &offset, len, &ok);
511
-    bc->metadata.maxTime = readNumber(buffer, &offset, len, &ok);
509
+    bc->metadata.minfunc = readNumber(buffer, &offset, len, &ok);
510
+    bc->metadata.maxfunc = readNumber(buffer, &offset, len, &ok);
511
+    flevel = cl_retflevel();
512
+    /* in 0.96 these 2 fields are unused / zero, in post 0.96 these mean
513
+     * min/max flevel.
514
+     * So 0 for min/max means no min/max
515
+     * Note that post 0.96 bytecode/bytecode lsig needs format 7, because
516
+     * 0.96 doesn't check lsig functionality level.
517
+     */
518
+    if ((bc->metadata.minfunc && bc->metadata.minfunc > flevel) ||
519
+        (bc->metadata.maxfunc && bc->metadata.maxfunc < flevel)) {
520
+      cli_dbgmsg("Skipping bytecode with (engine) functionality level %u-%u (current %u)\n",
521
+                 bc->metadata.minfunc, bc->metadata.maxfunc, flevel);
522
+      return CL_BREAK;
523
+    }
524
+    bc->metadata.maxresource = readNumber(buffer, &offset, len, &ok);
512 525
     bc->metadata.compiler = readString(buffer, &offset, len, &ok);
513 526
     bc->num_types = readNumber(buffer, &offset, len, &ok);
514 527
     bc->num_func = readNumber(buffer, &offset, len, &ok);
... ...
@@ -2046,7 +2064,7 @@ void cli_bytecode_describe(const struct cli_bc *bc)
2046 2046
     }
2047 2047
 
2048 2048
     stamp = bc->metadata.timestamp;
2049
-    printf("Bytecode format functionality level: %u\n", BC_FUNC_LEVEL);
2049
+    printf("Bytecode format functionality level: %u\n", bc->metadata.formatlevel);
2050 2050
     printf("Bytecode metadata:\n\tcompiler version: %s\n",
2051 2051
 	   bc->metadata.compiler ? bc->metadata.compiler : "N/A");
2052 2052
     printf("\tcompiled on: %s",
... ...
@@ -2070,6 +2088,9 @@ void cli_bytecode_describe(const struct cli_bc *bc)
2070 2070
 	    printf("Unknown (type %u)", bc->kind);
2071 2071
 	    break;
2072 2072
     }
2073
+    /* 0 means no limit */
2074
+    printf("\tbytecode functionality level: %u - %u\n",
2075
+	   bc->metadata.minfunc, bc->metadata.maxfunc);
2073 2076
     printf("\tbytecode logical signature: %s\n",
2074 2077
 	       bc->lsig ? bc->lsig : "<none>");
2075 2078
     printf("\tvirusname prefix: %s\n",
... ...
@@ -52,7 +52,7 @@ enum BytecodeKind {
52 52
     _BC_LAST_HOOK
53 53
 };
54 54
 
55
-static const unsigned PE_INVALID_RVA = 0xFFFFFFFF;
55
+enum { PE_INVALID_RVA = 0xFFFFFFFF };
56 56
 
57 57
 #ifdef __CLAMBC__
58 58
 
... ...
@@ -70,7 +70,12 @@ extern const uint32_t __clambc_filesize[1];
70 70
 /** Kind of the bytecode */
71 71
 const uint16_t __clambc_kind;
72 72
 
73
-uint32_t test1(uint32_t, uint32_t);
73
+/** Test api. 
74
+  @param a 0xf00dbeef
75
+  @param b 0xbeeff00d
76
+  @return 0x12345678 if parameters match, 0x55 otherwise
77
+*/
78
+uint32_t test1(uint32_t a, uint32_t b);
74 79
 
75 80
 /**
76 81
  * @brief Reads specified amount of bytes from the current file
... ...
@@ -82,7 +87,6 @@ uint32_t test1(uint32_t, uint32_t);
82 82
  */
83 83
 int32_t read(uint8_t *data, int32_t size);
84 84
 
85
-
86 85
 enum {
87 86
     /**set file position to specified absolute position */
88 87
     SEEK_SET=0,
... ...
@@ -188,6 +192,9 @@ int32_t file_byteat(uint32_t offset);
188 188
   @return pointer to allocated memory */
189 189
 void* malloc(uint32_t size);
190 190
 
191
+/** Test api2.
192
+  * @param a 0xf00d
193
+  * @return 0xd00f if parameter matches, 0x5555 otherwise */
191 194
 uint32_t test2(uint32_t a);
192 195
 
193 196
 /** Gets information about the specified PE section.
... ...
@@ -206,7 +213,8 @@ int32_t get_pe_section(struct cli_exe_section *section, uint32_t num);
206 206
  *          number bytes available in buffer (starting from 0)
207 207
  * The character at the cursor will be at position 0 after this call.
208 208
  */
209
-int32_t fill_buffer(uint8_t* buffer, uint32_t len, uint32_t filled, uint32_t cur, uint32_t fill);
209
+int32_t fill_buffer(uint8_t* buffer, uint32_t len, uint32_t filled,
210
+                    uint32_t cursor, uint32_t fill);
210 211
 
211 212
 /**
212 213
  * Prepares for extracting a new file, if we've already extracted one it scans
... ...
@@ -224,31 +232,172 @@ int32_t extract_new(int32_t id);
224 224
   */
225 225
 int32_t read_number(uint32_t radix);
226 226
 
227
+/**
228
+  * Creates a new hashset and returns its id.
229
+  * @return ID for new hashset */
227 230
 int32_t hashset_new(void);
231
+
232
+/**
233
+  * Add a new 32-bit key to the hashset.
234
+  * @param hs ID of hashset (from hashset_new)
235
+  * @param key the key to add
236
+  * @return 0 on success */
228 237
 int32_t hashset_add(int32_t hs, uint32_t key);
238
+
239
+/**
240
+  * Remove a 32-bit key from the hashset.
241
+  * @param hs ID of hashset (from hashset_new)
242
+  * @param key the key to remove
243
+  * @return 0 on success */
229 244
 int32_t hashset_remove(int32_t hs, uint32_t key);
245
+
246
+/**
247
+  * Returns whether the hashset contains the specified key.
248
+  * @param hs ID of hashset (from hashset_new)
249
+  * @param key the key to lookup
250
+  * @return 1 if found, 0 if not found, <0 on invalid hashset ID */
230 251
 int32_t hashset_contains(int32_t hs, uint32_t key);
252
+
253
+/**
254
+  * Deallocates the memory used by the specified hashset.
255
+  * Trying to use the hashset after this will result in an error.
256
+  * The hashset may not be used after this.
257
+  * All hashsets are automatically deallocated when bytecode
258
+  * finishes execution.
259
+  * @param id ID of hashset (from hashset_new)
260
+  * @return 0 on success */
231 261
 int32_t hashset_done(int32_t id);
262
+
263
+/**
264
+  * Returns whether the hashset is empty.
265
+  * @param id ID of hashset (from hashset_new)
266
+  * @return 0 on success */
232 267
 int32_t hashset_empty(int32_t id);
233 268
 
269
+/**
270
+  * Creates a new pipe with the specified buffer size
271
+  * @param size size of buffer
272
+  * @return ID of newly created buffer_pipe */
234 273
 int32_t  buffer_pipe_new(uint32_t size);
274
+
275
+/**
276
+  * Same as buffer_pipe_new, except the pipe's input is tied
277
+  * to the current file, at the specified position.
278
+  * @param pos starting position of pipe input in current file
279
+  * @return ID of newly created buffer_pipe */
235 280
 int32_t  buffer_pipe_new_fromfile(uint32_t pos);
281
+
282
+/**
283
+  * Returns the amount of bytes available to read.
284
+  * @param id ID of buffer_pipe
285
+  * @return amount of bytes available to read */
236 286
 uint32_t buffer_pipe_read_avail(int32_t id);
287
+
288
+/**
289
+  * Returns a pointer to the buffer for reading.
290
+  * The 'amount' parameter should be obtained by a call to
291
+  * buffer_pipe_read_avail().
292
+  * @param id ID of buffer_pipe
293
+  * @param amount amount of bytes to read
294
+  * @return pointer to buffer, or NULL if buffer has less than
295
+  specified amount */
237 296
 uint8_t *buffer_pipe_read_get(int32_t id, uint32_t amount);
297
+
298
+/**
299
+  * Updates read cursor in buffer_pipe.
300
+  * @param id ID of buffer_pipe
301
+  * @param amount amount of bytes to move read cursor
302
+  * @return 0 on success */
238 303
 int32_t  buffer_pipe_read_stopped(int32_t id, uint32_t amount);
304
+
305
+/**
306
+  * Returns the amount of bytes available for writing.
307
+  * @param id ID of buffer_pipe
308
+  * @return amount of bytes available for writing */
239 309
 uint32_t buffer_pipe_write_avail(int32_t id);
310
+
311
+/**
312
+  * Returns pointer to writable buffer.
313
+  * The 'size' parameter should be obtained by a call to
314
+  * buffer_pipe_write_avail().
315
+  * @param id ID of buffer_pipe
316
+  * @param size amount of bytes to write
317
+  * @return pointer to write buffer, or NULL if requested amount
318
+  is more than what is available in the buffer */
240 319
 uint8_t *buffer_pipe_write_get(int32_t id, uint32_t size);
320
+
321
+/**
322
+  * Updates the write cursor in buffer_pipe.
323
+  * @param id ID of buffer_pipe
324
+  * @param amount amount of bytes to move write cursor
325
+  * @return 0 on success */
241 326
 int32_t  buffer_pipe_write_stopped(int32_t id, uint32_t amount);
327
+
328
+/**
329
+  * Deallocate memory used by buffer.
330
+  * After this all attempts to use this buffer will result in error.
331
+  * All buffer_pipes are automatically deallocated when bytecode
332
+  * finishes execution.
333
+  * @param id ID of buffer_pipe
334
+  * @return 0 on success */
242 335
 int32_t  buffer_pipe_done(int32_t id);
243 336
 
337
+/**
338
+  * Initializes inflate data structures for decompressing data
339
+  * 'from_buffer' and writing uncompressed data to 'to_buffer'.
340
+  * @param from_buffer ID of buffer_pipe to read compressed data from
341
+  * @param to_buffer ID of buffer_pipe to write decompressed data to
342
+  * @param windowBits (see zlib documentation)
343
+  * @return ID of newly created inflate data structure, <0 on failure */
244 344
 int32_t inflate_init(int32_t from_buffer, int32_t to_buffer, int32_t windowBits);
345
+
346
+/**
347
+  * Inflate all available data in the input buffer, and write to output buffer.
348
+  * Stops when the input buffer becomes empty, or write buffer becomes full.
349
+  * Also attempts to recover from corrupted inflate stream (via inflateSync).
350
+  * This function can be called repeatedly on success after filling the input
351
+  * buffer, and flushing the output buffer.
352
+  * The inflate stream is done processing when 0 bytes are available from output
353
+  * buffer, and input buffer is not empty.
354
+  * @param id ID of inflate data structure
355
+  * @return 0 on success, zlib error code otherwise */
245 356
 int32_t inflate_process(int32_t id);
357
+
358
+/**
359
+  * Deallocates inflate data structure.
360
+  * Using the inflate data structure after this will result in an error.
361
+  * All inflate data structures are automatically deallocated when bytecode
362
+  * finishes execution.
363
+  * @param id ID of inflate data structure
364
+  * @return 0 on success.*/
246 365
 int32_t inflate_done(int32_t id);
247 366
 
367
+/** 
368
+  * Report a runtime error at the specified locationID.
369
+  * @param locationid (line << 8) | (column&0xff)
370
+  * @return 0 */
248 371
 int32_t bytecode_rt_error(int32_t locationid);
249 372
 
373
+/**
374
+  * Initializes JS normalizer for reading 'from_buffer'.
375
+  * Normalized JS will be written to a single tempfile,
376
+  * one normalized JS per line, and automatically scanned 
377
+  * when the bytecode finishes execution. 
378
+  * @param from_buffer ID of buffer_pipe to read javascript from
379
+  * @return ID of JS normalizer, <0 on failure */
250 380
 int32_t jsnorm_init(int32_t from_buffer);
381
+
382
+/**
383
+  * Normalize all javascript from the input buffer, and write to tempfile.
384
+  * You can call this function repeatedly on success, if you (re)fill the input
385
+  * buffer.
386
+  * @param id ID of JS normalizer
387
+  * @return 0 on success, <0 on failure */
251 388
 int32_t jsnorm_process(int32_t id);
389
+
390
+/**
391
+  * Flushes JS normalizer.
392
+  * @param id ID of js normalizer to flush */
252 393
 int32_t jsnorm_done(int32_t id);
253 394
 
254 395
 #endif
... ...
@@ -26,12 +26,14 @@ struct bytecode_metadata {
26 26
     char *compiler;
27 27
     char *sigmaker;
28 28
     uint64_t timestamp;
29
-    unsigned long maxStack, maxMem;
30
-    unsigned long maxTime;
29
+    unsigned formatlevel;
30
+    unsigned minfunc, maxfunc;
31
+    unsigned maxresource;/* reserved */
31 32
     unsigned targetExclude;
32 33
 };
33 34
 
34
-#define BC_FUNC_LEVEL 6
35
+#define BC_FORMAT_096 6
36
+#define BC_FORMAT_LEVEL 7
35 37
 #define BC_HEADER "ClamBC"
36 38
 
37 39
 enum bc_opcode {
... ...
@@ -128,4 +130,5 @@ enum bc_global {
128 128
 };
129 129
 
130 130
 #define BC_START_TID 69
131
+
131 132
 #endif
... ...
@@ -1419,6 +1419,7 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1419 1419
     bc->id = bcs->count;/* must set after _load, since load zeroes */
1420 1420
     sigs++;
1421 1421
     if (bc->kind == BC_LOGICAL || bc->lsig) {
1422
+        unsigned oldsigs = sigs;
1422 1423
 	if (!bc->lsig) {
1423 1424
 	    cli_errmsg("Bytecode %s has logical kind, but missing logical signature!\n", dbname);
1424 1425
 	    return CL_EMALFDB;
... ...
@@ -1430,6 +1431,12 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1430 1430
 		       bc->lsig, dbname, cl_strerror(rc));
1431 1431
 	    return rc;
1432 1432
 	}
1433
+        if (sigs != oldsigs) {
1434
+          /* compiler ensures Engine field in lsig matches the one in bytecode,
1435
+           * so this should never happen. */
1436
+          cli_errmsg("Bytecode logical signature skipped, but bytecode itself not?");
1437
+          return CL_EMALFDB;
1438
+        }
1433 1439
     }
1434 1440
     if (bc->kind != BC_LOGICAL) {
1435 1441
 	if (bc->lsig) {