... | ... |
@@ -309,8 +309,12 @@ int main(int argc, char *argv[]) |
309 | 309 |
exit(4); |
310 | 310 |
} |
311 | 311 |
fclose(f); |
312 |
+ if (bc->state == bc_skip) { |
|
313 |
+ fprintf(stderr,"bytecode load skipped\n"); |
|
314 |
+ exit(0); |
|
315 |
+ } |
|
312 | 316 |
if (cli_debug_flag) |
313 |
- printf("[clambc] Bytecode loaded\n"); |
|
317 |
+ printf("[clambc] Bytecode loaded\n"); |
|
314 | 318 |
if (optget(opts, "info")->enabled) { |
315 | 319 |
cli_bytecode_describe(bc); |
316 | 320 |
} else if (optget(opts, "printsrc")->enabled) { |
... | ... |
@@ -34,6 +34,7 @@ |
34 | 34 |
#include "bytecode_priv.h" |
35 | 35 |
#include "readdb.h" |
36 | 36 |
#include "scanners.h" |
37 |
+#include "bytecode_api.h" |
|
37 | 38 |
#include "bytecode_api_impl.h" |
38 | 39 |
#include <string.h> |
39 | 40 |
|
... | ... |
@@ -486,19 +487,23 @@ static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linel |
486 | 486 |
char ok = 1; |
487 | 487 |
unsigned offset, len, flevel; |
488 | 488 |
char *pos; |
489 |
+ |
|
489 | 490 |
if (strncmp((const char*)buffer, BC_HEADER, sizeof(BC_HEADER)-1)) { |
490 | 491 |
cli_errmsg("Missing file magic in bytecode"); |
491 | 492 |
return CL_EMALFDB; |
492 | 493 |
} |
493 | 494 |
offset = sizeof(BC_HEADER)-1; |
494 | 495 |
len = strlen((const char*)buffer); |
495 |
- flevel = readNumber(buffer, &offset, len, &ok); |
|
496 |
+ bc->metadata.formatlevel = readNumber(buffer, &offset, len, &ok); |
|
496 | 497 |
if (!ok) { |
497 |
- cli_errmsg("Unable to parse functionality level in bytecode header\n"); |
|
498 |
+ cli_errmsg("Unable to parse (format) functionality level in bytecode header\n"); |
|
498 | 499 |
return CL_EMALFDB; |
499 | 500 |
} |
500 |
- if (flevel != BC_FUNC_LEVEL) { |
|
501 |
- cli_dbgmsg("Skipping bytecode with functionality level: %u (current %u)\n", flevel, BC_FUNC_LEVEL); |
|
501 |
+ /* we support 2 bytecode formats */ |
|
502 |
+ if (bc->metadata.formatlevel != BC_FORMAT_096 && |
|
503 |
+ bc->metadata.formatlevel != BC_FORMAT_LEVEL) { |
|
504 |
+ cli_dbgmsg("Skipping bytecode with (format) functionality level: %u (current %u)\n", |
|
505 |
+ bc->metadata.formatlevel, BC_FORMAT_LEVEL); |
|
502 | 506 |
return CL_BREAK; |
503 | 507 |
} |
504 | 508 |
/* Optimistic parsing, check for error only at the end.*/ |
... | ... |
@@ -506,9 +511,22 @@ static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linel |
506 | 506 |
bc->metadata.sigmaker = readString(buffer, &offset, len, &ok); |
507 | 507 |
bc->metadata.targetExclude = readNumber(buffer, &offset, len, &ok); |
508 | 508 |
bc->kind = readNumber(buffer, &offset, len, &ok); |
509 |
- bc->metadata.maxStack = readNumber(buffer, &offset, len, &ok); |
|
510 |
- bc->metadata.maxMem = readNumber(buffer, &offset, len, &ok); |
|
511 |
- bc->metadata.maxTime = readNumber(buffer, &offset, len, &ok); |
|
509 |
+ bc->metadata.minfunc = readNumber(buffer, &offset, len, &ok); |
|
510 |
+ bc->metadata.maxfunc = readNumber(buffer, &offset, len, &ok); |
|
511 |
+ flevel = cl_retflevel(); |
|
512 |
+ /* in 0.96 these 2 fields are unused / zero, in post 0.96 these mean |
|
513 |
+ * min/max flevel. |
|
514 |
+ * So 0 for min/max means no min/max |
|
515 |
+ * Note that post 0.96 bytecode/bytecode lsig needs format 7, because |
|
516 |
+ * 0.96 doesn't check lsig functionality level. |
|
517 |
+ */ |
|
518 |
+ if ((bc->metadata.minfunc && bc->metadata.minfunc > flevel) || |
|
519 |
+ (bc->metadata.maxfunc && bc->metadata.maxfunc < flevel)) { |
|
520 |
+ cli_dbgmsg("Skipping bytecode with (engine) functionality level %u-%u (current %u)\n", |
|
521 |
+ bc->metadata.minfunc, bc->metadata.maxfunc, flevel); |
|
522 |
+ return CL_BREAK; |
|
523 |
+ } |
|
524 |
+ bc->metadata.maxresource = readNumber(buffer, &offset, len, &ok); |
|
512 | 525 |
bc->metadata.compiler = readString(buffer, &offset, len, &ok); |
513 | 526 |
bc->num_types = readNumber(buffer, &offset, len, &ok); |
514 | 527 |
bc->num_func = readNumber(buffer, &offset, len, &ok); |
... | ... |
@@ -2046,7 +2064,7 @@ void cli_bytecode_describe(const struct cli_bc *bc) |
2046 | 2046 |
} |
2047 | 2047 |
|
2048 | 2048 |
stamp = bc->metadata.timestamp; |
2049 |
- printf("Bytecode format functionality level: %u\n", BC_FUNC_LEVEL); |
|
2049 |
+ printf("Bytecode format functionality level: %u\n", bc->metadata.formatlevel); |
|
2050 | 2050 |
printf("Bytecode metadata:\n\tcompiler version: %s\n", |
2051 | 2051 |
bc->metadata.compiler ? bc->metadata.compiler : "N/A"); |
2052 | 2052 |
printf("\tcompiled on: %s", |
... | ... |
@@ -2070,6 +2088,9 @@ void cli_bytecode_describe(const struct cli_bc *bc) |
2070 | 2070 |
printf("Unknown (type %u)", bc->kind); |
2071 | 2071 |
break; |
2072 | 2072 |
} |
2073 |
+ /* 0 means no limit */ |
|
2074 |
+ printf("\tbytecode functionality level: %u - %u\n", |
|
2075 |
+ bc->metadata.minfunc, bc->metadata.maxfunc); |
|
2073 | 2076 |
printf("\tbytecode logical signature: %s\n", |
2074 | 2077 |
bc->lsig ? bc->lsig : "<none>"); |
2075 | 2078 |
printf("\tvirusname prefix: %s\n", |
... | ... |
@@ -52,7 +52,7 @@ enum BytecodeKind { |
52 | 52 |
_BC_LAST_HOOK |
53 | 53 |
}; |
54 | 54 |
|
55 |
-static const unsigned PE_INVALID_RVA = 0xFFFFFFFF; |
|
55 |
+enum { PE_INVALID_RVA = 0xFFFFFFFF }; |
|
56 | 56 |
|
57 | 57 |
#ifdef __CLAMBC__ |
58 | 58 |
|
... | ... |
@@ -70,7 +70,12 @@ extern const uint32_t __clambc_filesize[1]; |
70 | 70 |
/** Kind of the bytecode */ |
71 | 71 |
const uint16_t __clambc_kind; |
72 | 72 |
|
73 |
-uint32_t test1(uint32_t, uint32_t); |
|
73 |
+/** Test api. |
|
74 |
+ @param a 0xf00dbeef |
|
75 |
+ @param b 0xbeeff00d |
|
76 |
+ @return 0x12345678 if parameters match, 0x55 otherwise |
|
77 |
+*/ |
|
78 |
+uint32_t test1(uint32_t a, uint32_t b); |
|
74 | 79 |
|
75 | 80 |
/** |
76 | 81 |
* @brief Reads specified amount of bytes from the current file |
... | ... |
@@ -82,7 +87,6 @@ uint32_t test1(uint32_t, uint32_t); |
82 | 82 |
*/ |
83 | 83 |
int32_t read(uint8_t *data, int32_t size); |
84 | 84 |
|
85 |
- |
|
86 | 85 |
enum { |
87 | 86 |
/**set file position to specified absolute position */ |
88 | 87 |
SEEK_SET=0, |
... | ... |
@@ -188,6 +192,9 @@ int32_t file_byteat(uint32_t offset); |
188 | 188 |
@return pointer to allocated memory */ |
189 | 189 |
void* malloc(uint32_t size); |
190 | 190 |
|
191 |
+/** Test api2. |
|
192 |
+ * @param a 0xf00d |
|
193 |
+ * @return 0xd00f if parameter matches, 0x5555 otherwise */ |
|
191 | 194 |
uint32_t test2(uint32_t a); |
192 | 195 |
|
193 | 196 |
/** Gets information about the specified PE section. |
... | ... |
@@ -206,7 +213,8 @@ int32_t get_pe_section(struct cli_exe_section *section, uint32_t num); |
206 | 206 |
* number bytes available in buffer (starting from 0) |
207 | 207 |
* The character at the cursor will be at position 0 after this call. |
208 | 208 |
*/ |
209 |
-int32_t fill_buffer(uint8_t* buffer, uint32_t len, uint32_t filled, uint32_t cur, uint32_t fill); |
|
209 |
+int32_t fill_buffer(uint8_t* buffer, uint32_t len, uint32_t filled, |
|
210 |
+ uint32_t cursor, uint32_t fill); |
|
210 | 211 |
|
211 | 212 |
/** |
212 | 213 |
* Prepares for extracting a new file, if we've already extracted one it scans |
... | ... |
@@ -224,31 +232,172 @@ int32_t extract_new(int32_t id); |
224 | 224 |
*/ |
225 | 225 |
int32_t read_number(uint32_t radix); |
226 | 226 |
|
227 |
+/** |
|
228 |
+ * Creates a new hashset and returns its id. |
|
229 |
+ * @return ID for new hashset */ |
|
227 | 230 |
int32_t hashset_new(void); |
231 |
+ |
|
232 |
+/** |
|
233 |
+ * Add a new 32-bit key to the hashset. |
|
234 |
+ * @param hs ID of hashset (from hashset_new) |
|
235 |
+ * @param key the key to add |
|
236 |
+ * @return 0 on success */ |
|
228 | 237 |
int32_t hashset_add(int32_t hs, uint32_t key); |
238 |
+ |
|
239 |
+/** |
|
240 |
+ * Remove a 32-bit key from the hashset. |
|
241 |
+ * @param hs ID of hashset (from hashset_new) |
|
242 |
+ * @param key the key to remove |
|
243 |
+ * @return 0 on success */ |
|
229 | 244 |
int32_t hashset_remove(int32_t hs, uint32_t key); |
245 |
+ |
|
246 |
+/** |
|
247 |
+ * Returns whether the hashset contains the specified key. |
|
248 |
+ * @param hs ID of hashset (from hashset_new) |
|
249 |
+ * @param key the key to lookup |
|
250 |
+ * @return 1 if found, 0 if not found, <0 on invalid hashset ID */ |
|
230 | 251 |
int32_t hashset_contains(int32_t hs, uint32_t key); |
252 |
+ |
|
253 |
+/** |
|
254 |
+ * Deallocates the memory used by the specified hashset. |
|
255 |
+ * Trying to use the hashset after this will result in an error. |
|
256 |
+ * The hashset may not be used after this. |
|
257 |
+ * All hashsets are automatically deallocated when bytecode |
|
258 |
+ * finishes execution. |
|
259 |
+ * @param id ID of hashset (from hashset_new) |
|
260 |
+ * @return 0 on success */ |
|
231 | 261 |
int32_t hashset_done(int32_t id); |
262 |
+ |
|
263 |
+/** |
|
264 |
+ * Returns whether the hashset is empty. |
|
265 |
+ * @param id ID of hashset (from hashset_new) |
|
266 |
+ * @return 0 on success */ |
|
232 | 267 |
int32_t hashset_empty(int32_t id); |
233 | 268 |
|
269 |
+/** |
|
270 |
+ * Creates a new pipe with the specified buffer size |
|
271 |
+ * @param size size of buffer |
|
272 |
+ * @return ID of newly created buffer_pipe */ |
|
234 | 273 |
int32_t buffer_pipe_new(uint32_t size); |
274 |
+ |
|
275 |
+/** |
|
276 |
+ * Same as buffer_pipe_new, except the pipe's input is tied |
|
277 |
+ * to the current file, at the specified position. |
|
278 |
+ * @param pos starting position of pipe input in current file |
|
279 |
+ * @return ID of newly created buffer_pipe */ |
|
235 | 280 |
int32_t buffer_pipe_new_fromfile(uint32_t pos); |
281 |
+ |
|
282 |
+/** |
|
283 |
+ * Returns the amount of bytes available to read. |
|
284 |
+ * @param id ID of buffer_pipe |
|
285 |
+ * @return amount of bytes available to read */ |
|
236 | 286 |
uint32_t buffer_pipe_read_avail(int32_t id); |
287 |
+ |
|
288 |
+/** |
|
289 |
+ * Returns a pointer to the buffer for reading. |
|
290 |
+ * The 'amount' parameter should be obtained by a call to |
|
291 |
+ * buffer_pipe_read_avail(). |
|
292 |
+ * @param id ID of buffer_pipe |
|
293 |
+ * @param amount amount of bytes to read |
|
294 |
+ * @return pointer to buffer, or NULL if buffer has less than |
|
295 |
+ specified amount */ |
|
237 | 296 |
uint8_t *buffer_pipe_read_get(int32_t id, uint32_t amount); |
297 |
+ |
|
298 |
+/** |
|
299 |
+ * Updates read cursor in buffer_pipe. |
|
300 |
+ * @param id ID of buffer_pipe |
|
301 |
+ * @param amount amount of bytes to move read cursor |
|
302 |
+ * @return 0 on success */ |
|
238 | 303 |
int32_t buffer_pipe_read_stopped(int32_t id, uint32_t amount); |
304 |
+ |
|
305 |
+/** |
|
306 |
+ * Returns the amount of bytes available for writing. |
|
307 |
+ * @param id ID of buffer_pipe |
|
308 |
+ * @return amount of bytes available for writing */ |
|
239 | 309 |
uint32_t buffer_pipe_write_avail(int32_t id); |
310 |
+ |
|
311 |
+/** |
|
312 |
+ * Returns pointer to writable buffer. |
|
313 |
+ * The 'amount' parameter should be obtained by a call to |
|
314 |
+ * buffer_pipe_write_avail(). |
|
315 |
+ * @param id ID of buffer_pipe |
|
316 |
+ * @param size amount of bytes to write |
|
317 |
+ * @return pointer to write buffer, or NULL if requested amount |
|
318 |
+ is more than what is available in the buffer */ |
|
240 | 319 |
uint8_t *buffer_pipe_write_get(int32_t id, uint32_t size); |
320 |
+ |
|
321 |
+/** |
|
322 |
+ * Updates the write cursor in buffer_pipe. |
|
323 |
+ * @param id ID of buffer_pipe |
|
324 |
+ * @param amount amount of bytes to move write cursor |
|
325 |
+ * @return 0 on success */ |
|
241 | 326 |
int32_t buffer_pipe_write_stopped(int32_t id, uint32_t amount); |
327 |
+ |
|
328 |
+/** |
|
329 |
+ * Deallocate memory used by buffer. |
|
330 |
+ * After this all attempts to use this buffer will result in error. |
|
331 |
+ * All buffer_pipes are automatically deallocated when bytecode |
|
332 |
+ * finishes execution. |
|
333 |
+ * @param id ID of buffer_pipe |
|
334 |
+ * @return 0 on success */ |
|
242 | 335 |
int32_t buffer_pipe_done(int32_t id); |
243 | 336 |
|
337 |
+/** |
|
338 |
+ * Initializes inflate data structures for decompressing data |
|
339 |
+ * 'from_buffer' and writing uncompressed data to 'to_buffer'. |
|
340 |
+ * @param from_buffer ID of buffer_pipe to read compressed data from |
|
341 |
+ * @param to_buffer ID of buffer_pipe to write decompressed data to |
|
342 |
+ * @param windowBits (see zlib documentation) |
|
343 |
+ * @return ID of newly created inflate data structure, <0 on failure */ |
|
244 | 344 |
int32_t inflate_init(int32_t from_buffer, int32_t to_buffer, int32_t windowBits); |
345 |
+ |
|
346 |
+/** |
|
347 |
+ * Inflate all available data in the input buffer, and write to output buffer. |
|
348 |
+ * Stops when the input buffer becomes empty, or write buffer becomes full. |
|
349 |
+ * Also attempts to recover from corrupted inflate stream (via inflateSync). |
|
350 |
+ * This function can be called repeatedly on success after filling the input |
|
351 |
+ * buffer, and flushing the output buffer. |
|
352 |
+ * The inflate stream is done processing when 0 bytes are available from output |
|
353 |
+ * buffer, and input buffer is not empty. |
|
354 |
+ * @param id ID of inflate data structure |
|
355 |
+ * @return 0 on success, zlib error code otherwise */ |
|
245 | 356 |
int32_t inflate_process(int32_t id); |
357 |
+ |
|
358 |
+/** |
|
359 |
+ * Deallocates inflate data structure. |
|
360 |
+ * Using the inflate data structure after this will result in an error. |
|
361 |
+ * All inflate data structures are automatically deallocated when bytecode |
|
362 |
+ * finishes execution. |
|
363 |
+ * @param id ID of inflate data structure |
|
364 |
+ * @return 0 on success.*/ |
|
246 | 365 |
int32_t inflate_done(int32_t id); |
247 | 366 |
|
367 |
+/** |
|
368 |
+ * Report a runtime error at the specified locationID. |
|
369 |
+ * @param locationid (line << 8) | (column&0xff) |
|
370 |
+ * @return 0 */ |
|
248 | 371 |
int32_t bytecode_rt_error(int32_t locationid); |
249 | 372 |
|
373 |
+/** |
|
374 |
+ * Initializes JS normalizer for reading 'from_buffer'. |
|
375 |
+ * Normalized JS will be written to a single tempfile, |
|
376 |
+ * one normalized JS per line, and automatically scanned |
|
377 |
+ * when the bytecode finishes execution. |
|
378 |
+ * @param from_buffer ID of buffer_pipe to read javascript from |
|
379 |
+ * @return ID of JS normalizer, <0 on failure */ |
|
250 | 380 |
int32_t jsnorm_init(int32_t from_buffer); |
381 |
+ |
|
382 |
+/** |
|
383 |
+ * Normalize all javascript from the input buffer, and write to tempfile. |
|
384 |
+ * You can call this function repeatedly on success, if you (re)fill the input |
|
385 |
+ * buffer. |
|
386 |
+ * @param id ID of JS normalizer |
|
387 |
+ * @return 0 on success, <0 on failure */ |
|
251 | 388 |
int32_t jsnorm_process(int32_t id); |
389 |
+ |
|
390 |
+/** |
|
391 |
+ * Flushes JS normalizer. |
|
392 |
+ * @param id ID of js normalizer to flush */ |
|
252 | 393 |
int32_t jsnorm_done(int32_t id); |
253 | 394 |
|
254 | 395 |
#endif |
... | ... |
@@ -26,12 +26,14 @@ struct bytecode_metadata { |
26 | 26 |
char *compiler; |
27 | 27 |
char *sigmaker; |
28 | 28 |
uint64_t timestamp; |
29 |
- unsigned long maxStack, maxMem; |
|
30 |
- unsigned long maxTime; |
|
29 |
+ unsigned formatlevel; |
|
30 |
+ unsigned minfunc, maxfunc; |
|
31 |
+ unsigned maxresource;/* reserved */ |
|
31 | 32 |
unsigned targetExclude; |
32 | 33 |
}; |
33 | 34 |
|
34 |
-#define BC_FUNC_LEVEL 6 |
|
35 |
+#define BC_FORMAT_096 6 |
|
36 |
+#define BC_FORMAT_LEVEL 7 |
|
35 | 37 |
#define BC_HEADER "ClamBC" |
36 | 38 |
|
37 | 39 |
enum bc_opcode { |
... | ... |
@@ -128,4 +130,5 @@ enum bc_global { |
128 | 128 |
}; |
129 | 129 |
|
130 | 130 |
#define BC_START_TID 69 |
131 |
+ |
|
131 | 132 |
#endif |
... | ... |
@@ -1419,6 +1419,7 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo, |
1419 | 1419 |
bc->id = bcs->count;/* must set after _load, since load zeroes */ |
1420 | 1420 |
sigs++; |
1421 | 1421 |
if (bc->kind == BC_LOGICAL || bc->lsig) { |
1422 |
+ unsigned oldsigs = sigs; |
|
1422 | 1423 |
if (!bc->lsig) { |
1423 | 1424 |
cli_errmsg("Bytecode %s has logical kind, but missing logical signature!\n", dbname); |
1424 | 1425 |
return CL_EMALFDB; |
... | ... |
@@ -1430,6 +1431,12 @@ static int cli_loadcbc(FILE *fs, struct cl_engine *engine, unsigned int *signo, |
1430 | 1430 |
bc->lsig, dbname, cl_strerror(rc)); |
1431 | 1431 |
return rc; |
1432 | 1432 |
} |
1433 |
+ if (sigs != oldsigs) { |
|
1434 |
+ /* compiler ensures Engine field in lsig matches the one in bytecode, |
|
1435 |
+ * so this should never happen. */ |
|
1436 |
+ cli_errmsg("Bytecode logical signature skipped, but bytecode itself not?"); |
|
1437 |
+ return CL_EMALFDB; |
|
1438 |
+ } |
|
1433 | 1439 |
} |
1434 | 1440 |
if (bc->kind != BC_LOGICAL) { |
1435 | 1441 |
if (bc->lsig) { |