Browse code

Add LZMA & BZip2 decompression to bytecode API

Adds LZMA and BZip2 decompression routines to the bytecode API.
The ability to decompress LZMA and BZip2 streams is particularly
useful for bytecode signatures that extend clamav executable
unpacking capabilities.

Of note, the LZMA format is not well standardized. This API
expects the stream to start with the LZMA_Alone header.

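Also fixed a bug in parsing the uncompressed-size field of the LZMA header: each byte overwrote the previous ones instead of being combined into the 64-bit value.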

Jonas Zaddach (jzaddach) authored on 2020/04/30 01:26:07
Showing 8 changed files
... ...
@@ -201,6 +201,20 @@ static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx)
201 201
     ctx->inflates  = NULL;
202 202
     ctx->ninflates = 0;
203 203
 
204
+    for (i = 0; i < ctx->nlzmas; i++)
205
+        cli_bcapi_lzma_done(ctx, i);
206
+    free(ctx->lzmas);
207
+    ctx->lzmas  = NULL;
208
+    ctx->nlzmas = 0;
209
+
210
+#if HAVE_BZLIB_H
211
+    for (i = 0; i < ctx->nbzip2s; i++)
212
+        cli_bcapi_bzip2_done(ctx, i);
213
+    free(ctx->bzip2s);
214
+    ctx->bzip2s  = NULL;
215
+    ctx->nbzip2s = 0;
216
+#endif
217
+
204 218
     for (i = 0; i < ctx->nbuffers; i++)
205 219
         cli_bcapi_buffer_pipe_done(ctx, i);
206 220
     free(ctx->buffers);
... ...
@@ -51,9 +51,13 @@
51 51
 #include "hashtab.h"
52 52
 #include "str.h"
53 53
 #include "filetypes.h"
54
+#include "lzma_iface.h"
54 55
 #if HAVE_JSON
55 56
 #include "json.h"
56 57
 #endif
58
+#if HAVE_BZLIB_H
59
+#include <bzlib.h>
60
+#endif
57 61
 
58 62
 #define EV ctx->bc_events
59 63
 
... ...
@@ -61,6 +65,20 @@
61 61
 #define TOSTRING(x) STRINGIFY(x)
62 62
 #define API_MISUSE() cli_event_error_str(EV, "API misuse @" TOSTRING(__LINE__))
63 63
 
64
+struct bc_lzma {
65
+    struct CLI_LZMA stream;
66
+    int32_t from;
67
+    int32_t to;
68
+};
69
+
70
+#if HAVE_BZLIB_H
71
+struct bc_bzip2 {
72
+    bz_stream stream;
73
+    int32_t from;
74
+    int32_t to;
75
+};
76
+#endif
77
+
64 78
 uint32_t cli_bcapi_test1(struct cli_bc_ctx *ctx, uint32_t a, uint32_t b)
65 79
 {
66 80
     UNUSEDPARAM(ctx);
... ...
@@ -928,6 +946,209 @@ int32_t cli_bcapi_inflate_done(struct cli_bc_ctx *ctx, int32_t id)
928 928
     return ret;
929 929
 }
930 930
 
931
+int32_t cli_bcapi_lzma_init(struct cli_bc_ctx *ctx, int32_t from, int32_t to)
932
+{
933
+    int ret;
934
+    struct bc_lzma *b;
935
+    unsigned n = ctx->nlzmas + 1;
936
+    unsigned avail_in_orig;
937
+
938
+    if (!get_buffer(ctx, from) || !get_buffer(ctx, to)) {
939
+        cli_dbgmsg("bytecode api: lzma_init: invalid buffers!\n");
940
+        return -1;
941
+    }
942
+
943
+    avail_in_orig = cli_bcapi_buffer_pipe_read_avail(ctx, from);
944
+    if (avail_in_orig < LZMA_PROPS_SIZE + 8) {
945
+        cli_dbgmsg("bytecode api: lzma_init: not enough bytes in pipe to read LZMA header!\n");
946
+        return -1;
947
+    }
948
+
949
+    b = cli_realloc(ctx->lzmas, sizeof(*ctx->lzmas) * n);
950
+    if (!b) {
951
+        return -1;
952
+    }
953
+    ctx->lzmas  = b;
954
+    ctx->nlzmas = n;
955
+    b           = &b[n - 1];
956
+
957
+    b->from = from;
958
+    b->to   = to;
959
+    memset(&b->stream, 0, sizeof(b->stream));
960
+
961
+    b->stream.avail_in = avail_in_orig;
962
+    b->stream.next_in  = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
963
+                                                               b->stream.avail_in);
964
+
965
+    if ((ret = cli_LzmaInit(&b->stream, 0)) != LZMA_RESULT_OK) {
966
+        cli_dbgmsg("bytecode api: LzmaInit: Failed to initialize LZMA decompressor: %d!\n", ret);
967
+        cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
968
+        return ret;
969
+    }
970
+
971
+    cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
972
+    return n - 1;
973
+}
974
+
975
+static struct bc_lzma *get_lzma(struct cli_bc_ctx *ctx, int32_t id)
976
+{
977
+    if (id < 0 || (unsigned int)id >= ctx->nlzmas || !ctx->lzmas)
978
+        return NULL;
979
+    return &ctx->lzmas[id];
980
+}
981
+
982
+int32_t cli_bcapi_lzma_process(struct cli_bc_ctx *ctx, int32_t id)
983
+{
984
+    int ret;
985
+    unsigned avail_in_orig, avail_out_orig;
986
+    struct bc_lzma *b = get_lzma(ctx, id);
987
+    if (!b || b->from == -1 || b->to == -1)
988
+        return -1;
989
+
990
+    b->stream.avail_in = avail_in_orig =
991
+        cli_bcapi_buffer_pipe_read_avail(ctx, b->from);
992
+
993
+    b->stream.next_in = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
994
+                                                               b->stream.avail_in);
995
+
996
+    b->stream.avail_out = avail_out_orig =
997
+        cli_bcapi_buffer_pipe_write_avail(ctx, b->to);
998
+    b->stream.next_out = (uint8_t *)cli_bcapi_buffer_pipe_write_get(ctx, b->to,
999
+                                                                    b->stream.avail_out);
1000
+
1001
+    if (!b->stream.avail_in || !b->stream.avail_out || !b->stream.next_in || !b->stream.next_out)
1002
+        return -1;
1003
+
1004
+    ret = cli_LzmaDecode(&b->stream);
1005
+    cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
1006
+    cli_bcapi_buffer_pipe_write_stopped(ctx, b->to, avail_out_orig - b->stream.avail_out);
1007
+
1008
+    if (ret != LZMA_RESULT_OK && ret != LZMA_STREAM_END) {
1009
+        cli_dbgmsg("bytecode api: LzmaDecode: Error %d while decoding\n", ret);
1010
+        cli_bcapi_lzma_done(ctx, id);
1011
+    }
1012
+
1013
+    return ret;
1014
+}
1015
+
1016
+int32_t cli_bcapi_lzma_done(struct cli_bc_ctx *ctx, int32_t id)
1017
+{
1018
+    struct bc_lzma *b = get_lzma(ctx, id);
1019
+    if (!b || b->from == -1 || b->to == -1)
1020
+        return -1;
1021
+    cli_LzmaShutdown(&b->stream);
1022
+    b->from = b->to = -1;
1023
+    return 0;
1024
+}
1025
+
1026
+int32_t cli_bcapi_bzip2_init(struct cli_bc_ctx *ctx, int32_t from, int32_t to)
1027
+{
1028
+#if HAVE_BZLIB_H
1029
+    int ret;
1030
+    struct bc_bzip2 *b;
1031
+    unsigned n = ctx->nbzip2s + 1;
1032
+    if (!get_buffer(ctx, from) || !get_buffer(ctx, to)) {
1033
+        cli_dbgmsg("bytecode api: bzip2_init: invalid buffers!\n");
1034
+        return -1;
1035
+    }
1036
+    b = cli_realloc(ctx->bzip2s, sizeof(*ctx->bzip2s) * n);
1037
+    if (!b) {
1038
+        return -1;
1039
+    }
1040
+    ctx->bzip2s  = b;
1041
+    ctx->nbzip2s = n;
1042
+    b            = &b[n - 1];
1043
+
1044
+    b->from = from;
1045
+    b->to   = to;
1046
+    memset(&b->stream, 0, sizeof(b->stream));
1047
+    ret = BZ2_bzDecompressInit(&b->stream, 0, 0);
1048
+    switch (ret) {
1049
+        case BZ_CONFIG_ERROR:
1050
+            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Library has been mis-compiled!\n");
1051
+            return -1;
1052
+        case BZ_PARAM_ERROR:
1053
+            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Invalid arguments!\n");
1054
+            return -1;
1055
+        case BZ_MEM_ERROR:
1056
+            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Insufficient memory available!\n");
1057
+            return -1;
1058
+        case BZ_OK:
1059
+            break;
1060
+        default:
1061
+            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: unknown error %d\n", ret);
1062
+            return -1;
1063
+    }
1064
+
1065
+    return n - 1;
1066
+#else
1067
+    return -1;
1068
+#endif
1069
+}
1070
+
1071
+#if HAVE_BZLIB_H
1072
+static struct bc_bzip2 *get_bzip2(struct cli_bc_ctx *ctx, int32_t id)
1073
+{
1074
+    if (id < 0 || (unsigned int)id >= ctx->nbzip2s || !ctx->bzip2s)
1075
+        return NULL;
1076
+    return &ctx->bzip2s[id];
1077
+}
1078
+#endif
1079
+
1080
+int32_t cli_bcapi_bzip2_process(struct cli_bc_ctx *ctx, int32_t id)
1081
+{
1082
+#if HAVE_BZLIB_H
1083
+    int ret;
1084
+    unsigned avail_in_orig, avail_out_orig;
1085
+    struct bc_bzip2 *b = get_bzip2(ctx, id);
1086
+    if (!b || b->from == -1 || b->to == -1)
1087
+        return -1;
1088
+
1089
+    b->stream.avail_in = avail_in_orig =
1090
+        cli_bcapi_buffer_pipe_read_avail(ctx, b->from);
1091
+
1092
+    b->stream.next_in = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
1093
+                                                               b->stream.avail_in);
1094
+
1095
+    b->stream.avail_out = avail_out_orig =
1096
+        cli_bcapi_buffer_pipe_write_avail(ctx, b->to);
1097
+
1098
+    b->stream.next_out = (char *)cli_bcapi_buffer_pipe_write_get(ctx, b->to,
1099
+                                                                 b->stream.avail_out);
1100
+
1101
+    if (!b->stream.avail_in || !b->stream.avail_out || !b->stream.next_in || !b->stream.next_out)
1102
+        return -1;
1103
+    /* decompress as much as the input and output pipes currently allow */
1104
+    ret = BZ2_bzDecompress(&b->stream);
1105
+    cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
1106
+    cli_bcapi_buffer_pipe_write_stopped(ctx, b->to, avail_out_orig - b->stream.avail_out);
1107
+
1108
+    /* check if nothing written whatsoever */
1109
+    if ((ret != BZ_OK) && (b->stream.avail_out == avail_out_orig)) {
1110
+        /* Decompression failed */
1111
+        cli_errmsg("cli_bcapi_bzip2_process: failed to decompress data\n");
1112
+    }
1113
+
1114
+    return ret;
1115
+#else
1116
+    return -1;
1117
+#endif
1118
+}
1119
+
1120
+int32_t cli_bcapi_bzip2_done(struct cli_bc_ctx *ctx, int32_t id)
1121
+{
1122
+#if HAVE_BZLIB_H
1123
+    struct bc_bzip2 *b = get_bzip2(ctx, id);
1124
+    if (!b || b->from == -1 || b->to == -1)
1125
+        return -1;
1126
+    BZ2_bzDecompressEnd(&b->stream);
1127
+    b->from = b->to = -1;
1128
+    return 0;
1129
+#else
1130
+    return -1;
1131
+#endif
1132
+}
1133
+
931 1134
 int32_t cli_bcapi_bytecode_rt_error(struct cli_bc_ctx *ctx, int32_t id)
932 1135
 {
933 1136
     int32_t line = id >> 8;
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2013-2020 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
2
+ *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 3
  *  Copyright (C) 2009-2013 Sourcefire, Inc.
4 4
 
5 5
  *  Authors: Török Edvin, Kevin Lin
... ...
@@ -235,6 +235,37 @@ enum bc_json_type {
235 235
 };
236 236
 
237 237
 /**
238
+\group_adt
239
+  * LZMA return codes
240
+  */
241
+enum lzma_returncode {
242
+    LZMA_RESULT_OK         = 0, /* Function completed successfully */
243
+    LZMA_RESULT_DATA_ERROR = 1, /* The LZMA stream contained invalid data */
244
+    LZMA_STREAM_END        = 2  /* The end of the LZMA stream was reached */
245
+};
246
+
247
+/**
248
+\group_adt
249
+ * Bzip2 return codes
250
+ */
251
+enum bzip2_returncode {
252
+    BZIP2_OK               = 0,  /* Function returned without error */
253
+    BZIP2_SEQUENCE_ERROR   = -1,
254
+    BZIP2_PARAM_ERROR      = -2,
255
+    BZIP2_MEM_ERROR        = -3,
256
+    BZIP2_DATA_ERROR       = -4,
257
+    BZIP2_DATA_ERROR_MAGIC = -5,
258
+    BZIP2_IO_ERROR         = -6,
259
+    BZIP2_UNEXPECTED_EOF   = -7,
260
+    BZIP2_OUTBUFF_FULL     = -8,
261
+    BZIP2_CONFIG_ERROR     = -9,
262
+    BZIP2_RUN_OK           = 1,
263
+    BZIP2_FLUSH_OK         = 2,
264
+    BZIP2_FINISH_OK        = 3,
265
+    BZIP2_STREAM_END       = 4
266
+};
267
+
268
+/**
238 269
 \group_engine
239 270
  * Scan option flag values for engine_scan_options(). *DEPRECATED*
240 271
  */
... ...
@@ -1339,5 +1370,83 @@ int32_t json_get_int(int32_t objid);
1339 1339
 uint32_t engine_scan_options_ex(const uint8_t* option_name, uint32_t name_len);
1340 1340
 
1341 1341
 /* ----------------- END 0.101 APIs ---------------------------------- */
1342
+/* ----------------- BEGIN 0.103 APIs -------------------------------- */
1343
+
1344
+/**
1345
+\group_adt
1346
+  * Initializes LZMA data structures for decompressing data
1347
+  * 'from_buffer' and writing uncompressed data 'to_buffer'.
1348
+  * This function expects the LZMA data to be prefixed with an 'LZMA_ALONE' header: 
1349
+  * - One byte of lzma parameters lc, lp and pb converted into a byte value like this: lc + 9 * (5 * pb + lp).
1350
+  *   lc The number of high bits of the previous byte to use as a context for literal encoding.
1351
+  *   lp The number of low bits of the dictionary position to include in literal_pos_state.
1352
+  *   pb The number of low bits of the dictionary position to include in pos_state.
1353
+  * - Four bytes of dictionary size. In case of doubt you can set this to zero.
1354
+  * - Eight bytes of uncompressed size. Can be set to -1 if the size is unknown
1355
+  *   and the lzma stream is terminated with an end marker.
1356
+  * @param[in] from_buffer ID of buffer_pipe to read compressed data from
1357
+  * @param[in] to_buffer ID of buffer_pipe to write decompressed data to
1358
+  * @return ID of newly created lzma data structure, <0 on failure.
1359
+  */
1360
+int32_t lzma_init(int32_t from, int32_t to);
1361
+
1362
+/**
1363
+\group_adt
1364
+  * Decompress all available data in the input buffer, and write to output buffer.
1365
+  * Stops when the input buffer becomes empty, or write buffer becomes full.
1366
+  * This function can be called repeatedly on success after filling the input
1367
+  * buffer, and flushing the output buffer.
1368
+  * The lzma stream is done processing when 0 bytes are available from output
1369
+  * buffer, and input buffer is not empty.
1370
+  * @param[in] id ID of lzma data structure.
1371
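+  * @return 0 on success, LZMA_STREAM_END when the end of the stream is reached, -1 if the ID is invalid or no input data / output space is available, lzma error code otherwise.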
1372
+  */
1373
+int32_t lzma_process(int32_t id);
1374
+
1375
+/**
1376
+\group_adt
1377
+  * Deallocates lzma data structure.
1378
+  * Using the lzma data structure after this will result in an error.
1379
+  * All lzma data structures are automatically deallocated when bytecode
1380
+  * finishes execution.
1381
+  * @param[in] id ID of lzma data structure
1382
+  * @return 0 on success.
1383
+  */
1384
+int32_t lzma_done(int32_t id);
1385
+
1386
+/**
1387
+\group_adt
1388
+  * Initializes Bzip2 data structures for decompressing data
1389
+  * 'from_buffer' and writing uncompressed data 'to_buffer'.
1390
+  * @param[in] from_buffer ID of buffer_pipe to read compressed data from
1391
+  * @param[in] to_buffer ID of buffer_pipe to write decompressed data to
1392
+  * @return ID of newly created bzip2 data structure, <0 on failure.
1393
+  */
1394
+int32_t bzip2_init(int32_t from, int32_t to);
1395
+
1396
+/**
1397
+\group_adt
1398
+  * Decompress all available data in the input buffer, and write to output buffer.
1399
+  * Stops when the input buffer becomes empty, or write buffer becomes full.
1400
+  * This function can be called repeatedly on success after filling the input
1401
+  * buffer, and flushing the output buffer.
1402
+  * The bzip2 stream is done processing when 0 bytes are available from output
1403
+  * buffer, and input buffer is not empty.
1404
+  * @param[in] id ID of bzip2 data structure.
1405
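+  * @return 0 on success, BZIP2_STREAM_END when the end of the stream is reached, -1 if the ID is invalid or no input data / output space is available, bzip2 error code otherwise.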
1406
+  */
1407
+int32_t bzip2_process(int32_t id);
1408
+
1409
+/**
1410
+\group_adt
1411
+  * Deallocates bzip2 data structure.
1412
+  * Using the bzip2 data structure after this will result in an error.
1413
+  * All bzip2 data structures are automatically deallocated when bytecode
1414
+  * finishes execution.
1415
+  * @param[in] id ID of bzip2 data structure
1416
+  * @return 0 on success.
1417
+  */
1418
+int32_t bzip2_done(int32_t id);
1419
+/* ----------------- END 0.103 APIs ---------------------------------- */
1342 1420
 #endif
1343 1421
 #endif
... ...
@@ -134,6 +134,12 @@ int32_t cli_bcapi_json_get_string(struct cli_bc_ctx *ctx , int8_t*, int32_t, int
134 134
 int32_t cli_bcapi_json_get_boolean(struct cli_bc_ctx *ctx , int32_t);
135 135
 int32_t cli_bcapi_json_get_int(struct cli_bc_ctx *ctx , int32_t);
136 136
 uint32_t cli_bcapi_engine_scan_options_ex(struct cli_bc_ctx *ctx , const uint8_t*, uint32_t);
137
+int32_t cli_bcapi_lzma_init(struct cli_bc_ctx *ctx , int32_t, int32_t);
138
+int32_t cli_bcapi_lzma_process(struct cli_bc_ctx *ctx , int32_t);
139
+int32_t cli_bcapi_lzma_done(struct cli_bc_ctx *ctx , int32_t);
140
+int32_t cli_bcapi_bzip2_init(struct cli_bc_ctx *ctx , int32_t, int32_t);
141
+int32_t cli_bcapi_bzip2_process(struct cli_bc_ctx *ctx , int32_t);
142
+int32_t cli_bcapi_bzip2_done(struct cli_bc_ctx *ctx , int32_t);
137 143
 
138 144
 const struct cli_apiglobal cli_globals[] = {
139 145
 /* Bytecode globals BEGIN */
... ...
@@ -158,10 +164,10 @@ static uint16_t cli_tmp4[]={16, 8, 8, 32, 32, 32, 32, 32, 32, 32, 32, 32, 16, 16
158 158
 static uint16_t cli_tmp5[]={32, 16, 16, 32, 32, 32, 16, 16};
159 159
 static uint16_t cli_tmp6[]={32};
160 160
 static uint16_t cli_tmp7[]={32};
161
-static uint16_t cli_tmp8[]={32, 65, 32};
162
-static uint16_t cli_tmp9[]={32, 32};
163
-static uint16_t cli_tmp10[]={32, 65, 32, 32};
164
-static uint16_t cli_tmp11[]={32, 32, 32};
161
+static uint16_t cli_tmp8[]={32, 32};
162
+static uint16_t cli_tmp9[]={32, 32, 32};
163
+static uint16_t cli_tmp10[]={32, 65, 32};
164
+static uint16_t cli_tmp11[]={32, 65, 32, 32};
165 165
 static uint16_t cli_tmp12[]={32};
166 166
 static uint16_t cli_tmp13[]={32, 65, 32, 65, 32};
167 167
 static uint16_t cli_tmp14[]={65, 32, 32};
... ...
@@ -191,10 +197,10 @@ const struct cli_bc_type cli_apicall_types[]={
191 191
 	{DStructType, cli_tmp5, 8, 0, 0},
192 192
 	{DArrayType, cli_tmp6, 1, 0, 0},
193 193
 	{DArrayType, cli_tmp7, 64, 0, 0},
194
-	{DFunctionType, cli_tmp8, 3, 0, 0},
195
-	{DFunctionType, cli_tmp9, 2, 0, 0},
196
-	{DFunctionType, cli_tmp10, 4, 0, 0},
197
-	{DFunctionType, cli_tmp11, 3, 0, 0},
194
+	{DFunctionType, cli_tmp8, 2, 0, 0},
195
+	{DFunctionType, cli_tmp9, 3, 0, 0},
196
+	{DFunctionType, cli_tmp10, 3, 0, 0},
197
+	{DFunctionType, cli_tmp11, 4, 0, 0},
198 198
 	{DFunctionType, cli_tmp12, 1, 0, 0},
199 199
 	{DFunctionType, cli_tmp13, 5, 0, 0},
200 200
 	{DFunctionType, cli_tmp14, 3, 0, 0},
... ...
@@ -219,107 +225,113 @@ const struct cli_bc_type cli_apicall_types[]={
219 219
 const unsigned cli_apicall_maxtypes=sizeof(cli_apicall_types)/sizeof(cli_apicall_types[0]);
220 220
 const struct cli_apicall cli_apicalls[]={
221 221
 /* Bytecode APIcalls BEGIN */
222
-	{"test1", 11, 0, 0},
223
-	{"read", 8, 0, 1},
224
-	{"write", 8, 1, 1},
225
-	{"seek", 11, 1, 0},
226
-	{"setvirusname", 8, 2, 1},
227
-	{"debug_print_str", 8, 3, 1},
228
-	{"debug_print_uint", 9, 0, 2},
222
+	{"test1", 9, 0, 0},
223
+	{"read", 10, 0, 1},
224
+	{"write", 10, 1, 1},
225
+	{"seek", 9, 1, 0},
226
+	{"setvirusname", 10, 2, 1},
227
+	{"debug_print_str", 10, 3, 1},
228
+	{"debug_print_uint", 8, 0, 2},
229 229
 	{"disasm_x86", 25, 4, 1},
230
-	{"trace_directory", 8, 5, 1},
231
-	{"trace_scope", 8, 6, 1},
232
-	{"trace_source", 8, 7, 1},
233
-	{"trace_op", 8, 8, 1},
234
-	{"trace_value", 8, 9, 1},
235
-	{"trace_ptr", 8, 10, 1},
236
-	{"pe_rawaddr", 9, 1, 2},
237
-	{"file_find", 8, 11, 1},
238
-	{"file_byteat", 9, 2, 2},
230
+	{"trace_directory", 10, 5, 1},
231
+	{"trace_scope", 10, 6, 1},
232
+	{"trace_source", 10, 7, 1},
233
+	{"trace_op", 10, 8, 1},
234
+	{"trace_value", 10, 9, 1},
235
+	{"trace_ptr", 10, 10, 1},
236
+	{"pe_rawaddr", 8, 1, 2},
237
+	{"file_find", 10, 11, 1},
238
+	{"file_byteat", 8, 2, 2},
239 239
 	{"malloc", 24, 0, 3},
240
-	{"test2", 9, 3, 2},
240
+	{"test2", 8, 3, 2},
241 241
 	{"get_pe_section", 21, 12, 1},
242 242
 	{"fill_buffer", 20, 0, 4},
243
-	{"extract_new", 9, 4, 2},
244
-	{"read_number", 9, 5, 2},
243
+	{"extract_new", 8, 4, 2},
244
+	{"read_number", 8, 5, 2},
245 245
 	{"hashset_new", 12, 0, 5},
246
-	{"hashset_add", 11, 2, 0},
247
-	{"hashset_remove", 11, 3, 0},
248
-	{"hashset_contains", 11, 4, 0},
249
-	{"hashset_done", 9, 6, 2},
250
-	{"hashset_empty", 9, 7, 2},
251
-	{"buffer_pipe_new", 9, 8, 2},
252
-	{"buffer_pipe_new_fromfile", 9, 9, 2},
253
-	{"buffer_pipe_read_avail", 9, 10, 2},
246
+	{"hashset_add", 9, 2, 0},
247
+	{"hashset_remove", 9, 3, 0},
248
+	{"hashset_contains", 9, 4, 0},
249
+	{"hashset_done", 8, 6, 2},
250
+	{"hashset_empty", 8, 7, 2},
251
+	{"buffer_pipe_new", 8, 8, 2},
252
+	{"buffer_pipe_new_fromfile", 8, 9, 2},
253
+	{"buffer_pipe_read_avail", 8, 10, 2},
254 254
 	{"buffer_pipe_read_get", 14, 0, 6},
255
-	{"buffer_pipe_read_stopped", 11, 5, 0},
256
-	{"buffer_pipe_write_avail", 9, 11, 2},
255
+	{"buffer_pipe_read_stopped", 9, 5, 0},
256
+	{"buffer_pipe_write_avail", 8, 11, 2},
257 257
 	{"buffer_pipe_write_get", 14, 1, 6},
258
-	{"buffer_pipe_write_stopped", 11, 6, 0},
259
-	{"buffer_pipe_done", 9, 12, 2},
258
+	{"buffer_pipe_write_stopped", 9, 6, 0},
259
+	{"buffer_pipe_done", 8, 12, 2},
260 260
 	{"inflate_init", 15, 0, 7},
261
-	{"inflate_process", 9, 13, 2},
262
-	{"inflate_done", 9, 14, 2},
263
-	{"bytecode_rt_error", 9, 15, 2},
264
-	{"jsnorm_init", 9, 16, 2},
265
-	{"jsnorm_process", 9, 17, 2},
266
-	{"jsnorm_done", 9, 18, 2},
267
-	{"ilog2", 11, 7, 0},
261
+	{"inflate_process", 8, 13, 2},
262
+	{"inflate_done", 8, 14, 2},
263
+	{"bytecode_rt_error", 8, 15, 2},
264
+	{"jsnorm_init", 8, 16, 2},
265
+	{"jsnorm_process", 8, 17, 2},
266
+	{"jsnorm_done", 8, 18, 2},
267
+	{"ilog2", 9, 7, 0},
268 268
 	{"ipow", 15, 1, 7},
269 269
 	{"iexp", 15, 2, 7},
270 270
 	{"isin", 15, 3, 7},
271 271
 	{"icos", 15, 4, 7},
272 272
 	{"memstr", 13, 0, 8},
273
-	{"hex2ui", 11, 8, 0},
274
-	{"atoi", 8, 13, 1},
275
-	{"debug_print_str_start", 8, 14, 1},
276
-	{"debug_print_str_nonl", 8, 15, 1},
277
-	{"entropy_buffer", 8, 16, 1},
278
-	{"map_new", 11, 9, 0},
279
-	{"map_addkey", 10, 0, 9},
280
-	{"map_setvalue", 10, 1, 9},
281
-	{"map_remove", 10, 2, 9},
282
-	{"map_find", 10, 3, 9},
283
-	{"map_getvaluesize", 9, 19, 2},
273
+	{"hex2ui", 9, 8, 0},
274
+	{"atoi", 10, 13, 1},
275
+	{"debug_print_str_start", 10, 14, 1},
276
+	{"debug_print_str_nonl", 10, 15, 1},
277
+	{"entropy_buffer", 10, 16, 1},
278
+	{"map_new", 9, 9, 0},
279
+	{"map_addkey", 11, 0, 9},
280
+	{"map_setvalue", 11, 1, 9},
281
+	{"map_remove", 11, 2, 9},
282
+	{"map_find", 11, 3, 9},
283
+	{"map_getvaluesize", 8, 19, 2},
284 284
 	{"map_getvalue", 14, 2, 6},
285
-	{"map_done", 9, 20, 2},
286
-	{"file_find_limit", 10, 4, 9},
285
+	{"map_done", 8, 20, 2},
286
+	{"file_find_limit", 11, 4, 9},
287 287
 	{"engine_functionality_level", 12, 1, 5},
288 288
 	{"engine_dconf_level", 12, 2, 5},
289 289
 	{"engine_scan_options", 12, 3, 5},
290 290
 	{"engine_db_options", 12, 4, 5},
291
-	{"extract_set_container", 9, 21, 2},
292
-	{"input_switch", 9, 22, 2},
291
+	{"extract_set_container", 8, 21, 2},
292
+	{"input_switch", 8, 22, 2},
293 293
 	{"get_environment", 16, 17, 1},
294
-	{"disable_bytecode_if", 10, 5, 9},
295
-	{"disable_jit_if", 10, 6, 9},
294
+	{"disable_bytecode_if", 11, 5, 9},
295
+	{"disable_jit_if", 11, 6, 9},
296 296
 	{"version_compare", 13, 1, 8},
297 297
 	{"check_platform", 15, 5, 7},
298 298
 	{"pdf_get_obj_num", 12, 5, 5},
299 299
 	{"pdf_get_flags", 12, 6, 5},
300
-	{"pdf_set_flags", 9, 23, 2},
301
-	{"pdf_lookupobj", 9, 24, 2},
302
-	{"pdf_getobjsize", 9, 25, 2},
300
+	{"pdf_set_flags", 8, 23, 2},
301
+	{"pdf_lookupobj", 8, 24, 2},
302
+	{"pdf_getobjsize", 8, 25, 2},
303 303
 	{"pdf_getobj", 14, 3, 6},
304
-	{"pdf_getobjid", 9, 26, 2},
305
-	{"pdf_getobjflags", 9, 27, 2},
306
-	{"pdf_setobjflags", 11, 10, 0},
307
-	{"pdf_get_offset", 9, 28, 2},
304
+	{"pdf_getobjid", 8, 26, 2},
305
+	{"pdf_getobjflags", 8, 27, 2},
306
+	{"pdf_setobjflags", 9, 10, 0},
307
+	{"pdf_get_offset", 8, 28, 2},
308 308
 	{"pdf_get_phase", 12, 7, 5},
309 309
 	{"pdf_get_dumpedobjid", 12, 8, 5},
310 310
 	{"matchicon", 13, 2, 8},
311 311
 	{"running_on_jit", 12, 9, 5},
312 312
 	{"get_file_reliability", 12, 10, 5},
313 313
 	{"json_is_active", 12, 11, 5},
314
-	{"json_get_object", 10, 7, 9},
315
-	{"json_get_type", 9, 29, 2},
316
-	{"json_get_array_length", 9, 30, 2},
317
-	{"json_get_array_idx", 11, 11, 0},
318
-	{"json_get_string_length", 9, 31, 2},
319
-	{"json_get_string", 10, 8, 9},
320
-	{"json_get_boolean", 9, 32, 2},
321
-	{"json_get_int", 9, 33, 2},
322
-	{"engine_scan_options_ex", 8, 18, 1}
314
+	{"json_get_object", 11, 7, 9},
315
+	{"json_get_type", 8, 29, 2},
316
+	{"json_get_array_length", 8, 30, 2},
317
+	{"json_get_array_idx", 9, 11, 0},
318
+	{"json_get_string_length", 8, 31, 2},
319
+	{"json_get_string", 11, 8, 9},
320
+	{"json_get_boolean", 8, 32, 2},
321
+	{"json_get_int", 8, 33, 2},
322
+	{"engine_scan_options_ex", 10, 18, 1},
323
+	{"lzma_init", 9, 12, 0},
324
+	{"lzma_process", 8, 34, 2},
325
+	{"lzma_done", 8, 35, 2},
326
+	{"bzip2_init", 9, 13, 0},
327
+	{"bzip2_process", 8, 36, 2},
328
+	{"bzip2_done", 8, 37, 2}
323 329
 /* Bytecode APIcalls END */
324 330
 };
325 331
 const unsigned cli_numapicalls=sizeof(cli_apicalls)/sizeof(cli_apicalls[0]);
... ...
@@ -336,7 +348,9 @@ const cli_apicall_int2 cli_apicalls0[] = {
336 336
 	(cli_apicall_int2)cli_bcapi_hex2ui,
337 337
 	(cli_apicall_int2)cli_bcapi_map_new,
338 338
 	(cli_apicall_int2)cli_bcapi_pdf_setobjflags,
339
-	(cli_apicall_int2)cli_bcapi_json_get_array_idx
339
+	(cli_apicall_int2)cli_bcapi_json_get_array_idx,
340
+	(cli_apicall_int2)cli_bcapi_lzma_init,
341
+	(cli_apicall_int2)cli_bcapi_bzip2_init
340 342
 };
341 343
 const cli_apicall_pointer cli_apicalls1[] = {
342 344
 	(cli_apicall_pointer)cli_bcapi_read,
... ...
@@ -393,7 +407,11 @@ const cli_apicall_int1 cli_apicalls2[] = {
393 393
 	(cli_apicall_int1)cli_bcapi_json_get_array_length,
394 394
 	(cli_apicall_int1)cli_bcapi_json_get_string_length,
395 395
 	(cli_apicall_int1)cli_bcapi_json_get_boolean,
396
-	(cli_apicall_int1)cli_bcapi_json_get_int
396
+	(cli_apicall_int1)cli_bcapi_json_get_int,
397
+	(cli_apicall_int1)cli_bcapi_lzma_process,
398
+	(cli_apicall_int1)cli_bcapi_lzma_done,
399
+	(cli_apicall_int1)cli_bcapi_bzip2_process,
400
+	(cli_apicall_int1)cli_bcapi_bzip2_done
397 401
 };
398 402
 const cli_apicall_malloclike cli_apicalls3[] = {
399 403
 	(cli_apicall_malloclike)cli_bcapi_malloc
... ...
@@ -132,5 +132,11 @@ int32_t cli_bcapi_json_get_string(struct cli_bc_ctx *ctx , int8_t*, int32_t, int
132 132
 int32_t cli_bcapi_json_get_boolean(struct cli_bc_ctx *ctx , int32_t);
133 133
 int32_t cli_bcapi_json_get_int(struct cli_bc_ctx *ctx , int32_t);
134 134
 uint32_t cli_bcapi_engine_scan_options_ex(struct cli_bc_ctx *ctx , const uint8_t*, uint32_t);
135
+int32_t cli_bcapi_lzma_init(struct cli_bc_ctx *ctx , int32_t, int32_t);
136
+int32_t cli_bcapi_lzma_process(struct cli_bc_ctx *ctx , int32_t);
137
+int32_t cli_bcapi_lzma_done(struct cli_bc_ctx *ctx , int32_t);
138
+int32_t cli_bcapi_bzip2_init(struct cli_bc_ctx *ctx , int32_t, int32_t);
139
+int32_t cli_bcapi_bzip2_process(struct cli_bc_ctx *ctx , int32_t);
140
+int32_t cli_bcapi_bzip2_done(struct cli_bc_ctx *ctx , int32_t);
135 141
 
136 142
 #endif
... ...
@@ -198,6 +198,10 @@ struct cli_bc_ctx {
198 198
     unsigned filewritten;
199 199
     unsigned found;
200 200
     unsigned ninflates;
201
+    unsigned nlzmas;
202
+#if HAVE_BZLIB_H
203
+    unsigned nbzip2s;
204
+#endif
201 205
     bc_dbg_callback_trace trace;
202 206
     bc_dbg_callback_trace_op trace_op;
203 207
     bc_dbg_callback_trace_val trace_val;
... ...
@@ -211,6 +215,10 @@ struct cli_bc_ctx {
211 211
     unsigned col;
212 212
     mpool_t *mpool;
213 213
     struct bc_inflate *inflates;
214
+    struct bc_lzma *lzmas;
215
+#if HAVE_BZLIB_H
216
+    struct bc_bzip2 *bzip2s;
217
+#endif
214 218
     struct bc_buffer *buffers;
215 219
     unsigned nbuffers;
216 220
     unsigned nhashsets;
... ...
@@ -1,7 +1,7 @@
1 1
 /*
2 2
  *  ClamAV bytecode definitions.
3 3
  *
4
- *  Copyright (C) 2013-2020 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
4
+ *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 5
  *  Copyright (C) 2009-2013 Sourcefire, Inc.
6 6
  *
7 7
  *  Authors: Török Edvin
... ...
@@ -70,10 +70,13 @@ int cli_LzmaInit(struct CLI_LZMA *L, uint64_t size_override)
70 70
 
71 71
     if (!L->init) {
72 72
         L->p_cnt = LZMA_PROPS_SIZE;
73
-        if (size_override)
73
+        if (size_override) {
74
+            L->s_cnt = 0;
74 75
             L->usize = size_override;
75
-        else
76
+        } else {
76 77
             L->s_cnt = 8;
78
+            L->usize = 0;
79
+        }
77 80
         L->init = 1;
78 81
     } else if (size_override)
79 82
         cli_warnmsg("cli_LzmaInit: ignoring late size override\n");
... ...
@@ -89,7 +92,7 @@ int cli_LzmaInit(struct CLI_LZMA *L, uint64_t size_override)
89 89
     while (L->s_cnt) {
90 90
         uint64_t c = (uint64_t)lzma_getbyte(L, &fail);
91 91
         if (fail) return LZMA_RESULT_OK;
92
-        L->usize = c << (8 * (8 - L->s_cnt));
92
+        L->usize |= c << (8 * (8 - L->s_cnt));
93 93
         L->s_cnt--;
94 94
     }
95 95