Browse code

Add bytecode API for pdf.

Török Edvin authored on 2010/08/02 21:42:58
Showing 10 changed files
... ...
@@ -215,7 +215,6 @@ static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx)
215 215
 
216 216
 int cli_bytecode_context_clear(struct cli_bc_ctx *ctx)
217 217
 {
218
-    cli_ctx *cctx = (cli_ctx*)ctx->ctx;
219 218
     cli_bytecode_context_reset(ctx);
220 219
     memset(ctx, 0, sizeof(*ctx));
221 220
     return CL_SUCCESS;
... ...
@@ -1736,7 +1735,7 @@ static int calc_gepz(struct cli_bc *bc, struct cli_bc_func *func, uint16_t tid,
1736 1736
 
1737 1737
 static int cli_bytecode_prepare_interpreter(struct cli_bc *bc)
1738 1738
 {
1739
-    unsigned i, j, k, rc;
1739
+    unsigned i, j, k;
1740 1740
     uint64_t *gmap;
1741 1741
     unsigned bcglobalid = cli_apicall_maxglobal - _FIRST_GLOBAL+2;
1742 1742
     bc->numGlobalBytes = 0;
... ...
@@ -2249,7 +2248,7 @@ static int run_builtin_or_loaded(struct cli_all_bc *bcs, uint8_t kind, const cha
2249 2249
 int cli_bytecode_prepare(struct cl_engine *engine, struct cli_all_bc *bcs, unsigned dconfmask)
2250 2250
 {
2251 2251
     unsigned i, interp = 0, jitok = 0, jitcount=0;
2252
-    int rc1, rc2, rc;
2252
+    int rc;
2253 2253
     struct cli_bc_ctx *ctx;
2254 2254
 
2255 2255
     cli_detect_environment(&bcs->env);
... ...
@@ -82,6 +82,7 @@ struct cli_all_bc {
82 82
 
83 83
 struct cli_pe_hook_data;
84 84
 struct cli_exe_section;
85
+struct pdf_obj;
85 86
 struct cli_bc_ctx *cli_bytecode_context_alloc(void);
86 87
 /* FIXME: we can't include others.h because others.h includes us...*/
87 88
 void cli_bytecode_context_setctx(struct cli_bc_ctx *ctx, void *cctx);
... ...
@@ -90,6 +91,7 @@ int cli_bytecode_context_setparam_int(struct cli_bc_ctx *ctx, unsigned i, uint64
90 90
 int cli_bytecode_context_setparam_ptr(struct cli_bc_ctx *ctx, unsigned i, void *data, unsigned datalen);
91 91
 int cli_bytecode_context_setfile(struct cli_bc_ctx *ctx, fmap_t *map);
92 92
 int cli_bytecode_context_setpe(struct cli_bc_ctx *ctx, const struct cli_pe_hook_data *data, const struct cli_exe_section *sections);
93
+int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase, unsigned nobjs, struct pdf_obj *objs, uint32_t *pdf_flags, uint32_t pdfsize, uint32_t pdfstartoff);
93 94
 int cli_bytecode_context_clear(struct cli_bc_ctx *ctx);
94 95
 /* returns file descriptor, sets tempfile. Caller takes ownership, and is
95 96
  * responsible for freeing/unlinking */
... ...
@@ -42,6 +42,7 @@
42 42
 #include "bytecode_api_impl.h"
43 43
 #include "others.h"
44 44
 #include "pe.h"
45
+#include "pdf.h"
45 46
 #include "disasm.h"
46 47
 #include "scanners.h"
47 48
 #include "jsparse/js-norm.h"
... ...
@@ -989,7 +990,7 @@ int32_t cli_bcapi_memstr(struct cli_bc_ctx *ctx, const uint8_t* h, int32_t hs,
989 989
     const uint8_t *s;
990 990
     if (!h || !n || hs < 0 || ns < 0)
991 991
 	return -1;
992
-    s = cli_memstr(h, hs, n, ns);
992
+    s = (const uint8_t*) cli_memstr((const char*)h, hs, (const char*)n, ns);
993 993
     if (!s)
994 994
 	return -1;
995 995
     return s - h;
... ...
@@ -997,12 +998,12 @@ int32_t cli_bcapi_memstr(struct cli_bc_ctx *ctx, const uint8_t* h, int32_t hs,
997 997
 
998 998
 int32_t cli_bcapi_hex2ui(struct cli_bc_ctx *ctx, uint32_t ah, uint32_t bh)
999 999
 {
1000
-    uint8_t result = 0;
1000
+    char result = 0;
1001 1001
     unsigned char in[2];
1002 1002
     in[0] = ah;
1003 1003
     in[1] = bh;
1004 1004
 
1005
-    if (cli_hex2str_to(in, &result, 2) == -1)
1005
+    if (cli_hex2str_to((const char*)in, &result, 2) == -1)
1006 1006
 	return -1;
1007 1007
     return result;
1008 1008
 }
... ...
@@ -1262,7 +1263,6 @@ uint32_t cli_bcapi_disable_jit_if(struct cli_bc_ctx *ctx , const int8_t* reason,
1262 1262
 int32_t cli_bcapi_version_compare(struct cli_bc_ctx *ctx , const uint8_t* lhs, uint32_t lhs_len, 
1263 1263
 				  const uint8_t* rhs, uint32_t rhs_len)
1264 1264
 {
1265
-    char *endl, *endr;
1266 1265
     unsigned i = 0, j = 0;
1267 1266
     unsigned long li=0, ri=0;
1268 1267
     do {
... ...
@@ -1322,3 +1322,121 @@ uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx , uint32_t a, uint32_t
1322 1322
     return ret;
1323 1323
 }
1324 1324
 
1325
+int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase,
1326
+				unsigned nobjs,
1327
+				struct pdf_obj *objs, uint32_t *pdf_flags,
1328
+				uint32_t pdfsize, uint32_t pdfstartoff)
1329
+{
1330
+    ctx->pdf_nobjs = nobjs;
1331
+    ctx->pdf_objs = objs;
1332
+    ctx->pdf_flags = pdf_flags;
1333
+    ctx->pdf_size = pdfsize;
1334
+    ctx->pdf_startoff = pdfstartoff;
1335
+    ctx->pdf_phase = phase;
1336
+    return 0;
1337
+}
1338
+
1339
+int32_t cli_bcapi_pdf_get_obj_num(struct cli_bc_ctx *ctx)
1340
+{
1341
+    if (!ctx->pdf_phase)
1342
+	return -1;
1343
+    return ctx->pdf_nobjs;
1344
+}
1345
+
1346
+int32_t cli_bcapi_pdf_get_flags(struct cli_bc_ctx *ctx)
1347
+{
1348
+    if (!ctx->pdf_phase)
1349
+	return -1;
1350
+    return *ctx->pdf_flags;
1351
+}
1352
+
1353
+int32_t cli_bcapi_pdf_set_flags(struct cli_bc_ctx *ctx , int32_t flags)
1354
+{
1355
+    if (!ctx->pdf_phase)
1356
+	return -1;
1357
+    cli_dbgmsg("cli_pdf: bytecode set_flags %08x -> %08x\n",
1358
+	       *ctx->pdf_flags,
1359
+	       flags);
1360
+    *ctx->pdf_flags = flags;
1361
+    return 0;
1362
+}
1363
+
1364
+int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx , uint32_t objid)
1365
+{
1366
+    unsigned i;
1367
+    if (!ctx->pdf_phase)
1368
+	return -1;
1369
+    for (i=0;i<ctx->pdf_nobjs;i++) {
1370
+	if (ctx->pdf_objs[i].id == objid)
1371
+	    return i;
1372
+    }
1373
+    return -1;
1374
+}
1375
+
1376
+uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx , int32_t objidx)
1377
+{
1378
+    if (!ctx->pdf_phase ||
1379
+	objidx >= ctx->pdf_nobjs ||
1380
+	ctx->pdf_phase == PDF_PHASE_POSTDUMP /* map is obj itself, no access to pdf anymore */
1381
+       )
1382
+	return 0;
1383
+    if (objidx + 1 == ctx->pdf_nobjs)
1384
+	return ctx->pdf_size - ctx->pdf_objs[objidx].start;
1385
+    return ctx->pdf_objs[objidx+1].start - ctx->pdf_objs[objidx].start - 4;
1386
+}
1387
+
1388
+uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t objidx, uint32_t amount)
1389
+{
1390
+    uint32_t size = cli_bcapi_pdf_getobjsize(ctx, objidx);
1391
+    if (amount > size)
1392
+	return NULL;
1393
+    return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx].start, amount);
1394
+}
1395
+
1396
+int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t objidx)
1397
+{
1398
+    if (!ctx->pdf_phase ||
1399
+	objidx >= ctx->pdf_nobjs)
1400
+	return -1;
1401
+    return ctx->pdf_objs[objidx].id;
1402
+}
1403
+
1404
+int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t objidx)
1405
+{
1406
+    if (!ctx->pdf_phase ||
1407
+	objidx >= ctx->pdf_nobjs)
1408
+	return -1;
1409
+    return ctx->pdf_objs[objidx].flags;
1410
+}
1411
+
1412
+int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t objidx, int32_t flags)
1413
+{
1414
+    if (!ctx->pdf_phase ||
1415
+	objidx >= ctx->pdf_nobjs)
1416
+	return -1;
1417
+    cli_dbgmsg("cli_pdf: bytecode setobjflags %08x -> %08x\n",
1418
+	       ctx->pdf_objs[objidx].flags,
1419
+	       flags);
1420
+    ctx->pdf_objs[objidx].flags = flags;
1421
+    return 0;
1422
+}
1423
+
1424
+int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx , int32_t objidx)
1425
+{
1426
+    if (!ctx->pdf_phase ||
1427
+	objidx >= ctx->pdf_nobjs)
1428
+	return -1;
1429
+    return ctx->pdf_startoff + ctx->pdf_objs[objidx].start;
1430
+}
1431
+
1432
+int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx)
1433
+{
1434
+    return ctx->pdf_phase;
1435
+}
1436
+
1437
+int32_t cli_bcapi_pdf_get_dumpedobjid(struct cli_bc_ctx *ctx)
1438
+{
1439
+    if (ctx->pdf_phase != PDF_PHASE_POSTDUMP)
1440
+	return -1;
1441
+    return ctx->pdf_dumpedid;
1442
+}
... ...
@@ -51,6 +51,8 @@ enum BytecodeKind {
51 51
     BC_LOGICAL=256,
52 52
     /** a PE unpacker */
53 53
     BC_PE_UNPACKER,
54
+    /* PDF hook */
55
+    BC_PDF,
54 56
     _BC_LAST_HOOK
55 57
 };
56 58
 
... ...
@@ -59,7 +61,63 @@ static const unsigned  PE_INVALID_RVA = 0xFFFFFFFF ;
59 59
 /** LibClamAV functionality level constants */
60 60
 enum FunctionalityLevels {
61 61
     FUNC_LEVEL_096 = 51,
62
-    FUNC_LEVEL_096_dev
62
+    FUNC_LEVEL_096_dev,
63
+    FUNC_LEVEL_096_1,
64
+    FUNC_LEVEL_096_1_dev,
65
+    FUNC_LEVEL_096_2
66
+};
67
+
68
/** Phase of the PDF scan at which a bytecode hook is being run. */
enum pdf_phase {
    PDF_PHASE_NONE     = 0, /* not a PDF */
    PDF_PHASE_PARSED   = 1, /* after parsing a PDF, object flags can be set etc. */
    PDF_PHASE_POSTDUMP = 2, /* after an obj was dumped and scanned */
    PDF_PHASE_END      = 3  /* after the pdf scan finished */
};
74
+
75
/** Flags describing the PDF as a whole.
  * NOTE(review): presumably bit indices into the PDF-wide flags word, used
  * the same way as enum pdf_objflags -- confirm against pdf.c. */
enum pdf_flag {
    BAD_PDF_VERSION        = 0,
    BAD_PDF_HEADERPOS      = 1,
    BAD_PDF_TRAILER        = 2,
    BAD_PDF_TOOMANYOBJS    = 3,
    BAD_STREAM_FILTERS     = 4,
    BAD_FLATE              = 5,
    BAD_FLATESTART         = 6,
    BAD_STREAMSTART        = 7,
    BAD_ASCIIDECODE        = 8,
    BAD_INDOBJ             = 9,
    UNTERMINATED_OBJ_DICT  = 10,
    ESCAPED_COMMON_PDFNAME = 11,
    HEX_JAVASCRIPT         = 12,
    UNKNOWN_FILTER         = 13,
    MANY_FILTERS           = 14,
    HAS_OPENACTION         = 15,
    BAD_STREAMLEN          = 16,
    ENCRYPTED_PDF          = 17,
    LINEARIZED_PDF         = 18 /* not bad, just as flag */
};
96
+
97
/** Per-object flags. Each enumerator is a bit index: the PDF code tests
  * them as (1 << OBJ_xxx) against an object's flags word. */
enum pdf_objflags {
    OBJ_STREAM         = 0,
    OBJ_DICT           = 1,
    OBJ_EMBEDDED_FILE  = 2,
    OBJ_FILTER_AH      = 3,
    OBJ_FILTER_A85     = 4,
    OBJ_FILTER_FLATE   = 5,
    OBJ_FILTER_LZW     = 6,
    OBJ_FILTER_RL      = 7,
    OBJ_FILTER_FAX     = 8,
    OBJ_FILTER_JBIG2   = 9,
    OBJ_FILTER_DCT     = 10,
    OBJ_FILTER_JPX     = 11,
    OBJ_FILTER_CRYPT   = 12,
    OBJ_FILTER_UNKNOWN = 13,
    OBJ_JAVASCRIPT     = 14,
    OBJ_OPENACTION     = 15,
    OBJ_HASFILTERS     = 16,
    OBJ_SIGNED         = 17,
    OBJ_IMAGE          = 18, /* image objects are not dumped / scanned */
    OBJ_TRUNCATED      = 19,
    OBJ_FORCEDUMP      = 20  /* bytecode can force a dump by setting this */
};
64 120
 
65 121
 #ifdef __CLAMBC__
... ...
@@ -697,7 +755,80 @@ int32_t version_compare(const uint8_t* lhs, uint32_t lhs_len,
697 697
             1 - match */
698 698
 uint32_t check_platform(uint32_t a, uint32_t b, uint32_t c);
699 699
 
700
+/** Return number of pdf objects 
701
+ * @return -1 - if not called from PDF hook
702
+          >=0 - number of PDF objects
703
+*/
704
+int32_t pdf_get_obj_num(void);
705
+
706
+/** Return the flags for the entire PDF (as set so far).
707
+  * @return -1 - if not called from PDF hook
708
+           >=0 - pdf flags */
709
+int32_t pdf_get_flags(void);
710
+
711
+/** Sets the flags for the entire PDF.
712
+  * It is recommended that you retrieve old flags, and just add new ones.
713
+  * @param flags - flags to set */
714
+int32_t pdf_set_flags(int32_t flags);
715
+
716
+/** Lookup pdf object with specified id.
717
+  * @param id - pdf id (objnumber << 8 | generationid)
718
+    @return -1 - if object id doesn't exist
719
+           >=0 - object index
720
+  */
721
+int32_t pdf_lookupobj(uint32_t id);
722
+
723
+/** Return the size of the specified PDF obj.
724
+  * @param objidx - object index (from 0), not object id!
725
+  * @return 0 - if not called from PDF hook, or invalid objidx
726
+          >=0 - size of object */
727
+uint32_t pdf_getobjsize(int32_t objidx);
728
+
729
+/** Return the undecoded object.
730
+  Meant only for reading, write modifies the fmap buffer, so avoid!
731
+  @param objidx - object index (from 0), not object id!
732
+  @param amount - size returned by pdf_getobjsize (or smaller)
733
+  @return NULL - invalid objidx/amount
734
+          pointer - pointer to original object */
735
+uint8_t *pdf_getobj(int32_t objidx, uint32_t amount);
736
+
737
+/** Return the object id for the specified object index.
738
+   @param objidx - object index (from 0)
739
+   @return -1 - object index invalid
740
+          >=0 - object id (obj id << 8 | generation id)
741
+*/
742
+int32_t pdf_getobjid(int32_t objidx);
743
+
744
+/** Return the object flags for the specified object index.
745
+   @param objidx - object index (from 0)
746
+   @return -1 - object index invalid
747
+          >=0 - object flags
748
+*/
749
+int32_t pdf_getobjflags(int32_t objidx);
750
+
751
+/** Sets the object flags for the specified object index.
752
+   This can be used to force dumping of a certain obj, by setting the
753
+   (1 << OBJ_FORCEDUMP) bit in its flags, for example.
754
+   @param objidx - object index (from 0)
755
+   @return -1 - object index invalid, or not called from PDF hook
756
+            0 - flags were set
758
+int32_t pdf_setobjflags(int32_t objidx, int32_t flags);
759
+
760
+/** Return the object's offset in the PDF.
761
+   @param objidx - object index (from 0)
762
+   @return -1 - object index invalid
763
+          >=0 - offset
764
+*/
765
+int32_t pdf_get_offset(int32_t objidx);
766
+
767
+/** Return an 'enum pdf_phase'.
768
+  * Identifies at which phase this bytecode was called */
769
+int32_t pdf_get_phase(void);
700 770
 
771
+/** Return the currently dumped obj id.
772
+  Valid only in PDF_PHASE_POSTDUMP */
773
+int32_t pdf_get_dumpedobjid(void);
701 774
 /* ---------------- END 0.96.2 APIs   ----------------------------------- */
702 775
 #endif
703 776
 #endif
... ...
@@ -109,6 +109,18 @@ uint32_t cli_bcapi_disable_bytecode_if(struct cli_bc_ctx *ctx , const int8_t*, u
109 109
 uint32_t cli_bcapi_disable_jit_if(struct cli_bc_ctx *ctx , const int8_t*, uint32_t, uint32_t);
110 110
 int32_t cli_bcapi_version_compare(struct cli_bc_ctx *ctx , const uint8_t*, uint32_t, const uint8_t*, uint32_t);
111 111
 uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx , uint32_t, uint32_t, uint32_t);
112
+int32_t cli_bcapi_pdf_get_obj_num(struct cli_bc_ctx *ctx );
113
+int32_t cli_bcapi_pdf_get_flags(struct cli_bc_ctx *ctx );
114
+int32_t cli_bcapi_pdf_set_flags(struct cli_bc_ctx *ctx , int32_t);
115
+int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx , uint32_t);
116
+uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx , int32_t);
117
+uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t, uint32_t);
118
+int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t);
119
+int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t);
120
+int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t, int32_t);
121
+int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx , int32_t);
122
+int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx );
123
+int32_t cli_bcapi_pdf_get_dumpedobjid(struct cli_bc_ctx *ctx );
112 124
 
113 125
 const struct cli_apiglobal cli_globals[] = {
114 126
 /* Bytecode globals BEGIN */
... ...
@@ -133,17 +145,17 @@ static uint16_t cli_tmp4[]={16, 8, 8, 32, 32, 32, 32, 32, 32, 32, 32, 32, 16, 16
133 133
 static uint16_t cli_tmp5[]={32, 16, 16, 32, 32, 32, 16, 16};
134 134
 static uint16_t cli_tmp6[]={32};
135 135
 static uint16_t cli_tmp7[]={32};
136
-static uint16_t cli_tmp8[]={32, 32, 32, 32};
137
-static uint16_t cli_tmp9[]={32, 65, 32, 65, 32};
138
-static uint16_t cli_tmp10[]={32, 65, 32, 32};
139
-static uint16_t cli_tmp11[]={32, 81, 32};
140
-static uint16_t cli_tmp12[]={82};
141
-static uint16_t cli_tmp13[]={32, 32, 32, 32, 32, 32, 32, 83, 83, 83, 83, 83, 83, 83, 8, 8, 8, 8, 8, 8, 8, 8, 8};
142
-static uint16_t cli_tmp14[]={8};
143
-static uint16_t cli_tmp15[]={32, 32};
144
-static uint16_t cli_tmp16[]={32};
145
-static uint16_t cli_tmp17[]={65, 32, 32};
146
-static uint16_t cli_tmp18[]={32, 32, 32};
136
+static uint16_t cli_tmp8[]={32};
137
+static uint16_t cli_tmp9[]={32, 32};
138
+static uint16_t cli_tmp10[]={32, 32, 32};
139
+static uint16_t cli_tmp11[]={65, 32, 32};
140
+static uint16_t cli_tmp12[]={32, 32, 32, 32};
141
+static uint16_t cli_tmp13[]={32, 65, 32, 65, 32};
142
+static uint16_t cli_tmp14[]={32, 65, 32, 32};
143
+static uint16_t cli_tmp15[]={32, 85, 32};
144
+static uint16_t cli_tmp16[]={86};
145
+static uint16_t cli_tmp17[]={32, 32, 32, 32, 32, 32, 32, 87, 87, 87, 87, 87, 87, 87, 8, 8, 8, 8, 8, 8, 8, 8, 8};
146
+static uint16_t cli_tmp18[]={8};
147 147
 static uint16_t cli_tmp19[]={32, 65, 32};
148 148
 static uint16_t cli_tmp20[]={32, 65, 32, 32, 32, 32};
149 149
 static uint16_t cli_tmp21[]={32, 91, 32};
... ...
@@ -166,17 +178,17 @@ const struct cli_bc_type cli_apicall_types[]={
166 166
 	{DStructType, cli_tmp5, 8, 0, 0},
167 167
 	{DArrayType, cli_tmp6, 1, 0, 0},
168 168
 	{DArrayType, cli_tmp7, 64, 0, 0},
169
-	{DFunctionType, cli_tmp8, 4, 0, 0},
170
-	{DFunctionType, cli_tmp9, 5, 0, 0},
171
-	{DFunctionType, cli_tmp10, 4, 0, 0},
169
+	{DFunctionType, cli_tmp8, 1, 0, 0},
170
+	{DFunctionType, cli_tmp9, 2, 0, 0},
171
+	{DFunctionType, cli_tmp10, 3, 0, 0},
172 172
 	{DFunctionType, cli_tmp11, 3, 0, 0},
173
-	{DPointerType, cli_tmp12, 1, 0, 0},
174
-	{DStructType, cli_tmp13, 23, 0, 0},
175
-	{DArrayType, cli_tmp14, 65, 0, 0},
176
-	{DFunctionType, cli_tmp15, 2, 0, 0},
177
-	{DFunctionType, cli_tmp16, 1, 0, 0},
178
-	{DFunctionType, cli_tmp17, 3, 0, 0},
179
-	{DFunctionType, cli_tmp18, 3, 0, 0},
173
+	{DFunctionType, cli_tmp12, 4, 0, 0},
174
+	{DFunctionType, cli_tmp13, 5, 0, 0},
175
+	{DFunctionType, cli_tmp14, 4, 0, 0},
176
+	{DFunctionType, cli_tmp15, 3, 0, 0},
177
+	{DPointerType, cli_tmp16, 1, 0, 0},
178
+	{DStructType, cli_tmp17, 23, 0, 0},
179
+	{DArrayType, cli_tmp18, 65, 0, 0},
180 180
 	{DFunctionType, cli_tmp19, 3, 0, 0},
181 181
 	{DFunctionType, cli_tmp20, 6, 0, 0},
182 182
 	{DFunctionType, cli_tmp21, 3, 0, 0},
... ...
@@ -194,13 +206,13 @@ const struct cli_bc_type cli_apicall_types[]={
194 194
 const unsigned cli_apicall_maxtypes=sizeof(cli_apicall_types)/sizeof(cli_apicall_types[0]);
195 195
 const struct cli_apicall cli_apicalls[]={
196 196
 /* Bytecode APIcalls BEGIN */
197
-	{"test1", 18, 0, 0},
197
+	{"test1", 10, 0, 0},
198 198
 	{"read", 19, 0, 1},
199 199
 	{"write", 19, 1, 1},
200
-	{"seek", 18, 1, 0},
200
+	{"seek", 10, 1, 0},
201 201
 	{"setvirusname", 19, 2, 1},
202 202
 	{"debug_print_str", 19, 3, 1},
203
-	{"debug_print_uint", 15, 0, 2},
203
+	{"debug_print_uint", 9, 0, 2},
204 204
 	{"disasm_x86", 25, 4, 1},
205 205
 	{"trace_directory", 19, 5, 1},
206 206
 	{"trace_scope", 19, 6, 1},
... ...
@@ -208,68 +220,80 @@ const struct cli_apicall cli_apicalls[]={
208 208
 	{"trace_op", 19, 8, 1},
209 209
 	{"trace_value", 19, 9, 1},
210 210
 	{"trace_ptr", 19, 10, 1},
211
-	{"pe_rawaddr", 15, 1, 2},
211
+	{"pe_rawaddr", 9, 1, 2},
212 212
 	{"file_find", 19, 11, 1},
213
-	{"file_byteat", 15, 2, 2},
213
+	{"file_byteat", 9, 2, 2},
214 214
 	{"malloc", 24, 0, 3},
215
-	{"test2", 15, 3, 2},
215
+	{"test2", 9, 3, 2},
216 216
 	{"get_pe_section", 21, 12, 1},
217 217
 	{"fill_buffer", 20, 0, 4},
218
-	{"extract_new", 15, 4, 2},
219
-	{"read_number", 15, 5, 2},
220
-	{"hashset_new", 16, 0, 5},
221
-	{"hashset_add", 18, 2, 0},
222
-	{"hashset_remove", 18, 3, 0},
223
-	{"hashset_contains", 18, 4, 0},
224
-	{"hashset_done", 15, 6, 2},
225
-	{"hashset_empty", 15, 7, 2},
226
-	{"buffer_pipe_new", 15, 8, 2},
227
-	{"buffer_pipe_new_fromfile", 15, 9, 2},
228
-	{"buffer_pipe_read_avail", 15, 10, 2},
229
-	{"buffer_pipe_read_get", 17, 0, 6},
230
-	{"buffer_pipe_read_stopped", 18, 5, 0},
231
-	{"buffer_pipe_write_avail", 15, 11, 2},
232
-	{"buffer_pipe_write_get", 17, 1, 6},
233
-	{"buffer_pipe_write_stopped", 18, 6, 0},
234
-	{"buffer_pipe_done", 15, 12, 2},
235
-	{"inflate_init", 8, 0, 7},
236
-	{"inflate_process", 15, 13, 2},
237
-	{"inflate_done", 15, 14, 2},
238
-	{"bytecode_rt_error", 15, 15, 2},
239
-	{"jsnorm_init", 15, 16, 2},
240
-	{"jsnorm_process", 15, 17, 2},
241
-	{"jsnorm_done", 15, 18, 2},
242
-	{"ilog2", 18, 7, 0},
243
-	{"ipow", 8, 1, 7},
244
-	{"iexp", 8, 2, 7},
245
-	{"isin", 8, 3, 7},
246
-	{"icos", 8, 4, 7},
247
-	{"memstr", 9, 0, 8},
248
-	{"hex2ui", 18, 8, 0},
218
+	{"extract_new", 9, 4, 2},
219
+	{"read_number", 9, 5, 2},
220
+	{"hashset_new", 8, 0, 5},
221
+	{"hashset_add", 10, 2, 0},
222
+	{"hashset_remove", 10, 3, 0},
223
+	{"hashset_contains", 10, 4, 0},
224
+	{"hashset_done", 9, 6, 2},
225
+	{"hashset_empty", 9, 7, 2},
226
+	{"buffer_pipe_new", 9, 8, 2},
227
+	{"buffer_pipe_new_fromfile", 9, 9, 2},
228
+	{"buffer_pipe_read_avail", 9, 10, 2},
229
+	{"buffer_pipe_read_get", 11, 0, 6},
230
+	{"buffer_pipe_read_stopped", 10, 5, 0},
231
+	{"buffer_pipe_write_avail", 9, 11, 2},
232
+	{"buffer_pipe_write_get", 11, 1, 6},
233
+	{"buffer_pipe_write_stopped", 10, 6, 0},
234
+	{"buffer_pipe_done", 9, 12, 2},
235
+	{"inflate_init", 12, 0, 7},
236
+	{"inflate_process", 9, 13, 2},
237
+	{"inflate_done", 9, 14, 2},
238
+	{"bytecode_rt_error", 9, 15, 2},
239
+	{"jsnorm_init", 9, 16, 2},
240
+	{"jsnorm_process", 9, 17, 2},
241
+	{"jsnorm_done", 9, 18, 2},
242
+	{"ilog2", 10, 7, 0},
243
+	{"ipow", 12, 1, 7},
244
+	{"iexp", 12, 2, 7},
245
+	{"isin", 12, 3, 7},
246
+	{"icos", 12, 4, 7},
247
+	{"memstr", 13, 0, 8},
248
+	{"hex2ui", 10, 8, 0},
249 249
 	{"atoi", 19, 13, 1},
250 250
 	{"debug_print_str_start", 19, 14, 1},
251 251
 	{"debug_print_str_nonl", 19, 15, 1},
252 252
 	{"entropy_buffer", 19, 16, 1},
253
-	{"map_new", 18, 9, 0},
254
-	{"map_addkey", 10, 0, 9},
255
-	{"map_setvalue", 10, 1, 9},
256
-	{"map_remove", 10, 2, 9},
257
-	{"map_find", 10, 3, 9},
258
-	{"map_getvaluesize", 15, 19, 2},
259
-	{"map_getvalue", 17, 2, 6},
260
-	{"map_done", 15, 20, 2},
261
-	{"file_find_limit", 10, 4, 9},
262
-	{"engine_functionality_level", 16, 1, 5},
263
-	{"engine_dconf_level", 16, 2, 5},
264
-	{"engine_scan_options", 16, 3, 5},
265
-	{"engine_db_options", 16, 4, 5},
266
-	{"extract_set_container", 15, 21, 2},
267
-	{"input_switch", 15, 22, 2},
268
-	{"get_environment", 11, 17, 1},
269
-	{"disable_bytecode_if", 10, 5, 9},
270
-	{"disable_jit_if", 10, 6, 9},
271
-	{"version_compare", 9, 1, 8},
272
-	{"check_platform", 8, 5, 7}
253
+	{"map_new", 10, 9, 0},
254
+	{"map_addkey", 14, 0, 9},
255
+	{"map_setvalue", 14, 1, 9},
256
+	{"map_remove", 14, 2, 9},
257
+	{"map_find", 14, 3, 9},
258
+	{"map_getvaluesize", 9, 19, 2},
259
+	{"map_getvalue", 11, 2, 6},
260
+	{"map_done", 9, 20, 2},
261
+	{"file_find_limit", 14, 4, 9},
262
+	{"engine_functionality_level", 8, 1, 5},
263
+	{"engine_dconf_level", 8, 2, 5},
264
+	{"engine_scan_options", 8, 3, 5},
265
+	{"engine_db_options", 8, 4, 5},
266
+	{"extract_set_container", 9, 21, 2},
267
+	{"input_switch", 9, 22, 2},
268
+	{"get_environment", 15, 17, 1},
269
+	{"disable_bytecode_if", 14, 5, 9},
270
+	{"disable_jit_if", 14, 6, 9},
271
+	{"version_compare", 13, 1, 8},
272
+	{"check_platform", 12, 5, 7},
273
+	{"pdf_get_obj_num", 8, 5, 5},
274
+	{"pdf_get_flags", 8, 6, 5},
275
+	{"pdf_set_flags", 9, 23, 2},
276
+	{"pdf_lookupobj", 9, 24, 2},
277
+	{"pdf_getobjsize", 9, 25, 2},
278
+	{"pdf_getobj", 11, 3, 6},
279
+	{"pdf_getobjid", 9, 26, 2},
280
+	{"pdf_getobjflags", 9, 27, 2},
281
+	{"pdf_setobjflags", 10, 10, 0},
282
+	{"pdf_get_offset", 9, 28, 2},
283
+	{"pdf_get_phase", 8, 7, 5},
284
+	{"pdf_get_dumpedobjid", 8, 8, 5}
273 285
 /* Bytecode APIcalls END */
274 286
 };
275 287
 const cli_apicall_int2 cli_apicalls0[] = {
... ...
@@ -282,7 +306,8 @@ const cli_apicall_int2 cli_apicalls0[] = {
282 282
 	(cli_apicall_int2)cli_bcapi_buffer_pipe_write_stopped,
283 283
 	(cli_apicall_int2)cli_bcapi_ilog2,
284 284
 	(cli_apicall_int2)cli_bcapi_hex2ui,
285
-	(cli_apicall_int2)cli_bcapi_map_new
285
+	(cli_apicall_int2)cli_bcapi_map_new,
286
+	(cli_apicall_int2)cli_bcapi_pdf_setobjflags
286 287
 };
287 288
 const cli_apicall_pointer cli_apicalls1[] = {
288 289
 	(cli_apicall_pointer)cli_bcapi_read,
... ...
@@ -327,7 +352,13 @@ const cli_apicall_int1 cli_apicalls2[] = {
327 327
 	(cli_apicall_int1)cli_bcapi_map_getvaluesize,
328 328
 	(cli_apicall_int1)cli_bcapi_map_done,
329 329
 	(cli_apicall_int1)cli_bcapi_extract_set_container,
330
-	(cli_apicall_int1)cli_bcapi_input_switch
330
+	(cli_apicall_int1)cli_bcapi_input_switch,
331
+	(cli_apicall_int1)cli_bcapi_pdf_set_flags,
332
+	(cli_apicall_int1)cli_bcapi_pdf_lookupobj,
333
+	(cli_apicall_int1)cli_bcapi_pdf_getobjsize,
334
+	(cli_apicall_int1)cli_bcapi_pdf_getobjid,
335
+	(cli_apicall_int1)cli_bcapi_pdf_getobjflags,
336
+	(cli_apicall_int1)cli_bcapi_pdf_get_offset
331 337
 };
332 338
 const cli_apicall_malloclike cli_apicalls3[] = {
333 339
 	(cli_apicall_malloclike)cli_bcapi_malloc
... ...
@@ -340,12 +371,17 @@ const cli_apicall_allocobj cli_apicalls5[] = {
340 340
 	(cli_apicall_allocobj)cli_bcapi_engine_functionality_level,
341 341
 	(cli_apicall_allocobj)cli_bcapi_engine_dconf_level,
342 342
 	(cli_apicall_allocobj)cli_bcapi_engine_scan_options,
343
-	(cli_apicall_allocobj)cli_bcapi_engine_db_options
343
+	(cli_apicall_allocobj)cli_bcapi_engine_db_options,
344
+	(cli_apicall_allocobj)cli_bcapi_pdf_get_obj_num,
345
+	(cli_apicall_allocobj)cli_bcapi_pdf_get_flags,
346
+	(cli_apicall_allocobj)cli_bcapi_pdf_get_phase,
347
+	(cli_apicall_allocobj)cli_bcapi_pdf_get_dumpedobjid
344 348
 };
345 349
 const cli_apicall_bufget cli_apicalls6[] = {
346 350
 	(cli_apicall_bufget)cli_bcapi_buffer_pipe_read_get,
347 351
 	(cli_apicall_bufget)cli_bcapi_buffer_pipe_write_get,
348
-	(cli_apicall_bufget)cli_bcapi_map_getvalue
352
+	(cli_apicall_bufget)cli_bcapi_map_getvalue,
353
+	(cli_apicall_bufget)cli_bcapi_pdf_getobj
349 354
 };
350 355
 const cli_apicall_int3 cli_apicalls7[] = {
351 356
 	(cli_apicall_int3)cli_bcapi_inflate_init,
... ...
@@ -107,5 +107,17 @@ uint32_t cli_bcapi_disable_bytecode_if(struct cli_bc_ctx *ctx , const int8_t*, u
107 107
 uint32_t cli_bcapi_disable_jit_if(struct cli_bc_ctx *ctx , const int8_t*, uint32_t, uint32_t);
108 108
 int32_t cli_bcapi_version_compare(struct cli_bc_ctx *ctx , const uint8_t*, uint32_t, const uint8_t*, uint32_t);
109 109
 uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx , uint32_t, uint32_t, uint32_t);
110
+int32_t cli_bcapi_pdf_get_obj_num(struct cli_bc_ctx *ctx );
111
+int32_t cli_bcapi_pdf_get_flags(struct cli_bc_ctx *ctx );
112
+int32_t cli_bcapi_pdf_set_flags(struct cli_bc_ctx *ctx , int32_t);
113
+int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx , uint32_t);
114
+uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx , int32_t);
115
+uint8_t* cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx , int32_t, uint32_t);
116
+int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx , int32_t);
117
+int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx , int32_t);
118
+int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx , int32_t, int32_t);
119
+int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx , int32_t);
120
+int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx );
121
+int32_t cli_bcapi_pdf_get_dumpedobjid(struct cli_bc_ctx *ctx );
110 122
 
111 123
 #endif
... ...
@@ -156,6 +156,13 @@ struct cli_bc_ctx {
156 156
     fmap_t *save_map;
157 157
     const char *virname;
158 158
     struct cli_bc_hooks hooks;
159
+    uint32_t pdf_nobjs;
160
+    struct pdf_obj *pdf_objs;
161
+    uint32_t* pdf_flags;
162
+    uint32_t pdf_size;
163
+    uint32_t pdf_startoff;
164
+    unsigned pdf_phase;
165
+    int32_t pdf_dumpedid;
159 166
     const struct cli_exe_section *sections;
160 167
     char *tempfile;
161 168
     void *ctx;
... ...
@@ -50,6 +50,8 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
50 50
 #include "scanners.h"
51 51
 #include "fmap.h"
52 52
 #include "str.h"
53
+#include "bytecode.h"
54
+#include "bytecode_api.h"
53 55
 
54 56
 #ifdef	CL_DEBUG
55 57
 /*#define	SAVE_TMP	
... ...
@@ -62,28 +64,6 @@ static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
62 62
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
63 63
 
64 64
 #if 1
65
-enum pdf_flag {
66
-    BAD_PDF_VERSION=0,
67
-    BAD_PDF_HEADERPOS,
68
-    BAD_PDF_TRAILER,
69
-    BAD_PDF_TOOMANYOBJS,
70
-    BAD_STREAM_FILTERS,
71
-    BAD_FLATE,
72
-    BAD_FLATESTART,
73
-    BAD_STREAMSTART,
74
-    BAD_ASCIIDECODE,
75
-    BAD_INDOBJ,
76
-    UNTERMINATED_OBJ_DICT,
77
-    ESCAPED_COMMON_PDFNAME,
78
-    HEX_JAVASCRIPT,
79
-    UNKNOWN_FILTER,
80
-    MANY_FILTERS,
81
-    HAS_OPENACTION,
82
-    BAD_STREAMLEN,
83
-    ENCRYPTED_PDF,
84
-    LINEARIZED_PDF /* not bad, just as flag */
85
-};
86
-
87 65
 static int xrefCheck(const char *xref, const char *eof)
88 66
 {
89 67
     const char *q;
... ...
@@ -105,34 +85,6 @@ static int xrefCheck(const char *xref, const char *eof)
105 105
     return -1;
106 106
 }
107 107
 
108
-enum objflags {
109
-    OBJ_STREAM=0,
110
-    OBJ_DICT,
111
-    OBJ_EMBEDDED_FILE,
112
-    OBJ_FILTER_AH,
113
-    OBJ_FILTER_A85,
114
-    OBJ_FILTER_FLATE,
115
-    OBJ_FILTER_LZW,
116
-    OBJ_FILTER_RL,
117
-    OBJ_FILTER_FAX,
118
-    OBJ_FILTER_JBIG2,
119
-    OBJ_FILTER_DCT,
120
-    OBJ_FILTER_JPX,
121
-    OBJ_FILTER_CRYPT,
122
-    OBJ_FILTER_UNKNOWN,
123
-    OBJ_JAVASCRIPT,
124
-    OBJ_OPENACTION,
125
-    OBJ_HASFILTERS,
126
-    OBJ_SIGNED,
127
-    OBJ_IMAGE,
128
-    OBJ_TRUNCATED
129
-};
130
-
131
-struct pdf_obj {
132
-    uint32_t start;
133
-    uint32_t id;
134
-    uint32_t flags;
135
-};
136 108
 struct pdf_struct {
137 109
     struct pdf_obj *objs;
138 110
     unsigned nobjs;
... ...
@@ -140,6 +92,7 @@ struct pdf_struct {
140 140
     const char *map;
141 141
     off_t size;
142 142
     off_t offset;
143
+    off_t startoff;
143 144
     cli_ctx *ctx;
144 145
     const char *dir;
145 146
     unsigned files;
... ...
@@ -502,6 +455,40 @@ static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
502 502
     return pdf->offset - obj->start - 6;
503 503
 }
504 504
 
505
+static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd,
506
+			 int dumpid)
507
+{
508
+    int ret;
509
+    struct cli_bc_ctx *bc_ctx;
510
+    cli_ctx *ctx = pdf->ctx;
511
+    fmap_t *map;
512
+
513
+    bc_ctx = cli_bytecode_context_alloc();
514
+    if (!bc_ctx) {
515
+	cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
516
+	return CL_EMEM;
517
+    }
518
+
519
+    map = *ctx->fmap;
520
+    if (fd != -1) {
521
+	map = fmap(fd, 0, 0);
522
+	if (!map) {
523
+	    cli_warnmsg("can't mmap pdf extracted obj\n");
524
+	    map = *ctx->fmap;
525
+	    fd = -1;
526
+	}
527
+    }
528
+    cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs,
529
+				&pdf->flags, pdf->size, pdf->startoff);
530
+    cli_bytecode_context_setctx(bc_ctx, ctx);
531
+    ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map, ctx->virname);
532
+    cli_bytecode_context_destroy(bc_ctx);
533
+    if (fd != -1) {
534
+	funmap(map);
535
+    }
536
+    return ret;
537
+}
538
+
505 539
 static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
506 540
 {
507 541
     char fullname[NAME_MAX + 1];
... ...
@@ -509,20 +496,28 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
509 509
     off_t sum = 0;
510 510
     int rc = CL_SUCCESS;
511 511
     char *ascii_decoded = NULL;
512
+    int dump = 1;
512 513
 
513 514
     /* TODO: call bytecode hook here, allow override dumpability */
514 515
     if ((!(obj->flags & (1 << OBJ_STREAM)) ||
515 516
 	(obj->flags & (1 << OBJ_HASFILTERS)))
516 517
 	&& !(obj->flags & DUMP_MASK)) {
517 518
 	/* don't dump all streams */
518
-	return CL_CLEAN;
519
+	dump = 0;
519 520
     }
520 521
 #if 1
521 522
     if (obj->flags & (1 << OBJ_IMAGE)) {
522 523
 	/* don't dump / scan images */
523
-	return CL_CLEAN;
524
+	dump = 0;
524 525
     }
525 526
 #endif
527
+    if (obj->flags & (1 << OBJ_FORCEDUMP)) {
528
+	/* bytecode can force dump by setting this flag */
529
+	dump = 1;
530
+    }
531
+    if (!dump)
532
+	return CL_CLEAN;
533
+    cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id);
526 534
     snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
527 535
     fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
528 536
     if (fout < 0) {
... ...
@@ -541,7 +536,6 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
541 541
 			   pdf->size - obj->start,
542 542
 			   &p_stream, &p_endstream);
543 543
 	if (p_stream && p_endstream) {
544
-	    int rc2;
545 544
 	    const char *flate_in;
546 545
 	    long ascii_decoded_size = 0;
547 546
 	    size_t size = p_endstream - p_stream;
... ...
@@ -629,13 +623,6 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
629 629
 		if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
630 630
 		    rc = CL_EWRITE;
631 631
 	    }
632
-	    cli_updatelimits(pdf->ctx, sum);
633
-	    /* TODO: invoke bytecode on this pdf obj with metainformation associated
634
-	     * */
635
-	    lseek(fout, 0, SEEK_SET);
636
-	    rc2 = cli_magic_scandesc(fout, pdf->ctx);
637
-	    if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
638
-		rc = rc2;
639 632
 	}
640 633
     } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
641 634
 	const char *q2;
... ...
@@ -689,6 +676,21 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
689 689
     }
690 690
     } while (0);
691 691
     cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname);
692
+    if (sum) {
693
+	int rc2;
694
+	cli_updatelimits(pdf->ctx, sum);
695
+	/* TODO: invoke bytecode on this pdf obj with metainformation associated
696
+	 * */
697
+	lseek(fout, 0, SEEK_SET);
698
+	rc2 = cli_magic_scandesc(fout, pdf->ctx);
699
+	if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
700
+	    rc = rc2;
701
+	if (rc == CL_CLEAN) {
702
+	    rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
703
+	    if (rc2 == CL_VIRUS)
704
+		rc = rc2;
705
+	}
706
+    }
692 707
     close(fout);
693 708
     free(ascii_decoded);
694 709
     if (!pdf->ctx->engine->keeptmp)
... ...
@@ -709,7 +711,7 @@ enum objstate {
709 709
 
710 710
 struct pdfname_action {
711 711
     const char *pdfname;
712
-    enum objflags set_objflag;/* OBJ_DICT is noop */
712
+    enum pdf_objflags set_objflag;/* OBJ_DICT is noop */
713 713
     enum objstate from_state;/* STATE_NONE is noop */
714 714
     enum objstate to_state;
715 715
 };
... ...
@@ -896,7 +898,7 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
896 896
 				   objid >> 8, objid&0xff);
897 897
 			obj2 = find_obj(pdf, obj, objid);
898 898
 			if (obj2) {
899
-			    enum objflags flag = objstate == STATE_JAVASCRIPT ?
899
+			    enum pdf_objflags flag = objstate == STATE_JAVASCRIPT ?
900 900
 				OBJ_JAVASCRIPT : OBJ_OPENACTION;
901 901
 			    obj2->flags |= 1 << flag;
902 902
 			    obj->flags &= ~(1 << flag);
... ...
@@ -1026,7 +1028,8 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1026 1026
     size -= offset;
1027 1027
 
1028 1028
     pdf.size = size;
1029
-    pdf.map = fmap_need_off_once(map, offset, size);
1029
+    pdf.map = fmap_need_off(map, offset, size);
1030
+    pdf.startoff = offset;
1030 1031
     if (!pdf.map) {
1031 1032
 	cli_errmsg("cli_pdf: mmap() failed (3)\n");
1032 1033
 	return CL_EMAP;
... ...
@@ -1047,40 +1050,40 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
1047 1047
 	pdf_parseobj(&pdf, obj);
1048 1048
     }
1049 1049
 
1050
+    rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
1050 1051
     /* extract PDF objs */
1051
-    for (i=0;i<pdf.nobjs;i++) {
1052
+    for (i=0;!rc && i<pdf.nobjs;i++) {
1052 1053
 	struct pdf_obj *obj = &pdf.objs[i];
1053 1054
 	rc = pdf_extract_obj(&pdf, obj);
1054
-	if (rc != CL_SUCCESS)
1055
-	    break;
1056 1055
     }
1057 1056
 
1058 1057
     if (pdf.flags & (1 << ENCRYPTED_PDF))
1059 1058
 	pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) |
1060 1059
 	    (1 << BAD_ASCIIDECODE));
1061 1060
 
1062
-    if (pdf.flags) {
1061
+   if (pdf.flags && !rc) {
1063 1062
 	cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
1063
+	rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
1064
+	if (!rc && (ctx->options & CL_SCAN_ALGORITHMIC)) {
1065
+	    if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
1066
+		/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
1067
+		*ctx->virname = "Heuristics.PDF.ObfuscatedNameObject";
1068
+		rc = CL_VIRUS;
1069
+	    }
1070
+	}
1064 1071
 #if 0
1065 1072
 	/* TODO: find both trailers, and /Encrypt settings */
1066 1073
 	if (pdf.flags & (1 << LINEARIZED_PDF))
1067 1074
 	    pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
1068 1075
 	if (pdf.flags & (1 << MANY_FILTERS))
1069 1076
 	    pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
1070
-	if (pdf.flags &
1077
+	if (!rc && (pdf.flags &
1071 1078
 	    ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
1072
-	    (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
1073
-	    (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER))) {
1079
+	     (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
1080
+    	     (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
1074 1081
 	    rc = CL_EUNPACK;
1075 1082
 	}
1076 1083
 #endif
1077
-	if (ctx->options & CL_SCAN_ALGORITHMIC) {
1078
-	    if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
1079
-		/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
1080
-		*ctx->virname = "Heuristics.PDF.ObfuscatedNameObject";
1081
-		rc = CL_VIRUS;
1082
-	    }
1083
-	}
1084 1084
     }
1085 1085
     cli_dbgmsg("cli_pdf: returning %d\n", rc);
1086 1086
     free(pdf.objs);
... ...
@@ -21,6 +21,11 @@
21 21
 #define __PDF_H
22 22
 
23 23
 #include "others.h"
24
+struct pdf_obj {
25
+    uint32_t start;
26
+    uint32_t id;
27
+    uint32_t flags;
28
+};
24 29
 
25 30
 int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset);
26 31
 
... ...
@@ -2265,6 +2265,7 @@ int cli_scanpe(cli_ctx *ctx)
2265 2265
     ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PE_UNPACKER, map, ctx->virname);
2266 2266
     switch (ret) {
2267 2267
 	case CL_VIRUS:
2268
+	    cli_bytecode_context_destroy(bc_ctx);
2268 2269
 	    return CL_VIRUS;
2269 2270
 	case CL_SUCCESS:
2270 2271
 	    ndesc = cli_bytecode_context_getresult_file(bc_ctx, &tempfile);