git-svn: trunk@2968
aCaB authored on 2007/03/24 23:52:38... | ... |
@@ -1,3 +1,9 @@ |
1 |
+Sat Mar 24 13:49:59 CET 2007 (acab) |
|
2 |
+----------------------------------- |
|
3 |
+ * libclamav/upx.c: improve upx rebuilder - more to come |
|
4 |
+ many thanks to Andrey J. Melnikoff (TEMHOTA) <temnota * kmv.ru> |
|
5 |
+ for the suggestions and the preliminary patch |
|
6 |
+ |
|
1 | 7 |
Sat Mar 24 01:51:30 CET 2007 (acab) |
2 | 8 |
----------------------------------- |
3 | 9 |
* libclamav: - merge the first set of pe cleanup changes |
... | ... |
@@ -26,6 +26,19 @@ |
26 | 26 |
** 04/06/2k4 - Now we handle 2B, 2D and 2E :D |
27 | 27 |
** 28/08/2k4 - PE rebuild for nested packers |
28 | 28 |
** 12/12/2k4 - Improved PE rebuild code and added some debug info on failure |
29 |
+** 23/03/2k7 - New approach for rebuilding: |
|
30 |
+ o Get imports via magic |
|
31 |
+ o Get imports via leascan |
|
32 |
+ o if (!pe) pe=scan4pe(); |
|
33 |
+ o if (!pe) forgepe(); |
|
34 |
+*/ |
|
35 |
+ |
|
36 |
+/* |
|
37 |
+ TODO: |
|
38 |
+ - scan4pe() |
|
39 |
+ - forgepe() |
|
40 |
+ - pass dll flag from pe.c |
|
41 |
+ - grab statistical magic data from the zoo |
|
29 | 42 |
*/ |
30 | 43 |
|
31 | 44 |
/* |
... | ... |
@@ -49,6 +62,7 @@ |
49 | 49 |
#include "cltypes.h" |
50 | 50 |
#include "others.h" |
51 | 51 |
#include "upx.h" |
52 |
+#include "str.h" |
|
52 | 53 |
|
53 | 54 |
#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o)) |
54 | 55 |
#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o)) |
... | ... |
@@ -71,44 +85,74 @@ |
71 | 71 |
|
72 | 72 |
/* PE from UPX */ |
73 | 73 |
|
74 |
-static int pefromupx (char *src, char *dst, uint32_t *dsize, uint32_t ep, uint32_t upx0, uint32_t upx1, uint32_t magic) |
|
74 |
+static int pefromupx (char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_t ep, uint32_t upx0, uint32_t upx1, uint32_t *magic) |
|
75 | 75 |
{ |
76 | 76 |
char *imports, *sections, *pehdr, *newbuf; |
77 |
- unsigned int sectcnt, upd=1; |
|
78 |
- uint32_t realstuffsz, valign; |
|
77 |
+ unsigned int sectcnt=0, upd=1; |
|
78 |
+ uint32_t realstuffsz, valign=0; |
|
79 | 79 |
uint32_t foffset=0xd0+0xf8; |
80 | 80 |
|
81 | 81 |
if((dst == NULL) || (src == NULL)) |
82 | 82 |
return 0; |
83 | 83 |
|
84 |
- imports = dst + cli_readint32(src + ep - upx1 + magic); |
|
84 |
+ while ((valign=magic[sectcnt++])) { |
|
85 |
+ if ( ep - upx1 + valign <= ssize-5 && /* Wondering how we got so far?! */ |
|
86 |
+ src[ep - upx1 + valign - 2] == '\x8d' && /* lea edi, ... */ |
|
87 |
+ src[ep - upx1 + valign - 1] == '\xbe' ) /* ... [esi + offset] */ |
|
88 |
+ break; |
|
89 |
+ } |
|
85 | 90 |
|
86 |
- realstuffsz = imports-dst; |
|
87 |
- |
|
88 |
- if (realstuffsz >= *dsize ) { |
|
89 |
- cli_dbgmsg("UPX: wrong realstuff size - giving up rebuild\n"); |
|
90 |
- return 0; |
|
91 |
+ if (!valign && ep - upx1 + 0x80 < ssize-8) { |
|
92 |
+ char *pt = &src[ep - upx1 + 0x80]; |
|
93 |
+ cli_dbgmsg("UPX: bad magic - scanning for imports\n"); |
|
94 |
+ |
|
95 |
+ while ((pt=(char *)cli_memstr(pt, ssize - (pt-src) - 8, "\x8d\xbe", 2))) { |
|
96 |
+ if (pt[6] == '\x8b' && pt[7] == '\x07') { /* lea edi, [esi+imports] / mov eax, [edi] */ |
|
97 |
+ valign=pt-src+2-ep+upx1; |
|
98 |
+ break; |
|
99 |
+ } |
|
100 |
+ pt++; |
|
101 |
+ } |
|
91 | 102 |
} |
92 |
- |
|
93 |
- pehdr = imports; |
|
94 |
- while (CLI_ISCONTAINED(dst, *dsize, pehdr, 8) && cli_readint32(pehdr)) { |
|
95 |
- pehdr+=8; |
|
96 |
- while(CLI_ISCONTAINED(dst, *dsize, pehdr, 2) && *pehdr) { |
|
97 |
- pehdr++; |
|
98 |
- while (CLI_ISCONTAINED(dst, *dsize, pehdr, 2) && *pehdr) |
|
103 |
+ |
|
104 |
+ if (valign && CLI_ISCONTAINED(src, ssize, src + ep - upx1 + valign, 4)) { |
|
105 |
+ imports = dst + cli_readint32(src + ep - upx1 + valign); |
|
106 |
+ |
|
107 |
+ realstuffsz = imports-dst; |
|
108 |
+ |
|
109 |
+ if (realstuffsz >= *dsize ) { |
|
110 |
+ cli_dbgmsg("UPX: wrong realstuff size - giving up rebuild\n"); |
|
111 |
+ return 0; |
|
112 |
+ } |
|
113 |
+ |
|
114 |
+ pehdr = imports; |
|
115 |
+ while (CLI_ISCONTAINED(dst, *dsize, pehdr, 8) && cli_readint32(pehdr)) { |
|
116 |
+ pehdr+=8; |
|
117 |
+ while(CLI_ISCONTAINED(dst, *dsize, pehdr, 2) && *pehdr) { |
|
118 |
+ pehdr++; |
|
119 |
+ while (CLI_ISCONTAINED(dst, *dsize, pehdr, 2) && *pehdr) |
|
120 |
+ pehdr++; |
|
99 | 121 |
pehdr++; |
122 |
+ } |
|
100 | 123 |
pehdr++; |
101 | 124 |
} |
102 |
- pehdr++; |
|
125 |
+ |
|
126 |
+ pehdr+=4; |
|
127 |
+ } else { /* TODO: this one should be a separate if (!pe) */ |
|
128 |
+ cli_dbgmsg("UPX: no luck - brutally scanning for PE (TODO)\n"); |
|
129 |
+ /* TODO */ |
|
130 |
+ return 0; |
|
103 | 131 |
} |
104 | 132 |
|
105 |
- pehdr+=4; |
|
133 |
+ /* TODO: forgepe() */ |
|
134 |
+ |
|
135 |
+ /* TODO: Kick the checks outta here and write a checkpe() */ |
|
106 | 136 |
if (!CLI_ISCONTAINED(dst, *dsize, pehdr, 0xf8)) { |
107 | 137 |
cli_dbgmsg("UPX: sections out of bounds - giving up rebuild\n"); |
108 | 138 |
return 0; |
109 |
- } |
|
110 |
- |
|
111 |
- if ( cli_readint32(pehdr) != 0x4550 ) { |
|
139 |
+ } |
|
140 |
+ |
|
141 |
+ if (cli_readint32(pehdr) != 0x4550 ) { |
|
112 | 142 |
cli_dbgmsg("UPX: No magic for PE - giving up rebuild\n"); |
113 | 143 |
return 0; |
114 | 144 |
} |
... | ... |
@@ -205,7 +249,7 @@ static int doubleebx(char *src, uint32_t *myebx, uint32_t *scur, uint32_t ssize) |
205 | 205 |
int upx_inflate2b(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_t upx0, uint32_t upx1, uint32_t ep) |
206 | 206 |
{ |
207 | 207 |
int32_t backbytes, unp_offset = -1; |
208 |
- uint32_t backsize, myebx = 0, scur=0, dcur=0, i; |
|
208 |
+ uint32_t backsize, myebx = 0, scur=0, dcur=0, i, magic[]={0x108,0x110,0}; |
|
209 | 209 |
int oob; |
210 | 210 |
|
211 | 211 |
while (1) { |
... | ... |
@@ -274,20 +318,13 @@ int upx_inflate2b(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_ |
274 | 274 |
dcur+=backsize; |
275 | 275 |
} |
276 | 276 |
|
277 |
- |
|
278 |
- if ( ep - upx1 + 0x108 <= ssize-5 && /* Wondering how we got so far?! */ |
|
279 |
- src[ep - upx1 + 0x106] == '\x8d' && /* lea edi, ... */ |
|
280 |
- src[ep - upx1 + 0x107] == '\xbe' ) /* ... [esi + offset] */ |
|
281 |
- return pefromupx (src, dst, dsize, ep, upx0, upx1, 0x108); |
|
282 |
- |
|
283 |
- cli_dbgmsg("UPX: bad magic for 2b\n"); |
|
284 |
- return 0; |
|
277 |
+ return pefromupx (src, ssize, dst, dsize, ep, upx0, upx1, magic); |
|
285 | 278 |
} |
286 | 279 |
|
287 | 280 |
int upx_inflate2d(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_t upx0, uint32_t upx1, uint32_t ep) |
288 | 281 |
{ |
289 | 282 |
int32_t backbytes, unp_offset = -1; |
290 |
- uint32_t backsize, myebx = 0, scur=0, dcur=0, i; |
|
283 |
+ uint32_t backsize, myebx = 0, scur=0, dcur=0, i, magic[]={0x11c,0x124,0}; |
|
291 | 284 |
int oob; |
292 | 285 |
|
293 | 286 |
while (1) { |
... | ... |
@@ -363,20 +400,13 @@ int upx_inflate2d(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_ |
363 | 363 |
dcur+=backsize; |
364 | 364 |
} |
365 | 365 |
|
366 |
- if ( ep - upx1 + 0x124 <= ssize-5 ) { /* Wondering how we got so far?! */ |
|
367 |
- if ( src[ep - upx1 + 0x11a] == '\x8d' && src[ep - upx1 + 0x11b] == '\xbe' ) |
|
368 |
- return pefromupx (src, dst, dsize, ep, upx0, upx1, 0x11c); |
|
369 |
- if ( src[ep - upx1 + 0x122] == '\x8d' && src[ep - upx1 + 0x123] == '\xbe' ) |
|
370 |
- return pefromupx (src, dst, dsize, ep, upx0, upx1, 0x124); |
|
371 |
- } |
|
372 |
- cli_dbgmsg("UPX: bad magic for 2d\n"); |
|
373 |
- return 0; |
|
366 |
+ return pefromupx (src, ssize, dst, dsize, ep, upx0, upx1, magic); |
|
374 | 367 |
} |
375 | 368 |
|
376 | 369 |
int upx_inflate2e(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_t upx0, uint32_t upx1, uint32_t ep) |
377 | 370 |
{ |
378 | 371 |
int32_t backbytes, unp_offset = -1; |
379 |
- uint32_t backsize, myebx = 0, scur=0, dcur=0, i; |
|
372 |
+ uint32_t backsize, myebx = 0, scur=0, dcur=0, i, magic[]={0x128,0x130,0}; |
|
380 | 373 |
int oob; |
381 | 374 |
|
382 | 375 |
for(;;) { |
... | ... |
@@ -459,12 +489,5 @@ int upx_inflate2e(char *src, uint32_t ssize, char *dst, uint32_t *dsize, uint32_ |
459 | 459 |
dcur+=backsize; |
460 | 460 |
} |
461 | 461 |
|
462 |
- if ( ep - upx1 + 0x130 <= ssize-5 ) { /* Wondering how we got so far?! */ |
|
463 |
- if ( src[ep - upx1 + 0x126] == '\x8d' && src[ep - upx1 + 0x127] == '\xbe' ) |
|
464 |
- return pefromupx (src, dst, dsize, ep, upx0, upx1, 0x128); |
|
465 |
- if ( src[ep - upx1 + 0x12e] == '\x8d' && src[ep - upx1 + 0x12f] == '\xbe' ) |
|
466 |
- return pefromupx (src, dst, dsize, ep, upx0, upx1, 0x130); |
|
467 |
- } |
|
468 |
- cli_dbgmsg("UPX: bad magic for 2e\n"); |
|
469 |
- return 0; |
|
462 |
+ return pefromupx (src, ssize, dst, dsize, ep, upx0, upx1, magic); |
|
470 | 463 |
} |