Browse code

libclamav: SHA1/SHA256 handling changes and wildcard-size support

David Raynor authored on 2013/03/09 08:10:07
Showing 9 changed files
... ...
@@ -1,3 +1,7 @@
1
+Fri Mar 8 17:48:34 EDT 2013 (dar)
2
+------------------------------------
3
+ * libclamav: SHA1/SHA256 handling changes and wildcard-size support
4
+
1 5
 Thu Mar 7 19:38:34 EDT 2013 (dar)
2 6
 ------------------------------------
3 7
  * sigtool: Add print-certs command to allow dumping certs without a scan
... ...
@@ -201,10 +201,10 @@ attachment.exe: OK
201 201
 
202 202
     \section{Signature formats}
203 203
 
204
-    \subsection{MD5}
205
-    The easiest way to create signatures for ClamAV is to use MD5 checksums,
206
-    however this method can be only used against static malware. To create
207
-    a signature for \verb+test.exe+ use the \verb+--md5+ option of sigtool:
204
+    \subsection{Hash-based signatures}
205
+    The easiest way to create signatures for ClamAV is to use file hash checksums,
206
+    however this method can only be used against static malware. To create an
207
+    MD5 signature for \verb+test.exe+ use the \verb+--md5+ option of sigtool:
208 208
     \begin{verbatim}
209 209
 zolw@localhost:/tmp/test$ sigtool --md5 test.exe > test.hdb
210 210
 zolw@localhost:/tmp/test$ cat test.hdb 
... ...
@@ -238,17 +238,50 @@ Time: 0.024 sec (0 m 0 s)
238 238
     left in /tmp. Please keep in mind that a hash signature will stop
239 239
     matching as soon as a single byte changes in the target file.}
240 240
 
241
-    \subsection{MD5, PE section based}
242
-    You can create a MD5 signature for a specific section in a PE file.
241
+    \subsection{SHA1, SHA256}
242
+    ClamAV 0.98 has also added support for SHA1 and SHA256 file checksums.
243
+    The format is the same as for MD5 file checksums.
244
+    ClamAV differentiates between the hash types based on the length of the hash string
245
+    in the signature. For best backwards compatibility, these should be
246
+    placed inside a \verb+*.hsb+ file. The format is:
247
+    \begin{verbatim}
248
+HashString:FileSize:MalwareName
249
+    \end{verbatim}
250
+
251
+    \subsection{PE section based}
252
+    You can create a hash signature for a specific section in a PE file.
243 253
     Such signatures shall be stored inside \verb+.mdb+ files in the
244 254
     following format:
245 255
     \begin{verbatim}
246
-PESectionSize:MD5:MalwareName
256
+PESectionSize:PESectionHash:MalwareName
247 257
     \end{verbatim}
248 258
     The easiest way to generate MD5 based section signatures is to extract
249 259
     target PE sections into separate files and then run sigtool with the
250 260
     option \verb+--mdb+
251 261
 
262
+    ClamAV 0.98 has also added support for SHA1 and SHA256 section based
263
+    signatures. The format is the same as for MD5 PE section based signatures.
264
+    ClamAV differentiates between the hash types based on the length of the hash string
265
+    in the signature. For best backwards compatibility, these should be
266
+    placed inside a \verb+*.msb+ file.
267
+
268
+    \subsection{Unknown size}
269
+    ClamAV 0.98 has also added support for hash signatures where the size
270
+    is not known but the hash is. It is much more performance-efficient to
271
+    use signatures with specific sizes, so be cautious when using this
272
+    feature. For these cases, the '*' character can be used in the size
273
+    field. To ensure proper backwards compatibility with older versions of
274
+    ClamAV, these signatures must have a minimum functional level of 73 or
275
+    higher. Signatures that use the wildcard size without this level set
276
+    will be rejected as malformed.
277
+    \begin{verbatim}
278
+Sample .hsb signature matching any size
279
+HashString:*:MalwareName:73
280
+
281
+Sample .msb signature matching any size
282
+*:PESectionHash:MalwareName:73
283
+    \end{verbatim}
284
+
252 285
     \subsection{Body-based signatures}
253 286
     ClamAV stores all body-based signatures in a hexadecimal format. In this
254 287
     section by a hex-signature we mean a fragment of malware's body converted
... ...
@@ -670,7 +703,10 @@ fileno:max depth
670 670
 
671 671
     \subsection{Whitelist databases}
672 672
     To whitelist a specific file use the MD5 signature format and place
673
-    it inside a database file with the extension of \verb+.fp+.\\
673
+    it inside a database file with the extension of \verb+.fp+.
674
+    To whitelist a specific file with the SHA1 or SHA256 file hash signature
675
+    format, place the signature inside a database file with the extension
676
+    of \verb+.sfp+.\\
674 677
 
675 678
     \noindent
676 679
     To whitelist a specific signature from the database you just add
... ...
@@ -28,7 +28,7 @@
28 28
 
29 29
 int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname) {
30 30
     enum CLI_HASH_TYPE type;
31
-    char binhash[32];
31
+    char binhash[CLI_HASHLEN_MAX];
32 32
     int hlen;
33 33
 
34 34
     if(!root || !strhash) {
... ...
@@ -36,7 +36,8 @@ int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size,
36 36
 	return CL_ENULLARG;
37 37
     }
38 38
 
39
-    if(!size || size == (uint32_t)-1) {
39
+    /* size 0 here is now a wildcard size match */
40
+    if(size == (uint32_t)-1) {
40 41
 	cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);
41 42
 	return CL_EARG;
42 43
     }
... ...
@@ -65,9 +66,9 @@ int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size,
65 65
 }
66 66
 
67 67
 static const unsigned int hashlen[] = {
68
-    16, /* CLI_HASH_MD5 */
69
-    20, /* CLI_HASH_SHA1 */
70
-    32, /* CLI_HASH_SHA256 */
68
+    CLI_HASHLEN_MD5,
69
+    CLI_HASHLEN_SHA1,
70
+    CLI_HASHLEN_SHA256
71 71
 };
72 72
 
73 73
 int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname) {
... ...
@@ -77,32 +78,38 @@ int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_
77 77
     struct cli_htu32 *ht;
78 78
     int i;
79 79
 
80
-    ht = &root->hm.sizehashes[type];
81
-    if(!root->hm.sizehashes[type].capacity) {
82
-	i = cli_htu32_init(ht, 64, root->mempool);
83
-	if(i) return i;
80
+    if (size) {
81
+        /* size non-zero, find sz_hash element in size-driven hashtable  */
82
+        ht = &root->hm.sizehashes[type];
83
+        if(!root->hm.sizehashes[type].capacity) {
84
+            i = cli_htu32_init(ht, 64, root->mempool);
85
+            if(i) return i;
86
+        }
87
+
88
+        item = cli_htu32_find(ht, size);
89
+        if(!item) {
90
+	    struct cli_htu32_element htitem;
91
+	    szh = mpool_calloc(root->mempool, 1, sizeof(*szh));
92
+	    if(!szh) {
93
+	        cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
94
+	        return CL_EMEM;
95
+	    }
96
+
97
+	    htitem.key = size;
98
+	    htitem.data.as_ptr = szh;
99
+	    i = cli_htu32_insert(ht, &htitem, root->mempool);
100
+	    if(i) {
101
+	        cli_errmsg("hm_addhash_bin: failed to add item to hashtab");
102
+	        mpool_free(root->mempool, szh);
103
+	        return i;
104
+	    }
105
+        } else
106
+	    szh = (struct cli_sz_hash *)item->data.as_ptr;
107
+    }
108
+    else {
109
+        /* size 0 = wildcard */
110
+        szh = &root->hwild.hashes[type];
84 111
     }
85
-
86
-    item = cli_htu32_find(ht, size);
87
-    if(!item) {
88
-	struct cli_htu32_element htitem;
89
-	szh = mpool_calloc(root->mempool, 1, sizeof(*szh));
90
-	if(!szh) {
91
-	    cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
92
-	    return CL_EMEM;
93
-	}
94
-
95
-	htitem.key = size;
96
-	htitem.data.as_ptr = szh;
97
-	i = cli_htu32_insert(ht, &htitem, root->mempool);
98
-	if(i) {
99
-	    cli_errmsg("hm_addhash_bin: failed to add item to hashtab");
100
-	    mpool_free(root->mempool, szh);
101
-	    return i;
102
-	}
103
-    } else
104
-	szh = (struct cli_sz_hash *)item->data.as_ptr;
105
-
106 112
     szh->items++;
107 113
 
108 114
     szh->hash_array = mpool_realloc2(root->mempool, szh->hash_array, hlen * szh->items);
... ...
@@ -129,7 +136,6 @@ int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_
129 129
     return 0;
130 130
 }
131 131
 
132
-
133 132
 static inline int hm_cmp(const uint8_t *itm, const uint8_t *ref, unsigned int keylen) {
134 133
 #if WORDS_BIGENDIAN == 0
135 134
     uint32_t i = *(uint32_t *)itm, r = *(uint32_t *)ref;
... ...
@@ -142,7 +148,7 @@ static inline int hm_cmp(const uint8_t *itm, const uint8_t *ref, unsigned int ke
142 142
 }
143 143
 
144 144
 static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen) {
145
-    uint8_t piv[32], tmph[32];
145
+    uint8_t piv[CLI_HASHLEN_MAX], tmph[CLI_HASHLEN_MAX];
146 146
     size_t l1, r1;
147 147
 
148 148
     const char *tmpv;
... ...
@@ -181,7 +187,7 @@ static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int ke
181 181
     hm_sort(szh, r1, r, keylen);
182 182
 }
183 183
 
184
-
184
+/* flush both size-specific and agnostic hash sets */
185 185
 void hm_flush(struct cli_matcher *root) {
186 186
     enum CLI_HASH_TYPE type;
187 187
 
... ...
@@ -191,18 +197,27 @@ void hm_flush(struct cli_matcher *root) {
191 191
     for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
192 192
 	struct cli_htu32 *ht = &root->hm.sizehashes[type];
193 193
 	const struct cli_htu32_element *item = NULL;
194
+	struct cli_sz_hash *szh = NULL;
194 195
 
195 196
 	if(!root->hm.sizehashes[type].capacity)
196 197
 	    continue;
197 198
 
198 199
 	while((item = cli_htu32_next(ht, item))) {
199
-	    struct cli_sz_hash *szh = (struct cli_sz_hash *)item->data.as_ptr;
200
+	    szh = (struct cli_sz_hash *)item->data.as_ptr;
200 201
 	    unsigned int keylen = hashlen[type];
201 202
 
202 203
 	    if(szh->items > 1)
203 204
 		hm_sort(szh, 0, szh->items, keylen);
204 205
 	}
205 206
     }
207
+
208
+    for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
209
+	struct cli_sz_hash *szh = &root->hwild.hashes[type];
210
+	unsigned int keylen = hashlen[type];
211
+
212
+	if(szh->items > 1)
213
+	    hm_sort(szh, 0, szh->items, keylen);
214
+    }
206 215
 }
207 216
 
208 217
 
... ...
@@ -210,20 +225,18 @@ int cli_hm_have_size(const struct cli_matcher *root, enum CLI_HASH_TYPE type, ui
210 210
     return (size && size != 0xffffffff && root && root->hm.sizehashes[type].capacity && cli_htu32_find(&root->hm.sizehashes[type], size));
211 211
 }
212 212
 
213
-int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
214
-    const struct cli_htu32_element *item;
213
+int cli_hm_have_wild(const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
214
+    return (root && root->hwild.hashes[type].items);
215
+}
216
+
217
+/* hm_scan looks up a digest in one given hash set (size-specific or size-agnostic) */
218
+static int hm_scan(const unsigned char *digest, const char **virname, const struct cli_sz_hash *szh, enum CLI_HASH_TYPE type) {
215 219
     unsigned int keylen;
216
-    struct cli_sz_hash *szh;
217 220
     size_t l, r;
218 221
 
219
-    if(!digest || !size || size == 0xffffffff || !root || !root->hm.sizehashes[type].capacity)
222
+    if(!digest || !szh || !szh->items)
220 223
 	return CL_CLEAN;
221 224
 
222
-    item = cli_htu32_find(&root->hm.sizehashes[type], size);
223
-    if(!item)
224
-	return CL_CLEAN;
225
-
226
-    szh = (struct cli_sz_hash *)item->data.as_ptr;
227 225
     keylen = hashlen[type];
228 226
 
229 227
     l = 0;
... ...
@@ -247,6 +260,32 @@ int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname
247 247
     return CL_CLEAN;
248 248
 }
249 249
 
250
+/* cli_hm_scan will scan only size-specific hashes, if any */
251
+int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
252
+    const struct cli_htu32_element *item;
253
+    struct cli_sz_hash *szh;
254
+
255
+    if(!digest || !size || size == 0xffffffff || !root || !root->hm.sizehashes[type].capacity)
256
+	return CL_CLEAN;
257
+
258
+    item = cli_htu32_find(&root->hm.sizehashes[type], size);
259
+    if(!item)
260
+	return CL_CLEAN;
261
+
262
+    szh = (struct cli_sz_hash *)item->data.as_ptr;
263
+
264
+    return hm_scan(digest, virname, szh, type);
265
+}
266
+
267
+/* cli_hm_scan_wild will scan only size-agnostic hashes, if any */
268
+int cli_hm_scan_wild(const unsigned char *digest, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type) {
269
+    if(!digest || !root || !root->hwild.hashes[type].items)
270
+	return CL_CLEAN;
271
+
272
+    return hm_scan(digest, virname, &root->hwild.hashes[type], type);
273
+}
274
+
275
+/* free both size-specific and agnostic hash sets */
250 276
 void hm_free(struct cli_matcher *root) {
251 277
     enum CLI_HASH_TYPE type;
252 278
 
... ...
@@ -262,7 +301,6 @@ void hm_free(struct cli_matcher *root) {
262 262
 
263 263
 	while((item = cli_htu32_next(ht, item))) {
264 264
 	    struct cli_sz_hash *szh = (struct cli_sz_hash *)item->data.as_ptr;
265
-	    unsigned int keylen = hashlen[type];
266 265
 
267 266
 	    mpool_free(root->mempool, szh->hash_array);
268 267
 	    while(szh->items)
... ...
@@ -272,5 +310,17 @@ void hm_free(struct cli_matcher *root) {
272 272
 	}
273 273
 	cli_htu32_free(ht, root->mempool);
274 274
     }
275
+
276
+    for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
277
+	struct cli_sz_hash *szh = &root->hwild.hashes[type];
278
+
279
+	if(!szh->items)
280
+	    continue;
281
+
282
+	mpool_free(root->mempool, szh->hash_array);
283
+	while(szh->items)
284
+	    mpool_free(root->mempool, (void *)szh->virusnames[--szh->items]);
285
+	mpool_free(root->mempool, szh->virusnames);
286
+    }
275 287
 }
276 288
 
... ...
@@ -29,7 +29,7 @@
29 29
 #include "hashtab.h"
30 30
 
31 31
 enum CLI_HASH_TYPE {
32
-    CLI_HASH_MD5,
32
+    CLI_HASH_MD5 = 0,
33 33
     CLI_HASH_SHA1,
34 34
     CLI_HASH_SHA256,
35 35
 
... ...
@@ -37,6 +37,13 @@ enum CLI_HASH_TYPE {
37 37
     CLI_HASH_AVAIL_TYPES
38 38
 };
39 39
 
40
+#define CLI_HASHLEN_MD5 16
41
+#define CLI_HASHLEN_SHA1 20
42
+#define CLI_HASHLEN_SHA256 32
43
+#define CLI_HASHLEN_MAX 32
44
+
45
+#define cli_hashlength(t) ((t == CLI_HASH_MD5) ? CLI_HASHLEN_MD5 : ((t == CLI_HASH_SHA1) ? CLI_HASHLEN_SHA1 : CLI_HASHLEN_SHA256))
46
+
40 47
 struct cli_sz_hash {
41 48
     uint8_t *hash_array;
42 49
     const char **virusnames;
... ...
@@ -48,11 +55,17 @@ struct cli_hash_patt {
48 48
     struct cli_htu32 sizehashes[CLI_HASH_AVAIL_TYPES];
49 49
 };
50 50
 
51
+struct cli_hash_wild {
52
+    struct cli_sz_hash hashes[CLI_HASH_AVAIL_TYPES];
53
+};
54
+
51 55
 int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname);
52 56
 int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname);
53 57
 void hm_flush(struct cli_matcher *root);
54 58
 int cli_hm_scan(const unsigned char *digest, uint32_t size, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type);
59
+int cli_hm_scan_wild(const unsigned char *digest, const char **virname, const struct cli_matcher *root, enum CLI_HASH_TYPE type);
55 60
 int cli_hm_have_size(const struct cli_matcher *root, enum CLI_HASH_TYPE type, uint32_t size);
61
+int cli_hm_have_wild(const struct cli_matcher *root, enum CLI_HASH_TYPE type);
56 62
 void hm_free(struct cli_matcher *root);
57 63
 
58 64
 #endif
... ...
@@ -425,6 +425,10 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx)
425 425
 	cli_dbgmsg("cli_checkfp(md5): Found false positive detection (fp sig: %s), size: %d\n", virname, (int)size);
426 426
 	return CL_CLEAN;
427 427
     }
428
+    else if(cli_hm_scan_wild(digest, &virname, ctx->engine->hm_fp, CLI_HASH_MD5) == CL_VIRUS) {
429
+	cli_dbgmsg("cli_checkfp(md5): Found false positive detection (fp sig: %s), size: *\n", virname);
430
+	return CL_CLEAN;
431
+    }
428 432
 
429 433
     if(cli_debug_flag || ctx->engine->cb_hash) {
430 434
 	for(i = 0; i < 16; i++)
... ...
@@ -438,8 +442,11 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx)
438 438
 	do_dsig_check = strncmp("W32S.", cli_get_last_virus(ctx), 5);
439 439
 
440 440
     map = *ctx->fmap;
441
-    have_sha1 = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA1, size) || (cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA1, 1) && do_dsig_check);
442
-    have_sha256 = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA256, size);
441
+    have_sha1 = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA1, size)
442
+	 || cli_hm_have_wild(ctx->engine->hm_fp, CLI_HASH_SHA1)
443
+	 || (cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA1, 1) && do_dsig_check);
444
+    have_sha256 = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA256, size)
445
+	 || cli_hm_have_wild(ctx->engine->hm_fp, CLI_HASH_SHA256);
443 446
     if(have_sha1 || have_sha256) {
444 447
 	if((ptr = fmap_need_off_once(map, 0, size))) {
445 448
 	    if(have_sha1) {
... ...
@@ -450,6 +457,10 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx)
450 450
 		    cli_dbgmsg("cli_checkfp(sha1): Found false positive detection (fp sig: %s)\n", virname);
451 451
 		    return CL_CLEAN;
452 452
 		}
453
+		if(cli_hm_scan_wild(&shash1[SHA1_HASH_SIZE], &virname, ctx->engine->hm_fp, CLI_HASH_SHA1) == CL_VIRUS) {
454
+		    cli_dbgmsg("cli_checkfp(sha1): Found false positive detection (fp sig: %s)\n", virname);
455
+		    return CL_CLEAN;
456
+		}
453 457
 		if(do_dsig_check && cli_hm_scan(&shash1[SHA1_HASH_SIZE], 1, &virname, ctx->engine->hm_fp, CLI_HASH_SHA1) == CL_VIRUS) {
454 458
 		    cli_dbgmsg("cli_checkfp(sha1): Found false positive detection via catalog file\n");
455 459
 		    return CL_CLEAN;
... ...
@@ -463,6 +474,10 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx)
463 463
 		    cli_dbgmsg("cli_checkfp(sha256): Found false positive detection (fp sig: %s)\n", virname);
464 464
 		    return CL_CLEAN;
465 465
 		}
466
+		if(cli_hm_scan_wild(&shash256[SHA256_HASH_SIZE], &virname, ctx->engine->hm_fp, CLI_HASH_SHA256) == CL_VIRUS) {
467
+		    cli_dbgmsg("cli_checkfp(sha256): Found false positive detection (fp sig: %s)\n", virname);
468
+		    return CL_CLEAN;
469
+		}
466 470
 	    }
467 471
 	}
468 472
     }
... ...
@@ -771,13 +786,15 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
771 771
 	    memcpy(digest[CLI_HASH_MD5], refhash, 16);
772 772
 	}
773 773
 
774
-	if(cli_hm_have_size(hdb, CLI_HASH_SHA1, map->len) || cli_hm_have_size(fp, CLI_HASH_SHA1, map->len)) {
774
+	if(cli_hm_have_size(hdb, CLI_HASH_SHA1, map->len) || cli_hm_have_wild(hdb, CLI_HASH_SHA1)
775
+		|| cli_hm_have_size(fp, CLI_HASH_SHA1, map->len) || cli_hm_have_wild(fp, CLI_HASH_SHA1) ) {
775 776
 	    SHA1Init(&sha1ctx);
776 777
 	    compute_hash[CLI_HASH_SHA1] = 1;
777 778
 	} else
778 779
 	    compute_hash[CLI_HASH_SHA1] = 0;
779 780
 
780
-	if(cli_hm_have_size(hdb, CLI_HASH_SHA256, map->len) || cli_hm_have_size(fp, CLI_HASH_SHA256, map->len)) {
781
+	if(cli_hm_have_size(hdb, CLI_HASH_SHA256, map->len) || cli_hm_have_wild(hdb, CLI_HASH_SHA256)
782
+		|| cli_hm_have_size(fp, CLI_HASH_SHA256, map->len) || cli_hm_have_wild(fp, CLI_HASH_SHA256)) {
781 783
 	    sha256_init(&sha256ctx);
782 784
 	    compute_hash[CLI_HASH_SHA256] = 1;
783 785
 	} else
... ...
@@ -871,25 +888,55 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
871 871
 
872 872
 	virname = NULL;
873 873
 	for(hashtype = CLI_HASH_MD5; hashtype < CLI_HASH_AVAIL_TYPES; hashtype++) {
874
-	    if(compute_hash[hashtype] &&
875
-	       (ret = cli_hm_scan(digest[hashtype], map->len, &virname, hdb, hashtype)) == CL_VIRUS) {
876
-		if(fp) {
877
-		    for(hashtype2 = CLI_HASH_MD5; hashtype2 < CLI_HASH_AVAIL_TYPES; hashtype2++) {
878
-			if(compute_hash[hashtype2] &&
879
-			   cli_hm_scan(digest[hashtype2], map->len, NULL, fp, hashtype2) == CL_VIRUS) {
880
-			    ret = CL_CLEAN;
881
-			    break;
882
-			}
874
+	    const char * virname_w = NULL;
875
+	    int found = 0;
876
+
877
+	    /* If no hash, skip to next type */
878
+	    if(!compute_hash[hashtype])
879
+		continue;
880
+
881
+	    /* Do hash scan */
882
+	    if((ret = cli_hm_scan(digest[hashtype], map->len, &virname, hdb, hashtype)) == CL_VIRUS) {
883
+		found += 1;
884
+	    }
885
+	    if(!found || SCAN_ALL) {
886
+		if ((ret = cli_hm_scan_wild(digest[hashtype], &virname_w, hdb, hashtype)) == CL_VIRUS)
887
+		    found += 2;
888
+	    }
889
+
890
+	    /* If found, do immediate hash-only FP check */
891
+	    if (found && fp) {
892
+		for(hashtype2 = CLI_HASH_MD5; hashtype2 < CLI_HASH_AVAIL_TYPES; hashtype2++) {
893
+		    if(!compute_hash[hashtype2])
894
+			continue;
895
+		    if(cli_hm_scan(digest[hashtype2], map->len, NULL, fp, hashtype2) == CL_VIRUS) {
896
+			found = 0;
897
+			ret = CL_CLEAN;
898
+			break;
883 899
 		    }
884
-		}
885
-		if (ret == CL_VIRUS) {
886
-		    viruses_found++;
887
-		    cli_append_virus(ctx, virname);
888
-		    if (!SCAN_ALL)
900
+		    else if(cli_hm_scan_wild(digest[hashtype2], NULL, fp, hashtype2) == CL_VIRUS) {
901
+			found = 0;
902
+			ret = CL_CLEAN;
889 903
 			break;
904
+		    }
890 905
 		}
906
+	    }
907
+
908
+	    /* If matched size-based hash ... */
909
+	    if (found % 2) {
910
+		viruses_found++;
911
+		cli_append_virus(ctx, virname);
912
+		if (!SCAN_ALL)
913
+		    break;
891 914
 		virname = NULL;
892 915
 	    }
916
+	    /* If matched size-agnostic hash ... */
917
+	    if (found > 1) {
918
+		viruses_found++;
919
+		cli_append_virus(ctx, virname_w);
920
+		if (!SCAN_ALL)
921
+		    break;
922
+	    }
893 923
 	}
894 924
     }
895 925
 
... ...
@@ -93,6 +93,7 @@ struct cli_matcher {
93 93
 
94 94
     /* HASH */
95 95
     struct cli_hash_patt hm;
96
+    struct cli_hash_wild hwild;
96 97
 
97 98
     /* Extended Aho-Corasick */
98 99
     uint32_t ac_partsigs, ac_nodes, ac_patterns, ac_lsigs;
... ...
@@ -390,27 +390,6 @@ void findres(uint32_t by_type, uint32_t by_name, uint32_t res_rva, fmap_t *map,
390 390
     }
391 391
 }
392 392
 
393
-static unsigned int cli_md5sect(fmap_t *map, struct cli_exe_section *s, unsigned char *digest) {
394
-    const void *hashme;
395
-    cli_md5_ctx md5;
396
-
397
-    if (s->rsz > CLI_MAX_ALLOCATION) {
398
-	cli_dbgmsg("cli_md5sect: skipping md5 calculation for too big section\n");
399
-	return 0;
400
-    }
401
-
402
-    if(!s->rsz) return 0;
403
-    if(!(hashme=fmap_need_off_once(map, s->raw, s->rsz))) {
404
-	cli_dbgmsg("cli_md5sect: unable to read section data\n");
405
-	return 0;
406
-    }
407
-
408
-    cli_md5_init(&md5);
409
-    cli_md5_update(&md5, hashme, s->rsz);
410
-    cli_md5_final(digest, &md5);
411
-    return 1;
412
-}
413
-
414 393
 static void cli_parseres_special(uint32_t base, uint32_t rva, fmap_t *map, struct cli_exe_section *exe_sections, uint16_t nsections, size_t fsize, uint32_t hdr_size, unsigned int level, uint32_t type, unsigned int *maxres, struct swizz_stats *stats) {
415 394
     unsigned int err = 0, i;
416 395
     const uint8_t *resdir;
... ...
@@ -502,6 +481,100 @@ static void cli_parseres_special(uint32_t base, uint32_t rva, fmap_t *map, struc
502 502
     fmap_unneed_ptr(map, oentry, entries*8);
503 503
 }
504 504
 
505
+static unsigned int cli_hashsect(fmap_t *map, struct cli_exe_section *s, unsigned char **digest, int * foundhash, int * foundwild)
506
+{
507
+    const void *hashme;
508
+    cli_md5_ctx md5;
509
+    SHA1Context sha1ctx;
510
+    SHA256_CTX sha256ctx;
511
+
512
+    if (s->rsz > CLI_MAX_ALLOCATION) {
513
+        cli_dbgmsg("cli_hashsect: skipping hash calculation for too big section\n");
514
+        return 0;
515
+    }
516
+
517
+    if(!s->rsz) return 0;
518
+    if(!(hashme=fmap_need_off_once(map, s->raw, s->rsz))) {
519
+        cli_dbgmsg("cli_hashsect: unable to read section data\n");
520
+        return 0;
521
+    }
522
+
523
+    if(foundhash[CLI_HASH_MD5] || foundwild[CLI_HASH_MD5]) {
524
+        cli_md5_init(&md5);
525
+        cli_md5_update(&md5, hashme, s->rsz);
526
+        cli_md5_final(digest[CLI_HASH_MD5], &md5);
527
+    }
528
+    if(foundhash[CLI_HASH_SHA1] || foundwild[CLI_HASH_SHA1]) {
529
+        SHA1Init(&sha1ctx);
530
+        SHA1Update(&sha1ctx, hashme, s->rsz);
531
+        SHA1Final(&sha1ctx, digest[CLI_HASH_SHA1]);
532
+    }
533
+    if(foundhash[CLI_HASH_SHA256] || foundwild[CLI_HASH_SHA256]) {
534
+        sha256_init(&sha256ctx);
535
+        sha256_update(&sha256ctx, hashme, s->rsz);
536
+        sha256_final(&sha256ctx, digest[CLI_HASH_SHA256]);
537
+    }
538
+
539
+    return 1;
540
+}
541
+
542
+/* check hash section sigs */
543
+static int scan_pe_mdb (cli_ctx * ctx, struct cli_exe_section *exe_section)
544
+{
545
+    struct cli_matcher * mdb_sect = ctx->engine->hm_mdb;
546
+    unsigned char * hashset[CLI_HASH_AVAIL_TYPES];
547
+    const char * virname = NULL;
548
+    int foundsize[CLI_HASH_AVAIL_TYPES];
549
+    int foundwild[CLI_HASH_AVAIL_TYPES];
550
+    enum CLI_HASH_TYPE type;
551
+    int ret = CL_CLEAN;
552
+ 
553
+    /* pick hashtypes to generate */
554
+    for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
555
+        foundsize[type] = cli_hm_have_size(mdb_sect, type, exe_section->rsz);
556
+        foundwild[type] = cli_hm_have_wild(mdb_sect, type);
557
+        if(foundsize[type] || foundwild[type]) {
558
+            hashset[type] = cli_malloc(cli_hashlength(type));
559
+            if(!hashset[type]) {
560
+                cli_errmsg("scan_pe: cli_malloc failed!\n");
561
+                for(; type > 0;)
562
+                    free(hashset[--type]);
563
+                return CL_EMEM;
564
+            }
565
+        }
566
+        else {
567
+            hashset[type] = NULL;
568
+        }
569
+    }
570
+
571
+    /* Generate hashes */
572
+    cli_hashsect(*ctx->fmap, exe_section, hashset, foundsize, foundwild);
573
+
574
+    /* Do scans */
575
+    for(type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
576
+       if(foundsize[type] && cli_hm_scan(hashset[type], exe_section->rsz, &virname, mdb_sect, type) == CL_VIRUS) {
577
+            cli_append_virus(ctx, virname);
578
+            if (!SCAN_ALL) {
579
+                for(type = CLI_HASH_AVAIL_TYPES; type > 0;)
580
+                    free(hashset[--type]);
581
+                return CL_VIRUS;
582
+            }
583
+            ret = CL_VIRUS;
584
+       }
585
+       if(foundwild[type] && cli_hm_scan_wild(hashset[type], &virname, mdb_sect, type) == CL_VIRUS) {
586
+            cli_append_virus(ctx, virname);
587
+            if (!SCAN_ALL) {
588
+                for(type = CLI_HASH_AVAIL_TYPES; type > 0;)
589
+                    free(hashset[--type]);
590
+                return CL_VIRUS;
591
+            }
592
+            ret = CL_VIRUS;
593
+       }
594
+    }
595
+
596
+    return ret;
597
+}
598
+
505 599
 int cli_scanpe(cli_ctx *ctx)
506 600
 {
507 601
 	uint16_t e_magic; /* DOS signature ("MZ") */
... ...
@@ -528,7 +601,7 @@ int cli_scanpe(cli_ctx *ctx)
528 528
 	size_t fsize;
529 529
 	uint32_t valign, falign, hdr_size, j;
530 530
 	struct cli_exe_section *exe_sections;
531
-	struct cli_matcher *md5_sect;
531
+	struct cli_matcher *mdb_sect;
532 532
 	char timestr[32];
533 533
 	struct pe_image_data_dir *dirs;
534 534
 	struct cli_bc_ctx *bc_ctx;
... ...
@@ -977,25 +1050,16 @@ int cli_scanpe(cli_ctx *ctx)
977 977
 
978 978
 	    if(SCAN_ALGO && (DCONF & PE_CONF_POLIPOS) && !*sname && exe_sections[i].vsz > 40000 && exe_sections[i].vsz < 70000 && exe_sections[i].chr == 0xe0000060) polipos = i;
979 979
 
980
-	    /* check MD5 section sigs */
981
-	    md5_sect = ctx->engine->hm_mdb;
982
-	    if((DCONF & PE_CONF_MD5SECT) && md5_sect) {
983
-		unsigned char md5_dig[16];
984
-		if(cli_hm_have_size(md5_sect, CLI_HASH_MD5, exe_sections[i].rsz) && 
985
-		   cli_md5sect(map, &exe_sections[i], md5_dig) &&
986
-		   cli_hm_scan(md5_dig, exe_sections[i].rsz, &virname, md5_sect, CLI_HASH_MD5) == CL_VIRUS) {
987
-		    cli_append_virus(ctx, virname);
988
-		    if(cli_hm_scan(md5_dig, fsize, NULL, ctx->engine->hm_fp, CLI_HASH_MD5) != CL_VIRUS) {
989
-			if (!SCAN_ALL) {
990
-			    free(section_hdr);
991
-			    free(exe_sections);
992
-			    return CL_VIRUS;
993
-			}
994
-		    }
995
-		    viruses_found++;
996
-		}
980
+	    /* check hash section sigs */
981
+	    if((DCONF & PE_CONF_MD5SECT) && ctx->engine->hm_mdb) {
982
+	        ret = scan_pe_mdb(ctx, &exe_sections[i]);
983
+	        if (ret != CL_CLEAN) {
984
+	            cli_errmsg("scan_pe: scan_pe_mdb failed: %d!\n", ret);
985
+	            free(section_hdr);
986
+	            free(exe_sections);
987
+	            return ret;
988
+	        }
997 989
 	    }
998
-	    
999 990
 	}
1000 991
 
1001 992
 	if (exe_sections[i].urva>>31 || exe_sections[i].uvsz>>31 || (exe_sections[i].rsz && exe_sections[i].uraw>>31) || exe_sections[i].ursz>>31) {
... ...
@@ -1871,13 +1935,17 @@ int cli_scanpe(cli_ctx *ctx)
1871 1871
 
1872 1872
 	    if(epbuff[1] != '\xbe' || skew <= 0 || skew > 0xfff) { /* FIXME: legit skews?? */
1873 1873
 		skew = 0; 
1874
-		if(upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
1875
-		    upx_success = 1;
1876
-
1877 1874
 	    } else {
1878 1875
 		cli_dbgmsg("UPX: UPX1 seems skewed by %d bytes\n", skew);
1879
-		if(upxfn(src + skew, ssize - skew, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep-skew) >= 0 || upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
1880
-		    upx_success = 1;
1876
+	    }
1877
+
1878
+	    /* Try skewed first (skew may be zero) */
1879
+	    if(upxfn(src + skew, ssize - skew, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep-skew) >= 0) {
1880
+		upx_success = 1;
1881
+	    }
1882
+	    /* If skew not successful and non-zero, try no skew */
1883
+	    else if(skew && (upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)) {
1884
+		upx_success = 1;
1881 1885
 	    }
1882 1886
 
1883 1887
 	    if(upx_success)
... ...
@@ -2042,7 +2110,7 @@ int cli_scanpe(cli_ctx *ctx)
2042 2042
 	CLI_UNPSIZELIMITS("PEspin", fsize);
2043 2043
 
2044 2044
 	if((spinned = (char *) cli_malloc(fsize)) == NULL) {
2045
-        cli_errmsg("PESping: Unable to allocate memory for spinned %u\n", fsize);
2045
+        cli_errmsg("PESping: Unable to allocate memory for spinned %lu\n", (unsigned long)fsize);
2046 2046
 	    free(exe_sections);
2047 2047
 	    return CL_EMEM;
2048 2048
 	}
... ...
@@ -2106,7 +2174,7 @@ int cli_scanpe(cli_ctx *ctx)
2106 2106
 	    char *spinned;
2107 2107
 
2108 2108
 	    if((spinned = (char *) cli_malloc(fsize)) == NULL) {
2109
-            cli_errmsg("yc: Unable to allocate memory for spinned %u\n", fsize);
2109
+            cli_errmsg("yC: Unable to allocate memory for spinned %lu\n", (unsigned long)fsize);
2110 2110
 	      free(exe_sections);
2111 2111
 	      return CL_EMEM;
2112 2112
 	    }
... ...
@@ -1914,13 +1914,14 @@ static int cli_loadign(FILE *fs, struct cl_engine *engine, unsigned int options,
1914 1914
 #define MD5_TOKENS 5
1915 1915
 static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int mode, unsigned int options, struct cli_dbio *dbio, const char *dbname)
1916 1916
 {
1917
-	const char *tokens[MD5_TOKENS + 1];
1918
-	char buffer[FILEBUFF], *buffer_cpy = NULL;
1919
-	const char *pt, *virname;
1920
-	int ret = CL_SUCCESS;
1921
-	unsigned int size_field = 1, md5_field = 0, line = 0, sigs = 0, tokens_count;
1922
-	struct cli_matcher *db;
1923
-	unsigned long size;
1917
+    const char *tokens[MD5_TOKENS + 1];
1918
+    char buffer[FILEBUFF], *buffer_cpy = NULL;
1919
+    const char *pt, *virname;
1920
+    int ret = CL_SUCCESS;
1921
+    unsigned int size_field = 1, md5_field = 0, line = 0, sigs = 0, tokens_count;
1922
+    unsigned int req_fl = 0; 
1923
+    struct cli_matcher *db;
1924
+    unsigned long size;
1924 1925
 
1925 1926
 
1926 1927
     if(mode == MD5_MDB) {
... ...
@@ -1966,7 +1967,7 @@ static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1966 1966
 	    break;
1967 1967
 	}
1968 1968
 	if(tokens_count > MD5_TOKENS - 2) {
1969
-	    unsigned int req_fl = atoi(tokens[MD5_TOKENS - 2]);
1969
+	    req_fl = atoi(tokens[MD5_TOKENS - 2]);
1970 1970
 
1971 1971
 	    if(tokens_count > MD5_TOKENS) {
1972 1972
 		ret = CL_EMALFDB;
... ...
@@ -1976,17 +1977,28 @@ static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo,
1976 1976
 	    if(cl_retflevel() < req_fl)
1977 1977
 		continue;
1978 1978
 	    if(tokens_count == MD5_TOKENS) {
1979
-		req_fl = atoi(tokens[MD5_TOKENS - 1]);
1980
-		if(cl_retflevel() > req_fl)
1979
+		int max_fl = atoi(tokens[MD5_TOKENS - 1]);
1980
+		if(cl_retflevel() > max_fl)
1981 1981
 		    continue;
1982 1982
 	    }
1983 1983
 	}
1984 1984
 
1985
-	size = strtoul(tokens[size_field], (char **)&pt, 10);
1986
-	if(*pt || !size || size >= 0xffffffff) {
1987
-	    cli_errmsg("cli_loadhash: Invalid value for the size field\n");
1988
-	    ret = CL_EMALFDB;
1989
-	    break;
1985
+	if((mode == MD5_MDB) || strcmp(tokens[size_field],"*")) {
1986
+	    size = strtoul(tokens[size_field], (char **)&pt, 10);
1987
+	    if(*pt || !size || size >= 0xffffffff) {
1988
+		cli_errmsg("cli_loadhash: Invalid value for the size field\n");
1989
+		ret = CL_EMALFDB;
1990
+		break;
1991
+	    }
1992
+	}
1993
+	else {
1994
+	    size = 0;
1995
+	    if((tokens_count < MD5_TOKENS - 1) || (req_fl < 73)) {
1996
+		cli_errmsg("cli_loadhash: Minimum FLEVEL field must be at least 73 for wildcard size hash signatures."
1997
+			" For reference, running FLEVEL is %d\n", cl_retflevel());
1998
+		ret = CL_EMALFDB;
1999
+		break;
2000
+	    }
1990 2001
 	}
1991 2002
 
1992 2003
 	pt = tokens[2]; /* virname */
... ...
@@ -1348,7 +1348,7 @@ static int listdb(const char *filename, const regex_t *regex)
1348 1348
             line++;
1349 1349
             mprintf("%s\n", buffer);
1350 1350
         }
1351
-    } else if(cli_strbcasestr(filename, ".hdb") || cli_strbcasestr(filename, ".hdu") || cli_strbcasestr(filename, ".mdb") || cli_strbcasestr(filename, ".mdu")) { /* hash database */
1351
+    } else if(cli_strbcasestr(filename, ".hdb") || cli_strbcasestr(filename, ".hdu") || cli_strbcasestr(filename, ".mdb") || cli_strbcasestr(filename, ".mdu") || cli_strbcasestr(filename, ".hsb") || cli_strbcasestr(filename, ".hsu") || cli_strbcasestr(filename, ".msb") || cli_strbcasestr(filename, ".msu")) { /* hash database */
1352 1352
 
1353 1353
 	while(fgets(buffer, FILEBUFF, fh)) {
1354 1354
 	    cli_chomp(buffer);