| ... | ... |
@@ -1,3 +1,8 @@ |
| 1 |
+Sun Aug 3 23:09:44 CEST 2008 (acab) |
|
| 2 |
+------------------------------------ |
|
| 3 |
+ * libclamav/uniq: faster md5 lookup |
|
| 4 |
+ * sigtool: sync |
|
| 5 |
+ |
|
| 1 | 6 |
Sun Aug 3 16:12:17 CEST 2008 (acab) |
| 2 | 7 |
------------------------------------ |
| 3 | 8 |
* libclamav: use md5 based lookup for ole2/vba instead of hashtab (bb#1071) |
| ... | ... |
@@ -24,118 +24,21 @@ |
| 24 | 24 |
#include "clamav-config.h" |
| 25 | 25 |
#endif |
| 26 | 26 |
|
| 27 |
+#include <stdlib.h> |
|
| 28 |
+#if HAVE_STRING_H |
|
| 29 |
+#include <string.h> |
|
| 30 |
+#endif |
|
| 31 |
+ |
|
| 27 | 32 |
#include "uniq.h" |
| 33 |
+#include "others.h" |
|
| 28 | 34 |
#include "md5.h" |
| 29 | 35 |
|
| 30 |
-#if 0 |
|
| 31 | 36 |
struct uniq *uniq_init(uint32_t count) {
|
| 32 | 37 |
struct uniq *U; |
| 33 |
- uint32_t i; |
|
| 34 | 38 |
|
| 35 | 39 |
if(!count) return NULL; |
| 36 | 40 |
U = cli_calloc(1, sizeof(*U)); |
| 37 | 41 |
if(!U) return NULL; |
| 38 |
- if(cli_ac_init(&U->matcher, 16, 16)) {
|
|
| 39 |
- uniq_free(U); |
|
| 40 |
- return NULL; |
|
| 41 |
- } |
|
| 42 |
- U->custs = cli_calloc(count, sizeof(U->custs)); |
|
| 43 |
- if(!U->custs) {
|
|
| 44 |
- uniq_free(U); |
|
| 45 |
- return NULL; |
|
| 46 |
- } |
|
| 47 |
- U->patts = cli_calloc(count, sizeof(U->patts)); |
|
| 48 |
- if(!U->patts) {
|
|
| 49 |
- uniq_free(U); |
|
| 50 |
- return NULL; |
|
| 51 |
- } |
|
| 52 |
- U->md5s = cli_malloc(count*sizeof(U->md5s)); |
|
| 53 |
- if(!U->md5s) {
|
|
| 54 |
- uniq_free(U); |
|
| 55 |
- return NULL; |
|
| 56 |
- } |
|
| 57 |
- |
|
| 58 |
- U->entries = count; |
|
| 59 |
- |
|
| 60 |
- for(i=0; i<count; i++) {
|
|
| 61 |
- U->patts[i].pattern = U->md5s[i].md5; |
|
| 62 |
- U->patts[i].length = 16; |
|
| 63 |
- U->patts[i].ch[0] = U->patts[i].ch[1] |= CLI_MATCH_IGNORE; |
|
| 64 |
- U->patts[i].customdata = &U->custs[i]; |
|
| 65 |
- } |
|
| 66 |
- |
|
| 67 |
- return U; |
|
| 68 |
-} |
|
| 69 |
- |
|
| 70 |
-void uniq_free(struct uniq *U) {
|
|
| 71 |
- uint32_t i; |
|
| 72 |
- U->matcher.ac_patterns = 0; /* don't free my arrays! */ |
|
| 73 |
- cli_ac_free(&U->matcher); |
|
| 74 |
- if(U->custs) free(U->custs); |
|
| 75 |
- if(U->patts) free(U->patts); |
|
| 76 |
- if(U->md5s) free(U->md5s); |
|
| 77 |
- free(U); |
|
| 78 |
-} |
|
| 79 |
- |
|
| 80 |
- |
|
| 81 |
-uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
|
| 82 |
- uint8_t digest[16]; |
|
| 83 |
- struct UNIQCUST *cust; |
|
| 84 |
- struct cli_ac_data mdata; |
|
| 85 |
- |
|
| 86 |
- cli_md5_ctx md5; |
|
| 87 |
- cli_md5_init(&md5); |
|
| 88 |
- cli_md5_update(&md5, key, key_len); |
|
| 89 |
- cli_md5_final(digest, &md5); |
|
| 90 |
- |
|
| 91 |
- cli_ac_initdata(&mdata, 0, 0, AC_DEFAULT_TRACKLEN); /* This can't fail as we don't have parts or lsigs */ |
|
| 92 |
- if (cli_ac_scanbuff(digest,16, NULL, (void *)&cust, NULL, &U->matcher, &mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL)!=CL_VIRUS) {
|
|
| 93 |
- int i; |
|
| 94 |
- char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
|
| 95 |
- struct cli_ac_patt *patt = &U->patts[U->matcher.ac_patterns]; |
|
| 96 |
- |
|
| 97 |
- cust = patt->customdata; |
|
| 98 |
- for(i = 0; i < 16; i++) {
|
|
| 99 |
- cust->name[i*2] = HEX[digest[i]>>4 & 0xf]; |
|
| 100 |
- cust->name[i*2+1] = HEX[digest[i] & 0xf]; |
|
| 101 |
- patt->pattern[i] = digest[i]; |
|
| 102 |
- } |
|
| 103 |
- cli_ac_addpatt(&U->matcher,patt); /* FIXME this can fail */ |
|
| 104 |
- cli_ac_buildtrie(&U->matcher); |
|
| 105 |
- } |
|
| 106 |
- |
|
| 107 |
- cust->count++; |
|
| 108 |
- if(rhash) *rhash = cust->name; |
|
| 109 |
- return cust->count; |
|
| 110 |
-} |
|
| 111 |
- |
|
| 112 |
-uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
|
| 113 |
- uint8_t digest[16]; |
|
| 114 |
- struct UNIQCUST *cust; |
|
| 115 |
- struct cli_ac_data mdata; |
|
| 116 |
- |
|
| 117 |
- cli_md5_ctx md5; |
|
| 118 |
- cli_md5_init(&md5); |
|
| 119 |
- cli_md5_update(&md5, key, key_len); |
|
| 120 |
- cli_md5_final(digest, &md5); |
|
| 121 |
- |
|
| 122 |
- cli_ac_initdata(&mdata, 0, 0, AC_DEFAULT_TRACKLEN); /* This can't fail as we don't have parts or lsigs */ |
|
| 123 |
- if (cli_ac_scanbuff(digest,16, NULL, (void *)&cust, NULL, &U->matcher, &mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL)!=CL_VIRUS) |
|
| 124 |
- return 0; |
|
| 125 |
- |
|
| 126 |
- if(rhash) *rhash = cust->name; |
|
| 127 |
- return cust->count; |
|
| 128 |
-} |
|
| 129 |
- |
|
| 130 |
-#else |
|
| 131 |
-#include <string.h> |
|
| 132 |
- |
|
| 133 |
-struct uniq *uniq_init(uint32_t count) {
|
|
| 134 |
- struct uniq *U; |
|
| 135 |
- |
|
| 136 |
- if(!count) return NULL; |
|
| 137 |
- U = cli_malloc(sizeof(*U)); |
|
| 138 |
- if(!U) return NULL; |
|
| 139 | 42 |
|
| 140 | 43 |
U->md5s = cli_malloc(count * sizeof(*U->md5s)); |
| 141 | 44 |
if(!U->md5s) {
|
| ... | ... |
@@ -143,7 +46,6 @@ struct uniq *uniq_init(uint32_t count) {
|
| 143 | 143 |
return NULL; |
| 144 | 144 |
} |
| 145 | 145 |
|
| 146 |
- U->items = 0; |
|
| 147 | 146 |
return U; |
| 148 | 147 |
} |
| 149 | 148 |
|
| ... | ... |
@@ -156,50 +58,59 @@ uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhas |
| 156 | 156 |
unsigned int i; |
| 157 | 157 |
uint8_t digest[16]; |
| 158 | 158 |
cli_md5_ctx md5; |
| 159 |
- struct UNIQMD5 *m; |
|
| 159 |
+ struct UNIQMD5 *m = NULL; |
|
| 160 | 160 |
|
| 161 | 161 |
cli_md5_init(&md5); |
| 162 | 162 |
cli_md5_update(&md5, key, key_len); |
| 163 | 163 |
cli_md5_final(digest, &md5); |
| 164 | 164 |
|
| 165 |
- for(i=0; i<U->items; i++) {
|
|
| 166 |
- if(memcmp(digest, U->md5s[i].md5, 16)) continue; |
|
| 167 |
- m = &U->md5s[i]; |
|
| 168 |
- break; |
|
| 169 |
- } |
|
| 165 |
+ if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest) |
|
| 166 |
+ for(m=&U->md5s[U->idx[*digest]]; m; m=m->next) |
|
| 167 |
+ if(!memcmp(&digest[1], &m->md5[1], 15)) break; |
|
| 170 | 168 |
|
| 171 |
- if(i==U->items) {
|
|
| 172 |
- char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
|
| 173 |
- m = &U->md5s[i]; |
|
| 169 |
+ if(!m) {
|
|
| 170 |
+ const char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
|
| 171 |
+ |
|
| 172 |
+ m = &U->md5s[U->items]; |
|
| 174 | 173 |
m->count = 0; |
| 174 |
+ |
|
| 175 |
+ if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest) |
|
| 176 |
+ m->next = &U->md5s[U->idx[*digest]]; |
|
| 177 |
+ else |
|
| 178 |
+ m->next = NULL; |
|
| 179 |
+ |
|
| 180 |
+ U->idx[*digest]=U->items; |
|
| 181 |
+ |
|
| 175 | 182 |
for(i = 0; i < 16; i++) {
|
| 176 | 183 |
m->name[i*2] = HEX[digest[i]>>4 & 0xf]; |
| 177 | 184 |
m->name[i*2+1] = HEX[digest[i] & 0xf]; |
| 178 |
- m->md5[i] = digest[i]; |
|
| 185 |
+ m->md5[i] = digest[i]; |
|
| 179 | 186 |
} |
| 180 | 187 |
m->name[32] = '\0'; |
| 181 | 188 |
} |
| 182 |
- |
|
| 189 |
+ |
|
| 183 | 190 |
U->items++; |
| 184 | 191 |
if(rhash) *rhash = m->name; |
| 185 | 192 |
return m->count++; |
| 186 | 193 |
} |
| 187 | 194 |
|
| 188 | 195 |
uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
| 189 |
- unsigned int i; |
|
| 190 | 196 |
uint8_t digest[16]; |
| 191 | 197 |
cli_md5_ctx md5; |
| 198 |
+ struct UNIQMD5 *m = NULL; |
|
| 192 | 199 |
|
| 193 | 200 |
cli_md5_init(&md5); |
| 194 | 201 |
cli_md5_update(&md5, key, key_len); |
| 195 | 202 |
cli_md5_final(digest, &md5); |
| 196 | 203 |
|
| 197 |
- for(i=0; i<U->items; i++) {
|
|
| 198 |
- if(memcmp(digest, U->md5s[i].md5, 16)) continue; |
|
| 199 |
- if(rhash) *rhash = U->md5s[i].name; |
|
| 200 |
- return U->md5s[i].count; |
|
| 204 |
+ if(!U->items || U->md5s[U->idx[*digest]].md5[0]!=*digest) |
|
| 205 |
+ return 0; |
|
| 206 |
+ |
|
| 207 |
+ for(m=&U->md5s[U->idx[*digest]]; m; m=m->next) {
|
|
| 208 |
+ if(memcmp(&digest[1], &m->md5[1], 15)) continue; |
|
| 209 |
+ if(rhash) *rhash = m->name; |
|
| 210 |
+ return m->count; |
|
| 201 | 211 |
} |
| 202 | 212 |
|
| 203 | 213 |
return 0; |
| 204 | 214 |
} |
| 205 |
-#endif |
| ... | ... |
@@ -23,46 +23,25 @@ |
| 23 | 23 |
#ifndef _UNIQ_H |
| 24 | 24 |
#define _UNIQ_H |
| 25 | 25 |
|
| 26 |
-#include "matcher.h" |
|
| 27 | 26 |
#include "cltypes.h" |
| 28 | 27 |
|
| 29 |
-#if 0 |
|
| 30 |
-struct UNIQCUST {
|
|
| 31 |
- char name[33]; |
|
| 32 |
- uint32_t count; |
|
| 33 |
-}; |
|
| 34 |
- |
|
| 35 |
-struct UNIQMD5 {
|
|
| 36 |
- uint16_t md5[16]; |
|
| 37 |
-}; |
|
| 38 |
- |
|
| 39 |
-/* A basic storage for unique IDs */ |
|
| 40 |
-struct uniq {
|
|
| 41 |
- struct cli_matcher matcher; |
|
| 42 |
- struct cli_ac_patt *patts; |
|
| 43 |
- struct UNIQMD5 *md5s; |
|
| 44 |
- struct UNIQCUST *custs; |
|
| 45 |
- uint32_t entries; |
|
| 46 |
-}; |
|
| 47 |
- |
|
| 48 |
-#else |
|
| 49 |
- |
|
| 50 | 28 |
struct UNIQMD5 {
|
| 29 |
+ struct UNIQMD5 *next; |
|
| 51 | 30 |
uint32_t count; |
| 52 | 31 |
uint8_t md5[16]; |
| 53 | 32 |
char name[33]; |
| 54 | 33 |
}; |
| 55 | 34 |
|
| 56 | 35 |
struct uniq {
|
| 57 |
- uint32_t items; |
|
| 58 | 36 |
struct UNIQMD5 *md5s; |
| 37 |
+ uint32_t items; |
|
| 38 |
+ uint32_t idx[256]; |
|
| 59 | 39 |
}; |
| 60 | 40 |
|
| 61 |
-#endif |
|
| 62 |
- |
|
| 63 | 41 |
struct uniq *uniq_init(uint32_t); |
| 64 | 42 |
void uniq_free(struct uniq *); |
| 65 | 43 |
uint32_t uniq_add(struct uniq *, const char *, uint32_t, char **); |
| 66 | 44 |
uint32_t uniq_get(struct uniq *, const char *, uint32_t, char **); |
| 67 | 45 |
|
| 46 |
+ |
|
| 68 | 47 |
#endif |
| ... | ... |
@@ -1041,9 +1041,9 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U) |
| 1041 | 1041 |
DIR *dd; |
| 1042 | 1042 |
struct dirent *dent; |
| 1043 | 1043 |
struct stat statbuf; |
| 1044 |
- char *fullname, vbaname[1024]; |
|
| 1044 |
+ char *fullname, vbaname[1024], *hash; |
|
| 1045 | 1045 |
unsigned char *data; |
| 1046 |
- uint32_t hashcnt, hash; |
|
| 1046 |
+ uint32_t hashcnt; |
|
| 1047 | 1047 |
|
| 1048 | 1048 |
hashcnt = uniq_get(U, "_vba_project", 12, NULL); |
| 1049 | 1049 |
while(hashcnt--) {
|
| ... | ... |
@@ -1051,7 +1051,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U) |
| 1051 | 1051 |
|
| 1052 | 1052 |
for(i = 0; i < vba_project->count; i++) {
|
| 1053 | 1053 |
for(j = 0; j < vba_project->colls[i]; j++) {
|
| 1054 |
- snprintf(vbaname, 1024, "%s/%u_%u", vba_project->dir, vba_project->name[i], j); |
|
| 1054 |
+ snprintf(vbaname, 1024, "%s/%s_%u", vba_project->dir, vba_project->name[i], j); |
|
| 1055 | 1055 |
vbaname[sizeof(vbaname)-1] = '\0'; |
| 1056 | 1056 |
fd = open(vbaname, O_RDONLY|O_BINARY); |
| 1057 | 1057 |
if(fd == -1) continue; |
| ... | ... |
@@ -1077,7 +1077,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U) |
| 1077 | 1077 |
|
| 1078 | 1078 |
if((hashcnt = uniq_get(U, "powerpoint document", 19, &hash))) {
|
| 1079 | 1079 |
while(hashcnt--) {
|
| 1080 |
- snprintf(vbaname, 1024, "%s/%u_%u", dirname, hash, hashcnt); |
|
| 1080 |
+ snprintf(vbaname, 1024, "%s/%s_%u", dirname, hash, hashcnt); |
|
| 1081 | 1081 |
vbaname[sizeof(vbaname)-1] = '\0'; |
| 1082 | 1082 |
fd = open(vbaname, O_RDONLY|O_BINARY); |
| 1083 | 1083 |
if (fd == -1) continue; |
| ... | ... |
@@ -1093,7 +1093,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U) |
| 1093 | 1093 |
|
| 1094 | 1094 |
if ((hashcnt = uniq_get(U, "worddocument", 12, &hash))) {
|
| 1095 | 1095 |
while(hashcnt--) {
|
| 1096 |
- snprintf(vbaname, sizeof(vbaname), "%s/%u_%u", dirname, hash, hashcnt); |
|
| 1096 |
+ snprintf(vbaname, sizeof(vbaname), "%s/%s_%u", dirname, hash, hashcnt); |
|
| 1097 | 1097 |
vbaname[sizeof(vbaname)-1] = '\0'; |
| 1098 | 1098 |
fd = open(vbaname, O_RDONLY|O_BINARY); |
| 1099 | 1099 |
if (fd == -1) continue; |