git-svn: trunk@4050
Török Edvin authored on 2008/08/01 23:50:02... | ... |
@@ -52,7 +52,7 @@ |
52 | 52 |
#include "iana_cctld.h" |
53 | 53 |
#include "scanners.h" |
54 | 54 |
#include "md5.h" |
55 |
- |
|
55 |
+#include <assert.h> |
|
56 | 56 |
|
57 | 57 |
#define DOMAIN_REAL 1 |
58 | 58 |
#define DOMAIN_DISPLAY 0 |
... | ... |
@@ -739,6 +739,7 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
739 | 739 |
|
740 | 740 |
if(!ctx->found_possibly_unwanted) |
741 | 741 |
*ctx->virname=NULL; |
742 |
+#if 0 |
|
742 | 743 |
FILE *f = fopen("/home/edwin/quarantine/urls","r"); |
743 | 744 |
if(!f) |
744 | 745 |
abort(); |
... | ... |
@@ -771,6 +772,7 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
771 | 771 |
} |
772 | 772 |
fclose(f); |
773 | 773 |
return 0; |
774 |
+#endif |
|
774 | 775 |
for(i=0;i<hrefs->count;i++) |
775 | 776 |
if(hrefs->contents[i]) { |
776 | 777 |
struct url_check urls; |
... | ... |
@@ -1180,19 +1182,103 @@ static int whitelist_check(const struct cl_engine* engine,struct url_check* urls |
1180 | 1180 |
return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly); |
1181 | 1181 |
} |
1182 | 1182 |
|
1183 |
-static int hash_match(const struct regex_matcher *rlist, const char *url, size_t len) |
|
1183 |
+static int hash_match(const struct regex_matcher *rlist, const char *host, size_t hlen, const char *path, size_t plen) |
|
1184 | 1184 |
{ |
1185 |
- unsigned char md5_dig[16]; |
|
1186 |
- cli_md5_ctx md5; |
|
1185 |
+#if 0 |
|
1186 |
+ char s[1024]; |
|
1187 |
+ strncpy(s, host, hlen); |
|
1188 |
+ strncpy(s+hlen, path, plen); |
|
1189 |
+ s[hlen+plen] = '\0'; |
|
1190 |
+ cli_dbgmsg("hash lookup for: %s\n",s); |
|
1191 |
+#endif |
|
1192 |
+ if(rlist->hashes.bm_patterns) { |
|
1193 |
+ unsigned char md5_dig[16]; |
|
1194 |
+ cli_md5_ctx md5; |
|
1195 |
+ |
|
1196 |
+ cli_md5_init(&md5); |
|
1197 |
+ cli_md5_update(&md5, host, hlen); |
|
1198 |
+ cli_md5_update(&md5, path, plen); |
|
1199 |
+ cli_md5_final(md5_dig, &md5); |
|
1200 |
+ if(cli_bm_scanbuff(md5_dig, 16, NULL, &rlist->hashes,0,0,-1) == CL_VIRUS) { |
|
1201 |
+ return CL_VIRUS; |
|
1202 |
+ } |
|
1203 |
+ } |
|
1204 |
+ return CL_SUCCESS; |
|
1205 |
+} |
|
1187 | 1206 |
|
1188 |
- if(!rlist->hashes.bm_patterns) |
|
1189 |
- return CL_CLEAN; |
|
1207 |
+#define URL_MAX_LEN 1024 |
|
1208 |
+#define COMPONENTS 4 |
|
1209 |
+static int url_hash_match(const struct regex_matcher *rlist, const char *inurl, size_t len) |
|
1210 |
+{ |
|
1211 |
+ char urlbuff[URL_MAX_LEN+3];/* htmlnorm truncates at 1024 bytes + terminating null + slash + host end null */ |
|
1212 |
+ char *url; |
|
1213 |
+ const char *urlend = urlbuff + len; |
|
1214 |
+ char *host_begin; |
|
1215 |
+ size_t host_len, path_len; |
|
1216 |
+ char *path_begin; |
|
1217 |
+ const char *component; |
|
1218 |
+ const char *lp[COMPONENTS+1]; |
|
1219 |
+ size_t pp[COMPONENTS+2]; |
|
1220 |
+ size_t j, k, ji, ki; |
|
1221 |
+ |
|
1222 |
+ if(!inurl) |
|
1223 |
+ return CL_EMEM; |
|
1224 |
+ strncpy(urlbuff, inurl, URL_MAX_LEN); |
|
1225 |
+ urlbuff[URL_MAX_LEN] = urlbuff[URL_MAX_LEN+1] = urlbuff[URL_MAX_LEN+2] = '\0'; |
|
1226 |
+ url = urlbuff; |
|
1227 |
+ str_hex_to_char(&url, &urlend); |
|
1228 |
+ len = urlend - url; |
|
1229 |
+ host_begin = strchr(url,':'); |
|
1230 |
+ if(!host_begin) |
|
1231 |
+ return CL_PHISH_CLEAN; |
|
1232 |
+ ++host_begin; |
|
1233 |
+ while((host_begin < urlend) && *host_begin == '/') ++host_begin; |
|
1234 |
+ while(*host_begin == '.' && host_begin < urlend) ++host_begin; |
|
1235 |
+ host_len = strcspn(host_begin, ":/?"); |
|
1236 |
+ path_begin = host_begin + host_len; |
|
1237 |
+ if(host_len < len) { |
|
1238 |
+ memmove(path_begin + 2, path_begin + 1, len - host_len); |
|
1239 |
+ *path_begin++ = '/'; |
|
1240 |
+ *path_begin++ = '\0'; |
|
1241 |
+ } else path_begin = url+len; |
|
1242 |
+ if(url + len >= path_begin) { |
|
1243 |
+ path_len = url + len - path_begin + 1; |
|
1244 |
+ } else |
|
1245 |
+ path_len = 0; |
|
1246 |
+ str_make_lowercase(host_begin, host_len); |
|
1190 | 1247 |
|
1191 |
- cli_md5_init(&md5); |
|
1192 |
- cli_md5_update(&md5, url, len); |
|
1193 |
- cli_md5_final(md5_dig, &md5); |
|
1194 |
- if(cli_bm_scanbuff(md5_dig, 16, NULL, &rlist->hashes,0,0,-1) == CL_VIRUS) { |
|
1195 |
- return CL_VIRUS; |
|
1248 |
+ j=COMPONENTS; |
|
1249 |
+ component = strrchr(host_begin, '.'); |
|
1250 |
+ while(component && j > 0) { |
|
1251 |
+ do { |
|
1252 |
+ --component; |
|
1253 |
+ } while(*component != '.' && component > host_begin); |
|
1254 |
+ if(*component != '.') |
|
1255 |
+ component = NULL; |
|
1256 |
+ if(component) |
|
1257 |
+ lp[j--] = component + 1; |
|
1258 |
+ } |
|
1259 |
+ lp[j] = host_begin; |
|
1260 |
+ |
|
1261 |
+ pp[0] = path_len; |
|
1262 |
+ pp[1] = strcspn(path_begin, "?"); |
|
1263 |
+ if(pp[1] != pp[0]) k = 2; |
|
1264 |
+ else k = 1; |
|
1265 |
+ pp[k++] = 0; |
|
1266 |
+ while(k < COMPONENTS+2) { |
|
1267 |
+ const char *p = strchr(path_begin + pp[k-1] + 1, '/'); |
|
1268 |
+ if(p && p > path_begin) { |
|
1269 |
+ pp[k++] = p - path_begin; |
|
1270 |
+ } else |
|
1271 |
+ break; |
|
1272 |
+ } |
|
1273 |
+ |
|
1274 |
+ for(ji=j;ji < COMPONENTS+1; ji++) { |
|
1275 |
+ for(ki=0;ki < k; ki++) { |
|
1276 |
+ assert(pp[ki] < path_len); |
|
1277 |
+ if(hash_match(rlist, lp[ji], host_begin + host_len - lp[ji] + 1, path_begin, pp[ki]) == CL_VIRUS) |
|
1278 |
+ return CL_VIRUS; |
|
1279 |
+ } |
|
1196 | 1280 |
} |
1197 | 1281 |
return CL_SUCCESS; |
1198 | 1282 |
} |
... | ... |
@@ -1214,6 +1300,16 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1214 | 1214 |
if(!strcmp(urls->realLink.data,urls->displayLink.data)) |
1215 | 1215 |
return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */ |
1216 | 1216 |
|
1217 |
+ if(!isURL(pchk, urls->realLink.data, 0)) { |
|
1218 |
+ cli_dbgmsg("Real 'url' is not url:%s\n",urls->realLink.data); |
|
1219 |
+ return CL_PHISH_CLEAN; |
|
1220 |
+ } |
|
1221 |
+ |
|
1222 |
+ if(url_hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) == CL_VIRUS) { |
|
1223 |
+ cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); |
|
1224 |
+ return CL_PHISH_HASH; |
|
1225 |
+ } |
|
1226 |
+ |
|
1217 | 1227 |
if((rc = cleanupURLs(urls))) { |
1218 | 1228 |
/* it can only return an error, or say its clean; |
1219 | 1229 |
* it is not allowed to decide it is phishing */ |
... | ... |
@@ -1223,7 +1319,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1223 | 1223 |
cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data, |
1224 | 1224 |
urls->displayLink.data); |
1225 | 1225 |
|
1226 |
- if((!isURL(pchk, urls->displayLink.data, 1) || !isURL(pchk, urls->realLink.data, 0) ) && |
|
1226 |
+ if((!isURL(pchk, urls->displayLink.data, 1) ) && |
|
1227 | 1227 |
( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) || |
1228 | 1228 |
!(phishy&PHISHY_NUMERIC_IP))) { |
1229 | 1229 |
cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data); |
... | ... |
@@ -1233,10 +1329,6 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1233 | 1233 |
if(whitelist_check(engine, urls, 0)) |
1234 | 1234 |
return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */ |
1235 | 1235 |
|
1236 |
- if(hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) == CL_VIRUS) { |
|
1237 |
- cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); |
|
1238 |
- return CL_PHISH_HASH; |
|
1239 |
- } |
|
1240 | 1236 |
url_check_init(&host_url); |
1241 | 1237 |
|
1242 | 1238 |
if((rc = url_get_host(pchk, urls, &host_url, DOMAIN_DISPLAY, &phishy))) { |
... | ... |
@@ -1324,6 +1416,8 @@ static const char* phishing_ret_toString(enum phish_status rc) |
1324 | 1324 |
return "URLs are way too different"; |
1325 | 1325 |
case CL_PHISH_HEX_URL: |
1326 | 1326 |
return "Embedded hex urls"; |
1327 |
+ case CL_PHISH_HASH: |
|
1328 |
+ return "Blacklisted"; |
|
1327 | 1329 |
default: |
1328 | 1330 |
return "Unknown return code"; |
1329 | 1331 |
} |