... | ... |
@@ -665,8 +665,8 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal) |
665 | 665 |
|
666 | 666 |
str_replace(begin, end, '\\', '/'); |
667 | 667 |
/* find beginning of hostname, because: |
668 |
- * - we want to keep only protocol, host, and |
|
669 |
- * strip path & query parameter(s) |
|
668 |
+ * - we want to keep only protocol, host, and |
|
669 |
+ * strip path & query parameter(s) |
|
670 | 670 |
* - we want to make hostname lowercase*/ |
671 | 671 |
host_begin = strchr(begin, ':'); |
672 | 672 |
while (host_begin && (host_begin < end) && (host_begin[1] == '/')) host_begin++; |
... | ... |
@@ -677,7 +677,7 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal) |
677 | 677 |
host_len = strcspn(host_begin, ":/?"); |
678 | 678 |
if (host_begin + host_len > end + 1) { |
679 | 679 |
/* prevent hostname extending beyond end, it can happen |
680 |
- * if we have spaces at the end, we don't want those part of |
|
680 |
+ * if we have spaces at the end, we don't want those to be part of |
|
681 | 681 |
* the hostname */ |
682 | 682 |
host_len = end - host_begin + 1; |
683 | 683 |
} else { |
... | ... |
@@ -1473,16 +1473,59 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur |
1473 | 1473 |
return CL_PHISH_CLEAN; |
1474 | 1474 |
} |
1475 | 1475 |
|
1476 |
+ /* |
|
1477 |
+ * Whitelist X-type WDB signatures: X:RealURL:DisplayedURL |
|
1478 |
+ * Eg: |
|
1479 |
+ * X:.+\.benign\.com([/?].*)?:.+\.benign\.de |
|
1480 |
+ */ |
|
1476 | 1481 |
if (whitelist_check(engine, urls, 0)) |
1477 | 1482 |
return CL_PHISH_CLEAN; /* if url is whitelisted don't perform further checks */ |
1478 | 1483 |
|
1479 |
- url_check_init(&host_url); |
|
1484 |
+ /* |
|
1485 |
+ * Match R-type PDB signatures: R:RealURL:DisplayedURL |
|
1486 |
+ * Eg: |
|
1487 |
+ * R:.+\.malicious\.net([/?].*)?:.+\.benign\.com |
|
1488 |
+ */ |
|
1489 |
+ if (domainlist_match(engine, urls->realLink.data, urls->displayLink.data, &urls->pre_fixup, 0)) { |
|
1490 |
+ phishy |= DOMAIN_LISTED; |
|
1491 |
+ } |
|
1480 | 1492 |
|
1493 |
+ /* |
|
1494 |
+ * Get copy of URLs stripped down to just the FQDN. |
|
1495 |
+ */ |
|
1496 |
+ url_check_init(&host_url); |
|
1481 | 1497 |
if ((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) { |
1482 | 1498 |
free_if_needed(&host_url); |
1483 | 1499 |
return rc < 0 ? rc : CL_PHISH_CLEAN; |
1484 | 1500 |
} |
1501 |
+ if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) { |
|
1502 |
+ free_if_needed(&host_url); |
|
1503 |
+ return rc < 0 ? rc : CL_PHISH_CLEAN; |
|
1504 |
+ } |
|
1485 | 1505 |
|
1506 |
+ /* |
|
1507 |
+ * Exit early if the realLink and displayLink are the same. |
|
1508 |
+ */ |
|
1509 |
+ if (!strcmp(urls->realLink.data, urls->displayLink.data)) { |
|
1510 |
+ free_if_needed(&host_url); |
|
1511 |
+ return CL_PHISH_CLEAN; |
|
1512 |
+ } |
|
1513 |
+ |
|
1514 |
+ /* |
|
1515 |
+ * Whitelist M-type WDB signatures: M:RealHostname:DisplayedHostname |
|
1516 |
+ * Eg: |
|
1517 |
+ * M:email.isbenign.com:benign.com |
|
1518 |
+ */ |
|
1519 |
+ if (whitelist_check(engine, &host_url, 1)) { |
|
1520 |
+ free_if_needed(&host_url); |
|
1521 |
+ return CL_PHISH_CLEAN; |
|
1522 |
+ } |
|
1523 |
+ |
|
1524 |
+ /* |
|
1525 |
+ * Match H-type PDB signatures: H:DisplayedHostname |
|
1526 |
+ * Eg: |
|
1527 |
+ * H:malicious.com |
|
1528 |
+ */ |
|
1486 | 1529 |
if (domainlist_match(engine, host_url.displayLink.data, host_url.realLink.data, &urls->pre_fixup, 1)) { |
1487 | 1530 |
phishy |= DOMAIN_LISTED; |
1488 | 1531 |
} else { |
... | ... |
@@ -1516,21 +1559,6 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur |
1516 | 1516 |
return CL_PHISH_CLEAN; |
1517 | 1517 |
} |
1518 | 1518 |
|
1519 |
- if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) { |
|
1520 |
- free_if_needed(&host_url); |
|
1521 |
- return rc < 0 ? rc : CL_PHISH_CLEAN; |
|
1522 |
- } |
|
1523 |
- |
|
1524 |
- if (whitelist_check(engine, &host_url, 1)) { |
|
1525 |
- free_if_needed(&host_url); |
|
1526 |
- return CL_PHISH_CLEAN; |
|
1527 |
- } |
|
1528 |
- |
|
1529 |
- if (!strcmp(urls->realLink.data, urls->displayLink.data)) { |
|
1530 |
- free_if_needed(&host_url); |
|
1531 |
- return CL_PHISH_CLEAN; |
|
1532 |
- } |
|
1533 |
- |
|
1534 | 1519 |
{ |
1535 | 1520 |
struct url_check domain_url; |
1536 | 1521 |
url_check_init(&domain_url); |
... | ... |
@@ -119,7 +119,7 @@ static int validate_subdomain(const struct regex_list *regex, const struct pre_f |
119 | 119 |
if (real_url[pos] != '.') { |
120 | 120 |
/* we need to shift left, and insert a '.' |
121 | 121 |
* we have an extra '.' at the beginning inserted by get_host to have room, |
122 |
- * orig_real_url has to be used here, |
|
122 |
+ * orig_real_url has to be used here, |
|
123 | 123 |
* because we want to overwrite that extra '.' */ |
124 | 124 |
size_t orig_real_len = strlen(orig_real_url); |
125 | 125 |
cli_dbgmsg("No dot here:%s\n", real_url + pos); |
... | ... |
@@ -185,7 +185,11 @@ int regex_list_match(struct regex_matcher *matcher, char *real_url, const char * |
185 | 185 |
|
186 | 186 |
strncpy(buffer, real_url, real_len); |
187 | 187 |
buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':'; |
188 |
+ |
|
189 |
+ /* For H-type PDB signatures, real_url is actually the DisplayedHostname. |
|
190 |
+ RealHostname is not used. */ |
|
188 | 191 |
if (!hostOnly || is_whitelist) { |
192 |
+ /* For all other PDB and WDB signatures, concatenate Real:Displayed. */ |
|
189 | 193 |
strncpy(buffer + real_len + 1, display_url, display_len); |
190 | 194 |
} |
191 | 195 |
buffer[buffer_len - 1] = '/'; |
... | ... |
@@ -413,20 +417,20 @@ int load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, |
413 | 413 |
* Multiple lines of form, (empty lines are skipped): |
414 | 414 |
* Flags RealURL DisplayedURL |
415 | 415 |
* Where: |
416 |
- * Flags: |
|
416 |
+ * Flags: |
|
417 | 417 |
* |
418 | 418 |
* .pdb files: |
419 |
- * R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing |
|
419 |
+ * R - regex, H - host-only, followed by (optional) 3-digit hex number representing |
|
420 | 420 |
* flags that should be filtered. |
421 | 421 |
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host] |
422 |
- * |
|
422 |
+ * |
|
423 | 423 |
* .wdb files: |
424 |
- * X - full URL regex |
|
424 |
+ * X - full URL regex |
|
425 | 425 |
* Y - host-only regex |
426 | 426 |
* M - host simple pattern |
427 | 427 |
* |
428 | 428 |
* If a line in the file doesn't conform to this format, loading fails |
429 |
- * |
|
429 |
+ * |
|
430 | 430 |
*/ |
431 | 431 |
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) { |
432 | 432 |
char *pattern; |