Browse code

Added a call to match R-type PDB signatures in phishcheck.c:phishingCheck(). This makes R-type PDB signatures functional, so that they operate as described in the documentation.

Micah Snyder (micasnyd) authored on 2019/07/01 05:37:27
Showing 2 changed files
... ...
@@ -665,8 +665,8 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal)
665 665
 
666 666
         str_replace(begin, end, '\\', '/');
667 667
         /* find beginning of hostname, because:
668
-		 * - we want to keep only protocol, host, and 
669
-		 *  strip path & query parameter(s) 
668
+		 * - we want to keep only protocol, host, and
669
+		 *  strip path & query parameter(s)
670 670
 		 * - we want to make hostname lowercase*/
671 671
         host_begin = strchr(begin, ':');
672 672
         while (host_begin && (host_begin < end) && (host_begin[1] == '/')) host_begin++;
... ...
@@ -677,7 +677,7 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal)
677 677
         host_len = strcspn(host_begin, ":/?");
678 678
         if (host_begin + host_len > end + 1) {
679 679
             /* prevent hostname extending beyond end, it can happen
680
-			 * if we have spaces at the end, we don't want those part of 
680
+			 * if we have spaces at the end, we don't want those part of
681 681
 			 * the hostname */
682 682
             host_len = end - host_begin + 1;
683 683
         } else {
... ...
@@ -1473,16 +1473,59 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur
1473 1473
         return CL_PHISH_CLEAN;
1474 1474
     }
1475 1475
 
1476
+    /*
1477
+     * Whitelist X-type WDB signatures:  X:RealURL:DisplayedURL
1478
+     * Eg:
1479
+     *      X:.+\.benign\.com([/?].*)?:.+\.benign\.de
1480
+     */
1476 1481
     if (whitelist_check(engine, urls, 0))
1477 1482
         return CL_PHISH_CLEAN; /* if url is whitelisted don't perform further checks */
1478 1483
 
1479
-    url_check_init(&host_url);
1484
+    /*
1485
+     * Match R-type PDB signatures:  R:RealURL:DisplayedURL
1486
+     * Eg:
1487
+     *      R:.+\.malicious\.net([/?].*)?:.+\.benign\.com
1488
+     */
1489
+    if (domainlist_match(engine, urls->realLink.data, urls->displayLink.data, &urls->pre_fixup, 0)) {
1490
+        phishy |= DOMAIN_LISTED;
1491
+    }
1480 1492
 
1493
+    /*
1494
+     * Get copy of URLs stripped down to just the FQDN.
1495
+     */
1496
+    url_check_init(&host_url);
1481 1497
     if ((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) {
1482 1498
         free_if_needed(&host_url);
1483 1499
         return rc < 0 ? rc : CL_PHISH_CLEAN;
1484 1500
     }
1501
+    if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) {
1502
+        free_if_needed(&host_url);
1503
+        return rc < 0 ? rc : CL_PHISH_CLEAN;
1504
+    }
1485 1505
 
1506
+    /*
1507
+     * Exit early if the realLink and displayLink are the same.
1508
+     */
1509
+    if (!strcmp(urls->realLink.data, urls->displayLink.data)) {
1510
+        free_if_needed(&host_url);
1511
+        return CL_PHISH_CLEAN;
1512
+    }
1513
+
1514
+    /*
1515
+     * Whitelist M-type WDB signatures: M:RealHostname:DisplayedHostname
1516
+     * Eg:
1517
+     *      M:email.isbenign.com:benign.com
1518
+     */
1519
+    if (whitelist_check(engine, &host_url, 1)) {
1520
+        free_if_needed(&host_url);
1521
+        return CL_PHISH_CLEAN;
1522
+    }
1523
+
1524
+    /*
1525
+     * Match H-type PDB signatures:  H:DisplayedHostname
1526
+     * Eg:
1527
+     *      H:malicious.com
1528
+     */
1486 1529
     if (domainlist_match(engine, host_url.displayLink.data, host_url.realLink.data, &urls->pre_fixup, 1)) {
1487 1530
         phishy |= DOMAIN_LISTED;
1488 1531
     } else {
... ...
@@ -1516,21 +1559,6 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur
1516 1516
         return CL_PHISH_CLEAN;
1517 1517
     }
1518 1518
 
1519
-    if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) {
1520
-        free_if_needed(&host_url);
1521
-        return rc < 0 ? rc : CL_PHISH_CLEAN;
1522
-    }
1523
-
1524
-    if (whitelist_check(engine, &host_url, 1)) {
1525
-        free_if_needed(&host_url);
1526
-        return CL_PHISH_CLEAN;
1527
-    }
1528
-
1529
-    if (!strcmp(urls->realLink.data, urls->displayLink.data)) {
1530
-        free_if_needed(&host_url);
1531
-        return CL_PHISH_CLEAN;
1532
-    }
1533
-
1534 1519
     {
1535 1520
         struct url_check domain_url;
1536 1521
         url_check_init(&domain_url);
... ...
@@ -119,7 +119,7 @@ static int validate_subdomain(const struct regex_list *regex, const struct pre_f
119 119
             if (real_url[pos] != '.') {
120 120
                 /* we need to shift left, and insert a '.'
121 121
 				 * we have an extra '.' at the beginning inserted by get_host to have room,
122
-				 * orig_real_url has to be used here, 
122
+				 * orig_real_url has to be used here,
123 123
 				 * because we want to overwrite that extra '.' */
124 124
                 size_t orig_real_len = strlen(orig_real_url);
125 125
                 cli_dbgmsg("No dot here:%s\n", real_url + pos);
... ...
@@ -185,7 +185,11 @@ int regex_list_match(struct regex_matcher *matcher, char *real_url, const char *
185 185
 
186 186
         strncpy(buffer, real_url, real_len);
187 187
         buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':';
188
+
189
+        /* For H-type PDB signatures, real_url is actually the DisplayedHostname.
190
+           RealHostname is not used. */
188 191
         if (!hostOnly || is_whitelist) {
192
+            /* For all other PDB and WDB signatures concatenate Real:Displayed. */
189 193
             strncpy(buffer + real_len + 1, display_url, display_len);
190 194
         }
191 195
         buffer[buffer_len - 1] = '/';
... ...
@@ -413,20 +417,20 @@ int load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher,
413 413
 	 * Multiple lines of form, (empty lines are skipped):
414 414
  	 * Flags RealURL DisplayedURL
415 415
 	 * Where:
416
-	 * Flags: 
416
+	 * Flags:
417 417
 	 *
418 418
 	 * .pdb files:
419
-	 * R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing 
419
+	 * R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
420 420
 	 * flags that should be filtered.
421 421
 	 * [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
422
-	 * 
422
+	 *
423 423
 	 * .wdb files:
424
-	 * X - full URL regex 
424
+	 * X - full URL regex
425 425
 	 * Y - host-only regex
426 426
 	 * M - host simple pattern
427 427
 	 *
428 428
 	 * If a line in the file doesn't conform to this format, loading fails
429
-	 * 
429
+	 *
430 430
 	 */
431 431
     while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
432 432
         char *pattern;