...
|
...
|
@@ -117,8 +117,6 @@ Checks if realLink is http, but displayedLink is https or vice versa.
|
117
|
117
|
|
118
|
118
|
10. Hostname of real URL is extracted.
|
119
|
119
|
|
120
|
|
-11. Skip cid: displayedLink urls (images embedded in mails).
|
121
|
|
-
|
122
|
120
|
12. Numeric IP detection.
|
123
|
121
|
If url is a numeric IP, then -> phish.
|
124
|
122
|
Maybe we should do DNS lookup?
|
...
|
...
|
@@ -154,7 +152,6 @@ static const char aspnet[] = "asp.net";
|
154
|
154
|
/* ; is replaced by ' ' so omit it here*/
|
155
|
155
|
static const char lt[]="<";
|
156
|
156
|
static const char gt[]=">";
|
157
|
|
-static const char cid[] = "cid:";
|
158
|
157
|
static const char src_text[] = "src";
|
159
|
158
|
static const char href_text[] = "href";
|
160
|
159
|
static const char mailto[] = "mailto:";
|
...
|
...
|
@@ -162,7 +159,6 @@ static const char https[]="https://";
|
162
|
162
|
|
163
|
163
|
static const size_t href_text_len = sizeof(href_text);
|
164
|
164
|
static const size_t src_text_len = sizeof(src_text);
|
165
|
|
-static const size_t cid_len = sizeof(cid)-1;
|
166
|
165
|
static const size_t dotnet_len = sizeof(dotnet)-1;
|
167
|
166
|
static const size_t adonet_len = sizeof(adonet)-1;
|
168
|
167
|
static const size_t aspnet_len = sizeof(aspnet)-1;
|
...
|
...
|
@@ -223,7 +219,7 @@ static const size_t https_len = sizeof(https)-1;
|
223
|
223
|
#define URI_fragmentaddress2 URI_URI2
|
224
|
224
|
#define URI_fragmentaddress3 URI_URI3"(#"URI_fragmentid")?"
|
225
|
225
|
|
226
|
|
-#define URI_CHECK_PROTOCOLS "(http|https|ftp)://.+"
|
|
226
|
+#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"
|
227
|
227
|
|
228
|
228
|
/*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
|
229
|
229
|
* there is fragmentaddress1, and 2 to work around the ISO limitation of 509 bytes max length for string constants*/
|
...
|
...
|
@@ -898,6 +894,7 @@ static void free_regex(regex_t* p)
|
898
|
898
|
|
899
|
899
|
int phishing_init(struct cl_engine* engine)
|
900
|
900
|
{
|
|
901
|
+ char *url_regex, *realurl_regex;
|
901
|
902
|
struct phishcheck* pchk;
|
902
|
903
|
if(!engine->phishcheck) {
|
903
|
904
|
pchk = engine->phishcheck = cli_malloc(sizeof(struct phishcheck));
|
...
|
...
|
@@ -934,20 +931,33 @@ int phishing_init(struct cl_engine* engine)
|
934
|
934
|
engine->phishcheck = NULL;
|
935
|
935
|
return CL_EFORMAT;
|
936
|
936
|
}
|
937
|
|
- pchk->url_regex = str_compose("^ *("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3"|"URI_CHECK_PROTOCOLS") *$");
|
938
|
|
- if(build_regex(&pchk->preg,pchk->url_regex,1)) {
|
|
937
|
+ url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
|
|
938
|
+ if(build_regex(&pchk->preg,url_regex,1)) {
|
|
939
|
+ free_regex(&pchk->preg_cctld);
|
|
940
|
+ free_regex(&pchk->preg_tld);
|
|
941
|
+ free(url_regex);
|
|
942
|
+ free(pchk);
|
|
943
|
+ engine->phishcheck = NULL;
|
|
944
|
+ return CL_EFORMAT;
|
|
945
|
+ }
|
|
946
|
+ free(url_regex);
|
|
947
|
+ realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_path1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
|
|
948
|
+ if(build_regex(&pchk->preg_realurl, realurl_regex,1)) {
|
939
|
949
|
free_regex(&pchk->preg_cctld);
|
940
|
950
|
free_regex(&pchk->preg_tld);
|
941
|
|
- free(pchk->url_regex);
|
|
951
|
+ free_regex(&pchk->preg);
|
|
952
|
+ free(url_regex);
|
|
953
|
+ free(realurl_regex);
|
942
|
954
|
free(pchk);
|
943
|
955
|
engine->phishcheck = NULL;
|
944
|
956
|
return CL_EFORMAT;
|
945
|
957
|
}
|
|
958
|
+ free(realurl_regex);
|
946
|
959
|
if(build_regex(&pchk->preg_numeric,numeric_url_regex,1)) {
|
947
|
960
|
free_regex(&pchk->preg_cctld);
|
948
|
961
|
free_regex(&pchk->preg_tld);
|
949
|
962
|
free_regex(&pchk->preg);
|
950
|
|
- free(pchk->url_regex);
|
|
963
|
+ free_regex(&pchk->preg_realurl);
|
951
|
964
|
free(pchk);
|
952
|
965
|
engine->phishcheck = NULL;
|
953
|
966
|
return CL_EFORMAT;
|
...
|
...
|
@@ -967,10 +977,7 @@ void phishing_done(struct cl_engine* engine)
|
967
|
967
|
free_regex(&pchk->preg_cctld);
|
968
|
968
|
free_regex(&pchk->preg_tld);
|
969
|
969
|
free_regex(&pchk->preg_numeric);
|
970
|
|
- if(pchk->url_regex) {
|
971
|
|
- free(pchk->url_regex);
|
972
|
|
- pchk->url_regex = NULL;
|
973
|
|
- }
|
|
970
|
+ free_regex(&pchk->preg_realurl);
|
974
|
971
|
pchk->is_disabled = 1;
|
975
|
972
|
}
|
976
|
973
|
whitelist_done(engine);
|
...
|
...
|
@@ -985,13 +992,21 @@ void phishing_done(struct cl_engine* engine)
|
985
|
985
|
|
986
|
986
|
/*
|
987
|
987
|
* Only those URLs are identified as URLs for which phishing detection can be performed.
|
988
|
|
- * This means that no attempt is made to properly recognize 'cid:' URLs
|
989
|
988
|
*/
|
990
|
989
|
static int isURL(const struct phishcheck* pchk,const char* URL)
|
991
|
990
|
{
|
992
|
991
|
return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
|
993
|
992
|
}
|
994
|
993
|
|
|
994
|
+/*
|
|
995
|
+ * Check if this is a real URL, which basically means checking that it has a known URL scheme (http, https, ftp, mailto; see URI_CHECK_PROTOCOLS).
|
|
996
|
+ * This prevents false positives with outbind:// and blocked:: links.
|
|
997
|
+ */
|
|
998
|
+static int isRealURL(const struct phishcheck* pchk,const char* URL)
|
|
999
|
+{
|
|
1000
|
+ return URL ? !cli_regexec(&pchk->preg_realurl,URL,0,NULL,0) : 0;
|
|
1001
|
+}
|
|
1002
|
+
|
995
|
1003
|
static int isNumericURL(const struct phishcheck* pchk,const char* URL)
|
996
|
1004
|
{
|
997
|
1005
|
return URL ? !cli_regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0;
|
...
|
...
|
@@ -1146,6 +1161,14 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
|
1146
|
1146
|
if(whitelist_check(engine,urls,0))
|
1147
|
1147
|
return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */
|
1148
|
1148
|
|
|
1149
|
+ if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) )&&
|
|
1150
|
+ ( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
|
|
1151
|
+ !(phishy&PHISHY_NUMERIC_IP))) {
|
|
1152
|
+ cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
|
|
1153
|
+ free_if_needed(&host_url);
|
|
1154
|
+ return CL_PHISH_TEXTURL;
|
|
1155
|
+ }
|
|
1156
|
+
|
1149
|
1157
|
if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,NULL,0,&urls->flags))
|
1150
|
1158
|
phishy |= DOMAIN_LISTED;
|
1151
|
1159
|
else {
|
...
|
...
|
@@ -1226,23 +1249,11 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
|
1226
|
1226
|
return CL_PHISH_HOST_NOT_LISTED;
|
1227
|
1227
|
}
|
1228
|
1228
|
|
1229
|
|
- if(!strncmp(urls->displayLink.data,cid,cid_len))/* cid: image */{
|
1230
|
|
- free_if_needed(&host_url);
|
1231
|
|
- return CL_PHISH_CLEAN_CID;
|
1232
|
|
- }
|
1233
|
|
-
|
1234
|
1229
|
if(whitelist_check(engine,&host_url,1)) {
|
1235
|
1230
|
free_if_needed(&host_url);
|
1236
|
1231
|
return CL_PHISH_HOST_WHITELISTED;
|
1237
|
1232
|
}
|
1238
|
1233
|
|
1239
|
|
- if((!isURL(pchk, urls->displayLink.data) || !isURL(pchk, urls->realLink.data) )&&
|
1240
|
|
- ( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
|
1241
|
|
- !(phishy&PHISHY_NUMERIC_IP))) {
|
1242
|
|
- cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
|
1243
|
|
- free_if_needed(&host_url);
|
1244
|
|
- return CL_PHISH_TEXTURL;
|
1245
|
|
- }
|
1246
|
1234
|
|
1247
|
1235
|
if(urls->flags&HOST_SUFFICIENT) {
|
1248
|
1236
|
if(!strcmp(urls->realLink.data,urls->displayLink.data)) {
|