Browse code

improve URL handling

git-svn: trunk@4831

Török Edvin authored on 2009/02/19 05:27:25
Showing 3 changed files
... ...
@@ -1,3 +1,8 @@
1
+Wed Feb 18 22:58:14 EET 2009 (edwin)
2
+------------------------------------
3
+ * libclamav/phishcheck.c, unit_tests/check_regex.c: improve URL
4
+ handling
5
+
1 6
 Wed Feb 18 22:24:22 EET 2009 (edwin)
2 7
 ------------------------------------
3 8
  * libclamav/: reorder fields (bb #1144)
... ...
@@ -1002,9 +1002,10 @@ static inline int validate_uri_ialpha(const char *start, const char *end)
1002 1002
 /*
1003 1003
  * Only those URLs are identified as URLs for which phishing detection can be performed.
1004 1004
  */
1005
-static int isURL(const char* URL, int accept_anyproto)
1005
+static int isURL(char* URL, int accept_anyproto)
1006 1006
 {
1007
-	const char *start = NULL, *p, *q, *end;
1007
+	char *last_tld_end = NULL, *q;
1008
+	const char *start = NULL, *p, *end;
1008 1009
 	if(!URL)
1009 1010
 		return 0;
1010 1011
 
... ...
@@ -1055,6 +1056,8 @@ static int isURL(const char* URL, int accept_anyproto)
1055 1055
 		if(q) {
1056 1056
 			if(!validate_uri_xpalphas_nodot(p, q))
1057 1057
 				return 0;
1058
+			if (accept_anyproto && in_tld_set(p, q-p))
1059
+			    last_tld_end = q;
1058 1060
 			p = q+1;
1059 1061
 		}
1060 1062
 	} while(q);
... ...
@@ -1063,7 +1066,16 @@ static int isURL(const char* URL, int accept_anyproto)
1063 1063
 	if (end < p)
1064 1064
 		end = p;
1065 1065
 	while (*end == ' ' && end > p) --end;
1066
-	return !!in_tld_set(p, end - p);
1066
+
1067
+	if (in_tld_set(p, end - p))
1068
+	    return 1;
1069
+	if (!accept_anyproto)
1070
+	    return 0;
1071
+	if (last_tld_end) {
1072
+	    *last_tld_end = '\0';
1073
+	    return 1;
1074
+	}
1075
+	return 0;
1067 1076
 }
1068 1077
 
1069 1078
 /*
... ...
@@ -1132,6 +1144,7 @@ static int url_get_host(struct url_check* url,struct url_check* host_url,int isR
1132 1132
 	if(!isReal) {
1133 1133
 		url->pre_fixup.host_start = start - URL;
1134 1134
 		url->pre_fixup.host_end = end - URL;
1135
+		url->pre_fixup.pre_displayLink.data[url->pre_fixup.host_end] = '\0';
1135 1136
 	}
1136 1137
 	return CL_PHISH_NODECISION;
1137 1138
 }
... ...
@@ -226,6 +226,7 @@ static const struct rtest {
226 226
 		"http://pages.ebay.de@fake.example.com","pages.ebay.de",0},
227 227
 	{NULL,"http://key.com","https://key.com",0},
228 228
 	{NULL,"http://key.com%00fake.example.com","https://key.com",0},
229
+	{NULL,"http://key.com.example.com","key.com.invalid"}
229 230
 };
230 231
 
231 232
 #ifdef CHECK_HAVE_LOOPS
... ...
@@ -434,7 +435,6 @@ static struct uc {
434 434
     {"http://example.com/blah/..","example.com/",""},
435 435
     {"http://example.com/blah/../x","example.com/","x"},
436 436
     {"http://example.com/./a","example.com/","a"}
437
-
438 437
 };
439 438
 
440 439
 START_TEST (test_url_canon)