git-svn: trunk@3560
Török Edvin authored on 2008/01/31 03:44:07... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Wed Jan 30 20:23:20 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/phishcheck.c, regex_list.c: when domain matches, preserve full |
|
4 |
+ subdomain (bb #721) |
|
5 |
+ |
|
1 | 6 |
Tue Jan 29 17:50:05 GMT 2008 (njh) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/tnef.c: Handle trailing CR and change handling of truncated |
... | ... |
@@ -242,6 +242,7 @@ static const short int hextable[256] = { |
242 | 242 |
|
243 | 243 |
/* Prototypes*/ |
244 | 244 |
static void string_init_c(struct string* dest,char* data); |
245 |
+static int string_assign_concatenated(struct string* dest, const char* prefix, const char* begin, const char* end); |
|
245 | 246 |
static void string_assign_null(struct string* dest); |
246 | 247 |
static char *rfind(char *start, char c, size_t len); |
247 | 248 |
static char hex2int(const unsigned char* src); |
... | ... |
@@ -298,19 +299,32 @@ static void string_init_c(struct string* dest,char* data) |
298 | 298 |
dest->ref = NULL; |
299 | 299 |
} |
300 | 300 |
|
301 |
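+/* assigns to @dest a newly allocated string made from concatenating @prefix with the characters in [@begin, @end) (@end itself is not copied); returns CL_EMEM if the allocation fails, CL_SUCCESS otherwise */ |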
|
302 |
+static int string_assign_concatenated(struct string* dest, const char* prefix, const char* begin, const char* end) |
|
303 |
+{ |
|
304 |
+ const size_t prefix_len = strlen(prefix); |
|
305 |
+ char* ret = cli_malloc(prefix_len + end - begin + 1); |
|
306 |
+ if(!ret) |
|
307 |
+ return CL_EMEM; |
|
308 |
+ strncpy(ret, prefix, prefix_len); |
|
309 |
+ strncpy(ret+prefix_len, begin, end-begin); |
|
310 |
+ ret[prefix_len+end-begin]='\0'; |
|
311 |
+ string_free(dest); |
|
312 |
+ string_init_c(dest, ret); |
|
313 |
+ return CL_SUCCESS; |
|
314 |
+} |
|
315 |
+ |
|
301 | 316 |
/* make a copy of the string between start -> end*/ |
302 | 317 |
static int string_assign_dup(struct string* dest,const char* start,const char* end) |
303 | 318 |
{ |
304 |
- char* ret = cli_malloc(end-start+1); |
|
319 |
+ char* ret = cli_malloc(end-start+1); |
|
305 | 320 |
if(!ret) |
306 | 321 |
return CL_EMEM; |
307 | 322 |
strncpy(ret,start,end-start); |
308 | 323 |
ret[end-start]='\0'; |
309 | 324 |
|
310 | 325 |
string_free(dest); |
311 |
- dest->data=ret; |
|
312 |
- dest->refcount=1; |
|
313 |
- dest->ref=NULL; |
|
326 |
+ string_init_c(dest, ret); |
|
314 | 327 |
return CL_SUCCESS; |
315 | 328 |
} |
316 | 329 |
|
... | ... |
@@ -745,11 +759,10 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal) |
745 | 745 |
} |
746 | 746 |
if(!isReal) { |
747 | 747 |
str_fixup_spaces(&begin,&end); |
748 |
- if (( rc = string_assign_dup(URL,begin,end+1) )) { |
|
748 |
+ if (( rc = string_assign_dup(URL, begin, end+1) )) { |
|
749 | 749 |
return rc; |
750 | 750 |
} |
751 | 751 |
} |
752 |
- /*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/ |
|
753 | 752 |
} |
754 | 753 |
return 0; |
755 | 754 |
} |
... | ... |
@@ -765,6 +778,7 @@ static int found_possibly_unwanted(cli_ctx* ctx) |
765 | 765 |
|
766 | 766 |
int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
767 | 767 |
{ |
768 |
+ /* TODO: get_host and then apply regex, etc. */ |
|
768 | 769 |
int i; |
769 | 770 |
struct phishcheck* pchk = (struct phishcheck*) ctx->engine->phishcheck; |
770 | 771 |
/* check for status of whitelist fatal error, etc. */ |
... | ... |
@@ -1003,6 +1017,7 @@ static enum phish_status cleanupURLs(struct url_check* urls) |
1003 | 1003 |
{ |
1004 | 1004 |
if(urls->flags&CLEANUP_URL) { |
1005 | 1005 |
cleanupURL(&urls->realLink,NULL,1); |
1006 |
+ |
|
1006 | 1007 |
cleanupURL(&urls->displayLink,&urls->pre_fixup.pre_displayLink,0); |
1007 | 1008 |
if(!urls->displayLink.data || !urls->realLink.data) |
1008 | 1009 |
return CL_PHISH_NODECISION; |
... | ... |
@@ -1024,7 +1039,7 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str |
1024 | 1024 |
if(!start || !end) { |
1025 | 1025 |
string_assign_null(host); |
1026 | 1026 |
} |
1027 |
- else if(( rc = string_assign_dup(host,start,end) )) { |
|
1027 |
+ else if(( rc = string_assign_concatenated(host, ".", start, end) )) { |
|
1028 | 1028 |
return rc; |
1029 | 1029 |
} |
1030 | 1030 |
|
... | ... |
@@ -1110,6 +1125,8 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1110 | 1110 |
return rc < 0 ? rc : CL_PHISH_CLEAN; |
1111 | 1111 |
} |
1112 | 1112 |
|
1113 |
+ cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data, |
|
1114 |
+ urls->displayLink.data); |
|
1113 | 1115 |
if(whitelist_check(engine, urls, 0)) |
1114 | 1116 |
return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */ |
1115 | 1117 |
|
... | ... |
@@ -238,7 +238,7 @@ static inline size_t get_char_at_pos_with_skip(const struct pre_fixup_info* info |
238 | 238 |
realpos++; |
239 | 239 |
} |
240 | 240 |
while(str[realpos]==' ') realpos++; |
241 |
- cli_dbgmsg("calc_pos_with_skip:%s\n",str+realpos); |
|
241 |
+ cli_dbgmsg("calc_pos_with_skip:%s\n",str+realpos); |
|
242 | 242 |
return (pos>0 && !str[realpos]) ? '\0' : str[realpos>0?realpos-1:0]; |
243 | 243 |
} |
244 | 244 |
|
... | ... |
@@ -257,6 +257,7 @@ static inline size_t get_char_at_pos_with_skip(const struct pre_fixup_info* info |
257 | 257 |
*/ |
258 | 258 |
int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,const char** info,int is_whitelist) |
259 | 259 |
{ |
260 |
+ char* orig_real_url = real_url; |
|
260 | 261 |
massert(matcher); |
261 | 262 |
massert(real_url); |
262 | 263 |
massert(display_url); |
... | ... |
@@ -264,6 +265,9 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
264 | 264 |
if(!matcher->list_inited) |
265 | 265 |
return 0; |
266 | 266 |
massert(matcher->list_built); |
267 |
+ /* skip initial '.' inserted by get_host */ |
|
268 |
+ if(real_url[0] == '.') real_url++; |
|
269 |
+ if(display_url[0] == '.') display_url++; |
|
267 | 270 |
{ |
268 | 271 |
size_t real_len = strlen(real_url); |
269 | 272 |
size_t display_len = strlen(display_url); |
... | ... |
@@ -280,7 +284,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
280 | 280 |
buffer[real_len]= (!is_whitelist && hostOnly) ? '\0' : ':'; |
281 | 281 |
if(!hostOnly || is_whitelist) { |
282 | 282 |
strncpy(buffer+real_len+1,display_url,display_len); |
283 |
- if(is_whitelist) |
|
283 |
+ if(is_whitelist) |
|
284 | 284 |
buffer[buffer_len - 1] = '/'; |
285 | 285 |
buffer[buffer_len]=0; |
286 | 286 |
} |
... | ... |
@@ -297,30 +301,40 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
297 | 297 |
cli_ac_freedata(&mdata); |
298 | 298 |
if(rc) { |
299 | 299 |
char c; |
300 |
- const char* matched = strchr(*info,':'); |
|
300 |
+ const char* matched = strchr(*info,':'); |
|
301 | 301 |
const size_t match_len = matched ? strlen(matched+1) : 0; |
302 | 302 |
if(((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len+1))==' ' || c=='\0' || c=='/' || c=='?') && |
303 | 303 |
(match_len == buffer_len || /* full match */ |
304 | 304 |
(match_len < buffer_len && |
305 |
- ((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len))=='.' || (c==' ')) ) |
|
305 |
+ ((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len))=='.' || (c==' ')) ) |
|
306 | 306 |
/* subdomain matched*/)) { |
307 | 307 |
|
308 |
- cli_dbgmsg("Got a match: %s with %s\n",buffer,*info); |
|
309 |
- cli_dbgmsg("Before inserting .: %s\n",real_url); |
|
308 |
+ cli_dbgmsg("Got a match: %s with %s\n", buffer, *info); |
|
309 |
+ cli_dbgmsg("Before inserting .: %s\n", orig_real_url); |
|
310 | 310 |
if(real_len >= match_len + 1) { |
311 |
- real_url[real_len-match_len-1]='.'; |
|
312 |
- cli_dbgmsg("After inserting .: %s\n",real_url); |
|
311 |
+ const size_t pos = real_len - match_len - 1; |
|
312 |
+ if(real_url[pos] != '.') { |
|
313 |
+ cli_dbgmsg("No dot here:%s\n",real_url+pos); |
|
314 |
+ /* we need to shift left, and insert a '.' |
|
315 |
+ * we have an extra '.' at the beginning inserted by get_host to have room, |
|
316 |
+ * orig_real_url has to be used here, |
|
317 |
+ * because we want to overwrite that extra '.' */ |
|
318 |
+ size_t orig_real_len = strlen(orig_real_url); |
|
319 |
+ real_url = orig_real_url; |
|
320 |
+ memmove(real_url, real_url+1, orig_real_len-match_len-1); |
|
321 |
+ real_url[orig_real_len-match_len-1]='.'; |
|
322 |
+ cli_dbgmsg("After inserting .: %s\n", real_url); |
|
323 |
+ } |
|
313 | 324 |
} |
314 | 325 |
break; |
315 | 326 |
} |
316 |
- cli_dbgmsg("Ignoring false match: %s with %s,%c\n",buffer,*info,c); |
|
327 |
+ cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, *info, c); |
|
317 | 328 |
rc=0; |
318 | 329 |
} |
319 | 330 |
} |
320 | 331 |
} else |
321 | 332 |
rc = 0; |
322 |
- |
|
323 |
- if(!rc) |
|
333 |
+ if(!rc) |
|
324 | 334 |
rc = match_node(hostOnly ? matcher->root_regex_hostonly : matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS; |
325 | 335 |
free(buffer); |
326 | 336 |
if(!rc) |