git-svn: trunk@2931
Török Edvin authored on 2007/03/11 20:14:35... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Sun Mar 11 11:21:00 EET 2007 (edwin) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/hashtab.[ch],phish_*,regex_list.[ch]: #include cleanup, Code |
|
4 |
+ cleanup, remove commented-out code, fix some gcc -W* warnings |
|
5 |
+ |
|
1 | 6 |
Sun Mar 11 00:02:12 GMT 2007 (njh) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/mbox.c: Remove use of libcurl |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* HTML Entity & Encoding normalization. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include "others.h" |
32 | 32 |
#include "hashtab.h" |
33 | 33 |
|
34 |
+ |
|
34 | 35 |
static const size_t prime_list[] = |
35 | 36 |
{ |
36 | 37 |
53ul, 97ul, 193ul, 389ul, 769ul, |
... | ... |
@@ -80,13 +81,13 @@ static inline void PROFILE_FIND_ELEMENT(struct hashtable *s) |
80 | 80 |
static inline void PROFILE_FIND_NOTFOUND(struct hashtable *s, size_t tries) |
81 | 81 |
{ |
82 | 82 |
s->PROFILE_STRUCT.not_found++; |
83 |
- s->PROFILE_STRUCT.not_found_tries += tries+1; |
|
83 |
+ s->PROFILE_STRUCT.not_found_tries += tries; |
|
84 | 84 |
} |
85 | 85 |
|
86 | 86 |
static inline void PROFILE_FIND_FOUND(struct hashtable *s, size_t tries) |
87 | 87 |
{ |
88 | 88 |
s->PROFILE_STRUCT.found++; |
89 |
- s->PROFILE_STRUCT.found_tries += tries+1; |
|
89 |
+ s->PROFILE_STRUCT.found_tries += tries; |
|
90 | 90 |
} |
91 | 91 |
|
92 | 92 |
static inline void PROFILE_HASH_EXHAUSTED(struct hashtable *s) |
... | ... |
@@ -102,7 +103,7 @@ static inline void PROFILE_GROW_START(struct hashtable *s) |
102 | 102 |
static inline void PROFILE_GROW_FOUND(struct hashtable *s, size_t tries) |
103 | 103 |
{ |
104 | 104 |
s->PROFILE_STRUCT.grow_found++; |
105 |
- s->PROFILE_STRUCT.grow_found_tries += tries+1; |
|
105 |
+ s->PROFILE_STRUCT.grow_found_tries += tries; |
|
106 | 106 |
} |
107 | 107 |
|
108 | 108 |
static inline void PROFILE_GROW_DONE(struct hashtable *s) |
... | ... |
@@ -112,19 +113,19 @@ static inline void PROFILE_GROW_DONE(struct hashtable *s) |
112 | 112 |
static inline void PROFILE_DELETED_REUSE(struct hashtable *s, size_t tries) |
113 | 113 |
{ |
114 | 114 |
s->PROFILE_STRUCT.deleted_reuse++; |
115 |
- s->PROFILE_STRUCT.deleted_tries += tries+1; |
|
115 |
+ s->PROFILE_STRUCT.deleted_tries += tries; |
|
116 | 116 |
} |
117 | 117 |
|
118 | 118 |
static inline void PROFILE_INSERT(struct hashtable *s, size_t tries) |
119 | 119 |
{ |
120 | 120 |
s->PROFILE_STRUCT.inserts++; |
121 |
- s->PROFILE_STRUCT.insert_tries += tries+1; |
|
121 |
+ s->PROFILE_STRUCT.insert_tries += tries; |
|
122 | 122 |
} |
123 | 123 |
|
124 | 124 |
static inline void PROFILE_DATA_UPDATE(struct hashtable *s, size_t tries) |
125 | 125 |
{ |
126 | 126 |
s->PROFILE_STRUCT.update++; |
127 |
- s->PROFILE_STRUCT.update_tries += tries+1; |
|
127 |
+ s->PROFILE_STRUCT.update_tries += tries; |
|
128 | 128 |
} |
129 | 129 |
|
130 | 130 |
static inline void PROFILE_HASH_DELETE(struct hashtable *s) |
... | ... |
@@ -140,7 +141,7 @@ static inline void PROFILE_HASH_CLEAR(struct hashtable *s) |
140 | 140 |
static inline void PROFILE_REPORT(const struct hashtable *s) |
141 | 141 |
{ |
142 | 142 |
size_t lookups, queries, insert_tries, inserts; |
143 |
- cli_dbgmsg("--------Hashtable usage report for %x--------------\n",s); |
|
143 |
+ cli_dbgmsg("--------Hashtable usage report for %p--------------\n",(const void*)s); |
|
144 | 144 |
cli_dbgmsg("hash function calculations:%ld\n",s->PROFILE_STRUCT.calc_hash); |
145 | 145 |
cli_dbgmsg("successfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.found, s->PROFILE_STRUCT.find_req, s->PROFILE_STRUCT.found_tries); |
146 | 146 |
cli_dbgmsg("unsuccessfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.not_found, s->PROFILE_STRUCT.find_req , s->PROFILE_STRUCT.not_found_tries); |
... | ... |
@@ -195,7 +196,7 @@ int hashtab_init(struct hashtable *s,size_t capacity) |
195 | 195 |
return 0; |
196 | 196 |
} |
197 | 197 |
|
198 |
-static inline size_t hash(const unsigned char* k,const size_t len,const size_t SIZE) |
|
198 |
+static size_t hash(const unsigned char* k,const size_t len,const size_t SIZE) |
|
199 | 199 |
{ |
200 | 200 |
size_t Hash = 0; |
201 | 201 |
size_t i; |
... | ... |
@@ -208,7 +209,7 @@ static inline size_t hash(const unsigned char* k,const size_t len,const size_t S |
208 | 208 |
struct element* hashtab_find(const struct hashtable *s,const unsigned char* key,const size_t len) |
209 | 209 |
{ |
210 | 210 |
struct element* element; |
211 |
- size_t tries = 0; |
|
211 |
+ size_t tries = 1; |
|
212 | 212 |
size_t idx; |
213 | 213 |
|
214 | 214 |
if(!s) |
... | ... |
@@ -248,8 +249,7 @@ static int hashtab_grow(struct hashtable *s) |
248 | 248 |
for(i=0; i < s->capacity;i++) { |
249 | 249 |
if(s->htable[i].key && s->htable[i].key != DELETED_KEY) { |
250 | 250 |
struct element* element; |
251 |
- size_t tries = 0; |
|
252 |
- |
|
251 |
+ size_t tries = 1; |
|
253 | 252 |
|
254 | 253 |
PROFILE_CALC_HASH(s); |
255 | 254 |
idx = hash(s->htable[i].key, strlen((const char*)s->htable[i].key), new_capacity); |
... | ... |
@@ -276,7 +276,7 @@ static int hashtab_grow(struct hashtable *s) |
276 | 276 |
s->used = used; |
277 | 277 |
s->capacity = new_capacity; |
278 | 278 |
s->maxfill = new_capacity*8/10; |
279 |
- cli_dbgmsg("Table %x size after grow:%ld\n",s,s->capacity); |
|
279 |
+ cli_dbgmsg("Table %p size after grow:%ld\n",(void*)s,s->capacity); |
|
280 | 280 |
PROFILE_GROW_DONE(s); |
281 | 281 |
return CL_SUCCESS; |
282 | 282 |
} |
... | ... |
@@ -286,7 +286,7 @@ int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len |
286 | 286 |
{ |
287 | 287 |
struct element* element; |
288 | 288 |
struct element* deleted_element = NULL; |
289 |
- size_t tries = 0; |
|
289 |
+ size_t tries = 1; |
|
290 | 290 |
size_t idx; |
291 | 291 |
if(!s) |
292 | 292 |
return CL_ENULLARG; |
... | ... |
@@ -315,7 +315,7 @@ int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len |
315 | 315 |
element->data = data; |
316 | 316 |
s->used++; |
317 | 317 |
if(s->used > s->maxfill) { |
318 |
- cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",s,s->capacity); |
|
318 |
+ cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity); |
|
319 | 319 |
hashtab_grow(s); |
320 | 320 |
} |
321 | 321 |
return 0; |
... | ... |
@@ -335,7 +335,7 @@ int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len |
335 | 335 |
} while (tries <= s->capacity); |
336 | 336 |
/* no free place found*/ |
337 | 337 |
PROFILE_HASH_EXHAUSTED(s); |
338 |
- cli_dbgmsg("hashtab.c: Growing hashtable %x, because its full, old size:%ld.\n",s,s->capacity); |
|
338 |
+ cli_dbgmsg("hashtab.c: Growing hashtable %p, because its full, old size:%ld.\n",(void*)s,s->capacity); |
|
339 | 339 |
} while( hashtab_grow(s) >= 0 ); |
340 | 340 |
cli_warnmsg("hashtab.c: Unable to grow hashtable\n"); |
341 | 341 |
return CL_EMEM; |
... | ... |
@@ -401,8 +401,6 @@ int hashtab_generate_c(const struct hashtable *s,const char* name) |
401 | 401 |
return 0; |
402 | 402 |
} |
403 | 403 |
|
404 |
- |
|
405 |
- |
|
406 | 404 |
int hashtab_load(FILE* in, struct hashtable *s) |
407 | 405 |
{ |
408 | 406 |
char line[1024]; |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Phishing module: domain list implementation. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -37,48 +37,26 @@ |
37 | 37 |
#endif |
38 | 38 |
|
39 | 39 |
#include <stdio.h> |
40 |
-#include <stdlib.h> |
|
41 |
-#include <errno.h> |
|
42 | 40 |
#include <string.h> |
43 |
-#ifdef HAVE_STRINGS_H |
|
44 |
-#include <strings.h> |
|
45 |
-#endif |
|
46 | 41 |
#include <ctype.h> |
47 | 42 |
|
48 |
-#include <limits.h> |
|
49 | 43 |
#include "clamav.h" |
50 |
-#include <sys/types.h> |
|
51 |
- |
|
52 |
-#ifdef HAVE_REGEX_H |
|
53 |
-/*#define USE_PCRE*/ |
|
54 |
-#include <regex.h> |
|
55 |
-#endif |
|
56 |
- |
|
57 |
-#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2) |
|
58 |
-#include <stddef.h> |
|
59 |
-#endif |
|
60 |
- |
|
61 | 44 |
#include "others.h" |
62 |
-#include "defaults.h" |
|
63 |
-#include "str.h" |
|
64 |
-#include "filetypes.h" |
|
65 |
-#include "mbox.h" |
|
66 | 45 |
#include "phish_domaincheck_db.h" |
67 | 46 |
#include "regex_list.h" |
68 |
-#include "matcher-ac.h" |
|
69 | 47 |
|
70 | 48 |
int domainlist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly,unsigned short* flags) |
71 | 49 |
{ |
72 | 50 |
const char* info; |
73 | 51 |
int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly,&info,0) : 0; |
74 |
- if(rc && info && info[0]) {/*match successfull, and has custom flags*/ |
|
52 |
+ if(rc && info && info[0]) {/*match successful, and has custom flags*/ |
|
75 | 53 |
if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) { |
76 | 54 |
unsigned short notwantedflags=0; |
77 | 55 |
sscanf(info,"%hx",&notwantedflags); |
78 | 56 |
*flags &= ~notwantedflags;/* filter unwanted phishcheck flags */ |
79 | 57 |
} |
80 | 58 |
else { |
81 |
- cli_warnmsg("Phishcheck:Unknown flag format in domainlist, 3 hex digits expected"); |
|
59 |
+ cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected"); |
|
82 | 60 |
} |
83 | 61 |
} |
84 | 62 |
return rc; |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Phishing module: domain list implementation. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
|
25 | 25 |
#ifndef _PHISH_DOMAINCHECK_DB_H |
26 | 26 |
#define _PHISH_DOMAINCHECK_DB_H |
27 |
+#include "clamav.h" |
|
27 | 28 |
|
28 | 29 |
int init_domainlist(struct cl_engine* engine); |
29 | 30 |
void domainlist_done(struct cl_engine* engine); |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Phishing module: whitelist implementation. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -36,41 +36,20 @@ |
36 | 36 |
#endif |
37 | 37 |
#endif |
38 | 38 |
|
39 |
+ |
|
39 | 40 |
#include <stdio.h> |
40 |
-#include <stdlib.h> |
|
41 |
-#include <errno.h> |
|
42 | 41 |
#include <string.h> |
43 |
-#ifdef HAVE_STRINGS_H |
|
44 |
-#include <strings.h> |
|
45 |
-#endif |
|
46 | 42 |
#include <ctype.h> |
47 | 43 |
|
48 |
-#include <limits.h> |
|
49 | 44 |
#include "clamav.h" |
50 |
-#include <sys/types.h> |
|
51 |
- |
|
52 |
-#ifdef HAVE_REGEX_H |
|
53 |
-/*#define USE_PCRE*/ |
|
54 |
-#include <regex.h> |
|
55 |
-#endif |
|
56 |
- |
|
57 |
-#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2) |
|
58 |
-#include <stddef.h> |
|
59 |
-#endif |
|
60 |
- |
|
61 | 45 |
#include "others.h" |
62 |
-#include "defaults.h" |
|
63 |
-#include "str.h" |
|
64 |
-#include "filetypes.h" |
|
65 |
-#include "mbox.h" |
|
66 | 46 |
#include "phish_whitelist.h" |
67 | 47 |
#include "regex_list.h" |
68 |
-#include "matcher-ac.h" |
|
69 | 48 |
|
70 | 49 |
int whitelist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly) |
71 | 50 |
{ |
72 | 51 |
const char* info;/*unused*/ |
73 |
- cli_dbgmsg("Phishing: looking up in whitelist:%s:%s; hostonly:%d\n",real_url,display_url,hostOnly); |
|
52 |
+ cli_dbgmsg("Phishing: looking up in whitelist:%s:%s; host-only:%d\n",real_url,display_url,hostOnly); |
|
74 | 53 |
return engine->whitelist_matcher ? regex_list_match(engine->whitelist_matcher,real_url,display_url,hostOnly,&info,1) : 0; |
75 | 54 |
} |
76 | 55 |
|
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Phishing module: whitelist implementation. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -25,6 +25,8 @@ |
25 | 25 |
#ifndef _PHISH_WHITELIST_H |
26 | 26 |
#define _PHISH_WHITELIST_H |
27 | 27 |
|
28 |
+#include "clamav.h" |
|
29 |
+ |
|
28 | 30 |
int init_whitelist(struct cl_engine* engine); |
29 | 31 |
void whitelist_done(struct cl_engine* engine); |
30 | 32 |
void whitelist_cleanup(const struct cl_engine* engine); |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Detect phishing, based on URL spoofing detection. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -37,50 +37,26 @@ |
37 | 37 |
#endif |
38 | 38 |
|
39 | 39 |
#include <stdio.h> |
40 |
-#include <stdlib.h> |
|
41 |
-#include <errno.h> |
|
42 | 40 |
#include <string.h> |
43 |
-#ifdef HAVE_STRINGS_H |
|
44 |
-#include <strings.h> |
|
45 |
-#endif |
|
46 | 41 |
#include <ctype.h> |
47 |
-#include <limits.h> |
|
48 |
-#include "clamav.h" |
|
49 |
-#ifndef C_WINDOWS |
|
50 |
-#include <netdb.h> |
|
51 |
-#include <netinet/in.h> |
|
52 |
-#endif |
|
53 |
- |
|
54 |
-#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2) |
|
55 |
-#include <stddef.h> |
|
56 |
-#endif |
|
57 |
- |
|
58 |
-#include <sys/types.h> |
|
59 |
-#ifndef C_WINDOWS |
|
60 |
-#include <sys/socket.h> |
|
61 |
-#endif |
|
62 |
-#ifdef HAVE_REGEX_H |
|
63 |
-#include <regex.h> |
|
64 |
-#endif |
|
65 | 42 |
|
43 |
+#include "clamav.h" |
|
66 | 44 |
#include "others.h" |
67 |
-#include "defaults.h" |
|
68 |
-#include "str.h" |
|
69 |
-#include "filetypes.h" |
|
70 | 45 |
#include "mbox.h" |
46 |
+#include "message.h" |
|
71 | 47 |
#include "htmlnorm.h" |
72 | 48 |
#include "phishcheck.h" |
73 |
-#include "phish_whitelist.h" |
|
74 | 49 |
#include "phish_domaincheck_db.h" |
75 |
-#include "regex_list.h" |
|
50 |
+#include "phish_whitelist.h" |
|
76 | 51 |
#include "iana_tld.h" |
77 | 52 |
|
53 |
+ |
|
78 | 54 |
#define DOMAIN_REAL 1 |
79 | 55 |
#define DOMAIN_DISPLAY 0 |
80 | 56 |
|
81 | 57 |
#define PHISHY_USERNAME_IN_URL 1 |
82 | 58 |
#define PHISHY_NUMERIC_IP 2 |
83 |
-#define REAL_IS_MAILTO 4 |
|
59 |
+#define REAL_IS_MAILTO 4 |
|
84 | 60 |
/* this is just a flag, so that the displayed url will be parsed as mailto too, for example |
85 | 61 |
* <a href='mailto:somebody@yahoo.com'>to:somebody@yahoo.com</a>*/ |
86 | 62 |
#define DOMAIN_LISTED 8 |
... | ... |
@@ -91,6 +67,8 @@ |
91 | 91 |
* Phishing design documentation, |
92 | 92 |
(initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB) |
93 | 93 |
|
94 |
+TODO:update this doc |
|
95 |
+ |
|
94 | 96 |
*Warning*: if flag *--phish-scan-alldomains* (or equivalent clamd/clamav-milter config option) isn't given, then phishing scanning is done only for domains listed in daily.pdb. |
95 | 97 |
If your daily.pdb is empty, then by default NO PHISHING is DONE, UNLESS you give the *--phish-scan-alldomains* |
96 | 98 |
This is just a side-effect, daily.pdb is empty, because it isn't yet officially in daily.cvd. |
... | ... |
@@ -172,8 +150,6 @@ static char empty_string[]=""; |
172 | 172 |
#define DWORD_CLOAK "[0-9]{8,}" |
173 | 173 |
|
174 | 174 |
static const char cloaked_host_regex[] = "^(("CLOAK_REGEX_HEXURL")|("OCTAL_CLOAK")|("DWORD_CLOAK"))$"; |
175 |
- |
|
176 |
- |
|
177 | 175 |
static const char tld_regex[] = "^"iana_tld"$"; |
178 | 176 |
static const char cctld_regex[] = "^"iana_cctld"$"; |
179 | 177 |
static const char dotnet[] = ".net"; |
... | ... |
@@ -184,6 +160,9 @@ static const char gt[]=">"; |
184 | 184 |
static const char cid[] = "cid:"; |
185 | 185 |
static const char src_text[] = "src"; |
186 | 186 |
static const char href_text[] = "href"; |
187 |
+static const char mailto[] = "mailto:"; |
|
188 |
+static const char https[]="https://"; |
|
189 |
+ |
|
187 | 190 |
static const size_t href_text_len = sizeof(href_text); |
188 | 191 |
static const size_t src_text_len = sizeof(src_text); |
189 | 192 |
static const size_t cid_len = sizeof(cid)-1; |
... | ... |
@@ -192,8 +171,9 @@ static const size_t adonet_len = sizeof(adonet)-1; |
192 | 192 |
static const size_t aspnet_len = sizeof(aspnet)-1; |
193 | 193 |
static const size_t lt_len = sizeof(lt)-1; |
194 | 194 |
static const size_t gt_len = sizeof(gt)-1; |
195 |
+static const size_t mailto_len = sizeof(mailto)-1; |
|
196 |
+static const size_t https_len = sizeof(https)-1; |
|
195 | 197 |
|
196 |
-/*static const char* url_regex="^ *([[:alnum:]%_-]+:(//)?)?([[:alnum:]%_-]@)*[[:alnum:]%_-]+\\.([[:alnum:]%_-]+\\.)*[[:alnum:]_%-]+(/[[:alnum:];:@$=?&/.,%_-]+) *$";*/ |
|
197 | 198 |
/* for urls, including mailto: urls, and (broken) http:www... style urls*/ |
198 | 199 |
/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html |
199 | 200 |
* Modifications: don't allow empty domains/subdomains, such as www..com <- that is no url |
... | ... |
@@ -272,10 +252,10 @@ static const short int hextable[256] = { |
272 | 272 |
}; |
273 | 273 |
|
274 | 274 |
/* Prototypes*/ |
275 |
-static inline void string_init_c(struct string* dest,char* data); |
|
276 |
-static void string_assign_null(struct string* dest); |
|
277 |
-static char *rfind(char *start, char c, size_t len); |
|
278 |
-static inline char hex2int(const unsigned char* src); |
|
275 |
+static void string_init_c(struct string* dest,char* data); |
|
276 |
+static void string_assign_null(struct string* dest); |
|
277 |
+static char *rfind(char *start, char c, size_t len); |
|
278 |
+static char hex2int(const unsigned char* src); |
|
279 | 279 |
static int isTLD(const struct phishcheck* pchk,const char* str,int len); |
280 | 280 |
static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls); |
281 | 281 |
static const char* phishing_ret_toString(enum phish_status rc); |
... | ... |
@@ -311,7 +291,7 @@ static void string_free(struct string* str) |
311 | 311 |
} |
312 | 312 |
} |
313 | 313 |
|
314 |
-/* always use the string_assign when assigning to a string, this makes sure the old one's refcount is decremented*/ |
|
314 |
+/* always use the string_assign when assigning to a string, this makes sure the old one's reference count is decremented*/ |
|
315 | 315 |
static void string_assign(struct string* dest,struct string* src) |
316 | 316 |
{ |
317 | 317 |
string_free(dest); |
... | ... |
@@ -325,7 +305,7 @@ static void string_assign(struct string* dest,struct string* src) |
325 | 325 |
/* it doesn't free old string, use only for initialization |
326 | 326 |
* Doesn't allow NULL pointers, they are replaced by pointer to empty string |
327 | 327 |
* */ |
328 |
-static inline void string_init_c(struct string* dest,char* data) |
|
328 |
+static void string_init_c(struct string* dest,char* data) |
|
329 | 329 |
{ |
330 | 330 |
dest->refcount = 1; |
331 | 331 |
dest->data = data ? data : empty_string; |
... | ... |
@@ -348,7 +328,7 @@ static int string_assign_dup(struct string* dest,const char* start,const char* e |
348 | 348 |
return CL_SUCCESS; |
349 | 349 |
} |
350 | 350 |
|
351 |
-static inline void string_assign_null(struct string* dest) |
|
351 |
+static void string_assign_null(struct string* dest) |
|
352 | 352 |
{ |
353 | 353 |
string_free(dest); |
354 | 354 |
dest->data=empty_string; |
... | ... |
@@ -375,35 +355,31 @@ static void free_if_needed(struct url_check* url) |
375 | 375 |
static int build_regex(regex_t* preg,const char* regex,int nosub) |
376 | 376 |
{ |
377 | 377 |
int rc; |
378 |
- cli_dbgmsg("Compiling regex:%s\n",regex); |
|
378 |
+ cli_dbgmsg("Phishcheck: Compiling regex:%s\n",regex); |
|
379 | 379 |
rc = regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0)); |
380 | 380 |
if(rc) { |
381 | 381 |
|
382 | 382 |
#ifdef C_WINDOWS |
383 |
- cli_errmsg("Error in compiling regex, disabling phishing checks\n"); |
|
383 |
+ cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n"); |
|
384 | 384 |
#else |
385 | 385 |
size_t buflen = regerror(rc,preg,NULL,0); |
386 | 386 |
char *errbuf = cli_malloc(buflen); |
387 | 387 |
|
388 | 388 |
if(errbuf) { |
389 | 389 |
regerror(rc,preg,errbuf,buflen); |
390 |
- cli_errmsg("Error in compiling regex:%s\nDisabling phishing checks\n",errbuf); |
|
390 |
+ cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf); |
|
391 | 391 |
free(errbuf); |
392 | 392 |
} else |
393 |
- cli_errmsg("Error in compiling regex, disabling phishing checks. Additionaly an Out-of-memory error was encountered while generating a detailed error message\n"); |
|
393 |
+ cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks. Additionally an Out-of-memory error was encountered while generating a detailed error message\n"); |
|
394 | 394 |
#endif |
395 | 395 |
return 1; |
396 | 396 |
} |
397 | 397 |
return CL_SUCCESS; |
398 | 398 |
} |
399 | 399 |
|
400 |
-/*static regex_t* host_preg = NULL; |
|
401 |
-static const char* host_regex="cid:.+|mailto:(.+)|([[:alpha:]]+://)?(([^:/?]+@)+([^:/?]+)([:/?].+)?|([^@:/?]+)([:/?].+)?)"; <- this is slower than the function below |
|
402 |
-*/ |
|
403 | 400 |
/* allocates memory */ |
404 | 401 |
static int get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy) |
405 | 402 |
{ |
406 |
- const char mailto[] = "mailto:"; |
|
407 | 403 |
int rc,ismailto = 0; |
408 | 404 |
const char* start; |
409 | 405 |
const char* end=NULL; |
... | ... |
@@ -413,8 +389,8 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U |
413 | 413 |
} |
414 | 414 |
start = strstr(URL,"://"); |
415 | 415 |
if(!start) { |
416 |
- if(!strncmp(URL,mailto,sizeof(mailto)-1)) { |
|
417 |
- start = URL + sizeof(mailto)-1; |
|
416 |
+ if(!strncmp(URL,mailto,mailto_len)) { |
|
417 |
+ start = URL + mailto_len; |
|
418 | 418 |
ismailto = 1; |
419 | 419 |
} |
420 | 420 |
else if (!isReal && *phishy&REAL_IS_MAILTO) { |
... | ... |
@@ -426,14 +402,9 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U |
426 | 426 |
ismailto = 1; |
427 | 427 |
} |
428 | 428 |
else { |
429 |
-/* if(!strncmp(URL,"cid:",4)) {handled in phishcheck |
|
430 |
- string_assign_null(dest); |
|
431 |
- return;* cid: image, nothing to verify |
|
432 |
- } |
|
433 |
-*/ |
|
434 | 429 |
start=URL;/*URL without protocol*/ |
435 | 430 |
if(isReal) |
436 |
- cli_dbgmsg("PH:Real URL without protocol:%s\n",URL); |
|
431 |
+ cli_dbgmsg("Phishcheck: Real URL without protocol:%s\n",URL); |
|
437 | 432 |
else ismailto=2;/*no-protocol, might be mailto, @ is no problem*/ |
438 | 433 |
} |
439 | 434 |
} |
... | ... |
@@ -441,37 +412,32 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U |
441 | 441 |
start += 3; /* :// */ |
442 | 442 |
|
443 | 443 |
if(!ismailto || !isReal) { |
444 |
- const char *realhost; |
|
444 |
+ const char *realhost,*tld; |
|
445 | 445 |
|
446 | 446 |
do { |
447 | 447 |
end = start + strcspn(start,":/?"); |
448 | 448 |
realhost = strchr(start,'@'); |
449 | 449 |
|
450 |
- if(realhost == NULL) |
|
450 |
+ if(realhost == NULL || (start!=end && realhost>end)) { |
|
451 |
+ /*don't check beyond end of hostname*/ |
|
451 | 452 |
break; |
452 |
- |
|
453 |
- if(start!=end && realhost>end) |
|
454 |
- /*don't check beyond end of hostname*/ |
|
455 |
- realhost = NULL; |
|
456 |
- |
|
457 |
- if(realhost) { |
|
458 |
- const char* tld = strrchr(realhost,'.'); |
|
459 |
- rc = tld ? isTLD(s,tld,tld-realhost-1) : 0; |
|
460 |
- if(rc < 0) |
|
461 |
- return rc; |
|
462 |
- if(rc) |
|
463 |
- *phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people, |
|
464 |
- like http://www.ebay.com@somevilplace.someevildomain.com/ */ |
|
465 |
- start=realhost+1;/*skip the username*/ |
|
466 | 453 |
} |
454 |
+ |
|
455 |
+ tld = strrchr(realhost,'.'); |
|
456 |
+ rc = tld ? isTLD(s,tld,tld-realhost-1) : 0; |
|
457 |
+ if(rc < 0) |
|
458 |
+ return rc; |
|
459 |
+ if(rc) |
|
460 |
+ *phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people, |
|
461 |
+ like http://www.ebay.com@somevilplace.someevildomain.com/ */ |
|
462 |
+ start = realhost+1;/*skip the username*/ |
|
467 | 463 |
} while(realhost);/*skip over multiple @ characters, text following last @ character is the real host*/ |
468 | 464 |
} |
469 |
- else |
|
470 |
- if (ismailto && isReal) |
|
465 |
+ else if (ismailto && isReal) |
|
471 | 466 |
*phishy |= REAL_IS_MAILTO; |
472 | 467 |
|
473 | 468 |
if(!end) { |
474 |
- end = start+strcspn(start,":/?");/*especially important for mailto:somebody@yahoo.com?subject=...*/ |
|
469 |
+ end = start + strcspn(start,":/?");/*especially important for mailto:somebody@yahoo.com?subject=...*/ |
|
475 | 470 |
if(!end) |
476 | 471 |
end = start + strlen(start); |
477 | 472 |
} |
... | ... |
@@ -525,15 +491,15 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct |
525 | 525 |
char* domain; |
526 | 526 |
char* tld = strrchr(host->data,'.'); |
527 | 527 |
if(!tld) { |
528 |
- cli_dbgmsg("PH:What? A host without a tld? (%s)\n",host->data); |
|
528 |
+ cli_dbgmsg("Phishcheck: Encountered a host without a tld? (%s)\n",host->data); |
|
529 | 529 |
string_assign(dest,host); |
530 | 530 |
return; |
531 | 531 |
} |
532 | 532 |
if(isCountryCode(pchk,tld+1)) { |
533 |
- const char* countrycode=tld+1; |
|
533 |
+ const char* countrycode = tld+1; |
|
534 | 534 |
tld = rfind(host->data,'.',tld-host->data-1); |
535 | 535 |
if(!tld) { |
536 |
- cli_dbgmsg("PH:Weird, a name with only 2 levels (%s)\n", |
|
536 |
+ cli_dbgmsg("Phishcheck: Weird, a name with only 2 levels (%s)\n", |
|
537 | 537 |
host->data); |
538 | 538 |
string_assign(dest,host); |
539 | 539 |
return; |
... | ... |
@@ -552,43 +518,6 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct |
552 | 552 |
string_assign_ref(dest,host,domain+1); |
553 | 553 |
} |
554 | 554 |
|
555 |
- |
|
556 |
-/* |
|
557 |
-int ip_reverse(struct url_check* urls,int isReal) |
|
558 |
-{ |
|
559 |
- const char* host = isReal ? urls->realLink.data : urls->displayLink.data; |
|
560 |
- struct hostent *he = gethostbyname (host); |
|
561 |
- if (he) |
|
562 |
- { |
|
563 |
- char *addr = 0; |
|
564 |
- switch (he->h_addrtype) |
|
565 |
- { |
|
566 |
- case AF_INET: |
|
567 |
- addr = inet_ntoa (*(struct in_addr *) he->h_addr); |
|
568 |
- break; |
|
569 |
- } |
|
570 |
- if (addr && strcmp (he->h_name, addr) == 0) |
|
571 |
- { |
|
572 |
- char *h_addr_copy = strdup (he->h_addr); |
|
573 |
- if (h_addr_copy == NULL) |
|
574 |
- he = NULL; |
|
575 |
- else |
|
576 |
- { |
|
577 |
- he = gethostbyaddr (h_addr_copy, he->h_length, he->h_addrtype); |
|
578 |
- free (h_addr_copy); |
|
579 |
- } |
|
580 |
- } |
|
581 |
- if (he) |
|
582 |
- string_assign_dup(isReal ? &urls->realLink : &urls->displayLink,he->h_name,he->h_name+strlen(he->h_name)); |
|
583 |
- } |
|
584 |
- return 0; |
|
585 |
-} |
|
586 |
-* frees its argument, and allocates memory* |
|
587 |
-void reverse_lookup(struct url_check* url,int isReal) |
|
588 |
-{ |
|
589 |
- ip_reverse(url,isReal); |
|
590 |
-} |
|
591 |
-*/ |
|
592 | 555 |
static int isNumeric(const char* host) |
593 | 556 |
{ |
594 | 557 |
int len = strlen(host); |
... | ... |
@@ -606,8 +535,7 @@ static int isNumeric(const char* host) |
606 | 606 |
|
607 | 607 |
static int isSSL(const char* URL) |
608 | 608 |
{ |
609 |
- const char https[]="https://"; |
|
610 |
- return URL ? !strncmp(https,URL,sizeof(https)-1) : 0; |
|
609 |
+ return URL ? !strncmp(https,URL,https_len) : 0; |
|
611 | 610 |
} |
612 | 611 |
|
613 | 612 |
/* deletes @what from the string @begin. |
... | ... |
@@ -697,15 +625,15 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len) |
697 | 697 |
} |
698 | 698 |
|
699 | 699 |
|
700 |
-/* replace every occurence of @c in @str with @r*/ |
|
701 |
-static inline void str_replace(char* str,const char* end,char c,char r) |
|
700 |
+/* replace every occurrence of @c in @str with @r*/ |
|
701 |
+static void str_replace(char* str,const char* end,char c,char r) |
|
702 | 702 |
{ |
703 | 703 |
for(;str<end;str++) { |
704 | 704 |
if(*str==c) |
705 | 705 |
*str=r; |
706 | 706 |
} |
707 | 707 |
} |
708 |
-static inline void str_make_lowercase(char* str,size_t len) |
|
708 |
+static void str_make_lowercase(char* str,size_t len) |
|
709 | 709 |
{ |
710 | 710 |
for(;len;str++,len--) { |
711 | 711 |
*str = tolower(*str); |
... | ... |
@@ -713,7 +641,7 @@ static inline void str_make_lowercase(char* str,size_t len) |
713 | 713 |
} |
714 | 714 |
|
715 | 715 |
#define fix32(x) ((x)<32 ? 32 : (x)) |
716 |
-static inline void clear_msb(char* begin) |
|
716 |
+static void clear_msb(char* begin) |
|
717 | 717 |
{ |
718 | 718 |
for(;*begin;begin++) |
719 | 719 |
*begin = fix32((*begin)&0x7f); |
... | ... |
@@ -736,7 +664,7 @@ static inline void clear_msb(char* begin) |
736 | 736 |
* otherwise strip space |
737 | 737 |
* |
738 | 738 |
*/ |
739 |
-static inline void |
|
739 |
+static void |
|
740 | 740 |
str_fixup_spaces(char **begin, const char **end) |
741 | 741 |
{ |
742 | 742 |
char *space = strchr(*begin, ' '); |
... | ... |
@@ -855,17 +783,12 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
855 | 855 |
urls.always_check_flags |= CHECK_CLOAKING; |
856 | 856 |
} |
857 | 857 |
string_init_c(&urls.realLink,(char*)hrefs->value[i]); |
858 |
-/* if(!hrefs->contents[i]->isClosed) { |
|
859 |
- blobAddData(hrefs->contents[i],empty_string,1); |
|
860 |
- blobClose(hrefs->contents[i]); |
|
861 |
- }*/ |
|
862 | 858 |
string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i])); |
863 | 859 |
|
864 | 860 |
if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) { |
865 | 861 |
cli_warnmsg("urls.displayLink.data[...]"); |
866 | 862 |
return CL_CLEAN; |
867 | 863 |
} |
868 |
-/* massert(strlen(urls.displayLink.data) < blobGetDataSize(hrefs->contents[i]));*/ |
|
869 | 864 |
urls.realLink.refcount=-1; |
870 | 865 |
urls.displayLink.refcount=-1;/*don't free these, caller will free*/ |
871 | 866 |
if(strcmp((char*)hrefs->tag[i],"href")) { |
... | ... |
@@ -879,7 +802,7 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
879 | 879 |
if(pchk->is_disabled) |
880 | 880 |
return CL_CLEAN; |
881 | 881 |
free_if_needed(&urls); |
882 |
- cli_dbgmsg("Phishing scan result:%s\n",phishing_ret_toString(rc)); |
|
882 |
+ cli_dbgmsg("Phishcheck: Phishing scan result:%s\n",phishing_ret_toString(rc)); |
|
883 | 883 |
switch(rc)/*TODO: support flags from ctx->options,*/ |
884 | 884 |
{ |
885 | 885 |
case CL_PHISH_CLEAN: |
... | ... |
@@ -923,8 +846,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
923 | 923 |
} |
924 | 924 |
else |
925 | 925 |
if(strcmp((char*)hrefs->tag[i],"href")) |
926 |
- cli_dbgmsg("PH:href with no contents?\n"); |
|
927 |
- return CL_CLEAN;/*texturlfound?CL_VIRUS:0;*/ |
|
926 |
+ cli_dbgmsg("Phishcheck: href with no contents?\n"); |
|
927 |
+ return CL_CLEAN; |
|
928 | 928 |
} |
929 | 929 |
|
930 | 930 |
static char* str_compose(const char* a,const char* b,const char* c) |
... | ... |
@@ -943,7 +866,7 @@ static char* str_compose(const char* a,const char* b,const char* c) |
943 | 943 |
return concated; |
944 | 944 |
} |
945 | 945 |
|
946 |
-static inline char hex2int(const unsigned char* src) |
|
946 |
+static char hex2int(const unsigned char* src) |
|
947 | 947 |
{ |
948 | 948 |
return (src[0] == '0' && src[1] == '0') ? |
949 | 949 |
0x1 :/* don't convert %00 to \0, use 0x1 |
... | ... |
@@ -1088,7 +1011,7 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str |
1088 | 1088 |
return CL_PHISH_TEXTURL; |
1089 | 1089 |
} |
1090 | 1090 |
if(url->flags&CHECK_CLOAKING && !regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) { |
1091 |
- /* use a regex here, so that we don't accidentally block 0xacab.net style hosts */ |
|
1091 |
+ /* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */ |
|
1092 | 1092 |
string_free(host); |
1093 | 1093 |
return CL_PHISH_HEX_URL; |
1094 | 1094 |
} |
... | ... |
@@ -1096,8 +1019,6 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str |
1096 | 1096 |
return CL_PHISH_CLEAN;/* link without domain, such as: href="/isapi.dll?... */ |
1097 | 1097 |
if(isNumeric(host->data)) { |
1098 | 1098 |
*phishy |= PHISHY_NUMERIC_IP; |
1099 |
-/* if(url->flags&DO_REVERSE_LOOKUP) |
|
1100 |
- reverse_lookup(host_url,isReal);*/ |
|
1101 | 1099 |
} |
1102 | 1100 |
return CL_PHISH_NODECISION; |
1103 | 1101 |
} |
... | ... |
@@ -1125,7 +1046,6 @@ static int isEncoded(const char* url) |
1125 | 1125 |
size_t cnt=0; |
1126 | 1126 |
do{ |
1127 | 1127 |
cnt++; |
1128 |
- /*last=start;*/ |
|
1129 | 1128 |
start=strstr(start,"&#"); |
1130 | 1129 |
if(start) |
1131 | 1130 |
start=strstr(start,";"); |
... | ... |
@@ -1138,7 +1058,36 @@ static int whitelist_check(const struct cl_engine* engine,struct url_check* urls |
1138 | 1138 |
return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly); |
1139 | 1139 |
} |
1140 | 1140 |
|
1141 |
- |
|
1141 |
+static int isPhishing(enum phish_status rc) |
|
1142 |
+{ |
|
1143 |
+ switch(rc) { |
|
1144 |
+ case CL_PHISH_CLEAN: |
|
1145 |
+ case CL_PHISH_CLEANUP_OK: |
|
1146 |
+ case CL_PHISH_WHITELISTED: |
|
1147 |
+ case CL_PHISH_HOST_WHITELISTED: |
|
1148 |
+ case CL_PHISH_HOST_OK: |
|
1149 |
+ case CL_PHISH_DOMAIN_OK: |
|
1150 |
+ case CL_PHISH_REDIR_OK: |
|
1151 |
+ case CL_PHISH_HOST_REDIR_OK: |
|
1152 |
+ case CL_PHISH_DOMAIN_REDIR_OK: |
|
1153 |
+ case CL_PHISH_HOST_REVERSE_OK: |
|
1154 |
+ case CL_PHISH_DOMAIN_REVERSE_OK: |
|
1155 |
+ case CL_PHISH_MAILTO_OK: |
|
1156 |
+ case CL_PHISH_TEXTURL: |
|
1157 |
+ case CL_PHISH_HOST_NOT_LISTED: |
|
1158 |
+ case CL_PHISH_CLEAN_CID: |
|
1159 |
+ return 0; |
|
1160 |
+ case CL_PHISH_HEX_URL: |
|
1161 |
+ case CL_PHISH_CLOAKED_NULL: |
|
1162 |
+ case CL_PHISH_SSL_SPOOF: |
|
1163 |
+ case CL_PHISH_CLOAKED_UIU: |
|
1164 |
+ case CL_PHISH_NUMERIC_IP: |
|
1165 |
+ case CL_PHISH_NOMATCH: |
|
1166 |
+ return 1; |
|
1167 |
+ default: |
|
1168 |
+ return 1; |
|
1169 |
+ } |
|
1170 |
+} |
|
1142 | 1171 |
/* urls can't contain null pointer, caller must ensure this */ |
1143 | 1172 |
static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls) |
1144 | 1173 |
{ |
... | ... |
@@ -1150,7 +1099,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1150 | 1150 |
if(!urls->realLink.data) |
1151 | 1151 |
return CL_PHISH_CLEAN; |
1152 | 1152 |
|
1153 |
- cli_dbgmsg("PH:Checking url %s->%s\n", urls->realLink.data, |
|
1153 |
+ cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data, |
|
1154 | 1154 |
urls->displayLink.data); |
1155 | 1155 |
|
1156 | 1156 |
if(!strcmp(urls->realLink.data,urls->displayLink.data)) |
... | ... |
@@ -1206,7 +1155,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1206 | 1206 |
|
1207 | 1207 |
if(urls->flags&CHECK_CLOAKING) { |
1208 | 1208 |
/*Checks if URL is cloaked. |
1209 |
- Should we check if it containts another http://, https://? |
|
1209 |
+ Should we check if it contains another http://, https://? |
|
1210 | 1210 |
No because we might get false positives from redirect services.*/ |
1211 | 1211 |
if(strchr(urls->realLink.data,0x1)) { |
1212 | 1212 |
free_if_needed(&host_url); |
... | ... |
@@ -1281,59 +1230,9 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url |
1281 | 1281 |
free_if_needed(&domain_url); |
1282 | 1282 |
} |
1283 | 1283 |
|
1284 |
- /*if(urls->flags&CHECK_REDIR) { |
|
1285 |
- //see where the realLink redirects, and compare that with the displayed Link |
|
1286 |
- const uchar* redirectedURL = getRedirectedURL(urls->realLink); |
|
1287 |
- if(urls->needsfree) |
|
1288 |
- free(urls->realLink); |
|
1289 |
- urls->realLink = redirectedURL; |
|
1290 |
- |
|
1291 |
- if(!strcmp(urls->realLink,urls->displayLink)) |
|
1292 |
- return CL_PHISH_REDIR_OK; |
|
1293 |
- |
|
1294 |
- if(urls->flags&HOST_SUFFICIENT) { |
|
1295 |
- if(rc = url_get_host(urls,&host_url,DOMAIN_REAL)) |
|
1296 |
- if(!strcmp(host_url.realLink,host_url.displayLink)) { |
|
1297 |
- free_if_needed(&host_url); |
|
1298 |
- return CL_PHISH_HOST_REDIR_OK; |
|
1299 |
- } |
|
1300 |
- if(urls->flags&DOMAIN_SUFFICIENT) { |
|
1301 |
- struct url_check domain_url; |
|
1302 |
- url_get_domain(&host_url,&domain_url); |
|
1303 |
- if(!strcmp(domain_url.realLink,domain_url.displayLink)) { |
|
1304 |
- free_if_needed(&host_url); |
|
1305 |
- free_if_needed(&domain_url); |
|
1306 |
- return CL_PHISH_DOMAIN_REDIR_OK; |
|
1307 |
- } |
|
1308 |
- } |
|
1309 |
- }//HOST_SUFFICIENT&CHECK_REDIR |
|
1310 |
- } |
|
1311 |
- free_if_needed(&host_url);*/ |
|
1312 |
- /* if(urls->flags&CHECK_DOMAIN_REVERSE) { |
|
1313 |
- //do a DNS lookup of the domain, and see what IP it corresponds to |
|
1314 |
- //then do a reverse lookup on the IP, and see what domain you get |
|
1315 |
- //There are some corporate signatures that mix different domains belonging to same company |
|
1316 |
- struct url_check domain_url; |
|
1317 |
- url_check_init(&domain_url); |
|
1318 |
- if(!dns_to_ip_and_reverse(&host_url,DOMAIN_DISPLAY)) { |
|
1319 |
- if(!strcmp(host_url.realLink.data,host_url.displayLink.data)) { |
|
1320 |
- free_if_needed(&host_url); |
|
1321 |
- return CL_PHISH_HOST_REVERSE_OK; |
|
1322 |
- } |
|
1323 |
- if(urls->flags&DOMAIN_SUFFICIENT) { |
|
1324 |
- url_get_domain(&host_url,&domain_url); |
|
1325 |
- if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) { |
|
1326 |
- free_if_needed(&host_url); |
|
1327 |
- free_if_needed(&domain_url); |
|
1328 |
- return CL_PHISH_DOMAIN_REVERSE_OK; |
|
1329 |
- } |
|
1330 |
- free_if_needed(&domain_url); |
|
1331 |
- } |
|
1332 |
- } |
|
1333 |
- }*/ |
|
1334 | 1284 |
free_if_needed(&host_url); |
1335 | 1285 |
}/*HOST_SUFFICIENT*/ |
1336 |
- /*we failed to find a reason why the 2 URLs are different, this is definetely phishing*/ |
|
1286 |
+ /*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/ |
|
1337 | 1287 |
if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) |
1338 | 1288 |
return CL_PHISH_HOST_NOT_LISTED; |
1339 | 1289 |
return phishy_map(phishy,CL_PHISH_NOMATCH); |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
/* |
2 | ||
2 | ||
3 | 3 |
* |
4 | 4 |
* This program is free software; you can redistribute it and/or modify |
5 | 5 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -87,36 +87,6 @@ void phishing_done(struct cl_engine* engine); |
87 | 87 |
/* end of non-thread-safe functions */ |
88 | 88 |
|
89 | 89 |
|
90 |
-static inline int isPhishing(enum phish_status rc) |
|
91 |
-{ |
|
92 |
- switch(rc) { |
|
93 |
- case CL_PHISH_CLEAN: |
|
94 |
- case CL_PHISH_CLEANUP_OK: |
|
95 |
- case CL_PHISH_WHITELISTED: |
|
96 |
- case CL_PHISH_HOST_WHITELISTED: |
|
97 |
- case CL_PHISH_HOST_OK: |
|
98 |
- case CL_PHISH_DOMAIN_OK: |
|
99 |
- case CL_PHISH_REDIR_OK: |
|
100 |
- case CL_PHISH_HOST_REDIR_OK: |
|
101 |
- case CL_PHISH_DOMAIN_REDIR_OK: |
|
102 |
- case CL_PHISH_HOST_REVERSE_OK: |
|
103 |
- case CL_PHISH_DOMAIN_REVERSE_OK: |
|
104 |
- case CL_PHISH_MAILTO_OK: |
|
105 |
- case CL_PHISH_TEXTURL: |
|
106 |
- case CL_PHISH_HOST_NOT_LISTED: |
|
107 |
- case CL_PHISH_CLEAN_CID: |
|
108 |
- return 0; |
|
109 |
- case CL_PHISH_HEX_URL: |
|
110 |
- case CL_PHISH_CLOAKED_NULL: |
|
111 |
- case CL_PHISH_SSL_SPOOF: |
|
112 |
- case CL_PHISH_CLOAKED_UIU: |
|
113 |
- case CL_PHISH_NUMERIC_IP: |
|
114 |
- case CL_PHISH_NOMATCH: |
|
115 |
- return 1; |
|
116 |
- default: |
|
117 |
- return 1; |
|
118 |
- } |
|
119 |
-} |
|
120 | 90 |
#endif |
121 | 91 |
|
122 | 92 |
#endif |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Match a string against a list of patterns/regexes. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -38,33 +38,22 @@ |
38 | 38 |
|
39 | 39 |
#include <stdio.h> |
40 | 40 |
#include <stdlib.h> |
41 |
-#include <errno.h> |
|
42 | 41 |
#include <string.h> |
43 |
-#ifdef HAVE_STRINGS_H |
|
44 |
-#include <strings.h> |
|
45 |
-#endif |
|
46 | 42 |
#include <ctype.h> |
47 | 43 |
|
48 | 44 |
#include <limits.h> |
49 | 45 |
#include <sys/types.h> |
50 | 46 |
|
51 | 47 |
#ifdef HAVE_REGEX_H |
52 |
-/*#define USE_PCRE*/ |
|
53 | 48 |
#include <regex.h> |
54 | 49 |
#endif |
55 | 50 |
|
56 |
-#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2) |
|
57 |
-#include <stddef.h> |
|
58 |
-#endif |
|
59 | 51 |
|
60 | 52 |
#include "clamav.h" |
61 | 53 |
#include "others.h" |
62 |
-#include "defaults.h" |
|
63 |
-#include "str.h" |
|
64 |
-#include "filetypes.h" |
|
65 |
-#include "mbox.h" |
|
66 | 54 |
#include "regex_list.h" |
67 | 55 |
#include "matcher-ac.h" |
56 |
+#include "str.h" |
|
68 | 57 |
|
69 | 58 |
|
70 | 59 |
/*Tree*/ |
... | ... |
@@ -317,7 +306,7 @@ static void stack_reset(struct node_stack* stack) |
317 | 317 |
} |
318 | 318 |
|
319 | 319 |
/* Push @node on @stack, growing it if necessary */ |
320 |
-static inline int stack_push(struct node_stack* stack,struct tree_node* node) |
|
320 |
+static int stack_push(struct node_stack* stack,struct tree_node* node) |
|
321 | 321 |
{ |
322 | 322 |
massert(stack); |
323 | 323 |
massert(stack->data); |
... | ... |
@@ -333,7 +322,7 @@ static inline int stack_push(struct node_stack* stack,struct tree_node* node) |
333 | 333 |
} |
334 | 334 |
|
335 | 335 |
/* Pops node from @stack, doesn't realloc */ |
336 |
-static inline struct tree_node* stack_pop(struct node_stack* stack) |
|
336 |
+static struct tree_node* stack_pop(struct node_stack* stack) |
|
337 | 337 |
{ |
338 | 338 |
massert(stack); |
339 | 339 |
massert(stack->data); |
... | ... |
@@ -348,12 +337,6 @@ static inline struct tree_node* stack_pop(struct node_stack* stack) |
348 | 348 |
int init_regex_list(struct regex_matcher* matcher) |
349 | 349 |
{ |
350 | 350 |
int rc; |
351 |
- /* |
|
352 |
- if(!engine_ok) { |
|
353 |
- cli_dbgmsg("Matcher engine not initialized\n"); |
|
354 |
- return CL_ENULLARG; |
|
355 |
- } |
|
356 |
- */ |
|
357 | 351 |
|
358 | 352 |
massert(matcher); |
359 | 353 |
matcher->list_inited = 0; |
... | ... |
@@ -417,7 +400,7 @@ static int add_regex_list_element(struct cli_matcher* root,const char* pattern,c |
417 | 417 |
for(i=0;i<len;i++) |
418 | 418 |
new->pattern[i]=pattern[i];/*new->pattern is short int* */ |
419 | 419 |
|
420 |
- new->virname = strdup(info); |
|
420 |
+ new->virname = cli_strdup(info); |
|
421 | 421 |
if((ret = cli_ac_addpatt(root,new))) { |
422 | 422 |
free(new->virname); |
423 | 423 |
free(new->pattern); |
... | ... |
@@ -459,7 +442,7 @@ static int functionality_level_check(char* line) |
459 | 459 |
max = atoi(ptmax); |
460 | 460 |
|
461 | 461 |
if(min > cl_retflevel()) { |
462 |
- cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n",line,min); |
|
462 |
+ cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n",line,(unsigned int)min); |
|
463 | 463 |
return CL_EMALFDB; |
464 | 464 |
} |
465 | 465 |
|
... | ... |
@@ -561,7 +544,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
561 | 561 |
if(!matcher->root_hosts) { |
562 | 562 |
matcher->root_hosts = old_hosts;/* according to manpage this must still be valid*/ |
563 | 563 |
return CL_EMEM; |
564 |
- } |
|
564 |
+ } |
|
565 | 565 |
memset(&matcher->root_hosts[matcher->root_hosts_cnt-1], 0, sizeof(struct cli_matcher)); |
566 | 566 |
matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root = cli_calloc(1, sizeof(struct cli_ac_node)); |
567 | 567 |
if(!matcher->root_hosts[matcher->root_hosts_cnt-1].ac_root) { |
... | ... |
@@ -597,119 +580,11 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
597 | 597 |
return CL_SUCCESS; |
598 | 598 |
} |
599 | 599 |
|
600 |
-/* |
|
601 |
-static void tree_node_merge_nonbin(struct tree_node* into,const struct tree_node* node) |
|
602 |
-{ |
|
603 |
- massert(into); |
|
604 |
- massert(node); |
|
605 |
- |
|
606 |
- if(node->alternatives){ |
|
607 |
- if(node->u.children[0]->next == node) { |
|
608 |
- *no non-bin alternatives here* |
|
609 |
- } |
|
610 |
- else { |
|
611 |
- struct tree_node* p; |
|
612 |
- for(p = node->u.children[0]->next; p->next != node; p = p->next) |
|
613 |
- tree_node_insert_nonbin(into,p); |
|
614 |
- } |
|
615 |
- } |
|
616 |
- else |
|
617 |
- tree_node_insert_nonbin(into,node->u.children[0]); |
|
618 |
-} |
|
619 |
-* |
|
620 |
-static void tree_node_merge_bin(struct tree_node* into,const struct tree_node* node) |
|
621 |
-{ |
|
622 |
- if(node->u.children && node->alternatives) { |
|
623 |
- if(!into->alternatives) { |
|
624 |
- * into has no bin part, just copy+link the node there* |
|
625 |
- int i; |
|
626 |
- struct tree_node* next = into->u.children[0]; |
|
627 |
- into->u.children = node->u.children; |
|
628 |
- into->alternatives = node->alternatives; |
|
629 |
- for(i=0;i < into->alternatives;i++) { |
|
630 |
- if(into->u.children[i]->next == node) { |
|
631 |
- into->u.children[i]->next = next; |
|
632 |
- into->u.children[i]->listend = 0; |
|
633 |
- } |
|
634 |
- else { |
|
635 |
- struct tree_node* p; |
|
636 |
- for(p = into->u.children[0]->next; p->next != node; p = p->next); |
|
637 |
- p->listend = 0; |
|
638 |
- p->next = next; |
|
639 |
- } |
|
640 |
- } |
|
641 |
- } |
|
642 |
- const size_t new_size = tree_node_get_array_size(into) + tree_node_get_array_size(node); |
|
643 |
- struct tree_node** new_children = cli_malloc(sizeof( |
|
644 |
- } |
|
645 |
- * else: no bin part to merge * |
|
646 |
-} |
|
647 |
-*/ |
|
648 | 600 |
|
649 | 601 |
static struct tree_node ** tree_node_get_children(const struct tree_node* node) |
650 | 602 |
{ |
651 | 603 |
return node->op==OP_CUSTOMCLASS ? (node->u.children[1] ? node->u.children+1 : NULL) :node->u.children; |
652 | 604 |
} |
653 |
-/* don't do this, it wastes too much memory, and has no benefit |
|
654 |
-static void regex_list_dobuild(struct tree_node* called_from,struct tree_node* node) |
|
655 |
-{ |
|
656 |
- struct tree_node **children; |
|
657 |
- massert(node); |
|
658 |
- |
|
659 |
- children = tree_node_get_children(node); |
|
660 |
- if(node->op!=OP_ROOT) |
|
661 |
- massert(called_from); |
|
662 |
- if(node->op==OP_TMP_PARCLOSE) { |
|
663 |
- const size_t array_size = (node->alternatives +(called_from->op==OP_CUSTOMCLASS ? 1:0))*sizeof(*called_from->u.children); |
|
664 |
- if(node->c) |
|
665 |
- return;* already processed this common node* |
|
666 |
- else |
|
667 |
- node->c = 1; |
|
668 |
- * copy children to called_from from this node |
|
669 |
- * called_from should have 0 alternatives, and a link to this node via ->u.children[0] |
|
670 |
- * * |
|
671 |
- massert(called_from->alternatives == 0); |
|
672 |
- massert(called_from->u.children); |
|
673 |
- massert(called_from->u.children[0] == node); |
|
674 |
- called_from->u.children = cli_realloc(called_from->u.children,array_size); |
|
675 |
- called_from->u.children = node->u.children; |
|
676 |
- called_from->alternatives = node->alternatives; |
|
677 |
- if(called_from->alternatives) { |
|
678 |
- * fix parent pointers * |
|
679 |
- int i;TODO: do a deep copy of children here |
|
680 |
- struct tree_node **from_children = tree_node_get_children(called_from); |
|
681 |
- massert(from_children); |
|
682 |
- for(i=0;i < called_from->alternatives;i++) { |
|
683 |
- struct tree_node* p; |
|
684 |
- for(p=from_children[i];p->next != node; p = p->next); |
|
685 |
- p->next = called_from; |
|
686 |
- } |
|
687 |
- } |
|
688 |
- } |
|
689 |
- |
|
690 |
- if(node->op==OP_LEAF) |
|
691 |
- return; |
|
692 |
- else if (node->alternatives) { |
|
693 |
- int i; |
|
694 |
- struct tree_node* p; |
|
695 |
- massert(children); |
|
696 |
- p = children[0]->op==OP_LEAF ? NULL : children[0]->next; |
|
697 |
- for(i=0;i<node->alternatives;i++) |
|
698 |
- regex_list_dobuild(node,children[i]); |
|
699 |
- if(p && p!=node) |
|
700 |
- regex_list_dobuild(node,p); |
|
701 |
- } else { |
|
702 |
- if(children) |
|
703 |
- if (children[0]) |
|
704 |
- regex_list_dobuild(node,children[0]); |
|
705 |
- } |
|
706 |
- if(node->next && !node->listend) |
|
707 |
- regex_list_dobuild(node,node->next); |
|
708 |
- if(node->op==OP_TMP_PARCLOSE) |
|
709 |
- node->c=0; |
|
710 |
- *free(node);* |
|
711 |
-} |
|
712 |
-*/ |
|
713 | 605 |
|
714 | 606 |
/* Build the matcher list */ |
715 | 607 |
static int build_regex_list(struct regex_matcher* matcher) |
... | ... |
@@ -802,7 +677,6 @@ static const unsigned char* getNextToken(const unsigned char* pat,struct token_t |
802 | 802 |
case '{': |
803 | 803 |
case '}': |
804 | 804 |
token->type=TOKEN_REGEX; |
805 |
-/* massert(0 && "find_regex_start should have forbidden us from finding regex special chars");*/ |
|
806 | 805 |
break; |
807 | 806 |
case '[': |
808 | 807 |
{ |
... | ... |
@@ -1003,7 +877,7 @@ static struct tree_node* tree_root_alloc(void) |
1003 | 1003 |
return root; |
1004 | 1004 |
} |
1005 | 1005 |
|
1006 |
-static inline struct tree_node* tree_node_char_binsearch(const struct tree_node* node,const char csearch,int* left) |
|
1006 |
+static struct tree_node* tree_node_char_binsearch(const struct tree_node* node,const char csearch,int* left) |
|
1007 | 1007 |
{ |
1008 | 1008 |
int right; |
1009 | 1009 |
struct tree_node **children; |
... | ... |
@@ -1028,7 +902,7 @@ static inline struct tree_node* tree_node_char_binsearch(const struct tree_node* |
1028 | 1028 |
return NULL; |
1029 | 1029 |
} |
1030 | 1030 |
|
1031 |
-static inline struct tree_node* tree_get_next(struct tree_node* node) |
|
1031 |
+static struct tree_node* tree_get_next(struct tree_node* node) |
|
1032 | 1032 |
{ |
1033 | 1033 |
struct tree_node** children; |
1034 | 1034 |
massert(node); |
... | ... |
@@ -1042,14 +916,14 @@ static inline struct tree_node* tree_get_next(struct tree_node* node) |
1042 | 1042 |
return children[0]->next; |
1043 | 1043 |
} |
1044 | 1044 |
|
1045 |
-static inline size_t tree_node_get_array_size(const struct tree_node* node) |
|
1045 |
+static size_t tree_node_get_array_size(const struct tree_node* node) |
|
1046 | 1046 |
{ |
1047 | 1047 |
massert(node); |
1048 | 1048 |
/* if op is CUSTOMCLASS, then first pointer is pointer to bitmap, so array size is +1 */ |
1049 | 1049 |
return (node->alternatives + (node->op==OP_CUSTOMCLASS ? 1 : 0)) * sizeof(node->u.children[0]); |
1050 | 1050 |
} |
1051 | 1051 |
|
1052 |
-static inline struct tree_node* tree_node_char_insert(struct tree_node* node,const char c,int left) |
|
1052 |
+static struct tree_node* tree_node_char_insert(struct tree_node* node,const char c,int left) |
|
1053 | 1053 |
{ |
1054 | 1054 |
struct tree_node* new, *alt = tree_get_next(node); |
1055 | 1055 |
struct tree_node **children; |
... | ... |
@@ -1073,7 +947,7 @@ static inline struct tree_node* tree_node_char_insert(struct tree_node* node,con |
1073 | 1073 |
return new; |
1074 | 1074 |
} |
1075 | 1075 |
|
1076 |
-static inline void tree_node_insert_nonbin(struct tree_node* node, struct tree_node* new) |
|
1076 |
+static void tree_node_insert_nonbin(struct tree_node* node, struct tree_node* new) |
|
1077 | 1077 |
{ |
1078 | 1078 |
struct tree_node **children; |
1079 | 1079 |
massert(node); |
... | ... |
@@ -1118,7 +992,7 @@ static inline void tree_node_insert_nonbin(struct tree_node* node, struct tree_n |
1118 | 1118 |
} |
1119 | 1119 |
} |
1120 | 1120 |
|
1121 |
-static inline unsigned char char_getclass(const unsigned char* bitmap) |
|
1121 |
+static unsigned char char_getclass(const unsigned char* bitmap) |
|
1122 | 1122 |
{ |
1123 | 1123 |
size_t i; |
1124 | 1124 |
massert(bitmap); |
... | ... |
@@ -1259,7 +1133,7 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co |
1259 | 1259 |
struct leaf_info* leaf=cli_malloc(sizeof(*leaf)); |
1260 | 1260 |
if(!leaf) |
1261 | 1261 |
return CL_EMEM; |
1262 |
- leaf->info=strdup(info); |
|
1262 |
+ leaf->info = cli_strdup(info); |
|
1263 | 1263 |
if(token.type==TOKEN_REGEX) { |
1264 | 1264 |
int rc; |
1265 | 1265 |
struct tree_node* new; |
... | ... |
@@ -1441,7 +1315,7 @@ static int match_node(struct tree_node* node,const unsigned char* c,size_t len,c |
1441 | 1441 |
} |
1442 | 1442 |
|
1443 | 1443 |
/* push node on stack, only if it isn't there already */ |
1444 |
-static inline void stack_push_once(struct node_stack* stack,struct tree_node* node) |
|
1444 |
+static void stack_push_once(struct node_stack* stack,struct tree_node* node) |
|
1445 | 1445 |
{ |
1446 | 1446 |
size_t i; |
1447 | 1447 |
massert(stack); |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
/* |
2 | 2 |
* Match a string against a list of patterns/regexes. |
3 | 3 |
* |
4 | ||
4 | ||
5 | 5 |
* |
6 | 6 |
* This program is free software; you can redistribute it and/or modify |
7 | 7 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -25,8 +25,6 @@ |
25 | 25 |
#ifndef _REGEX_LIST_H |
26 | 26 |
#define _REGEX_LIST_H |
27 | 27 |
|
28 |
- |
|
29 |
- |
|
30 | 28 |
#ifdef NDEBUG |
31 | 29 |
#define massert(x) (void)(0) |
32 | 30 |
#else |
... | ... |
@@ -61,7 +59,6 @@ void regex_list_cleanup(struct regex_matcher* matcher); |
61 | 61 |
void regex_list_done(struct regex_matcher* matcher); |
62 | 62 |
int is_regex_ok(struct regex_matcher* matcher); |
63 | 63 |
|
64 |
-void setup_matcher_engine(void);/* global, non thread-safe */ |
|
65 | 64 |
#endif |
66 | 65 |
|
67 | 66 |
#endif |