git-svn: trunk@2340
Tomasz Kojm authored on 2006/10/07 20:00:46... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Sat Oct 7 12:47:32 CEST 2006 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: make the experimental anti-phishing code more thread safe, |
|
4 |
+ patch from Edvin Torok <edwintorok*gmail.com> |
|
5 |
+ |
|
1 | 6 |
Thu Oct 5 22:46:19 CEST 2006 (tk) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/sis.c: fix handling of compressed multiple language files (bb#42) |
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: phish_domaincheck_db.c,v $ |
22 |
+ * Revision 1.3 2006/10/07 11:00:46 tkojm |
|
23 |
+ * make the experimental anti-phishing code more thread safe |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.2 2006/09/26 18:55:36 njh |
23 | 26 |
* Fixed portability issues |
24 | 27 |
* |
... | ... |
@@ -91,13 +94,10 @@ |
91 | 91 |
#include "regex_list.h" |
92 | 92 |
#include "matcher-ac.h" |
93 | 93 |
|
94 |
- |
|
95 |
-static struct regex_matcher domainlist_matcher; |
|
96 |
- |
|
97 |
-int domainlist_match(const char* real_url,const char* display_url,int hostOnly,unsigned short* flags) |
|
94 |
+int domainlist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly,unsigned short* flags) |
|
98 | 95 |
{ |
99 | 96 |
const char* info; |
100 |
- int rc = regex_list_match(&domainlist_matcher,real_url,display_url,hostOnly,&info); |
|
97 |
+ int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly,&info) : 0; |
|
101 | 98 |
 if(rc && info && info[0]) {/*match successful, and has custom flags*/ |
102 | 99 |
if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) { |
103 | 100 |
unsigned short notwantedflags=0; |
... | ... |
@@ -111,29 +111,38 @@ int domainlist_match(const char* real_url,const char* display_url,int hostOnly,u |
111 | 111 |
return rc; |
112 | 112 |
} |
113 | 113 |
|
114 |
-int init_domainlist(void) |
|
114 |
+int init_domainlist(struct cl_engine* engine) |
|
115 | 115 |
{ |
116 |
- return init_regex_list(&domainlist_matcher); |
|
116 |
+ if(engine) { |
|
117 |
+ engine->domainlist_matcher = cli_malloc(sizeof(*engine->domainlist_matcher)); |
|
118 |
+ if(!engine->domainlist_matcher) |
|
119 |
+ return CL_EMEM; |
|
120 |
+ return init_regex_list(engine->domainlist_matcher); |
|
117 | 121 |
} |
118 |
- |
|
119 |
-int is_domainlist_ok(void) |
|
120 |
-{ |
|
121 |
- return is_regex_ok(&domainlist_matcher); |
|
122 |
+ else |
|
123 |
+ return CL_ENULLARG; |
|
122 | 124 |
} |
123 | 125 |
|
124 |
-int cli_loadpdb(FILE* fd,unsigned int options) |
|
126 |
+int is_domainlist_ok(const struct cl_engine* engine) |
|
125 | 127 |
{ |
126 |
- return load_regex_matcher(&domainlist_matcher,fd,options); |
|
128 |
+ return (engine && engine->domainlist_matcher) ? is_regex_ok(engine->domainlist_matcher) : 1; |
|
127 | 129 |
} |
128 | 130 |
|
129 |
-void domainlist_cleanup(void) |
|
131 |
+ |
|
132 |
+void domainlist_cleanup(const struct cl_engine* engine) |
|
130 | 133 |
{ |
131 |
- regex_list_cleanup(&domainlist_matcher); |
|
134 |
+ if(engine && engine->domainlist_matcher) { |
|
135 |
+ regex_list_cleanup(engine->domainlist_matcher); |
|
136 |
+ } |
|
132 | 137 |
} |
133 | 138 |
|
134 |
-void domainlist_done(void) |
|
139 |
+void domainlist_done(struct cl_engine* engine) |
|
135 | 140 |
{ |
136 |
- regex_list_done(&domainlist_matcher); |
|
141 |
+ if(engine && engine->domainlist_matcher) { |
|
142 |
+ regex_list_done(engine->domainlist_matcher); |
|
143 |
+ free(engine->domainlist_matcher); |
|
144 |
+ engine->domainlist_matcher = NULL; |
|
145 |
+ } |
|
137 | 146 |
} |
138 | 147 |
|
139 | 148 |
#endif |
... | ... |
@@ -25,13 +25,11 @@ |
25 | 25 |
#ifndef _PHISH_DOMAINCHECK_DB_H |
26 | 26 |
#define _PHISH_DOMAINCHECK_DB_H |
27 | 27 |
|
28 |
-int cli_loadpdb(FILE* fd, unsigned int options); |
|
29 |
-int build_domainlist(void); |
|
30 |
-int init_domainlist(void); |
|
31 |
-void domainlist_done(void); |
|
32 |
-void domainlist_cleanup(void); |
|
33 |
-int is_domainlist_ok(void); |
|
34 |
-int domainlist_match(const char* real_url,const char* display_url,int hostOnly,unsigned short* flags); |
|
28 |
+int init_domainlist(struct cl_engine* engine); |
|
29 |
+void domainlist_done(struct cl_engine* engine); |
|
30 |
+void domainlist_cleanup(const struct cl_engine* engine); |
|
31 |
+int is_domainlist_ok(const struct cl_engine* engine); |
|
32 |
+int domainlist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly,unsigned short* flags); |
|
35 | 33 |
|
36 | 34 |
#endif |
37 | 35 |
|
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: phish_whitelist.c,v $ |
22 |
+ * Revision 1.4 2006/10/07 11:00:46 tkojm |
|
23 |
+ * make the experimental anti-phishing code more thread safe |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.3 2006/09/26 18:55:36 njh |
23 | 26 |
* Fixed portability issues |
24 | 27 |
* |
... | ... |
@@ -93,78 +96,44 @@ |
93 | 93 |
#include "regex_list.h" |
94 | 94 |
#include "matcher-ac.h" |
95 | 95 |
|
96 |
-/*#define WHITELIST_TEST*/ |
|
97 |
- |
|
98 |
-static struct regex_matcher whitelist_matcher; |
|
99 |
- |
|
100 |
-int whitelist_match(const char* real_url,const char* display_url,int hostOnly) |
|
96 |
+int whitelist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly) |
|
101 | 97 |
{ |
102 | 98 |
const char* info;/*unused*/ |
103 |
- return regex_list_match(&whitelist_matcher,real_url,display_url,hostOnly,&info); |
|
99 |
+ return engine->whitelist_matcher ? regex_list_match(engine->whitelist_matcher,real_url,display_url,hostOnly,&info) : 0; |
|
104 | 100 |
} |
105 | 101 |
|
106 |
-int init_whitelist(void) |
|
102 |
+int init_whitelist(struct cl_engine* engine) |
|
107 | 103 |
{ |
108 |
- return init_regex_list(&whitelist_matcher); |
|
104 |
+ if(engine) { |
|
105 |
+ engine->whitelist_matcher = cli_malloc(sizeof(*engine->whitelist_matcher)); |
|
106 |
+ if(!engine->whitelist_matcher) |
|
107 |
+ return CL_EMEM; |
|
108 |
+ return init_regex_list(engine->whitelist_matcher); |
|
109 | 109 |
} |
110 |
- |
|
111 |
-int is_whitelist_ok(void) |
|
112 |
-{ |
|
113 |
- return is_regex_ok(&whitelist_matcher); |
|
110 |
+ else |
|
111 |
+ return CL_ENULLARG; |
|
114 | 112 |
} |
115 | 113 |
|
116 |
-int cli_loadwdb(FILE* fd,unsigned int options) |
|
114 |
+int is_whitelist_ok(const struct cl_engine* engine) |
|
117 | 115 |
{ |
118 |
- return load_regex_matcher(&whitelist_matcher,fd,options); |
|
116 |
+ return (engine && engine->whitelist_matcher) ? is_regex_ok(engine->whitelist_matcher) : 1; |
|
119 | 117 |
} |
120 | 118 |
|
121 |
-void whitelist_cleanup(void) |
|
122 |
-{ |
|
123 |
- regex_list_cleanup(&whitelist_matcher); |
|
124 |
-} |
|
125 | 119 |
|
126 |
-void whitelist_done(void) |
|
120 |
+void whitelist_cleanup(const struct cl_engine* engine) |
|
127 | 121 |
{ |
128 |
- regex_list_done(&whitelist_matcher); |
|
122 |
+ if(engine && engine->whitelist_matcher) { |
|
123 |
+ regex_list_cleanup(engine->whitelist_matcher); |
|
124 |
+ } |
|
129 | 125 |
} |
130 | 126 |
|
131 |
-#ifdef WHITELIST_TEST |
|
132 |
-int main(int argc,char* argv[]) |
|
127 |
+void whitelist_done(struct cl_engine* engine) |
|
133 | 128 |
{ |
134 |
-/* struct tree_node* root=tree_node_alloc(NULL,1); |
|
135 |
- const char* info; |
|
136 |
- const unsigned char test[]="tesxt"; |
|
137 |
- setup_matcher(); |
|
138 |
- root->op=OP_ROOT; |
|
139 |
- root->c=0; |
|
140 |
- root->next=NULL; |
|
141 |
- root->listend=1; |
|
142 |
- dump_tree(root); |
|
143 |
- add_pattern(&root,"test","1"); |
|
144 |
- dump_tree(root); |
|
145 |
- add_pattern(&root,"tesv","2"); |
|
146 |
- dump_tree(root); |
|
147 |
- add_pattern(&root,"tert","3"); |
|
148 |
- dump_tree(root); |
|
149 |
- add_pattern(&root,"terr+","4"); |
|
150 |
- dump_tree(root); |
|
151 |
- add_pattern(&root,"tes[xy]t","5"); |
|
152 |
- dump_tree(root); |
|
153 |
- match_node(root,test,sizeof(test),&info); |
|
154 |
- destroy_tree(root); |
|
155 |
- if(info) |
|
156 |
- printf("%s\n",info); |
|
157 |
- else printf("not found\n");*/ |
|
158 |
- /*FILE* f=fopen("w.wdb","r"); |
|
159 |
- init_whitelist(); |
|
160 |
- load_whitelist(f); |
|
161 |
- fclose(f); |
|
162 |
- dump_tree(root_regex); |
|
163 |
- build_whitelist(); |
|
164 |
- printf("%d\n",whitelist_match("http://www.google.ro","http://www.google.me.ro",0)); |
|
165 |
- whitelist_done();*/ |
|
166 |
- return 0; |
|
129 |
+ if(engine && engine->whitelist_matcher) { |
|
130 |
+ regex_list_done(engine->whitelist_matcher); |
|
131 |
+ free(engine->whitelist_matcher); |
|
132 |
+ engine->whitelist_matcher = NULL; |
|
133 |
+ } |
|
167 | 134 |
} |
168 |
-#endif |
|
169 | 135 |
|
170 | 136 |
#endif |
... | ... |
@@ -25,13 +25,11 @@ |
25 | 25 |
#ifndef _PHISH_WHITELIST_H |
26 | 26 |
#define _PHISH_WHITELIST_H |
27 | 27 |
|
28 |
-int cli_loadwdb(FILE* fd, unsigned int options); |
|
29 |
-int build_whitelist(void); |
|
30 |
-int init_whitelist(void); |
|
31 |
-void whitelist_done(void); |
|
32 |
-void whitelist_cleanup(void); |
|
33 |
-int is_whitelist_ok(void); |
|
34 |
-int whitelist_match(const char* real_url,const char* display_url,int hostOnly); |
|
28 |
+int init_whitelist(struct cl_engine* engine); |
|
29 |
+void whitelist_done(struct cl_engine* engine); |
|
30 |
+void whitelist_cleanup(const struct cl_engine* engine); |
|
31 |
+int is_whitelist_ok(const struct cl_engine* engine); |
|
32 |
+int whitelist_match(const struct cl_engine* engine, const char* real_url,const char* display_url,int hostOnly); |
|
35 | 33 |
|
36 | 34 |
#endif |
37 | 35 |
|
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: phishcheck.c,v $ |
22 |
+ * Revision 1.11 2006/10/07 11:00:46 tkojm |
|
23 |
+ * make the experimental anti-phishing code more thread safe |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.10 2006/09/27 14:23:14 njh |
23 | 26 |
* Ported to VS2005 |
24 | 27 |
* |
... | ... |
@@ -147,6 +150,8 @@ case CL_PHISH_HOST_NOT_LISTED: |
147 | 147 |
#include <regex.h> |
148 | 148 |
#endif |
149 | 149 |
|
150 |
+#include <pthread.h> |
|
151 |
+ |
|
150 | 152 |
#include "others.h" |
151 | 153 |
#include "defaults.h" |
152 | 154 |
#include "str.h" |
... | ... |
@@ -247,6 +252,7 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla |
247 | 247 |
* |
248 | 248 |
*/ |
249 | 249 |
static char empty_string[]=""; |
250 |
+ |
|
250 | 251 |
static inline void string_init_c(struct string* dest,char* data); |
251 | 252 |
static void string_assign_null(struct string* dest); |
252 | 253 |
static char *rfind(char *start, char c, size_t len); |
... | ... |
@@ -347,7 +353,6 @@ void free_if_needed(struct url_check* url) |
347 | 347 |
string_free(&url->displayLink); |
348 | 348 |
} |
349 | 349 |
|
350 |
-static int phish_disabled = 0;/* disabled due to fatal startup error */ |
|
351 | 350 |
|
352 | 351 |
static int build_regex(regex_t** preg,const char* regex,int nosub) |
353 | 352 |
{ |
... | ... |
@@ -372,7 +377,7 @@ static int build_regex(regex_t** preg,const char* regex,int nosub) |
372 | 372 |
#endif |
373 | 373 |
free(*preg); |
374 | 374 |
*preg=NULL; |
375 |
- phish_disabled=1; |
|
375 |
+ phish_disable("problem in compiling regex"); |
|
376 | 376 |
return 1; |
377 | 377 |
} |
378 | 378 |
return 0; |
... | ... |
@@ -468,10 +473,6 @@ static const char cctld_regex[] = "^"iana_cctld"$"; |
468 | 468 |
|
469 | 469 |
int isCountryCode(const char* str) |
470 | 470 |
{ |
471 |
- if(!preg_cctld) { |
|
472 |
- if(build_regex(&preg_cctld,cctld_regex,1)) |
|
473 |
- return -1; |
|
474 |
- } |
|
475 | 471 |
return str ? !regexec(preg_cctld,str,0,NULL,0) : 0; |
476 | 472 |
} |
477 | 473 |
|
... | ... |
@@ -484,10 +485,6 @@ int isTLD(const char* str,int len) |
484 | 484 |
int rc; |
485 | 485 |
strncpy(s,str,len); |
486 | 486 |
s[len]='\0'; |
487 |
- if(!preg_tld) { |
|
488 |
- if(build_regex(&preg_tld,tld_regex,1)) |
|
489 |
- return -1; |
|
490 |
- } |
|
491 | 487 |
rc = !regexec(preg_tld,s,0,NULL,0); |
492 | 488 |
free(s); |
493 | 489 |
return rc; |
... | ... |
@@ -599,15 +596,10 @@ int isSSL(const char* URL) |
599 | 599 |
return URL ? !strncmp(https,URL,sizeof(https)-1) : 0; |
600 | 600 |
} |
601 | 601 |
|
602 |
-static int hexinited=0; |
|
603 |
-static short int hextable[256]; |
|
604 |
-static inline char hex2int(const unsigned char* src) |
|
605 |
-{ |
|
606 |
- assert(hexinited); |
|
607 |
- return hextable[src[0]]<<4 | hextable[src[1]]; |
|
608 |
-} |
|
609 | 602 |
|
610 | 603 |
|
604 |
+static inline char hex2int(const unsigned char* src); |
|
605 |
+ |
|
611 | 606 |
/* deletes @what from the string @begin. |
612 | 607 |
* @what_len: length of @what, excluding the terminating \0 */ |
613 | 608 |
static void |
... | ... |
@@ -828,29 +820,32 @@ void get_redirected_URL(struct string* URL) |
828 | 828 |
returns redirected URL*/ |
829 | 829 |
} |
830 | 830 |
|
831 |
-static inline int is_phish_disabled(void) |
|
831 |
+ |
|
832 |
+/* ---- runtime disable ------*/ |
|
833 |
+static int phish_disabled = 0; |
|
834 |
+static pthread_mutex_t phish_disabled_lock = PTHREAD_MUTEX_INITIALIZER; |
|
835 |
+ |
|
836 |
+void phish_disable(const char* reason) |
|
832 | 837 |
{ |
833 |
- if (phish_disabled) |
|
834 |
- return 1; |
|
835 |
- else if (!is_whitelist_ok()) { |
|
838 |
+ cli_warnmsg("Disabling phishing checks, reason:%s\n",reason); |
|
839 |
+ pthread_mutex_lock(&phish_disabled_lock); |
|
836 | 840 |
phish_disabled = 1; |
837 |
- return 1; |
|
838 |
- } |
|
839 |
- else return 0; |
|
841 |
+ pthread_mutex_unlock(&phish_disabled_lock); |
|
840 | 842 |
} |
841 | 843 |
|
842 |
-static void init_hextable(void) |
|
844 |
+static inline int is_phish_disabled(const struct cl_engine* engine) |
|
843 | 845 |
{ |
844 |
- unsigned char c; |
|
845 |
- memset(hextable,0,256); |
|
846 |
- for(c='0';c<='9';c++) |
|
847 |
- hextable[c] = c-'0'; |
|
848 |
- for(c='a';c<='z';c++) |
|
849 |
- hextable[c] = 10+c-'a'; |
|
850 |
- for(c='A';c<='Z';c++) |
|
851 |
- hextable[c] = 10+c-'A'; |
|
852 |
- hexinited=1; |
|
846 |
+ int rc; |
|
847 |
+ if (!is_whitelist_ok(engine)) |
|
848 |
+ phish_disable("whitelist is not ok"); |
|
849 |
+ if (!is_domainlist_ok(engine)) |
|
850 |
+ phish_disable("domainlist is not ok"); |
|
851 |
+ pthread_mutex_lock(&phish_disabled_lock); |
|
852 |
+ rc = phish_disabled; |
|
853 |
+ pthread_mutex_unlock(&phish_disabled_lock); |
|
854 |
+ return rc; |
|
853 | 855 |
} |
856 |
+/* -------end runtime disable---------*/ |
|
854 | 857 |
|
855 | 858 |
int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
856 | 859 |
{ |
... | ... |
@@ -859,12 +854,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
859 | 859 |
const size_t href_text_len = sizeof(href_text); |
860 | 860 |
const size_t src_text_len = sizeof(src_text); |
861 | 861 |
int i; |
862 |
- if(is_phish_disabled()) |
|
862 |
+ if(is_phish_disabled(ctx->engine)) |
|
863 | 863 |
return 0; |
864 |
- if(!hexinited) { |
|
865 |
- init_hextable(); |
|
866 |
- atexit(phishing_done);/*TODO: replace this with a proper phishing_done call from manager.c*/ |
|
867 |
- } |
|
868 | 864 |
|
869 | 865 |
*ctx->virname=NULL; |
870 | 866 |
for(i=0;i<hrefs->count;i++) |
... | ... |
@@ -893,8 +884,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
893 | 893 |
urls.displayLink.data = url; |
894 | 894 |
} |
895 | 895 |
|
896 |
- rc = phishingCheck(&urls); |
|
897 |
- if(phish_disabled) |
|
896 |
+ rc = phishingCheck(ctx->engine,&urls); |
|
897 |
+ if(is_phish_disabled(ctx->engine)) |
|
898 | 898 |
return 0; |
899 | 899 |
free_if_needed(&urls); |
900 | 900 |
cli_dbgmsg("Phishing scan result:%s\n",phishing_ret_toString(rc)); |
... | ... |
@@ -1015,28 +1006,89 @@ static char* str_compose(const char* a,const char* b,const char* c) |
1015 | 1015 |
|
1016 | 1016 |
/*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please. |
1017 | 1017 |
* there is fragmentaddress1, and 2 to work around the ISO limitation of 509 bytes max length for string constants*/ |
1018 |
-static char* url_regex = NULL; |
|
1019 | 1018 |
static const char numeric_url_regex[] = "^ *"URI_numeric_fragmentaddress" *$"; |
1019 |
+static char* url_regex = NULL; |
|
1020 |
+ |
|
1021 |
+static int hexinited=0; |
|
1022 |
+static short int hextable[256]; |
|
1023 |
+ |
|
1024 |
+static inline char hex2int(const unsigned char* src) |
|
1025 |
+{ |
|
1026 |
+ assert(hexinited); |
|
1027 |
+ return hextable[src[0]]<<4 | hextable[src[1]]; |
|
1028 |
+} |
|
1029 |
+ |
|
1030 |
+static void free_regex(regex_t** p) |
|
1031 |
+{ |
|
1032 |
+ if(p) { |
|
1033 |
+ if(*p) { |
|
1034 |
+ regfree(*p); |
|
1035 |
+ free(*p); |
|
1036 |
+ *p=NULL; |
|
1037 |
+ } |
|
1038 |
+ } |
|
1039 |
+} |
|
1040 |
+/* --------non-thread-safe functions--------*/ |
|
1041 |
+/* Fills hextable[] so that hex2int() can map a digit character to its value. |
+ * Decimal digits map to 0-9; letters map to 10 plus their offset from 'a'/'A' |
+ * (the loops cover the whole alphabet, not only a-f). All other entries are 0. |
+ * Sets hexinited, which hex2int() asserts on. Not thread-safe. */ |
+static void init_hextable(void) |

1042 |
+{ |

1043 |
+ unsigned char c; |

1044 |
+ memset(hextable,0,sizeof(hextable)); /* entries are shorts: a byte count of 256 cleared only part of the table */ |

1045 |
+ for(c='0';c<='9';c++) |

1046 |
+ hextable[c] = c-'0'; |

1047 |
+ for(c='a';c<='z';c++) |

1048 |
+ hextable[c] = 10+c-'a'; |

1049 |
+ for(c='A';c<='Z';c++) |

1050 |
+ hextable[c] = 10+c-'A'; |

1051 |
+ hexinited=1; |

1052 |
+} |
|
1053 |
+ |
|
1054 |
+/* One-time setup of the state shared by all phishing checks: the matcher |
+ * engine tables, the ccTLD/TLD/URL/numeric-URL regexes and the hex table. |
+ * @engine: engine being initialised; not read here, but declared with its |
+ * real type so the definition agrees with the call in cli_initengine(). |
+ * Returns 0 on success, -1 as soon as one step fails. Not thread-safe. */ |
+int phishing_init(struct cl_engine* engine) |

1055 |
+{ |

1056 |
+ cli_dbgmsg("Initializing phishcheck module\n"); |

1057 |
+ setup_matcher_engine(); |

1058 |
+ if(build_regex(&preg_cctld,cctld_regex,1)) |

1059 |
+ return -1; |

1060 |
+ if(build_regex(&preg_tld,tld_regex,1)) |

1061 |
+ return -1; |

1062 |
+ url_regex = str_compose("^ *("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3"|"URI_CHECK_PROTOCOLS") *$"); |

+ if(!url_regex) /* nothing to compile if the pattern could not be composed */ |

+ return -1; |

1063 |
+ if(build_regex(&preg,url_regex,1)) |

1064 |
+ return -1; |

1065 |
+ if(build_regex(&preg_numeric,numeric_url_regex,1)) |

1066 |
+ return -1; |

1067 |
+ init_hextable(); |

1068 |
+ cli_dbgmsg("Phishcheck module initialized\n"); |

1069 |
+ return 0; |

1070 |
+} |
|
1071 |
+ |
|
1072 |
+ |
|
1073 |
+void phishing_done(struct cl_engine* engine) |
|
1074 |
+{ |
|
1075 |
+ cli_dbgmsg("Cleaning up phishcheck\n"); |
|
1076 |
+ free_regex(&preg); |
|
1077 |
+ free_regex(&preg_cctld); |
|
1078 |
+ free_regex(&preg_tld); |
|
1079 |
+ free_regex(&preg_numeric); |
|
1080 |
+ if(url_regex) |
|
1081 |
+ free(url_regex); |
|
1082 |
+ |
|
1083 |
+ whitelist_done(engine); |
|
1084 |
+ domainlist_done(engine); |
|
1085 |
+ matcher_engine_done(); |
|
1086 |
+ cli_dbgmsg("Phishcheck cleaned up\n"); |
|
1087 |
+} |
|
1088 |
+ |
|
1089 |
+/* ---------------end of non-thread-safe functions-----------*/ |
|
1020 | 1090 |
/* |
1021 | 1091 |
* Only those URLs are identified as URLs for which phishing detection can be performed. |
1022 | 1092 |
* This means that no attempt is made to properly recognize 'cid:' URLs |
1023 | 1093 |
*/ |
1024 | 1094 |
int isURL(const char* URL) |
1025 | 1095 |
{ |
1026 |
- if(!preg) { |
|
1027 |
- url_regex = str_compose("^ *("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3"|"URI_CHECK_PROTOCOLS") *$"); |
|
1028 |
- if(build_regex(&preg,url_regex,1)) |
|
1029 |
- return -1; |
|
1030 |
- } |
|
1031 | 1096 |
return URL ? !regexec(preg,URL,0,NULL,0) : 0; |
1032 | 1097 |
} |
1033 | 1098 |
|
1034 | 1099 |
int isNumericURL(const char* URL) |
1035 | 1100 |
{ |
1036 |
- if(!preg_numeric) { |
|
1037 |
- if(build_regex(&preg_numeric,numeric_url_regex,1)) |
|
1038 |
- return -1; |
|
1039 |
- } |
|
1040 | 1101 |
return URL ? !regexec(preg_numeric,URL,0,NULL,0) : 0; |
1041 | 1102 |
} |
1042 | 1103 |
|
... | ... |
@@ -1115,36 +1167,15 @@ int isEncoded(const char* url) |
1115 | 1115 |
return (cnt-1 >strlen(url)*7/10);/*more than 70% made up of &#;*/ |
1116 | 1116 |
} |
1117 | 1117 |
|
1118 |
-static void free_regex(regex_t** p) |
|
1119 |
-{ |
|
1120 |
- if(p) { |
|
1121 |
- if(*p) { |
|
1122 |
- regfree(*p); |
|
1123 |
- free(*p); |
|
1124 |
- *p=NULL; |
|
1125 |
- } |
|
1126 |
- } |
|
1127 |
-} |
|
1128 | 1118 |
|
1129 |
-void phishing_done(void) |
|
1130 |
-{ |
|
1131 |
- free_regex(&preg); |
|
1132 |
- free_regex(&preg_cctld); |
|
1133 |
- free_regex(&preg_tld); |
|
1134 |
- free_regex(&preg_numeric); |
|
1135 |
- whitelist_done(); |
|
1136 |
- domainlist_done(); |
|
1137 |
- if(url_regex) |
|
1138 |
- free(url_regex); |
|
1139 |
-} |
|
1140 | 1119 |
|
1141 |
-int whitelist_check(struct url_check* urls,int hostOnly) |
|
1120 |
+int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly) |
|
1142 | 1121 |
{ |
1143 |
- return whitelist_match(urls->realLink.data,urls->displayLink.data,hostOnly); |
|
1122 |
+ return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly); |
|
1144 | 1123 |
} |
1145 | 1124 |
|
1146 | 1125 |
/* urls can't contain null pointer, caller must ensure this */ |
1147 |
-enum phish_status phishingCheck(struct url_check* urls) |
|
1126 |
+enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls) |
|
1148 | 1127 |
{ |
1149 | 1128 |
struct url_check host_url; |
1150 | 1129 |
const char cid[] = "cid:"; |
... | ... |
@@ -1166,10 +1197,10 @@ enum phish_status phishingCheck(struct url_check* urls) |
1166 | 1166 |
return rc;/* URLs identical after cleanup */ |
1167 | 1167 |
} |
1168 | 1168 |
|
1169 |
- if(whitelist_check(urls,0)) |
|
1169 |
+ if(whitelist_check(engine,urls,0)) |
|
1170 | 1170 |
return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */ |
1171 | 1171 |
|
1172 |
- if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(urls->realLink.data,urls->displayLink.data,0,&urls->flags)) |
|
1172 |
+ if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,0,&urls->flags)) |
|
1173 | 1173 |
phishy |= DOMAIN_LISTED; |
1174 | 1174 |
else { |
1175 | 1175 |
/* although entire url is not listed, the host might be, |
... | ... |
@@ -1184,14 +1215,14 @@ enum phish_status phishingCheck(struct url_check* urls) |
1184 | 1184 |
return rc; |
1185 | 1185 |
} |
1186 | 1186 |
|
1187 |
- if(whitelist_check(&host_url,1)) { |
|
1187 |
+ if(whitelist_check(engine,&host_url,1)) { |
|
1188 | 1188 |
free_if_needed(&host_url); |
1189 | 1189 |
return CL_PHISH_HOST_WHITELISTED; |
1190 | 1190 |
} |
1191 | 1191 |
|
1192 | 1192 |
if(urls->flags&DOMAINLIST_REQUIRED) { |
1193 | 1193 |
if(!(phishy&DOMAIN_LISTED)) { |
1194 |
- if(domainlist_match(urls->displayLink.data,urls->realLink.data,1,&urls->flags)) |
|
1194 |
+ if(domainlist_match(engine,urls->displayLink.data,urls->realLink.data,1,&urls->flags)) |
|
1195 | 1195 |
phishy |= DOMAIN_LISTED; |
1196 | 1196 |
else { |
1197 | 1197 |
free_if_needed(&host_url); |
... | ... |
@@ -61,9 +61,9 @@ struct url_check { |
61 | 61 |
}; |
62 | 62 |
|
63 | 63 |
int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs); |
64 |
-enum phish_status phishingCheck(struct url_check* urls); |
|
64 |
+enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls); |
|
65 | 65 |
|
66 |
-int whitelist_check(struct url_check* urls,int hostOnly); |
|
66 |
+int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly); |
|
67 | 67 |
void url_check_init(struct url_check* urls); |
68 | 68 |
void get_host(struct string* dest,const char* URL,int isReal,int* phishy); |
69 | 69 |
void string_free(struct string* str); |
... | ... |
@@ -89,7 +89,13 @@ enum phish_status url_get_host(struct url_check* url,struct url_check* host_url, |
89 | 89 |
void url_get_domain(struct url_check* url,struct url_check* domains); |
90 | 90 |
enum phish_status phishy_map(int phishy,enum phish_status fallback); |
91 | 91 |
int isEncoded(const char* url); |
92 |
-void phishing_done(void); |
|
92 |
+ |
|
93 |
+void phish_disable(const char* reason); |
|
94 |
+/* Global, non-thread-safe functions, call only once! */ |
|
95 |
+/* Returns 0 on success, -1 when one of its build_regex() calls fails. */ |

+int phishing_init(struct cl_engine* engine); |
|
96 |
+void phishing_done(struct cl_engine* engine); |
|
97 |
+/* end of non-thread-safe functions */ |
|
98 |
+ |
|
93 | 99 |
|
94 | 100 |
static inline int isPhishing(enum phish_status rc) |
95 | 101 |
{ |
... | ... |
@@ -44,6 +44,7 @@ |
44 | 44 |
#ifdef CL_EXPERIMENTAL |
45 | 45 |
#include "phish_whitelist.h" |
46 | 46 |
#include "phish_domaincheck_db.h" |
47 |
+#include "regex_list.h" |
|
47 | 48 |
#endif |
48 | 49 |
|
49 | 50 |
|
... | ... |
@@ -469,6 +470,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex |
469 | 469 |
|
470 | 470 |
static int cli_initengine(struct cl_engine **engine, unsigned int options) |
471 | 471 |
{ |
472 |
+ int rc; |
|
472 | 473 |
|
473 | 474 |
if(!*engine) { |
474 | 475 |
cli_dbgmsg("Initializing the engine structure\n"); |
... | ... |
@@ -487,6 +489,10 @@ static int cli_initengine(struct cl_engine **engine, unsigned int options) |
487 | 487 |
cli_errmsg("Can't allocate memory for roots!\n"); |
488 | 488 |
return CL_EMEM; |
489 | 489 |
} |
490 |
+#ifdef CL_EXPERIMENTAL |
|
491 |
+ if(rc =phishing_init(*engine)) |
|
492 |
+ return rc; |
|
493 |
+#endif |
|
490 | 494 |
} |
491 | 495 |
|
492 | 496 |
return CL_SUCCESS; |
... | ... |
@@ -593,6 +599,44 @@ static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo, |
593 | 593 |
return CL_SUCCESS; |
594 | 594 |
} |
595 | 595 |
|
596 |
+#ifdef CL_EXPERIMENTAL |
|
597 |
+static int cli_loadwdb(struct cl_engine** engine,FILE* fd,unsigned int options) |
|
598 |
+{ |
|
599 |
+ int ret = 0; |
|
600 |
+ |
|
601 |
+ if((ret = cli_initengine(engine, options))) { |
|
602 |
+ cl_free(*engine); |
|
603 |
+ return ret; |
|
604 |
+ } |
|
605 |
+ |
|
606 |
+ if(!(*engine)->whitelist_matcher) |
|
607 |
+ if(ret = init_whitelist(*engine)) { |
|
608 |
+ whitelist_done(*engine); |
|
609 |
+ cl_free(*engine); |
|
610 |
+ return ret; |
|
611 |
+ } |
|
612 |
+ |
|
613 |
+ return load_regex_matcher((*engine)->whitelist_matcher,fd,options); |
|
614 |
+} |
|
615 |
+ |
|
616 |
+static int cli_loadpdb(struct cl_engine** engine,FILE* fd,unsigned int options) |
|
617 |
+{ |
|
618 |
+ int ret = 0; |
|
619 |
+ |
|
620 |
+ if((ret = cli_initengine(engine, options))) { |
|
621 |
+ cl_free(*engine); |
|
622 |
+ return ret; |
|
623 |
+ } |
|
624 |
+ |
|
625 |
+ if(!(*engine)->domainlist_matcher) |
|
626 |
+ if(ret = init_domainlist(*engine)) { |
|
627 |
+ domainlist_done(*engine); |
|
628 |
+ cl_free(*engine); |
|
629 |
+ return ret; |
|
630 |
+ } |
|
631 |
+ return load_regex_matcher((*engine)->domainlist_matcher,fd,options); |
|
632 |
+} |
|
633 |
+#endif |
|
596 | 634 |
static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options) |
597 | 635 |
{ |
598 | 636 |
char buffer[FILEBUFF], *sig, *virname, *offset, *pt; |
... | ... |
@@ -1118,12 +1162,12 @@ static int cli_load(const char *filename, struct cl_engine **engine, unsigned in |
1118 | 1118 |
#ifdef CL_EXPERIMENTAL |
1119 | 1119 |
} else if(cli_strbcasestr(filename, ".wdb")) { |
1120 | 1120 |
if(!(options & CL_SCAN_NOPHISHING)) |
1121 |
- ret = cli_loadwdb(fd, options); |
|
1121 |
+ ret = cli_loadwdb(engine, fd, options); |
|
1122 | 1122 |
else |
1123 | 1123 |
skipped = 1; |
1124 | 1124 |
} else if(cli_strbcasestr(filename, ".pdb")) { |
1125 | 1125 |
if(!(options & CL_SCAN_NOPHISHING)) |
1126 |
- ret = cli_loadpdb(fd, options); |
|
1126 |
+ ret = cli_loadpdb(engine, fd, options); |
|
1127 | 1127 |
else |
1128 | 1128 |
skipped = 1; |
1129 | 1129 |
#endif |
... | ... |
@@ -1548,6 +1592,9 @@ void cl_free(struct cl_engine *engine) |
1548 | 1548 |
free(metah); |
1549 | 1549 |
} |
1550 | 1550 |
|
1551 |
+#ifdef CL_EXPERIMENTAL |
|
1552 |
+ phishing_done(engine); |
|
1553 |
+#endif |
|
1551 | 1554 |
free(engine); |
1552 | 1555 |
} |
1553 | 1556 |
|
... | ... |
@@ -19,6 +19,9 @@ |
19 | 19 |
* MA 02110-1301, USA. |
20 | 20 |
* |
21 | 21 |
* $Log: regex_list.c,v $ |
22 |
+ * Revision 1.7 2006/10/07 11:00:46 tkojm |
|
23 |
+ * make the experimental anti-phishing code more thread safe |
|
24 |
+ * |
|
22 | 25 |
* Revision 1.6 2006/09/27 19:14:49 njh |
23 | 26 |
* Fix segfault on Solaris |
24 | 27 |
* |
... | ... |
@@ -190,15 +193,47 @@ static struct std_classmap { |
190 | 190 |
{"[:xdigit:]",XDIGIT} |
191 | 191 |
}; |
192 | 192 |
|
193 |
+static int cli_iswctype(const char c,const enum wctype_t type); |
|
194 |
+ |
|
195 |
+/* -------------- NON_THREAD_SAFE BEGIN --------------*/ |
|
196 |
+/* Global variables and functions accessing them, not thread-safe! |
|
197 |
+ * they should be called on application startup/shutdown once! */ |
|
193 | 198 |
static const size_t std_class_cnt = sizeof(std_class)/sizeof(std_class[0]); |
194 | 199 |
#define STD_CLASS_CNT sizeof(std_class)/sizeof(std_class[0]) |
195 | 200 |
typedef char char_bitmap_t[32]; |
196 | 201 |
static char_bitmap_p char_class_bitmap[STD_CLASS_CNT]; |
197 | 202 |
static unsigned short int char_class[256]; |
203 |
+static int engine_ok = 0; |
|
204 |
+ |
|
205 |
+/* Builds the lookup tables for the standard character classes in std_class[]: |
+ * when character c belongs to class i, bit i is set in char_class[c] and bit c |
+ * is set in char_class_bitmap[i]. Sets engine_ok, which regex_list_match() |
+ * asserts on. Global state, not thread-safe. |
+ * NOTE(review): the cli_calloc() result is used without a NULL check. */ |
+void setup_matcher_engine(void) |

206 |
+{ |

207 |
+ /*Set up std character classes*/ |

208 |
+ size_t i; |

209 |
+ size_t j; |

210 |
+ memset(char_class,0,sizeof(char_class)); /* entries are unsigned shorts: a byte count of 256 cleared only part of the table */ |

211 |
+ for(i=0;i<std_class_cnt;i++) { |

212 |
+ enum wctype_t type = std_class[i].type; |

213 |
+ char_class_bitmap[i]=cli_calloc(256>>3,1); |

214 |
+ for(j=0;j<256;j++) |

215 |
+ if(cli_iswctype(j,type)) { |

216 |
+ char_class[j] |= 1<<i; |

217 |
+ char_class_bitmap[i][j>>3] |= 1<<(j&0x07); |

218 |
+ } |

219 |
+ } |

220 |
+ engine_ok = 1; |

221 |
+} |
|
222 |
+ |
|
223 |
+/* Releases the per-class bitmaps allocated by setup_matcher_engine() and |
+ * clears engine_ok. Each slot is reset to NULL after being freed, so a |
+ * repeated call (phishing_done() runs this on every cl_free()) frees nothing |
+ * twice. Global state, not thread-safe. */ |
+void matcher_engine_done(void) |

224 |
+{ |

225 |
+ size_t i; |

226 |
+ for(i=0;i<std_class_cnt;i++) { |

227 |
+ free(char_class_bitmap[i]); |

+ char_class_bitmap[i] = NULL; /* free(NULL) is a no-op on the next call */ |

+ } |

228 |
+ engine_ok = 0; |

229 |
+} |
|
230 |
+ |
|
198 | 231 |
|
232 |
+/* -------------- NON_THREAD_SAFE END --------------*/ |
|
199 | 233 |
/* Prototypes */ |
200 |
-static void setup_matcher_engine(void); |
|
201 |
-static void matcher_engine_done(void); |
|
202 | 234 |
static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info); |
203 | 235 |
static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info); |
204 | 236 |
static void destroy_tree(struct regex_matcher* matcher); |
... | ... |
@@ -236,6 +271,7 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch |
236 | 236 |
assert(real_url); |
237 | 237 |
assert(display_url); |
238 | 238 |
assert(info); |
239 |
+ assert(engine_ok); |
|
239 | 240 |
if(!matcher->list_inited) |
240 | 241 |
return 0; |
241 | 242 |
assert(matcher->list_built); |
... | ... |
@@ -328,9 +364,6 @@ static inline struct tree_node* stack_pop(struct node_stack* stack) |
328 | 328 |
int init_regex_list(struct regex_matcher* matcher) |
329 | 329 |
{ |
330 | 330 |
assert(matcher); |
331 |
- |
|
332 |
- setup_matcher_engine(); |
|
333 |
- |
|
334 | 331 |
matcher->list_inited = 0; |
335 | 332 |
matcher->root_hosts = (struct cli_matcher*) cli_calloc(1,sizeof(*matcher->root_hosts)); |
336 | 333 |
if(!matcher->root_hosts) |
... | ... |
@@ -424,7 +457,6 @@ static int add_regex_list_element(struct cli_matcher* root,const char* pattern,c |
424 | 424 |
#ifndef NDEBUG |
425 | 425 |
void dump_tree(struct tree_node* root); |
426 | 426 |
#endif |
427 |
-static int matcher_engine_refcount=0; |
|
428 | 427 |
|
429 | 428 |
static int build_regex_list(struct regex_matcher* matcher); |
430 | 429 |
/* Load patterns/regexes from file */ |
... | ... |
@@ -511,7 +543,6 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
511 | 511 |
return CL_EMALFDB; |
512 | 512 |
} |
513 | 513 |
regex_list_cleanup(matcher); |
514 |
- matcher_engine_refcount++; |
|
515 | 514 |
return CL_SUCCESS; |
516 | 515 |
} |
517 | 516 |
|
... | ... |
@@ -665,7 +696,6 @@ void regex_list_done(struct regex_matcher* matcher) |
665 | 665 |
matcher->list_loaded=0; |
666 | 666 |
} |
667 | 667 |
if(matcher->list_inited) { |
668 |
- matcher_engine_done(); |
|
669 | 668 |
matcher->list_inited=0; |
670 | 669 |
} |
671 | 670 |
stack_destroy(&matcher->node_stack); |
... | ... |
@@ -708,38 +738,7 @@ static int cli_iswctype(const char c,const enum wctype_t type) |
708 | 708 |
} |
709 | 709 |
} |
710 | 710 |
|
711 |
-static int engine_inited=0; |
|
712 |
- |
|
713 |
-static void setup_matcher_engine(void) |
|
714 |
-{ |
|
715 |
- /*Set up std character classes*/ |
|
716 |
- size_t i; |
|
717 |
- size_t j; |
|
718 |
- if(engine_inited) |
|
719 |
- return; |
|
720 |
- memset(char_class,0,256); |
|
721 |
- for(i=0;i<std_class_cnt;i++) { |
|
722 |
- enum wctype_t type = std_class[i].type; |
|
723 |
- char_class_bitmap[i]=cli_calloc(256>>3,1); |
|
724 |
- for(j=0;j<256;j++) |
|
725 |
- if(cli_iswctype(j,type)) { |
|
726 |
- char_class[j] |= 1<<i; |
|
727 |
- char_class_bitmap[i][j>>3] |= 1<<(j&0x07); |
|
728 |
- } |
|
729 |
- } |
|
730 |
- engine_inited=1; |
|
731 |
-} |
|
732 | 711 |
|
733 |
-static void matcher_engine_done(void) |
|
734 |
-{ |
|
735 |
- size_t i; |
|
736 |
- matcher_engine_refcount--; |
|
737 |
- if(!matcher_engine_refcount) { |
|
738 |
- for(i=0;i<std_class_cnt;i++) |
|
739 |
- free(char_class_bitmap[i]); |
|
740 |
- } |
|
741 |
- engine_inited=0; |
|
742 |
-} |
|
743 | 712 |
|
744 | 713 |
struct token_t |
745 | 714 |
{ |
... | ... |
@@ -48,6 +48,8 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio |
48 | 48 |
void regex_list_cleanup(struct regex_matcher* matcher); |
49 | 49 |
void regex_list_done(struct regex_matcher* matcher); |
50 | 50 |
int is_regex_ok(struct regex_matcher* matcher); |
51 |
+ |
|
52 |
+void setup_matcher_engine(void);/* global, non thread-safe */ |
|
51 | 53 |
#endif |
52 | 54 |
|
53 | 55 |
#endif |