Browse code

make the experimental anti-phishing code more thread safe

git-svn: trunk@2340

Tomasz Kojm authored on 2006/10/07 20:00:46
Showing 11 changed files
... ...
@@ -1,3 +1,8 @@
1
+Sat Oct  7 12:47:32 CEST 2006 (tk)
2
+----------------------------------
3
+  * libclamav: make the experimental anti-phishing code more thread safe,
4
+	       patch from Edvin Torok <edwintorok*gmail.com>
5
+
1 6
 Thu Oct  5 22:46:19 CEST 2006 (tk)
2 7
 ----------------------------------
3 8
   * libclamav/sis.c: fix handling of compressed multiple language files (bb#42)
... ...
@@ -181,6 +181,10 @@ struct cl_engine {
181 181
 
182 182
     /* Hardware database handle */
183 183
     void *hwdb;
184
+
185
+    /* Phishing .pdb and .wdb databases*/
186
+    void *whitelist_matcher;
187
+    void *domainlist_matcher;
184 188
 };
185 189
 
186 190
 struct cl_limits {
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phish_domaincheck_db.c,v $
22
+ *  Revision 1.3  2006/10/07 11:00:46  tkojm
23
+ *  make the experimental anti-phishing code more thread safe
24
+ *
22 25
  *  Revision 1.2  2006/09/26 18:55:36  njh
23 26
  *  Fixed portability issues
24 27
  *
... ...
@@ -91,13 +94,10 @@
91 91
 #include "regex_list.h"
92 92
 #include "matcher-ac.h"
93 93
 
94
-
95
-static struct regex_matcher domainlist_matcher;
96
-
97
-int domainlist_match(const char* real_url,const char* display_url,int hostOnly,unsigned short* flags)
94
+int domainlist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly,unsigned short* flags)
98 95
 {
99 96
 	const char* info;
100
-	int rc = regex_list_match(&domainlist_matcher,real_url,display_url,hostOnly,&info);
97
+	int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly,&info) : 0;
101 98
 	if(rc && info && info[0]) {/*match successfull, and has custom flags*/
102 99
 		if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) {
103 100
 			unsigned short notwantedflags=0;
... ...
@@ -111,29 +111,38 @@ int domainlist_match(const char* real_url,const char* display_url,int hostOnly,u
111 111
 	return rc;
112 112
 }
113 113
 
114
-int init_domainlist(void)
114
+int init_domainlist(struct cl_engine* engine)
115 115
 {
116
-	return	init_regex_list(&domainlist_matcher);
116
+	if(engine) {
117
+		engine->domainlist_matcher = cli_malloc(sizeof(*engine->domainlist_matcher));
118
+		if(!engine->domainlist_matcher)
119
+			return CL_EMEM;
120
+		return init_regex_list(engine->domainlist_matcher);
117 121
 }
118
-
119
-int is_domainlist_ok(void)
120
-{
121
-	return is_regex_ok(&domainlist_matcher);
122
+	else
123
+		return CL_ENULLARG;
122 124
 }
123 125
 
124
-int cli_loadpdb(FILE* fd,unsigned int options)
126
+int is_domainlist_ok(const struct cl_engine* engine)
125 127
 {
126
-	return load_regex_matcher(&domainlist_matcher,fd,options);
128
+	return (engine && engine->domainlist_matcher) ? is_regex_ok(engine->domainlist_matcher) : 1;
127 129
 }
128 130
 
129
-void domainlist_cleanup(void)
131
+
132
+void domainlist_cleanup(const struct cl_engine* engine)
130 133
 {
131
-	regex_list_cleanup(&domainlist_matcher);
134
+	if(engine && engine->domainlist_matcher) {
135
+		regex_list_cleanup(engine->domainlist_matcher);
136
+	}
132 137
 }
133 138
 
134
-void domainlist_done(void)
139
+void domainlist_done(struct cl_engine* engine)
135 140
 {
136
-	regex_list_done(&domainlist_matcher);
141
+	if(engine && engine->domainlist_matcher) {
142
+		regex_list_done(engine->domainlist_matcher);
143
+		free(engine->domainlist_matcher);
144
+		engine->domainlist_matcher = NULL;
145
+	}
137 146
 }
138 147
 
139 148
 #endif
... ...
@@ -25,13 +25,11 @@
25 25
 #ifndef _PHISH_DOMAINCHECK_DB_H
26 26
 #define _PHISH_DOMAINCHECK_DB_H
27 27
 
28
-int cli_loadpdb(FILE* fd, unsigned int options);
29
-int build_domainlist(void);
30
-int init_domainlist(void);
31
-void domainlist_done(void);
32
-void domainlist_cleanup(void);
33
-int is_domainlist_ok(void);
34
-int domainlist_match(const char* real_url,const char* display_url,int hostOnly,unsigned short* flags);
28
+int init_domainlist(struct cl_engine* engine);
29
+void domainlist_done(struct cl_engine* engine);
30
+void domainlist_cleanup(const struct cl_engine* engine);
31
+int is_domainlist_ok(const struct cl_engine* engine);
32
+int domainlist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly,unsigned short* flags);
35 33
 
36 34
 #endif
37 35
 
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phish_whitelist.c,v $
22
+ *  Revision 1.4  2006/10/07 11:00:46  tkojm
23
+ *  make the experimental anti-phishing code more thread safe
24
+ *
22 25
  *  Revision 1.3  2006/09/26 18:55:36  njh
23 26
  *  Fixed portability issues
24 27
  *
... ...
@@ -93,78 +96,44 @@
93 93
 #include "regex_list.h"
94 94
 #include "matcher-ac.h"
95 95
 
96
-/*#define WHITELIST_TEST*/
97
-
98
-static struct regex_matcher whitelist_matcher;
99
-
100
-int whitelist_match(const char* real_url,const char* display_url,int hostOnly)
96
+int whitelist_match(const struct cl_engine* engine,const char* real_url,const char* display_url,int hostOnly)
101 97
 {
102 98
 	const char* info;/*unused*/
103
-	return	regex_list_match(&whitelist_matcher,real_url,display_url,hostOnly,&info);
99
+	return	engine->whitelist_matcher ? regex_list_match(engine->whitelist_matcher,real_url,display_url,hostOnly,&info) : 0;
104 100
 }
105 101
 
106
-int init_whitelist(void)
102
+int init_whitelist(struct cl_engine* engine)
107 103
 {
108
-	return	init_regex_list(&whitelist_matcher);
104
+	if(engine) {
105
+		engine->whitelist_matcher = cli_malloc(sizeof(*engine->whitelist_matcher));
106
+		if(!engine->whitelist_matcher)
107
+			return CL_EMEM;
108
+		return	init_regex_list(engine->whitelist_matcher);
109 109
 }
110
-
111
-int is_whitelist_ok(void)
112
-{
113
-	return is_regex_ok(&whitelist_matcher);
110
+	else
111
+		return CL_ENULLARG;
114 112
 }
115 113
 
116
-int cli_loadwdb(FILE* fd,unsigned int options)
114
+int is_whitelist_ok(const struct cl_engine* engine)
117 115
 {
118
-	return load_regex_matcher(&whitelist_matcher,fd,options);
116
+	return (engine && engine->whitelist_matcher) ? is_regex_ok(engine->whitelist_matcher) : 1;
119 117
 }
120 118
 
121
-void whitelist_cleanup(void)
122
-{
123
-	regex_list_cleanup(&whitelist_matcher);
124
-}
125 119
 
126
-void whitelist_done(void)
120
+void whitelist_cleanup(const struct cl_engine* engine)
127 121
 {
128
-	regex_list_done(&whitelist_matcher);
122
+	if(engine && engine->whitelist_matcher) {
123
+		regex_list_cleanup(engine->whitelist_matcher);
124
+	}
129 125
 }
130 126
 
131
-#ifdef WHITELIST_TEST
132
-int main(int argc,char* argv[])
127
+void whitelist_done(struct cl_engine* engine)
133 128
 {
134
-/*	struct tree_node* root=tree_node_alloc(NULL,1);
135
-	const  char* info;
136
-	const  unsigned char test[]="tesxt";
137
-	setup_matcher();
138
-	root->op=OP_ROOT;
139
-	root->c=0;
140
-	root->next=NULL;
141
-	root->listend=1;
142
-	dump_tree(root);
143
-	add_pattern(&root,"test","1");
144
-	dump_tree(root);
145
-	add_pattern(&root,"tesv","2");
146
-	dump_tree(root);
147
-	add_pattern(&root,"tert","3");
148
-	dump_tree(root);
149
-	add_pattern(&root,"terr+","4");
150
-	dump_tree(root);
151
-	add_pattern(&root,"tes[xy]t","5");
152
-	dump_tree(root);
153
-	match_node(root,test,sizeof(test),&info);
154
-	destroy_tree(root);
155
-	if(info)
156
-		printf("%s\n",info);
157
-	else printf("not found\n");*/
158
-	/*FILE* f=fopen("w.wdb","r");
159
-	init_whitelist();
160
-	load_whitelist(f);
161
-	fclose(f);
162
-	dump_tree(root_regex);
163
-	build_whitelist();
164
-	printf("%d\n",whitelist_match("http://www.google.ro","http://www.google.me.ro",0));
165
-	whitelist_done();*/
166
-	return 0;
129
+	if(engine && engine->whitelist_matcher) {
130
+		regex_list_done(engine->whitelist_matcher);	
131
+		free(engine->whitelist_matcher);
132
+		engine->whitelist_matcher = NULL;
133
+	}
167 134
 }
168
-#endif
169 135
 
170 136
 #endif
... ...
@@ -25,13 +25,11 @@
25 25
 #ifndef _PHISH_WHITELIST_H
26 26
 #define _PHISH_WHITELIST_H
27 27
 
28
-int cli_loadwdb(FILE* fd, unsigned int options);
29
-int build_whitelist(void);
30
-int init_whitelist(void);
31
-void whitelist_done(void);
32
-void whitelist_cleanup(void);
33
-int is_whitelist_ok(void);
34
-int whitelist_match(const char* real_url,const char* display_url,int hostOnly);
28
+int init_whitelist(struct cl_engine* engine);
29
+void whitelist_done(struct cl_engine* engine);
30
+void whitelist_cleanup(const struct cl_engine* engine);
31
+int is_whitelist_ok(const struct cl_engine* engine);
32
+int whitelist_match(const struct cl_engine* engine, const char* real_url,const char* display_url,int hostOnly);
35 33
 
36 34
 #endif
37 35
 
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: phishcheck.c,v $
22
+ *  Revision 1.11  2006/10/07 11:00:46  tkojm
23
+ *  make the experimental anti-phishing code more thread safe
24
+ *
22 25
  *  Revision 1.10  2006/09/27 14:23:14  njh
23 26
  *  Ported to VS2005
24 27
  *
... ...
@@ -147,6 +150,8 @@ case CL_PHISH_HOST_NOT_LISTED:
147 147
 #include <regex.h>
148 148
 #endif
149 149
 
150
+#include <pthread.h>
151
+
150 152
 #include "others.h"
151 153
 #include "defaults.h"
152 154
 #include "str.h"
... ...
@@ -247,6 +252,7 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla
247 247
  *
248 248
  */
249 249
 static char empty_string[]="";
250
+
250 251
 static	inline	void string_init_c(struct string* dest,char* data);
251 252
 static	void	string_assign_null(struct string* dest);
252 253
 static	char	*rfind(char *start, char c, size_t len);
... ...
@@ -347,7 +353,6 @@ void free_if_needed(struct url_check* url)
347 347
 	string_free(&url->displayLink);
348 348
 }
349 349
 
350
-static int phish_disabled = 0;/* disabled due to fatal startup error */
351 350
 
352 351
 static int build_regex(regex_t** preg,const char* regex,int nosub)
353 352
 {
... ...
@@ -372,7 +377,7 @@ static int build_regex(regex_t** preg,const char* regex,int nosub)
372 372
 #endif
373 373
 		free(*preg);
374 374
 		*preg=NULL;
375
-		phish_disabled=1;
375
+		phish_disable("problem in compiling regex");
376 376
 		return 1;
377 377
 	}
378 378
 	return 0;
... ...
@@ -468,10 +473,6 @@ static const char cctld_regex[] = "^"iana_cctld"$";
468 468
 
469 469
 int isCountryCode(const char* str)
470 470
 {
471
-	if(!preg_cctld) {
472
-		if(build_regex(&preg_cctld,cctld_regex,1))
473
-			return -1;
474
-	}
475 471
 	return str ? !regexec(preg_cctld,str,0,NULL,0) : 0;
476 472
 }
477 473
 
... ...
@@ -484,10 +485,6 @@ int isTLD(const char* str,int len)
484 484
 		int rc;
485 485
 		strncpy(s,str,len);
486 486
 		s[len]='\0';
487
-		if(!preg_tld) {
488
-			if(build_regex(&preg_tld,tld_regex,1))
489
-				return -1;
490
-		}
491 487
 		rc = !regexec(preg_tld,s,0,NULL,0);
492 488
 		free(s);
493 489
 		return rc;
... ...
@@ -599,15 +596,10 @@ int isSSL(const char* URL)
599 599
 	return URL ? !strncmp(https,URL,sizeof(https)-1) : 0;
600 600
 }
601 601
 
602
-static int hexinited=0;
603
-static short int hextable[256];
604
-static inline char hex2int(const unsigned char* src)
605
-{
606
-	assert(hexinited);
607
-	return hextable[src[0]]<<4 | hextable[src[1]];
608
-}
609 602
 
610 603
 
604
+static inline char hex2int(const unsigned char* src);
605
+
611 606
 /* deletes @what from the string @begin.
612 607
  * @what_len: length of @what, excluding the terminating \0 */
613 608
 static void
... ...
@@ -828,29 +820,32 @@ void get_redirected_URL(struct string* URL)
828 828
 	returns redirected URL*/
829 829
 }
830 830
 
831
-static inline int is_phish_disabled(void)
831
+
832
+/* ---- runtime disable ------*/
833
+static int phish_disabled = 0;
834
+static pthread_mutex_t phish_disabled_lock = PTHREAD_MUTEX_INITIALIZER;
835
+
836
+void phish_disable(const char* reason)
832 837
 {
833
-	if (phish_disabled)
834
-		return 1;
835
-	else if (!is_whitelist_ok()) {
838
+	cli_warnmsg("Disabling phishing checks, reason:%s\n",reason);
839
+	pthread_mutex_lock(&phish_disabled_lock);
836 840
 		phish_disabled = 1;
837
-		return 1;
838
-	}
839
-	else return 0;
841
+	pthread_mutex_unlock(&phish_disabled_lock);
840 842
 }
841 843
 
842
-static void init_hextable(void)
844
+static inline int is_phish_disabled(const struct cl_engine* engine)
843 845
 {
844
-	unsigned char c;
845
-	memset(hextable,0,256);
846
-	for(c='0';c<='9';c++)
847
-		hextable[c] = c-'0';
848
-	for(c='a';c<='z';c++)
849
-		hextable[c] = 10+c-'a';
850
-	for(c='A';c<='Z';c++)
851
-		hextable[c] = 10+c-'A';
852
-	hexinited=1;
846
+	int rc;
847
+	if (!is_whitelist_ok(engine)) 
848
+		phish_disable("whitelist is not ok");
849
+	if (!is_domainlist_ok(engine))
850
+		phish_disable("domainlist is not ok");
851
+	pthread_mutex_lock(&phish_disabled_lock);
852
+	rc = phish_disabled;
853
+	pthread_mutex_unlock(&phish_disabled_lock);
854
+	return rc;
853 855
 }
856
+/* -------end runtime disable---------*/
854 857
 
855 858
 int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
856 859
 {
... ...
@@ -859,12 +854,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
859 859
 	const size_t href_text_len = sizeof(href_text);
860 860
 	const size_t src_text_len = sizeof(src_text);
861 861
 	int i;
862
-	if(is_phish_disabled())
862
+	if(is_phish_disabled(ctx->engine))
863 863
 		return 0;
864
-	if(!hexinited) {
865
-		init_hextable();
866
-		atexit(phishing_done);/*TODO: replace this with a proper phishing_done call from manager.c*/
867
-	}
868 864
 
869 865
 	*ctx->virname=NULL;
870 866
 	for(i=0;i<hrefs->count;i++)
... ...
@@ -893,8 +884,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
893 893
 				urls.displayLink.data = url;
894 894
 			}
895 895
 
896
-			rc = phishingCheck(&urls);
897
-			if(phish_disabled)
896
+			rc = phishingCheck(ctx->engine,&urls);
897
+			if(is_phish_disabled(ctx->engine))
898 898
 				return 0;
899 899
 			free_if_needed(&urls);
900 900
 			cli_dbgmsg("Phishing scan result:%s\n",phishing_ret_toString(rc));
... ...
@@ -1015,28 +1006,89 @@ static char* str_compose(const char* a,const char* b,const char* c)
1015 1015
 
1016 1016
 /*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
1017 1017
  * there is fragmentaddress1, and 2  to work around the ISO limitation of 509 bytes max length for string constants*/
1018
-static char* url_regex = NULL;
1019 1018
 static const char numeric_url_regex[] = "^ *"URI_numeric_fragmentaddress" *$";
1019
+static char* url_regex = NULL;
1020
+
1021
+static int hexinited=0;
1022
+static short int hextable[256];
1023
+
1024
+static inline char hex2int(const unsigned char* src)
1025
+{
1026
+	assert(hexinited);
1027
+	return hextable[src[0]]<<4 | hextable[src[1]];
1028
+}
1029
+
1030
+static void free_regex(regex_t** p)
1031
+{
1032
+	if(p) {
1033
+		if(*p) {
1034
+			regfree(*p);
1035
+			free(*p);
1036
+			*p=NULL;
1037
+		}
1038
+	}
1039
+}
1040
+/* --------non-thread-safe functions--------*/
1041
+static void init_hextable(void)
1042
+{
1043
+	unsigned char c;
1044
+	memset(hextable,0,256);
1045
+	for(c='0';c<='9';c++)
1046
+		hextable[c] = c-'0';
1047
+	for(c='a';c<='z';c++)
1048
+		hextable[c] = 10+c-'a';
1049
+	for(c='A';c<='Z';c++)
1050
+		hextable[c] = 10+c-'A';
1051
+	hexinited=1;
1052
+}
1053
+
1054
+int phishing_init(engine)
1055
+{
1056
+	cli_dbgmsg("Initializing phishcheck module\n");
1057
+	setup_matcher_engine();
1058
+	if(build_regex(&preg_cctld,cctld_regex,1))
1059
+		return -1;
1060
+	if(build_regex(&preg_tld,tld_regex,1))
1061
+		return -1;	
1062
+	url_regex = str_compose("^ *("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3"|"URI_CHECK_PROTOCOLS") *$");
1063
+	if(build_regex(&preg,url_regex,1))
1064
+		return -1;
1065
+	if(build_regex(&preg_numeric,numeric_url_regex,1))
1066
+		return -1;
1067
+	init_hextable();
1068
+	cli_dbgmsg("Phishcheck module initialized\n");
1069
+	return 0;
1070
+}
1071
+
1072
+
1073
+void phishing_done(struct cl_engine* engine)
1074
+{
1075
+	cli_dbgmsg("Cleaning up phishcheck\n");
1076
+	free_regex(&preg);
1077
+	free_regex(&preg_cctld);
1078
+	free_regex(&preg_tld);
1079
+	free_regex(&preg_numeric);
1080
+	if(url_regex)
1081
+		free(url_regex);
1082
+
1083
+	whitelist_done(engine);
1084
+	domainlist_done(engine);
1085
+	matcher_engine_done();
1086
+	cli_dbgmsg("Phishcheck cleaned up\n");
1087
+}
1088
+
1089
+/* ---------------end of non-thread-safe functions-----------*/
1020 1090
 /*
1021 1091
  * Only those URLs are identified as URLs for which phishing detection can be performed.
1022 1092
  * This means that no attempt is made to properly recognize 'cid:' URLs
1023 1093
  */
1024 1094
 int isURL(const char* URL)
1025 1095
 {
1026
-	if(!preg) {
1027
-		url_regex = str_compose("^ *("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3"|"URI_CHECK_PROTOCOLS") *$");
1028
-		if(build_regex(&preg,url_regex,1))
1029
-			return -1;
1030
-	}
1031 1096
 	return URL ? !regexec(preg,URL,0,NULL,0) : 0;
1032 1097
 }
1033 1098
 
1034 1099
 int isNumericURL(const char* URL)
1035 1100
 {
1036
-	if(!preg_numeric) {
1037
-		if(build_regex(&preg_numeric,numeric_url_regex,1))
1038
-			return -1;
1039
-	}
1040 1101
 	return URL ? !regexec(preg_numeric,URL,0,NULL,0) : 0;
1041 1102
 }
1042 1103
 
... ...
@@ -1115,36 +1167,15 @@ int isEncoded(const char* url)
1115 1115
 	return (cnt-1 >strlen(url)*7/10);/*more than 70% made up of &#;*/
1116 1116
 }
1117 1117
 
1118
-static void free_regex(regex_t** p)
1119
-{
1120
-	if(p) {
1121
-		if(*p) {
1122
-			regfree(*p);
1123
-			free(*p);
1124
-			*p=NULL;
1125
-		}
1126
-	}
1127
-}
1128 1118
 
1129
-void phishing_done(void)
1130
-{
1131
-	free_regex(&preg);
1132
-	free_regex(&preg_cctld);
1133
-	free_regex(&preg_tld);
1134
-	free_regex(&preg_numeric);
1135
-	whitelist_done();
1136
-	domainlist_done();
1137
-	if(url_regex)
1138
-		free(url_regex);
1139
-}
1140 1119
 
1141
-int whitelist_check(struct url_check* urls,int hostOnly)
1120
+int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly)
1142 1121
 {
1143
-	return whitelist_match(urls->realLink.data,urls->displayLink.data,hostOnly);
1122
+	return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
1144 1123
 }
1145 1124
 
1146 1125
 /* urls can't contain null pointer, caller must ensure this */
1147
-enum phish_status phishingCheck(struct url_check* urls)
1126
+enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
1148 1127
 {
1149 1128
 	struct url_check host_url;
1150 1129
 	const char cid[] = "cid:";
... ...
@@ -1166,10 +1197,10 @@ enum phish_status phishingCheck(struct url_check* urls)
1166 1166
 		return rc;/* URLs identical after cleanup */
1167 1167
 	}
1168 1168
 
1169
-	if(whitelist_check(urls,0))
1169
+	if(whitelist_check(engine,urls,0))
1170 1170
 		return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */
1171 1171
 
1172
-	if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(urls->realLink.data,urls->displayLink.data,0,&urls->flags))
1172
+	if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,0,&urls->flags))
1173 1173
 		phishy |= DOMAIN_LISTED;
1174 1174
 	else {
1175 1175
 		/* although entire url is not listed, the host might be,
... ...
@@ -1184,14 +1215,14 @@ enum phish_status phishingCheck(struct url_check* urls)
1184 1184
 		return rc;
1185 1185
 	}
1186 1186
 
1187
-	if(whitelist_check(&host_url,1)) {
1187
+	if(whitelist_check(engine,&host_url,1)) {
1188 1188
 		free_if_needed(&host_url);
1189 1189
 		return CL_PHISH_HOST_WHITELISTED;
1190 1190
 	}
1191 1191
 
1192 1192
 	if(urls->flags&DOMAINLIST_REQUIRED) {
1193 1193
 		if(!(phishy&DOMAIN_LISTED)) {
1194
-			if(domainlist_match(urls->displayLink.data,urls->realLink.data,1,&urls->flags))
1194
+			if(domainlist_match(engine,urls->displayLink.data,urls->realLink.data,1,&urls->flags))
1195 1195
 				phishy |= DOMAIN_LISTED;
1196 1196
 			else {
1197 1197
 				free_if_needed(&host_url);
... ...
@@ -61,9 +61,9 @@ struct url_check {
61 61
 };
62 62
 
63 63
 int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);
64
-enum phish_status phishingCheck(struct url_check* urls);
64
+enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
65 65
 
66
-int whitelist_check(struct url_check* urls,int hostOnly);
66
+int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly);
67 67
 void url_check_init(struct url_check* urls);
68 68
 void get_host(struct string* dest,const char* URL,int isReal,int* phishy);
69 69
 void string_free(struct string* str);
... ...
@@ -89,7 +89,13 @@ enum phish_status url_get_host(struct url_check* url,struct url_check* host_url,
89 89
 void url_get_domain(struct url_check* url,struct url_check* domains);
90 90
 enum phish_status phishy_map(int phishy,enum phish_status fallback);
91 91
 int isEncoded(const char* url);
92
-void phishing_done(void);
92
+
93
+void phish_disable(const char* reason);
94
+/* Global, non-thread-safe functions, call only once! */
95
+void phishint_init(struct cl_engine* engine);
96
+void phishing_done(struct cl_engine* engine);
97
+/* end of non-thread-safe functions */
98
+
93 99
 
94 100
 static inline int isPhishing(enum phish_status rc)
95 101
 {
... ...
@@ -44,6 +44,7 @@
44 44
 #ifdef CL_EXPERIMENTAL
45 45
 #include "phish_whitelist.h"
46 46
 #include "phish_domaincheck_db.h"
47
+#include "regex_list.h"
47 48
 #endif
48 49
 
49 50
 
... ...
@@ -469,6 +470,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
469 469
 
470 470
 static int cli_initengine(struct cl_engine **engine, unsigned int options)
471 471
 {
472
+    int rc;
472 473
 
473 474
     if(!*engine) {
474 475
 	cli_dbgmsg("Initializing the engine structure\n");
... ...
@@ -487,6 +489,10 @@ static int cli_initengine(struct cl_engine **engine, unsigned int options)
487 487
 	    cli_errmsg("Can't allocate memory for roots!\n");
488 488
 	    return CL_EMEM;
489 489
 	}
490
+#ifdef CL_EXPERIMENTAL
491
+	if(rc =phishing_init(*engine))
492
+		return rc;
493
+#endif
490 494
     }
491 495
 
492 496
     return CL_SUCCESS;
... ...
@@ -593,6 +599,44 @@ static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo,
593 593
     return CL_SUCCESS;
594 594
 }
595 595
 
596
+#ifdef CL_EXPERIMENTAL
597
+static int cli_loadwdb(struct cl_engine** engine,FILE* fd,unsigned int options)
598
+{
599
+	int ret = 0;
600
+
601
+	if((ret = cli_initengine(engine, options))) {
602
+		cl_free(*engine);
603
+		return ret;
604
+	}
605
+
606
+	if(!(*engine)->whitelist_matcher)
607
+		if(ret = init_whitelist(*engine)) {
608
+	            whitelist_done(*engine);
609
+		    cl_free(*engine);
610
+		    return ret;
611
+		}
612
+
613
+	return load_regex_matcher((*engine)->whitelist_matcher,fd,options);
614
+}
615
+
616
+static int cli_loadpdb(struct cl_engine** engine,FILE* fd,unsigned int options)
617
+{
618
+	int ret = 0;
619
+
620
+	if((ret = cli_initengine(engine, options))) {
621
+		cl_free(*engine);
622
+		return ret;
623
+	}
624
+
625
+	if(!(*engine)->domainlist_matcher)
626
+		if(ret = init_domainlist(*engine)) {
627
+	            domainlist_done(*engine);
628
+		    cl_free(*engine);
629
+		    return ret;
630
+		}
631
+	return load_regex_matcher((*engine)->domainlist_matcher,fd,options);
632
+}
633
+#endif
596 634
 static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options)
597 635
 {
598 636
 	char buffer[FILEBUFF], *sig, *virname, *offset, *pt;
... ...
@@ -1118,12 +1162,12 @@ static int cli_load(const char *filename, struct cl_engine **engine, unsigned in
1118 1118
 #ifdef CL_EXPERIMENTAL
1119 1119
     } else if(cli_strbcasestr(filename, ".wdb")) {
1120 1120
 	if(!(options & CL_SCAN_NOPHISHING))
1121
-	    ret = cli_loadwdb(fd, options);
1121
+	    ret = cli_loadwdb(engine, fd, options);
1122 1122
 	else
1123 1123
 	    skipped = 1;
1124 1124
     } else if(cli_strbcasestr(filename, ".pdb")) {
1125 1125
 	if(!(options & CL_SCAN_NOPHISHING))
1126
-	    ret = cli_loadpdb(fd, options);
1126
+	    ret = cli_loadpdb(engine, fd, options);
1127 1127
 	else
1128 1128
 	    skipped = 1;
1129 1129
 #endif
... ...
@@ -1548,6 +1592,9 @@ void cl_free(struct cl_engine *engine)
1548 1548
 	free(metah);
1549 1549
     }
1550 1550
 
1551
+#ifdef CL_EXPERIMENTAL
1552
+   phishing_done(engine);
1553
+#endif
1551 1554
     free(engine);
1552 1555
 }
1553 1556
 
... ...
@@ -19,6 +19,9 @@
19 19
  *  MA 02110-1301, USA.
20 20
  *
21 21
  *  $Log: regex_list.c,v $
22
+ *  Revision 1.7  2006/10/07 11:00:46  tkojm
23
+ *  make the experimental anti-phishing code more thread safe
24
+ *
22 25
  *  Revision 1.6  2006/09/27 19:14:49  njh
23 26
  *  Fix segfault on Solaris
24 27
  *
... ...
@@ -190,15 +193,47 @@ static struct std_classmap {
190 190
 	{"[:xdigit:]",XDIGIT}
191 191
 };
192 192
 
193
+static int cli_iswctype(const char c,const enum wctype_t type);
194
+
195
+/* -------------- NON_THREAD_SAFE BEGIN --------------*/
196
+/* Global variables and functions accessing them, not thread-safe!
197
+ * They must be called only once, at application startup/shutdown! */
193 198
 static const size_t std_class_cnt =  sizeof(std_class)/sizeof(std_class[0]);
194 199
 #define STD_CLASS_CNT sizeof(std_class)/sizeof(std_class[0])
195 200
 typedef char char_bitmap_t[32];
196 201
 static char_bitmap_p char_class_bitmap[STD_CLASS_CNT];
197 202
 static unsigned short int char_class[256];
203
+static int engine_ok = 0;
204
+
205
+void setup_matcher_engine(void)
206
+{
207
+	/*Set up std character classes*/
208
+	size_t i;
209
+	size_t j;
210
+	memset(char_class,0,256);
211
+	for(i=0;i<std_class_cnt;i++) {
212
+		enum wctype_t type = std_class[i].type;
213
+		char_class_bitmap[i]=cli_calloc(256>>3,1);
214
+		for(j=0;j<256;j++)
215
+			if(cli_iswctype(j,type)) {
216
+				char_class[j] |= 1<<i;
217
+				char_class_bitmap[i][j>>3] |= 1<<(j&0x07);
218
+			}
219
+	}	
220
+	engine_ok  = 1;
221
+}
222
+
223
+void matcher_engine_done(void)
224
+{
225
+	size_t i;
226
+	for(i=0;i<std_class_cnt;i++)
227
+			free(char_class_bitmap[i]);
228
+	engine_ok = 0;
229
+}
230
+
198 231
 
232
+/* -------------- NON_THREAD_SAFE END --------------*/
199 233
 /* Prototypes */
200
-static void setup_matcher_engine(void);
201
-static void matcher_engine_done(void);
202 234
 static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info);
203 235
 static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info);
204 236
 static void destroy_tree(struct regex_matcher* matcher);
... ...
@@ -236,6 +271,7 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch
236 236
 	assert(real_url);
237 237
 	assert(display_url);
238 238
 	assert(info);
239
+	assert(engine_ok);
239 240
 	if(!matcher->list_inited)
240 241
 		return 0;
241 242
 	assert(matcher->list_built);
... ...
@@ -328,9 +364,6 @@ static inline struct tree_node* stack_pop(struct node_stack* stack)
328 328
 int init_regex_list(struct regex_matcher* matcher)
329 329
 {
330 330
 	assert(matcher);
331
-	
332
-	setup_matcher_engine();
333
-
334 331
 	matcher->list_inited = 0;
335 332
 	matcher->root_hosts = (struct cli_matcher*) cli_calloc(1,sizeof(*matcher->root_hosts));
336 333
 	if(!matcher->root_hosts)
... ...
@@ -424,7 +457,6 @@ static int add_regex_list_element(struct cli_matcher* root,const char* pattern,c
424 424
 #ifndef NDEBUG
425 425
 void dump_tree(struct tree_node* root);
426 426
 #endif
427
-static int matcher_engine_refcount=0;
428 427
 
429 428
 static int build_regex_list(struct regex_matcher* matcher);
430 429
 /* Load patterns/regexes from file */
... ...
@@ -511,7 +543,6 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
511 511
 		return CL_EMALFDB;
512 512
 	}
513 513
 	regex_list_cleanup(matcher);
514
-	matcher_engine_refcount++;
515 514
 	return CL_SUCCESS;
516 515
 }
517 516
 
... ...
@@ -665,7 +696,6 @@ void regex_list_done(struct regex_matcher* matcher)
665 665
 		matcher->list_loaded=0;
666 666
 	}
667 667
 	if(matcher->list_inited) {
668
-		matcher_engine_done();
669 668
 		matcher->list_inited=0;
670 669
 	}
671 670
 	stack_destroy(&matcher->node_stack);
... ...
@@ -708,38 +738,7 @@ static int cli_iswctype(const char c,const enum wctype_t type)
708 708
 	}
709 709
 }
710 710
 
711
-static int engine_inited=0;
712
-
713
-static void setup_matcher_engine(void)
714
-{
715
-	/*Set up std character classes*/
716
-	size_t i;
717
-	size_t j;
718
-	if(engine_inited)
719
-		return;
720
-	memset(char_class,0,256);
721
-	for(i=0;i<std_class_cnt;i++) {
722
-		enum wctype_t type = std_class[i].type;
723
-		char_class_bitmap[i]=cli_calloc(256>>3,1);
724
-		for(j=0;j<256;j++)
725
-			if(cli_iswctype(j,type)) {
726
-				char_class[j] |= 1<<i;
727
-				char_class_bitmap[i][j>>3] |= 1<<(j&0x07);
728
-			}
729
-	}	
730
-	engine_inited=1;
731
-}
732 711
 
733
-static void matcher_engine_done(void)
734
-{
735
-	size_t i;
736
-	matcher_engine_refcount--;
737
-	if(!matcher_engine_refcount) {
738
-		for(i=0;i<std_class_cnt;i++)
739
-			free(char_class_bitmap[i]);
740
-	}
741
-	engine_inited=0;
742
-}
743 712
 
744 713
 struct token_t
745 714
 {
... ...
@@ -48,6 +48,8 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
48 48
 void regex_list_cleanup(struct regex_matcher* matcher);
49 49
 void regex_list_done(struct regex_matcher* matcher);
50 50
 int is_regex_ok(struct regex_matcher* matcher);
51
+
52
+void setup_matcher_engine(void);/* global, non thread-safe */
51 53
 #endif
52 54
 
53 55
 #endif