libclamav/phishcheck.h
98cf4932
ad387e52
 #ifdef	HAVE_REGEX_H
 #include <regex.h>
 #endif
98cf4932
 
 /* Numeric value of the first enumerator after CL_PHISH_NODECISION. */
 #define CL_PHISH_BASE 100
 /*
  * Result codes of the phishing checks.
  * CL_PHISH_NODECISION is 0; all other enumerators are numbered
  * consecutively, in declaration order, starting at CL_PHISH_BASE.
  * Do not reorder: the numeric values follow from the order below.
  */
 enum phish_status {
 	CL_PHISH_NODECISION = 0,
 	/* statuses that isPhishing() reports as clean (0) */
 	CL_PHISH_CLEAN = CL_PHISH_BASE,
 	CL_PHISH_CLEANUP_OK,
 	CL_PHISH_HOST_OK,
 	CL_PHISH_DOMAIN_OK,
 	CL_PHISH_HOST_NOT_LISTED,
 	CL_PHISH_REDIR_OK,
 	CL_PHISH_HOST_REDIR_OK,
 	CL_PHISH_DOMAIN_REDIR_OK,
 	CL_PHISH_HOST_REVERSE_OK,
 	CL_PHISH_DOMAIN_REVERSE_OK,
 	CL_PHISH_WHITELISTED,
 	CL_PHISH_HOST_WHITELISTED,
 	CL_PHISH_CLEAN_CID,
 	CL_PHISH_TEXTURL,
 	CL_PHISH_MAILTO_OK,
 	/* statuses that isPhishing() reports as phishing (1) */
 	CL_PHISH_CLOAKED_UIU,
 	CL_PHISH_NUMERIC_IP,
 	CL_PHISH_HEX_URL,
 	CL_PHISH_CLOAKED_NULL,
 	CL_PHISH_SSL_SPOOF,
 	CL_PHISH_NOMATCH
 };
 
 /*
  * Bit flags selecting individual checks. Each flag occupies its own bit,
  * except DOMAIN_SUFFICIENT, which also carries the HOST_SUFFICIENT bit.
  * NOTE(review): presumably these are the values stored in
  * url_check.flags -- confirm against the implementation.
  */
 #define HOST_SUFFICIENT      0x001
 #define DOMAIN_SUFFICIENT    (HOST_SUFFICIENT | 0x002)
 #define DO_REVERSE_LOOKUP    0x004
 #define CHECK_REDIR          0x008
 #define CHECK_SSL            0x010
 #define CHECK_CLOAKING       0x020
 #define CLEANUP_URL          0x040
 #define CHECK_DOMAIN_REVERSE 0x080
 #define CHECK_IMG_URL        0x100
 #define DOMAINLIST_REQUIRED  0x200
 /* img checking disabled by default (original note); the mask below nevertheless includes CHECK_IMG_URL */
 /*
  * Combined mask. Despite the name it leaves out DO_REVERSE_LOOKUP,
  * CHECK_REDIR and CHECK_DOMAIN_REVERSE.
  */
 #define CL_PHISH_ALL_CHECKS \
 	(CLEANUP_URL | DOMAIN_SUFFICIENT | CHECK_SSL | CHECK_CLOAKING | \
 	 DOMAINLIST_REQUIRED | CHECK_IMG_URL)
 
 /*
  * String holder used by the url_check helpers in this header.
  * NOTE(review): field meanings below are inferred from names and from
  * the string_assign_ref() prototype -- confirm against phishcheck.c.
  */
 struct string {
 	int            refcount; /* presumably a reference count -- TODO confirm who increments/decrements */
 	struct string *ref;      /* another struct string this one is linked to (may be unset) */
 	char          *data;     /* character data; ownership is not visible from this header */
 };
 
ad387e52
 /*
  * Phishing-check state: four regex_t objects plus a pattern string
  * and a disable flag. It is passed as a const pointer to the URL helpers
  * declared in this header (get_host, isTLD, isURL, ...).
  */
 struct phishcheck {
 	regex_t  preg;         /* NOTE(review): presumably the URL regex -- confirm */
 	regex_t  preg_tld;     /* NOTE(review): presumably matches top-level domains -- confirm */
 	regex_t  preg_cctld;   /* NOTE(review): presumably matches country-code TLDs -- confirm */
 	regex_t  preg_numeric; /* NOTE(review): presumably matches numeric hosts/URLs -- confirm */
 	char    *url_regex;    /* pattern text; which regex it feeds is not visible here */
 	int      is_disabled;  /* NOTE(review): presumably set via phish_disable() -- confirm */
 };
 
98cf4932
 /*
  * A pair of links plus a flag word, handed to phishingCheck() and the
  * url_* helpers declared in this header.
  */
 struct url_check {
 	struct string  realLink;    /* NOTE(review): presumably the actual link target -- confirm */
 	struct string  displayLink; /* NOTE(review): presumably the text shown for the link -- confirm */
 	unsigned short flags;       /* NOTE(review): presumably a mask of the check flags (HOST_SUFFICIENT, ...) -- confirm */
 };
 
 /*
  * Scan entry point. Takes a message, a directory path, the scan context
  * and a tag_arguments_t of hrefs; returns int.
  * NOTE(review): the meaning of the return value is not visible in this
  * header -- confirm against the implementation.
  */
 int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);
6cecbecd
 /*
  * Checks one url_check against the engine and returns a phish_status;
  * isPhishing() turns that status into a 0/1 verdict.
  * The engine is not modified through this pointer (const).
  */
 enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
98cf4932
 
6cecbecd
 /*
  * Whitelist lookup for a url_check; hostOnly is an int flag.
  * NOTE(review): presumably hostOnly restricts matching to the host part,
  * and the return value is a match/no-match indicator -- confirm.
  */
 int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly);
98cf4932
 /*
  * Helpers for struct url_check / struct string.
  * NOTE(review): ownership and reference-count rules are not visible in
  * this header -- check the implementation before relying on them.
  */
 /* Initializes a url_check in place. */
 void url_check_init(struct url_check* urls);
 /* Releases a struct string (presumably honouring refcount -- confirm). */
 void string_free(struct string* str);
 /* Assigns src to dest. */
 void string_assign(struct string* dest,struct string* src);
 /* Assigns a raw char* to dest (presumably dest takes ownership of data -- confirm). */
 void string_assign_c(struct string* dest,char* data);
45411cce
 /*
  * Assigns to dest from the character range given by start and end.
  * Returns int; NOTE(review): presumably an error code, and whether end
  * is inclusive is not visible here -- confirm.
  */
 int string_assign_dup(struct string* dest,const char* start,const char* end);
98cf4932
 /* Assigns data to dest together with a struct string reference. */
 void string_assign_ref(struct string* dest,struct string* ref,char* data);
 /* Frees resources held by a url_check where applicable (conditions not visible here). */
 void free_if_needed(struct url_check* url);
45411cce
 /*
  * URL analysis helpers. Functions taking a const struct phishcheck* do
  * not modify it through that pointer.
  * NOTE(review): in the functions below, isReal presumably selects
  * realLink vs. displayLink of a url_check, and phishy is an int
  * out-parameter presumably mapped to a status by phishy_map() -- confirm.
  */
 /* Extracts the host of URL into dest; returns int (meaning not visible here). */
 int get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy);
ad387e52
 /* Predicate on a string; presumably tests for a country-code TLD -- confirm. */
 int isCountryCode(const struct phishcheck* s,const char* str);
 /* Predicate on str with an explicit length; presumably tests for a TLD -- confirm. */
 int isTLD(const struct phishcheck* s,const char* str,int len);
 /* Derives the domain of host into dest. */
 void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host);
98cf4932
 int ip_reverse(struct url_check* urls,int isReal);
 void reverse_lookup(struct url_check* url,int isReal);
 /* Predicate on a host string. */
 int isNumeric(const char* host);
 /* Predicate on a URL string. */
 int isSSL(const char* URL);
45411cce
 /* Operates on URL in place (non-const struct string*); returns int. */
 int cleanupURL(struct string* URL,int isReal);
98cf4932
 /* Operates on URL in place (non-const struct string*). */
 void get_redirected_URL(struct string* URL);
ad387e52
 /* Predicate on a URL string, using the phishcheck state. */
 int isURL(const struct phishcheck* pchk,const char* URL);
98cf4932
 /* Cleans up the links of a url_check and returns a phish_status. */
 enum phish_status cleanupURLs(struct url_check* urls);
ad387e52
 /* Predicate on a URL string, using the phishcheck state. */
 int isNumericURL(const struct phishcheck* pchk, const char* URL);
45411cce
 /* url_check-level counterpart of get_host(): fills host_url from url. */
 int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy);
ad387e52
 /* url_check-level counterpart of get_domain(): fills domains from url. */
 void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains);
98cf4932
 /* Maps an int phishy value to a phish_status, with a caller-supplied fallback status. */
 enum phish_status phishy_map(int phishy,enum phish_status fallback);
 /* Predicate on a URL string. */
 int isEncoded(const char* url);
6cecbecd
 
ad387e52
 /*
  * Disables phishing checks for the given engine; reason is a
  * caller-supplied string (NOTE(review): presumably logged -- confirm).
  */
 void phish_disable(struct cl_engine* engine,const char* reason);
6cecbecd
 /* Global, non-thread-safe functions, call only once! */
ad387e52
 /* Sets up phishing-check state for the engine; returns int (NOTE(review): presumably an error code -- confirm). */
 int phishing_init(struct cl_engine* engine);
6cecbecd
 /* Tears down what phishing_init() set up for the engine. */
 void phishing_done(struct cl_engine* engine);
 /* end of non-thread-safe functions */
 
98cf4932
 
 /*
  * Turn a phish_status into a verdict: 0 = not phishing, 1 = phishing.
  * Only the statuses listed in the switch count as clean. Every other
  * value yields 1, including CL_PHISH_NODECISION, the explicit phishing
  * statuses (CL_PHISH_HEX_URL, CL_PHISH_CLOAKED_NULL, CL_PHISH_SSL_SPOOF,
  * CL_PHISH_CLOAKED_UIU, CL_PHISH_NUMERIC_IP, CL_PHISH_NOMATCH) and any
  * value outside the enum.
  */
 static inline int isPhishing(enum phish_status rc)
 {
 	int verdict = 1; /* anything not explicitly clean is treated as phishing */
 	switch(rc) {
 		case CL_PHISH_CLEAN:
 		case CL_PHISH_CLEANUP_OK:
 		case CL_PHISH_HOST_OK:
 		case CL_PHISH_DOMAIN_OK:
 		case CL_PHISH_HOST_NOT_LISTED:
 		case CL_PHISH_REDIR_OK:
 		case CL_PHISH_HOST_REDIR_OK:
 		case CL_PHISH_DOMAIN_REDIR_OK:
 		case CL_PHISH_HOST_REVERSE_OK:
 		case CL_PHISH_DOMAIN_REVERSE_OK:
 		case CL_PHISH_WHITELISTED:
 		case CL_PHISH_HOST_WHITELISTED:
 		case CL_PHISH_CLEAN_CID:
 		case CL_PHISH_TEXTURL:
 		case CL_PHISH_MAILTO_OK:
 			verdict = 0;
 			break;
 		default:
 			break;
 	}
 	return verdict;
 }
 /*
  * Returns a const char* for the given phish_status.
  * NOTE(review): presumably a human-readable description in static
  * storage that the caller must not free -- confirm against the implementation.
  */
 const char* phishing_ret_toString(enum phish_status rc);
 #endif
 
 #endif