GitList

Browse code

phishing patch from Edwin (closes bb#157, #174, #222, #224)

git-svn: trunk@2607

Tomasz Kojm authored on 2007/01/13 02:29:09
Showing 12 changed files

clamav-devel/ChangeLog index 9420617..377b7f9 100644
clamav-devel/clamd/clamd.c index 54760a3..3c22fad 100644
clamav-devel/clamd/server-th.c index 47957ab..b34dcc9 100644
clamav-devel/clamscan/clamscan.c index d7d8b14..5fd6636 100644
clamav-devel/clamscan/clamscan_opt.h index 3ce1a1b..da190be 100644
clamav-devel/clamscan/manager.c index a01cf1f..7c9d043 100644
clamav-devel/docs/man/clamscan.1 index 359b0b7..184ab2c 100644
clamav-devel/etc/clamd.conf index 009e6c7..736b65f 100644
clamav-devel/libclamav/clamav.h index ff5f5fb..e16e17d 100644
clamav-devel/libclamav/phishcheck.c index 0fd40ab..7eb676a 100644
clamav-devel/libclamav/phishcheck.h index c7865b9..2735645 100644
clamav-devel/shared/cfgparser.c index 6061380..e26568a 100644

@@ -1,3 +1,14 @@
                     +Fri Jan 12 18:18:43 CET 2007 (tk)
                     +---------------------------------
                     +  * libclamav: phishing patch from Edwin (closes bb#157, #174, #222, #224)
                     +	         - add new tuning options to phishing code (--phishing-ssl,
                     +		   --phishing-cloak, PhishingAlwaysBlockSSLMismatch,
                     +		   PhishingAlwaysBlockCloak)
                     +		 - rename alldomains/stricturl option to PhishingRestrictedScan
                     +		 - update man pages for phishing options
                     +		 - update clamd/clamscan/cfgparser for new phishing options
                     +		 - add log lines for non-default phishing options
+                    +
                      Tue Jan  9 21:04:03 CET 2007 (tk)
                      ---------------------------------
                        * libclamav: dynamic configuration support

clamav-devel/clamd/clamd.c

History View file @ 19b3e18

@@ -313,6 +313,13 @@ int main(int argc, char **argv)
                          else
                      	logg("Not loading phishing signatures.\n");
                     +#ifdef CL_EXPERIMENTAL
                     +    if(cfgopt(copt,"PhishingScanURLs")->enabled)
                     +	dboptions |= CL_DB_PHISHING_URLS;
                     +    else
                     +	logg("Disabling URL based phishing detection.\n");
                     +#endif
+                    +
                          if(cfgopt(copt, "NodalCoreAcceleration")->enabled) {
                      #ifdef HAVE_NCORE
                      	dboptions |= CL_DB_NCORE;

clamav-devel/clamd/server-th.c

History View file @ 19b3e18

@@ -299,12 +299,6 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in
                          logg("*Listening daemon: PID: %d\n", getpid());
                          max_threads = cfgopt(copt, "MaxThreads")->numarg;
                     -#ifdef CL_EXPERIMENTAL
                     -    if(cfgopt(copt,"PhishingScanURLs")->enabled)
                     -	dboptions |= CL_DB_PHISHING_URLS;
                     -    if(cfgopt(copt,"PhishingStrictURLCheck")->enabled)
                     -	options |= CL_SCAN_PHISHING_DOMAINLIST;
                     -#endif
                          if(cfgopt(copt, "ScanArchive")->enabled || cfgopt(copt, "ClamukoScanArchive")->enabled) {
@@ -422,6 +416,34 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_node *root, unsigned in
                      	logg("HTML support disabled.\n");
+                         }
                     +#ifdef CL_EXPERIMENTAL
                     +    if(cfgopt(copt,"PhishingScanURLs")->enabled) {
+                    +
                     +	if(cfgopt(copt,"PhishingRestrictedScan")->enabled) {
                     +	    /* we don't scan urls from all domains, just those listed in
                     +	     * .pdb file. This is the safe default
                     +	     */
                     +	    options |= CL_SCAN_PHISHING_DOMAINLIST;
                     +	} else {
                     +	    /* This is a false positive prone option, since newsletters, etc.
                     +	     * often contain links that will be classified as phishing attempts,
                     +	     * even though the site they link to isn't a phish site.
                     +	     */
                     +	    logg("Phishing: Checking all URLs, regardless of domain (FP prone).\n");
                     +	}
+                    +
                     +	if(cfgopt(copt,"PhishingAlwaysBlockCloak")->enabled) {
                     +	    options |= CL_SCAN_PHISHING_BLOCKCLOAK;
                     +	    logg("Phishing: Always checking for cloaked urls\n");
                     +	}
+                    +
                     +	if(cfgopt(copt,"PhishingAlwaysBlockSSLMismatch")->enabled) {
                     +	    options |= CL_SCAN_PHISHING_BLOCKSSL;
                     +	    logg("Phishing: Always checking for ssl mismatches\n");
                     +	}
                     +    }
                     +#endif /* CL_EXPERIMENTAL */
+                    +
                          selfchk = cfgopt(copt, "SelfCheck")->numarg;
                          if(!selfchk) {
                      	logg("Self checking disabled.\n");

clamav-devel/clamscan/clamscan.c

History View file @ 19b3e18

@@ -263,7 +263,9 @@ void help(void)
                          mprintf("    --no-phishing-sigs                   Disable signature-based phishing detection\n");
                      #ifdef CL_EXPERIMENTAL
                          mprintf("    --no-phishing-scan-urls              Disable url-based phishing detection\n");
                     -    mprintf("    --phishing-strict-url-check          Enable phishing detection for all domains (might lead to false positives!)\n");
                     +    mprintf("    --no-phishing-restrictedscan         Enable phishing detection for all domains (might lead to false positives!)\n");
                     +    mprintf("    --phishing-ssl                       Always block SSL mismatches in URLs (phishing module)\n");
                     +    mprintf("    --phishing-cloak                     Always block cloaked URLs (phishing module)\n");
                      #endif
                          mprintf("    --no-algorithmic                     Disable algorithmic detection\n");
                          mprintf("    --no-pe                              Disable PE analysis\n");

clamav-devel/clamscan/clamscan_opt.h

History View file @ 19b3e18

@@ -80,8 +80,10 @@ static struct option clamscan_longopt[] = {
                          {"mail-follow-urls", 0, 0, 0},
                          {"no-phishing-sigs", 0, 0, 0},
                      #ifdef CL_EXPERIMENTAL
                     -    {"no-phishing-scan-urls",0,0,0},
                     -    {"phishing-strict-url-check",0,0,0},
                     +    {"no-phishing-scan-urls", 0, 0, 0},
                     +    {"no-phishing-restrictedscan", 0, 0, 0},
                     +    {"phishing-ssl", 0, 0, 0},
                     +    {"phishing-cloak", 0, 0, 0},
                      #endif
                          {"no-algorithmic", 0, 0, 0},
                          {"unzip", 2, 0, 0},

clamav-devel/clamscan/manager.c

History View file @ 19b3e18

@@ -95,8 +95,16 @@ int scanmanager(const struct optstruct *opt)
                      #ifdef CL_EXPERIMENTAL
                          if(!opt_check(opt,"no-phishing-scan-urls"))
                      	dboptions |= CL_DB_PHISHING_URLS;
                     -    if(opt_check(opt,"phishing-strict-url-check"))
                     -	options |= CL_SCAN_PHISHING_DOMAINLIST;
                     +    if(!opt_check(opt,"no-phishing-restrictedscan")) {
                     +	/* not scanning all domains, check only URLs with domains from .pdb */
                     +	dboptions |= CL_SCAN_PHISHING_DOMAINLIST;
                     +    }
                     +    if(opt_check(opt,"phishing-ssl")) {
                     +	   dboptions |= CL_SCAN_PHISHING_BLOCKSSL;
                     +    }
                     +    if(opt_check(opt,"phishing-cloak")) {
                     +	    dboptions |= CL_SCAN_PHISHING_BLOCKCLOAK;
                     +    }
                      #endif
                          if(opt_check(opt, "dev-ac-only")) {

clamav-devel/docs/man/clamscan.1

History View file @ 19b3e18

@@ -76,9 +76,15 @@ Disable detection of phishing messages.
                      \fB\-\-no\-phishing\-scan\-urls\fR
                      Disable url-based phishing detection. (Only available in experimental builds)
                      .TP
                     -\fB\-\-phish\-scan\-alldomains\fR
                     +\fB\-\-no\-phishing\-restrictedscan\fR
                      Enable phishing detection for all domains (might lead to false positives!). (Only available in experimental builds)
                      .TP
                     +\fB\-\-phishing\-ssl\fR
                     +Always block SSL mismatches in URLs (might lead to false positives!). (Only available in experimental builds)
                     +.TP
                     +\fB\-\-phishing\-cloak\fR
                     +Always block cloaked URLs (might lead to some false positives). (Only available in experimental builds)
                     +.TP
                      \fB\-\-no\-algo\fR
                      In some cases (eg. complex malware, exploits in graphic files, and others), ClamAV uses special algorithms to provide accurate detection. This option disables the algorithmic detection.
                      .TP

clamav-devel/etc/clamd.conf

History View file @ 19b3e18

@@ -222,17 +222,33 @@ LocalSocket /tmp/clamd
                      # Default: yes
                      #PhishingSignatures yes
                     -# Use phishing detection for all domains (not just those listed in the .pdb database).
                     -# It is not recommended to turn this option on, it is mean for internal use.
                     -# (available in experimental builds only)
                     -# Default: no
                     -#PhishingStrictURLCheck no
                      # Scan urls found in mails for phishing attempts.
                      # (available in experimental builds only)
                      # Default: yes
                      #PhishingScanURLs yes
                     +# Use phishing detection only for domains listed in the .pdb database. It is
                     +# not recommended to have this option turned off, because scanning of all
                     +# domains may lead to many false positives!
                     +# (available in experimental builds only)
                     +# Default: yes
                     +#PhishingRestrictedScan yes
+                    +
                     +# Always block SSL mismatches in URLs, even if the URL isn't in the database.
                     +# This can lead to false positives.
                     +# (available in experimental builds only)
                     +#
                     +# Default: no
                     +#PhishingAlwaysBlockSSLMismatch no
+                    +
                     +# Always block cloaked URLs, even if the URL isn't in the database.
                     +# There is a remote possibility that this leads to false positives.
                     +# (available in experimental builds only)
                     +#
                     +# Default: no
                     +#PhishingAlwaysBlockCloak no
+                    +
                      ##
                      ## HTML
                      ##

clamav-devel/libclamav/clamav.h

History View file @ 19b3e18

@@ -88,8 +88,10 @@ extern "C"
                      #define CL_SCAN_MAILURL		    0x80
                      #define CL_SCAN_BLOCKMAX	    0x100
                      #define CL_SCAN_ALGO		    0x200
                     -#define CL_SCAN_PHISHING_DOMAINLIST 0x800
                     -#define CL_SCAN_ELF		    0x1000
                     +#define CL_SCAN_PHISHING_DOMAINLIST 0x400
                     +#define CL_SCAN_PHISHING_BLOCKSSL   0x800 /* ssl mismatches, not ssl by itself*/
                     +#define CL_SCAN_PHISHING_BLOCKCLOAK 0x1000
                     +#define CL_SCAN_ELF		    0x2000
                      /* recommended scan settings */
                      #define CL_SCAN_STDOPT		(CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGO | CL_SCAN_ELF)

clamav-devel/libclamav/phishcheck.c

History View file @ 19b3e18

@@ -19,6 +19,9 @@
                       *  MA 02110-1301, USA.
+                      *
                       *  $Log: phishcheck.c,v $
                     + *  Revision 1.17  2007/01/12 17:29:09  tkojm
                     + *  phishing patch from Edwin (closes bb#157, #174, #222, #224)
                     + *
                       *  Revision 1.16  2006/12/20 01:23:50  tkojm
                       *  options cleanup
+                      *
@@ -268,6 +271,15 @@ For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Fla
                      /* Constant strings and tables */
                      static char empty_string[]="";
+                    +
                     +#define ANY_CLOAK "(0[xX])?([a-fA-F0-9]+\\.?)+"
                     +#define CLOAK_REGEX_HEXURL "("ANY_CLOAK")?0[xX][a-fA-F0-9]+\\.?"ANY_CLOAK
                     +#define OCTAL_CLOAK "("ANY_CLOAK")?000[0-9]+\\.?"ANY_CLOAK
                     +#define DWORD_CLOAK "[0-9]{8,}"
+                    +
                     +static const char cloaked_host_regex[] = "^(("CLOAK_REGEX_HEXURL")|("OCTAL_CLOAK")|("DWORD_CLOAK"))$";
+                    +
+                    +
                      static const char tld_regex[] = "^"iana_tld"$";
                      static const char cctld_regex[] = "^"iana_cctld"$";
                      static const char dotnet[] = ".net";
@@ -944,6 +956,12 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
                      				continue;
                      			if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST)
                      				urls.flags |= DOMAINLIST_REQUIRED;
                     +			if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) {
                     +				urls.always_check_flags |= CHECK_SSL;
                     +			}
                     +			if (ctx->options & CL_SCAN_PHISHING_BLOCKCLOAK) {
                     +				urls.always_check_flags |= CHECK_CLOAKING;
                     +			}
                      			string_init_c(&urls.realLink,(char*)hrefs->value[i]);
                      /*			if(!hrefs->contents[i]->isClosed) {
                      				blobAddData(hrefs->contents[i],empty_string,1);
@@ -1035,7 +1053,10 @@ static char* str_compose(const char* a,const char* b,const char* c)
                      static inline char hex2int(const unsigned char* src)
+                     {
                     -	return hextable[src[0]]<<4 | hextable[src[1]];
                     +	return (src[0] == '0' && src[1] == '0') ?
                     +		0x1 :/* don't convert %00 to \0, use 0x1
                     + 		      * this value is also used by cloak check*/
                     +		hextable[src[0]]<<4 | hextable[src[1]];
+                     }
                      static void free_regex(regex_t* p)
@@ -1066,6 +1087,12 @@ int phishing_init(struct cl_engine* engine)
                      	cli_dbgmsg("Initializing phishcheck module\n");
                     +	if(build_regex(&pchk->preg_hexurl,cloaked_host_regex,1)) {
                     +		free(pchk);
                     +		engine->phishcheck = NULL;
                     +		return CL_EFORMAT;
                     +	}
+                    +
                      	if(build_regex(&pchk->preg_cctld,cctld_regex,1)) {
                      		free(pchk);
                      		engine->phishcheck = NULL;
@@ -1106,6 +1133,7 @@ void phishing_done(struct cl_engine* engine)
                      	cli_dbgmsg("Cleaning up phishcheck\n");
                      	if(pchk && !pchk->is_disabled) {
                      		free_regex(&pchk->preg);
                     +		free_regex(&pchk->preg_hexurl);
                      		free_regex(&pchk->preg_cctld);
                      		free_regex(&pchk->preg_tld);
                      		free_regex(&pchk->preg_numeric);
@@ -1167,7 +1195,8 @@ int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url
                      		string_free(host);
                      		return CL_PHISH_TEXTURL;
+                     	}
                     -	if(isReal && (!strncmp(host->data,"0x",2) || !strncmp(host->data,"0X",2))) {
                     +	if(!regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
                     +		/* use a regex here, so that we don't accidentally block 0xacab.net style hosts */
                      		string_free(host);
                      		return CL_PHISH_HEX_URL;
+                     	}
@@ -1217,6 +1246,7 @@ int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int ho
                      	return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
+                     }
+                    +
                      /* urls can't contain null pointer, caller must ensure this */
                      enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
+                     {
@@ -1267,17 +1297,23 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
                      			if(domainlist_match(engine,urls->displayLink.data,urls->realLink.data,1,&urls->flags))
                      				phishy |= DOMAIN_LISTED;
                      			else {
                     +			}
                     +		}
                     +	}
+                    +
                     +	if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) {
                     +		urls->flags &= urls->always_check_flags;
                     +		if(!urls->flags) {
                      				free_if_needed(&host_url);
                      				return CL_PHISH_HOST_NOT_LISTED;
+                     			}
+                     		}
                     -	}
                      	if(urls->flags&CHECK_CLOAKING) {
                      		/*Checks if URL is cloaked.
                      		Should we check if it containts another http://, https://?
                      		No because we might get false positives from redirect services.*/
                     -		if(strstr(urls->realLink.data,"%00")) {
                     +		if(strchr(urls->realLink.data,'\0x1')) {
                      			free_if_needed(&host_url);
                      			return CL_PHISH_CLOAKED_NULL;
+                     		}
@@ -1287,6 +1323,7 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
+                     		}
+                     	}
+                    +
                      	if(urls->displayLink.data[0]=='\0') {
                      		free_if_needed(&host_url);
                      		return CL_PHISH_CLEAN;
@@ -1297,12 +1334,22 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
                      		return CL_PHISH_SSL_SPOOF;
+                     	}
                     +	if(!urls->flags&CHECK_CLOAKING && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) {
                     +		free_if_needed(&host_url);
                     +		return CL_PHISH_HOST_NOT_LISTED;
                     +	}
+                    +
                      	if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy)))
+                     	{
                      		free_if_needed(&host_url);
                      		return rc;
+                     	}
                     +	if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
                     +		free_if_needed(&host_url);
                     +		return CL_PHISH_HOST_NOT_LISTED;
                     +	}
+                    +
                      	if(!strncmp(urls->displayLink.data,cid,cid_len))/* cid: image */{
                      		free_if_needed(&host_url);
                      		return CL_PHISH_CLEAN_CID;

clamav-devel/libclamav/phishcheck.h

History View file @ 19b3e18

@@ -62,6 +62,7 @@ struct phishcheck {
                      	regex_t preg_tld;
                      	regex_t preg_cctld;
                      	regex_t preg_numeric;
                     +	regex_t preg_hexurl;
                      	char*    url_regex;
                      	int      is_disabled;
                      };
@@ -70,6 +71,7 @@ struct url_check {
                      	struct string realLink;
                      	struct string displayLink;
                      	unsigned short       flags;
                     +	unsigned short always_check_flags;
                      };
                      int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);

clamav-devel/shared/cfgparser.c

History View file @ 19b3e18

@@ -52,7 +52,11 @@ struct cfgoption cfg_options[] = {
                          {"PhishingSignatures", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
                      #ifdef CL_EXPERIMENTAL
                          {"PhishingScanURLs",OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
                     -    {"PhishingStrictURLCheck", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
                     +    /* these are FP prone options, if default isn't used */
                     +    {"PhishingAlwaysBlockCloak", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
                     +    {"PhishingAlwaysBlockSSLMismatch", OPT_BOOL, 0, NULL, 0, OPT_CLAMD},
                     +    {"PhishingRestrictedScan", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
                     +    /* end of FP prone options */
                      #endif
                          {"AlgorithmicDetection", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},
                          {"ScanHTML", OPT_BOOL, 1, NULL, 0, OPT_CLAMD},