libclamav/regex_list.c
bd912dd8
 /*
  *  Match a string against a list of patterns/regexes.
  *
2023340a
  *  Copyright (C) 2007-2008 Sourcefire, Inc.
  *
  *  Authors: Török Edvin
bd912dd8
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
38a00199
  *  published by the Free Software Foundation.
bd912dd8
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #ifdef CL_THREAD_SAFE
 #ifndef _REENTRANT
 #define _REENTRANT
 #endif
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
056d95dc
 #include <zlib.h>
bd912dd8
 
 #include <limits.h>
 #include <sys/types.h>
2e11bcdf
 #include <assert.h>
 
bd912dd8
 
53ff1b04
 #include "regex/regex.h"
bd912dd8
 
 
 #include "clamav.h"
 #include "others.h"
 #include "regex_list.h"
 #include "matcher-ac.h"
6b656d36
 #include "matcher.h"
43ecd9a1
 #include "str.h"
056d95dc
 #include "readdb.h"
2e11bcdf
 #include "jsparse/textbuf.h"
5ee56e41
 #include "regex_suffix.h"
589d8d8e
 #include "default.h"
627b7626
 #include "hashtab.h"
0728972e
 
 #include "mpool.h"
 
2e11bcdf
 /* Prototypes */
9ee053fe
 static regex_t *new_preg(struct regex_matcher *matcher);
2e11bcdf
 static size_t reverse_string(char *pattern);
a497dce5
 static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
2e11bcdf
 static int add_static_pattern(struct regex_matcher *matcher, char* pattern);
 /* ---------- */
bd912dd8
 
ed654433
 #define MATCH_SUCCESS 0
bd912dd8
 #define MATCH_FAILED  -1
 
 /*
  * Call this function when an unrecoverable error has occurred (instead of exit).
  * It releases everything held by @matcher and then marks it as failed.
  */
 static void fatal_error(struct regex_matcher* matcher)
 {
 	/* release first: regex_list_done() only frees while list_inited is still 1 */
 	regex_list_done(matcher);

 	/* -1 lets the phishing module know we tried to load a whitelist and failed,
 	 * so it will disable itself too */
 	matcher->list_inited = -1;
 }
 
 
b5341ac0
 static inline size_t get_char_at_pos_with_skip(const struct pre_fixup_info* info, const char* buffer, size_t pos)
 {
97ba1aed
 	const char* str;
ecd2643b
 	size_t realpos = 0;
b5341ac0
 	if(!info) {
97ba1aed
 		return (pos <= strlen(buffer)) ? buffer[pos>0 ? pos-1:0] : '\0';
b5341ac0
 	}
97ba1aed
 	str = info->pre_displayLink.data;
8ea27f5c
 	cli_dbgmsg("calc_pos_with_skip: skip:%lu, %lu - %lu \"%s\",\"%s\"\n", pos, info->host_start, info->host_end, str, buffer);
ecd2643b
 	pos += info->host_start;
97ba1aed
 	while(str[realpos] && !isalnum(str[realpos])) realpos++;
 	for(; str[realpos] && (pos>0); pos--) {
 		while(str[realpos]==' ') realpos++;
b5341ac0
 		realpos++;
 	}
97ba1aed
 	while(str[realpos]==' ') realpos++;
75fe1251
 	cli_dbgmsg("calc_pos_with_skip:%s\n",str+realpos);
97ba1aed
 	return (pos>0 && !str[realpos]) ? '\0' : str[realpos>0?realpos-1:0];
b5341ac0
 }
 
2e11bcdf
 /*
  * Decides whether a static pattern hit is a real host/subdomain match.
  *
  * The hit is accepted when the character following the looked-up text is
  * ' ', '\0', '/' or '?', and either the pattern covers the whole buffer or
  * the character just before the matched part is '.' or ' '.
  * On acceptance a '.' may be inserted into the real URL in front of the
  * matched part (using the spare leading '.' of @orig_real_url).
  *
  * Returns 1 for an accepted match, 0 otherwise (also when @regex or its
  * pattern is NULL).
  */
 static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
 {
 	char c;
 	size_t match_len;
 	int accepted = 0;

 	if(!regex || !regex->pattern)
 		return 0;
 	match_len = strlen(regex->pattern);

 	/* character right after the looked-up text */
 	c = get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len+1);
 	if(c==' ' || c=='\0' || c=='/' || c=='?') {
 		if(match_len == buffer_len) {
 			/* full match */
 			accepted = 1;
 		} else if(match_len < buffer_len) {
 			/* subdomain matched only if a separator precedes the matched part */
 			c = get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len);
 			if(c=='.' || c==' ')
 				accepted = 1;
 		}
 	}

 	if(!accepted) {
 		cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
 		return 0;
 	}

 	/* we have an extra / at the end */
 	if(match_len > 0)
 		match_len--;
 	cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
 	cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
 	if(real_len >= match_len + 1) {
 		const size_t dot_pos = real_len - match_len - 1;
 		if(real_url[dot_pos] != '.') {
 			/* we need to shift left, and insert a '.'
 			 * we have an extra '.' at the beginning inserted by get_host to have room,
 			 * orig_real_url has to be used here,
 			 * because we want to overwrite that extra '.' */
 			const size_t orig_real_len = strlen(orig_real_url);
 			const size_t head_len = orig_real_len - match_len - 1;
 			cli_dbgmsg("No dot here:%s\n",real_url+dot_pos);
 			real_url = orig_real_url;
 			memmove(real_url, real_url+1, head_len);
 			real_url[head_len]='.';
 			cli_dbgmsg("After inserting .: %s\n", real_url);
 		}
 	}
 	return 1;
 }
 
bd912dd8
 /*
  * @matcher - matcher structure to use
  * @real_url - href target
  * @display_url - <a> tag contents
  * @hostOnly - if you want to match only the host part
ec481027
  * @is_whitelist - is this a lookup in whitelist?
bd912dd8
  *
  * @return - CL_SUCCESS - url doesn't match
  *         - CL_VIRUS - url matches list
  *
  * Do not send NULL pointers to this function!!
  *
  */
2e11bcdf
 int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,const char **info, int is_whitelist)
bd912dd8
 {
75fe1251
 	char* orig_real_url = real_url;
2e11bcdf
 	struct regex_list *regex;
e6f14011
 	size_t real_len, display_len, buffer_len;
2e11bcdf
 
 	assert(matcher);
 	assert(real_url);
 	assert(display_url);
 	*info = NULL;
bd912dd8
 	if(!matcher->list_inited)
 		return 0;
2e11bcdf
 	assert(matcher->list_built);
75fe1251
 	/* skip initial '.' inserted by get_host */
 	if(real_url[0] == '.') real_url++;
 	if(display_url[0] == '.') display_url++;
e6f14011
 	real_len    = strlen(real_url);
 	display_len = strlen(display_url);
 	buffer_len  = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
 	if(buffer_len < 3) {
 		/* too short, no match possible */
 		return 0;
 	}
bd912dd8
 	{
2e11bcdf
 		char *buffer = cli_malloc(buffer_len+1);
 		char *bufrev;
d4e1cb47
 		int rc = 0, root;
4e9ab8ed
 		struct cli_ac_data mdata;
9ee053fe
 		struct cli_ac_result *res = NULL;
bd912dd8
 
 		if(!buffer)
 			return CL_EMEM;
 
 		strncpy(buffer,real_url,real_len);
2e11bcdf
 		buffer[real_len]= (!is_whitelist && hostOnly) ? '/' : ':';
ec481027
 		if(!hostOnly || is_whitelist) {
62b2ecc7
 			strncpy(buffer+real_len+1,display_url,display_len);
f15a8c49
 		}
2e11bcdf
 		buffer[buffer_len - 1] = '/';
 		buffer[buffer_len]=0;
62b2ecc7
 		cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
bd912dd8
 
aca9ea82
 		if((rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
2e11bcdf
 			return rc;
 
 		bufrev = cli_strdup(buffer);
 		if(!bufrev)
 			return CL_EMEM;
 		reverse_string(bufrev);
e0ac80ab
 		rc = filter_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
063ddd53
 		if(rc == -1) {
9ee053fe
 			free(buffer);
 			free(bufrev);
2e11bcdf
 			/* filter says this suffix doesn't match.
 			 * The filter has false positives, but no false
 			 * negatives */
 			return 0;
 		}
33872a43
 		rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)&regex, &res, &matcher->suffixes,&mdata,0,0,NULL,AC_SCAN_VIR,NULL);
10290ba3
 		free(bufrev);
2e11bcdf
 		cli_ac_freedata(&mdata);
 
9ee053fe
 		rc = 0;
d4e1cb47
 		root = matcher->root_regex_idx;
 		while(res || root) {
9ee053fe
 			struct cli_ac_result *q;
d4e1cb47
 			if (!res) {
 			    regex = matcher->suffix_regexes[root].head;
 			    root = 0;
 			} else {
 			    regex = res->customdata;
 			}
9ee053fe
 			while(!rc && regex) {
2e11bcdf
 				/* loop over multiple regexes corresponding to
 				 * this suffix */
9ee053fe
 				if (!regex->preg) {
2e11bcdf
 					/* we matched a static pattern */
 					rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
 				} else {
9ee053fe
 					rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
c33ec470
 				}
2e11bcdf
 				if(rc) *info = regex->pattern;
 				regex = regex->nxt;
9ee053fe
 			}
d4e1cb47
 			if (res) {
 			    q = res;
 			    res = res->next;
 			    free(q);
 			}
2e11bcdf
 		}
bd912dd8
 		free(buffer);
 		if(!rc)
f74bc827
 			cli_dbgmsg("Lookup result: not in regex list\n");
 		else
 			cli_dbgmsg("Lookup result: in regex list\n");
bd912dd8
 		return rc;
 	}
 }
 
 
 /* Initialization & loading */
 /* Initializes @matcher, allocating necessary substructures */
5b74e89a
 int init_regex_list(struct regex_matcher* matcher, uint8_t dconf_prefiltering)
bd912dd8
 {
1e2969a4
 #ifdef USE_MPOOL
47d40feb
 	mpool_t *mp = matcher->mempool;
1e2969a4
 #endif
e3b67c5e
 	int rc;
ec481027
 
2e11bcdf
 	assert(matcher);
 	memset(matcher, 0, sizeof(*matcher));
bd912dd8
 
 	matcher->list_inited=1;
2e11bcdf
 	matcher->list_built=0;
bd912dd8
 	matcher->list_loaded=0;
cc447ac8
 	cli_hashtab_init(&matcher->suffix_hash, 512);
1e2969a4
 #ifdef USE_MPOOL
b36e9f8a
 	matcher->mempool = mp;
 	matcher->suffixes.mempool = mp;
563582a1
 	assert(mp && "mempool must be initialized");
1e2969a4
 #endif
5b74e89a
 	if((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
2e11bcdf
 		return rc;
 	}
1e2969a4
 #ifdef USE_MPOOL
4e46d65d
 	matcher->sha256_hashes.mempool = mp;
a3d029b9
 	matcher->hostkey_prefix.mempool = mp;
1e2969a4
 #endif
4e46d65d
 	if((rc = cli_bm_init(&matcher->sha256_hashes))) {
b611b5ff
 		return rc;
 	}
a3d029b9
 	if((rc = cli_bm_init(&matcher->hostkey_prefix))) {
 		return rc;
 	}
e0ac80ab
 	filter_init(&matcher->filter);
bd912dd8
 	return CL_SUCCESS;
 }
 
9db68157
 static int functionality_level_check(char* line)
50c27591
 {
 	char* ptmin;
 	char* ptmax;
 	size_t j;
 
 	ptmin = strrchr(line,':');
 	if(!ptmin) 
 		return CL_SUCCESS;
 	
 	ptmin++;
 
 	ptmax = strchr(ptmin,'-');
 	if(!ptmax) 
 		return CL_SUCCESS;/* there is no functionality level specified, so we're ok */
 	else {
9db68157
 		size_t min, max;
50c27591
 		ptmax++;
9db68157
 		for(j=0;j+ptmin+1 < ptmax;j++)
50c27591
 			if(!isdigit(ptmin[j])) 
 				return CL_SUCCESS;/* not numbers, not functionality level */
 		for(j=0;j<strlen(ptmax);j++)
 			if(!isdigit(ptmax[j])) 
 				return CL_SUCCESS;/* see above */
 		ptmax[-1]='\0';
 		min = atoi(ptmin);
 		if(strlen(ptmax)==0)
  			max = INT_MAX; 		
 		else
 			max = atoi(ptmax);
 
 		if(min > cl_retflevel()) {
43ecd9a1
 			cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n",line,(unsigned int)min);
50c27591
 			return CL_EMALFDB; 
 		}
 
 		if(max < cl_retflevel()) 
 			return CL_EMALFDB;
 		ptmin[-1]='\0';
 		return CL_SUCCESS;
b611b5ff
 	}
 }
 
a3d029b9
 static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl, int is_prefix)
b611b5ff
 {
 	int rc;
47d40feb
 	struct cli_bm_patt *pat = mpool_calloc(matcher->mempool, 1, sizeof(*pat));
a3d029b9
 	struct cli_matcher *bm;
816d66a8
 	const char *vname = NULL;
b611b5ff
 	if(!pat)
 		return CL_EMEM;
47d40feb
 	pat->pattern = (unsigned char*)cli_mpool_hex2str(matcher->mempool, pattern);
b611b5ff
 	if(!pat->pattern)
 		return CL_EMALFDB;
4e46d65d
 	pat->length = 32;
a3d029b9
 	if (is_prefix) {
 	    pat->length=4;
 	    bm = &matcher->hostkey_prefix;
 	} else {
 	    bm = &matcher->sha256_hashes;
 	}
 
627b7626
 	if (!matcher->sha256_pfx_set.keys) {
cc447ac8
 	    if((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
627b7626
 		return rc;
 	    }
 	}
 
816d66a8
 	if (fl != 'W' && pat->length == 32 &&
cc447ac8
 	    cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
fb0a54dd
 	    cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes,0,NULL,NULL,NULL) == CL_VIRUS) {
816d66a8
 	    if (*vname == 'W') {
 		/* hash is whitelisted in local.gdb */
 		cli_dbgmsg("Skipping hash %s\n", pattern);
 		mpool_free(matcher->mempool, pat->pattern);
 		mpool_free(matcher->mempool, pat);
 		return CL_SUCCESS;
 	    }
 	}
 	pat->virname = mpool_malloc(matcher->mempool, 1);
 	if(!pat->virname) {
 		free(pat);
 		return CL_EMEM;
 	}
 	*pat->virname = fl;
cc447ac8
 	cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
33872a43
 	if((rc = cli_bm_addpatt(bm, pat, "*"))) {
b611b5ff
 		cli_errmsg("add_hash: failed to add BM pattern\n");
 		free(pat->pattern);
1126559f
 		free(pat->virname);
b611b5ff
 		free(pat);
 		return CL_EMALFDB;
 	}
 	return CL_SUCCESS;
50c27591
 }
 
 
bd912dd8
 /* Load patterns/regexes from file */
9f497be6
 int load_regex_matcher(struct cl_engine *engine,struct regex_matcher* matcher,FILE* fd,unsigned int *signo,unsigned int options,int is_whitelist,struct cli_dbio *dbio, uint8_t dconf_prefiltering)
bd912dd8
 {
03527bee
 	int rc,line=0,entry=0;
bd912dd8
 	char buffer[FILEBUFF];
 
2e11bcdf
 	assert(matcher);
bd912dd8
 
 	if(matcher->list_inited==-1)
ec481027
 		return CL_EMALFDB; /* already failed to load */
e8ae4fae
 	if(!fd && !dbio) {
bd912dd8
 		cli_errmsg("Unable to load regex list (null file)\n");
871177cd
 		return CL_ENULLARG;
bd912dd8
 	}
 
 	cli_dbgmsg("Loading regex_list\n");
 	if(!matcher->list_inited) {
5b74e89a
 		rc = init_regex_list(matcher, dconf_prefiltering);
bd912dd8
 		if (!matcher->list_inited) {
 			cli_errmsg("Regex list failed to initialize!\n");
 			fatal_error(matcher);
ec481027
 			return rc;
bd912dd8
 		}
 	}
 	/*
 	 * Regexlist db format (common to .wdb(whitelist) and .pdb(domainlist) files:
 	 * Multiple lines of form, (empty lines are skipped):
  	 * Flags RealURL DisplayedURL
 	 * Where:
6e3332cf
 	 * Flags: 
 	 *
 	 * .pdb files:
 	 * R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing 
bd912dd8
 	 * flags that should be filtered.
 	 * [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
6e3332cf
 	 * 
 	 * .wdb files:
 	 * X - full URL regex 
 	 * Y - host-only regex
 	 * M - host simple pattern
bd912dd8
 	 *
 	 * If a line in the file doesn't conform to this format, loading fails
 	 * 
 	 */
e8ae4fae
 	while(cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
bd912dd8
 		char* pattern;
 		char* flags;
2e11bcdf
 		size_t pattern_len;
 
bd912dd8
 		cli_chomp(buffer);
03527bee
 		line++;
bd912dd8
 		if(!*buffer)
 			continue;/* skip empty lines */
50c27591
 
2e11bcdf
 		if(functionality_level_check(buffer))
50c27591
 			continue;
 
9f497be6
 		if(engine->cb_sigload && engine->cb_sigload("phishing", buffer, engine->cb_sigload_ctx)) {
 			cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
 			continue;
 		}
 
03527bee
 		entry++;
a891c2d9
 		pattern = strchr(buffer,':');
bd912dd8
 		if(!pattern) {
 			cli_errmsg("Malformed regex list line %d\n",line);
 			fatal_error(matcher);
 			return CL_EMALFDB;
 		}
c33ec470
 		/*pattern[0]='\0';*/
ec481027
 		flags = buffer+1;
bd912dd8
 		pattern++;
e9913115
 
2e11bcdf
 		pattern_len = strlen(pattern);
 		if(pattern_len < FILEBUFF) {
 			pattern[pattern_len] = '/';
 			pattern[pattern_len+1] = '\0';
 		}
 		else {
 			cli_errmsg("Overlong regex line %d\n",line);
 			fatal_error(matcher);
 			return CL_EMALFDB;
e9913115
 		}
 
2e11bcdf
 		if((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
 			/* regex for hostname*/
3dcc2d78
 			if (( rc = regex_list_add_pattern(matcher, pattern) ))
bd912dd8
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
 		}
2e11bcdf
 		else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
 			/*matches displayed host*/
 			if (( rc = add_static_pattern(matcher, pattern) ))
bd912dd8
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
816d66a8
 		} else if (buffer[0] == 'S' && (!is_whitelist || pattern[0]=='W')) {
b611b5ff
 			pattern[pattern_len] = '\0';
816d66a8
 			if (pattern[0]=='W')
 			    flags[0]='W';
 			if((pattern[0]=='W' || pattern[0]=='F' || pattern[0]=='P') && pattern[1]==':') {
e828353b
 			    pattern += 2;
a3d029b9
 			    if (( rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P') )) {
b611b5ff
 				cli_errmsg("Error loading at line: %d\n", line);
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
e828353b
 			    }
 			} else {
 			    cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
 			    return CL_EMALFDB;
b611b5ff
 			}
 		} else {
ec481027
 			return CL_EMALFDB;
bd912dd8
 		}
 	}
 	matcher->list_loaded = 1;
03527bee
 	if(signo)
 	    *signo += entry;
bd912dd8
 
 	return CL_SUCCESS;
 }
 
 
 /* Build the matcher list */
2e11bcdf
 int cli_build_regex_list(struct regex_matcher* matcher)
bd912dd8
 {
e3b67c5e
 	int rc;
9828566c
 	if(!matcher)
 		return CL_SUCCESS;
bd912dd8
 	if(!matcher->list_inited || !matcher->list_loaded) {
 		cli_errmsg("Regex list not loaded!\n");
 		return -1;/*TODO: better error code */
 	}
 	cli_dbgmsg("Building regex list\n");
cc447ac8
 	cli_hashtab_free(&matcher->suffix_hash);
2e11bcdf
 	if(( rc = cli_ac_buildtrie(&matcher->suffixes) ))
 		return rc;
bd912dd8
 	matcher->list_built=1;
cc447ac8
 	cli_hashset_destroy(&matcher->sha256_pfx_set);
bd912dd8
 
 	return CL_SUCCESS;
 }
 
 /* Done with this matcher, free resources */
 void regex_list_done(struct regex_matcher* matcher)
 {
2e11bcdf
 	assert(matcher);
bd912dd8
 
76115c19
 	if(matcher->list_inited == 1) {
2e11bcdf
 		size_t i;
10290ba3
 		cli_ac_free(&matcher->suffixes);
2e11bcdf
 		if(matcher->suffix_regexes) {
 			for(i=0;i<matcher->suffix_cnt;i++) {
a2d14e06
 				struct regex_list *r = matcher->suffix_regexes[i].head;
2e11bcdf
 				while(r) {
9ee053fe
 					struct regex_list *q = r;
2e11bcdf
 					r = r->nxt;
9ee053fe
 					free(q->pattern);
 					free(q);
2e11bcdf
 				}
 			}
 			free(matcher->suffix_regexes);
 			matcher->suffix_regexes = NULL;
15b08fbb
 		}
9ee053fe
 		if(matcher->all_pregs) {
10290ba3
 			for(i=0;i<matcher->regex_cnt;i++) {
9ee053fe
 				regex_t *r = matcher->all_pregs[i];
 				cli_regfree(r);
47d40feb
 				mpool_free(matcher->mempool, r);
10290ba3
 			}
47d40feb
 			mpool_free(matcher->mempool, matcher->all_pregs);
10290ba3
 		}
cc447ac8
 		cli_hashtab_free(&matcher->suffix_hash);
4e46d65d
 		cli_bm_free(&matcher->sha256_hashes);
a3d029b9
 		cli_bm_free(&matcher->hostkey_prefix);
bd912dd8
 	}
 }
 
2e11bcdf
 int is_regex_ok(struct regex_matcher* matcher)
bd912dd8
 {
2e11bcdf
 	assert(matcher);
 	return (!matcher->list_inited || matcher->list_inited!=-1);/* either we don't have a regexlist, or we initialized it successfully */
bd912dd8
 }
 
5ee56e41
 static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
bd912dd8
 {
2e11bcdf
 	struct cli_matcher *root = &matcher->suffixes;
47d40feb
 	struct cli_ac_patt *new = mpool_calloc(matcher->mempool,1,sizeof(*new));
2e11bcdf
 	size_t i;
 	int ret;
bd912dd8
 
2e11bcdf
 	if(!new)
 		return CL_EMEM;
 	assert(root && suffix);
 
 	new->rtype = 0;
 	new->type = 0;
 	new->sigid = 0;
 	new->parts = 0;
 	new->partno = 0;
 	new->mindist = 0;
 	new->maxdist = 0;
33872a43
 	new->offset_min = CLI_OFF_ANY;
2e11bcdf
 	new->length = len;
 
 	new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
 	if(new->length > root->maxpatlen)
 		root->maxpatlen = new->length;
 
47d40feb
 	new->pattern = mpool_malloc(matcher->mempool, sizeof(new->pattern[0])*len);
2e11bcdf
 	if(!new->pattern) {
47d40feb
 		mpool_free(matcher->mempool, new);
2e11bcdf
 		return CL_EMEM;
bd912dd8
 	}
2e11bcdf
 	for(i=0;i<len;i++)
 		new->pattern[i] = suffix[i];/*new->pattern is short int* */
 
10290ba3
 	new->customdata = info;
 	new->virname = NULL;
2e11bcdf
 	if((ret = cli_ac_addpatt(root,new))) {
47d40feb
 		mpool_free(matcher->mempool, new->pattern);
 		mpool_free(matcher->mempool, new);
2e11bcdf
 		return ret;
bd912dd8
 	}
e0ac80ab
 	filter_add_static(&matcher->filter, (const unsigned char*)suffix, len, "regex");
2e11bcdf
 	return CL_SUCCESS;
bd912dd8
 }
 
2e11bcdf
 #define MODULE "regex_list: "
 /* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
bd912dd8
 
a2d14e06
 static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
 {
 	if(!ht->head)
 		ht->head = regex;
 	if(ht->tail) {
 		ht->tail->nxt = regex;
 	}
 	ht->tail = regex;
 }
 
2e11bcdf
 /* returns 0 on success, clamav error code otherwise */
a497dce5
 static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
bd912dd8
 {
5ee56e41
 	struct regex_matcher *matcher = cbdata;
9ee053fe
 	struct regex_list *regex = cli_malloc(sizeof(*regex));
cc447ac8
 	const struct cli_element *el;
2e11bcdf
 
 	assert(matcher);
9ee053fe
 	if(!regex)
 		return CL_EMEM;
 	regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
 	regex->preg = iregex->preg;
 	regex->nxt = NULL;
cc447ac8
 	el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
2e11bcdf
 	/* TODO: what if suffixes are prefixes of eachother and only one will
 	 * match? */
 	if(el) {
 		/* existing suffix */
a497dce5
 		assert((size_t)el->data < matcher->suffix_cnt);
a2d14e06
 		list_add_tail(&matcher->suffix_regexes[el->data], regex);
2e11bcdf
 	} else {
 		/* new suffix */
 		size_t n = matcher->suffix_cnt++;
cc447ac8
 		el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
2e11bcdf
 		matcher->suffix_regexes = cli_realloc(matcher->suffix_regexes, (n+1)*sizeof(*matcher->suffix_regexes));
 		if(!matcher->suffix_regexes)
 			return CL_EMEM;
a2d14e06
 		matcher->suffix_regexes[n].tail = regex;
 		matcher->suffix_regexes[n].head = regex;
d4e1cb47
 		if (suffix[0] == '/' && suffix[1] == '\0')
 		    matcher->root_regex_idx = n;
2e11bcdf
 		add_newsuffix(matcher, regex, suffix, suffix_len);
bd912dd8
 	}
2e11bcdf
 	return 0;
bd912dd8
 }
 
2e11bcdf
 /*
  * Reverses the NUL-terminated string @pattern in place.
  * Returns its length.
  */
 static size_t reverse_string(char *pattern)
 {
 	const size_t len = strlen(pattern);
 	char *lo;
 	char *hi;

 	if(len < 2)
 		return len;
 	/* swap from both ends until the two cursors meet */
 	lo = pattern;
 	hi = pattern + len - 1;
 	while(lo < hi) {
 		const char tmp = *lo;
 		*lo++ = *hi;
 		*hi-- = tmp;
 	}
 	return len;
 }
 
9ee053fe
 static regex_t *new_preg(struct regex_matcher *matcher)
10290ba3
 {
9ee053fe
 	regex_t *r;
47d40feb
 	matcher->all_pregs = mpool_realloc(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
9ee053fe
 	if(!matcher->all_pregs)
10290ba3
 		return NULL;
47d40feb
 	r = mpool_malloc(matcher->mempool, sizeof(*r));
10290ba3
 	if(!r)
 		return NULL;
9ee053fe
 	matcher->all_pregs[matcher->regex_cnt-1] = r;
10290ba3
 	return r;
 }
 
2e11bcdf
 static int add_static_pattern(struct regex_matcher *matcher, char* pattern)
bd912dd8
 {
2e11bcdf
 	size_t len;
9ee053fe
 	struct regex_list regex;
 	int rc;
 
2e11bcdf
 	len = reverse_string(pattern);
9ee053fe
 	regex.nxt = NULL;
 	regex.pattern = cli_strdup(pattern);
 	regex.preg = NULL;
 	rc = add_pattern_suffix(matcher, pattern, len, &regex);
 	free(regex.pattern);
 	return rc;
bd912dd8
 }
 
3dcc2d78
 int regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
bd912dd8
 {
 	int rc;
a497dce5
 	regex_t *preg;
2e11bcdf
 	size_t len;
 	/* we only match the host, so remove useless stuff */
 	const char remove_end[] = "([/?].*)?/";
 	const char remove_end2[] = "([/?].*)/";
bd912dd8
 
2e11bcdf
 	len = strlen(pattern);
 	if(len > sizeof(remove_end)) {
 		if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) {
 			len -= sizeof(remove_end) - 1;
3dcc2d78
 			pattern[len++]='/';
bd912dd8
 		}
2e11bcdf
 		if(strncmp(&pattern[len - sizeof(remove_end2)+1], remove_end2, sizeof(remove_end2)-1) == 0) {
 			len -= sizeof(remove_end2) - 1;
3dcc2d78
 			pattern[len++]='/';
bd912dd8
 		}
 	}
2e11bcdf
 	pattern[len] = '\0';
9ee053fe
 
a497dce5
 	preg = new_preg(matcher);
 	if(!preg)
9ee053fe
 		return CL_EMEM;
2e11bcdf
 
a497dce5
 	rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void*)matcher);
2e11bcdf
 	if(rc) {
a497dce5
 		cli_regfree(preg);
bd912dd8
 	}
 
2e11bcdf
 	return rc;
bd912dd8
 }