libclamav/regex_list.c
bd912dd8
 /*
  *  Match a string against a list of patterns/regexes.
  *
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Török Edvin
bd912dd8
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
38a00199
  *  published by the Free Software Foundation.
bd912dd8
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #ifdef CL_THREAD_SAFE
 #ifndef _REENTRANT
 #define _REENTRANT
 #endif
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
056d95dc
 #include <zlib.h>
bd912dd8
 
 #include <limits.h>
 #include <sys/types.h>
2e11bcdf
 #include <assert.h>
 
53ff1b04
 #include "regex/regex.h"
bd912dd8
 
 #include "clamav.h"
 #include "others.h"
 #include "regex_list.h"
 #include "matcher-ac.h"
6b656d36
 #include "matcher.h"
43ecd9a1
 #include "str.h"
056d95dc
 #include "readdb.h"
2e11bcdf
 #include "jsparse/textbuf.h"
5ee56e41
 #include "regex_suffix.h"
589d8d8e
 #include "default.h"
627b7626
 #include "hashtab.h"
0728972e
 
 #include "mpool.h"
 
2e11bcdf
 /* Forward declarations of static helpers that are defined later in this file. */
9ee053fe
 static regex_t *new_preg(struct regex_matcher *matcher);
2e11bcdf
 static size_t reverse_string(char *pattern);
102cd430
 static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
 static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern);
2e11bcdf
 /* ---------- */
bd912dd8
 
ed654433
 /*
  * Match result codes.
  * NOTE(review): neither macro is referenced in the visible part of this
  * file - confirm they are still needed.
  */
 #define MATCH_SUCCESS 0
288057e9
 #define MATCH_FAILED -1
bd912dd8
 
 /*
22cb38ed
  * Call this function when an unrecoverable error has occurred, (instead of exit).
bd912dd8
  */
288057e9
 static void fatal_error(struct regex_matcher *matcher)
bd912dd8
 {
288057e9
     regex_list_done(matcher);
     matcher->list_inited = -1; /* the phishing module will know we tried to load a whitelist, and failed, so it will disable itself too*/
bd912dd8
 }
 
288057e9
 static inline char get_char_at_pos_with_skip(const struct pre_fixup_info *info, const char *buffer, size_t pos)
b5341ac0
 {
288057e9
     const char *str;
     size_t realpos = 0;
     if (!info) {
         return (pos <= strlen(buffer)) ? buffer[pos > 0 ? pos - 1 : 0] : '\0';
     }
     str = info->pre_displayLink.data;
     cli_dbgmsg("calc_pos_with_skip: skip:%llu, %llu - %llu \"%s\",\"%s\"\n", (long long unsigned)pos, (long long unsigned)info->host_start,
059ca614
                (long long unsigned)info->host_end, str, buffer);
288057e9
     pos += info->host_start;
     while (str[realpos] && !isalnum(str[realpos])) realpos++;
     for (; str[realpos] && (pos > 0); pos--) {
         while (str[realpos] == ' ') realpos++;
         realpos++;
     }
     while (str[realpos] == ' ') realpos++;
     cli_dbgmsg("calc_pos_with_skip:%s\n", str + realpos);
     return (pos > 0 && !str[realpos]) ? '\0' : str[realpos > 0 ? realpos - 1 : 0];
b5341ac0
 }
 
2e11bcdf
 /*
  * Decide whether a hit on a static (non-regex) pattern is a genuine host or
  * subdomain match, and make sure real_url has a '.' in front of the matched part.
  *
  * regex         - list entry that was hit; strlen(regex->pattern) is taken as the match length
  * pre_fixup     - optional pre-fixup info, forwarded to get_char_at_pos_with_skip()
  * buffer        - the string that was looked up
  * buffer_len    - length of buffer
  * real_url      - real URL (after any leading '.' was skipped by the caller)
  * real_len      - length of real_url
  * orig_real_url - real URL buffer including the extra leading '.' described below;
  *                 this is the buffer that gets edited in place
  *
  * Returns 1 when the match is accepted, 0 otherwise (including a NULL regex
  * or a NULL regex->pattern).
  */
 static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
 {
288057e9
     char c;
     size_t match_len;
 
     if (!regex || !regex->pattern)
         return 0;
     match_len = strlen(regex->pattern);
     /*
      * Two conditions, both answered by get_char_at_pos_with_skip():
      *  1. the character reported for position buffer_len + 1 is ' ', NUL, '/' or '?';
      *  2. either the whole buffer matched, or the character reported for the
      *     position right before the matched part is '.' or ' '.
      * `c` is reassigned inside the condition; whatever it holds last is what
      * the "Ignoring false match" message below prints.
      */
     if (((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len + 1)) == ' ' || c == '\0' || c == '/' || c == '?') &&
         (match_len == buffer_len || /* full match */
          (match_len < buffer_len &&
           ((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len - match_len)) == '.' || (c == ' ')))
          /* subdomain matched*/)) {
         /* we have an extra / at the end */
         if (match_len > 0) match_len--;
         cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
         cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
         /* only touch real_url when it is long enough to hold the matched part plus one character */
         if (real_len >= match_len + 1) {
             const size_t pos = real_len - match_len - 1;
             if (real_url[pos] != '.') {
                 /* we need to shift left, and insert a '.'
                  * we have an extra '.' at the beginning inserted by get_host to have room,
                  * orig_real_url has to be used here,
                  * because we want to overwrite that extra '.' */
288057e9
                 size_t orig_real_len = strlen(orig_real_url);
                 cli_dbgmsg("No dot here:%s\n", real_url + pos);
                 real_url = orig_real_url;
                 /* move everything in front of the matched part one byte to the left ... */
                 memmove(real_url, real_url + 1, orig_real_len - match_len - 1);
                 /* ... and put the '.' into the slot that freed up */
                 real_url[orig_real_len - match_len - 1] = '.';
                 cli_dbgmsg("After inserting .: %s\n", real_url);
             }
         }
         return 1;
     }
     cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
     return 0;
2e11bcdf
 }
 
bd912dd8
 /*
  * @matcher - matcher structure to use
  * @real_url - href target
  * @display_url - <a> tag contents
  * @hostOnly - if you want to match only the host part
ec481027
  * @is_whitelist - is this a lookup in whitelist?
bd912dd8
  *
  * @return - CL_SUCCESS - url doesn't match
  *         - CL_VIRUS - url matches list
  *
  * Do not send NULL pointers to this function!!
  *
  */
102cd430
 cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const char *display_url, const struct pre_fixup_info *pre_fixup, int hostOnly, const char **info, int is_whitelist)
bd912dd8
 {
288057e9
     char *orig_real_url = real_url;
     struct regex_list *regex;
     size_t real_len, display_len, buffer_len;
 
102cd430
     char *buffer         = NULL;
     char *bufrev         = NULL;
     cl_error_t rc        = CL_SUCCESS;
     int filter_search_rc = 0;
     int root;
     struct cli_ac_data mdata;
     struct cli_ac_result *res = NULL;
 
288057e9
     assert(matcher);
     assert(real_url);
     assert(display_url);
     *info = NULL;
     if (!matcher->list_inited)
102cd430
         return CL_SUCCESS;
288057e9
     assert(matcher->list_built);
     /* skip initial '.' inserted by get_host */
     if (real_url[0] == '.') real_url++;
     if (display_url[0] == '.') display_url++;
     real_len    = strlen(real_url);
     display_len = strlen(display_url);
     buffer_len  = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
     if (buffer_len < 3) {
         /* too short, no match possible */
102cd430
         return CL_SUCCESS;
     }
     buffer = cli_malloc(buffer_len + 1);
     if (!buffer) {
         cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
         return CL_EMEM;
288057e9
     }
 
102cd430
     strncpy(buffer, real_url, real_len);
     buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':';
bd912dd8
 
102cd430
     /*
      * For H-type PDB signatures, real_url is actually the DisplayedHostname.
      * RealHostname is not used.
      */
     if (!hostOnly || is_whitelist) {
         /* For all other PDB and WDB signatures concatenate Real:Displayed. */
         strncpy(buffer + real_len + 1, display_url, display_len);
     }
     buffer[buffer_len - 1] = '/';
     buffer[buffer_len]     = 0;
     cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
fa51ec00
 
102cd430
     if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
         return rc;
288057e9
 
102cd430
     bufrev = cli_strdup(buffer);
     if (!bufrev)
         return CL_EMEM;
288057e9
 
102cd430
     reverse_string(bufrev);
     filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len) != -1;
     if (filter_search_rc == -1) {
         free(buffer);
         free(bufrev);
         /* filter says this suffix doesn't match.
2e11bcdf
 			 * The filter has false positives, but no false
 			 * negatives */
102cd430
         return CL_SUCCESS;
     }
 
     rc = cli_ac_scanbuff((const unsigned char *)bufrev, buffer_len, NULL, (void *)&regex, &res, &matcher->suffixes, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
     free(bufrev);
     cli_ac_freedata(&mdata);
 
     rc   = CL_SUCCESS;
     root = matcher->root_regex_idx;
     while (res || root) {
         struct cli_ac_result *q;
         if (!res) {
             regex = matcher->suffix_regexes[root].head;
             root  = 0;
         } else {
             regex = res->customdata;
288057e9
         }
102cd430
         while (!rc && regex) {
             /* loop over multiple regexes corresponding to
2e11bcdf
 				 * this suffix */
102cd430
             if (!regex->preg) {
                 /* we matched a static pattern */
                 rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
             } else {
                 rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
288057e9
             }
102cd430
             if (rc) *info = regex->pattern;
             regex = regex->nxt;
         }
         if (res) {
             q   = res;
             res = res->next;
             free(q);
288057e9
         }
     }
102cd430
     free(buffer);
     if (!rc)
         cli_dbgmsg("Lookup result: not in regex list\n");
     else
         cli_dbgmsg("Lookup result: in regex list\n");
     return rc;
bd912dd8
 }
 
 /* Initialization & loading */
22cb38ed
 /* Initializes @matcher, allocating necessary substructures */
102cd430
 cl_error_t init_regex_list(struct regex_matcher *matcher, uint8_t dconf_prefiltering)
bd912dd8
 {
1e2969a4
 #ifdef USE_MPOOL
288057e9
     mpool_t *mp = matcher->mempool;
1e2969a4
 #endif
102cd430
     cl_error_t rc;
ec481027
 
288057e9
     assert(matcher);
     memset(matcher, 0, sizeof(*matcher));
bd912dd8
 
288057e9
     matcher->list_inited = 1;
     matcher->list_built  = 0;
     matcher->list_loaded = 0;
     cli_hashtab_init(&matcher->suffix_hash, 512);
1e2969a4
 #ifdef USE_MPOOL
288057e9
     matcher->mempool          = mp;
     matcher->suffixes.mempool = mp;
     assert(mp && "mempool must be initialized");
1e2969a4
 #endif
288057e9
     if ((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
         return rc;
     }
1e2969a4
 #ifdef USE_MPOOL
288057e9
     matcher->sha256_hashes.mempool  = mp;
     matcher->hostkey_prefix.mempool = mp;
1e2969a4
 #endif
288057e9
     if ((rc = cli_bm_init(&matcher->sha256_hashes))) {
         return rc;
     }
     if ((rc = cli_bm_init(&matcher->hostkey_prefix))) {
         return rc;
     }
     filter_init(&matcher->filter);
     return CL_SUCCESS;
bd912dd8
 }
 
288057e9
 static int functionality_level_check(char *line)
50c27591
 {
288057e9
     char *ptmin;
     char *ptmax;
     size_t j;
 
     ptmin = strrchr(line, ':');
     if (!ptmin)
         return CL_SUCCESS;
 
     ptmin++;
 
     ptmax = strchr(ptmin, '-');
     if (!ptmax)
         return CL_SUCCESS; /* there is no functionality level specified, so we're ok */
     else {
         size_t min, max;
         ptmax++;
         for (j = 0; j + ptmin + 1 < ptmax; j++)
             if (!isdigit(ptmin[j]))
                 return CL_SUCCESS; /* not numbers, not functionality level */
         for (j = 0; j < strlen(ptmax); j++)
             if (!isdigit(ptmax[j]))
                 return CL_SUCCESS; /* see above */
         ptmax[-1] = '\0';
         min       = atoi(ptmin);
         if (strlen(ptmax) == 0)
             max = INT_MAX;
         else
             max = atoi(ptmax);
 
         if (min > cl_retflevel()) {
             cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n", line, (unsigned int)min);
             return CL_EMALFDB;
         }
 
         if (max < cl_retflevel())
             return CL_EMALFDB;
         ptmin[-1] = '\0';
         return CL_SUCCESS;
     }
b611b5ff
 }
 
288057e9
 static int add_hash(struct regex_matcher *matcher, char *pattern, const char fl, int is_prefix)
b611b5ff
 {
288057e9
     int rc;
544fa973
     struct cli_bm_patt *pat = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*pat));
288057e9
     struct cli_matcher *bm;
     const char *vname = NULL;
     if (!pat)
         return CL_EMEM;
544fa973
     pat->pattern = (unsigned char *)CLI_MPOOL_HEX2STR(matcher->mempool, pattern);
288057e9
     if (!pat->pattern)
         return CL_EMALFDB;
     pat->length = 32;
     if (is_prefix) {
         pat->length = 4;
         bm          = &matcher->hostkey_prefix;
     } else {
         bm = &matcher->sha256_hashes;
     }
 
     if (!matcher->sha256_pfx_set.keys) {
         if ((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
             return rc;
         }
     }
 
     if (fl != 'W' && pat->length == 32 &&
         cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
         cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes, 0, NULL, NULL, NULL) == CL_VIRUS) {
         if (*vname == 'W') {
             /* hash is whitelisted in local.gdb */
             cli_dbgmsg("Skipping hash %s\n", pattern);
544fa973
             MPOOL_FREE(matcher->mempool, pat->pattern);
             MPOOL_FREE(matcher->mempool, pat);
288057e9
             return CL_SUCCESS;
         }
     }
544fa973
     pat->virname = MPOOL_MALLOC(matcher->mempool, 1);
288057e9
     if (!pat->virname) {
         free(pat);
241e7eb1
         cli_errmsg("add_hash: Unable to allocate memory for path->virname\n");
288057e9
         return CL_EMEM;
     }
     *pat->virname = fl;
     cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
     if ((rc = cli_bm_addpatt(bm, pat, "*"))) {
         cli_errmsg("add_hash: failed to add BM pattern\n");
         free(pat->pattern);
         free(pat->virname);
         free(pat);
         return CL_EMALFDB;
     }
     return CL_SUCCESS;
50c27591
 }
 
bd912dd8
 /* Load patterns/regexes from file */
102cd430
 cl_error_t load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, FILE *fd, unsigned int *signo, unsigned int options, int is_whitelist, struct cli_dbio *dbio, uint8_t dconf_prefiltering)
bd912dd8
 {
102cd430
     cl_error_t rc;
     int line = 0, entry = 0;
288057e9
     char buffer[FILEBUFF];
 
     assert(matcher);
 
     if (matcher->list_inited == -1)
         return CL_EMALFDB; /* already failed to load */
     if (!fd && !dbio) {
         cli_errmsg("Unable to load regex list (null file)\n");
         return CL_ENULLARG;
     }
 
     cli_dbgmsg("Loading regex_list\n");
     if (!matcher->list_inited) {
         rc = init_regex_list(matcher, dconf_prefiltering);
         if (!matcher->list_inited) {
             cli_errmsg("Regex list failed to initialize!\n");
             fatal_error(matcher);
             return rc;
         }
     }
     /*
bd912dd8
 	 * Regexlist db format (common to .wdb(whitelist) and .pdb(domainlist) files:
 	 * Multiple lines of form, (empty lines are skipped):
  	 * Flags RealURL DisplayedURL
 	 * Where:
fa51ec00
 	 * Flags:
6e3332cf
 	 *
 	 * .pdb files:
fa51ec00
 	 * R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
bd912dd8
 	 * flags that should be filtered.
 	 * [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
fa51ec00
 	 *
6e3332cf
 	 * .wdb files:
fa51ec00
 	 * X - full URL regex
6e3332cf
 	 * Y - host-only regex
 	 * M - host simple pattern
bd912dd8
 	 *
 	 * If a line in the file doesn't conform to this format, loading fails
fa51ec00
 	 *
bd912dd8
 	 */
288057e9
     while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
         char *pattern;
         char *flags;
         size_t pattern_len;
 
         cli_chomp(buffer);
         line++;
         if (!*buffer)
             continue; /* skip empty lines */
 
         if (functionality_level_check(buffer))
             continue;
 
         if (engine->cb_sigload && engine->cb_sigload("phishing", buffer, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
             cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
             continue;
         }
bd912dd8
 
288057e9
         entry++;
         pattern = strchr(buffer, ':');
         if (!pattern) {
             cli_errmsg("Malformed regex list line %d\n", line);
             fatal_error(matcher);
             return CL_EMALFDB;
         }
         /*pattern[0]='\0';*/
         flags = buffer + 1;
         pattern++;
 
         pattern_len = strlen(pattern);
         if (pattern_len < FILEBUFF) {
             pattern[pattern_len]     = '/';
             pattern[pattern_len + 1] = '\0';
         } else {
             cli_errmsg("Overlong regex line %d\n", line);
             fatal_error(matcher);
             return CL_EMALFDB;
         }
 
         if ((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
             /* regex for hostname*/
             if ((rc = regex_list_add_pattern(matcher, pattern)))
                 return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
         } else if ((buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
             /*matches displayed host*/
             if ((rc = add_static_pattern(matcher, pattern)))
                 return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
         } else if (buffer[0] == 'S' && (!is_whitelist || pattern[0] == 'W')) {
             pattern[pattern_len] = '\0';
             if (pattern[0] == 'W')
                 flags[0] = 'W';
             if ((pattern[0] == 'W' || pattern[0] == 'F' || pattern[0] == 'P') && pattern[1] == ':') {
                 pattern += 2;
                 if ((rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P'))) {
                     cli_errmsg("Error loading at line: %d\n", line);
                     return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
                 }
             } else {
                 cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
                 return CL_EMALFDB;
             }
         } else {
             return CL_EMALFDB;
         }
     }
     matcher->list_loaded = 1;
     if (signo)
         *signo += entry;
 
     return CL_SUCCESS;
 }
bd912dd8
 
 /* Build the matcher list */
102cd430
 cl_error_t cli_build_regex_list(struct regex_matcher *matcher)
bd912dd8
 {
102cd430
     cl_error_t rc;
288057e9
     if (!matcher)
         return CL_SUCCESS;
     if (!matcher->list_inited || !matcher->list_loaded) {
         cli_errmsg("Regex list not loaded!\n");
         return -1; /*TODO: better error code */
     }
     cli_dbgmsg("Building regex list\n");
     cli_hashtab_free(&matcher->suffix_hash);
     if ((rc = cli_ac_buildtrie(&matcher->suffixes)))
         return rc;
     matcher->list_built = 1;
     cli_hashset_destroy(&matcher->sha256_pfx_set);
 
     return CL_SUCCESS;
bd912dd8
 }
 
 /* Done with this matcher, free resources */
288057e9
 void regex_list_done(struct regex_matcher *matcher)
bd912dd8
 {
288057e9
     assert(matcher);
 
     if (matcher->list_inited == 1) {
         size_t i;
         cli_ac_free(&matcher->suffixes);
         if (matcher->suffix_regexes) {
             for (i = 0; i < matcher->suffix_cnt; i++) {
                 struct regex_list *r = matcher->suffix_regexes[i].head;
                 while (r) {
                     struct regex_list *q = r;
                     r                    = r->nxt;
                     free(q->pattern);
                     free(q);
                 }
             }
             free(matcher->suffix_regexes);
             matcher->suffix_regexes = NULL;
         }
         if (matcher->all_pregs) {
             for (i = 0; i < matcher->regex_cnt; i++) {
                 regex_t *r = matcher->all_pregs[i];
                 cli_regfree(r);
544fa973
                 MPOOL_FREE(matcher->mempool, r);
288057e9
             }
544fa973
             MPOOL_FREE(matcher->mempool, matcher->all_pregs);
288057e9
         }
         cli_hashtab_free(&matcher->suffix_hash);
         cli_bm_free(&matcher->sha256_hashes);
         cli_bm_free(&matcher->hostkey_prefix);
     }
bd912dd8
 }
 
288057e9
 int is_regex_ok(struct regex_matcher *matcher)
bd912dd8
 {
288057e9
     assert(matcher);
     return (!matcher->list_inited || matcher->list_inited != -1); /* either we don't have a regexlist, or we initialized it successfully */
bd912dd8
 }
 
5ee56e41
 static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
bd912dd8
 {
288057e9
     struct cli_matcher *root = &matcher->suffixes;
544fa973
     struct cli_ac_patt *new  = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*new));
288057e9
     size_t i;
     int ret;
 
     if (!new)
         return CL_EMEM;
     assert(root && suffix);
 
     new->rtype      = 0;
     new->type       = 0;
     new->sigid      = 0;
     new->parts      = 0;
     new->partno     = 0;
     new->mindist    = 0;
     new->maxdist    = 0;
     new->offset_min = CLI_OFF_ANY;
     new->length[0]  = (uint16_t)len;
 
     new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
     if (new->length[0] > root->maxpatlen)
         root->maxpatlen = new->length[0];
 
544fa973
     new->pattern = MPOOL_MALLOC(matcher->mempool, sizeof(new->pattern[0]) * len);
288057e9
     if (!new->pattern) {
544fa973
         MPOOL_FREE(matcher->mempool, new);
241e7eb1
         cli_errmsg("add_newsuffix: Unable to allocate memory for new->pattern\n");
288057e9
         return CL_EMEM;
     }
     for (i = 0; i < len; i++)
         new->pattern[i] = suffix[i]; /*new->pattern is short int* */
 
     new->customdata = info;
     new->virname    = NULL;
     if ((ret = cli_ac_addpatt(root, new))) {
544fa973
         MPOOL_FREE(matcher->mempool, new->pattern);
         MPOOL_FREE(matcher->mempool, new);
288057e9
         return ret;
     }
     filter_add_static(&matcher->filter, (const unsigned char *)suffix, len, "regex");
     return CL_SUCCESS;
bd912dd8
 }
 
2e11bcdf
 /* NOTE(review): looks like a log-message prefix; not referenced in the visible part of this file - confirm it is still used. */
 #define MODULE "regex_list: "
 /* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
bd912dd8
 
a2d14e06
 static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
 {
288057e9
     if (!ht->head)
         ht->head = regex;
     if (ht->tail) {
         ht->tail->nxt = regex;
     }
     ht->tail = regex;
a2d14e06
 }
 
2e11bcdf
 /* returns 0 on success, clamav error code otherwise */
102cd430
 static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
bd912dd8
 {
288057e9
     struct regex_matcher *matcher = cbdata;
     struct regex_list *regex      = cli_malloc(sizeof(*regex));
     const struct cli_element *el;
     void *tmp_matcher; /*	save original address if OOM occurs */
2e11bcdf
 
288057e9
     assert(matcher);
     if (!regex) {
241e7eb1
         cli_errmsg("add_pattern_suffix: Unable to allocate memory for regex\n");
288057e9
         return CL_EMEM;
241e7eb1
     }
288057e9
     regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
     regex->preg    = iregex->preg;
     regex->nxt     = NULL;
     el             = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
     /* TODO: what if suffixes are prefixes of eachother and only one will
2e11bcdf
 	 * match? */
288057e9
     if (el) {
         /* existing suffix */
         assert((size_t)el->data < matcher->suffix_cnt);
         list_add_tail(&matcher->suffix_regexes[el->data], regex);
     } else {
         /* new suffix */
         size_t n    = matcher->suffix_cnt++;
         el          = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
         tmp_matcher = matcher->suffix_regexes; /*  save the current value before cli_realloc()	*/
         tmp_matcher = cli_realloc(matcher->suffix_regexes, (n + 1) * sizeof(*matcher->suffix_regexes));
         if (!tmp_matcher) {
             free(regex);
             return CL_EMEM;
         }
         matcher->suffix_regexes         = tmp_matcher; /*  success, point at new memory location   */
         matcher->suffix_regexes[n].tail = regex;
         matcher->suffix_regexes[n].head = regex;
         if (suffix[0] == '/' && suffix[1] == '\0')
             matcher->root_regex_idx = n;
         add_newsuffix(matcher, regex, suffix, suffix_len);
     }
102cd430
     return CL_SUCCESS;
bd912dd8
 }
 
2e11bcdf
 /*
  * Reverse the NUL-terminated string `pattern` in place.
  * Returns the length of the string.
  */
 static size_t reverse_string(char *pattern)
 {
     const size_t len = strlen(pattern);
     char *left       = pattern;
     char *right;

     /* nothing to swap, and no valid "last character" to point at */
     if (len == 0)
         return len;

     /* swap from both ends towards the middle */
     right = pattern + (len - 1);
     while (left < right) {
         const char held = *left;
         *left++         = *right;
         *right--        = held;
     }
     return len;
 }
 
9ee053fe
 static regex_t *new_preg(struct regex_matcher *matcher)
10290ba3
 {
288057e9
     regex_t *r;
544fa973
     matcher->all_pregs = MPOOL_REALLOC(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
288057e9
     if (!matcher->all_pregs) {
241e7eb1
         cli_errmsg("new_preg: Unable to reallocate memory\n");
288057e9
         return NULL;
241e7eb1
     }
544fa973
     r = MPOOL_MALLOC(matcher->mempool, sizeof(*r));
288057e9
     if (!r) {
241e7eb1
         cli_errmsg("new_preg: Unable to allocate memory\n");
288057e9
         return NULL;
241e7eb1
     }
288057e9
     matcher->all_pregs[matcher->regex_cnt - 1] = r;
     return r;
10290ba3
 }
 
102cd430
 static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern)
bd912dd8
 {
288057e9
     size_t len;
     struct regex_list regex;
102cd430
     cl_error_t rc;
288057e9
 
     len           = reverse_string(pattern);
     regex.nxt     = NULL;
     regex.pattern = cli_strdup(pattern);
     regex.preg    = NULL;
     rc            = add_pattern_suffix(matcher, pattern, len, &regex);
     free(regex.pattern);
     return rc;
bd912dd8
 }
 
102cd430
 cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
bd912dd8
 {
102cd430
     cl_error_t rc;
288057e9
     regex_t *preg;
     size_t len;
     /* we only match the host, so remove useless stuff */
     const char remove_end[]  = "([/?].*)?/";
     const char remove_end2[] = "([/?].*)/";
 
     len = strlen(pattern);
     if (len > sizeof(remove_end)) {
         if (strncmp(&pattern[len - sizeof(remove_end) + 1], remove_end, sizeof(remove_end) - 1) == 0) {
             len -= sizeof(remove_end) - 1;
             pattern[len++] = '/';
         }
         if (strncmp(&pattern[len - sizeof(remove_end2) + 1], remove_end2, sizeof(remove_end2) - 1) == 0) {
             len -= sizeof(remove_end2) - 1;
             pattern[len++] = '/';
         }
     }
     pattern[len] = '\0';
 
     preg = new_preg(matcher);
     if (!preg)
         return CL_EMEM;
 
     rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void *)matcher);
     if (rc) {
         cli_regfree(preg);
     }
 
     return rc;
bd912dd8
 }