/*
 *  Match a string against a list of patterns/regexes.
 *
 *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Török Edvin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#ifdef CL_THREAD_SAFE
#ifndef _REENTRANT
#define _REENTRANT
#endif
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h> |
056d95dc |
#include <zlib.h> |
bd912dd8 |
#include <limits.h>
#include <sys/types.h> |
2e11bcdf |
#include <assert.h>
|
53ff1b04 |
#include "regex/regex.h" |
bd912dd8 |
#include "clamav.h"
#include "others.h"
#include "regex_list.h"
#include "matcher-ac.h" |
6b656d36 |
#include "matcher.h" |
43ecd9a1 |
#include "str.h" |
056d95dc |
#include "readdb.h" |
2e11bcdf |
#include "jsparse/textbuf.h" |
5ee56e41 |
#include "regex_suffix.h" |
589d8d8e |
#include "default.h" |
627b7626 |
#include "hashtab.h" |
0728972e |
#include "mpool.h"
|
2e11bcdf |
/* Prototypes */ |
9ee053fe |
static regex_t *new_preg(struct regex_matcher *matcher); |
2e11bcdf |
static size_t reverse_string(char *pattern); |
102cd430 |
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern); |
2e11bcdf |
/* ---------- */ |
bd912dd8 |
|
ed654433 |
#define MATCH_SUCCESS 0 |
288057e9 |
#define MATCH_FAILED -1 |
bd912dd8 |
/* |
22cb38ed |
* Call this function when an unrecoverable error has occurred, (instead of exit). |
bd912dd8 |
*/ |
288057e9 |
static void fatal_error(struct regex_matcher *matcher) |
bd912dd8 |
{ |
288057e9 |
regex_list_done(matcher);
matcher->list_inited = -1; /* the phishing module will know we tried to load a whitelist, and failed, so it will disable itself too*/ |
bd912dd8 |
}
|
288057e9 |
static inline char get_char_at_pos_with_skip(const struct pre_fixup_info *info, const char *buffer, size_t pos) |
b5341ac0 |
{ |
288057e9 |
const char *str;
size_t realpos = 0;
if (!info) {
return (pos <= strlen(buffer)) ? buffer[pos > 0 ? pos - 1 : 0] : '\0';
}
str = info->pre_displayLink.data;
cli_dbgmsg("calc_pos_with_skip: skip:%llu, %llu - %llu \"%s\",\"%s\"\n", (long long unsigned)pos, (long long unsigned)info->host_start, |
059ca614 |
(long long unsigned)info->host_end, str, buffer); |
288057e9 |
pos += info->host_start;
while (str[realpos] && !isalnum(str[realpos])) realpos++;
for (; str[realpos] && (pos > 0); pos--) {
while (str[realpos] == ' ') realpos++;
realpos++;
}
while (str[realpos] == ' ') realpos++;
cli_dbgmsg("calc_pos_with_skip:%s\n", str + realpos);
return (pos > 0 && !str[realpos]) ? '\0' : str[realpos > 0 ? realpos - 1 : 0]; |
b5341ac0 |
}
|
2e11bcdf |
/*
 * Decide whether a static pattern hit is a real (sub)domain match.
 *
 * The character following the looked-up text must be ' ', NUL, '/' or '?'.
 * In addition the pattern must either cover the whole buffer, or be preceded
 * by '.' or ' ' (i.e. the hit starts at a subdomain boundary).
 *
 * On an accepted match a '.' is inserted into the real URL in front of the
 * matched part when one is not already there; this is done on
 * @orig_real_url, overwriting its first character.
 *
 * Returns 1 when the match is accepted, 0 otherwise.
 */
static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
{
    char c;
    size_t match_len;
    int accepted = 0;

    if (!regex || !regex->pattern)
        return 0;
    match_len = strlen(regex->pattern);

    c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len + 1);
    if (c == ' ' || c == '\0' || c == '/' || c == '?') {
        if (match_len == buffer_len) {
            /* full match */
            accepted = 1;
        } else if (match_len < buffer_len) {
            /* subdomain matched only if it begins at a separator */
            c        = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len - match_len);
            accepted = (c == '.' || c == ' ');
        }
    }

    if (!accepted) {
        cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
        return 0;
    }

    /* we have an extra / at the end */
    if (match_len > 0)
        match_len--;
    cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
    cli_dbgmsg("Before inserting .: %s\n", orig_real_url);

    if (real_len >= match_len + 1) {
        const size_t dot_pos = real_len - match_len - 1;

        if (real_url[dot_pos] != '.') {
            /* We need to shift left and insert a '.'.
             * get_host inserted an extra '.' at the beginning to leave room,
             * so orig_real_url has to be used here because that extra '.'
             * is what gets overwritten. */
            const size_t orig_real_len = strlen(orig_real_url);
            const size_t keep          = orig_real_len - match_len - 1;

            cli_dbgmsg("No dot here:%s\n", real_url + dot_pos);
            memmove(orig_real_url, orig_real_url + 1, keep);
            orig_real_url[keep] = '.';
            cli_dbgmsg("After inserting .: %s\n", orig_real_url);
        }
    }
    return 1;
}
|
bd912dd8 |
/*
* @matcher - matcher structure to use
* @real_url - href target
* @display_url - <a> tag contents
* @hostOnly - if you want to match only the host part |
ec481027 |
* @is_whitelist - is this a lookup in whitelist? |
bd912dd8 |
*
* @return - CL_SUCCESS - url doesn't match
* - CL_VIRUS - url matches list
*
* Do not send NULL pointers to this function!!
*
*/ |
102cd430 |
cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const char *display_url, const struct pre_fixup_info *pre_fixup, int hostOnly, const char **info, int is_whitelist) |
bd912dd8 |
{ |
288057e9 |
char *orig_real_url = real_url;
struct regex_list *regex;
size_t real_len, display_len, buffer_len;
|
102cd430 |
char *buffer = NULL;
char *bufrev = NULL;
cl_error_t rc = CL_SUCCESS;
int filter_search_rc = 0;
int root;
struct cli_ac_data mdata;
struct cli_ac_result *res = NULL;
|
288057e9 |
assert(matcher);
assert(real_url);
assert(display_url);
*info = NULL;
if (!matcher->list_inited) |
102cd430 |
return CL_SUCCESS; |
288057e9 |
assert(matcher->list_built);
/* skip initial '.' inserted by get_host */
if (real_url[0] == '.') real_url++;
if (display_url[0] == '.') display_url++;
real_len = strlen(real_url);
display_len = strlen(display_url);
buffer_len = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
if (buffer_len < 3) {
/* too short, no match possible */ |
102cd430 |
return CL_SUCCESS;
}
buffer = cli_malloc(buffer_len + 1);
if (!buffer) {
cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
return CL_EMEM; |
288057e9 |
}
|
102cd430 |
strncpy(buffer, real_url, real_len);
buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':'; |
bd912dd8 |
|
102cd430 |
/*
* For H-type PDB signatures, real_url is actually the DisplayedHostname.
* RealHostname is not used.
*/
if (!hostOnly || is_whitelist) {
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
strncpy(buffer + real_len + 1, display_url, display_len);
}
buffer[buffer_len - 1] = '/';
buffer[buffer_len] = 0;
cli_dbgmsg("Looking up in regex_list: %s\n", buffer); |
fa51ec00 |
|
102cd430 |
if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
return rc; |
288057e9 |
|
102cd430 |
bufrev = cli_strdup(buffer);
if (!bufrev)
return CL_EMEM; |
288057e9 |
|
102cd430 |
reverse_string(bufrev);
filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len) != -1;
if (filter_search_rc == -1) {
free(buffer);
free(bufrev);
/* filter says this suffix doesn't match. |
2e11bcdf |
* The filter has false positives, but no false
* negatives */ |
102cd430 |
return CL_SUCCESS;
}
rc = cli_ac_scanbuff((const unsigned char *)bufrev, buffer_len, NULL, (void *)®ex, &res, &matcher->suffixes, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
free(bufrev);
cli_ac_freedata(&mdata);
rc = CL_SUCCESS;
root = matcher->root_regex_idx;
while (res || root) {
struct cli_ac_result *q;
if (!res) {
regex = matcher->suffix_regexes[root].head;
root = 0;
} else {
regex = res->customdata; |
288057e9 |
} |
102cd430 |
while (!rc && regex) {
/* loop over multiple regexes corresponding to |
2e11bcdf |
* this suffix */ |
102cd430 |
if (!regex->preg) {
/* we matched a static pattern */
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
} else {
rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0); |
288057e9 |
} |
102cd430 |
if (rc) *info = regex->pattern;
regex = regex->nxt;
}
if (res) {
q = res;
res = res->next;
free(q); |
288057e9 |
}
} |
102cd430 |
free(buffer);
if (!rc)
cli_dbgmsg("Lookup result: not in regex list\n");
else
cli_dbgmsg("Lookup result: in regex list\n");
return rc; |
bd912dd8 |
}
/* Initialization & loading */ |
22cb38ed |
/* Initializes @matcher, allocating necessary substructures */ |
102cd430 |
cl_error_t init_regex_list(struct regex_matcher *matcher, uint8_t dconf_prefiltering) |
bd912dd8 |
{ |
1e2969a4 |
#ifdef USE_MPOOL |
288057e9 |
mpool_t *mp = matcher->mempool; |
1e2969a4 |
#endif |
102cd430 |
cl_error_t rc; |
ec481027 |
|
288057e9 |
assert(matcher);
memset(matcher, 0, sizeof(*matcher)); |
bd912dd8 |
|
288057e9 |
matcher->list_inited = 1;
matcher->list_built = 0;
matcher->list_loaded = 0;
cli_hashtab_init(&matcher->suffix_hash, 512); |
1e2969a4 |
#ifdef USE_MPOOL |
288057e9 |
matcher->mempool = mp;
matcher->suffixes.mempool = mp;
assert(mp && "mempool must be initialized"); |
1e2969a4 |
#endif |
288057e9 |
if ((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
return rc;
} |
1e2969a4 |
#ifdef USE_MPOOL |
288057e9 |
matcher->sha256_hashes.mempool = mp;
matcher->hostkey_prefix.mempool = mp; |
1e2969a4 |
#endif |
288057e9 |
if ((rc = cli_bm_init(&matcher->sha256_hashes))) {
return rc;
}
if ((rc = cli_bm_init(&matcher->hostkey_prefix))) {
return rc;
}
filter_init(&matcher->filter);
return CL_SUCCESS; |
bd912dd8 |
}
|
288057e9 |
static int functionality_level_check(char *line) |
50c27591 |
{ |
288057e9 |
char *ptmin;
char *ptmax;
size_t j;
ptmin = strrchr(line, ':');
if (!ptmin)
return CL_SUCCESS;
ptmin++;
ptmax = strchr(ptmin, '-');
if (!ptmax)
return CL_SUCCESS; /* there is no functionality level specified, so we're ok */
else {
size_t min, max;
ptmax++;
for (j = 0; j + ptmin + 1 < ptmax; j++)
if (!isdigit(ptmin[j]))
return CL_SUCCESS; /* not numbers, not functionality level */
for (j = 0; j < strlen(ptmax); j++)
if (!isdigit(ptmax[j]))
return CL_SUCCESS; /* see above */
ptmax[-1] = '\0';
min = atoi(ptmin);
if (strlen(ptmax) == 0)
max = INT_MAX;
else
max = atoi(ptmax);
if (min > cl_retflevel()) {
cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n", line, (unsigned int)min);
return CL_EMALFDB;
}
if (max < cl_retflevel())
return CL_EMALFDB;
ptmin[-1] = '\0';
return CL_SUCCESS;
} |
b611b5ff |
}
|
288057e9 |
static int add_hash(struct regex_matcher *matcher, char *pattern, const char fl, int is_prefix) |
b611b5ff |
{ |
288057e9 |
int rc; |
544fa973 |
struct cli_bm_patt *pat = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*pat)); |
288057e9 |
struct cli_matcher *bm;
const char *vname = NULL;
if (!pat)
return CL_EMEM; |
544fa973 |
pat->pattern = (unsigned char *)CLI_MPOOL_HEX2STR(matcher->mempool, pattern); |
288057e9 |
if (!pat->pattern)
return CL_EMALFDB;
pat->length = 32;
if (is_prefix) {
pat->length = 4;
bm = &matcher->hostkey_prefix;
} else {
bm = &matcher->sha256_hashes;
}
if (!matcher->sha256_pfx_set.keys) {
if ((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
return rc;
}
}
if (fl != 'W' && pat->length == 32 &&
cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes, 0, NULL, NULL, NULL) == CL_VIRUS) {
if (*vname == 'W') {
/* hash is whitelisted in local.gdb */
cli_dbgmsg("Skipping hash %s\n", pattern); |
544fa973 |
MPOOL_FREE(matcher->mempool, pat->pattern);
MPOOL_FREE(matcher->mempool, pat); |
288057e9 |
return CL_SUCCESS;
}
} |
544fa973 |
pat->virname = MPOOL_MALLOC(matcher->mempool, 1); |
288057e9 |
if (!pat->virname) {
free(pat); |
241e7eb1 |
cli_errmsg("add_hash: Unable to allocate memory for path->virname\n"); |
288057e9 |
return CL_EMEM;
}
*pat->virname = fl;
cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
if ((rc = cli_bm_addpatt(bm, pat, "*"))) {
cli_errmsg("add_hash: failed to add BM pattern\n");
free(pat->pattern);
free(pat->virname);
free(pat);
return CL_EMALFDB;
}
return CL_SUCCESS; |
50c27591 |
}
|
bd912dd8 |
/* Load patterns/regexes from file */ |
102cd430 |
cl_error_t load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, FILE *fd, unsigned int *signo, unsigned int options, int is_whitelist, struct cli_dbio *dbio, uint8_t dconf_prefiltering) |
bd912dd8 |
{ |
102cd430 |
cl_error_t rc;
int line = 0, entry = 0; |
288057e9 |
char buffer[FILEBUFF];
assert(matcher);
if (matcher->list_inited == -1)
return CL_EMALFDB; /* already failed to load */
if (!fd && !dbio) {
cli_errmsg("Unable to load regex list (null file)\n");
return CL_ENULLARG;
}
cli_dbgmsg("Loading regex_list\n");
if (!matcher->list_inited) {
rc = init_regex_list(matcher, dconf_prefiltering);
if (!matcher->list_inited) {
cli_errmsg("Regex list failed to initialize!\n");
fatal_error(matcher);
return rc;
}
}
/* |
bd912dd8 |
* Regexlist db format (common to .wdb(whitelist) and .pdb(domainlist) files:
* Multiple lines of form, (empty lines are skipped):
* Flags RealURL DisplayedURL
* Where: |
fa51ec00 |
* Flags: |
6e3332cf |
*
* .pdb files: |
fa51ec00 |
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing |
bd912dd8 |
* flags that should be filtered.
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host] |
fa51ec00 |
* |
6e3332cf |
* .wdb files: |
fa51ec00 |
* X - full URL regex |
6e3332cf |
* Y - host-only regex
* M - host simple pattern |
bd912dd8 |
*
* If a line in the file doesn't conform to this format, loading fails |
fa51ec00 |
* |
bd912dd8 |
*/ |
288057e9 |
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
char *pattern;
char *flags;
size_t pattern_len;
cli_chomp(buffer);
line++;
if (!*buffer)
continue; /* skip empty lines */
if (functionality_level_check(buffer))
continue;
if (engine->cb_sigload && engine->cb_sigload("phishing", buffer, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
continue;
} |
bd912dd8 |
|
288057e9 |
entry++;
pattern = strchr(buffer, ':');
if (!pattern) {
cli_errmsg("Malformed regex list line %d\n", line);
fatal_error(matcher);
return CL_EMALFDB;
}
/*pattern[0]='\0';*/
flags = buffer + 1;
pattern++;
pattern_len = strlen(pattern);
if (pattern_len < FILEBUFF) {
pattern[pattern_len] = '/';
pattern[pattern_len + 1] = '\0';
} else {
cli_errmsg("Overlong regex line %d\n", line);
fatal_error(matcher);
return CL_EMALFDB;
}
if ((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
/* regex for hostname*/
if ((rc = regex_list_add_pattern(matcher, pattern)))
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
} else if ((buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
/*matches displayed host*/
if ((rc = add_static_pattern(matcher, pattern)))
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
} else if (buffer[0] == 'S' && (!is_whitelist || pattern[0] == 'W')) {
pattern[pattern_len] = '\0';
if (pattern[0] == 'W')
flags[0] = 'W';
if ((pattern[0] == 'W' || pattern[0] == 'F' || pattern[0] == 'P') && pattern[1] == ':') {
pattern += 2;
if ((rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P'))) {
cli_errmsg("Error loading at line: %d\n", line);
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
}
} else {
cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
return CL_EMALFDB;
}
} else {
return CL_EMALFDB;
}
}
matcher->list_loaded = 1;
if (signo)
*signo += entry;
return CL_SUCCESS;
} |
bd912dd8 |
/* Build the matcher list */ |
102cd430 |
cl_error_t cli_build_regex_list(struct regex_matcher *matcher) |
bd912dd8 |
{ |
102cd430 |
cl_error_t rc; |
288057e9 |
if (!matcher)
return CL_SUCCESS;
if (!matcher->list_inited || !matcher->list_loaded) {
cli_errmsg("Regex list not loaded!\n");
return -1; /*TODO: better error code */
}
cli_dbgmsg("Building regex list\n");
cli_hashtab_free(&matcher->suffix_hash);
if ((rc = cli_ac_buildtrie(&matcher->suffixes)))
return rc;
matcher->list_built = 1;
cli_hashset_destroy(&matcher->sha256_pfx_set);
return CL_SUCCESS; |
bd912dd8 |
}
/* Done with this matcher, free resources */ |
288057e9 |
void regex_list_done(struct regex_matcher *matcher) |
bd912dd8 |
{ |
288057e9 |
assert(matcher);
if (matcher->list_inited == 1) {
size_t i;
cli_ac_free(&matcher->suffixes);
if (matcher->suffix_regexes) {
for (i = 0; i < matcher->suffix_cnt; i++) {
struct regex_list *r = matcher->suffix_regexes[i].head;
while (r) {
struct regex_list *q = r;
r = r->nxt;
free(q->pattern);
free(q);
}
}
free(matcher->suffix_regexes);
matcher->suffix_regexes = NULL;
}
if (matcher->all_pregs) {
for (i = 0; i < matcher->regex_cnt; i++) {
regex_t *r = matcher->all_pregs[i];
cli_regfree(r); |
544fa973 |
MPOOL_FREE(matcher->mempool, r); |
288057e9 |
} |
544fa973 |
MPOOL_FREE(matcher->mempool, matcher->all_pregs); |
288057e9 |
}
cli_hashtab_free(&matcher->suffix_hash);
cli_bm_free(&matcher->sha256_hashes);
cli_bm_free(&matcher->hostkey_prefix);
} |
bd912dd8 |
}
|
288057e9 |
int is_regex_ok(struct regex_matcher *matcher) |
bd912dd8 |
{ |
288057e9 |
assert(matcher);
return (!matcher->list_inited || matcher->list_inited != -1); /* either we don't have a regexlist, or we initialized it successfully */ |
bd912dd8 |
}
|
5ee56e41 |
static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len) |
bd912dd8 |
{ |
288057e9 |
struct cli_matcher *root = &matcher->suffixes; |
544fa973 |
struct cli_ac_patt *new = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*new)); |
288057e9 |
size_t i;
int ret;
if (!new)
return CL_EMEM;
assert(root && suffix);
new->rtype = 0;
new->type = 0;
new->sigid = 0;
new->parts = 0;
new->partno = 0;
new->mindist = 0;
new->maxdist = 0;
new->offset_min = CLI_OFF_ANY;
new->length[0] = (uint16_t)len;
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
if (new->length[0] > root->maxpatlen)
root->maxpatlen = new->length[0];
|
544fa973 |
new->pattern = MPOOL_MALLOC(matcher->mempool, sizeof(new->pattern[0]) * len); |
288057e9 |
if (!new->pattern) { |
544fa973 |
MPOOL_FREE(matcher->mempool, new); |
241e7eb1 |
cli_errmsg("add_newsuffix: Unable to allocate memory for new->pattern\n"); |
288057e9 |
return CL_EMEM;
}
for (i = 0; i < len; i++)
new->pattern[i] = suffix[i]; /*new->pattern is short int* */
new->customdata = info;
new->virname = NULL;
if ((ret = cli_ac_addpatt(root, new))) { |
544fa973 |
MPOOL_FREE(matcher->mempool, new->pattern);
MPOOL_FREE(matcher->mempool, new); |
288057e9 |
return ret;
}
filter_add_static(&matcher->filter, (const unsigned char *)suffix, len, "regex");
return CL_SUCCESS; |
bd912dd8 |
}
|
2e11bcdf |
#define MODULE "regex_list: "
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */ |
bd912dd8 |
|
a2d14e06 |
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
{ |
288057e9 |
if (!ht->head)
ht->head = regex;
if (ht->tail) {
ht->tail->nxt = regex;
}
ht->tail = regex; |
a2d14e06 |
}
|
2e11bcdf |
/* returns 0 on success, clamav error code otherwise */ |
102cd430 |
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex) |
bd912dd8 |
{ |
288057e9 |
struct regex_matcher *matcher = cbdata;
struct regex_list *regex = cli_malloc(sizeof(*regex));
const struct cli_element *el;
void *tmp_matcher; /* save original address if OOM occurs */ |
2e11bcdf |
|
288057e9 |
assert(matcher);
if (!regex) { |
241e7eb1 |
cli_errmsg("add_pattern_suffix: Unable to allocate memory for regex\n"); |
288057e9 |
return CL_EMEM; |
241e7eb1 |
} |
288057e9 |
regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
regex->preg = iregex->preg;
regex->nxt = NULL;
el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
/* TODO: what if suffixes are prefixes of eachother and only one will |
2e11bcdf |
* match? */ |
288057e9 |
if (el) {
/* existing suffix */
assert((size_t)el->data < matcher->suffix_cnt);
list_add_tail(&matcher->suffix_regexes[el->data], regex);
} else {
/* new suffix */
size_t n = matcher->suffix_cnt++;
el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
tmp_matcher = matcher->suffix_regexes; /* save the current value before cli_realloc() */
tmp_matcher = cli_realloc(matcher->suffix_regexes, (n + 1) * sizeof(*matcher->suffix_regexes));
if (!tmp_matcher) {
free(regex);
return CL_EMEM;
}
matcher->suffix_regexes = tmp_matcher; /* success, point at new memory location */
matcher->suffix_regexes[n].tail = regex;
matcher->suffix_regexes[n].head = regex;
if (suffix[0] == '/' && suffix[1] == '\0')
matcher->root_regex_idx = n;
add_newsuffix(matcher, regex, suffix, suffix_len);
} |
102cd430 |
return CL_SUCCESS; |
bd912dd8 |
}
|
2e11bcdf |
/*
 * Reverse the NUL-terminated string @pattern in place.
 * Returns the length of the string.
 */
static size_t reverse_string(char *pattern)
{
    const size_t len = strlen(pattern);

    if (len > 1) {
        char *lo = pattern;
        char *hi = pattern + len - 1;

        /* swap from both ends until the two cursors meet */
        while (lo < hi) {
            const char tmp = *lo;
            *lo++          = *hi;
            *hi--          = tmp;
        }
    }
    return len;
}
|
9ee053fe |
static regex_t *new_preg(struct regex_matcher *matcher) |
10290ba3 |
{ |
288057e9 |
regex_t *r; |
544fa973 |
matcher->all_pregs = MPOOL_REALLOC(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs)); |
288057e9 |
if (!matcher->all_pregs) { |
241e7eb1 |
cli_errmsg("new_preg: Unable to reallocate memory\n"); |
288057e9 |
return NULL; |
241e7eb1 |
} |
544fa973 |
r = MPOOL_MALLOC(matcher->mempool, sizeof(*r)); |
288057e9 |
if (!r) { |
241e7eb1 |
cli_errmsg("new_preg: Unable to allocate memory\n"); |
288057e9 |
return NULL; |
241e7eb1 |
} |
288057e9 |
matcher->all_pregs[matcher->regex_cnt - 1] = r;
return r; |
10290ba3 |
}
|
102cd430 |
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern) |
bd912dd8 |
{ |
288057e9 |
size_t len;
struct regex_list regex; |
102cd430 |
cl_error_t rc; |
288057e9 |
len = reverse_string(pattern);
regex.nxt = NULL;
regex.pattern = cli_strdup(pattern);
regex.preg = NULL;
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
free(regex.pattern);
return rc; |
bd912dd8 |
}
|
102cd430 |
cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern) |
bd912dd8 |
{ |
102cd430 |
cl_error_t rc; |
288057e9 |
regex_t *preg;
size_t len;
/* we only match the host, so remove useless stuff */
const char remove_end[] = "([/?].*)?/";
const char remove_end2[] = "([/?].*)/";
len = strlen(pattern);
if (len > sizeof(remove_end)) {
if (strncmp(&pattern[len - sizeof(remove_end) + 1], remove_end, sizeof(remove_end) - 1) == 0) {
len -= sizeof(remove_end) - 1;
pattern[len++] = '/';
}
if (strncmp(&pattern[len - sizeof(remove_end2) + 1], remove_end2, sizeof(remove_end2) - 1) == 0) {
len -= sizeof(remove_end2) - 1;
pattern[len++] = '/';
}
}
pattern[len] = '\0';
preg = new_preg(matcher);
if (!preg)
return CL_EMEM;
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void *)matcher);
if (rc) {
cli_regfree(preg);
}
return rc; |
bd912dd8 |
} |