/* * Support for matcher using PCRE * * Copyright (C) 2007-2013 Sourcefire, Inc. * Copyright (C) 2014 Cisco Systems, Inc. * All Rights Reserved. * * Authors: Kevin Lin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #if HAVE_CONFIG_H #include "clamav-config.h" #endif #if HAVE_PCRE #include "clamav.h" #include "cltypes.h" #include "others.h" #include "matcher-pcre.h" #include "mpool.h" #include "regex_pcre.h" int cli_pcre_addpatt(struct cli_matcher *root, const char *trigger, const char *pattern, const char *cflags, const uint32_t *lsigid) { struct cli_pcre_meta **newmetatable = NULL, *pm = NULL; uint32_t pcre_count; const char *opt; int ret = CL_SUCCESS, options = 0; if (!root || !trigger || !pattern) { cli_errmsg("pcre_addpatt: NULL root or NULL trigger or NULL pattern\n"); return CL_ENULLARG; } /* TODO: trigger and regex checking (string length limitations, no self referencal or other pcre referential) */ /* allocating entries */ pm = (struct cli_pcre_meta *)mpool_calloc(root->mempool, 1, sizeof(*pm)); if (!pm) { cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta\n"); return CL_EMEM; } pm->trigger = strdup(trigger); if (!pm->trigger) { cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for trigger string\n"); cli_pcre_freemeta(pm); mpool_free(root->mempool, pm); return CL_EMEM; } pm->pdata.expression = strdup(pattern); if (!pm->pdata.expression) { cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for expression\n"); cli_pcre_freemeta(pm); mpool_free(root->mempool, pm); return CL_EMEM; } pm->lsigid[0] = lsigid[0]; pm->lsigid[1] = lsigid[1]; /* parse and add options, also totally not from snort */ if (cflags) { cli_dbgmsg("cli_pcre_addpatt: parsing pcre compile flags: %s\n", cflags); opt = cflags; /* cli_pcre_addoptions handles pcre specific options */ while (cli_pcre_addoptions(&(pm->pdata), &opt, 0) != CL_SUCCESS) { /* handle matcher specific options here */ switch (*opt) { /* no matcher-specific options atm */ case 'g': pm->flags |= CLI_PCRE_GLOBAL; break; default: cli_errmsg("cli_pcre_addpatt: unknown/extra pcre option encountered %c\n", *opt); cli_pcre_freemeta(pm); mpool_free(root->mempool, pm); return CL_EMALFDB; } opt++; } cli_dbgmsg("PCRE_CASELESS %08x\n", PCRE_CASELESS); cli_dbgmsg("PCRE_DOTALL %08x\n", PCRE_DOTALL); cli_dbgmsg("PCRE_MULTILINE %08x\n", PCRE_MULTILINE); cli_dbgmsg("PCRE_EXTENDED %08x\n", PCRE_EXTENDED); cli_dbgmsg("PCRE_ANCHORED %08x\n", PCRE_ANCHORED); cli_dbgmsg("PCRE_DOLLAR_ENDONLY %08x\n", PCRE_DOLLAR_ENDONLY); cli_dbgmsg("PCRE_UNGREEDY %08x\n", PCRE_UNGREEDY); cli_dbgmsg("PCRE_OPTIONS %08x\n", pm->pdata.options); } /* add pcre data to root after reallocation */ pcre_count = root->pcre_metas+1; newmetatable = (struct cli_pcre_meta **)mpool_realloc(root->mempool, root->pcre_metatable, pcre_count * sizeof(struct cli_pcre_meta *)); if (!newmetatable) { cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta table\n"); cli_pcre_freemeta(pm); mpool_free(root->mempool, pm); return CL_EMEM; } cli_dbgmsg("cli_pcre_addpatt: Adding /%s/ triggered on (%s) as subsig %d for lsigid %d\n", pm->pdata.expression, pm->trigger, pm->lsigid[1], pm->lsigid[0]); newmetatable[pcre_count-1] = pm; root->pcre_metatable = newmetatable; root->pcre_metas = pcre_count; return CL_SUCCESS; } int cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit) { unsigned int i; int ret; struct cli_pcre_meta *pm = NULL; for (i = 0; i < root->pcre_metas; ++i) { pm = root->pcre_metatable[i]; if (!pm) { cli_errmsg("cli_pcre_build: metadata for pcre %d is missing\n", i); return CL_ENULLARG; } cli_dbgmsg("cli_pcre_build: Compiling regex: %s\n", pm->pdata.expression); /* parse the regex - TODO: set start_offset (at the addpatt phase?), also no options override */ if ((ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 0)) != CL_SUCCESS) { cli_errmsg("cli_pcre_build: failed to parse pcre regex\n"); return ret; } } return CL_SUCCESS; } static inline void lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t lsigid1, uint32_t lsigid2, uint32_t realoff, int partial) { const struct cli_lsig_tdb *tdb = &root->ac_lsigtable[lsigid1]->tdb; if(realoff != CLI_OFF_NONE) { if(mdata->lsigsuboff_first[lsigid1][lsigid2] == CLI_OFF_NONE) mdata->lsigsuboff_first[lsigid1][lsigid2] = realoff; if(mdata->lsigsuboff_last[lsigid1][lsigid2] != CLI_OFF_NONE && ((!partial && realoff <= mdata->lsigsuboff_last[lsigid1][lsigid2]) || (partial && realoff < mdata->lsigsuboff_last[lsigid1][lsigid2]))) return; mdata->lsigcnt[lsigid1][lsigid2]++; if(mdata->lsigcnt[lsigid1][lsigid2] <= 1 || !tdb->macro_ptids || !tdb->macro_ptids[lsigid2]) mdata->lsigsuboff_last[lsigid1][lsigid2] = realoff; } if (mdata->lsigcnt[lsigid1][lsigid2] > 1) { /* Check that the previous match had a macro match following it at the * correct distance. This check is only done after the 1st match.*/ const struct cli_ac_patt *macropt; uint32_t id, last_macro_match, smin, smax, last_macroprev_match; if (!tdb->macro_ptids) return; id = tdb->macro_ptids[lsigid2]; if (!id) return; macropt = root->ac_pattable[id]; smin = macropt->ch_mindist[0]; smax = macropt->ch_maxdist[0]; /* start of last macro match */ last_macro_match = mdata->macro_lastmatch[macropt->sigid]; /* start of previous lsig subsig match */ last_macroprev_match = mdata->lsigsuboff_last[lsigid1][lsigid2]; if (last_macro_match != CLI_OFF_NONE) cli_dbgmsg("Checking macro match: %u + (%u - %u) == %u\n", last_macroprev_match, smin, smax, last_macro_match); if (last_macro_match == CLI_OFF_NONE || last_macroprev_match + smin > last_macro_match || last_macroprev_match + smax < last_macro_match) { cli_dbgmsg("Canceled false lsig macro match\n"); /* Previous match was false - cancel it */ mdata->lsigcnt[lsigid1][lsigid2]--; mdata->lsigsuboff_last[lsigid1][lsigid2] = realoff; } else { /* mark the macro sig itself matched */ mdata->lsigcnt[lsigid1][lsigid2+1]++; mdata->lsigsuboff_last[lsigid1][lsigid2+1] = last_macro_match; } } } int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const struct cli_matcher *root, struct cli_ac_data *mdata, cli_ctx *ctx) { struct cli_pcre_meta **metatable = root->pcre_metatable, *pm = NULL; struct cli_pcre_data *pd; unsigned int i, evalcnt; uint64_t evalids; uint32_t global; int rc, offset, ovector[OVECCOUNT]; for (i = 0; i < root->pcre_metas; ++i) { pm = root->pcre_metatable[i]; pd = &(pm->pdata); /* check the evaluation of the trigger */ cli_dbgmsg("cli_pcre_scanbuf: checking %s; running regex /%s/\n", pm->trigger, pd->expression); if ((strcmp(pm->trigger, PCRE_BYPASS)) && (cli_ac_chklsig(pm->trigger, pm->trigger + strlen(pm->trigger), mdata->lsigcnt[pm->lsigid[0]], &evalcnt, &evalids, 0) != 1)) continue; global = (pm->flags & CLI_PCRE_GLOBAL); offset = pd->search_offset; cli_dbgmsg("cli_pcre_scanbuf: triggered %s; running regex /%s/%s\n", pm->trigger, pd->expression, global ? " (global)":""); /* if the global flag is set, loop through the scanning - TODO: how does this affect really big files? */ do { rc = cli_pcre_match(pd, buffer, length, CLI_PCREMATCH_NOOVERRIDE, offset, ovector, OVECCOUNT); cli_dbgmsg("cli_pcre_scanbuf: running regex /%s/ returns %d\n", pd->expression, rc); /* matched, rc shouldn't be >0 unless a full match occurs */ if (rc > 0) { cli_dbgmsg("cli_pcre_scanbuf: assigning lsigcnt[%d][%d], located @ %d\n", pm->lsigid[0], pm->lsigid[1], ovector[0]); lsig_sub_matched(root, mdata, pm->lsigid[0], pm->lsigid[1], ovector[0], 0); } /* move off to the end of the match for next match; * NOTE: misses matches starting within the last match */ offset = ovector[1]; /* clear the ovector results (they fall through the pcre_match) */ memset(ovector, 0, sizeof(ovector)); } while (global && rc > 0 && offset < length); /* handle error codes */ if (rc < 0 && rc != PCRE_ERROR_NOMATCH) { cli_errmsg("cli_pcre_scanbuf: cli_pcre_match: pcre_exec: returned error %d\n", rc); /* TODO: convert the pcre error codes to clamav error codes, handle match_limit and match_limit_recursion exceeded */ return CL_BREAK; } } return CL_SUCCESS; } int cli_pcre_ucondscanbuf(const unsigned char *buffer, uint32_t length, const struct cli_matcher *root, struct cli_ac_data *mdata, cli_ctx *ctx) { struct cli_pcre_meta **metatable = root->pcre_metatable, *pm = NULL; struct cli_pcre_data *pd; unsigned int i, evalcnt; uint64_t evalids; uint32_t global; int rc, offset, ovector[OVECCOUNT]; for (i = 0; i < root->pcre_metas; ++i) { pm = root->pcre_metatable[i]; pd = &(pm->pdata); global = (pm->flags & CLI_PCRE_GLOBAL); offset = pd->search_offset; cli_dbgmsg("cli_pcre_ucondscanbuf: unconditionally running regex /%s/\n", pd->expression); /* if the global flag is set, loop through the scanning - TODO: how does this affect really big files? */ do { rc = cli_pcre_match(pd, buffer, length, CLI_PCREMATCH_NOOVERRIDE, offset, ovector, OVECCOUNT); cli_dbgmsg("cli_pcre_ucondscanbuf: running regex /%s/ returns %d\n", pd->expression, rc); /* matched, rc shouldn't be >0 unless a full match occurs */ if (rc > 0) { cli_dbgmsg("cli_pcre_ucondscanbuf: assigning lsigcnt[%d][%d], located @ %d\n", pm->lsigid[0], pm->lsigid[1], ovector[0]); lsig_sub_matched(root, mdata, pm->lsigid[0], pm->lsigid[1], ovector[0], 0); } /* move off to the end of the match for next match; * NOTE: misses matches starting within the last match */ offset = ovector[1]; /* clear the ovector results (they fall through the pcre_match) */ memset(ovector, 0, sizeof(ovector)); } while (global && rc > 0 && offset < length); /* handle error codes */ if (rc < 0 && rc != PCRE_ERROR_NOMATCH) { cli_errmsg("cli_pcre_ucondscanbuf: cli_pcre_match: pcre_exec: returned error %d\n", rc); /* TODO: convert the pcre error codes to clamav error codes, handle match_limit and match_limit_recursion exceeded */ return CL_BREAK; } } return CL_SUCCESS; } void cli_pcre_freemeta(struct cli_pcre_meta *pm) { if (!pm) return; if (pm->trigger) free(pm->trigger); cli_pcre_free_single(&(pm->pdata)); } void cli_pcre_freetable(struct cli_matcher *root) { uint32_t i; struct cli_pcre_meta *pm = NULL; for (i = 0; i < root->pcre_metas; ++i) { /* free pcre meta*/ pm = root->pcre_metatable[i]; cli_pcre_freemeta(pm); mpool_free(root->mempool, pm); } /* free holding structures and set count to zero */ mpool_free(root->mempool, root->pcre_metatable); root->pcre_metatable = NULL; root->pcre_metas = 0; } #endif /* HAVE_PCRE */