libclamav/matcher-pcre.c
5fa73369
 /*
  *  Support for matcher using PCRE
  *
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5fa73369
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
  *
  *  Authors: Kevin Lin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include "clamav.h"
82fa5ba0
 #include "dconf.h"
5c2c7233
 #include "events.h"
5fa73369
 #include "others.h"
7ab4eec7
 #include "matcher.h"
ea3d1e72
 #include "matcher-ac.h"
5fa73369
 #include "matcher-pcre.h"
 #include "mpool.h"
b289385d
 #include "readdb.h"
5fa73369
 #include "regex_pcre.h"
9e408e76
 #include "str.h"
5fa73369
 
62c00993
 #if HAVE_PCRE
5841080a
 #if USING_PCRE2
 #define PCRE2_CODE_UNIT_WIDTH 8
 #include <pcre2.h>
 #else
 #include <pcre.h>
 #endif
 
7250b47e
 /* DEBUGGING
  *
  * pm_dbgmsg() forwards its arguments to cli_dbgmsg() only when
  * MATCHER_PCRE_DEBUG is defined; otherwise it expands to nothing, so the
  * call site compiles to an empty statement. Uncomment the define below to
  * enable the verbose matcher tracing.
  */
 //#define MATCHER_PCRE_DEBUG
 #ifdef MATCHER_PCRE_DEBUG
 #  define pm_dbgmsg(...) cli_dbgmsg( __VA_ARGS__)
 #else
 #  define pm_dbgmsg(...)
 #endif
 /* the switch is only needed to pick the pm_dbgmsg() definition above */
 #undef MATCHER_PCRE_DEBUG
 
5c2c7233
 /* PERFORMANCE MACROS AND FUNCTIONS */
 /* number of PCRE subsignatures that can be profiled */
 #define MAX_TRACKED_PCRE 64
 /* events defined per subsignature: one time event and one match counter */
 #define PCRE_EVENTS_PER_SIG 2
 /* size of the events table; parenthesized so the macro is safe inside any
  * larger expression (e.g. division or modulo) */
 #define MAX_PCRE_SIGEVENT_ID (MAX_TRACKED_PCRE * PCRE_EVENTS_PER_SIG)
 
0d370098
 /* Events table holding the per-subsignature PCRE statistics; created on
  * first use by pcre_perf_events_init() and passed to cli_events_free() by
  * cli_pcre_perf_events_destroy(). */
 cli_events_t *p_sigevents = NULL;
5c2c7233
 /* Next unused event id in p_sigevents; advanced once per defined event by
  * pcre_perf_events_init() and reset to 0 by cli_pcre_perf_events_destroy(). */
 unsigned int p_sigid = 0;
 
b8de9217
 static void pcre_perf_events_init(struct cli_pcre_meta *pm, const char *virname)
5c2c7233
 {
     int ret;
147df16a
     size_t namelen;
5c2c7233
 
     if (!p_sigevents) {
         p_sigevents = cli_events_new(MAX_PCRE_SIGEVENT_ID);
         if (!p_sigevents) {
             cli_errmsg("pcre_perf: no memory for events table\n");
             return;
         }
     }
 
     if (p_sigid > MAX_PCRE_SIGEVENT_ID - PCRE_EVENTS_PER_SIG - 1) {
         cli_errmsg("pcre_perf: events table full. Increase MAX_TRACKED_PCRE\n");
         return;
     }
 
147df16a
     if (!virname) {
b8de9217
         virname = "(null)";
147df16a
         namelen = 7;
     } else {
         namelen = strlen(virname)+strlen(pm->pdata.expression)+3;
     }
b8de9217
 
5c2c7233
     /* set the name */
b289385d
     pm->statname = (char*)cli_calloc(1, namelen);
b8de9217
     if (!pm->statname) {
         return;
     }
     snprintf(pm->statname, namelen, "%s/%s/", virname, pm->pdata.expression);
5c2c7233
 
b8de9217
     pm_dbgmsg("pcre_perf: adding sig ids starting %u for %s\n", p_sigid, pm->statname);
5c2c7233
 
     /* register time event */
     pm->sigtime_id = p_sigid;
b8de9217
     ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_time, multiple_sum);
5c2c7233
     if (ret) {
         cli_errmsg("pcre_perf: cli_event_define() error for time event id %d\n", pm->sigtime_id);
         pm->sigtime_id = MAX_PCRE_SIGEVENT_ID+1;
         return;
     }
 
     /* register match count */
     pm->sigmatch_id = p_sigid;
b8de9217
     ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_int, multiple_sum);
5c2c7233
     if (ret) {
         cli_errmsg("pcre_perf: cli_event_define() error for matches event id %d\n", pm->sigmatch_id);
         pm->sigmatch_id = MAX_PCRE_SIGEVENT_ID+1;
         return;
     }
 }
 
 /* One row of the PCRE performance report built by cli_pcre_perf_print(). */
 struct sigperf_elem {
     const char * name;          /* event name, or a placeholder when unnamed */
     uint64_t usecs;             /* accumulated value of the time event */
     unsigned long run_count;    /* number of samples of the time event */
     unsigned long match_count;  /* number of samples of the match event */
 };

 /*
  * qsort-style comparator ordering report rows by average time per run
  * (usecs / run_count), slowest first.
  *
  * Returns a negative value when a is slower than b, a positive value when a
  * is faster, and 0 when both averages are equal. The averages are compared
  * instead of subtracted: a 64-bit difference does not fit the int return
  * value and would yield a wrong ordering. A row with run_count == 0 is
  * treated as having an average of 0 rather than dividing by zero.
  */
 static int sigelem_comp(const void * a, const void * b)
 {
     const struct sigperf_elem *ela = (const struct sigperf_elem *)a;
     const struct sigperf_elem *elb = (const struct sigperf_elem *)b;
     const uint64_t avg_a = ela->run_count ? ela->usecs / ela->run_count : 0;
     const uint64_t avg_b = elb->run_count ? elb->usecs / elb->run_count : 0;

     if (avg_a > avg_b)
         return -1;
     if (avg_a < avg_b)
         return 1;
     return 0;
 }
 
 void cli_pcre_perf_print()
 {
     struct sigperf_elem stats[MAX_TRACKED_PCRE], *elem = stats;
     int i, elems = 0, max_name_len = 0, name_len;
 
     if (!p_sigid || !p_sigevents) {
         cli_warnmsg("cli_pcre_perf_print: statistics requested but no PCREs were loaded!\n");
         return;
     }
 
     memset(stats, 0, sizeof(stats));
     for (i=0;i<MAX_TRACKED_PCRE;i++) {
         union ev_val val;
         uint32_t count;
         const char * name = cli_event_get_name(p_sigevents, i*PCRE_EVENTS_PER_SIG);
         cli_event_get(p_sigevents, i*PCRE_EVENTS_PER_SIG, &val, &count);
         if (!count) {
             if (name)
                 cli_dbgmsg("No event triggered for %s\n", name);
             continue;
         }
         if (name)
             name_len = strlen(name);
         else
             name_len = 0;
         if (name_len > max_name_len)
             max_name_len = name_len;
         elem->name = name?name:"\"noname\"";
         elem->usecs = val.v_int;
         elem->run_count = count;
         cli_event_get(p_sigevents, i*PCRE_EVENTS_PER_SIG+1, &val, &count);
         elem->match_count = count;
         elem++;
         elems++;
     }
90379a9e
     if (max_name_len < strlen("PCRE Expression"))
         max_name_len = strlen("PCRE Expression");
0d370098
 
5c2c7233
     cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
 
     elem = stats;
     /* name runs matches microsecs avg */
     cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "PCRE Expression",
                  8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg");
     cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "===============",
                  8, "=====", 8, "========", 12, "===========", 9, "=========");
     while (elem->run_count) {
         cli_infomsg (NULL, "%-*s %*lu %*lu %*llu %*.2f\n", max_name_len, elem->name,
                      8, elem->run_count, 8, elem->match_count,
059ca614
                      12, (long long unsigned)elem->usecs, 9, (double)elem->usecs/elem->run_count);
5c2c7233
         elem++;
     }
 }
 
 
 /*
  * Release the PCRE performance events table and reset the id counter.
  *
  * The global pointer is cleared as well so that a later
  * pcre_perf_events_init(), cli_pcre_perf_print() or scan does not use the
  * freed table.
  */
 void cli_pcre_perf_events_destroy()
 {
     cli_events_free(p_sigevents);
     p_sigevents = NULL;
     p_sigid = 0;
 }
 
 
 /* PCRE MATCHER FUNCTIONS */
a6d2b523
 /* Public entry point for PCRE initialisation; the work is delegated to
  * cli_pcre_init_internal() and its result is handed back unchanged. */
 int cli_pcre_init()
 {
     int status = cli_pcre_init_internal();

     return status;
 }
 
86eddf11
 int cli_pcre_addpatt(struct cli_matcher *root, const char *virname, const char *trigger, const char *pattern, const char *cflags, const char *offset, const uint32_t *lsigid, unsigned int options)
35a05ff8
 {
     struct cli_pcre_meta **newmetatable = NULL, *pm = NULL;
     uint32_t pcre_count;
39597f25
     const char *opt;
5c2c7233
     int ret = CL_SUCCESS, rssigs;
35a05ff8
 
7ab4eec7
     if (!root || !trigger || !pattern || !offset) {
7250b47e
         cli_errmsg("cli_pcre_addpatt: NULL root or NULL trigger or NULL pattern or NULL offset\n");
35a05ff8
         return CL_ENULLARG;
     }
 
3c333c78
     /* TODO: trigger and regex checking (backreference limitations?) (control pattern limitations?) */
5843ac79
     /* cli_ac_chklsig will fail a empty trigger; empty patterns can cause an infinite loop */
     if (*trigger == '\0' || *pattern == '\0') {
         cli_errmsg("cli_pcre_addpatt: trigger or pattern cannot be an empty string\n");
         return CL_EMALFDB;
     }
15464f6c
     if (cflags && *cflags == '\0') {
         cflags = NULL;
     }
5843ac79
 
7250b47e
     if (lsigid)
297c14a8
         pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) as subsig %d for lsigid %d\n", 
                   pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger, lsigid[1], lsigid[0]);
7250b47e
     else
         pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) [no lsigid]\n",
297c14a8
                   pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger);
7250b47e
 
6b7c153a
 #ifdef PCRE_BYPASS
     /* check for trigger bypass */
     if (strcmp(trigger, PCRE_BYPASS)) {
 #endif
         /* validate the lsig trigger */
         rssigs = cli_ac_chklsig(trigger, trigger + strlen(trigger), NULL, NULL, NULL, 1);
         if(rssigs == -1) {
             cli_errmsg("cli_pcre_addpatt: regex subsig /%s/ is missing a valid logical trigger\n", pattern);
0d370098
             return CL_EMALFDB;
         }
6b7c153a
 
         if (lsigid) {
             if (rssigs > lsigid[1]) {
                 cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger refers to subsequent subsig %d\n", lsigid[1], rssigs);
                 return CL_EMALFDB;
             }
             if (rssigs == lsigid[1]) {
                 cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger is self-referential\n", lsigid[1]);
                 return CL_EMALFDB;
             }
0d370098
         }
6b7c153a
         else {
             cli_dbgmsg("cli_pcre_addpatt: regex subsig is missing lsigid data\n");
         }
 #ifdef PCRE_BYPASS
ad0303b9
     }
6b7c153a
 #endif
35a05ff8
 
     /* allocating entries */
     pm = (struct cli_pcre_meta *)mpool_calloc(root->mempool, 1, sizeof(*pm));
     if (!pm) {
         cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta\n");
         return CL_EMEM;
     }
 
0118458f
     pm->trigger = cli_mpool_strdup(root->mempool, trigger);
35a05ff8
     if (!pm->trigger) {
         cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for trigger string\n");
0118458f
         cli_pcre_freemeta(root, pm);
35a05ff8
         mpool_free(root->mempool, pm);
         return CL_EMEM;
     }
 
0118458f
     pm->virname = (char *)cli_mpool_virname(root->mempool, virname, options & CL_DB_OFFICIAL);
86eddf11
     if(!pm->virname) {
         cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for virname or NULL virname\n");
0118458f
         cli_pcre_freemeta(root, pm);
35a05ff8
         mpool_free(root->mempool, pm);
         return CL_EMEM;
     }
 
0d370098
     if (lsigid) {
86eddf11
         root->ac_lsigtable[lsigid[0]]->virname = pm->virname;
 
0d370098
         pm->lsigid[0] = 1;
         pm->lsigid[1] = lsigid[0];
         pm->lsigid[2] = lsigid[1];
     }
     else {
         /* sigtool */
         pm->lsigid[0] = 0;
     }
35a05ff8
 
86eddf11
     pm->pdata.expression = strdup(pattern);
     if (!pm->pdata.expression) {
         cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for expression\n");
0118458f
         cli_pcre_freemeta(root, pm);
86eddf11
         mpool_free(root->mempool, pm);
         return CL_EMEM;
     }
 
7ab4eec7
     /* offset parsing and usage, similar to cli_ac_addsig */
7250b47e
     /* relative and type-specific offsets handled during scan */
7ab4eec7
     ret = cli_caloff(offset, NULL, root->type, pm->offdata, &(pm->offset_min), &(pm->offset_max));
     if (ret != CL_SUCCESS) {
         cli_errmsg("cli_pcre_addpatt: cannot calculate offset data: %s for pattern: %s\n", offset, pattern);
0118458f
         cli_pcre_freemeta(root, pm);
7ab4eec7
         mpool_free(root->mempool, pm);
         return ret;
     }
     if(pm->offdata[0] != CLI_OFF_ANY) {
         if(pm->offdata[0] == CLI_OFF_ABSOLUTE)
             root->pcre_absoff_num++;
         else
             root->pcre_reloff_num++;
     }
 
39597f25
     /* parse and add options, also totally not from snort */
     if (cflags) {
         opt = cflags;
 
         /* cli_pcre_addoptions handles pcre specific options */
         while (cli_pcre_addoptions(&(pm->pdata), &opt, 0) != CL_SUCCESS) {
18ff5029
             /* it will return here to handle any matcher specific options */
39597f25
             switch (*opt) {
efda6201
             case 'g':  pm->flags |= CLI_PCRE_GLOBAL;            break;
18a1c962
             case 'r':  pm->flags |= CLI_PCRE_ROLLING;           break;
7ab4eec7
             case 'e':  pm->flags |= CLI_PCRE_ENCOMPASS;         break;
39597f25
             default:
                 cli_errmsg("cli_pcre_addpatt: unknown/extra pcre option encountered %c\n", *opt);
0118458f
                 cli_pcre_freemeta(root, pm);
39597f25
                 mpool_free(root->mempool, pm);
                 return CL_EMALFDB;
             }
             opt++;
         }
 
7250b47e
         if (pm->flags) {
af9b7ea2
             pm_dbgmsg("Matcher:  %s%s%s\n",
18a1c962
                       pm->flags & CLI_PCRE_GLOBAL ? "CLAMAV_GLOBAL " : "",
                       pm->flags & CLI_PCRE_ROLLING ? "CLAMAV_ROLLING " : "",
7250b47e
                       pm->flags & CLI_PCRE_ENCOMPASS ? "CLAMAV_ENCOMPASS " : "");
         }
         else
             pm_dbgmsg("Matcher:  NONE\n");
 
         if (pm->pdata.options) {
5841080a
 #if USING_PCRE2
             pm_dbgmsg("Compiler: %s%s%s%s%s%s%s\n",
                       pm->pdata.options & PCRE2_CASELESS ? "PCRE2_CASELESS " : "",
                       pm->pdata.options & PCRE2_DOTALL ? "PCRE2_DOTALL " : "",
                       pm->pdata.options & PCRE2_MULTILINE ? "PCRE2_MULTILINE " : "",
                       pm->pdata.options & PCRE2_EXTENDED ? "PCRE2_EXTENDED " : "",
 
                       pm->pdata.options & PCRE2_ANCHORED ? "PCRE2_ANCHORED " : "",
                       pm->pdata.options & PCRE2_DOLLAR_ENDONLY ? "PCRE2_DOLLAR_ENDONLY " : "",
                       pm->pdata.options & PCRE2_UNGREEDY ? "PCRE2_UNGREEDY " : "");
 #else
7250b47e
             pm_dbgmsg("Compiler: %s%s%s%s%s%s%s\n",
                       pm->pdata.options & PCRE_CASELESS ? "PCRE_CASELESS " : "",
                       pm->pdata.options & PCRE_DOTALL ? "PCRE_DOTALL " : "",
                       pm->pdata.options & PCRE_MULTILINE ? "PCRE_MULTILINE " : "",
                       pm->pdata.options & PCRE_EXTENDED ? "PCRE_EXTENDED " : "",
 
                       pm->pdata.options & PCRE_ANCHORED ? "PCRE_ANCHORED " : "",
                       pm->pdata.options & PCRE_DOLLAR_ENDONLY ? "PCRE_DOLLAR_ENDONLY " : "",
                       pm->pdata.options & PCRE_UNGREEDY ? "PCRE_UNGREEDY " : "");
5841080a
 #endif
7250b47e
         }
         else
             pm_dbgmsg("Compiler: NONE\n");
39597f25
     }
 
5c2c7233
     /* add metadata to the performance tracker */
     if (options & CL_DB_PCRE_STATS)
b8de9217
         pcre_perf_events_init(pm, virname);
5c2c7233
 
39597f25
     /* add pcre data to root after reallocation */
35a05ff8
     pcre_count = root->pcre_metas+1;
     newmetatable = (struct cli_pcre_meta **)mpool_realloc(root->mempool, root->pcre_metatable,
                                          pcre_count * sizeof(struct cli_pcre_meta *));
     if (!newmetatable) {
         cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta table\n");
0118458f
         cli_pcre_freemeta(root, pm);
35a05ff8
         mpool_free(root->mempool, pm);
         return CL_EMEM;
     }
 
     newmetatable[pcre_count-1] = pm;
     root->pcre_metatable = newmetatable;
 
     root->pcre_metas = pcre_count;
 
     return CL_SUCCESS;
 }
5fa73369
 
e1ab4bec
 int cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf)
9bc7c138
 {
35a05ff8
     unsigned int i;
     int ret;
     struct cli_pcre_meta *pm = NULL;
65dd1a3c
     int disable_all = 0;
 
5e572e2f
     if (dconf && !(dconf->pcre & PCRE_CONF_SUPPORT))
65dd1a3c
         disable_all = 1;
35a05ff8
 
     for (i = 0; i < root->pcre_metas; ++i) {
         pm = root->pcre_metatable[i];
a0190b17
         if (!pm) {
             cli_errmsg("cli_pcre_build: metadata for pcre %d is missing\n", i);
             return CL_ENULLARG;
         }
35a05ff8
 
82fa5ba0
         /* for safety, disable all pcre */
         if (disable_all) {
             pm->flags |= CLI_PCRE_DISABLED;
             continue;
         }
 
         if (pm->flags & CLI_PCRE_DISABLED) {
             cli_dbgmsg("cli_pcre_build: Skip compiling regex: %s (disabled)\n", pm->pdata.expression);
             continue;
         }
 
df50a5ff
         /* disable global */
65dd1a3c
         if (dconf && !(dconf->pcre & PCRE_CONF_GLOBAL)) {
df50a5ff
             cli_dbgmsg("cli_pcre_build: disabling global option for regex /%s/\n", pm->pdata.expression);
             pm->flags &= ~(CLI_PCRE_GLOBAL);
         }
effb4f51
 
         /* options override through metadata manipulation */
5841080a
 #if USING_PCRE2
         //pm->pdata.options |= PCRE2_NEVER_UTF; /* disables (?UTF*) potential security vuln */
         //pm->pdata.options |= PCRE2_UCP;
         //pm->pdata.options |= PCRE2_AUTO_CALLOUT; /* used with CALLOUT(-BACK) function */
 #else
         //pm->pdata.options |= PCRE_NEVER_UTF; /* implemented in 8.33, disables (?UTF*) potential security vuln */
effb4f51
         //pm->pdata.options |= PCRE_UCP;/* implemented in 8.20 */
         //pm->pdata.options |= PCRE_AUTO_CALLOUT; /* used with CALLOUT(-BACK) function */
5841080a
 #endif
effb4f51
 
65dd1a3c
         if (dconf && (dconf->pcre & PCRE_CONF_OPTIONS)) {
df50a5ff
             /* compile the regex, no options override *wink* */
7250b47e
             pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/\n", pm->pdata.expression);
df50a5ff
             ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 0);
         }
         else {
22cb38ed
             /* compile the regex, options overridden and disabled */
7250b47e
             pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/ (without options)\n", pm->pdata.expression);
df50a5ff
             ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 1);
         }
         if (ret != CL_SUCCESS) {
82fa5ba0
             cli_errmsg("cli_pcre_build: failed to build pcre regex\n");
7250b47e
             pm->flags |= CLI_PCRE_DISABLED; /* disable the pcre, currently will terminate execution */
9bc7c138
             return ret;
         }
     }
 
     return CL_SUCCESS;
 }
 
8c85efcd
 /* TODO - handle VI and Macro offset types */
82fa5ba0
 /*
  * Compute, for the file described by info, the start offset and maximum
  * shift of every PCRE subsignature in root.
  *
  * root - matcher holding the pcre meta table
  * data - receives two arrays of root->pcre_metas entries (offset, shift);
  *        release them with cli_pcre_freeoff()
  * info - target information passed to cli_caloff() for relative offsets
  * ctx  - scan context, may be NULL; when its dconf disables PCRE support
  *        nothing is computed
  *
  * Returns CL_ENULLARG when data is NULL, CL_EMEM on allocation failure, the
  * cli_caloff() error when a relative offset cannot be computed, CL_SUCCESS
  * otherwise. Whenever no arrays are handed out (nothing to do, or an error)
  * both pointers in data are NULL, so calling cli_pcre_freeoff() afterwards
  * is always safe.
  */
 int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx)
 {
     /* TANGENT: maintain relative offset data in cli_ac_data? */
     int ret;
     unsigned int i;
     struct cli_pcre_meta *pm;
     uint32_t endoff = 0;

     if (!data) {
         return CL_ENULLARG;
     }

     data->shift  = NULL;
     data->offset = NULL;

     if (!root || !root->pcre_metatable || !info || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT))) {
         return CL_SUCCESS;
     }

     /* allocate data structures */
     data->shift = (uint32_t *) cli_calloc(root->pcre_metas, sizeof(uint32_t));
     if (!data->shift) {
         cli_errmsg("cli_pcre_recaloff: cannot allocate memory for data->shift\n");
         return CL_EMEM;
     }
     data->offset = (uint32_t *) cli_calloc(root->pcre_metas, sizeof(uint32_t));
     if (!data->offset) {
         cli_errmsg("cli_pcre_recaloff: cannot allocate memory for data->offset\n");
         free(data->shift);
         data->shift = NULL;
         return CL_EMEM;
     }

     pm_dbgmsg("CLI_OFF_NONE: %u\n", CLI_OFF_NONE);
     pm_dbgmsg("CLI_OFF_ANY: %u\n", CLI_OFF_ANY);

     /* iterate across all pcre metadata and recalc offsets */
     for (i = 0; i < root->pcre_metas; ++i) {
         pm = root->pcre_metatable[i];

         /* skip broken pcres, not getting executed anyways */
         if (pm->flags & CLI_PCRE_DISABLED) {
             data->offset[i] = CLI_OFF_NONE;
             data->shift[i] = 0;
             continue;
         }

         if (pm->offdata[0] == CLI_OFF_ANY) {
             data->offset[i] = CLI_OFF_ANY;
             data->shift[i] = 0;
         }
         else if (pm->offdata[0] == CLI_OFF_NONE) {
             data->offset[i] = CLI_OFF_NONE;
             data->shift[i] = 0;
         }
         else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) {
             data->offset[i] = pm->offdata[1];
             data->shift[i] = pm->offdata[2];
         }
         else {
             ret = cli_caloff(NULL, info, root->type, pm->offdata, &data->offset[i], &endoff);
             if (ret != CL_SUCCESS) {
                 cli_errmsg("cli_pcre_recaloff: cannot recalculate relative offset for signature\n");
                 /* clear the pointers so a later cli_pcre_freeoff() cannot
                  * free the arrays a second time */
                 free(data->shift);
                 data->shift = NULL;
                 free(data->offset);
                 data->offset = NULL;
                 return ret;
             }
             /* CLI_OFF_NONE gets passed down, CLI_OFF_ANY gets reinterpreted */
             /* TODO - CLI_OFF_VERSION is interpreted as CLI_OFF_ANY(?) */
             if (data->offset[i] == CLI_OFF_ANY) {
                 data->shift[i] = 0;
             }
             else {
                 data->shift[i] = endoff - (data->offset[i]);
             }
         }

         pm_dbgmsg("%u: %u %u->%u(+%u)\n", i, pm->offdata[0], data->offset[i],
                   data->offset[i]+data->shift[i], data->shift[i]);
     }

     return CL_SUCCESS;
 }
 
 void cli_pcre_freeoff(struct cli_pcre_off *data)
 {
601b2ef8
     if (data) {
         free(data->offset);
         data->offset = NULL;
         free(data->shift);
         data->shift = NULL;
     }
7ab4eec7
 }
 
ac0e3359
 int cli_pcre_qoff(struct cli_pcre_meta *pm, uint32_t length, uint32_t *adjbuffer, uint32_t *adjshift)
 {
     if (!pm)
         return CL_ENULLARG;
 
     /* default to scanning whole buffer but try to use existing offdata */
     if (pm->offdata[0] == CLI_OFF_NONE) {
         return CL_BREAK;
     }
18a1c962
     else if (pm->offdata[0] == CLI_OFF_ANY) {
         *adjbuffer = CLI_OFF_ANY;
         *adjshift = 0;
     }
ac0e3359
     else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) {
         *adjbuffer = pm->offdata[1];
         *adjshift = pm->offdata[2];
     }
     else if (pm->offdata[0] == CLI_OFF_EOF_MINUS) {
         *adjbuffer = length - pm->offdata[1];
         *adjshift = pm->offdata[2];
     }
     else {
8c85efcd
         /* all relative offsets */
         /* TODO - check if relative offsets apply for normal hex substrs */
ac0e3359
         *adjbuffer = 0;
         *adjshift = 0;
     }
 
     return CL_SUCCESS;
 }
 
2d785c96
 /*
  * Run the PCRE subsignatures of root against a buffer.
  *
  * buffer  - data to scan
  * length  - number of bytes in buffer
  * virname - if not NULL, receives the name of a matching raw signature
  * res     - if not NULL, raw matches (no lsigid) are prepended to this list
  *           instead of being reported as detections (sigtool)
  * root    - matcher holding the pcre meta table
  * mdata   - logical signature state used to evaluate triggers and to record
  *           subsignature matches
  * data    - offsets from cli_pcre_recaloff(); when NULL (or when it holds no
  *           arrays) offsets are taken from cli_pcre_qoff()
  * ctx     - scan context, may be NULL
  *
  * For each enabled pcre whose trigger evaluates true, the regex is matched
  * inside the window given by its offset/shift; with the global flag the
  * match is repeated from the end of the previous match.
  *
  * Returns CL_SUCCESS when nothing was detected (also when there is nothing
  * to scan or PCRE support is disabled), CL_VIRUS when a raw signature
  * matched and no error occurred, CL_ETIMEOUT when the time limit was hit,
  * or the first error raised by a helper.
  */
 int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const char **virname, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, const struct cli_pcre_off *data, cli_ctx *ctx)
 {
     struct cli_pcre_meta *pm = NULL;
     struct cli_pcre_data *pd;
     struct cli_pcre_results p_res;
     struct cli_ac_result *newres;
     uint32_t adjbuffer, adjshift, adjlength;
     unsigned int i, evalcnt = 0;
     uint64_t evalids = 0;
     uint32_t global, encompass, rolling;
     int rc = 0, offset = 0, ret = CL_SUCCESS, options = 0;
     uint8_t viruses_found = 0;
     int have_offdata;

     if (!root || (root->pcre_metas == 0) || (!root->pcre_metatable) || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT)))
         return CL_SUCCESS;

     /* cli_pcre_recaloff() can succeed without allocating the arrays */
     have_offdata = (data && data->offset && data->shift);

     memset(&p_res, 0, sizeof(p_res));

     for (i = 0; i < root->pcre_metas; ++i) {

         pm = root->pcre_metatable[i];
         pd = &(pm->pdata);

         /* skip checking and running disabled pcres */
         if (pm->flags & CLI_PCRE_DISABLED) {
             cli_dbgmsg("cli_pcre_scanbuf: skipping disabled regex /%s/\n", pd->expression);
             continue;
         }

         /* skip checking and running CLI_OFF_NONE pcres */
         if (have_offdata && data->offset[i] == CLI_OFF_NONE) {
             pm_dbgmsg("cli_pcre_scanbuf: skipping CLI_OFF_NONE regex /%s/\n", pd->expression);
             continue;
         }

         /* evaluate trigger */
         if (pm->lsigid[0]) {
             pm_dbgmsg("cli_pcre_scanbuf: checking %s; running regex /%s/\n", pm->trigger, pd->expression);
 #ifdef PCRE_BYPASS
             if (strcmp(pm->trigger, PCRE_BYPASS))
 #endif
                 if (cli_ac_chklsig(pm->trigger, pm->trigger + strlen(pm->trigger), mdata->lsigcnt[pm->lsigid[1]], &evalcnt, &evalids, 0) != 1)
                     continue;
         }
         else {
             cli_dbgmsg("cli_pcre_scanbuf: skipping %s check due to uninitialized lsigid\n", pm->trigger);
             /* fall-through to unconditional execution - sigtool-only */
         }

         global = (pm->flags & CLI_PCRE_GLOBAL);       /* globally search for all matches (within bounds) */
         encompass = (pm->flags & CLI_PCRE_ENCOMPASS); /* encompass search to offset->offset+maxshift */
         rolling = (pm->flags & CLI_PCRE_ROLLING);     /* rolling search (unanchored) */
         offset = pd->search_offset;                   /* this is usually 0 */

         pm_dbgmsg("cli_pcre_scanbuf: triggered %s; running regex /%s/%s%s\n", pm->trigger, pd->expression,
                    global ? " (global)":"", rolling ? " (rolling)":"");

         /* adjust the buffer sent to cli_pcre_match for offset and maxshift */
         if (!have_offdata) {
             if (cli_pcre_qoff(pm, length, &adjbuffer, &adjshift) != CL_SUCCESS)
                 continue;
         }
         else {
             adjbuffer = data->offset[i];
             adjshift = data->shift[i];
         }

         /* anchor the match when the pcre has a fixed offset without shift */
         if (!rolling && !adjshift && (adjbuffer != CLI_OFF_ANY))
 #if USING_PCRE2
             options |= PCRE2_ANCHORED;
 #else
             options |= PCRE_ANCHORED;
 #endif
         else
             options = 0;

         if (adjbuffer == CLI_OFF_ANY)
             adjbuffer = 0;

         /* check the offset bounds */
         if (adjbuffer < length) {
             /* handle encompass flag */
             if (encompass && adjshift != 0 && adjshift != CLI_OFF_NONE) {
                 if (adjbuffer + adjshift > length)
                     adjlength = length - adjbuffer;
                 else
                     adjlength = adjshift;
             }
             else {
                 /* NOTE - if using non-encompass method 2, alter shift universally */
                 /* TODO - limitations on non-encompassed buffers? */
                 adjlength = length - adjbuffer;
             }
         }
         else {
             /* starting offset is outside bounds of file, skip pcre execution silently */
             pm_dbgmsg("cli_pcre_scanbuf: starting offset is outside bounds of file %u >= %u\n", adjbuffer, length);
             continue;
         }

         pm_dbgmsg("cli_pcre_scanbuf: passed buffer adjusted to %u +%u(%u)[%u]%s\n", adjbuffer, adjlength, adjbuffer+adjlength, adjshift, encompass ? " (encompass)":"");

         /* if the global flag is set, loop through the scanning */
         do {
             if (cli_checktimelimit(ctx) != CL_SUCCESS) {
                 cli_dbgmsg("cli_pcre_scanbuf: Time limit reached (max: %u)\n", ctx ? ctx->engine->maxscantime : 0U);
                 ret = CL_ETIMEOUT;
                 break;
             }

             /* reset the match results */
             if ((ret = cli_pcre_results_reset(&p_res, pd)) != CL_SUCCESS)
                 break;

             /* performance metrics */
             cli_event_time_start(p_sigevents, pm->sigtime_id);
             rc = cli_pcre_match(pd, buffer+adjbuffer, adjlength, offset, options, &p_res);
             cli_event_time_stop(p_sigevents, pm->sigtime_id);
             /* if debug, generate a match report */
             if (cli_debug_flag)
                 cli_pcre_report(pd, buffer+adjbuffer, adjlength, rc, &p_res);

             /* matched, rc shouldn't be >0 unless a full match occurs */
             if (rc > 0) {
                 cli_dbgmsg("cli_pcre_scanbuf: located regex match @ %d\n", adjbuffer+p_res.match[0]);

                 /* check if we've gone over offset+shift */
                 if (!encompass && adjshift) {
                     if (p_res.match[0] > adjshift) {
                         /* ignore matched offset (outside of maxshift) */
                         cli_dbgmsg("cli_pcre_scanbuf: match found outside of maxshift @%u\n", adjbuffer+p_res.match[0]);
                         break;
                     }
                 }

                 /* track the detection count */
                 cli_event_count(p_sigevents, pm->sigmatch_id);

                 if (pm->lsigid[0]) {
                     /* for logical signature evaluation */
                     pm_dbgmsg("cli_pcre_scanbuf: assigning lsigcnt[%d][%d], located @ %d\n",
                               pm->lsigid[1], pm->lsigid[2], adjbuffer+p_res.match[0]);

                     ret = lsig_sub_matched(root, mdata, pm->lsigid[1], pm->lsigid[2], adjbuffer+p_res.match[0], 0);
                     if (ret != CL_SUCCESS) {
                         break;
                     }
                 } else if (res) {
                     /* for raw match data - sigtool only */
                     newres = (struct cli_ac_result *)cli_calloc(1, sizeof(struct cli_ac_result));
                     if (!newres) {
                         cli_errmsg("cli_pcre_scanbuff: Can't allocate memory for new result\n");
                         ret = CL_EMEM;
                         break;
                     }
                     newres->virname = pm->virname;
                     newres->customdata = NULL; /* get value? */
                     newres->next = *res;
                     newres->offset = adjbuffer+p_res.match[0];
                     *res = newres;
                 } else {
                     /* raw signature detection */
                     ret = CL_CLEAN;
                     viruses_found = 1;
                     if (ctx)
                         ret = cli_append_virus(ctx, (const char *)pm->virname);
                     if (virname)
                         *virname = pm->virname;
                     /* outside all-match mode stop at the first reported result */
                     if ((!ctx || !SCAN_ALLMATCHES) && ret != CL_CLEAN)
                         break;
                 }
             }

             /* move off to the end of the match for next match; offset is relative to adjbuffer
              * NOTE: misses matches starting within the last match; TODO: start from start of last match? */
             offset = p_res.match[1];

             /* a zero-length match ends where it starts: without stepping
              * forward the next iteration would search from the same offset
              * again and the global loop might never terminate */
             if (rc > 0 && p_res.match[1] == p_res.match[0])
                 offset++;

         } while (global && rc > 0 && (uint32_t)offset < adjlength);

         /* handle error code */
         if (rc < 0 && p_res.err != CL_SUCCESS) {
             ret = p_res.err;
         }

         /* jumps out of main loop from 'global' loop */
         if (ret != CL_SUCCESS) {
             break;
         }
     }

     /* free match results */
     cli_pcre_results_free(&p_res);

     if (ret == CL_SUCCESS && viruses_found)
         return CL_VIRUS;
     return ret;
 }
 
0118458f
 void cli_pcre_freemeta(struct cli_matcher *root, struct cli_pcre_meta *pm)
35a05ff8
 {
     if (!pm)
         return;
 
86eddf11
     if (pm->trigger) {
0118458f
         mpool_free(root->mempool, pm->trigger);
86eddf11
         pm->trigger = NULL;
     }
 
     if (pm->virname) {
0118458f
         mpool_free(root->mempool, pm->virname);
86eddf11
         pm->virname = NULL;
     }
35a05ff8
 
b8de9217
     if (pm->statname) {
61f49be0
         free(pm->statname);
b8de9217
         pm->statname = NULL;
     }
 
35a05ff8
     cli_pcre_free_single(&(pm->pdata));
 }
 
 /*
  * Free every pcre meta referenced by root, then the meta table itself, and
  * reset the table pointer and the entry count.
  */
 void cli_pcre_freetable(struct cli_matcher *root)
 {
     uint32_t idx = 0;

     while (idx < root->pcre_metas) {
         struct cli_pcre_meta *entry = root->pcre_metatable[idx];

         /* contents first, then the meta structure */
         cli_pcre_freemeta(root, entry);
         mpool_free(root->mempool, entry);
         ++idx;
     }

     /* free holding structures and set count to zero */
     mpool_free(root->mempool, root->pcre_metatable);
     root->pcre_metas = 0;
     root->pcre_metatable = NULL;
 }
5c2c7233
 
62c00993
 #else
 /* NO-PCRE FUNCTIONS */
a6d2b523
 /* Build without PCRE: there are no statistics, only an error is logged. */
 void cli_pcre_perf_print()
 {
     cli_errmsg("cli_pcre_perf_print: Cannot print PCRE performance results without PCRE support\n");
 }
 
 /* Build without PCRE: nothing to release, only an error is logged. */
 void cli_pcre_perf_events_destroy()
 {
     cli_errmsg("cli_pcre_perf_events_destroy: Cannot destroy PCRE performance results without PCRE support\n");
 }
 
 /* Build without PCRE: logs an error and still reports CL_SUCCESS. */
 int cli_pcre_init()
 {
     const int status = CL_SUCCESS;

     cli_errmsg("cli_pcre_init: Cannot initialize PCRE without PCRE support\n");
     return status;
 }
 
 /* Build without PCRE: all arguments are ignored, an error is logged and
  * CL_SUCCESS is returned. */
 int cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf)
 {
     UNUSEDPARAM(dconf);
     UNUSEDPARAM(recmatch_limit);
     UNUSEDPARAM(match_limit);
     UNUSEDPARAM(root);

     cli_errmsg("cli_pcre_build: Cannot build PCRE expression without PCRE support\n");

     return CL_SUCCESS;
 }
 
 /* Build without PCRE: all arguments are ignored, an error is logged and
  * CL_SUCCESS is returned. */
 int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const char **virname, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, const struct cli_pcre_off *data, cli_ctx *ctx)
 {
     UNUSEDPARAM(ctx);
     UNUSEDPARAM(data);
     UNUSEDPARAM(mdata);
     UNUSEDPARAM(root);
     UNUSEDPARAM(res);
     UNUSEDPARAM(virname);
     UNUSEDPARAM(length);
     UNUSEDPARAM(buffer);

     cli_errmsg("cli_pcre_scanbuf: Cannot scan buffer with PCRE expression without PCRE support\n");

     return CL_SUCCESS;
 }
 
62c00993
 /* Build without PCRE: no offsets are computed; when data is given both of
  * its array pointers are cleared. Always returns CL_SUCCESS. */
 int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx)
 {
     UNUSEDPARAM(ctx);
     UNUSEDPARAM(info);
     UNUSEDPARAM(root);

     if (data != NULL) {
         data->shift  = NULL;
         data->offset = NULL;
     }

     return CL_SUCCESS;
 }
 
 /* Build without PCRE: the no-PCRE cli_pcre_recaloff() never allocates, so
  * there is nothing to free. */
 void cli_pcre_freeoff(struct cli_pcre_off *data)
 {
     UNUSEDPARAM(data);
 }
 
7afaa9bd
 #endif /* HAVE_PCRE */