5fa73369 |
/*
* Support for PCRE regex variant
* |
c442ca9c |
* Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved. |
5fa73369 |
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
9bc7c138 |
#if HAVE_PCRE |
5841080a |
#if USING_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#else |
5fa73369 |
#include <pcre.h> |
5841080a |
#endif |
5fa73369 |
#include "clamav.h"
#include "others.h"
#include "regex_pcre.h"
|
5841080a |
#if USING_PCRE2
/* NOTE: pcre2 could use mpool through ext */
void *cli_pcre_malloc(size_t size, void *ext)
{
UNUSEDPARAM(ext);
return cli_malloc(size);
}
void cli_pcre_free(void *ptr, void *ext)
{
UNUSEDPARAM(ext);
free(ptr);
}
#endif
/* cli_pcre_init_internal: redefine pcre_malloc and pcre_free; pcre2 does this during compile */ |
a6d2b523 |
int cli_pcre_init_internal() |
effb4f51 |
{ |
5841080a |
#if !USING_PCRE2 |
effb4f51 |
pcre_malloc = cli_malloc;
pcre_free = free;
pcre_stack_malloc = cli_malloc;
pcre_stack_free = free; |
5841080a |
#endif |
effb4f51 |
return CL_SUCCESS;
}
|
39597f25 |
int cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
{
if (!pd || !opt || !(*opt))
return CL_ENULLARG;
while (**opt != '\0') {
switch(**opt) { |
5841080a |
#if USING_PCRE2
case 'i': pd->options |= PCRE2_CASELESS; break;
case 's': pd->options |= PCRE2_DOTALL; break;
case 'm': pd->options |= PCRE2_MULTILINE; break;
case 'x': pd->options |= PCRE2_EXTENDED; break;
/* these are pcre2 specific... don't work with perl */
case 'A': pd->options |= PCRE2_ANCHORED; break;
case 'E': pd->options |= PCRE2_DOLLAR_ENDONLY; break;
case 'U': pd->options |= PCRE2_UNGREEDY; break;
#else |
39597f25 |
case 'i': pd->options |= PCRE_CASELESS; break;
case 's': pd->options |= PCRE_DOTALL; break;
case 'm': pd->options |= PCRE_MULTILINE; break;
case 'x': pd->options |= PCRE_EXTENDED; break;
/* these are pcre specific... don't work with perl */
case 'A': pd->options |= PCRE_ANCHORED; break;
case 'E': pd->options |= PCRE_DOLLAR_ENDONLY; break; |
14878cb1 |
case 'U': pd->options |= PCRE_UNGREEDY; break; |
5841080a |
#endif |
39597f25 |
default:
if (errout) {
cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
return CL_EMALFDB;
}
else
return CL_EPARSE; /* passed to caller to handle */
}
(*opt)++;
}
return CL_SUCCESS;
}
|
5841080a |
#if USING_PCRE2
int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
{ |
eaf0a436 |
int errornum;
PCRE2_SIZE erroffset; |
5841080a |
pcre2_general_context *gctx;
pcre2_compile_context *cctx;
if (!pd || !pd->expression) {
cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
return CL_ENULLARG;
}
gctx = pcre2_general_context_create(cli_pcre_malloc, cli_pcre_free, NULL);
if (!gctx) {
cli_errmsg("cli_pcre_compile: Unable to allocate memory for general context\n");
return CL_EMEM;
}
cctx = pcre2_compile_context_create(gctx);
if (!cctx) {
cli_errmsg("cli_pcre_compile: Unable to allocate memory for compile context\n");
pcre2_general_context_free(gctx);
return CL_EMEM;
}
/* compile the pcre2 regex last arg is charset, allow for options override */
if (opt_override)
pd->re = pcre2_compile(pd->expression, PCRE2_ZERO_TERMINATED, options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
else
pd->re = pcre2_compile(pd->expression, PCRE2_ZERO_TERMINATED, pd->options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
if (pd->re == NULL) {
PCRE2_UCHAR errmsg[256];
pcre2_get_error_message(errornum, errmsg, sizeof(errmsg)); |
eaf0a436 |
cli_errmsg("cli_pcre_compile: PCRE2 compilation failed at offset %llu: %s\n",
(long long unsigned)erroffset, errmsg); |
5841080a |
pcre2_compile_context_free(cctx);
pcre2_general_context_free(gctx);
return CL_EMALFDB;
}
/* setup matching context and set the match limits */
pd->mctx = pcre2_match_context_create(gctx);
if (!pd->mctx) {
cli_errmsg("cli_pcre_compile: Unable to allocate memory for match context\n");
pcre2_compile_context_free(cctx);
pcre2_general_context_free(gctx);
return CL_EMEM;
}
pcre2_set_match_limit(pd->mctx, match_limit);
pcre2_set_recursion_limit(pd->mctx, match_limit_recursion);
/* non-dynamic allocated fields set by caller */
pcre2_compile_context_free(cctx);
pcre2_general_context_free(gctx);
return CL_SUCCESS;
}
#else |
39597f25 |
int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override) |
9bc7c138 |
{
const char *error;
int erroffset;
if (!pd || !pd->expression) {
cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
return CL_ENULLARG;
}
|
39597f25 |
/* compile the pcre regex last arg is charset, allow for options override */
if (opt_override) |
5841080a |
pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */ |
39597f25 |
else |
5841080a |
pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */ |
5fa73369 |
if (pd->re == NULL) { |
5841080a |
cli_errmsg("cli_pcre_compile: PCRE compilation failed at offset %d: %s\n", erroffset, error); |
6bf32a73 |
return CL_EMALFDB; |
5fa73369 |
}
/* now study it... (section totally not from snort) */
pd->ex = pcre_study(pd->re, 0, &error);
if (!(pd->ex)) {
pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
if (!(pd->ex)) { |
5841080a |
cli_errmsg("cli_pcre_compile: Unable to allocate memory for extra data\n"); |
5fa73369 |
return CL_EMEM;
}
}
|
39597f25 |
/* set the match limits */ |
5fa73369 |
if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) { |
9bc7c138 |
pd->ex->match_limit = match_limit; |
5fa73369 |
}
else {
pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT; |
9bc7c138 |
pd->ex->match_limit = match_limit; |
5fa73369 |
}
|
39597f25 |
/* set the recursion match limits */ |
5fa73369 |
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) { |
9bc7c138 |
pd->ex->match_limit_recursion = match_limit_recursion; |
5fa73369 |
}
else {
pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
9bc7c138 |
pd->ex->match_limit_recursion = match_limit_recursion; |
5fa73369 |
}
#endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
/* non-dynamic allocated fields set by caller */
return CL_SUCCESS;
} |
5841080a |
#endif |
5fa73369 |
|
5841080a |
int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, struct cli_pcre_results *results) |
0d94646e |
{ |
5841080a |
int rc, startoffset;
#if USING_PCRE2
pcre2_general_context *pc2ctx;
PCRE2_SIZE *ovector;
#endif |
0d94646e |
/* set the startoffset, override if a value is specified */
startoffset = pd->search_offset;
if (override_offset >= 0)
startoffset = override_offset;
/* execute the pcre and return */ |
5841080a |
#if USING_PCRE2
rc = pcre2_match(pd->re, buffer, buflen, startoffset, options, results->match_data, pd->mctx);
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) {
switch (rc) {
case PCRE2_ERROR_CALLOUT:
break;
case PCRE2_ERROR_NOMEMORY:
cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
results->err = CL_EMEM;
break;
case PCRE2_ERROR_MATCHLIMIT:
cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
break;
case PCRE2_ERROR_RECURSIONLIMIT:
cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
break;
default:
cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
results->err = CL_BREAK;
}
} else if (rc > 0) {
ovector = pcre2_get_ovector_pointer(results->match_data);
results->match[0] = ovector[0];
results->match[1] = ovector[1];
} else {
results->match[0] = results->match[1] = 0;
}
#else |
45bafcdf |
rc = pcre_exec(pd->re, pd->ex, (const char *)buffer, buflen, startoffset, options, results->ovector, OVECCOUNT); |
5841080a |
if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
switch (rc) {
case PCRE_ERROR_CALLOUT:
break;
case PCRE_ERROR_NOMEMORY:
cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
results->err = CL_EMEM;
break;
case PCRE_ERROR_MATCHLIMIT:
cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
break;
case PCRE_ERROR_RECURSIONLIMIT:
cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
break;
default:
cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
results->err = CL_BREAK;
}
} else if (rc > 0) {
results->match[0] = results->ovector[0];
results->match[1] = results->ovector[1];
} else {
results->match[0] = results->match[1] = 0;
}
#endif
return rc; |
0d94646e |
}
|
fd6af5bf |
#define DISABLE_PCRE_REPORT 0
#define MATCH_MAXLEN 1028 /*because lolz*/
|
6bf32a73 |
/* TODO: audit this function */ |
5841080a |
static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector) |
fd6af5bf |
{
int i, j, length, namecount, trunc;
unsigned char *tabptr;
int name_entry_size;
unsigned char *name_table;
const char *start;
char outstr[2*MATCH_MAXLEN+1];
/* determine if there are named substrings */ |
5841080a |
#if USING_PCRE2
(void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
#else |
fd6af5bf |
(void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount); |
5841080a |
#endif |
fd6af5bf |
if (namecount <= 0) { |
0d94646e |
cli_dbgmsg("cli_pcre_report: no named substrings\n"); |
fd6af5bf |
}
else { |
0d94646e |
cli_dbgmsg("cli_pcre_report: named substrings\n"); |
fd6af5bf |
/* extract named substring translation table */ |
5841080a |
#if USING_PCRE2
(void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
(void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
#else |
fd6af5bf |
(void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
(void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); |
5841080a |
#endif |
fd6af5bf |
/* print named substring information */
tabptr = name_table;
for (i = 0; i < namecount; i++) {
int n = (tabptr[0] << 8) | tabptr[1];
|
45bafcdf |
start = (const char *)buffer + ovector[2*n]; |
fd6af5bf |
length = ovector[2*n+1] - ovector[2*n];
trunc = 0;
if (length > MATCH_MAXLEN) {
trunc = 1;
length = MATCH_MAXLEN;
}
for (j = 0; j < length; ++j)
snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
|
0d94646e |
cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2, |
fd6af5bf |
outstr, trunc ? " (trunc)":""); |
0ab27124 |
/*
cli_dbgmsg("named_substr: (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
length, start, trunc ? " (trunc)":"");
*/ |
fd6af5bf |
tabptr += name_entry_size;
}
}
}
/* TODO: audit this function */ |
5841080a |
void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int rc, struct cli_pcre_results *results) |
5fa73369 |
{ |
0d94646e |
int i, j, length, trunc; |
fd6af5bf |
const char *start;
char outstr[2*MATCH_MAXLEN+1]; |
5841080a |
#if USING_PCRE2
PCRE2_SIZE *ovector;
ovector = pcre2_get_ovector_pointer(results->match_data);
#else
int *ovector = results->ovector;
#endif |
f40a96fb |
|
fd6af5bf |
/* print out additional diagnostics if cli_debug_flag is set */ |
0d94646e |
if (!DISABLE_PCRE_REPORT) { |
fd6af5bf |
cli_dbgmsg("\n"); |
5841080a |
#if USING_PCRE2
cli_dbgmsg("cli_pcre_report: PCRE2 Execution Report:\n");
#else |
0d94646e |
cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n"); |
5841080a |
#endif |
0d94646e |
cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc); |
fd6af5bf |
if (rc > 0) {
/* print out full-match and capture groups */
for (i = 0; i < rc; ++i) { |
45bafcdf |
start = (const char *)buffer + ovector[2*i]; |
fd6af5bf |
length = ovector[2*i+1] - ovector[2*i];
|
0e4ef43b |
if (ovector[2*i+1] > buflen) { |
0d94646e |
cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
continue;
}
|
fd6af5bf |
trunc = 0;
if (length > MATCH_MAXLEN) {
trunc = 1;
length = MATCH_MAXLEN;
}
for (j = 0; j < length; ++j)
snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
|
0d94646e |
cli_dbgmsg("cli_pcre_report: %d: %s%s\n", i, outstr, trunc ? " (trunc)":"");
//cli_dbgmsg("cli_pcre_report: %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":""); |
fd6af5bf |
}
|
5841080a |
named_substr_print(pd, buffer, ovector); |
fd6af5bf |
} |
5841080a |
#if USING_PCRE2
else if (rc == 0 || rc == PCRE2_ERROR_NOMATCH) {
#else |
fd6af5bf |
else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) { |
5841080a |
#endif |
0d94646e |
cli_dbgmsg("cli_pcre_report: no match found\n"); |
fd6af5bf |
}
else { |
0d94646e |
cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc); |
fd6af5bf |
/* error handled by caller */
} |
0d94646e |
cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n"); |
fd6af5bf |
cli_dbgmsg("\n");
} |
5fa73369 |
}
|
5841080a |
int cli_pcre_results_reset(struct cli_pcre_results *results, const struct cli_pcre_data *pd)
{
results->err = CL_SUCCESS;
results->match[0] = results->match[1] = 0;
#if USING_PCRE2
if (results->match_data)
pcre2_match_data_free(results->match_data);
results->match_data = pcre2_match_data_create_from_pattern(pd->re, NULL);
if (!results->match_data)
return CL_EMEM;
#else
memset(results->ovector, 0, OVECCOUNT);
#endif
return CL_SUCCESS;
}
void cli_pcre_results_free(struct cli_pcre_results *results)
{
#if USING_PCRE2
if (results->match_data)
pcre2_match_data_free(results->match_data);
#endif
}
|
5fa73369 |
void cli_pcre_free_single(struct cli_pcre_data *pd)
{ |
5841080a |
#if USING_PCRE2
if (pd->re) {
pcre2_code_free(pd->re);
pd->re = NULL;
}
if (pd->mctx) {
pcre2_match_context_free(pd->mctx);
pd->mctx = NULL;
}
#else |
5fa73369 |
if (pd->re) {
pcre_free(pd->re); |
86eddf11 |
pd->re = NULL; |
5fa73369 |
}
if (pd->ex) {
free(pd->ex); |
86eddf11 |
pd->ex = NULL; |
5fa73369 |
} |
5841080a |
#endif |
5fa73369 |
if (pd->expression) {
free(pd->expression); |
86eddf11 |
pd->expression = NULL; |
5fa73369 |
}
} |
7afaa9bd |
#endif /* HAVE_PCRE */ |