/*
 *  Support for PCRE regex variant
 *
 *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Kevin Lin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */

#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#if HAVE_PCRE

#include <stdlib.h>
#include <string.h>

#if USING_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#else
#include <pcre.h>
#endif

#include "clamav.h"
#include "others.h"
#include "regex_pcre.h"

/*
 * Element type of the offset vector filled in by the matcher:
 * PCRE2 reports offsets as PCRE2_SIZE, legacy PCRE as int.
 */
#if USING_PCRE2
typedef PCRE2_SIZE cli_pcre_ovec_t;
#else
typedef int cli_pcre_ovec_t;
#endif

#if USING_PCRE2
/* NOTE: pcre2 could use mpool through ext */

/* Allocation callback handed to pcre2_general_context_create(); ext is unused. */
void *cli_pcre_malloc(size_t size, void *ext)
{
    UNUSEDPARAM(ext);
    return cli_malloc(size);
}

/* Deallocation callback handed to pcre2_general_context_create(); ext is unused. */
void cli_pcre_free(void *ptr, void *ext)
{
    UNUSEDPARAM(ext);
    free(ptr);
}
#endif

/*
 * cli_pcre_init_internal: redefine pcre_malloc and pcre_free;
 * pcre2 does this during compile (through the general context).
 *
 * Always returns CL_SUCCESS.
 */
int cli_pcre_init_internal(void)
{
#if !USING_PCRE2
    pcre_malloc       = cli_malloc;
    pcre_free         = free;
    pcre_stack_malloc = cli_malloc;
    pcre_stack_free   = free;
#endif

    return CL_SUCCESS;
}

/*
 * cli_pcre_addoptions: translate a string of single-character option flags
 * into compile options OR-ed into pd->options.
 *
 * *opt is advanced past every consumed character. On an unknown character
 * it is left pointing at that character and:
 *   - errout != 0: an error is logged and CL_EMALFDB is returned;
 *   - errout == 0: CL_EPARSE is returned so the caller can handle it.
 *
 * Returns CL_ENULLARG when pd, opt or *opt is NULL, CL_SUCCESS otherwise.
 */
int cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
{
    if (!pd || !opt || !(*opt))
        return CL_ENULLARG;

    for (; **opt != '\0'; (*opt)++) {
        switch (**opt) {
#if USING_PCRE2
            case 'i':
                pd->options |= PCRE2_CASELESS;
                break;
            case 's':
                pd->options |= PCRE2_DOTALL;
                break;
            case 'm':
                pd->options |= PCRE2_MULTILINE;
                break;
            case 'x':
                pd->options |= PCRE2_EXTENDED;
                break;

            /* these are pcre2 specific... don't work with perl */
            case 'A':
                pd->options |= PCRE2_ANCHORED;
                break;
            case 'E':
                pd->options |= PCRE2_DOLLAR_ENDONLY;
                break;
            case 'U':
                pd->options |= PCRE2_UNGREEDY;
                break;
#else
            case 'i':
                pd->options |= PCRE_CASELESS;
                break;
            case 's':
                pd->options |= PCRE_DOTALL;
                break;
            case 'm':
                pd->options |= PCRE_MULTILINE;
                break;
            case 'x':
                pd->options |= PCRE_EXTENDED;
                break;

            /* these are pcre specific... don't work with perl */
            case 'A':
                pd->options |= PCRE_ANCHORED;
                break;
            case 'E':
                pd->options |= PCRE_DOLLAR_ENDONLY;
                break;
            case 'U':
                pd->options |= PCRE_UNGREEDY;
                break;
#endif
            default:
                if (errout) {
                    cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
                    return CL_EMALFDB;
                }
                return CL_EPARSE; /* passed to caller to handle */
        }
    }

    return CL_SUCCESS;
}

#if USING_PCRE2
/*
 * cli_pcre_compile (PCRE2): compile pd->expression into pd->re and create
 * the match context pd->mctx carrying the match/recursion limits.
 *
 * options is used instead of pd->options when opt_override is non-zero.
 *
 * Returns CL_ENULLARG for a NULL pd/expression, CL_EMEM when a context
 * cannot be allocated, CL_EMALFDB when the pattern does not compile and
 * CL_SUCCESS otherwise. The temporary general/compile contexts are released
 * on every path; pd->re and pd->mctx are released by cli_pcre_free_single().
 */
int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
{
    int ret = CL_SUCCESS;
    int errornum;
    PCRE2_SIZE erroffset;
    uint32_t compile_opts;
    pcre2_general_context *gctx;
    pcre2_compile_context *cctx = NULL;

    if (!pd || !pd->expression) {
        cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
        return CL_ENULLARG;
    }

    gctx = pcre2_general_context_create(cli_pcre_malloc, cli_pcre_free, NULL);
    if (!gctx) {
        cli_errmsg("cli_pcre_compile: Unable to allocate memory for general context\n");
        return CL_EMEM;
    }

    cctx = pcre2_compile_context_create(gctx);
    if (!cctx) {
        cli_errmsg("cli_pcre_compile: Unable to allocate memory for compile context\n");
        ret = CL_EMEM;
        goto done;
    }

    /* compile the pcre2 regex, allow for options override */
    compile_opts = opt_override ? (uint32_t)options : (uint32_t)pd->options;
    pd->re       = pcre2_compile((PCRE2_SPTR)pd->expression, PCRE2_ZERO_TERMINATED, compile_opts, &errornum, &erroffset, cctx);
    if (pd->re == NULL) {
        PCRE2_UCHAR errmsg[256];
        pcre2_get_error_message(errornum, errmsg, sizeof(errmsg));
        cli_errmsg("cli_pcre_compile: PCRE2 compilation failed at offset %llu: %s\n",
                   (long long unsigned)erroffset, (const char *)errmsg);
        ret = CL_EMALFDB;
        goto done;
    }

    /* setup matching context and set the match limits */
    pd->mctx = pcre2_match_context_create(gctx);
    if (!pd->mctx) {
        cli_errmsg("cli_pcre_compile: Unable to allocate memory for match context\n");
        ret = CL_EMEM;
        goto done;
    }

    pcre2_set_match_limit(pd->mctx, match_limit);
    pcre2_set_recursion_limit(pd->mctx, match_limit_recursion);

    /* non-dynamic allocated fields set by caller */

done:
    if (cctx)
        pcre2_compile_context_free(cctx);
    pcre2_general_context_free(gctx);
    return ret;
}
#else
/*
 * cli_pcre_compile (PCRE): compile and study pd->expression, storing the
 * results in pd->re and pd->ex, then record the match/recursion limits in
 * the extra block.
 *
 * options is used instead of pd->options when opt_override is non-zero.
 *
 * Returns CL_ENULLARG for a NULL pd/expression, CL_EMALFDB when the pattern
 * does not compile, CL_EMEM when no extra block can be allocated and
 * CL_SUCCESS otherwise.
 */
int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
{
    const char *error;
    int erroffset;
    int compile_opts;

    if (!pd || !pd->expression) {
        cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
        return CL_ENULLARG;
    }

    /* compile the pcre regex last arg is charset, allow for options override */
    compile_opts = opt_override ? (int)options : (int)pd->options;
    pd->re       = pcre_compile(pd->expression, compile_opts, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
    if (pd->re == NULL) {
        cli_errmsg("cli_pcre_compile: PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return CL_EMALFDB;
    }

    /* now study it; pcre_study() may return NULL, in which case an empty
     * extra block is allocated so that the limits can still be stored */
    pd->ex = pcre_study(pd->re, 0, &error);
    if (!(pd->ex)) {
        pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
        if (!(pd->ex)) {
            cli_errmsg("cli_pcre_compile: Unable to allocate memory for extra data\n");
            return CL_EMEM;
        }
    }

    /* set the match limits */
    pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT;
    pd->ex->match_limit = match_limit;

    /* set the recursion match limits */
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
    pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    pd->ex->match_limit_recursion = match_limit_recursion;
#endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */

    /* non-dynamic allocated fields set by caller */
    return CL_SUCCESS;
}
#endif

/*
 * cli_pcre_match: run the compiled expression in pd against buffer.
 *
 * Matching starts at pd->search_offset unless override_offset >= 0.
 * On a match (rc > 0) results->match[0..1] receive the start/end offsets of
 * the full match; when rc == 0 or no match is found they are zeroed.
 * On an out-of-memory error results->err is set to CL_EMEM, on any
 * unrecognised error to CL_BREAK; callout and limit errors only get logged.
 *
 * Returns the raw return code of the underlying matcher.
 */
int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, struct cli_pcre_results *results)
{
    int rc, startoffset;
#if USING_PCRE2
    PCRE2_SIZE *ovector;
#endif

    /* set the startoffset, override if a value is specified */
    startoffset = pd->search_offset;
    if (override_offset >= 0)
        startoffset = override_offset;

        /* execute the pcre and return */
#if USING_PCRE2
    rc = pcre2_match(pd->re, buffer, buflen, startoffset, options, results->match_data, pd->mctx);
    if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) {
        switch (rc) {
            case PCRE2_ERROR_CALLOUT:
                break;
            case PCRE2_ERROR_NOMEMORY:
                cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
                results->err = CL_EMEM;
                break;
            case PCRE2_ERROR_MATCHLIMIT:
                cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
                break;
            case PCRE2_ERROR_RECURSIONLIMIT:
                cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
                break;
            default:
                cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
                results->err = CL_BREAK;
        }
    } else if (rc > 0) {
        ovector = pcre2_get_ovector_pointer(results->match_data);

        results->match[0] = ovector[0];
        results->match[1] = ovector[1];
    } else {
        results->match[0] = results->match[1] = 0;
    }
#else
    rc = pcre_exec(pd->re, pd->ex, (const char *)buffer, buflen, startoffset, options, results->ovector, OVECCOUNT);
    if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
        switch (rc) {
            case PCRE_ERROR_CALLOUT:
                break;
            case PCRE_ERROR_NOMEMORY:
                cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
                results->err = CL_EMEM;
                break;
            case PCRE_ERROR_MATCHLIMIT:
                cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
                break;
            case PCRE_ERROR_RECURSIONLIMIT:
                cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
                break;
            default:
                cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
                results->err = CL_BREAK;
        }
    } else if (rc > 0) {
        results->match[0] = results->ovector[0];
        results->match[1] = results->ovector[1];
    } else {
        results->match[0] = results->match[1] = 0;
    }
#endif
    return rc;
}

#define DISABLE_PCRE_REPORT 0
#define MATCH_MAXLEN 1028 /* maximum number of matched bytes dumped per group */

/*
 * Write len bytes of src as lowercase hex into out (always NUL-terminated
 * when outsz > 0). Bytes are taken as unsigned so values >= 0x80 yield
 * exactly two digits. Output stops early if out is too small.
 */
static void hex_encode(const unsigned char *src, size_t len, char *out, size_t outsz)
{
    static const char digits[] = "0123456789abcdef";
    size_t i;

    if (outsz == 0)
        return;

    for (i = 0; i < len && (2 * i + 2) < outsz; i++) {
        out[2 * i]     = digits[src[i] >> 4];
        out[2 * i + 1] = digits[src[i] & 0x0f];
    }
    out[2 * i] = '\0';
}

/*
 * Hex-encode the [so, eo) span of buffer into out, dumping at most
 * MATCH_MAXLEN bytes.
 *
 * Returns -1 when the span ends beyond buflen (this also covers unset
 * groups, whose offsets are -1 / PCRE2_UNSET), 1 when the dump was
 * truncated and 0 otherwise. A span whose start lies after its end is
 * treated as empty.
 */
static int span_to_hex(const unsigned char *buffer, uint32_t buflen, cli_pcre_ovec_t so, cli_pcre_ovec_t eo, char *out, size_t outsz)
{
    size_t start = (size_t)so;
    size_t end   = (size_t)eo;
    size_t length;
    int trunc = 0;

    if (end > buflen)
        return -1;

    length = (start <= end) ? end - start : 0;
    if (length > MATCH_MAXLEN) {
        trunc  = 1;
        length = MATCH_MAXLEN;
    }

    hex_encode(length ? buffer + start : buffer, length, out, outsz);
    return trunc;
}

/*
 * Log (debug level) every named capture group of pd together with a hex
 * dump of what it captured.
 *
 * ovector holds the offset pairs of the last match and rc is the matcher's
 * (positive) return code: only groups numbered below rc have been set.
 * Groups that are unset or fall outside buffer are reported as such and
 * never dereferenced.
 */
static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, const cli_pcre_ovec_t *ovector, int rc)
{
    int i, trunc;
    int namecount       = 0;
    int name_entry_size = 0;
    const unsigned char *name_table = NULL;
    const unsigned char *tabptr;
    char outstr[2 * MATCH_MAXLEN + 1];

    /* determine if there are named substrings */
#if USING_PCRE2
    (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
#else
    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
#endif
    if (namecount <= 0) {
        cli_dbgmsg("cli_pcre_report: no named substrings\n");
        return;
    }

    cli_dbgmsg("cli_pcre_report: named substrings\n");

    /* extract named substring translation table */
#if USING_PCRE2
    (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
    (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
#else
    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
#endif
    /* each entry is a 2-byte group number followed by a NUL-terminated name */
    if (!name_table || name_entry_size < 3)
        return;

    /* print named substring information */
    tabptr = name_table;
    for (i = 0; i < namecount; i++, tabptr += name_entry_size) {
        int n = (tabptr[0] << 8) | tabptr[1];

        trunc = -1;
        if (n < rc)
            trunc = span_to_hex(buffer, buflen, ovector[2 * n], ovector[2 * n + 1], outstr, sizeof(outstr));

        if (trunc < 0) {
            cli_dbgmsg("cli_pcre_report: (%d) %*s: unset or outside buffer\n", n, name_entry_size - 3, tabptr + 2);
            continue;
        }

        cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
                   outstr, trunc ? " (trunc)" : "");
    }
}

/*
 * cli_pcre_report: emit a debug-level report of a match attempt.
 *
 * rc is the value returned by cli_pcre_match() for buffer/buflen and
 * results holds the corresponding offsets. For rc > 0 the full match and
 * every capture group are hex-dumped (at most MATCH_MAXLEN bytes each),
 * followed by the named groups; groups ending beyond buflen trigger a
 * warning and are skipped.
 */
void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int rc, struct cli_pcre_results *results)
{
    int i, trunc;
    char outstr[2 * MATCH_MAXLEN + 1];
    const cli_pcre_ovec_t *ovector;

    /* print out additional diagnostics if cli_debug_flag is set */
    if (DISABLE_PCRE_REPORT)
        return;

    cli_dbgmsg("\n");
#if USING_PCRE2
    cli_dbgmsg("cli_pcre_report: PCRE2 Execution Report:\n");
#else
    cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n");
#endif
    cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc);

    if (rc > 0) {
#if USING_PCRE2
        ovector = pcre2_get_ovector_pointer(results->match_data);
#else
        ovector = results->ovector;
#endif
        /* print out full-match and capture groups */
        for (i = 0; i < rc; ++i) {
            trunc = span_to_hex(buffer, buflen, ovector[2 * i], ovector[2 * i + 1], outstr, sizeof(outstr));
            if (trunc < 0) {
                cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
                continue;
            }

            cli_dbgmsg("cli_pcre_report:  %d: %s%s\n", i, outstr, trunc ? " (trunc)" : "");
        }

        named_substr_print(pd, buffer, buflen, ovector, rc);
    }
#if USING_PCRE2
    else if (rc == 0 || rc == PCRE2_ERROR_NOMATCH) {
#else
    else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
#endif
        cli_dbgmsg("cli_pcre_report: no match found\n");
    } else {
        cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc);
        /* error handled by caller */
    }

    cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n");
    cli_dbgmsg("\n");
}

/*
 * cli_pcre_results_reset: bring results back to its "no match" state.
 *
 * PCRE2: any existing match data block is released and a new one sized for
 * pd->re is created; returns CL_EMEM if that allocation fails.
 * PCRE: the whole offset vector is zeroed.
 *
 * Returns CL_SUCCESS otherwise.
 */
int cli_pcre_results_reset(struct cli_pcre_results *results, const struct cli_pcre_data *pd)
{
    results->err      = CL_SUCCESS;
    results->match[0] = results->match[1] = 0;
#if USING_PCRE2
    if (results->match_data)
        pcre2_match_data_free(results->match_data);

    results->match_data = pcre2_match_data_create_from_pattern(pd->re, NULL);
    if (!results->match_data)
        return CL_EMEM;
#else
    /* memset() takes a byte count: clear all OVECCOUNT elements, not OVECCOUNT bytes */
    memset(results->ovector, 0, OVECCOUNT * sizeof(results->ovector[0]));
#endif
    return CL_SUCCESS;
}

/*
 * cli_pcre_results_free: release the PCRE2 match data held by results (no-op
 * for legacy PCRE). The pointer is cleared so that a later reset/free cannot
 * free the block a second time.
 */
void cli_pcre_results_free(struct cli_pcre_results *results)
{
#if USING_PCRE2
    if (results->match_data) {
        pcre2_match_data_free(results->match_data);
        results->match_data = NULL;
    }
#endif
}

/*
 * cli_pcre_free_single: release everything owned by pd (compiled pattern,
 * match context / extra block and the expression string) and reset the
 * corresponding pointers to NULL. pd itself is not freed.
 */
void cli_pcre_free_single(struct cli_pcre_data *pd)
{
#if USING_PCRE2
    if (pd->re) {
        pcre2_code_free(pd->re);
        pd->re = NULL;
    }

    if (pd->mctx) {
        pcre2_match_context_free(pd->mctx);
        pd->mctx = NULL;
    }
#else
    if (pd->re) {
        pcre_free(pd->re);
        pd->re = NULL;
    }
    if (pd->ex) {
        /* pd->ex comes from pcre_study() (via pcre_malloc == cli_malloc) or cli_calloc() */
        free(pd->ex);
        pd->ex = NULL;
    }
#endif
    if (pd->expression) {
        free(pd->expression);
        pd->expression = NULL;
    }
}

#endif /* HAVE_PCRE */