libclamav/regex_pcre.c
5fa73369
 /*
  *  Support for PCRE regex variant
  *
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5fa73369
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
  *
  *  Authors: Kevin Lin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
9bc7c138
 #if HAVE_PCRE
5841080a
 #if USING_PCRE2
 #define PCRE2_CODE_UNIT_WIDTH 8
 #include <pcre2.h>
 #else
5fa73369
 #include <pcre.h>
5841080a
 #endif
5fa73369
 
 #include "clamav.h"
 #include "others.h"
 #include "regex_pcre.h"
 
5841080a
 #if USING_PCRE2
 /*
  * cli_pcre_malloc: allocation hook with the (size, memory-data) signature
  * passed to pcre2_general_context_create().
  *
  * size: number of bytes requested.
  * ext:  opaque memory-data pointer; ignored here.
  *       NOTE: pcre2 could use mpool through ext.
  *
  * Returns whatever cli_malloc(size) returns.
  */
 void *cli_pcre_malloc(size_t size, void *ext)
 {
     void *block;

     UNUSEDPARAM(ext);
     block = cli_malloc(size);
     return block;
 }
 
 /*
  * cli_pcre_free: release hook paired with cli_pcre_malloc().
  *
  * ptr: block to hand to free().
  * ext: opaque memory-data pointer; ignored here.
  */
 void cli_pcre_free(void *ptr, void *ext)
 {
     free(ptr);
     UNUSEDPARAM(ext);
 }
 #endif
 
 /* cli_pcre_init_internal: redefine pcre_malloc and pcre_free; pcre2 does this during compile */
102cd430
 cl_error_t cli_pcre_init_internal()
effb4f51
 {
5841080a
 #if !USING_PCRE2
288057e9
     pcre_malloc       = cli_malloc;
     pcre_free         = free;
effb4f51
     pcre_stack_malloc = cli_malloc;
288057e9
     pcre_stack_free   = free;
5841080a
 #endif
effb4f51
 
     return CL_SUCCESS;
 }
 
102cd430
 cl_error_t cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
39597f25
 {
     if (!pd || !opt || !(*opt))
         return CL_ENULLARG;
 
     while (**opt != '\0') {
288057e9
         switch (**opt) {
5841080a
 #if USING_PCRE2
288057e9
             case 'i':
                 pd->options |= PCRE2_CASELESS;
                 break;
             case 's':
                 pd->options |= PCRE2_DOTALL;
                 break;
             case 'm':
                 pd->options |= PCRE2_MULTILINE;
                 break;
             case 'x':
                 pd->options |= PCRE2_EXTENDED;
                 break;
 
                 /* these are pcre2 specific... don't work with perl */
             case 'A':
                 pd->options |= PCRE2_ANCHORED;
                 break;
             case 'E':
                 pd->options |= PCRE2_DOLLAR_ENDONLY;
                 break;
             case 'U':
                 pd->options |= PCRE2_UNGREEDY;
                 break;
5841080a
 #else
288057e9
             case 'i':
                 pd->options |= PCRE_CASELESS;
                 break;
             case 's':
                 pd->options |= PCRE_DOTALL;
                 break;
             case 'm':
                 pd->options |= PCRE_MULTILINE;
                 break;
             case 'x':
                 pd->options |= PCRE_EXTENDED;
                 break;
 
                 /* these are pcre specific... don't work with perl */
             case 'A':
                 pd->options |= PCRE_ANCHORED;
                 break;
             case 'E':
                 pd->options |= PCRE_DOLLAR_ENDONLY;
                 break;
             case 'U':
                 pd->options |= PCRE_UNGREEDY;
                 break;
5841080a
 #endif
288057e9
             default:
                 if (errout) {
                     cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
                     return CL_EMALFDB;
                 } else
                     return CL_EPARSE; /* passed to caller to handle */
39597f25
         }
         (*opt)++;
     }
 
     return CL_SUCCESS;
 }
 
5841080a
 #if USING_PCRE2
102cd430
 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
5841080a
 {
eaf0a436
     int errornum;
     PCRE2_SIZE erroffset;
5841080a
     pcre2_general_context *gctx;
     pcre2_compile_context *cctx;
 
     if (!pd || !pd->expression) {
         cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
         return CL_ENULLARG;
     }
 
     gctx = pcre2_general_context_create(cli_pcre_malloc, cli_pcre_free, NULL);
     if (!gctx) {
         cli_errmsg("cli_pcre_compile: Unable to allocate memory for general context\n");
         return CL_EMEM;
     }
 
     cctx = pcre2_compile_context_create(gctx);
     if (!cctx) {
         cli_errmsg("cli_pcre_compile: Unable to allocate memory for compile context\n");
         pcre2_general_context_free(gctx);
         return CL_EMEM;
     }
 
     /* compile the pcre2 regex last arg is charset, allow for options override */
     if (opt_override)
f9ac6811
         pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
5841080a
     else
f9ac6811
         pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, pd->options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
5841080a
     if (pd->re == NULL) {
         PCRE2_UCHAR errmsg[256];
         pcre2_get_error_message(errornum, errmsg, sizeof(errmsg));
eaf0a436
         cli_errmsg("cli_pcre_compile: PCRE2 compilation failed at offset %llu: %s\n",
                    (long long unsigned)erroffset, errmsg);
5841080a
         pcre2_compile_context_free(cctx);
         pcre2_general_context_free(gctx);
         return CL_EMALFDB;
     }
 
     /* setup matching context and set the match limits */
     pd->mctx = pcre2_match_context_create(gctx);
     if (!pd->mctx) {
         cli_errmsg("cli_pcre_compile: Unable to allocate memory for match context\n");
         pcre2_compile_context_free(cctx);
         pcre2_general_context_free(gctx);
         return CL_EMEM;
     }
 
     pcre2_set_match_limit(pd->mctx, match_limit);
     pcre2_set_recursion_limit(pd->mctx, match_limit_recursion);
 
     /* non-dynamic allocated fields set by caller */
     pcre2_compile_context_free(cctx);
     pcre2_general_context_free(gctx);
     return CL_SUCCESS;
 }
 #else
102cd430
 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
9bc7c138
 {
     const char *error;
     int erroffset;
 
     if (!pd || !pd->expression) {
         cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
         return CL_ENULLARG;
     }
 
39597f25
     /* compile the pcre regex last arg is charset, allow for options override */
     if (opt_override)
5841080a
         pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
39597f25
     else
5841080a
         pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
5fa73369
     if (pd->re == NULL) {
5841080a
         cli_errmsg("cli_pcre_compile: PCRE compilation failed at offset %d: %s\n", erroffset, error);
6bf32a73
         return CL_EMALFDB;
5fa73369
     }
 
     /* now study it... (section totally not from snort) */
     pd->ex = pcre_study(pd->re, 0, &error);
     if (!(pd->ex)) {
         pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
         if (!(pd->ex)) {
5841080a
             cli_errmsg("cli_pcre_compile: Unable to allocate memory for extra data\n");
5fa73369
             return CL_EMEM;
         }
     }
 
39597f25
     /* set the match limits */
5fa73369
     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) {
9bc7c138
         pd->ex->match_limit = match_limit;
288057e9
     } else {
5fa73369
         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT;
9bc7c138
         pd->ex->match_limit = match_limit;
5fa73369
     }
 
39597f25
     /* set the recursion match limits */
5fa73369
 #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
9bc7c138
         pd->ex->match_limit_recursion = match_limit_recursion;
288057e9
     } else {
5fa73369
         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
9bc7c138
         pd->ex->match_limit_recursion = match_limit_recursion;
5fa73369
     }
 #endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
 
     /* non-dynamic allocated fields set by caller */
     return CL_SUCCESS;
 }
5841080a
 #endif
5fa73369
 
f9ac6811
 int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, size_t override_offset, int options, struct cli_pcre_results *results)
0d94646e
 {
f9ac6811
     int rc;
 
5841080a
 #if USING_PCRE2
     PCRE2_SIZE *ovector;
f9ac6811
     size_t startoffset;
 #else
     int startoffset;
5841080a
 #endif
0d94646e
 
     /* set the startoffset, override if a value is specified */
     startoffset = pd->search_offset;
f9ac6811
     if (override_offset != pd->search_offset)
0d94646e
         startoffset = override_offset;
 
288057e9
         /* execute the pcre and return */
5841080a
 #if USING_PCRE2
     rc = pcre2_match(pd->re, buffer, buflen, startoffset, options, results->match_data, pd->mctx);
     if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) {
         switch (rc) {
288057e9
             case PCRE2_ERROR_CALLOUT:
                 break;
             case PCRE2_ERROR_NOMEMORY:
                 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
                 results->err = CL_EMEM;
                 break;
             case PCRE2_ERROR_MATCHLIMIT:
                 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
                 break;
             case PCRE2_ERROR_RECURSIONLIMIT:
                 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
                 break;
             default:
                 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
                 results->err = CL_BREAK;
5841080a
         }
     } else if (rc > 0) {
         ovector = pcre2_get_ovector_pointer(results->match_data);
 
         results->match[0] = ovector[0];
         results->match[1] = ovector[1];
     } else {
         results->match[0] = results->match[1] = 0;
     }
 #else
f9ac6811
     rc = pcre_exec(pd->re, pd->ex, (const char *)buffer, (int)buflen, (int)startoffset, options, results->ovector, OVECCOUNT);
5841080a
     if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
         switch (rc) {
288057e9
             case PCRE_ERROR_CALLOUT:
                 break;
             case PCRE_ERROR_NOMEMORY:
                 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
                 results->err = CL_EMEM;
                 break;
             case PCRE_ERROR_MATCHLIMIT:
                 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
                 break;
             case PCRE_ERROR_RECURSIONLIMIT:
                 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
                 break;
             default:
                 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
                 results->err = CL_BREAK;
5841080a
         }
     } else if (rc > 0) {
         results->match[0] = results->ovector[0];
         results->match[1] = results->ovector[1];
     } else {
         results->match[0] = results->match[1] = 0;
     }
 #endif
     return rc;
0d94646e
 }
 
fd6af5bf
 #define DISABLE_PCRE_REPORT 0
 #define MATCH_MAXLEN 1028 /*because lolz*/
 
6bf32a73
 /* TODO: audit this function */
f9ac6811
 #if USING_PCRE2
 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, PCRE2_SIZE *ovector)
 #else
5841080a
 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector)
f9ac6811
 #endif
fd6af5bf
 {
f9ac6811
     int i, namecount, trunc;
 
 #if USING_PCRE2
     PCRE2_SIZE length, j;
 #else
     int length, j;
 #endif
 
fd6af5bf
     unsigned char *tabptr;
     int name_entry_size;
     unsigned char *name_table;
     const char *start;
288057e9
     char outstr[2 * MATCH_MAXLEN + 1];
fd6af5bf
 
     /* determine if there are named substrings */
5841080a
 #if USING_PCRE2
     (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
 #else
fd6af5bf
     (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
5841080a
 #endif
fd6af5bf
     if (namecount <= 0) {
0d94646e
         cli_dbgmsg("cli_pcre_report: no named substrings\n");
288057e9
     } else {
0d94646e
         cli_dbgmsg("cli_pcre_report: named substrings\n");
fd6af5bf
 
         /* extract named substring translation table */
5841080a
 #if USING_PCRE2
         (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
         (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
 #else
fd6af5bf
         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
5841080a
 #endif
fd6af5bf
 
         /* print named substring information */
         tabptr = name_table;
         for (i = 0; i < namecount; i++) {
             int n = (tabptr[0] << 8) | tabptr[1];
 
288057e9
             start  = (const char *)buffer + ovector[2 * n];
             length = ovector[2 * n + 1] - ovector[2 * n];
fd6af5bf
 
             trunc = 0;
             if (length > MATCH_MAXLEN) {
288057e9
                 trunc  = 1;
fd6af5bf
                 length = MATCH_MAXLEN;
             }
 
             for (j = 0; j < length; ++j)
288057e9
                 snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
fd6af5bf
 
0d94646e
             cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
288057e9
                        outstr, trunc ? " (trunc)" : "");
0ab27124
             /*
             cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
                        length, start, trunc ? " (trunc)":"");
             */
fd6af5bf
             tabptr += name_entry_size;
         }
     }
 }
 
 /* TODO: audit this function */
f9ac6811
 void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, int rc, struct cli_pcre_results *results)
5fa73369
 {
f9ac6811
     int i, trunc;
 
 #if USING_PCRE2
     PCRE2_SIZE length, j;
 #else
     int length, j;
 #endif
 
fd6af5bf
     const char *start;
288057e9
     char outstr[2 * MATCH_MAXLEN + 1];
f9ac6811
 
5841080a
 #if USING_PCRE2
     PCRE2_SIZE *ovector;
     ovector = pcre2_get_ovector_pointer(results->match_data);
 #else
     int *ovector = results->ovector;
 #endif
f40a96fb
 
fd6af5bf
     /* print out additional diagnostics if cli_debug_flag is set */
0d94646e
     if (!DISABLE_PCRE_REPORT) {
fd6af5bf
         cli_dbgmsg("\n");
5841080a
 #if USING_PCRE2
         cli_dbgmsg("cli_pcre_report: PCRE2 Execution Report:\n");
 #else
0d94646e
         cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n");
5841080a
 #endif
0d94646e
         cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc);
fd6af5bf
         if (rc > 0) {
             /* print out full-match and capture groups */
             for (i = 0; i < rc; ++i) {
288057e9
                 start  = (const char *)buffer + ovector[2 * i];
                 length = ovector[2 * i + 1] - ovector[2 * i];
fd6af5bf
 
f9ac6811
 #ifdef USING_PCRE2
288057e9
                 if (ovector[2 * i + 1] > buflen) {
f9ac6811
 #else
                 if (ovector[2 * i + 1] > (int)buflen) {
 #endif
0d94646e
                     cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
                     continue;
                 }
 
fd6af5bf
                 trunc = 0;
                 if (length > MATCH_MAXLEN) {
288057e9
                     trunc  = 1;
fd6af5bf
                     length = MATCH_MAXLEN;
                 }
 
288057e9
                 for (j = 0; j < length; ++j)
                     snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
fd6af5bf
 
288057e9
                 cli_dbgmsg("cli_pcre_report:  %d: %s%s\n", i, outstr, trunc ? " (trunc)" : "");
0d94646e
                 //cli_dbgmsg("cli_pcre_report:  %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
fd6af5bf
             }
 
5841080a
             named_substr_print(pd, buffer, ovector);
fd6af5bf
         }
5841080a
 #if USING_PCRE2
         else if (rc == 0 || rc == PCRE2_ERROR_NOMATCH) {
 #else
fd6af5bf
         else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
5841080a
 #endif
0d94646e
             cli_dbgmsg("cli_pcre_report: no match found\n");
288057e9
         } else {
0d94646e
             cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc);
fd6af5bf
             /* error handled by caller */
         }
0d94646e
         cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n");
fd6af5bf
         cli_dbgmsg("\n");
     }
5fa73369
 }
 
102cd430
 cl_error_t cli_pcre_results_reset(struct cli_pcre_results *results, const struct cli_pcre_data *pd)
5841080a
 {
288057e9
     results->err      = CL_SUCCESS;
5841080a
     results->match[0] = results->match[1] = 0;
 #if USING_PCRE2
     if (results->match_data)
         pcre2_match_data_free(results->match_data);
 
     results->match_data = pcre2_match_data_create_from_pattern(pd->re, NULL);
     if (!results->match_data)
         return CL_EMEM;
 #else
     memset(results->ovector, 0, OVECCOUNT);
 #endif
     return CL_SUCCESS;
 }
 
 /*
  * cli_pcre_results_free: release the resources held by results.
  *
  * pcre2 build:  frees the match data block and clears the pointer, so that
  *               a later cli_pcre_results_reset() or a repeated call does
  *               not free the same block again.
  * legacy build: nothing to release.
  */
 void cli_pcre_results_free(struct cli_pcre_results *results)
 {
 #if USING_PCRE2
     if (results->match_data) {
         pcre2_match_data_free(results->match_data);
         results->match_data = NULL;
     }
 #endif
 }
 
5fa73369
 void cli_pcre_free_single(struct cli_pcre_data *pd)
 {
5841080a
 #if USING_PCRE2
     if (pd->re) {
         pcre2_code_free(pd->re);
         pd->re = NULL;
     }
 
     if (pd->mctx) {
         pcre2_match_context_free(pd->mctx);
         pd->mctx = NULL;
     }
 #else
5fa73369
     if (pd->re) {
         pcre_free(pd->re);
86eddf11
         pd->re = NULL;
5fa73369
     }
     if (pd->ex) {
         free(pd->ex);
86eddf11
         pd->ex = NULL;
5fa73369
     }
5841080a
 #endif
5fa73369
     if (pd->expression) {
         free(pd->expression);
86eddf11
         pd->expression = NULL;
5fa73369
     }
 }
7afaa9bd
 #endif /* HAVE_PCRE */