/*
 *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Tomasz Kojm, Nigel Horne, Török Edvin
 *
 *  Acknowledgements: cli_strcasestr() contains a public domain code from:
 *                    http://unixpapa.com/incnote/string.html
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */

#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#include "str.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include <ctype.h>
#include <sys/types.h>

#include "clamav.h"
#include "others.h"
#include "matcher.h"
#include "jsparse/textbuf.h"
#include "platform.h"

// clang-format off

static const int hex_chars[256] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};

// clang-format on

static inline int cli_hex2int(const char c)
{
    return hex_chars[(const unsigned char)c];
}

int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len)
{
    uint16_t val;
    unsigned int i;
    int c;

    for (i = 0; i < len; i += 2) {
        val = 0;

        if (hex[i] == '?' && hex[i + 1] == '?') {
            val |= CLI_MATCH_IGNORE;

        } else if (hex[i + 1] == '?') {
            if ((c = cli_hex2int(hex[i])) >= 0) {
                val = c << 4;
            } else {
                return 0;
            }
            val |= CLI_MATCH_NIBBLE_HIGH;

        } else if (hex[i] == '?') {
            if ((c = cli_hex2int(hex[i + 1])) >= 0) {
                val = c;
            } else {
                return 0;
            }
            val |= CLI_MATCH_NIBBLE_LOW;

        } else if (hex[i] == '(') {
            val |= CLI_MATCH_SPECIAL;

        } else {
            if ((c = cli_hex2int(hex[i])) >= 0) {
                val = c;
                if ((c = cli_hex2int(hex[i + 1])) >= 0) {
                    val = (val << 4) + c;
                } else {
                    return 0;
                }
            } else {
                return 0;
            }
        }

        *ptr++ = val;
    }
    return 1;
}

uint16_t *cli_hex2ui(const char *hex)
{
    uint16_t *str;
    unsigned int len;

    len = strlen(hex);

    if (len % 2 != 0) {
        cli_errmsg("cli_hex2ui(): Malformed hexstring: %s (length: %u)\n", hex,
                   len);
        return NULL;
    }

    str = cli_calloc((len / 2) + 1, sizeof(uint16_t));
    if (!str)
        return NULL;

    if (cli_realhex2ui(hex, str, len))
        return str;

    free(str);
    return NULL;
}

char *cli_hex2str(const char *hex)
{
    char *str;
    size_t len;

    len = strlen(hex);

    if (len % 2 != 0) {
        cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex,
                   (unsigned)len);
        return NULL;
    }

    str = cli_calloc((len / 2) + 1, sizeof(char));
    if (!str)
        return NULL;

    if (cli_hex2str_to(hex, str, len) == -1) {
        free(str);
        return NULL;
    }
    return str;
}

int cli_hex2str_to(const char *hex, char *ptr, size_t len)
{
    size_t i;
    int c;
    char val;

    for (i = 0; i < len; i += 2) {
        if ((c = cli_hex2int(hex[i])) >= 0) {
            val = c;
            if ((c = cli_hex2int(hex[i + 1])) >= 0) {
                val = (val << 4) + c;
            } else {
                return -1;
            }
        } else {
            return -1;
        }

        *ptr++ = val;
    }

    return 0;
}

int cli_hex2num(const char *hex)
{
    int hexval, ret = 0, len, i;

    len = strlen(hex);

    if (len % 2 != 0) {
        cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex,
                   len);
        return -1;
    }

    for (i = 0; i < len; i++) {
        if ((hexval = cli_hex2int(hex[i])) < 0)
            break;
        ret = (ret << 4) | hexval;
    }

    return ret;
}

int cli_xtoi(const char *hex)
{
    int len, val, i;
    char *hexbuf;

    len = strlen(hex);

    if (len % 2 == 0)
        return cli_hex2num(hex);

    hexbuf = cli_calloc(len + 2, sizeof(char));
    if (hexbuf == NULL) {
        cli_errmsg("cli_xtoi(): cli_malloc fails.\n");
        return -1;
    }

    for (i = 0; i < len; i++)
        hexbuf[i + 1] = hex[i];
    val = cli_hex2num(hexbuf);
    free(hexbuf);
    return val;
}

char *cli_str2hex(const char *string, unsigned int len)
{
    char *hexstr;
    char HEX[] = {'0', '1', '2', '3', '4', '5', '6', '7',
                  '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
    unsigned int i, j;

    if ((hexstr = (char *)cli_calloc(2 * len + 1, sizeof(char))) == NULL)
        return NULL;

    for (i = 0, j = 0; i < len; i++, j += 2) {
        hexstr[j]     = HEX[(string[i] >> 4) & 0xf];
        hexstr[j + 1] = HEX[string[i] & 0xf];
    }

    return hexstr;
}

char *cli_utf16toascii(const char *str, unsigned int length)
{
    char *decoded;
    unsigned int i, j;

    if (length < 2) {
        cli_dbgmsg("cli_utf16toascii: length < 2\n");
        return NULL;
    }

    if (length % 2)
        length--;

    if (!(decoded = cli_calloc(length / 2 + 1, sizeof(char))))
        return NULL;

    for (i = 0, j = 0; i < length; i += 2, j++) {
        decoded[j] = ((unsigned char)str[i + 1]) << 4;
        decoded[j] += str[i];
    }

    return decoded;
}

int cli_strbcasestr(const char *haystack, const char *needle)
{
    const char *pt = haystack;
    int i, j;

    i = strlen(haystack);
    j = strlen(needle);

    if (i < j)
        return 0;

    pt += i - j;

    return !strcasecmp(pt, needle);
}

/**
 * @brief Remove trailing NL and CR characters from the end of the given string.
 *
 * @param string    string input
 * @return int      the new length of the string (ala strlen)
 * @return int      -1 if string was NULL.
 */
int cli_chomp(char *string)
{
    int l;

    if (string == NULL)
        return -1;

    l = strlen(string);

    if (l == 0)
        return 0;

    --l;

    while ((l >= 0) && ((string[l] == '\n') || (string[l] == '\r')))
        string[l--] = '\0';

    return l + 1;
}

/*
 * char *cli_strok(const char *line, int fieldno, char *delim)
 * Return a copy of field <fieldno> from the string <line>, where
 * fields are delimited by any char from <delim>, or NULL if <line>
 * doesn't have <fieldno> fields or not enough memory is available.
 * The caller has to free() the result afterwards.
 */
char *cli_strtok(const char *line, int fieldno, const char *delim)
{
    int counter  = 0, i, j;
    char *buffer = NULL;

    /* step to arg # <fieldno> */
    for (i = 0; line[i] && counter != fieldno; i++) {
        if (strchr(delim, line[i])) {
            counter++;
            while (line[i + 1] && strchr(delim, line[i + 1])) {
                i++;
            }
        }
    }
    if (!line[i]) {
        /* end of buffer before field reached */
        return NULL;
    }

    for (j = i; line[j]; j++) {
        if (strchr(delim, line[j])) {
            break;
        }
    }
    if (i == j) {
        return NULL;
    }
    buffer = cli_malloc(j - i + 1);
    if (!buffer) {
        cli_errmsg("cli_strtok: Unable to allocate memory for buffer\n");
        return NULL;
    }
    strncpy(buffer, line + i, j - i);
    buffer[j - i] = '\0';

    return buffer;
}

/*
 * Like cli_strtok, but this puts the output into a given argument, rather
 * than allocating fresh memory
 * Returns NULL for error, or a pointer to output
 * njh@bandsman.co.uk
 */
char *cli_strtokbuf(const char *input, int fieldno, const char *delim,
                    char *output)
{
    int counter = 0, i, j;

    /* step to arg # <fieldno> */
    for (i = 0; input[i] && counter != fieldno; i++) {
        if (strchr(delim, input[i])) {
            counter++;
            while (input[i + 1] && strchr(delim, input[i + 1])) {
                i++;
            }
        }
    }
    if (input[i] == '\0') {
        /* end of buffer before field reached */
        return NULL;
    }

    for (j = i; input[j]; j++) {
        if (strchr(delim, input[j])) {
            break;
        }
    }
    if (i == j) {
        return NULL;
    }
    strncpy(output, input + i, j - i);
    output[j - i] = '\0';

    return output;
}

const char *cli_memstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
    size_t i, s1, s2;

    if (!hs || !ns || hs < ns)
        return NULL;

    if (needle == haystack)
        return haystack;

    if (ns == 1)
        return memchr(haystack, needle[0], hs);

    if (needle[0] == needle[1]) {
        s1 = 2;
        s2 = 1;
    } else {
        s1 = 1;
        s2 = 2;
    }
    for (i = 0; i <= hs - ns;) {
        if (needle[1] != haystack[i + 1]) {
            i += s1;
        } else {
            if ((needle[0] == haystack[i]) &&
                !memcmp(needle + 2, haystack + i + 2, ns - 2))
                return &haystack[i];
            i += s2;
        }
    }

    return NULL;
}

char *cli_strrcpy(char *dest, const char *source) /* by NJH */
{

    if (!dest || !source) {
        cli_errmsg("cli_strrcpy: NULL argument\n");
        return NULL;
    }

    while ((*dest++ = *source++))
        ;

    return --dest;
}

const char *__cli_strcasestr(const char *haystack, const char *needle)
{
    size_t l;
    char f[3];
    const size_t strlen_a = strlen(haystack);
    const size_t strlen_b = strlen(needle);

    f[0] = tolower(*needle);
    f[1] = toupper(*needle);
    f[2] = '\0';
    for (l = strcspn(haystack, f); l != strlen_a; l += strcspn(haystack + l + 1, f) + 1)
        if (strncasecmp(haystack + l, needle, strlen_b) == 0)
            return (haystack + l);
    return (NULL);
}

char *__cli_strndup(const char *s, size_t n)
{
    char *alloc;
    size_t len;

    if (!s) {
        return NULL;
    }

    len   = CLI_STRNLEN(s, n);
    alloc = malloc(len + 1);

    if (!alloc) {
        return NULL;
    } else
        memcpy(alloc, s, len);

    alloc[len] = '\0';
    return alloc;
}

size_t __cli_strnlen(const char *s, size_t n)
{
    size_t i = 0;
    for (; (i < n) && s[i] != '\0'; ++i)
        ;
    return i;
}

/*
 * @brief Find the first occurrence of find in s.
 *
 * The search is limited to the first slen characters of s.
 *
 * Copyright (c) 2001 Mike Barcroft <mike@FreeBSD.org>
 * Copyright (c) 1990, 1993
 * The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * @param s      haystack
 * @param find   needle
 * @param slen   haystack length
 * @return char* Address of the needle, if found, else NULL.
 */
char *__cli_strnstr(const char *s, const char *find, size_t slen)
{
    char c, sc;
    size_t len;

    if ((c = *find++) != '\0') {
        len = strlen(find);
        do {
            do {
                if (slen-- < 1 || (sc = *s++) == '\0')
                    return (NULL);
            } while (sc != c);
            if (len > slen)
                return (NULL);
        } while (strncmp(s, find, len) != 0);
        s--;
    }
    return ((char *)s);
}

size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens)
{
    size_t tokens_found, i;

    for (tokens_found = 0; tokens_found < token_count;) {
        tokens[tokens_found++] = buffer;
        buffer                 = strchr(buffer, delim);
        if (buffer) {
            *buffer++ = '\0';
        } else {
            i = tokens_found;
            while (i < token_count)
                tokens[i++] = NULL;

            return tokens_found;
        }
    }
    return tokens_found;
}

/**
 * @brief The strntol() function converts the string in str to a long value.
 * Modifications made to validate the length of the string for non-null term
 * strings.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * @param nptr          Pointer to start of string.
 * @param n             Max length of buffer in bytes.
 * @param[out] endptr   [optional] If endptr is not NULL, strtol() stores the
 * address of the first invalid character in *endptr. If there were no digits at
 * all, however, strtol() stores the original value of str in *endptr. Nota
 * Bene:  If the buffer is non-null terminated and the number comprises the
 * entire buffer, endptr will point past the end of the buffer, and the caller
 * should check if endptr >= nptr + n.
 *
 * @param int           The conversion is done according to the given base,
 * which must be between 2 and 36 inclusive, or be the special value 0.
 * @return long         The signed long value.
 */
long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
{
    register const char *s     = nptr;
    register unsigned long acc = 0;
    register int c;
    register unsigned long cutoff;
    register int neg = 0, any = 0, cutlim;

    if (0 == n) {
        goto done;
    }
    /*
   * Skip white space and pick up leading +/- sign if any.
   * If base is 0, allow 0x for hex and 0 for octal, else
   * assume decimal; if base is already 16, allow 0x.
   */
    do {
        c = *s;
    } while (isspace(c) && (++s < nptr + n));

    if (s >= nptr + n) {
        goto done;
    }

    if (c == '-') {
        neg = 1;
        c   = *s++;
        if (s >= nptr + n) {
            goto done;
        }
    } else if (c == '+') {
        c = *s++;
        if (s >= nptr + n) {
            goto done;
        }
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < nptr + n) &&
            (*(s + 1) == 'x' || *(s + 1) == 'X')) {
            if (s + 2 >= nptr + n) {
                goto done;
            }
            c = s[1];
            s += 2;
            base = 16;
        }
    }

    if (base == 0)
        base = c == '0' ? 8 : 10;

    /*
   * Compute the cutoff value between legal numbers and illegal
   * numbers.  That is the largest legal value, divided by the
   * base.  An input number that is greater than this value, if
   * followed by a legal input character, is too big.  One that
   * is equal to this value may be valid or not; the limit
   * between valid and invalid numbers is then based on the last
   * digit.  For instance, if the range for longs is
   * [-2147483648..2147483647] and the input base is 10,
   * cutoff will be set to 214748364 and cutlim to either
   * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
   * a value > 214748364, or equal but the next digit is > 7 (or 8),
   * the number is too big, and we will return a range error.
   *
   * Set any if any `digits' consumed; make it negative to indicate
   * overflow.
   */
    cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
    cutlim = cutoff % (unsigned long)base;
    cutoff /= (unsigned long)base;
    for (acc = 0, any = 0; s < nptr + n; s++) {
        c = *s;

        if (isdigit(c))
            c -= '0';
        else if (isalpha(c))
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        else
            break;
        if (c >= base)
            break;
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
            any = -1;
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }
    if (any < 0) {
        acc   = neg ? LONG_MIN : LONG_MAX;
        errno = ERANGE;
    } else if (neg)
        acc = -acc;

done:
    if (endptr != 0)
        *endptr = (char *)(any ? s : nptr);
    return (acc);
}

/**
 * @brief The strntoul() function converts the string in str to an unsigned long
 * value. Modifications made to validate the length of the string for non-null
 * term strings.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * @param nptr          Pointer to start of string.
 * @param n             Max length of buffer in bytes.
 * @param[out] endptr   [optional] If endptr is not NULL, strtol() stores the
 * address of the first invalid character in *endptr. If there were no digits at
 * all, however, strtol() stores the original value of str in *endptr. Nota
 * Bene:  If the buffer is non-null terminated and the number comprises the
 * entire buffer, endptr will point past the end of the buffer, and the caller
 * should check if endptr >= nptr + n.
 *
 * @param int           The conversion is done according to the given base,
 * which must be between 2 and 36 inclusive, or be the special value 0.
 * @return unsigned long The unsigned long value.
 */
unsigned long cli_strntoul(const char *nptr, size_t n, char **endptr,
                           register int base)
{
    register const char *s     = nptr;
    register unsigned long acc = 0;
    register int c;
    register unsigned long cutoff;
    register int neg = 0, any = 0, cutlim;

    /*
   * See cli_strntol for comments as to the logic used.
   */
    do {
        c = *s;
    } while (isspace(c) && (++s < nptr + n));

    if (s >= nptr + n) {
        goto done;
    }

    if (c == '-') {
        neg = 1;
        c   = *s++;
        if (s >= nptr + n) {
            goto done;
        }
    } else if (c == '+') {
        c = *s++;
        if (s >= nptr + n) {
            goto done;
        }
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < nptr + n) &&
            (*(s + 1) == 'x' || *(s + 1) == 'X')) {
            if (s + 2 >= nptr + n) {
                goto done;
            }
            c = s[1];
            s += 2;
            base = 16;
        }
    }
    if (base == 0)
        base = c == '0' ? 8 : 10;

    cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
    cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;
    for (acc = 0, any = 0; s < nptr + n; s++) {
        c = *s;

        if (isdigit(c))
            c -= '0';
        else if (isalpha(c))
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        else
            break;
        if (c >= base)
            break;
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
            any = -1;
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }
    if (any < 0) {
        acc   = ULONG_MAX;
        errno = ERANGE;
    } else if (neg)
        acc = -acc;

done:
    if (endptr != 0)
        *endptr = (char *)(any ? s : nptr);
    return (acc);
}

/**
 * @brief 	cli_strntol_wrap() converts the string in str to a long value.
 *
 * Wrapper for cli_strntol() that provides incentive to check for failure.
 *
 * @param buf               Pointer to start of string.
 * @param buf_size 			Max length of buffer to convert to
 * integer.
 * @param fail_at_nondigit  If 1, fail out if the a non-digit character is found
 * before the end of the buffer. If 0, non-digit character represents end of
 * number and is not a failure.
 * @param base              The conversion is done according to the given base,
 * which must be between 2 and 36 inclusive, or be the special value 0.
 * @param[out] result 	    Long integer value of ascii number.
 * @return CL_SUCCESS       Success
 * @return CL_EPARSE        Failure
 */
cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size,
                            int fail_at_nondigit, int base, long *result)
{
    char *endptr = NULL;
    long num;

    if (buf_size == 0 || !buf || !result) {
        /* invalid parameter */
        return CL_EPARSE;
    }
    errno = 0;
    num   = cli_strntol(buf, buf_size, &endptr, base);
    if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) {
        /* under- or overflow */
        return CL_EPARSE;
    }
    if (endptr == buf) {
        /* no digits */
        return CL_EPARSE;
    }
    if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
        /* non-digit encountered */
        return CL_EPARSE;
    }
    /* success */
    *result = num;
    return CL_SUCCESS;
}

/**
 * @brief 	cli_strntoul_wrap() converts the string in str to a long value.
 *
 * Wrapper for cli_strntoul() that provides incentive to check for failure.
 *
 * @param buf               Pointer to start of string.
 * @param buf_size 			Max length of buffer to convert to
 * integer.
 * @param fail_at_nondigit  If 1, fail out if the a non-digit character is found
 * before the end of the buffer. If 0, non-digit character represents end of
 * number and is not a failure.
 * @param base              The conversion is done according to the given base,
 * which must be between 2 and 36 inclusive, or be the special value 0.
 * @param[out] result 	    Unsigned long integer value of ascii number.
 * @return CL_SUCCESS       Success
 * @return CL_EPARSE        Failure
 */
cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
                             int fail_at_nondigit, int base,
                             unsigned long *result)
{
    char *endptr = NULL;
    unsigned long num;

    if (buf_size == 0 || !buf || !result) {
        /* invalid parameter */
        return CL_EPARSE;
    }
    errno = 0;
    num   = cli_strntoul(buf, buf_size, &endptr, base);
    if ((num == ULONG_MAX) && (errno == ERANGE)) {
        /* under- or overflow */
        return CL_EPARSE;
    }
    if (endptr == buf) {
        /* no digits */
        return CL_EPARSE;
    }
    if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
        /* non-digit encountered */
        return CL_EPARSE;
    }
    /* success */
    *result = num;
    return CL_SUCCESS;
}

size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens, size_t token_skip)
{
    size_t tokens_found, i;
    int within_pcre = 0;

    for (tokens_found = 0; tokens_found < token_count;) {
        tokens[tokens_found++] = buffer;

        while (*buffer != '\0') {
            if (!within_pcre && (*buffer == delim))
                break;
            else if ((tokens_found > token_skip) && (*(buffer - 1) != '\\') &&
                     (*buffer == '/'))
                within_pcre = !within_pcre;
            buffer++;
        }

        if (*buffer != '\0') {
            *buffer++ = '\0';
        } else {
            i = tokens_found;
            while (i < token_count)
                tokens[i++] = NULL;
            return tokens_found;
        }
    }
    return tokens_found;
}

int cli_isnumber(const char *str)
{
    while (*str)
        if (!strchr("0123456789", *str++))
            return 0;

    return 1;
}

/* encodes the unicode character as utf-8 */
static inline size_t output_utf8(uint16_t u, unsigned char *dst)
{
    if (!u) {
        *dst = 0x1; /* don't add \0, add \1 instead */
        return 1;
    }
    if (u < 0x80) {
        *dst = u & 0xff;
        return 1;
    }
    if (u < 0x800) {
        *dst++ = 0xc0 | (u >> 6);   /* 110yyyyy */
        *dst   = 0x80 | (u & 0x3f); /* 10zzzzzz */
        return 2;
    }
    /* u < 0x10000 because we only handle utf-16,
   * values in range 0xd800 - 0xdfff aren't valid, but we don't check for
   * that*/
    *dst++ = 0xe0 | (u >> 12);         /* 1110xxxx */
    *dst++ = 0x80 | ((u >> 6) & 0x3f); /* 10yyyyyy */
    *dst   = 0x80 | (u & 0x3f);        /* 10zzzzzz */
    return 3;
}

/* javascript-like unescape() function */
char *cli_unescape(const char *str)
{
    char *R;
    size_t k, i = 0;
    const size_t len = strlen(str);
    /* unescaped string is at most as long as original,
   * it will usually be shorter */
    R = cli_malloc(len + 1);
    if (!R) {
        cli_errmsg("cli_unescape: Unable to allocate memory for string\n");
        return NULL;
    }
    for (k = 0; k < len; k++) {
        unsigned char c = str[k];
        if (str[k] == '%') {
            if (k + 5 >= len || str[k + 1] != 'u' || !isxdigit(str[k + 2]) ||
                !isxdigit(str[k + 3]) || !isxdigit(str[k + 4]) ||
                !isxdigit(str[k + 5])) {
                if (k + 2 < len && isxdigit(str[k + 1]) && isxdigit(str[k + 2])) {
                    c = ((cli_hex2int(str[k + 1]) < 0 ? 0 : cli_hex2int(str[k + 1]))
                         << 4) |
                        cli_hex2int(str[k + 2]);
                    k += 2;
                }
            } else {
                uint16_t u =
                    ((cli_hex2int(str[k + 2]) < 0 ? 0 : cli_hex2int(str[k + 2]))
                     << 12) |
                    ((cli_hex2int(str[k + 3]) < 0 ? 0 : cli_hex2int(str[k + 3])) << 8) |
                    ((cli_hex2int(str[k + 4]) < 0 ? 0 : cli_hex2int(str[k + 4])) << 4) |
                    cli_hex2int(str[k + 5]);
                i += output_utf8(u, (unsigned char *)&R[i]);
                k += 5;
                continue;
            }
        }
        if (!c)
            c = 1; /* don't add \0 */
        R[i++] = c;
    }
    R[i++] = '\0';
    R      = cli_realloc2(R, i);
    return R;
}

/* handle javascript's escape sequences inside strings */
int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str,
                                    size_t len)
{
    size_t i;
    for (i = 0; i < len; i++) {
        char c = str[i];
        if (c == '\\' && i + 1 < len) {
            i++;
            switch (str[i]) {
                case '0':
                    c = 0;
                    break;
                case 'b':
                    c = 8;
                    break;
                case 't':
                    c = 9;
                    break;
                case 'n':
                    c = 10;
                    break;
                case 'v':
                    c = 11;
                    break;
                case 'f':
                    c = 12;
                    break;
                case 'r':
                    c = 13;
                    break;
                case 'x':
                    if (i + 2 < len)
                        c = ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                             << 4) |
                            cli_hex2int(str[i + 2]);
                    i += 2;
                    break;
                case 'u':
                    if (i + 4 < len) {
                        uint16_t u =
                            ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                             << 12) |
                            ((cli_hex2int(str[i + 2]) < 0 ? 0 : cli_hex2int(str[i + 2]))
                             << 8) |
                            ((cli_hex2int(str[i + 3]) < 0 ? 0 : cli_hex2int(str[i + 3]))
                             << 4) |
                            cli_hex2int(str[i + 4]);
                        if (textbuffer_ensure_capacity(buf, 4) == -1)
                            return -1;
                        buf->pos += output_utf8(u, (unsigned char *)&buf->data[buf->pos]);
                        i += 4;
                        continue;
                    }
                    break;
                default:
                    c = str[i];
                    break;
            }
        }
        if (!c)
            c = 1; /* we don't insert \0 */
        if (textbuffer_putc(buf, c) == -1)
            return -1;
    }
    return 0;
}

int cli_hexnibbles(char *str, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        int c = cli_hex2int(str[i]);
        if (c < 0)
            return 1;
        str[i] = c;
    }
    return 0;
}

char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
{
    /* utf8 -
   * 4 bytes for utf16 high+low surrogate (4 bytes input)
   * 3 bytes for utf16 otherwise (2 bytes input) */
    size_t i, j;
    size_t needed = length * 3 / 2 + 2;
    char *s2;

    if (length < 2)
        return cli_strdup("");
    if (length % 2) {
        cli_warnmsg("utf16 length is not multiple of two: %lu\n", (long)length);
        length--;
    }

    s2 = cli_malloc(needed);
    if (!s2)
        return NULL;

    i = 0;

    if ((utf16[0] == '\xff' && utf16[1] == '\xfe') ||
        (utf16[0] == '\xfe' && utf16[1] == '\xff')) {
        i += 2;
        if (type == UTF16_BOM)
            type = (utf16[0] == '\xff') ? UTF16_LE : UTF16_BE;
    } else if (type == UTF16_BOM)
        type = UTF16_BE;

    for (j = 0; i < length && j < needed; i += 2) {
        uint16_t c = cli_readint16(&utf16[i]);
        if (type == UTF16_BE)
            c = cbswap16(c);
        if (c < 0x80) {
            s2[j++] = c;
        } else if (c < 0x800) {
            s2[j]     = 0xc0 | (c >> 6);
            s2[j + 1] = 0x80 | (c & 0x3f);
            j += 2;
        } else if (c < 0xd800 || c >= 0xe000) {
            s2[j]     = 0xe0 | (c >> 12);
            s2[j + 1] = 0x80 | ((c >> 6) & 0x3f);
            s2[j + 2] = 0x80 | (c & 0x3f);
            j += 3;
        } else if (c < 0xdc00 && i + 3 < length) {
            uint16_t c2;
            /* UTF16 high+low surrogate */
            c  = c - 0xd800 + 0x40;
            c2 = i + 3 < length ? cli_readint16(&utf16[i + 2]) : 0;
            c2 -= 0xdc00;
            s2[j]     = 0xf0 | (c >> 8);
            s2[j + 1] = 0x80 | ((c >> 2) & 0x3f);
            s2[j + 2] = 0x80 | ((c & 3) << 4) | (c2 >> 6);
            s2[j + 3] = 0x80 | (c2 & 0x3f);
            j += 4;
            i += 2;
        } else {
            cli_dbgmsg("UTF16 surrogate encountered at wrong pos\n");
            /* invalid char */
            s2[j++] = 0xef;
            s2[j++] = 0xbf;
            s2[j++] = 0xbd;
        }
    }
    if (j >= needed)
        j = needed - 1;
    s2[j] = '\0';
    return s2;
}

int cli_isutf8(const char *buf, unsigned int len)
{
    unsigned int i, j;

    for (i = 0; i < len; i++) {
        if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
            continue;
        } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
            return 0;
        } else {
            unsigned int following;

            if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
                /* c = buf[i] & 0x1f; */
                following = 1;
            } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
                /* c = buf[i] & 0x0f; */
                following = 2;
            } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
                /* c = buf[i] & 0x07; */
                following = 3;
            } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
                /* c = buf[i] & 0x03; */
                following = 4;
            } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
                /* c = buf[i] & 0x01; */
                following = 5;
            } else {
                return 0;
            }

            for (j = 0; j < following; j++) {
                if (++i >= len)
                    return 0;

                if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
                    return 0;

                /* c = (c << 6) + (buf[i] & 0x3f); */
            }
        }
    }

    return 1;
}

cl_error_t cli_basename(const char *filepath, size_t filepath_len,
                        char **filebase)
{
    cl_error_t status = CL_EARG;
    const char *index = NULL;

    if (NULL == filepath || NULL == filebase || filepath_len == 0) {
        cli_dbgmsg("cli_basename: Invalid arguments.\n");
        goto done;
    }

    index = filepath + filepath_len - 1;

    while (index > filepath) {
        if (index[0] == PATHSEP[0])
            break;
        index--;
    }
    if ((index != filepath) || (index[0] == PATHSEP[0]))
        index++;

    if (0 == CLI_STRNLEN(index, filepath_len - (index - filepath))) {
        cli_dbgmsg("cli_basename: Provided path does not include a file name.\n");
        status = CL_EFORMAT;
        goto done;
    }

    *filebase = CLI_STRNDUP(index, filepath_len - (index - filepath));
    if (NULL == *filebase) {
        cli_errmsg("cli_basename: Failed to allocate memory for file basename.\n");
        status = CL_EMEM;
        goto done;
    }

    status = CL_SUCCESS;

done:
    return status;
}