libclamav/str.c
e3aaff8e
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Tomasz Kojm, Nigel Horne, Török Edvin
cd1eae2c
  *
6289eda8
  *  Acknowledgements: cli_strcasestr() contains a public domain code from:
  *                    http://unixpapa.com/incnote/string.html
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
bb34cb31
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
e3aaff8e
  */
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
8515ab9e
 #include "str.h"
 
e3aaff8e
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
91c8f9e8
 #include <limits.h>
c7029064
 #ifdef HAVE_STRINGS_H
 #include <strings.h>
 #endif
e3aaff8e
 #include <ctype.h>
e4e8366f
 #include <sys/types.h>
e3aaff8e
 
 #include "clamav.h"
 #include "others.h"
bedc58de
 #include "matcher.h"
eb290151
 #include "jsparse/textbuf.h"
01eebc13
 #include "platform.h"
e3aaff8e
 
27948a03
 // clang-format off
 
a72b7d2e
 /*
  * Lookup table indexed by an unsigned byte value.  The entries for the ASCII
  * characters '0'-'9', 'A'-'F' and 'a'-'f' hold the numeric value of that
  * hexadecimal digit (0-15); every other entry is -1.
  */
 static const int hex_chars[256] = {
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x00 - 0x0f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x10 - 0x1f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x20 - 0x2f */
      0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1, /* 0x30 - 0x3f: '0'-'9' */
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x40 - 0x4f: 'A'-'F' */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x50 - 0x5f */
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x60 - 0x6f: 'a'-'f' */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x70 - 0x7f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x80 - 0x8f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0x90 - 0x9f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xa0 - 0xaf */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xb0 - 0xbf */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xc0 - 0xcf */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xd0 - 0xdf */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xe0 - 0xef */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 0xf0 - 0xff */
 };
 
27948a03
 // clang-format on
 
a72b7d2e
 static inline int cli_hex2int(const char c)
e3aaff8e
 {
288057e9
     return hex_chars[(const unsigned char)c];
e3aaff8e
 }
 
288057e9
 /**
  * @brief Convert a hex pattern string into 16-bit words, two characters per word.
  *
  * Each character pair is translated as follows:
  *  - "??"  -> CLI_MATCH_IGNORE
  *  - "X?"  -> (X << 4) | CLI_MATCH_NIBBLE_HIGH
  *  - "?X"  -> X | CLI_MATCH_NIBBLE_LOW
  *  - "(" as first character -> CLI_MATCH_SPECIAL
  *  - "XY"  -> the byte value 0xXY
  *
  * @param hex       Input characters; hex[i + 1] is read for every even i < len.
  * @param[out] ptr  Receives one uint16_t per character pair.
  * @param len       Number of input characters to process.
  * @return int      1 on success, 0 as soon as a non-hex character is found
  *                  where a hex digit is required.
  */
 int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len)
 {
     unsigned int pos;
 
     for (pos = 0; pos < len; pos += 2) {
         const char first  = hex[pos];
         const char second = hex[pos + 1];
         uint16_t word     = 0;
         int digit;
 
         if (second == '?') {
             if (first == '?') {
                 word |= CLI_MATCH_IGNORE;
             } else {
                 /* only the high nibble is specified */
                 digit = cli_hex2int(first);
                 if (digit < 0)
                     return 0;
                 word = digit << 4;
                 word |= CLI_MATCH_NIBBLE_HIGH;
             }
         } else if (first == '?') {
             /* only the low nibble is specified */
             digit = cli_hex2int(second);
             if (digit < 0)
                 return 0;
             word = digit;
             word |= CLI_MATCH_NIBBLE_LOW;
         } else if (first == '(') {
             word |= CLI_MATCH_SPECIAL;
         } else {
             /* ordinary two-digit hex byte */
             digit = cli_hex2int(first);
             if (digit < 0)
                 return 0;
             word = digit;
 
             digit = cli_hex2int(second);
             if (digit < 0)
                 return 0;
             word = (word << 4) + digit;
         }
 
         *ptr++ = word;
     }
 
     return 1;
 }
 
 /**
  * @brief Allocate and fill a uint16_t array from a hex pattern string.
  *
  * The string must have an even length.  The result holds (length / 2) + 1
  * elements and is filled by cli_realhex2ui().
  *
  * @param hex         NUL-terminated pattern string.
  * @return uint16_t*  Buffer obtained from cli_calloc() (this function releases
  *                    it with free() on failure), or NULL on odd length,
  *                    allocation failure or conversion failure.
  */
 uint16_t *cli_hex2ui(const char *hex)
 {
     const unsigned int hexlen = strlen(hex);
     uint16_t *words;
 
     if (hexlen % 2 != 0) {
         cli_errmsg("cli_hex2ui(): Malformed hexstring: %s (length: %u)\n", hex,
                    hexlen);
         return NULL;
     }
 
     words = cli_calloc((hexlen / 2) + 1, sizeof(uint16_t));
     if (words == NULL)
         return NULL;
 
     if (!cli_realhex2ui(hex, words, hexlen)) {
         free(words);
         return NULL;
     }
 
     return words;
 }
 
4048c4f6
 /**
  * @brief Decode an even-length hex string into a newly allocated byte buffer.
  *
  * @param hex     NUL-terminated string of hex digits.
  * @return char*  Buffer of (length / 2) + 1 bytes obtained from cli_calloc()
  *                and filled by cli_hex2str_to(); NULL on odd length,
  *                allocation failure or when cli_hex2str_to() reports -1.
  */
 char *cli_hex2str(const char *hex)
 {
     const size_t hexlen = strlen(hex);
     char *decoded;
 
     if (hexlen % 2 != 0) {
         cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex,
                    (unsigned)hexlen);
         return NULL;
     }
 
     decoded = cli_calloc((hexlen / 2) + 1, sizeof(char));
     if (decoded == NULL)
         return NULL;
 
     if (cli_hex2str_to(hex, decoded, hexlen) != -1)
         return decoded;
 
     free(decoded);
     return NULL;
 }
 
2979de20
 /**
  * @brief Decode pairs of hex digits into bytes in a caller-supplied buffer.
  *
  * @param hex       Input characters; consumed two at a time.
  * @param[out] ptr  Receives one byte per character pair.
  * @param len       Number of input characters to process.
  * @return int      0 on success, -1 at the first character that is not a hex
  *                  digit (bytes decoded before that point remain written).
  */
 int cli_hex2str_to(const char *hex, char *ptr, size_t len)
 {
     size_t pos;
 
     for (pos = 0; pos < len; pos += 2) {
         const int high = cli_hex2int(hex[pos]);
         int low;
 
         if (high < 0)
             return -1;
 
         low = cli_hex2int(hex[pos + 1]);
         if (low < 0)
             return -1;
 
         *ptr++ = (char)((high << 4) + low);
     }
 
     return 0;
 }
 
e5916a51
 /**
  * @brief Convert an even-length hex string to an int.
  *
  * Digits are accumulated most-significant first.  Conversion stops silently
  * at the first character that is not a hex digit and the value accumulated
  * so far is returned.
  *
  * @param hex   NUL-terminated string.
  * @return int  The accumulated value, or -1 if the string length is odd.
  */
 int cli_hex2num(const char *hex)
 {
     const int len = strlen(hex);
     int result    = 0;
     int pos;
 
     if (len % 2 != 0) {
         cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex,
                    len);
         return -1;
     }
 
     for (pos = 0; pos < len; pos++) {
         const int digit = cli_hex2int(hex[pos]);
 
         if (digit < 0)
             break;
         result = (result << 4) | digit;
     }
 
     return result;
 }
 
a5bde84c
 /**
  * @brief Convert a hexadecimal string of any length to an int.
  *
  * Even-length input is handed straight to cli_hex2num().  Odd-length input is
  * copied into a scratch buffer behind a leading '0' so that its length
  * becomes even before cli_hex2num() is called.
  *
  * @param hex   NUL-terminated string of hex digits.
  * @return int  The value reported by cli_hex2num(), or -1 if the scratch
  *              buffer could not be allocated.
  */
 int cli_xtoi(const char *hex)
 {
     int len, val, i;
     char *hexbuf;
 
     len = strlen(hex);
 
     if (len % 2 == 0)
         return cli_hex2num(hex);
 
     /* len + 2: one byte for the pad digit, one for the terminator */
     hexbuf = cli_calloc(len + 2, sizeof(char));
     if (hexbuf == NULL) {
         cli_errmsg("cli_xtoi(): cli_malloc fails.\n");
         return -1;
     }
 
     /*
      * The pad digit has to be written explicitly.  Without it slot 0 keeps
      * whatever the allocator put there (a NUL if cli_calloc() zero-fills like
      * calloc()), cli_hex2num() would then see an empty string and every
      * odd-length input would convert to 0.
      */
     hexbuf[0] = '0';
     for (i = 0; i < len; i++)
         hexbuf[i + 1] = hex[i];
 
     val = cli_hex2num(hexbuf);
     free(hexbuf);
     return val;
 }
 
8000d078
 /**
  * @brief Encode a byte buffer as a lowercase hex string.
  *
  * @param string  Input bytes.
  * @param len     Number of input bytes.
  * @return char*  Buffer of 2 * len + 1 bytes obtained from cli_calloc() with
  *                two hex digits written per input byte, or NULL if the
  *                allocation failed.
  */
 char *cli_str2hex(const char *string, unsigned int len)
 {
     static const char digits[] = "0123456789abcdef";
     char *encoded;
     unsigned int idx;
 
     encoded = (char *)cli_calloc(2 * len + 1, sizeof(char));
     if (encoded == NULL)
         return NULL;
 
     for (idx = 0; idx < len; idx++) {
         encoded[2 * idx]     = digits[(string[idx] >> 4) & 0xf];
         encoded[2 * idx + 1] = digits[string[idx] & 0xf];
     }
 
     return encoded;
 }
 
bd988961
 /**
  * @brief Collapse a buffer of 2-byte units into one output byte per unit.
  *
  * For every input pair the output byte is (second byte << 4) + first byte,
  * truncated to char.  A trailing odd byte is ignored.
  *
  * @param str     Input buffer.
  * @param length  Input length in bytes; must be at least 2.
  * @return char*  Buffer of length / 2 + 1 bytes obtained from cli_calloc(),
  *                or NULL if length < 2 or the allocation failed.
  */
 char *cli_utf16toascii(const char *str, unsigned int length)
 {
     char *ascii;
     unsigned int in;
     unsigned int out = 0;
 
     if (length < 2) {
         cli_dbgmsg("cli_utf16toascii: length < 2\n");
         return NULL;
     }
 
     /* drop a dangling last byte so only whole pairs are read */
     if (length & 1)
         length--;
 
     ascii = cli_calloc(length / 2 + 1, sizeof(char));
     if (!ascii)
         return NULL;
 
     for (in = 0; in < length; in += 2) {
         ascii[out] = ((unsigned char)str[in + 1]) << 4;
         ascii[out] += str[in];
         out++;
     }
 
     return ascii;
 }
 
e3aaff8e
 /**
  * @brief Case-insensitive "ends with" test.
  *
  * @param haystack  NUL-terminated string to inspect.
  * @param needle    NUL-terminated suffix to look for.
  * @return int      1 if haystack ends with needle (ignoring case), 0 otherwise,
  *                  including when needle is longer than haystack.
  */
 int cli_strbcasestr(const char *haystack, const char *needle)
 {
     const int hay_len    = strlen(haystack);
     const int needle_len = strlen(needle);
 
     if (hay_len < needle_len)
         return 0;
 
     /* compare the needle against the tail of the haystack */
     return strcasecmp(haystack + (hay_len - needle_len), needle) == 0;
 }
 
694e7882
 /**
  * @brief Strip every trailing '\n' and '\r' from a string, in place.
  *
  * @param string  NUL-terminated string to trim; may be NULL.
  * @return int    Length of the string after trimming (as strlen() would
  *                report it), or -1 if string was NULL.
  */
 int cli_chomp(char *string)
 {
     int end;
 
     if (!string)
         return -1;
 
     /* 'end' is always the current length, i.e. one past the last kept char */
     end = strlen(string);
 
     while (end > 0 && (string[end - 1] == '\n' || string[end - 1] == '\r')) {
         end--;
         string[end] = '\0';
     }
 
     return end;
 }
486fa0d3
 
2d70a403
 /*
  * char *cli_strtok(const char *line, int fieldno, const char *delim)
  * Return a freshly allocated copy of field number <fieldno> (counted from 0)
  * of <line>.  Fields are separated by any character found in <delim>; a run
  * of consecutive delimiters counts as a single separator.
  * Returns NULL when <line> has no such field, when the field is empty, or
  * when memory cannot be allocated.
  * The caller has to free() the result afterwards.
  */
 char *cli_strtok(const char *line, int fieldno, const char *delim)
 {
     int fields_seen = 0;
     int start       = 0;
     int end;
     char *field;
 
     /* walk forward until <fieldno> separators have been passed */
     while (line[start] && fields_seen != fieldno) {
         if (strchr(delim, line[start])) {
             fields_seen++;
             /* swallow the rest of a run of delimiters */
             while (line[start + 1] && strchr(delim, line[start + 1]))
                 start++;
         }
         start++;
     }
 
     /* ran off the end of the line before reaching the field */
     if (line[start] == '\0')
         return NULL;
 
     /* find the end of the field */
     end = start;
     while (line[end] && !strchr(delim, line[end]))
         end++;
 
     if (end == start)
         return NULL;
 
     field = cli_malloc(end - start + 1);
     if (!field) {
         cli_errmsg("cli_strtok: Unable to allocate memory for buffer\n");
         return NULL;
     }
 
     memcpy(field, line + start, end - start);
     field[end - start] = '\0';
 
     return field;
 }
908ac3b2
 
 /*
  * Like cli_strtok, but the field is copied into the caller-supplied buffer
  * <output> instead of freshly allocated memory.  <output> must be large
  * enough for the field plus its terminating NUL; no size is checked here.
  * Returns <output> on success, or NULL when the field does not exist or is
  * empty.
  * njh@bandsman.co.uk
  */
 char *cli_strtokbuf(const char *input, int fieldno, const char *delim,
                     char *output)
 {
     int fields_seen = 0;
     int start       = 0;
     int end;
 
     /* walk forward until <fieldno> separators have been passed */
     while (input[start] && fields_seen != fieldno) {
         if (strchr(delim, input[start])) {
             fields_seen++;
             /* swallow the rest of a run of delimiters */
             while (input[start + 1] && strchr(delim, input[start + 1]))
                 start++;
         }
         start++;
     }
 
     /* ran off the end of the input before reaching the field */
     if (!input[start])
         return NULL;
 
     /* find the end of the field */
     end = start;
     while (input[end] && !strchr(delim, input[end]))
         end++;
 
     if (end == start)
         return NULL;
 
     memcpy(output, input + start, end - start);
     output[end - start] = '\0';
 
     return output;
 }
8f84357e
 
f18ae5be
 /**
  * @brief Find the first occurrence of a byte sequence inside a buffer.
  *
  * Candidate positions are filtered on the needle's second byte first; the
  * step sizes depend on whether the needle's first two bytes are equal, which
  * lets the scan skip positions that cannot start a match.
  *
  * @param haystack      Buffer to search.
  * @param hs            Size of haystack in bytes.
  * @param needle        Byte sequence to look for.
  * @param ns            Size of needle in bytes.
  * @return const char*  Pointer to the first match inside haystack, or NULL if
  *                      there is none or if either size is 0 or ns > hs.
  */
 const char *cli_memstr(const char *haystack, size_t hs, const char *needle, size_t ns)
 {
     size_t pos, last_start, miss_step, hit_step;
 
     if (hs == 0 || ns == 0 || ns > hs)
         return NULL;
 
     if (haystack == needle)
         return haystack;
 
     if (ns == 1)
         return memchr(haystack, needle[0], hs);
 
     /* step after a second-byte mismatch / after a second-byte match */
     miss_step = (needle[0] == needle[1]) ? 2 : 1;
     hit_step  = (needle[0] == needle[1]) ? 1 : 2;
 
     last_start = hs - ns;
     pos        = 0;
     while (pos <= last_start) {
         if (haystack[pos + 1] != needle[1]) {
             pos += miss_step;
             continue;
         }
 
         if (haystack[pos] == needle[0] &&
             memcmp(needle + 2, haystack + pos + 2, ns - 2) == 0)
             return haystack + pos;
 
         pos += hit_step;
     }
 
     return NULL;
 }
9b133473
 
 /**
  * @brief Copy a string and return a pointer to the END of the copy.
  *
  * @param dest    Destination buffer; must have room for source plus its NUL.
  * @param source  NUL-terminated string to copy.
  * @return char*  Pointer to the terminating NUL written into dest, or NULL if
  *                either argument is NULL.
  */
 char *cli_strrcpy(char *dest, const char *source) /* by NJH */
 {
     if (dest == NULL || source == NULL) {
         cli_errmsg("cli_strrcpy: NULL argument\n");
         return NULL;
     }
 
     for (;;) {
         const char ch = *source++;
 
         *dest = ch;
         if (ch == '\0')
             return dest;
         dest++;
     }
 }
e4e8366f
 
2e06875d
 /**
  * @brief Case-insensitive substring search.
  *
  * The haystack is scanned with strcspn() for either case of the needle's
  * first character; each candidate is then verified with strncasecmp().
  *
  * Note: an empty needle yields NULL (the scan set is empty, so no candidate
  * is ever produced).
  *
  * @param haystack      NUL-terminated string to search.
  * @param needle        NUL-terminated string to look for.
  * @return const char*  Pointer to the first match inside haystack, or NULL.
  */
 const char *__cli_strcasestr(const char *haystack, const char *needle)
 {
     const size_t hay_len    = strlen(haystack);
     const size_t needle_len = strlen(needle);
     char first[3];
     size_t pos;
 
     /*
      * tolower()/toupper() require a value representable as unsigned char
      * (or EOF); a plain char may be negative, so convert first.
      */
     first[0] = (char)tolower((unsigned char)*needle);
     first[1] = (char)toupper((unsigned char)*needle);
     first[2] = '\0';
 
     pos = strcspn(haystack, first);
     while (pos != hay_len) {
         if (strncasecmp(haystack + pos, needle, needle_len) == 0)
             return haystack + pos;
         /* jump to the next occurrence of the first character */
         pos += strcspn(haystack + pos + 1, first) + 1;
     }
 
     return NULL;
 }
47a544dc
 
2e06875d
 /**
  * @brief Duplicate at most n characters of a string.
  *
  * @param s       Source string; may be NULL.
  * @param n       Maximum number of characters to copy.
  * @return char*  malloc()ed, NUL-terminated copy of the first
  *                CLI_STRNLEN(s, n) characters, or NULL if s is NULL or the
  *                allocation failed.
  */
 char *__cli_strndup(const char *s, size_t n)
 {
     size_t copy_len;
     char *copy;
 
     if (s == NULL)
         return NULL;
 
     copy_len = CLI_STRNLEN(s, n);
     copy     = malloc(copy_len + 1);
 
     if (copy != NULL) {
         memcpy(copy, s, copy_len);
         copy[copy_len] = '\0';
     }
 
     return copy;
 }
 
2e06875d
 /**
  * @brief Length of a string, looking at no more than n characters.
  *
  * @param s        String to measure.
  * @param n        Maximum number of characters to examine.
  * @return size_t  Number of characters before the first NUL, or n if no NUL
  *                 occurs within the first n characters.
  */
 size_t __cli_strnlen(const char *s, size_t n)
 {
     const char *cursor = s;
     size_t remaining   = n;
 
     while (remaining != 0 && *cursor != '\0') {
         cursor++;
         remaining--;
     }
 
     return n - remaining;
 }
 
f8b3d2e5
 /*
  * @brief Find the first occurrence of find in s.
  *
  * At most the first slen characters of s are searched, and the search also
  * stops at a NUL in s.  A match must lie entirely within those slen
  * characters.
  *
  * Copyright (c) 2001 Mike Barcroft <mike@FreeBSD.org>
  * Copyright (c) 1990, 1993
  * The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Chris Torek.
  *
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * @param s      haystack
  * @param find   needle (NUL-terminated)
  * @param slen   haystack length
  * @return char* Address of the needle, if found, else NULL.  An empty needle
  *               matches at s.
  */
 char *__cli_strnstr(const char *s, const char *find, size_t slen)
 {
     const char first = *find;
     const char *rest;
     size_t rest_len;
     char current;
 
     if (first == '\0')
         return (char *)s;
 
     rest     = find + 1;
     rest_len = strlen(rest);
 
     for (;;) {
         /* advance to the next occurrence of the needle's first character */
         do {
             if (slen == 0)
                 return NULL;
             slen--;
             current = *s++;
             if (current == '\0')
                 return NULL;
         } while (current != first);
 
         /* the remainder of the needle no longer fits in the window */
         if (rest_len > slen)
             return NULL;
 
         if (strncmp(s, rest, rest_len) == 0)
             break;
     }
 
     /* s is one past the matched first character */
     return (char *)(s - 1);
 }
 
cd1eae2c
 /**
  * @brief Split a buffer in place on a single delimiter character.
  *
  * Each delimiter that is consumed is overwritten with NUL and tokens[] is
  * filled with pointers into buffer.  If the input holds fewer than
  * token_count tokens, the unused slots of tokens[] are set to NULL.  Once
  * token_count tokens have been stored, the rest of the buffer is left alone
  * (the delimiter ending the last stored token, if any, is still replaced).
  *
  * @param buffer        NUL-terminated, writable input.
  * @param delim         Delimiter character.
  * @param token_count   Capacity of tokens[].
  * @param[out] tokens   Receives the token pointers.
  * @return size_t       Number of tokens stored.
  */
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
                        const char **tokens)
 {
     size_t found = 0;
     size_t fill;
 
     while (found < token_count) {
         char *separator;
 
         tokens[found++] = buffer;
 
         separator = strchr(buffer, delim);
         if (separator == NULL) {
             /* input exhausted: blank out the remaining slots */
             for (fill = found; fill < token_count; fill++)
                 tokens[fill] = NULL;
             break;
         }
 
         *separator = '\0';
         buffer     = separator + 1;
     }
 
     return found;
 }
a3fe2c5b
 
ef9239d5
 /**
  * @brief The strntol() function converts the string in nptr to a long value.
  * It is strtol() modified to honour an explicit buffer length, so that it can
  * be used on strings that are not NUL-terminated.
  *
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Leading white space is skipped, then an optional '+' or '-' sign is
  * accepted.  If base is 0 or 16 a "0x"/"0X" prefix is accepted; if base is 0
  * a leading '0' selects octal, anything else decimal.  On overflow the result
  * is clamped to LONG_MIN/LONG_MAX and errno is set to ERANGE.
  *
  * @param nptr          Pointer to start of string.
  * @param n             Max length of buffer in bytes.
  * @param[out] endptr   [optional] If endptr is not NULL, the address of the
  * first character that was not consumed is stored in *endptr. If no digits
  * were consumed at all, nptr is stored instead. Nota Bene: If the buffer is
  * not NUL-terminated and the number comprises the entire buffer, *endptr will
  * point past the end of the buffer, and the caller should check if
  * *endptr >= nptr + n.
  * @param base          The conversion is done according to the given base,
  * which must be between 2 and 36 inclusive, or be the special value 0.
  * @return long         The signed long value.
  */
 long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
 {
     register const char *s     = nptr;
     register unsigned long acc = 0;
     register int c;
     register unsigned long cutoff;
     register int neg = 0, any = 0, cutlim;
 
     if (0 == n) {
         goto done;
     }
 
     /*
      * Skip white space and pick up leading +/- sign if any.
      * If base is 0, allow 0x for hex and 0 for octal, else
      * assume decimal; if base is already 16, allow 0x.
      *
      * Characters are read through unsigned char because the <ctype.h>
      * classifiers are undefined for negative arguments other than EOF.
      */
     do {
         c = (unsigned char)*s;
     } while (isspace(c) && (++s < nptr + n));
 
     if (s >= nptr + n) {
         goto done;
     }
 
     if (c == '-' || c == '+') {
         neg = (c == '-');
         /*
          * Step past the sign and reload c from the character that follows
          * it, so that the prefix/base detection below looks at the first
          * digit rather than at the sign itself.
          */
         if (++s >= nptr + n) {
             goto done;
         }
         c = (unsigned char)*s;
     }
 
     if (base == 0 || base == 16) {
         if (c == '0' && (s + 1 < nptr + n) &&
             (*(s + 1) == 'x' || *(s + 1) == 'X')) {
             if (s + 2 >= nptr + n) {
                 goto done;
             }
             c = s[1];
             s += 2;
             base = 16;
         }
     }
 
     if (base == 0)
         base = c == '0' ? 8 : 10;
 
     /*
      * Compute the cutoff value between legal numbers and illegal
      * numbers.  That is the largest legal value, divided by the
      * base.  An input number that is greater than this value, if
      * followed by a legal input character, is too big.  One that
      * is equal to this value may be valid or not; the limit
      * between valid and invalid numbers is then based on the last
      * digit.  For instance, if the range for longs is
      * [-2147483648..2147483647] and the input base is 10,
      * cutoff will be set to 214748364 and cutlim to either
      * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
      * a value > 214748364, or equal but the next digit is > 7 (or 8),
      * the number is too big, and we will return a range error.
      *
      * Set any if any `digits' consumed; make it negative to indicate
      * overflow.
      */
     cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
     cutlim = cutoff % (unsigned long)base;
     cutoff /= (unsigned long)base;
     for (acc = 0, any = 0; s < nptr + n; s++) {
         c = (unsigned char)*s;
 
         if (isdigit(c))
             c -= '0';
         else if (isalpha(c))
             c -= isupper(c) ? 'A' - 10 : 'a' - 10;
         else
             break;
         if (c >= base)
             break;
         if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
             any = -1;
         else {
             any = 1;
             acc *= base;
             acc += c;
         }
     }
     if (any < 0) {
         acc   = neg ? LONG_MIN : LONG_MAX;
         errno = ERANGE;
     } else if (neg)
         acc = -acc;
 
 done:
     if (endptr != 0)
         *endptr = (char *)(any ? s : nptr);
     return (acc);
 }
 
 /**
  * @brief The strntoul() function converts the string in nptr to an unsigned
  * long value. It is strtoul() modified to honour an explicit buffer length, so
  * that it can be used on strings that are not NUL-terminated.
  *
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Leading white space is skipped, then an optional '+' or '-' sign is
  * accepted (a '-' negates the result in unsigned arithmetic).  If base is 0
  * or 16 a "0x"/"0X" prefix is accepted; if base is 0 a leading '0' selects
  * octal, anything else decimal.  On overflow the result is ULONG_MAX and
  * errno is set to ERANGE.
  *
  * @param nptr          Pointer to start of string.
  * @param n             Max length of buffer in bytes.
  * @param[out] endptr   [optional] If endptr is not NULL, the address of the
  * first character that was not consumed is stored in *endptr. If no digits
  * were consumed at all, nptr is stored instead. Nota Bene: If the buffer is
  * not NUL-terminated and the number comprises the entire buffer, *endptr will
  * point past the end of the buffer, and the caller should check if
  * *endptr >= nptr + n.
  * @param base          The conversion is done according to the given base,
  * which must be between 2 and 36 inclusive, or be the special value 0.
  * @return unsigned long The unsigned long value.
  */
 unsigned long cli_strntoul(const char *nptr, size_t n, char **endptr,
                            register int base)
 {
     register const char *s     = nptr;
     register unsigned long acc = 0;
     register int c;
     register unsigned long cutoff;
     register int neg = 0, any = 0, cutlim;
 
     /* Nothing may be read from an empty buffer. */
     if (0 == n) {
         goto done;
     }
 
     /*
      * Skip white space.  Characters are read through unsigned char because
      * the <ctype.h> classifiers are undefined for negative arguments other
      * than EOF.
      */
     do {
         c = (unsigned char)*s;
     } while (isspace(c) && (++s < nptr + n));
 
     if (s >= nptr + n) {
         goto done;
     }
 
     if (c == '-' || c == '+') {
         neg = (c == '-');
         /*
          * Step past the sign and reload c from the character that follows
          * it, so that the prefix/base detection below looks at the first
          * digit rather than at the sign itself.
          */
         if (++s >= nptr + n) {
             goto done;
         }
         c = (unsigned char)*s;
     }
 
     if (base == 0 || base == 16) {
         if (c == '0' && (s + 1 < nptr + n) &&
             (*(s + 1) == 'x' || *(s + 1) == 'X')) {
             if (s + 2 >= nptr + n) {
                 goto done;
             }
             c = s[1];
             s += 2;
             base = 16;
         }
     }
     if (base == 0)
         base = c == '0' ? 8 : 10;
 
     /*
      * cutoff is the largest value that can still be multiplied by base
      * without overflowing; cutlim is the largest digit that may follow a
      * value equal to cutoff.  any becomes negative once overflow occurred.
      */
     cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
     cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;
     for (acc = 0, any = 0; s < nptr + n; s++) {
         c = (unsigned char)*s;
 
         if (isdigit(c))
             c -= '0';
         else if (isalpha(c))
             c -= isupper(c) ? 'A' - 10 : 'a' - 10;
         else
             break;
         if (c >= base)
             break;
         if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
             any = -1;
         else {
             any = 1;
             acc *= base;
             acc += c;
         }
     }
     if (any < 0) {
         acc   = ULONG_MAX;
         errno = ERANGE;
     } else if (neg)
         acc = -acc;
 
 done:
     if (endptr != 0)
         *endptr = (char *)(any ? s : nptr);
     return (acc);
 }
 
91c8f9e8
 /**
e39e6d97
  * @brief 	cli_strntol_wrap() converts the string in str to a long value.
cd1eae2c
  *
91c8f9e8
  * Wrapper for cli_strntol() that provides incentive to check for failure.
cd1eae2c
  *
  * @param buf               Pointer to start of string.
  * @param buf_size 			Max length of buffer to convert to
  * integer.
  * @param fail_at_nondigit  If 1, fail out if the a non-digit character is found
  * before the end of the buffer. If 0, non-digit character represents end of
  * number and is not a failure.
  * @param base              The conversion is done according to the given base,
  * which must be between 2 and 36 inclusive, or be the special value 0.
91c8f9e8
  * @param[out] result 	    Long integer value of ascii number.
  * @return CL_SUCCESS       Success
  * @return CL_EPARSE        Failure
  */
cd1eae2c
 cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size,
                             int fail_at_nondigit, int base, long *result)
91c8f9e8
 {
     char *endptr = NULL;
     long num;
 
     if (buf_size == 0 || !buf || !result) {
         /* invalid parameter */
         return CL_EPARSE;
     }
     errno = 0;
288057e9
     num   = cli_strntol(buf, buf_size, &endptr, base);
91c8f9e8
     if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) {
         /* under- or overflow */
         return CL_EPARSE;
     }
     if (endptr == buf) {
         /* no digits */
         return CL_EPARSE;
     }
     if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
         /* non-digit encountered */
         return CL_EPARSE;
     }
     /* success */
     *result = num;
     return CL_SUCCESS;
 }
 
e39e6d97
 /**
  * @brief 	cli_strntoul_wrap() converts the string in str to a long value.
cd1eae2c
  *
e39e6d97
  * Wrapper for cli_strntoul() that provides incentive to check for failure.
cd1eae2c
  *
  * @param buf               Pointer to start of string.
  * @param buf_size 			Max length of buffer to convert to
  * integer.
  * @param fail_at_nondigit  If 1, fail out if the a non-digit character is found
  * before the end of the buffer. If 0, non-digit character represents end of
  * number and is not a failure.
  * @param base              The conversion is done according to the given base,
  * which must be between 2 and 36 inclusive, or be the special value 0.
e39e6d97
  * @param[out] result 	    Unsigned long integer value of ascii number.
  * @return CL_SUCCESS       Success
  * @return CL_EPARSE        Failure
  */
cd1eae2c
 cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
                              int fail_at_nondigit, int base,
                              unsigned long *result)
e39e6d97
 {
     char *endptr = NULL;
0efcd558
     unsigned long num;
e39e6d97
 
     if (buf_size == 0 || !buf || !result) {
         /* invalid parameter */
         return CL_EPARSE;
     }
     errno = 0;
288057e9
     num   = cli_strntoul(buf, buf_size, &endptr, base);
0efcd558
     if ((num == ULONG_MAX) && (errno == ERANGE)) {
e39e6d97
         /* under- or overflow */
         return CL_EPARSE;
     }
     if (endptr == buf) {
         /* no digits */
         return CL_EPARSE;
     }
     if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
         /* non-digit encountered */
         return CL_EPARSE;
     }
     /* success */
     *result = num;
     return CL_SUCCESS;
 }
91c8f9e8
 
cd1eae2c
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count,
0efcd558
                        const char **tokens, size_t token_skip)
71e13645
 {
     size_t tokens_found, i;
     int within_pcre = 0;
 
288057e9
     for (tokens_found = 0; tokens_found < token_count;) {
71e13645
         tokens[tokens_found++] = buffer;
 
         while (*buffer != '\0') {
             if (!within_pcre && (*buffer == delim))
                 break;
cd1eae2c
             else if ((tokens_found > token_skip) && (*(buffer - 1) != '\\') &&
                      (*buffer == '/'))
71e13645
                 within_pcre = !within_pcre;
             buffer++;
         }
 
288057e9
         if (*buffer != '\0') {
71e13645
             *buffer++ = '\0';
         } else {
             i = tokens_found;
288057e9
             while (i < token_count)
71e13645
                 tokens[i++] = NULL;
             return tokens_found;
         }
     }
     return tokens_found;
 }
 
a3fe2c5b
 /**
  * @brief Check that a string consists solely of decimal digits.
  *
  * @param str   NUL-terminated string.
  * @return int  1 if every character is one of '0'-'9' (an empty string also
  *              yields 1), 0 otherwise.
  */
 int cli_isnumber(const char *str)
 {
     const char *cursor;
 
     for (cursor = str; *cursor != '\0'; cursor++) {
         if (strchr("0123456789", *cursor) == NULL)
             return 0;
     }
 
     return 1;
 }
eb290151
 
 /*
  * Encode one 16-bit code unit as UTF-8 into dst and return the number of
  * bytes written (1 to 3).  A zero code unit is written as the single byte
  * 0x01 so that the output never contains an embedded NUL.
  * dst must have room for at least 3 bytes.
  */
 static inline size_t output_utf8(uint16_t u, unsigned char *dst)
 {
     if (u == 0) {
         dst[0] = 0x1; /* don't add \0, add \1 instead */
         return 1;
     }
 
     if (u < 0x80) {
         dst[0] = u & 0xff; /* 0xxxxxxx */
         return 1;
     }
 
     if (u < 0x800) {
         dst[0] = 0xc0 | (u >> 6);   /* 110yyyyy */
         dst[1] = 0x80 | (u & 0x3f); /* 10zzzzzz */
         return 2;
     }
 
     /* u < 0x10000 because only 16-bit units are handled; values in the
      * range 0xd800 - 0xdfff aren't valid on their own, but that is not
      * checked here */
     dst[0] = 0xe0 | (u >> 12);         /* 1110xxxx */
     dst[1] = 0x80 | ((u >> 6) & 0x3f); /* 10yyyyyy */
     dst[2] = 0x80 | (u & 0x3f);        /* 10zzzzzz */
     return 3;
 }
 
 /**
  * @brief javascript-like unescape() function.
  *
  * "%uXXXX" (four hex digits) is replaced by the UTF-8 encoding of the 16-bit
  * value, "%XX" (two hex digits) by the corresponding byte.  Anything else,
  * including a '%' that does not start a complete escape, is copied through
  * unchanged.  A decoded zero is emitted as byte 0x01 so the result never
  * contains an embedded NUL.
  *
  * @param str     NUL-terminated input.
  * @return char*  Result buffer as returned by cli_realloc2(), or NULL if the
  *                initial allocation failed.
  */
 char *cli_unescape(const char *str)
 {
     char *R;
     size_t k, i = 0;
     const size_t len = strlen(str);
 
     /* unescaped string is at most as long as original,
      * it will usually be shorter */
     R = cli_malloc(len + 1);
     if (!R) {
         cli_errmsg("cli_unescape: Unable to allocate memory for string\n");
         return NULL;
     }
 
     for (k = 0; k < len; k++) {
         unsigned char c = str[k];
 
         if (str[k] == '%') {
             /*
              * The <ctype.h> classifiers are undefined for negative arguments
              * other than EOF, so every byte is converted to unsigned char
              * before it is handed to isxdigit().
              */
             if (k + 5 < len && str[k + 1] == 'u' &&
                 isxdigit((unsigned char)str[k + 2]) &&
                 isxdigit((unsigned char)str[k + 3]) &&
                 isxdigit((unsigned char)str[k + 4]) &&
                 isxdigit((unsigned char)str[k + 5])) {
                 /* %uXXXX */
                 uint16_t u =
                     ((cli_hex2int(str[k + 2]) < 0 ? 0 : cli_hex2int(str[k + 2]))
                      << 12) |
                     ((cli_hex2int(str[k + 3]) < 0 ? 0 : cli_hex2int(str[k + 3])) << 8) |
                     ((cli_hex2int(str[k + 4]) < 0 ? 0 : cli_hex2int(str[k + 4])) << 4) |
                     cli_hex2int(str[k + 5]);
                 i += output_utf8(u, (unsigned char *)&R[i]);
                 k += 5;
                 continue;
             }
 
             if (k + 2 < len && isxdigit((unsigned char)str[k + 1]) &&
                 isxdigit((unsigned char)str[k + 2])) {
                 /* %XX */
                 c = ((cli_hex2int(str[k + 1]) < 0 ? 0 : cli_hex2int(str[k + 1]))
                      << 4) |
                     cli_hex2int(str[k + 2]);
                 k += 2;
             }
         }
 
         if (!c)
             c = 1; /* don't add \0 */
         R[i++] = c;
     }
 
     R[i++] = '\0';
     /* shrink the buffer to the size actually used */
     R = cli_realloc2(R, i);
     return R;
 }
 
 /* handle javascript's escape sequences inside strings */
cd1eae2c
 int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str,
                                     size_t len)
eb290151
 {
288057e9
     size_t i;
     for (i = 0; i < len; i++) {
         char c = str[i];
         if (c == '\\' && i + 1 < len) {
             i++;
             switch (str[i]) {
                 case '0':
                     c = 0;
                     break;
                 case 'b':
                     c = 8;
                     break;
                 case 't':
                     c = 9;
                     break;
                 case 'n':
                     c = 10;
                     break;
                 case 'v':
                     c = 11;
                     break;
                 case 'f':
                     c = 12;
                     break;
                 case 'r':
                     c = 13;
                     break;
                 case 'x':
                     if (i + 2 < len)
cd1eae2c
                         c = ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                              << 4) |
                             cli_hex2int(str[i + 2]);
288057e9
                     i += 2;
                     break;
                 case 'u':
                     if (i + 4 < len) {
cd1eae2c
                         uint16_t u =
                             ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                              << 12) |
                             ((cli_hex2int(str[i + 2]) < 0 ? 0 : cli_hex2int(str[i + 2]))
                              << 8) |
                             ((cli_hex2int(str[i + 3]) < 0 ? 0 : cli_hex2int(str[i + 3]))
                              << 4) |
                             cli_hex2int(str[i + 4]);
288057e9
                         if (textbuffer_ensure_capacity(buf, 4) == -1)
                             return -1;
                         buf->pos += output_utf8(u, (unsigned char *)&buf->data[buf->pos]);
                         i += 4;
                         continue;
                     }
                     break;
                 default:
                     c = str[i];
                     break;
             }
         }
cd1eae2c
         if (!c)
             c = 1; /* we don't insert \0 */
288057e9
         if (textbuffer_putc(buf, c) == -1)
             return -1;
     }
     return 0;
eb290151
 }
 
73d8cded
 /*
  * Convert the first `len` characters of `str` in place from hex digit
  * characters to the values cli_hex2int() returns for them.
  *
  * Returns 0 when every character converted, or 1 at the first character for
  * which cli_hex2int() returns a negative value. In that case the characters
  * before it have already been overwritten and the rest are left untouched.
  */
 int cli_hexnibbles(char *str, int len)
 {
     int pos = 0;
 
     while (pos < len) {
         const int nibble = cli_hex2int(str[pos]);
 
         if (nibble < 0)
             return 1;
 
         str[pos++] = nibble;
     }
 
     return 0;
 }
583cd65f
 
 /*
  * Convert a buffer of UTF-16 code units into a newly allocated,
  * NUL-terminated UTF-8 string.
  *
  * utf16   Input bytes; does not need to be NUL-terminated.
  * length  Number of input bytes. An odd trailing byte is ignored (a warning
  *         is logged). Fewer than 2 bytes yields an empty string.
  * type    UTF16_LE or UTF16_BE to force a byte order, or UTF16_BOM to take
  *         the byte order from a leading byte-order mark, falling back to
  *         big-endian when no mark is present. A leading byte-order mark is
  *         skipped whatever `type` is.
  *
  * Returns the converted string, or NULL if allocation fails. The buffer comes
  * from cli_malloc()/cli_strdup(); the caller is presumably expected to
  * release it.
  *
  * Invalid input is not rejected: a surrogate that is not part of a
  * well-formed high+low pair is replaced by U+FFFD (EF BF BD) and conversion
  * continues with the next code unit.
  */
 char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
 {
     /* utf8 -
      * 4 bytes for utf16 high+low surrogate (4 bytes input)
      * 3 bytes for utf16 otherwise (2 bytes input)
      * Written as length + length / 2 (== length * 3 / 2) so the intermediate
      * product cannot overflow. */
     size_t i, j;
     size_t needed = length + length / 2 + 2;
     char *s2;
 
     if (length < 2)
         return cli_strdup("");
     if (length % 2) {
         cli_warnmsg("utf16 length is not multiple of two: %lu\n", (unsigned long)length);
         length--;
     }
 
     s2 = cli_malloc(needed);
     if (!s2)
         return NULL;
 
     i = 0;
 
     if ((utf16[0] == '\xff' && utf16[1] == '\xfe') ||
         (utf16[0] == '\xfe' && utf16[1] == '\xff')) {
         /* skip the byte-order mark; it only decides the order in BOM mode */
         i += 2;
         if (type == UTF16_BOM)
             type = (utf16[0] == '\xff') ? UTF16_LE : UTF16_BE;
     } else if (type == UTF16_BOM)
         type = UTF16_BE;
 
     for (j = 0; i < length && j < needed; i += 2) {
         uint16_t c = cli_readint16(&utf16[i]);
         if (type == UTF16_BE)
             c = cbswap16(c);
         if (c < 0x80) {
             s2[j++] = c;
         } else if (c < 0x800) {
             s2[j]     = 0xc0 | (c >> 6);
             s2[j + 1] = 0x80 | (c & 0x3f);
             j += 2;
         } else if (c < 0xd800 || c >= 0xe000) {
             s2[j]     = 0xe0 | (c >> 12);
             s2[j + 1] = 0x80 | ((c >> 6) & 0x3f);
             s2[j + 2] = 0x80 | (c & 0x3f);
             j += 3;
         } else {
             uint16_t c2 = 0;
             int paired  = 0;
 
             /* A high surrogate needs a complete following code unit, read in
              * the same byte order, that is itself a low surrogate. */
             if (c < 0xdc00 && i + 3 < length) {
                 c2 = cli_readint16(&utf16[i + 2]);
                 if (type == UTF16_BE)
                     c2 = cbswap16(c2);
                 paired = (c2 >= 0xdc00 && c2 < 0xe000);
             }
 
             if (paired) {
                 /* UTF16 high+low surrogate: 0x40 is 0x10000 >> 10, so `c`
                  * becomes the top 11 bits of the code point and `c2` the
                  * low 10 bits */
                 c = c - 0xd800 + 0x40;
                 c2 -= 0xdc00;
                 s2[j]     = 0xf0 | (c >> 8);
                 s2[j + 1] = 0x80 | ((c >> 2) & 0x3f);
                 s2[j + 2] = 0x80 | ((c & 3) << 4) | (c2 >> 6);
                 s2[j + 3] = 0x80 | (c2 & 0x3f);
                 j += 4;
                 /* the low surrogate has been consumed as well */
                 i += 2;
             } else {
                 cli_dbgmsg("UTF16 surrogate encountered at wrong pos\n");
                 /* invalid char: emit U+FFFD and re-examine the next unit */
                 s2[j++] = 0xef;
                 s2[j++] = 0xbf;
                 s2[j++] = 0xbd;
             }
         }
     }
     if (j >= needed)
         j = needed - 1;
     s2[j] = '\0';
     return s2;
 }
0e7442f1
 
5f31c9b4
 /*
  * Structural check that the first `len` bytes of `buf` form UTF-8 style
  * multi-byte sequences.
  *
  * Only the framing is verified: every lead byte must be followed by the
  * number of 10xxxxxx continuation bytes it announces. Lead bytes announcing
  * up to five continuation bytes (the legacy 5- and 6-byte forms) are
  * accepted, and the decoded value is never inspected, so overlong encodings
  * and encoded surrogates pass.
  *
  * Returns 1 if the buffer passes (an empty buffer does), 0 otherwise.
  */
 int cli_isutf8(const char *buf, unsigned int len)
 {
     unsigned int pos = 0;
 
     while (pos < len) {
         const unsigned char lead = (unsigned char)buf[pos++];
         unsigned int trailing;
 
         if (lead < 0x80) /* 0xxxxxxx is plain ASCII */
             continue;
 
         if (lead < 0xc0) /* 10xxxxxx never 1st byte */
             return 0;
         else if (lead < 0xe0) /* 110xxxxx */
             trailing = 1;
         else if (lead < 0xf0) /* 1110xxxx */
             trailing = 2;
         else if (lead < 0xf8) /* 11110xxx */
             trailing = 3;
         else if (lead < 0xfc) /* 111110xx */
             trailing = 4;
         else if (lead < 0xfe) /* 1111110x */
             trailing = 5;
         else /* 0xfe and 0xff are never valid */
             return 0;
 
         for (; trailing > 0; trailing--) {
             /* sequence runs past the end of the buffer */
             if (pos >= len)
                 return 0;
 
             /* every continuation byte must look like 10xxxxxx */
             if (((unsigned char)buf[pos++] & 0xc0) != 0x80)
                 return 0;
         }
     }
 
     return 1;
 }
01eebc13
 
cd1eae2c
 cl_error_t cli_basename(const char *filepath, size_t filepath_len,
                         char **filebase)
01eebc13
 {
     cl_error_t status = CL_EARG;
     const char *index = NULL;
288057e9
 
01eebc13
     if (NULL == filepath || NULL == filebase || filepath_len == 0) {
         cli_dbgmsg("cli_basename: Invalid arguments.\n");
         goto done;
     }
 
     index = filepath + filepath_len - 1;
 
     while (index > filepath) {
cd1eae2c
         if (index[0] == PATHSEP[0])
             break;
01eebc13
         index--;
     }
     if ((index != filepath) || (index[0] == PATHSEP[0]))
         index++;
 
2e06875d
     if (0 == CLI_STRNLEN(index, filepath_len - (index - filepath))) {
01eebc13
         cli_dbgmsg("cli_basename: Provided path does not include a file name.\n");
         status = CL_EFORMAT;
         goto done;
     }
 
2e06875d
     *filebase = CLI_STRNDUP(index, filepath_len - (index - filepath));
01eebc13
     if (NULL == *filebase) {
         cli_errmsg("cli_basename: Failed to allocate memory for file basename.\n");
         status = CL_EMEM;
         goto done;
     }
 
     status = CL_SUCCESS;
 
 done:
     return status;
 }