win32/compat/utf8_util.c
8ab47ebe
 /*
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2011-2013 Sourcefire, Inc.
8ab47ebe
  *
  *  Authors: aCaB <acab@clamav.net>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include "utf8_util.h"
 
 char *cli_strdup_to_utf8(const char *s) {
     char *r = cli_to_utf8_maybe_alloc(s);
     if(!r) return NULL;
     if(r == s) return strdup(r);
     return r;
 }
 
 #define MAYBE_FREE_W do { if(wdup != tmpw) free(wdup); } while (0)
 #define MAYBE_FREE_U do { if(utf8 != tmpu) free(utf8); } while (0)
 char *cli_to_utf8_maybe_alloc(const char *s) {
     int len = strlen(s) + 1;
     wchar_t tmpw[1024], *wdup;
     char tmpu[1024], *utf8;
 
     if(len >= sizeof(tmpw) / sizeof(*tmpw)) {
 	wdup = (wchar_t *)malloc(len * sizeof(wchar_t));
 	if(!wdup) return NULL;
     } else
 	wdup = tmpw;
 
     /* Check if already UTF8 first... */
     if(MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
 	/* XP acts funny on MB_ERR_INVALID_CHARS, so we translate back and compare
 	   On Vista+ the flag is honored and there is no such overhead */
 	int ulen;
 	if((ulen = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
 	    if(ulen > sizeof(tmpu)) {
 		utf8 = (char *)malloc(ulen);
 		if(!utf8) {
 		    MAYBE_FREE_W;
 		    return NULL;
 		}
 	    } else
 		utf8 = tmpu;
 	    if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, ulen, NULL, NULL) && !strcmp(s, utf8)) {
 		    MAYBE_FREE_W;
 		    MAYBE_FREE_U;
 		    return s;
 	    }
 	    MAYBE_FREE_U;
 	}
 	/* We should never land here */
     }
 
     /* ... then assume ANSI */
     if(MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
 	if((len = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
 	    if((utf8 = (char *)malloc(len))) {
 		if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, len, NULL, NULL)) {
 		    MAYBE_FREE_W;
 		    return utf8;
 		}
 		free(utf8);
 	    }
 	}
     }
     MAYBE_FREE_W;
     return NULL;
 }