libclamav/entconv.h
3506ac49
 /*
  *  HTML Entity & Encoding normalization.
  *
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Török Edvin
3506ac49
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
38a00199
  *  published by the Free Software Foundation.
3506ac49
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 
 #ifndef _ENTITIES_H
 #define _ENTITIES_H
b0b8398b
 
95b2d68c
 #include "clamav-types.h"
3506ac49
 
 #include "hashtab.h"
 
1d23b7de
 /*
  * Encoding-name strings for the multi-byte Unicode encodings handled here.
  * For the UCS4_<digits> macros, 1234 expands to "UCS-4BE" and 4321 to
  * "UCS-4LE"; 2143 and 3412 expand to the generic names "UCS4" and "UCS-4".
  * NOTE(review): the digit suffix appears to spell the byte order, and the
  * strings are presumably handed to an iconv-style converter - confirm
  * against the implementation file.
  */
 #define UCS4_1234 "UCS-4BE"
 #define UCS4_4321 "UCS-4LE"
4e1127c5
 #define UCS4_2143 "UCS4"
 #define UCS4_3412 "UCS-4"
 #define UTF16_BE "UTF-16BE"
 #define UTF16_LE "UTF-16LE"
 #define UTF8     "UTF-8"
3506ac49
 /*
  * UNDECIDED_<width>_<order> names: each 32-bit and 16-bit variant is a
  * plain alias for the matching UCS4_* / UTF16_* encoding-name macro, and
  * UNDECIDED_8 expands to "ISO-8859-1".
  * NOTE(review): presumably these are the defaults used when only the
  * code-unit width and byte order have been determined - confirm with the
  * callers.
  */
 #define UNDECIDED_32_1234 UCS4_1234
 #define UNDECIDED_32_4321 UCS4_4321
 #define UNDECIDED_32_2143 UCS4_2143
 #define UNDECIDED_32_3412 UCS4_3412
 #define UNDECIDED_16_BE UTF16_BE
 #define UNDECIDED_16_LE UTF16_LE
4e1127c5
 #define UNDECIDED_8 "ISO-8859-1"
3506ac49
 
 /*
  * Size constant used to dimension entity_conv.entity_buff, which is
  * declared with MAX_ENTITY_SIZE + 2 bytes.
  * NOTE(review): presumably the longest entity text that is supported -
  * confirm against the implementation.
  */
 #define MAX_ENTITY_SIZE 22
 
 /*
  * Conversion state handed to entity_norm() (its first parameter is a
  * pointer to this struct).
  */
 struct entity_conv {
b0b8398b
 	/* Byte buffer of MAX_ENTITY_SIZE + 2 (i.e. 24) bytes.
 	 * NOTE(review): presumably scratch/output storage for a normalized
 	 * entity; the purpose of the 2 extra bytes is not visible here. */
 	unsigned char entity_buff[MAX_ENTITY_SIZE+2];
3506ac49
 };
 
ce2dcb53
 /*
  * Identifiers for the text encodings distinguished by this module.
  * Values are assigned implicitly in declaration order, running from
  * E_UCS4 (0) through E_OTHER (10).
  */
 enum encodings {
 	E_UCS4,
 	E_UTF16,
 	E_UCS4_1234,
 	E_UCS4_4321,
 	E_UCS4_2143,
 	E_UCS4_3412,
 	E_UTF16_BE,
 	E_UTF16_LE,
 	E_UTF8,
 	E_UNKNOWN,
 	E_OTHER
 };
7067a88c
 
8b22c9b5
 /*
  * Takes one 16-bit value `u16` plus an output buffer `dst` and its
  * capacity `dst_size`; returns a pointer to unsigned char.
  * NOTE(review): presumably writes the normalized form of `u16` into `dst`
  * and returns a position inside `dst` - confirm against the definition,
  * including what is returned when the buffer is too small.
  */
 unsigned char *u16_normalize_tobuffer(uint16_t u16,
                                       unsigned char *dst,
                                       size_t dst_size);
b0b8398b
 /*
  * Takes a conversion state `conv` and an `entity` byte string; returns a
  * pointer to const char.
  * NOTE(review): presumably resolves the named entity to its normalized
  * text, possibly using conv->entity_buff as storage - confirm against the
  * definition, including the result for an unknown entity.
  */
 const char *entity_norm(struct entity_conv *conv,
                         const unsigned char *entity);
baedf04d
 /*
  * Takes a byte pointer `bom` together with a byte count `length`; returns
  * a pointer to const char.
  * NOTE(review): presumably inspects the leading bytes for a byte-order
  * mark and returns one of the encoding-name strings defined in this
  * header - confirm against the definition, including the result when no
  * mark is recognized.
  */
 const char *encoding_detect_bom(const unsigned char *bom,
                                 const size_t length);
b3fc7f97
 /*
  * Takes a read-only input area `in_m_area`, an encoding name
  * `initial_encoding`, and a writable output area `out_m_area`; returns
  * an int.
  * NOTE(review): presumably converts the input from `initial_encoding`
  * into a normalized ASCII form stored in `out_m_area` - confirm against
  * the definition, including the meaning of the return value.
  */
 int encoding_normalize_toascii(const m_area_t *in_m_area,
                                const char *initial_encoding,
                                m_area_t *out_m_area);
3506ac49
 
 #endif