/*
* HTML Entity & Encoding normalization.
*
* Copyright (C) 2007-2008 Sourcefire, Inc.
*
* Authors: Török Edvin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef _ENTITIES_H
#define _ENTITIES_H

#include "cltypes.h"
#include "hashtab.h"

/*
 * Encoding name strings.
 *
 * The numeric suffix of each UCS4_* macro spells a 32-bit byte order:
 * 1234 maps to "UCS-4BE" and 4321 to "UCS-4LE".
 * NOTE(review): 2143 and 3412 map to the generic names "UCS4" and "UCS-4"
 * instead of byte-order-specific ones -- presumably because the consumer of
 * these strings has no name for those orders; confirm in the implementation.
 */
#define UCS4_1234 "UCS-4BE"
#define UCS4_4321 "UCS-4LE"
#define UCS4_2143 "UCS4"
#define UCS4_3412 "UCS-4"
#define UTF16_BE "UTF-16BE"
#define UTF16_LE "UTF-16LE"
#define UTF8 "UTF-8"

/*
 * UNDECIDED_* aliases: each multi-byte variant resolves to the matching
 * encoding name above; the 8-bit variant resolves to "ISO-8859-1".
 * NOTE(review): presumably used when detection only established the code
 * unit width/byte order -- confirm against the callers.
 */
#define UNDECIDED_32_1234 UCS4_1234
#define UNDECIDED_32_4321 UCS4_4321
#define UNDECIDED_32_2143 UCS4_2143
#define UNDECIDED_32_3412 UCS4_3412
#define UNDECIDED_16_BE UTF16_BE
#define UNDECIDED_16_LE UTF16_LE
#define UNDECIDED_8 "ISO-8859-1"

/* Upper bound used to size entity_conv.entity_buff (see below). */
#define MAX_ENTITY_SIZE 22

/*
 * Conversion state passed to entity_norm().
 * NOTE(review): how the buffer is filled is not visible in this header;
 * confirm ownership/lifetime rules in the implementation.
 */
struct entity_conv {
    /*
     * MAX_ENTITY_SIZE bytes plus two extra.
     * NOTE(review): the two extra bytes presumably leave room for a
     * delimiter and a terminating NUL -- confirm.
     */
    unsigned char entity_buff[MAX_ENTITY_SIZE + 2];
};

/*
 * Encoding identifiers.
 *
 * No enumerator has an explicit value, so the numbering (0..11) follows
 * declaration order; do not reorder or insert in the middle if any caller
 * relies on the numeric values.
 * NOTE(review): the E_UCS4_* / E_UTF16_* names parallel the UCS4_* / UTF16_*
 * encoding-name macros in this header; the exact meaning of E_UNKNOWN,
 * E_OTHER and E_ICONV is defined by the implementation -- confirm there.
 */
enum encodings {
    E_UCS4,
    E_UTF16,
    E_UCS4_1234,
    E_UCS4_4321,
    E_UCS4_2143,
    E_UCS4_3412,
    E_UTF16_BE,
    E_UTF16_LE,
    E_UTF8,
    E_UNKNOWN,
    E_OTHER,
    E_ICONV
};

/*
 * Normalizes one 16-bit code unit into a caller-supplied byte buffer.
 *
 * u16:      the 16-bit value to normalize.
 * dst:      destination buffer.
 * dst_size: size associated with dst.
 *
 * NOTE(review): the implementation is not visible from this header. The
 * returned pointer presumably refers to a position inside dst (e.g. just
 * past the bytes written), and dst_size is presumably the capacity of dst
 * in bytes -- confirm both before relying on them.
 */
unsigned char *u16_normalize_tobuffer(uint16_t u16, unsigned char *dst, size_t dst_size);
/*
 * Normalizes an HTML entity.
 *
 * conv:   conversion state (struct entity_conv) supplied by the caller.
 * entity: the entity text to normalize; not modified through this pointer.
 *
 * NOTE(review): the implementation is not visible from this header. The
 * result presumably points into conv->entity_buff and a NULL return
 * presumably signals an unknown entity -- confirm both.
 */
const char *entity_norm(struct entity_conv *conv, const unsigned char *entity);
/*
 * Detects an encoding from a byte-order mark.
 *
 * bom:    bytes to inspect; not modified through this pointer.
 * length: size associated with bom.
 *
 * NOTE(review): the implementation is not visible from this header. The
 * returned string is presumably one of the encoding names defined in this
 * header (or NULL when nothing is recognised), and length is presumably the
 * number of readable bytes at bom -- confirm both.
 *
 * The top-level const on the by-value length parameter was dropped from the
 * declaration; it has no effect on callers and the function type stays
 * compatible with a definition that keeps it.
 */
const char *encoding_detect_bom(const unsigned char *bom, size_t length);
/*
 * Normalizes the contents of an input area to ASCII.
 *
 * in_m_area:        input area; not modified through this pointer.
 * initial_encoding: name of the encoding to start from.
 * out_m_area:       output area written by the function.
 *
 * NOTE(review): the implementation and the m_area_t definition are not
 * visible from this header. The meaning of the int return value, whether
 * initial_encoding may be NULL, and who allocates the output buffer all
 * need to be confirmed in the implementation.
 */
int encoding_normalize_toascii(const m_area_t *in_m_area, const char *initial_encoding, m_area_t *out_m_area);
#endif /* _ENTITIES_H */