File: entconv.h

package info (click to toggle)
clamav 0.98.6%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 54,844 kB
  • sloc: cpp: 267,090; ansic: 151,215; sh: 36,044; python: 2,630; makefile: 2,224; perl: 1,690; pascal: 1,218; lisp: 184; csh: 117; xml: 38; asm: 32; exp: 4
file content (59 lines) | stat: -rw-r--r-- 1,923 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*
 *  HTML Entity & Encoding normalization.
 *
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 *
 *  Authors: Török Edvin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */

#ifndef _ENTITIES_H
#define _ENTITIES_H

#include "cltypes.h"

#include "hashtab.h"

#define UCS4_1234 "UCS-4BE"
#define UCS4_4321 "UCS-4LE"
#define UCS4_2143 "UCS4"
#define UCS4_3412 "UCS-4"
#define UTF16_BE "UTF-16BE"
#define UTF16_LE "UTF-16LE"
#define UTF8     "UTF-8"
#define UNDECIDED_32_1234 UCS4_1234
#define UNDECIDED_32_4321 UCS4_4321
#define UNDECIDED_32_2143 UCS4_2143
#define UNDECIDED_32_3412 UCS4_3412
#define UNDECIDED_16_BE UTF16_BE
#define UNDECIDED_16_LE UTF16_LE
#define UNDECIDED_8 "ISO-8859-1"

#define MAX_ENTITY_SIZE 22

struct entity_conv {
	unsigned char entity_buff[MAX_ENTITY_SIZE+2];
};

enum encodings {E_UCS4,E_UTF16,E_UCS4_1234,E_UCS4_4321,E_UCS4_2143,E_UCS4_3412,E_UTF16_BE,E_UTF16_LE,E_UTF8, E_UNKNOWN,E_OTHER, E_ICONV};

unsigned char* u16_normalize_tobuffer(uint16_t u16, unsigned char* dst, size_t dst_size);
const char* entity_norm(struct entity_conv* conv,const unsigned char* entity);
const char* encoding_detect_bom(const unsigned char* bom, const size_t length);
int encoding_normalize_toascii(const m_area_t* in_m_area, const char* initial_encoding, m_area_t* out_m_area);

#endif