File: pa_charset.h

package info (click to toggle)
parser 3.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,520 kB
  • sloc: cpp: 35,302; sh: 15,643; ansic: 10,375; yacc: 1,378; makefile: 242
file content (195 lines) | stat: -rw-r--r-- 5,961 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/** @file
	Parser: Charset connection decl.

	Copyright (c) 2001-2024 Art. Lebedev Studio (http://www.artlebedev.com)
	Authors: Konstantin Morshnev <moko@design.ru>, Alexandr Petrosian <paf@design.ru>
*/

#ifndef PA_CHARSET_H
#define PA_CHARSET_H

#define IDENT_PA_CHARSET_H "$Id: pa_charset.h,v 1.62 2025/06/28 15:38:04 moko Exp $"


#include "pa_exception.h"
#include "pa_common.h"
#include "pa_hash.h"
#include "pa_array.h"

#ifdef HAVE_PCRE2
#include <pcre2.h>
#else
#include <pcre.h>
#endif
// we are using some pcre_internal.h stuff as well
#include "../lib/pcre/pa_pcre_internal.h"

#ifdef XML
#include "libxml/xmlstring.h"
#include "libxml/encoding.h"
#endif

// defines

// Upper bound on the number of charsets. Not referenced anywhere in this header.
// NOTE(review): presumably limits how many charsets can be registered at once -- confirm at the use site.
#define MAX_CHARSETS 10

// Capacity of Charset::Tables::toTable, i.e. the maximum number of
// (unicode code, charset byte) records a single charset definition can hold.
#define MAX_CHARSET_UNI_CODES 500

// Fallback definitions of the character types used throughout this header.
// NOTE(review): #ifndef only tests for a preprocessor macro, so these guards skip the typedef
// only when XMLCh / XMLByte were #define'd elsewhere; an earlier typedef of the same name is
// not detected (repeating an identical typedef is still legal C++, a conflicting one is an error).
#ifndef XMLCh 
	typedef unsigned int XMLCh;
#endif
#ifndef XMLByte
	typedef unsigned char XMLByte;
#endif

// helpers

// HashString instantiated with String::Body; used by the hash overload of Charset::transcode.
// NOTE(review): the key type is fixed by HashString itself (pa_hash.h), not visible here.
typedef HashString<String::Body> HashStringString;

/**	charset holds name & transcode tables 
	registers libxml transcoders
*/
class Charset: public PA_Object {
public:

	/**	Creates a charset called ANAME.
		NOTE(review): afile_spec is passed as a pointer, so it is presumably optional
		(e.g. absent for a built-in UTF-8 charset) and otherwise names the definition file
		handed to load_definition() -- confirm against the constructor body, which is not in this header.
	*/
	Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
	
	/// charset name, as stored in FNAME
	const String::Body NAME() const { return FNAME; }
	/// charset name as a C string, as stored in FNAME_CSTR
	const char* NAME_CSTR() const { return FNAME_CSTR; }

	/// value of the fisUTF8 flag
	bool isUTF8() const { return fisUTF8; }

	/**	Static transcoding entry points: convert src from the source charset to the destination charset.
		One overload per string/container type. The String::C, String::Body and String overloads
		return the result; the ArrayString and HashStringString overloads take a non-const reference
		and return void (NOTE(review): presumably they rewrite the container's strings in place -- confirm).
	*/
	static String::C transcode(const String::C src, const Charset& source_charset, const Charset& dest_charset);
	static String::Body transcode(const String::Body src, const Charset& source_transcoder, const Charset& dest_transcoder);
	static String& transcode(const String& src, const Charset& source_transcoder, const Charset& dest_transcoder);
	static void transcode(ArrayString& src, const Charset& source_transcoder, const Charset& dest_transcoder);
	static void transcode(HashStringString& src, const Charset& source_transcoder, const Charset& dest_transcoder);

	/**	Static escaping helpers; src is given in source_charset.
		NOTE(review): the escape format itself is defined in the implementation and is not visible here.
	*/
	static String::C escape(const String::C src, const Charset& source_charset);
	static String::Body escape(const String::Body src, const Charset& source_charset);
	static String& escape(const String& src, const Charset& source_charset);

	/**	JSON flavour of the escaping helpers above, with the same set of overloads.
		NOTE(review): exact output format is defined in the implementation.
	*/
	static String::C escape_JSON(const String::C src, const Charset& source_charset);
	static String::Body escape_JSON(const String::Body src, const Charset& source_charset);
	static String& escape_JSON(const String& src, const Charset& source_charset);

	/**	Stores character src through outPtr. outPtr is taken by reference, so the callee
		can advance the caller's write position.
		NOTE(review): not_found is presumably the byte written when src has no mapping
		in this charset -- confirm.
	*/
	void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);

public:

	/// PCRE character tables for this charset; tables_length is not defined in this header
	/// (NOTE(review): presumably it comes from pa_pcre_internal.h included above)
	unsigned char pcre_tables[tables_length];

private:

	/// NOTE(review): presumably reads the charset definition file afile_spec and fills `tables` -- confirm
	void load_definition(Request_charsets& charsets, const String& afile_spec);
	/// NOTE(review): presumably orders tables.toTable so it can be searched -- confirm
	void sort_ToTable();

	// per-instance transcoding primitives behind the static transcode() overloads
	const String::C transcodeToUTF8(const String::C src) const;
	const String::C transcodeFromUTF8(const String::C src) const;
	
	const String::C transcodeToCharset(const String::C src,
		const Charset& dest_transcoder) const;

public:

	/// transcode tables of a single-byte charset
	struct Tables {
		/// pairs a unicode code (intCh) with a charset byte (extCh)
		struct Rec {
			XMLCh intCh;
			XMLByte extCh;
		};

		/// 0x100 unicode codes (NOTE(review): presumably indexed by charset byte value)
		XMLCh fromTable[0x100];
		/// unicode -> byte records, at most MAX_CHARSET_UNI_CODES of them
		Rec toTable[MAX_CHARSET_UNI_CODES];
		/// number of toTable entries in use
		uint toTableSize;
	};

	/// case mapping table; instances UTF8CaseToUpper / UTF8CaseToLower are declared below the class
	struct UTF8CaseTable {
		/// maps character `from` to character `to`
		struct Rec {
			XMLCh from, to;
		};

		/// number of elements in `records`
		uint size;
		Rec* records;
	};

private:

	const String::Body FNAME;	///< returned by NAME()
	char* FNAME_CSTR;	///< returned by NAME_CSTR()
	bool fisUTF8;	///< returned by isUTF8()
	Tables tables;

	// Low-level escaping helpers working on raw byte buffers.
	// Each escape*() writes into dest and returns size_t; the calc_*_length() functions return size_t too.
	// NOTE(review): presumably calc_*_length() gives the buffer size the matching escape*() needs -- confirm.
	// NOTE(review): the two *_length_UTF8 functions take a non-const XMLByte* while their siblings take const XMLByte*.
	static size_t calc_escaped_length_UTF8(XMLByte* src, size_t src_length);
	static size_t calc_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
	static size_t calc_escaped_length(const String::C src, const Charset& source_charset);
	static size_t escape_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
	static size_t escape(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);

	static size_t calc_JSON_escaped_length_UTF8(XMLByte* src, size_t src_length);
	static size_t calc_JSON_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
	static size_t calc_JSON_escaped_length(const String::C src, const Charset& source_charset);
	static size_t escape_JSON_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
	static size_t escape_JSON(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);

// everything below is declared only when XML is defined; it relies on libxml types (xmlChar, xmlCharEncoding*Func)
#ifdef XML

private:
	/// NOTE(review): presumably registers this charset with libxml under name_cstr -- confirm
	void addEncoding(char* name_cstr);

public:
	/// converts xmlChar* null-terminated string to char* 
	String::C transcode_cstr(const xmlChar* s);
	/// converts xmlChar* null-terminated string to parser String
	const String& transcode(const xmlChar* s);

	/** converts sized char*  to xmlChar*
		@returns xmlChar*  WHICH CALLER SHOULD FREE
	*/
	xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
	/// converts parser String to xmlChar*
	xmlChar* transcode(const String& s);
	/// converts parser String::Body to xmlChar*
	xmlChar* transcode(const String::Body s);

private:

	// libxml encoding callbacks for this charset (input and output direction)
	xmlCharEncodingInputFunc ftranscoder_input;
	xmlCharEncodingOutputFunc ftranscoder_output;

#endif

};


// externs

// Case mapping tables, defined in another translation unit; passed as `table` to change_case_UTF8().
extern Charset::UTF8CaseTable UTF8CaseToUpper;
extern Charset::UTF8CaseTable UTF8CaseToLower;
// Changes the case of UTF-8 data according to `table`: source is srcData/srcLen, output buffer is toFill/toFillLen.
// NOTE(review): whether toFillLen must equal srcLen is not visible here -- confirm in the implementation.
void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
					XMLByte* toFill, size_t toFillLen,
					const Charset::UTF8CaseTable& table);
// Converts a position in characters into a size_t result within [srcBegin, srcEnd)
// (NOTE(review): presumably the matching byte offset -- confirm).
size_t getUTF8BytePos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t charPos/*position in characters*/);
// Converts a position in bytes into a size_t result within [srcBegin, srcEnd)
// (NOTE(review): presumably the matching character index -- confirm).
size_t getUTF8CharPos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t bytePos/*position in bytes*/);
// NOTE(review): presumably the number of UTF-8 characters in [srcBegin, srcEnd) -- confirm.
size_t lengthUTF8(const XMLByte* srcBegin, const XMLByte* srcEnd);
// NOTE(review): presumably the byte length of the UTF-8 sequence that starts with byte c -- confirm.
unsigned int lengthUTF8Char(const XMLByte c);

// NOTE(review): presumably returns src with invalid UTF-8 repaired; ownership of the result is not visible here -- confirm.
const char *fixUTF8(const char *src);

class UTF8_string_iterator {
	public:
		UTF8_string_iterator(const String& astring): fsrcPtr((XMLByte*)astring.cstr()), fsrcEnd(fsrcPtr + astring.length()) {}
		UTF8_string_iterator(XMLByte* asrcPtr, size_t length): fsrcPtr(asrcPtr), fsrcEnd(fsrcPtr + length) {}

		bool has_next();
		XMLCh next() { return fUTF8Char; }
		XMLByte getFirstByte(){ return ffirstByte; }
		size_t getCharSize(){ return fcharSize; }
	private:
		const XMLByte* fsrcPtr;
		const XMLByte* fsrcEnd;
		size_t fcharSize;
		XMLByte ffirstByte;
		XMLCh fUTF8Char;
};

#endif