File: utf8.h

package info (click to toggle)
virtuoso-opensource 7.2.5.1%2Bdfsg1-0.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 285,240 kB
  • sloc: ansic: 641,220; sql: 490,413; xml: 269,570; java: 83,893; javascript: 79,900; cpp: 36,927; sh: 31,653; cs: 25,702; php: 12,690; yacc: 10,227; lex: 7,601; makefile: 7,129; jsp: 4,523; awk: 1,697; perl: 1,013; ruby: 1,003; python: 326
file content (46 lines) | stat: -rw-r--r-- 1,313 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#ifndef __UTF8_H__
#define __UTF8_H__

/* utf8.h -- convert characters to/from UTF-8

  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

*/

#include "platform.h"
#include "buffio.h"

/* UTF-8 encoding/decoding support
** Does not convert character "codepoints", i.e. to/from 10646.
*/

int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
                                TidyInputSource* inp, int* count );

int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count );


uint  TY_(GetUTF8)( ctmbstr str, uint *ch );
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );

#define UNICODE_BOM_BE   0xFEFF   /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM      UNICODE_BOM_BE
#define UNICODE_BOM_LE   0xFFFE   /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */


Bool    TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
Bool    TY_(IsHighSurrogate)( tchar ch );
Bool    TY_(IsLowSurrogate)( tchar ch );

Bool    TY_(IsCombinedChar)( tchar ch );
Bool    TY_(IsValidCombinedChar)( tchar ch );

tchar   TY_(CombineSurrogatePair)( tchar high, tchar low );
Bool    TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low );



#endif /* __UTF8_H__ */