File: SCharClass.h

package info (click to toggle)
yudit 3.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 18,472 kB
  • sloc: cpp: 76,344; perl: 5,630; makefile: 989; ansic: 823; sh: 441
file content (173 lines) | stat: -rw-r--r-- 5,235 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/** 
 *  Yudit Unicode Editor Source File
 *
 *  GNU Copyright (C) 1997-2023  Gaspar Sinai <gaspar@yudit.org>  
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License, version 2,
 *  dated June 1991. See file COPYING for details.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#ifndef SCharClass_h
#define SCharClass_h

#include "stoolkit/STypes.h"

/**
 * Unicode general category of a character.
 *
 * Each constant is named after the two-letter category abbreviation.
 * The numeric codes are written out explicitly; SD_CC_MAX is left
 * implicit so that it always equals the number of entries.
 */
typedef enum
{
  SD_CC_Xx = 0x00,  /* Xx - zero value; presumably "unknown" (not described in the original) */

  SD_CC_Lu = 0x01,  /* Lu - Letter, Uppercase */
  SD_CC_Ll = 0x02,  /* Ll - Letter, Lowercase */
  SD_CC_Lt = 0x03,  /* Lt - Letter, Titlecase */

  SD_CC_Mn = 0x04,  /* Mn - Mark, Non-Spacing */
  SD_CC_Mc = 0x05,  /* Mc - Mark, Spacing Combining */
  SD_CC_Me = 0x06,  /* Me - Mark, Enclosing */

  SD_CC_Nd = 0x07,  /* Nd - Number, Decimal Digit */
  SD_CC_Nl = 0x08,  /* Nl - Number, Letter */
  SD_CC_No = 0x09,  /* No - Number, Other */

  SD_CC_Zs = 0x0A,  /* Zs - Separator, Space */
  SD_CC_Zl = 0x0B,  /* Zl - Separator, Line */
  SD_CC_Zp = 0x0C,  /* Zp - Separator, Paragraph */

  SD_CC_Cc = 0x0D,  /* Cc - Other, Control */
  SD_CC_Cf = 0x0E,  /* Cf - Other, Format */
  SD_CC_Cs = 0x0F,  /* Cs - Other, Surrogate */
  SD_CC_Co = 0x10,  /* Co - Other, Private Use */
  SD_CC_Cn = 0x11,  /* Cn - Other, Not Assigned */

  SD_CC_Lm = 0x12,  /* Lm - Letter, Modifier */
  SD_CC_Lo = 0x13,  /* Lo - Letter, Other */

  SD_CC_Pc = 0x14,  /* Pc - Punctuation, Connector */
  SD_CC_Pd = 0x15,  /* Pd - Punctuation, Dash */
  SD_CC_Ps = 0x16,  /* Ps - Punctuation, Open */
  SD_CC_Pe = 0x17,  /* Pe - Punctuation, Close */
  SD_CC_Pi = 0x18,  /* Pi - Punctuation, Initial quote
                       (may behave like Ps or Pe depending on usage) */
  SD_CC_Pf = 0x19,  /* Pf - Punctuation, Final quote
                       (may behave like Ps or Pe depending on usage) */
  SD_CC_Po = 0x1A,  /* Po - Punctuation, Other */

  SD_CC_Sm = 0x1B,  /* Sm - Symbol, Math */
  SD_CC_Sc = 0x1C,  /* Sc - Symbol, Currency */
  SD_CC_Sk = 0x1D,  /* Sk - Symbol, Modifier */
  SD_CC_So = 0x1E,  /* So - Symbol, Other */

  SD_CC_MAX         /* end marker: number of entries above */
} SD_CharClass;

/**
 * Bidirectional class of a character, grouped into strong, weak and
 * neutral types. SD_BC_MAX is left implicit so that it always equals
 * the number of entries.
 */
typedef enum
{
  SD_BC_XX  = 0,   /* zero value; not described in the original */

  /* ---- strong ---- */
  SD_BC_L   = 1,   /* Left-to-Right */
  SD_BC_LRE = 2,   /* Left-to-Right Embedding */
  SD_BC_LRO = 3,   /* Left-to-Right Override */
  SD_BC_R   = 4,   /* Right-to-Left */
  SD_BC_AL  = 5,   /* Right-to-Left Arabic */
  SD_BC_RLE = 6,   /* Right-to-Left Embedding */
  SD_BC_RLO = 7,   /* Right-to-Left Override */

  /* ---- weak ---- */
  SD_BC_PDF = 8,   /* Pop Directional Format */
  SD_BC_EN  = 9,   /* European Number */
  SD_BC_ES  = 10,  /* European Number Separator */
  SD_BC_ET  = 11,  /* European Number Terminator */
  SD_BC_AN  = 12,  /* Arabic Number */
  SD_BC_CS  = 13,  /* Common Number Separator */
  SD_BC_NSM = 14,  /* Non-Spacing Mark */
  SD_BC_BN  = 15,  /* Boundary Neutral */

  /* ---- neutral ---- */
  SD_BC_B   = 16,  /* Paragraph Separator */
  SD_BC_S   = 17,  /* Segment Separator */
  SD_BC_WS  = 18,  /* Whitespace */
  SD_BC_ON  = 19,  /* Other Neutrals */

  SD_BC_MAX        /* end marker: number of entries above */
} SD_BiDiClass;

/*
 * Code points of individual characters that get special treatment.
 */

/* Zero-width characters */
#define SD_CD_ZWSP 0x200B /* zero width space */
#define SD_CD_ZWNJ 0x200C /* zero width non-joiner */
#define SD_CD_ZWJ  0x200D /* zero width joiner */

/* Arabic and Syriac characters */
#define SD_CD_ARABIC_TATWEEL               0x0640
#define SD_CD_SYRIAC_LETTER_DALATH         0x0715
#define SD_CD_SYRIAC_LETTER_DOTLESS_DALATH 0x0716
#define SD_CD_SYRIAC_LETTER_RISH           0x072A

/* NOTE(review): the intended use of SD_CD_CTRL is not evident from this header. */
#define SD_CD_CTRL 0

/* Control characters, taken from the C escape and widened to SS_UCS4 */
#define SD_CD_TAB ((SS_UCS4)'\t')
#define SD_CD_LF  ((SS_UCS4)'\n')
#define SD_CD_FF  ((SS_UCS4)'\f')
#define SD_CD_CR  ((SS_UCS4)'\r')

/* Unicode line and paragraph separators */
#define SD_CD_LS 0x2028 /* line separator */
#define SD_CD_PS 0x2029 /* paragraph separator */

/* Bidirectional embedding and override controls */
#define SD_CD_LRE 0x202A /* left-to-right embedding */
#define SD_CD_RLE 0x202B /* right-to-left embedding */
#define SD_CD_PDF 0x202C /* pop directional format */
#define SD_CD_LRO 0x202D /* left-to-right override */
#define SD_CD_RLO 0x202E /* right-to-left override */

/* Directional marks */
#define SD_CD_LRM 0x200E /* left-to-right mark */
#define SD_CD_RLM 0x200F /* right-to-left mark */

/**
 * Byte sequences, encoded in UTF-8, for line and paragraph breaks and
 * for the bidirectional embedding/override controls.
 *
 * A newline function (NLF) is any one of SS_LB_DOS, SS_LB_MAC,
 * SS_LB_UNIX or SS_LB_NEL.
 */

/* Line and paragraph breaks */
#define SS_LB_UNIX "\n"
#define SS_LB_MAC  "\r"
#define SS_LB_DOS  "\r\n"
#define SS_LB_FF   "\f"
#define SS_LB_LS   "\342\200\250" /* U+2028 line separator */
#define SS_LB_PS   "\342\200\251" /* U+2029 paragraph separator - PARAGRAPH BREAKING */

/* Bidirectional controls, in code point order */
#define SS_LB_LRE "\342\200\252" /* U+202A left-to-right embedding */
#define SS_LB_RLE "\342\200\253" /* U+202B right-to-left embedding */
#define SS_LB_PDF "\342\200\254" /* U+202C pop directional format */
#define SS_LB_LRO "\342\200\255" /* U+202D left-to-right override */
#define SS_LB_RLO "\342\200\256" /* U+202E right-to-left override */

/**
 * Line breaking sequences that are defined here but are not supported
 * by this code at present.
 *
 * SS_LB_NEL is U+0085 (NEXT LINE) in UTF-8, i.e. the bytes C2 85.
 * It was previously written as "\702\102": the escape \702 does not fit
 * in a byte (out of range for a character escape) and the second byte
 * was 'B', so the literal never encoded NEL.
 */
#define SS_LB_NEL "\302\205"
#define SS_LB_P_VT "\013" /* vertical tab - PARAGRAPH BREAKING */
#define SS_LB_P_FF "\014" /* form feed - PARAGRAPH BREAKING */


/*
 * String tables with one slot per enumerator: ssCharClass has SD_CC_MAX
 * entries and ssBiDiClass has SD_BC_MAX entries. They are defined in
 * another translation unit.
 * NOTE(review): presumably the printable class names, indexed by
 * SD_CharClass / SD_BiDiClass value - confirm against the definition.
 */
extern const char *ssCharClass[SD_CC_MAX];
extern const char *ssBiDiClass[SD_BC_MAX];

/*
 * Per-character property lookups taking a UCS-4 code point. The
 * implementations are not part of this header.
 */

/* Yields the SD_CharClass value for `in`. */
SD_CharClass getCharClass(SS_UCS4 in);

/* Yields the SD_BiDiClass value for `in`. */
SD_BiDiClass getBiDiClass(SS_UCS4 in);

/*
 * Yields a code point related to `in` by mirroring.
 * NOTE(review): the result for characters that have no mirrored form is
 * not visible from this header - check the implementation.
 */
SS_UCS4 getMirroredCharacter(SS_UCS4 in);

/*
 * Paragraph separator kind.
 * NOTE(review): judging by the names only, these stand for none, LF, CR,
 * CR+LF and the Unicode paragraph separator - confirm against callers.
 */
typedef enum
{
  SS_PS_None = 0,
  SS_PS_LF   = 1,
  SS_PS_CR   = 2,
  SS_PS_CRLF = 3,
  SS_PS_PS   = 4
} SS_ParaSep;


/*
 * Embedding selector: none, left or right.
 * NOTE(review): presumably the direction of a bidi embedding - confirm
 * against callers.
 */
typedef enum
{
  SS_EmbedNone  = 0,
  SS_EmbedLeft  = 1,
  SS_EmbedRight = 2
} SS_Embedding;

/*
 * Direction state of a character: plain left-to-right or right-to-left,
 * or located inside an embedding or an override of either direction.
 */
typedef enum
{
  SS_DR_L  = 0,  /* L-R character */
  SS_DR_R  = 1,  /* R-L character */
  SS_DR_LE = 2,  /* inside L embedded */
  SS_DR_RE = 3,  /* inside R embedded */
  SS_DR_LO = 4,  /* inside L override */
  SS_DR_RO = 5   /* inside R override */
} SS_DR_Dir;

#endif /*SCharClass_h*/