File: tr.h

package info (click to toggle)
mcl 1%3A14-137-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, stretch
  • size: 10,704 kB
  • ctags: 5,125
  • sloc: ansic: 53,217; sh: 4,448; perl: 3,967; makefile: 422
file content (170 lines) | stat: -rw-r--r-- 4,347 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/*   (C) Copyright 2005, 2006, 2007, 2008, 2009 Stijn van Dongen
 *
 * This file is part of tingea.  You can redistribute and/or modify tingea under
 * the terms of the GNU General Public License; either version 3 of the License
 * or (at your option) any later version.  You should have received a copy of
 * the GPL along with tingea, in the file COPYING.
*/

#ifndef tingea_tr
#define tingea_tr

#include <string.h>

#include "ting.h"
#include "types.h"
#include "inttypes.h"


/*
 * README
 *    This interface is not POSIX compliant. It might evolve to
 *    optionally become so.
 *    However, given some of the braindeadliness of POSIX tr compliance,
 *    I don't think the world needs another tr implementation.
 *    My gripe is mainly about derailed syntax such as '[:alpha:0'.
 *    It should go down in a ball of flames, not happily parse.
 *    To be honest, I don't know for sure whether this is a POSIX
 *    lack of requirement or an implementation choice.
 *
 *    I did choose to follow most of the POSIX syntax. It is probably
 *    a sign of weakness.
 *    This interface should be able to do everything a POSIX interface can,
 *    possibly more.
 *
 * -  It allows separate specification of src, dst, del and squash sets.
 * -  Provisionally we accept "^spec" to indicate complement,
 *       for any of src dst del squash sets.
 * -  It uses [*c*20] to denote repeats, rather than [c*20].
 *       rationale: do not slam door shut on new syntax.
 * -  It does not recognize '[a-z]' ranges, only 'a-z'.
 *       rationale: none. If ever, notation will be [-a-z] or similar.
 * -  The magic repeat operator [*c#] stops on boundaries
 *       rationale: I like it.
 *       A boundary is introduced by stop/start of ranges and classes.
 * -  The magic repeat operator [*c*] does not stop on boundaries.
 * -  For now, the interface does 1) deletion, 2) translation, 3) squashing.
 *       in the future it may provide a custom order of doing things.
 * 
 *
 * Apart from the fact that you cannot have '\0' in C strings, everything
 * here should work for '\0' as well - specifically the mcxTrTable structure.
 * However, the current interface uses C strings for dst and src and C strings
 * for data.
 *
 * More documentation to follow.
 *
*/

/*
 * Globals shared by the tr module; they are only declared here and must
 * be defined in exactly one translation unit elsewhere.
 * NOTE(review): going by the names, mcx_tr_err presumably points at a
 * message describing the most recent tr failure, and mcx_tr_debug
 * presumably switches on diagnostic output -- confirm against the
 * implementation.
*/
extern const char *mcx_tr_err;
extern mcxbool mcx_tr_debug;


/*
 * Compiled translation state, passed by pointer to the mcxTR* functions
 * declared in this header.
 *
 * tlt   has 256 slots.
 *       NOTE(review): presumably one slot per unsigned char value; how
 *       each u32 slot is encoded is not visible in this header --
 *       confirm against the implementation.
 * modes is a bit set.
 *       NOTE(review): presumably built from the MCX_TR_* flags -- confirm.
*/
typedef struct {
   u32 tlt[256];
   mcxbits modes;
} mcxTR;


/*
 * Bit flags for the mcxbits arguments taken by the functions declared in
 * this header. The names ending in _C denote the complement of the
 * corresponding set. Bit 0 is not assigned.
 *
 * Every shift expression is wrapped in parentheses so that a flag stays
 * a single operand wherever it is expanded. Without them, operators that
 * bind tighter than << split the expansion apart:
 *    ~MCX_TR_SOURCE      would become (~1) << 2
 *    MCX_TR_SQUASH * 2   would become 1 << (4*2)
 * The numeric values and the type (int) are the same as before.
*/
#define MCX_TR_DEFAULT     0
#define MCX_TR_TRANSLATE   (1 <<  1)

#define MCX_TR_SOURCE      (1 <<  2)
#define MCX_TR_DEST        (1 <<  3)
#define MCX_TR_SQUASH      (1 <<  4)
#define MCX_TR_DELETE      (1 <<  5)

#define MCX_TR_SOURCE_C    (1 <<  6)
#define MCX_TR_DEST_C      (1 <<  7)
#define MCX_TR_DELETE_C    (1 <<  8)
#define MCX_TR_SQUASH_C    (1 <<  9)


#define MCX_TR_COMPLEMENT  (1 << 10)


/*
 * Fills the table tr from up to four separately specified character
 * sets: src, dst, set_delete and set_squash.
 * Each specification may provisionally start with '^' to request the
 * complement of the set. The loaded table does not remember whether a
 * complement was used to build it.
 *
 * NOTE(review): modes presumably takes MCX_TR_* flags, and the mcxstatus
 * result presumably reports success or failure -- confirm against the
 * implementation. Whether NULL is accepted for a set is not visible here.
*/
mcxstatus mcxTRloadTable(
   mcxTR *tr,
   const char *src,
   const char *dst,
   const char *set_delete,
   const char *set_squash,
   mcxbits modes
);


  /*  Runs the C string src through the table tr.
   *  The return value is the new length of the string.
   *  NOTE(review): src is not const, so it is presumably rewritten in
   *  place -- confirm against the implementation.
   *  fixme: document map/squash semantics.
  */
ofs mcxTRtranslate(char *src, mcxTR *tr);


/*
 * Counterpart of mcxTRtranslate that takes an mcxTing instead of a
 * plain C string.
 * NOTE(review): presumably src is modified in place and the result is
 * its new length, as for mcxTRtranslate -- confirm against the
 * implementation.
*/
ofs mcxTingTranslate(mcxTing *src, mcxTR *tr);

/*
 * Takes a ting plus the same set specifications that mcxTRloadTable
 * accepts (src, dst, set_delete, set_squash) and a flags bit set.
 * NOTE(review): presumably a convenience routine that builds a table
 * from the specifications, applies it to txt and returns the new
 * length -- confirm against the implementation.
*/
ofs mcxTingTr(
   mcxTing *txt,
   const char *src,
   const char *dst,
   const char *set_delete,
   const char *set_squash,
   mcxbits flags
);


/* Decodes a single escape sequence and stores its numeric value in *value.
 *
 * Recognized forms:
 *    octal         e.g. \012 stores 10
 *    hexadecimal   e.g. \xa0
 *    symbolic      \n, \t, \r, \b etc
 * The form \0xa0 is *not* accepted yet.
 *
 * The return value points at the next parsable character.
 * NOTE(review): z presumably marks the end of the input that starts at
 * p -- confirm against the implementation.
 *
 * This interface should be moved to ding.
*/

char *mcxStrEscapedValue(const char *p, const char *z, int *value);



/*
 * Produces a ting holding every character that tr associates with the
 * sets selected by bits.
 *
 * Flags accepted in bits:
 *    MCX_TR_SOURCE     MCX_TR_SOURCE_C
 *    MCX_TR_SQUASH     MCX_TR_SQUASH_C
 *    MCX_TR_DELETE     MCX_TR_DELETE_C
 *
 * Not yet implemented:
 *    MCX_TR_DEST       MCX_TR_DEST_C
 *
 * CAUTION
 *    A loaded tr keeps no record of complements that were used while
 *    constructing it. A _C flag in bits therefore complements whatever
 *    is stored in tr, not the specification that was originally given.
 *    Example: loading a source of "^a-z" with mcxTRloadTable and then
 *    calling mcxTRsplash with MCX_TR_SOURCE_C yields a string that
 *    contains all of a-z.
*/

mcxTing *mcxTRsplash(mcxTR *tr, mcxbits bits);


#endif