File: shmbchar.h

package info (click to toggle)
bash 5.3-2
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 44,432 kB
  • sloc: ansic: 134,747; sh: 8,866; yacc: 5,966; makefile: 4,697; perl: 4,105; asm: 48; awk: 23; sed: 16
file content (133 lines) | stat: -rw-r--r-- 5,456 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* Multibyte character data type.
   Copyright (C) 2001, 2005-2007, 2009-2010, 2021,2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */

#ifndef _SHMBCHAR_H
#define _SHMBCHAR_H 1

#if defined (HANDLE_MULTIBYTE)

#include <string.h>

/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   <wchar.h>.
   BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
   <wchar.h>.  */
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>

/* is_basic(c) tests whether the single-byte character c is
   - in the ISO C "basic character set" or is one of '@', '$', and '`'
     which ISO C 23 § 5.2.1.1.(1) guarantees to be single-byte and in
     practice are safe to treat as basic in the execution character set,
     or
   - in the POSIX "portable character set", which
     <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap06.html>
     equally guarantees to be single-byte. */
                               
/* Preprocessor check that every printable character from ' ' through '~'
   compares equal to its ISO-646 (ASCII) code, 32 through 126.  When the
   whole condition holds, IS_BASIC_ASCII is defined and the table-driven
   is_basic is compiled; otherwise the #else branch provides a
   switch-based is_basic that makes no assumption about code values.  */
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
    && ('$' == 36) && ('%' == 37) && ('&' == 38) && ('\'' == 39) \
    && ('(' == 40) && (')' == 41) && ('*' == 42) && ('+' == 43) \
    && (',' == 44) && ('-' == 45) && ('.' == 46) && ('/' == 47) \
    && ('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) \
    && ('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) \
    && ('8' == 56) && ('9' == 57) && (':' == 58) && (';' == 59) \
    && ('<' == 60) && ('=' == 61) && ('>' == 62) && ('?' == 63) \
    && ('@' == 64) && ('A' == 65) && ('B' == 66) && ('C' == 67) \
    && ('D' == 68) && ('E' == 69) && ('F' == 70) && ('G' == 71) \
    && ('H' == 72) && ('I' == 73) && ('J' == 74) && ('K' == 75) \
    && ('L' == 76) && ('M' == 77) && ('N' == 78) && ('O' == 79) \
    && ('P' == 80) && ('Q' == 81) && ('R' == 82) && ('S' == 83) \
    && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) \
    && ('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) \
    && ('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) \
    && ('`' == 96) && ('a' == 97) && ('b' == 98) && ('c' == 99) \
    && ('d' == 100) && ('e' == 101) && ('f' == 102) && ('g' == 103) \
    && ('h' == 104) && ('i' == 105) && ('j' == 106) && ('k' == 107) \
    && ('l' == 108) && ('m' == 109) && ('n' == 110) && ('o' == 111) \
    && ('p' == 112) && ('q' == 113) && ('r' == 114) && ('s' == 115) \
    && ('t' == 116) && ('u' == 117) && ('v' == 118) && ('w' == 119) \
    && ('x' == 120) && ('y' == 121) && ('z' == 122) && ('{' == 123) \
    && ('|' == 124) && ('}' == 125) && ('~' == 126)
/* All 95 comparisons held, so the character set is ISO-646, not EBCDIC. */
# define IS_BASIC_ASCII 1

/* Bitmap consulted by is_basic; it is only declared here, the definition
   lives in another translation unit.  */
extern const unsigned int is_basic_table[];

/* Return 1 if the bit for C is set in is_basic_table, 0 otherwise.

   C is first converted to unsigned char so that a negative char value
   cannot produce a negative index.  The table is addressed as 32 bits
   per word: the word index is the byte value divided by 32 and the bit
   position within that word is the remainder.  */
static inline int
is_basic (char c)
{
  const unsigned int byte_value = (unsigned char) c;
  const unsigned int table_word = is_basic_table[byte_value / 32u];
  const unsigned int bit_index = byte_value % 32u;

  return (int) ((table_word >> bit_index) & 1u);
}

#if 0
/* XXX - FUTURE */
/* Disabled alternative: a macro form of is_basic that accepts every byte
   value below 0x80 instead of consulting is_basic_table.  The rationale
   recorded for it follows.  */
/* All locale encodings (see localcharset.h) map the characters 0x00..0x7F
   to U+0000..U+007F, like ASCII, except for
     CP864      different mapping of '%'
     SHIFT_JIS  different mappings of 0x5C, 0x7E
     JOHAB      different mapping of 0x5C
   However, these characters in the range 0x20..0x7E are in the ISO C
   "basic character set" and in the POSIX "portable character set", which
   ISO C and POSIX guarantee to be single-byte.  Thus, locales with these
   encodings are not POSIX compliant.  And they are most likely not in use
   any more (as of 2023).  */
#define is_basic(c) ((unsigned char) (c) < 0x80)
#endif

#else

/* Variant of is_basic that does not depend on the numeric values of
   characters: C is compared against an explicit list of characters.

   Return 1 if C is one of the control characters '\b' '\r' '\n' '\t' '\v'
   '\f', a space, or any of the printable characters listed below;
   return 0 for everything else, including the null character.  */
static inline int
is_basic (char c)
{
  /* The complete set of accepted characters, grouped for readability.  */
  static const char basic_chars[] =
    "\b\r\n\t\v\f"
    " !\"#$%&'()*+,-./"
    "0123456789"
    ":;<=>?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";

  /* strchr would match the string's terminating null character, which is
     not a member of the set, so reject it up front.  */
  if (c == '\0')
    return 0;

  return strchr (basic_chars, c) != NULL;
}

#endif

#endif /* HANDLE_MULTIBYTE */
#endif /* _SHMBCHAR_H */