File: genctype.py

package info (click to toggle)
subversion 1.8.10-6%2Bdeb8u6
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 62,080 kB
  • sloc: ansic: 795,684; python: 115,859; java: 17,742; sh: 13,590; ruby: 12,397; cpp: 11,206; lisp: 7,540; perl: 5,649; sql: 1,466; makefile: 1,110; xml: 577
file content (114 lines) | stat: -rwxr-xr-x 3,932 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
"""genctype.py - Generate the svn_ctype character classification table.
"""

# Names of the 128 ASCII characters, indexed by character code: mnemonics
# for the control characters, 'sp' for space, 'del' for 127, and the
# character itself for every other printable code.
names = (('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
          'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
          'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
          'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
          'sp')
         + tuple(chr(code) for code in range(33, 127))
         + ('del',))

# Codes flagged as whitespace: HT, NL, VT, FF, CR and space.
whitespace = (9, 10, 11, 12, 13, 32)

# High bytes that are never flagged as UTF-8 lead bytes by this script.
# NOTE(review): 0xE0 and 0xF0 are legal lead bytes under RFC 3629; whether
# excluding them is deliberate is not visible from this script -- confirm
# before changing, since the emitted table would change with it.
utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)


def _ascii_entry(code):
    """Return (label, flags) for an ASCII code in the range 0..127.

    The label is the character name padded to three columns (single
    characters are centred, mnemonics are left-justified).  The flags are
    the SVN_CTYPE_* names that apply, in the order they are emitted.
    """
    label = names[code]
    label = label.center(3) if len(label) == 1 else label.ljust(3)

    flags = ['SVN_CTYPE_ASCII']
    if code < 32 or code == 127:
        flags.append('SVN_CTYPE_CNTRL')
    if code in whitespace:
        flags.append('SVN_CTYPE_SPACE')

    # At most one of the following character classes applies.
    ch = chr(code)
    if '0' <= ch <= '9':
        flags.append('SVN_CTYPE_DIGIT')
    elif 'A' <= ch <= 'Z':
        flags.append('SVN_CTYPE_UPPER')
        if ch <= 'F':
            # A-F double as hexadecimal digits.
            flags.append('SVN_CTYPE_XALPHA')
    elif 'a' <= ch <= 'z':
        flags.append('SVN_CTYPE_LOWER')
        if ch <= 'f':
            # a-f double as hexadecimal digits.
            flags.append('SVN_CTYPE_XALPHA')
    elif '!' <= ch <= '~':
        # Printable, but neither a digit nor a letter.
        flags.append('SVN_CTYPE_PUNCT')

    return label, flags


def _high_entry(code):
    """Return (label, flags) for a byte in the range 128..255.

    The label is the hexadecimal value without the leading '0', e.g. 'x80'.
    """
    label = hex(code)[1:]

    if 0xC0 < code < 0xFE and code not in utf8_invalid:
        # Lead byte (start of a multibyte sequence).
        flags = ['SVN_CTYPE_UTF8LEAD']
    elif 0x80 <= code < 0xC0:
        # Continuation byte: the top two bits are 10.
        flags = ['SVN_CTYPE_UTF8CONT']
    else:
        flags = []

    return label, flags


print('    /* **** DO NOT EDIT! ****')
print('       This table was generated by genctype.py, make changes there. */')

# One table row per byte value; a byte with no flags is written as 0.
for code in range(256):
    if code < 128:
        label, flags = _ascii_entry(code)
    else:
        label, flags = _high_entry(code)
    print('    /* %s */ %s,' % (label, ' | '.join(flags) or '0'))