File: make_ci.pike

package info (click to toggle)
pike8.0 8.0.702-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 79,608 kB
  • sloc: ansic: 266,508; xml: 186,324; makefile: 3,537; sh: 1,731; cpp: 1,328; lisp: 655; awk: 441; asm: 242; objc: 240; pascal: 157; perl: 34; sed: 34
file content (155 lines) | stat: -rw-r--r-- 3,909 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#! /usr/bin/env pike

/*
 * Creates the file case_info.h
 *
 * Henrik Grubbström 1999-03-20
 */

#pragma strict_types

/*
 * Case-info modes stored in the second column of each generated table row.
 * The third column ("data") is interpreted according to the mode. Below,
 * "delta" is the lower-case code point minus the upper-case code point.
 */
#define CIM_NONE	0	/* Case-less; data is always 0. */
#define CIM_UPPERDELTA	1	/* Upper-case; data is delta (added to reach lower-case). */
#define CIM_LOWERDELTA	2	/* Lower-case; data is delta (subtracted to reach upper-case). */
#define CIM_CASEBIT	3	/* Either case; delta is a single bit that is set in the lower-case char. */
#define CIM_CASEBITOFF	4	/* Same as above, but the bit is clear in the lower-case char, so also offset by data. */
#define CIM_LONGUPPERDELTA 5    /* Upper-case; delta > 0x7fff, data holds delta - 0x7fff. */
#define CIM_LONGLOWERDELTA 6    /* Lower-case; delta > 0x7fff, data holds delta - 0x7fff. */

/*
 * Parse one hexadecimal field of the input, aborting with a diagnostic
 * that names the offending line if the field is not a hex number.
 */
private int parse_hex(string field, int lineno)
{
  int value;
  if (sscanf(field, "%x", value) != 1) {
    werror("Syntax error on line %d: Bad hexadecimal field %O\n",
           lineno, field);
    exit(1);
  }
  return value;
}

/*
 * Pick the table mode and data value for one case mapping.
 *
 * delta       Lower-case code point minus upper-case code point.
 * lower       The lower-case code point of the pair.
 * delta_mode  Mode for a plain delta (CIM_UPPERDELTA or CIM_LOWERDELTA).
 * long_mode   Mode for a delta that does not fit in a short
 *             (CIM_LONGUPPERDELTA or CIM_LONGLOWERDELTA).
 *
 * Returns ({ mode, data }).
 */
private array(int) classify_delta(int delta, int lower,
                                  int delta_mode, int long_mode)
{
  // A positive power-of-two delta means the two cases differ in one bit.
  if ((delta > 0) && !(delta & (delta - 1)))
    return ({ (lower & delta) ? CIM_CASEBIT : CIM_CASEBITOFF, delta });

  if (delta > 0x7fff)
    return ({ long_mode, delta - 0x7fff });

  // NOTE(review): the long modes are documented as "delta + 0x7fff", but
  // this branch offsets by 0x8000. Kept exactly as before; confirm against
  // the code that decodes the table before relying on it.
  if (delta < -0x8000)
    return ({ long_mode, delta + 0x8000 });

  return ({ delta_mode, delta });
}

/*
 * Entry point: reads the unicode database from stdin and writes the case
 * info table to the file named by argv[1].
 *
 * Every non-empty input line must have 15 ';'-separated fields. Field 0 is
 * the code point (hex). A non-empty field 13 marks the character as
 * upper-case and gives its lower-case counterpart; otherwise a non-empty
 * field 14 marks it as lower-case and gives its upper-case counterpart.
 * Consecutive code points with identical (mode, data) are merged into one
 * range, and gaps after a cased range get an explicit CIM_NONE range.
 *
 * Exits with status 1 on usage or input errors, 0 on success.
 */
int main(int argc, array(string) argv)
{
  if (argc < 2 || argv[1] == "--help") {
    werror("Creates case info file by reading the unicode database from\n"
           "stdin and outputs it to a file.\n"
           "\n"
           "Usage: make_ci.pike output_file.h\n");
    exit(1);
  }

  string data = Stdio.stdin.read();

  // Each entry is ({ first code point of range, mode, data }).
  array(array(int)) ci = ({({ 0, CIM_NONE, 0 })});
  int prevchar = 0;
  int lineno;

  foreach(data/"\n", string line) {
    lineno++;
    line -= "\r";
    if (!sizeof(line)) continue;

    array(string) info = line/";";
    if (sizeof(info) != 15) {
      werror("Syntax error on line %d: "
             "Bad number of fields:%d (expected 15)\n"
             "%O\n",
             lineno, sizeof(info), line);
      exit(1);
    }

    int code = parse_hex(info[0], lineno);
#if 1
    // Hardcoded in builtin_functions.c
    if (code && code <= 'z')
      continue;
#endif

    int mode = CIM_NONE;
    int d;
    if (sizeof(info[13])) {
      // Upper-case char; field 13 is its lower-case counterpart.
      int lower = parse_hex(info[13], lineno);
      array(int) md = classify_delta(lower - code, lower,
                                     CIM_UPPERDELTA, CIM_LONGUPPERDELTA);
      mode = md[0];
      d = md[1];
    } else if (sizeof(info[14])) {
      // Lower-case char; field 14 is its upper-case counterpart.
      int upper = parse_hex(info[14], lineno);
      array(int) md = classify_delta(code - upper, code,
                                     CIM_LOWERDELTA, CIM_LONGLOWERDELTA);
      mode = md[0];
      d = md[1];
    }

    if ((code > prevchar + 1) && (ci[-1][1] != CIM_NONE)) {
      // Code points were skipped after a cased range: add a NONE-range
      // so the previous range does not extend over the gap.
      ci += ({({ prevchar + 1, CIM_NONE, 0 })});
    }

    if ((ci[-1][1] != mode) || (ci[-1][2] != d)) {
      // New range.
      ci += ({({ code, mode, d })});
    }

    prevchar = code;
  }

  // Indexed by mode value; must stay in step with the CIM_* constants.
  array(string) mode_names = ({
    "CIM_NONE", "CIM_UPPERDELTA", "CIM_LOWERDELTA",
    "CIM_CASEBIT", "CIM_CASEBITOFF",
    "CIM_LONGUPPERDELTA", "CIM_LONGLOWERDELTA",
  });

  array(string) table = allocate(sizeof(ci));

  foreach(ci; int i; array(int) entry) {
    if ((entry[2] <= -0x8000) || (entry[2] > 0x7fff)) {
      werror("Case information out of range for shorts: %d\n", entry[2]);
      exit(1);
    }
    // The sign is emitted separately so the magnitude prints as plain hex.
    table[i] =
      sprintf("{ 0x%06x, %s, %s0x%04x, },\n",
              entry[0],
              mode_names[entry[1]],
              (entry[2] < 0) ? "-" : "",
              (entry[2] < 0) ? -entry[2] : entry[2]);
  }

  // Number of leading table rows whose range starts at or below 0xff.
  int shift0_high;
  for (shift0_high = 0; shift0_high < sizeof(ci); shift0_high++)
    if (ci[shift0_high][0] > 0xff)
      break;

  Stdio.File outfile = Stdio.File(argv[1], "wct");

  outfile->
    write("/*\n"
          " * Created by make_ci.pike\n"
          " * on %s"
          " *\n"
          " * Table used for looking up the case of\n"
          " * Unicode characters.\n"
          " *\n"
          " * Henrik Grubbstr�m 1999-03-20\n"
          " */\n\n", ctime(time()));

  outfile->write(table * "");

  outfile->write("\n\n#define CASE_INFO_SHIFT0_HIGH 0x%04x\n", shift0_high);

  outfile->close();

  exit(0);
}