File: ucm_write_simple.cc

package info (click to toggle)
libtranscript 0.3.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 18,272 kB
  • sloc: ansic: 223,135; cpp: 4,289; sh: 1,095; xml: 172; makefile: 70; lex: 44
file content (204 lines) | stat: -rw-r--r-- 7,287 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/* Copyright (C) 2011 G.P. Halkes
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 3, as
   published by the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <arpa/inet.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "ucm2ltc.h"

static int unique;

/* Emit size bytes from data as a comma-separated list of two-digit hex
   literals ("0x%02x"), sixteen values per output line. Every continuation
   line starts with indent_level tab characters; the first line gets no
   indentation and no trailing separator or newline is written. */
static void write_byte_data(FILE *output, uint8_t *data, size_t size, int indent_level) {
  static const char tabs[] = "\t\t\t\t\t\t\t\t";
  const size_t values_per_line = 16;
  size_t pos = 0;

  while (pos < size) {
    if (pos % values_per_line != 0) {
      /* Somewhere in the middle of a line: plain separator. */
      fprintf(output, ", ");
    } else if (pos > 0) {
      /* Start of a continuation line: terminate the previous one and indent. */
      fprintf(output, ",\n%.*s", indent_level, tabs);
    }
    fprintf(output, "0x%02x", data[pos]);
    ++pos;
  }
}

/* Emit size 16-bit values from data as a comma-separated list of four-digit
   hex literals ("0x%04x"), eight values per output line. Every continuation
   line starts with indent_level tab characters; the first line gets no
   indentation and no trailing separator or newline is written. */
static void write_word_data(FILE *output, uint16_t *data, size_t size, int indent_level) {
  static const char tabs[] = "\t\t\t\t\t\t\t\t";
  const size_t values_per_line = 8;

  for (size_t pos = 0; pos < size; ++pos) {
    const bool at_line_start = (pos % values_per_line) == 0;

    if (!at_line_start)
      fprintf(output, ", ");
    else if (pos != 0)
      fprintf(output, ",\n%.*s", indent_level, tabs);

    fprintf(output, "0x%04x", data[pos]);
  }
}

uint8_t *Ucm::write_simple_from_unicode(FILE *output) {
  uint8_t(*map)[32], *flag_data, *level0_indices;
  uint8_t level1_indices[64][32];
  int level0_map_used, level1_map_used;
  int i, j;
  vector<Mapping *>::const_iterator iter;

  map = (uint8_t(*)[32])safe_malloc(65536);
  level0_indices = (uint8_t *)safe_malloc(64);
  memset(map, 0, 65536);
  for (iter = simple_mappings.begin(); iter != simple_mappings.end(); iter++)
    ((uint8_t *)map)[(*iter)->codepoints[0]] = (*iter)->codepage_bytes[0];

  level1_map_used = 1;
  level1_indices[0][0] = 0;
  for (i = 1; i < 2048; i++) {
    for (j = 0; j < level1_map_used; j++) {
      if (memcmp(map[i], map[j], 32) == 0) {
        ((uint8_t *)level1_indices)[i] = j;
        break;
      }
    }
    if (j == level1_map_used) {
      if (i != j) memcpy(map[level1_map_used], map[i], 32);
      ((uint8_t *)level1_indices)[i] = j;
      level1_map_used++;
    }
  }
  /* This can only happen if all the mappings are in different 32-codepoint
     ranges. Although technically not impossible, this is seriously unlikely.
     So we don't check for it beforehand, but only make sure here that we
     don't generate a bogus table. */
  if (level1_map_used > 255) PANIC();

  level0_map_used = 1;
  level0_indices[0] = 0;
  for (i = 1; i < 64; i++) {
    for (j = 0; j < level0_map_used; j++) {
      if (memcmp(level1_indices[i], level1_indices[j], 32) == 0) {
        level0_indices[i] = j;
        break;
      }
    }
    if (j == level0_map_used) {
      if (i != j) memcpy(level1_indices[level0_map_used], level1_indices[i], 32);
      level0_indices[i] = j;
      level0_map_used++;
    }
  }

  fprintf(output, "static const uint8_t codepoint_to_byte_data_%d[%d][32] = {\n", unique,
          level1_map_used);
  for (i = 0; i < level1_map_used; i++) {
    if (i != 0) fprintf(output, " },\n");
    fprintf(output, "\t{ ");
    write_byte_data(output, map[i], 32, 2);
  }
  fprintf(output, " }\n};\n\n");
  fprintf(output, "static const uint8_t codepoint_to_byte_idx1_%d[%d][32] = {\n", unique,
          level0_map_used);
  for (i = 0; i < level0_map_used; i++) {
    if (i != 0) fprintf(output, " },\n");
    fprintf(output, "\t{ ");
    write_byte_data(output, level1_indices[i], 32, 2);
  }
  fprintf(output, " }\n};\n\n");
  free(map);

  if (used_from_unicode_flags & Mapping::FROM_UNICODE_FALLBACK) {
    flag_data = (uint8_t *)safe_malloc(level1_map_used * 4);
    memset(flag_data, 0, level1_map_used * 4);
    for (iter = simple_mappings.begin(); iter != simple_mappings.end(); iter++) {
      if ((*iter)->from_unicode_flags & Mapping::FROM_UNICODE_FALLBACK) {
        uint16_t codepoint = (*iter)->codepoints[0];
        uint16_t idx =
            ((uint16_t)level1_indices[level0_indices[codepoint >> 10]][(codepoint >> 5) & 0x1f]
             << 5) +
            (codepoint & 0x1f);
        flag_data[idx >> 3] |= 1 << (idx & 7);
      }
    }
    fprintf(output, "static const uint8_t codepoint_to_byte_flags_%d[%d] = {\n\t", unique,
            level1_map_used * 4);
    write_byte_data(output, flag_data, level1_map_used * 4, 1);
    fprintf(output, "\n};\n\n");
    free(flag_data);
  }
  return level0_indices;
}

/* Write the complete table source for a single-byte converter to output.

   This increments the file-local table counter (used to give every generated
   table a unique name), writes the codepoint-to-byte tables through
   write_simple_from_unicode, and then emits an sbcs_converter_v1_t
   initializer followed by the two exported accessor functions for the
   variant.

   The initializer consists of, in order: the from-Unicode fallback flags (or
   NULL), the codepoint-to-byte data and idx1 tables, the 64-entry top-level
   index, the 256-entry byte-to-codepoint table, the 32-byte to-Unicode
   fallback bitmap, an internal-table flag and the first substitution byte. */
void Ucm::write_simple(FILE *output) {
  uint16_t byte_to_codepoint[256];
  uint8_t *level0_indices;
  vector<Mapping *>::const_iterator iter;

  unique++;

  /* Filling with 0xff bytes makes every entry 0xffff until a mapping
     overwrites it. */
  memset(byte_to_codepoint, 0xff, sizeof(byte_to_codepoint));
  for (iter = simple_mappings.begin(); iter != simple_mappings.end(); ++iter) {
    /* Mappings that are only from-Unicode fallbacks are not used for the byte
       to codepoint direction. */
    if (!((*iter)->from_unicode_flags & Mapping::FROM_UNICODE_FALLBACK))
      byte_to_codepoint[(unsigned int)(*iter)->codepage_bytes[0]] = (*iter)->codepoints[0];
  }

  /* Set entries for illegal to 0xfffe */
  for (vector<Entry>::const_iterator entry_iter = codepage_states.front()->entries.begin();
       entry_iter != codepage_states.front()->entries.end(); ++entry_iter) {
    if (entry_iter->action == ACTION_ILLEGAL) {
      for (int i = entry_iter->low; i <= entry_iter->high; i++) byte_to_codepoint[i] = 0xfffe;
    }
  }

  level0_indices = write_simple_from_unicode(output);
  fprintf(output, "static const sbcs_converter_v1_t sbcs_converter_%d = {\n", unique);
  if (used_from_unicode_flags & Mapping::FROM_UNICODE_FALLBACK)
    fprintf(output, "\tcodepoint_to_byte_flags_%d, ", unique);
  else
    fprintf(output, "\tNULL, ");
  fprintf(output, "codepoint_to_byte_data_%d, codepoint_to_byte_idx1_%d,\n", unique, unique);
  fprintf(output, "\t{ ");
  write_byte_data(output, level0_indices, 64, 2);
  /* The top-level index was allocated by write_simple_from_unicode and is not
     needed after it has been written out; release it to avoid leaking it for
     every table generated. */
  free(level0_indices);
  fprintf(output, " },\n\t{ ");
  write_word_data(output, byte_to_codepoint, 256, 2);
  fprintf(output, " },\n\t{ ");

  if (used_to_unicode_flags & Mapping::TO_UNICODE_FALLBACK) {
    uint8_t byte_to_codepoint_flags[32];

    /* One bit per byte value; a set bit marks a to-Unicode fallback. The
       bitmap must start out all clear (matching the "0" initializer written
       when no fallbacks exist) and the bits must be OR-ed in, because up to
       eight byte values share a single bitmap byte. */
    memset(byte_to_codepoint_flags, 0, sizeof(byte_to_codepoint_flags));
    for (iter = simple_mappings.begin(); iter != simple_mappings.end(); ++iter) {
      if ((*iter)->to_unicode_flags & Mapping::TO_UNICODE_FALLBACK)
        byte_to_codepoint_flags[((unsigned int)(*iter)->codepage_bytes[0]) >> 3] |=
            1 << ((*iter)->codepage_bytes[0] & 7);
    }
    write_byte_data(output, byte_to_codepoint_flags, 32, 2);
  } else {
    /* A lone 0 zero-initializes the whole bitmap: no fallbacks. */
    fprintf(output, "0");
  }

  /* Only the first byte of the substitution sequence is stored; 0 is used
     when the UCM file does not specify one. */
  vector<uint8_t> subchar;
  if (tag_values[Ucm::SUBCHAR].str == NULL)
    subchar.push_back(0);
  else
    parse_byte_sequence(tag_values[Ucm::SUBCHAR].str, subchar);
  fprintf(output, " },\n\t0x%02x, 0x%02x\n};\n\n", !!(flags & INTERNAL_TABLE), subchar[0]);

  fprintf(
      output,
      "TRANSCRIPT_EXPORT int transcript_get_iface_%s(void) { return TRANSCRIPT_SBCS_TABLE_V1; }\n",
      variant.normalized_id);
  fprintf(output,
          "TRANSCRIPT_EXPORT const sbcs_converter_v1_t *transcript_get_table_%s(void) { return "
          "&sbcs_converter_%d; }\n\n",
          variant.normalized_id, unique);
}