File: composition.c

package info (click to toggle)
libunistring 0.9.10-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 28,668 kB
  • sloc: ansic: 91,149; perl: 15,827; sh: 7,478; makefile: 367; lisp: 308
file content (96 lines) | stat: -rw-r--r-- 3,285 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* Canonical composition of Unicode characters.
   Copyright (C) 2002, 2006, 2009, 2011-2018 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This program is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "uninorm.h"

#include <string.h>

/* One entry of the composition lookup table.  */
struct composition_rule
{
  /* Lookup key: the first code point followed by the second code point,
     each stored as 3 bytes, most significant byte first.  */
  char codes[6];
  /* The code point that the pair composes to.  */
  unsigned int combined;
};

#include "composition-table.h"

/* Attempt to combine the Unicode characters UC1 and UC2.
   Return the combined character if the pair composes, or 0 if it does
   not.  */
ucs4_t
uc_composition (ucs4_t uc1, ucs4_t uc2)
{
  char key[6];
  const struct composition_rule *match;

  /* Only code points below 0x12000 are considered at all.  */
  if (uc1 >= 0x12000 || uc2 >= 0x12000)
    return 0;

  /* Hangul: a single letter L followed by a single letter V forms the
     two-letter syllable LV.  */
  if (uc1 >= 0x1100 && uc1 < 0x1100 + 19
      && uc2 >= 0x1161 && uc2 < 0x1161 + 21)
    {
      ucs4_t l_index = uc1 - 0x1100;
      ucs4_t v_index = uc2 - 0x1161;

      return 0xAC00 + (l_index * 21 + v_index) * 28;
    }

  /* Hangul: a two-letter syllable LV followed by a single letter T forms
     the three-letter syllable LVT.  A syllable is of the LV kind exactly
     when its offset from 0xAC00 is a multiple of 28.  */
  if (uc1 >= 0xAC00 && uc1 < 0xD7A4 && ((uc1 - 0xAC00) % 28) == 0
      && uc2 > 0x11A7 && uc2 < 0x11A7 + 28)
    return uc1 + (uc2 - 0x11A7);

  /* General case: look the pair up in the composition table.  The key is
     UC1 followed by UC2, 3 bytes each, most significant byte first.  */
  key[0] = (uc1 >> 16) & 0xff;
  key[1] = (uc1 >> 8) & 0xff;
  key[2] = uc1 & 0xff;
  key[3] = (uc2 >> 16) & 0xff;
  key[4] = (uc2 >> 8) & 0xff;
  key[5] = uc2 & 0xff;

  match = gl_uninorm_compose_lookup (key, 6);
  if (match == NULL)
    return 0;

  return match->combined;
}