File: mathml_operator_dictionary.cc

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (211 lines) | stat: -rw-r--r-- 9,892 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "third_party/blink/renderer/platform/text/mathml_operator_dictionary.h"

#include "base/compiler_specific.h"
#include "base/notreached.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"

namespace blink {

namespace {

// Operators made of two ASCII characters, see
// https://w3c.github.io/mathml-core/#operator-dictionary-compact-special-tables
//
// Both the ordering and the position of each string are significant:
//  - FindCategory() runs std::lower_bound over this array, so it must stay
//    sorted in ascending order (first character, then second character).
//  - The operator found at index i is looked up in the compact dictionary
//    under the key kCombiningMinusSignBelow + i.
const char* operators_2_ascii_chars[] = {
    "!!", "!=", "&&",  // indices 0-2
    "**", "*=", "++",  // indices 3-5
    "+=", "--", "-=",  // indices 6-8
    "->", "//", "/=",  // indices 9-11
    ":=", "<=", "<>",  // indices 12-14
    "==", ">=", "||",  // indices 15-17
};

// One row of the compact operator dictionary, see
// https://w3c.github.io/mathml-core/#operator-dictionary-categories-hexa-table
struct EntryRange {
  // Packed 16-bit value. ExtractKey() reads its low 14 bits and
  // ExtractCategory() reads its top 4 bits, so the two views overlap on
  // bits 12-13.
  uint16_t entry;
  // Number of additional consecutive keys, after ExtractKey(), that this row
  // also covers. The bit-field is 4 bits wide, so a row spans at most 16 keys.
  unsigned range_bounds_delta : 4;
};
// Returns the lookup key stored in |range|, i.e. the low 14 bits of its packed
// entry. FindCategory() builds the keys it searches for from a code point
// mapped into 12 bits plus 0x1000 (prefix) or 0x2000 (postfix).
static inline uint16_t ExtractKey(const EntryRange& range) {
  constexpr uint16_t kKeyMask = 0x3FFF;
  return range.entry & kKeyMask;
}
// Returns the top 4 bits of the packed entry of |range|. FindCategory()
// translates this value into a MathMLOperatorDictionaryCategory.
static inline uint16_t ExtractCategory(const EntryRange& range) {
  constexpr int kCategoryShift = 12;
  return range.entry >> kCategoryShift;
}

// The following representation is taken from the spec. It reduces storage
// requirements by mapping code points and categories so that they make better
// use of the available bytes. For details see
// https://w3c.github.io/mathml-core/#operator-dictionary.
// It was automatically generated from the spec's script:
// https://github.com/w3c/mathml-core/blob/main/tables/operator-dictionary.py
//
// How FindCategory() consumes each {entry, range_bounds_delta} row:
//  - ExtractKey(row), the low 14 bits of |entry|, is the first key of the row:
//    a code point mapped into 12 bits, with 0x1000 set for the prefix form and
//    0x2000 set for the postfix form (neither bit is set for infix).
//  - ExtractCategory(row), the top 4 bits of |entry|, selects the category.
//  - The row matches every key in the inclusive interval
//    [ExtractKey(row), ExtractKey(row) + range_bounds_delta].
// Rows must stay sorted by ExtractKey() because the lookup uses
// std::upper_bound. Do not edit by hand; regenerate from the script instead.
static const EntryRange compact_dictionary[] = {
    {0x8025, 0},  {0x802A, 0},  {0x402B, 0},  {0x402D, 0},  {0x802E, 0},
    {0x402F, 0},  {0x803F, 1},  {0xC05C, 0},  {0x805E, 0},  {0xC05F, 0},
    {0x40B1, 0},  {0x80B7, 0},  {0x80D7, 0},  {0x40F7, 0},  {0x4322, 0},
    {0x8323, 0},  {0x832E, 0},  {0x8422, 0},  {0x8443, 0},  {0x4444, 0},
    {0xC461, 3},  {0x0590, 5},  {0x059A, 15}, {0x05AA, 4},  {0x05B0, 5},
    {0x05B9, 0},  {0x05BC, 15}, {0x05CC, 9},  {0x05DA, 15}, {0x05EA, 6},
    {0x05F3, 12}, {0xC606, 0},  {0x4612, 4},  {0x8617, 2},  {0x4627, 3},
    {0x4636, 0},  {0x4638, 0},  {0x8640, 0},  {0x468C, 2},  {0x4693, 3},
    {0x8697, 0},  {0x4698, 0},  {0x8699, 2},  {0x469D, 2},  {0x86A0, 1},
    {0x86BA, 0},  {0x46BB, 2},  {0x86C4, 3},  {0x86C9, 3},  {0x46CE, 1},
    {0x46D2, 1},  {0x8705, 1},  {0x0B94, 0},  {0x4B95, 2},  {0x0B99, 0},
    {0x0B9B, 6},  {0x0BA5, 1},  {0x0BA8, 7},  {0x0BB1, 0},  {0x0BB3, 0},
    {0x0BB5, 0},  {0x0BB8, 0},  {0x0BBA, 4},  {0x8BCB, 0},  {0x8BCD, 0},
    {0x0BF0, 1},  {0x0BF4, 11}, {0x0D00, 15}, {0x0D10, 15}, {0x0D20, 0},
    {0x0D34, 3},  {0x0D42, 15}, {0x0D52, 15}, {0x0D62, 15}, {0x0D72, 3},
    {0x0D7C, 3},  {0x4DB8, 0},  {0x4DBC, 0},  {0x4DC4, 1},  {0x8DC6, 2},
    {0x8DD4, 3},  {0x8DE2, 0},  {0x4DF5, 6},  {0x8E1D, 1},  {0x4E1F, 15},
    {0x8E2F, 8},  {0x4E38, 2},  {0x8E3B, 2},  {0x4E3E, 0},  {0x8E3F, 0},
    {0x4E40, 15}, {0x8E50, 0},  {0x4E51, 15}, {0x4E61, 2},  {0x8E64, 1},
    {0x4EDB, 0},  {0x8EDC, 1},  {0x4EF6, 0},  {0x4EFB, 0},  {0x4EFD, 0},
    {0x8EFE, 0},  {0x0F04, 3},  {0x0F0C, 5},  {0x0F30, 14}, {0x0F40, 12},
    {0x0F60, 5},  {0x0F6A, 3},  {0x0F70, 3},  {0x0F7A, 3},  {0x0F80, 7},
    {0x0F95, 0},  {0x0FA0, 15}, {0x0FB8, 0},  {0x1021, 0},  {0x5028, 0},
    {0x102B, 0},  {0x102D, 0},  {0x505B, 0},  {0x507B, 1},  {0x10AC, 0},
    {0x10B1, 0},  {0x1331, 0},  {0x5416, 0},  {0x1418, 0},  {0x141C, 0},
    {0x1600, 1},  {0x1603, 1},  {0x1607, 0},  {0xD60F, 2},  {0x1612, 1},
    {0x161F, 3},  {0x962B, 8},  {0x1634, 1},  {0x163C, 0},  {0x16BE, 1},
    {0xD6C0, 3},  {0x5708, 0},  {0x570A, 0},  {0x1710, 0},  {0x1719, 0},
    {0x5729, 0},  {0x5B72, 0},  {0x1B95, 1},  {0x1BC0, 0},  {0x5BE6, 0},
    {0x5BE8, 0},  {0x5BEA, 0},  {0x5BEC, 0},  {0x5BEE, 0},  {0x5D80, 0},
    {0x5D83, 0},  {0x5D85, 0},  {0x5D87, 0},  {0x5D89, 0},  {0x5D8B, 0},
    {0x5D8D, 0},  {0x5D8F, 0},  {0x5D91, 0},  {0x5D93, 0},  {0x5D95, 0},
    {0x5D97, 0},  {0x5D99, 0},  {0x1D9B, 15}, {0x1DAB, 4},  {0x5DD8, 0},
    {0x5DDA, 0},  {0x5DFC, 0},  {0xDE00, 10}, {0x9E0B, 15}, {0x9E1B, 1},
    {0xDE1D, 1},  {0x1EEC, 1},  {0xDEFC, 0},  {0xDEFF, 0},  {0x2021, 1},
    {0x2025, 2},  {0x6029, 0},  {0x605D, 0},  {0xA05E, 1},  {0x2060, 0},
    {0x607C, 1},  {0xA07E, 0},  {0x20A8, 0},  {0xA0AF, 0},  {0x20B0, 0},
    {0x20B2, 2},  {0x20B8, 1},  {0xA2C6, 1},  {0xA2C9, 0},  {0x22CA, 1},
    {0xA2CD, 0},  {0x22D8, 2},  {0xA2DC, 0},  {0x22DD, 0},  {0xA2F7, 0},
    {0xA302, 0},  {0x2311, 0},  {0x2320, 0},  {0x2325, 0},  {0x2327, 0},
    {0x2331, 0},  {0x6416, 0},  {0x2419, 2},  {0x241D, 2},  {0x2432, 5},
    {0xA43E, 0},  {0x2457, 0},  {0x24DB, 1},  {0x6709, 0},  {0x670B, 0},
    {0xA722, 1},  {0x672A, 0},  {0xA7B4, 1},  {0x27CD, 0},  {0xA7DC, 5},
    {0x6B73, 0},  {0x6BE7, 0},  {0x6BE9, 0},  {0x6BEB, 0},  {0x6BED, 0},
    {0x6BEF, 0},  {0x6D80, 0},  {0x6D84, 0},  {0x6D86, 0},  {0x6D88, 0},
    {0x6D8A, 0},  {0x6D8C, 0},  {0x6D8E, 0},  {0x6D90, 0},  {0x6D92, 0},
    {0x6D94, 0},  {0x6D96, 0},  {0x6D98, 1},  {0x6DD9, 0},  {0x6DDB, 0},
    {0x6DFD, 0}};

}  // namespace

// Returns the operator dictionary category of |content| when it is used with
// the given |form|, or MathMLOperatorDictionaryCategory::kNone when nothing
// matches. |content| is expected to be a 16-bit string.
MathMLOperatorDictionaryCategory FindCategory(
    const String& content,
    MathMLOperatorDictionaryForm form) {
  DCHECK(!content.Is8Bit());

  const bool is_infix = form == MathMLOperatorDictionaryForm::kInfix;
  const bool is_prefix = form == MathMLOperatorDictionaryForm::kPrefix;
  const bool is_postfix = form == MathMLOperatorDictionaryForm::kPostfix;

  // Step 1: reduce |content| to a single BMP code point (the key). A key of
  // zero means that |content| cannot be an operator dictionary entry.
  uint16_t key = 0;
  switch (content.length()) {
    case 1: {
      // A lone BMP character is its own key, unless it lies in the block onto
      // which the 2-ASCII-chars operators are mapped further down.
      const UChar32 code_point = content[0];
      const bool in_reserved_block =
          code_point >= kCombiningMinusSignBelow &&
          code_point <= kGreekCapitalReversedDottedLunateSigmaSymbol;
      if (!in_reserved_block) {
        key = code_point;
      }
      break;
    }
    case 2: {
      const UChar32 first_code_point = content.CharacterStartingAt(0);
      if (first_code_point ==
              kArabicMathematicalOperatorMeemWithHahWithTatweel ||
          first_code_point == kArabicMathematicalOperatorHahWithDal) {
        // The non-BMP Arabic operators are handled here directly: category I
        // in postfix form, no entry for the other forms.
        return is_postfix ? MathMLOperatorDictionaryCategory::kI
                          : MathMLOperatorDictionaryCategory::kNone;
      }
      if (content[1] == kCombiningLongSolidusOverlay ||
          content[1] == kCombiningLongVerticalLineOverlay) {
        // A character followed by COMBINING LONG SOLIDUS OVERLAY or COMBINING
        // LONG VERTICAL LINE OVERLAY inherits the properties of the character
        // alone.
        key = content[0];
        break;
      }
      // Otherwise, binary-search the sorted list of 2-ASCII-chars operators
      // and map a hit at index i to kCombiningMinusSignBelow + i.
      const char** const operators_end = UNSAFE_TODO(
          operators_2_ascii_chars + std::size(operators_2_ascii_chars));
      const auto sorts_before = [](const char* candidate,
                                   const String& needle) -> bool {
        if (candidate[0] != needle[0]) {
          return candidate[0] < needle[0];
        }
        return UNSAFE_TODO(candidate[1] < needle[1]);
      };
      const char** const match = std::lower_bound(
          operators_2_ascii_chars, operators_end, content, sorts_before);
      if (match != operators_end && content == *match) {
        key = kCombiningMinusSignBelow + (match - operators_2_ascii_chars);
      }
      break;
    }
    default:
      break;
  }

  if (key == 0) {
    return MathMLOperatorDictionaryCategory::kNone;
  }

  // Step 2: categories that are not encoded in the compact dictionary.
  // https://w3c.github.io/mathml-core/#operator-dictionary-categories-values
  if (is_infix) {
    if (key == kVerticalLineCharacter || key == kTildeOperatorCharacter) {
      return MathMLOperatorDictionaryCategory::kForceDefault;
    }
    if (key == kComma || key == kColon || key == kSemiColon) {
      return MathMLOperatorDictionaryCategory::kM;
    }
  } else if (is_prefix) {
    const bool is_double_struck_d =
        kDoubleStruckItalicCapitalDCharacter <= key &&
        key <= kDoubleStruckItalicSmallDCharacter;
    const bool is_root =
        kSquareRootCharacter <= key && key <= kFourthRootCharacter;
    if (is_double_struck_d || key == kPartialDifferential || is_root) {
      return MathMLOperatorDictionaryCategory::kL;
    }
  }

  // Step 3: turn the code point into a compact dictionary key.
  if (kEnQuadCharacter <= key && key <= kHellschreiberPauseSymbol) {
    // Shift the range U+2000-U+2BFF down to U+0400-U+0FFF so that it fits
    // into 12 bits: (U+2000 - U+0400) == 0x1C00.
    key -= 0x1C00;
  } else if (key > kGreekCapitalReversedDottedLunateSigmaSymbol) {
    return MathMLOperatorDictionaryCategory::kNone;
  }
  // The form occupies the next two bits (infix=00, prefix=01, postfix=10).
  if (is_prefix) {
    key |= 0x1000;
  } else if (is_postfix) {
    key |= 0x2000;
  }
  DCHECK_LE(key, 0x2FFF);

  // Step 4: binary-search the compact dictionary for the last row whose first
  // key is not greater than |key|.
  const EntryRange* const dictionary_end =
      UNSAFE_TODO(compact_dictionary + std::size(compact_dictionary));
  const EntryRange* row = std::upper_bound(
      compact_dictionary, dictionary_end, key,
      [](uint16_t needle, const EntryRange& range) -> bool {
        return needle < ExtractKey(range);
      });
  if (row == compact_dictionary) {
    // Every row starts after |key|.
    return MathMLOperatorDictionaryCategory::kNone;
  }
  UNSAFE_TODO(--row);

  const uint16_t first_key_in_row = ExtractKey(*row);
  DCHECK_LE(first_key_in_row, key);
  if (key > first_key_in_row + row->range_bounds_delta) {
    // |key| falls into the gap between this row and the next one.
    return MathMLOperatorDictionaryCategory::kNone;
  }

  // Step 5: an entry was found, translate its encoded category.
  // https://w3c.github.io/mathml-core/#operator-dictionary-categories-values
  switch (ExtractCategory(*row)) {
    case 0x0:
      return MathMLOperatorDictionaryCategory::kA;
    case 0x4:
      return MathMLOperatorDictionaryCategory::kB;
    case 0x8:
      return MathMLOperatorDictionaryCategory::kC;
    case 0xC:
    case 0x1:
    case 0x2:
      return MathMLOperatorDictionaryCategory::kDorEorK;
    case 0x5:
    case 0x6:
      return MathMLOperatorDictionaryCategory::kForG;
    case 0x9:
      return MathMLOperatorDictionaryCategory::kH;
    case 0xA:
      return MathMLOperatorDictionaryCategory::kI;
    case 0xD:
      return MathMLOperatorDictionaryCategory::kJ;
  }

  NOTREACHED();
}

}  // namespace blink