File: hyphenator_aosp.h

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (92 lines) | stat: -rw-r--r-- 3,625 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/* ***** BEGIN LICENSE BLOCK *****
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ***** END LICENSE BLOCK ***** */

#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HYPHENATION_HYPHENATOR_AOSP_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HYPHENATION_HYPHENATOR_AOSP_H_

/**
 * An implementation of Liang's hyphenation algorithm.
 */

#include "base/memory/raw_ptr.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"

namespace android {

// Forward declaration of the hyb file header. Its layout and other
// implementation details are defined in the .cpp file; in this header it is
// only referred to through pointers (see Hyphenator::getHeader()).
struct Header;

// Liang-style hyphenator operating on binary pattern data.
class Hyphenator {
 public:
  // Note: a locale will eventually be required here as well, so that case
  // folding behaves properly.
  static Hyphenator* load(const uint16_t* patternData, size_t size);

  // Computes the hyphenation of |word| and stores it in the |result| vector.
  //
  // Every entry of |result| is a "hyphen edit" that applies at the matching
  // code unit offset in the word. For now the only values are:
  //   0 - no hyphen
  //   1 - insert hyphen and break
  // but further edits are expected to be added for nonstandard hyphenation.
  //
  // Example: for the word "hyphen" the result is [0 0 1 0 0 0], which
  // corresponds to "hy-phen".
  void hyphenate(Vector<uint8_t>* result, const uint16_t* word, wtf_size_t len);

  // Creates a hyphenator from pattern data in the binary format described in
  // doc/hyb_file_format.md.
  //
  // Lifetime: the caller must make sure the pattern data lives at least as
  // long as the Hyphenator object.
  //
  // nullptr is valid input; in that case the hyphenator only processes soft
  // hyphens.
  static Hyphenator* loadBinary(const uint8_t* patternData);

 private:
  // Applies soft hyphens only; patterns are ignored.
  void hyphenateSoft(uint8_t* result, const uint16_t* word, wtf_size_t len);

  // Looks up |word| in the alphabet table. Returns false if any code unit
  // fails to map. Note that this method writes len+2 entries into
  // |alpha_codes| (including start and stop).
  bool alphabetLookup(uint16_t* alpha_codes,
                      const uint16_t* word,
                      wtf_size_t len);

  // Calculates the hyphenation from patterns. Assumes the alphabet lookup has
  // already been done, i.e. |codes| holds its output.
  void hyphenateFromCodes(uint8_t* result,
                          const uint16_t* codes,
                          wtf_size_t len);

  // TODO: turn these into parameters; they might vary by locale, screen size,
  // and possibly explicit user control.
  static const int MIN_PREFIX = 2;
  static const int MIN_SUFFIX = 3;

  // Measured in UTF-16 code units. See also LONGEST_HYPHENATED_WORD in
  // LineBreaker.cpp; the use case here is slightly different: this constant
  // lets temporary buffers be stack-allocated without waste.
  static const wtf_size_t MAX_HYPHENATED_SIZE = 64;

  // Binary pattern data.
  raw_ptr<const uint8_t> patternData;

  // Accessor for the binary data: views the start of the pattern data as the
  // hyb file header.
  const Header* getHeader() const {
    const uint8_t* const raw_bytes = patternData.get();
    return reinterpret_cast<const Header*>(raw_bytes);
  }
};

}  // namespace android

#endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HYPHENATION_HYPHENATOR_AOSP_H_