File: GraphemeBreaking.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (61 lines) | stat: -rw-r--r-- 1,947 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// Grapheme breaking tests are currently only available on Darwin, awaiting a
// sensible file API...
#if _runtime(_ObjC)
import Foundation

/// A single grapheme-break test case parsed from one line of test data.
///
/// A test line starts with `÷` and lists hexadecimal scalar values separated
/// by `÷` (break opportunity) or `×` (no break opportunity) markers,
/// optionally followed by a `#` comment, for example:
///
///     ÷ 0020 × 0308 ÷ 0020 ÷ # comment
public struct GraphemeBreakTest {
  /// The string formed by every scalar on the line, in order of appearance.
  public let string: String

  /// The scalars of `string`, grouped into the runs delimited by `÷` markers.
  public let pieces: [[Unicode.Scalar]]

  /// Parses one line of test data.
  ///
  /// - Parameter line: A single line of text, without its line terminator.
  /// - Returns: `nil` if `line` does not start with `÷`, or if any field that
  ///   is not a `÷`/`×` marker fails to parse as a hexadecimal Unicode scalar
  ///   value.
  init?(line: some StringProtocol) {
    // Only look at actual tests
    guard line.hasPrefix("÷") else { return nil }

    // Everything from the first `#` onwards is commentary. The line starts
    // with `÷`, so the first piece is never empty and `info[0]` always exists.
    let info = line.split(separator: "#")

    // Split on any whitespace, not just U+0020: a tab (or similar) between
    // the final `÷` and the `#` comment would otherwise stay glued to the
    // marker, fail the hexadecimal parse below and reject the whole line.
    let components = info[0].split(whereSeparator: { $0.isWhitespace })

    var string = ""
    var pieces: [[Unicode.Scalar]] = []

    // Scalars collected since the last break opportunity.
    var piece: [Unicode.Scalar] = []
    for component in components {
      switch component {
      case "×": // no grapheme break opportunity
        break
      case "÷": // grapheme break opportunity
        // The leading `÷` (and any repeated `÷`) closes nothing.
        guard !piece.isEmpty else { break }
        pieces.append(piece)
        piece = []
      case _: // hexadecimal scalar value
        guard let value = UInt32(component, radix: 16) else { return nil }
        // Rejects surrogates and values above U+10FFFF.
        guard let scalar = Unicode.Scalar(value) else { return nil }
        string.unicodeScalars.append(scalar)
        piece.append(scalar)
      }
    }
    // Flush a trailing run that was not terminated by a final `÷`.
    if !piece.isEmpty { pieces.append(piece) }
    self.string = string
    self.pieces = pieces
  }
}

/// Every test case that could be parsed from the "GraphemeBreakTest.txt" input
/// file. Lines for which `GraphemeBreakTest(line:)` returns `nil` are skipped.
///
/// NOTE(review): `readInputFile` is defined elsewhere; presumably it returns
/// the file's contents as text — confirm against its declaration.
public let graphemeBreakTests: [GraphemeBreakTest] = {
  var parsed: [GraphemeBreakTest] = []
  for line in readInputFile("GraphemeBreakTest.txt").split(separator: "\n") {
    guard let test = GraphemeBreakTest(line: line) else { continue }
    parsed.append(test)
  }
  return parsed
}()
#endif