File: RegexParticipant.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (98 lines) | stat: -rw-r--r-- 2,941 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import _StringProcessing
import RegexBuilder

/*

 TODO: We probably want to allow participants to register
 multiple variations or strategies.

 We have:

 1) DSL vs literal
 2) HareVM, TortoiseVM, transpile to PEG, transpile to
    MatchingEngine

*/


/// Participant whose line parser is the file's `graphemeBreakPropertyData(forLine:)`
/// function.
struct RegexDSLParticipant: Participant {
  static var name: String {
    return "Regex DSL"
  }

  /// Produces a function that parses a grapheme break entry from a line.
  static func graphemeBreakProperty() throws -> (String) -> GraphemeBreakEntry? {
    let parseLine: (String) -> GraphemeBreakEntry? = graphemeBreakPropertyData(forLine:)
    return parseLine
  }
}

/// Participant whose line parser is the file's
/// `graphemeBreakPropertyDataLiteral(forLine:)` function.
struct RegexLiteralParticipant: Participant {
  static var name: String {
    return "Regex Literal"
  }

  /// Produces a function that parses a grapheme break entry from a line.
  static func graphemeBreakProperty() throws -> (String) -> GraphemeBreakEntry? {
    let parseLine: (String) -> GraphemeBreakEntry? = graphemeBreakPropertyDataLiteral(forLine:)
    return parseLine
  }
}

// MARK: - Regex literal

/// Converts the captures of a matched line into a `GraphemeBreakEntry`.
///
/// - Parameter match: The whole match followed by three captures: the text of
///   the lower scalar, the optional text of the upper scalar, and the text of
///   the property name.
/// - Returns: The entry covering `lower...upper` (or just `lower` when no upper
///   scalar was captured), or `nil` when either scalar or the property name
///   fails to convert, or when the upper scalar sorts below the lower one.
private func extractFromCaptures(
  _ match: (Substring, Substring, Substring?, Substring)
) -> GraphemeBreakEntry? {
  guard let lowerScalar = Unicode.Scalar(hex: match.1),
        // A missing upper capture falls back to the lower scalar; a present
        // but unconvertible one yields nil and fails this guard.
        let upperScalar = match.2.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
        let property = Unicode.GraphemeBreakProperty(match.3),
        // `...` traps on inverted bounds, so reject such input here and let
        // the caller see the same `nil` as for any other malformed line.
        lowerScalar <= upperScalar
  else {
    return nil
  }
  return GraphemeBreakEntry(lowerScalar...upperScalar, property)
}

/// Matches `line` in its entirety against `regex` and converts the captures
/// into a `GraphemeBreakEntry`.
///
/// - Parameters:
///   - line: The text to match as a whole.
///   - regex: A regex component whose output is the whole match plus three
///     captures, the second of which is optional.
/// - Returns: The converted entry, or `nil` when the line does not match or
///   `extractFromCaptures` rejects the captures.
@inline(__always) // get rid of generic please
private func graphemeBreakPropertyData<RP: RegexComponent>(
  forLine line: String,
  using regex: RP
) -> GraphemeBreakEntry? where RP.RegexOutput == (Substring, Substring, Substring?, Substring) {
  guard let wholeLineMatch = line.wholeMatch(of: regex) else {
    return nil
  }
  return extractFromCaptures(wholeLineMatch.output)
}

/// Parses `line` with a regex compiled from a pattern string.
///
/// The pattern captures a run of `[0-9A-F]`, an optional second run after
/// `..`, and a run of word characters following a `;` surrounded by
/// whitespace; anything after that is matched but not captured.
///
/// - Parameter line: The text to match as a whole.
/// - Returns: The parsed entry, or `nil` when the line does not match or its
///   captures cannot be converted.
private func graphemeBreakPropertyDataLiteral(
  forLine line: String
) -> GraphemeBreakEntry? {
  typealias Captures = (Substring, Substring, Substring?, Substring)

  let pattern = #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#
  // The pattern is a fixed literal, so a compile failure here is a programmer
  // error rather than a runtime condition.
  let compiled = try! Regex(pattern, as: Captures.self)
  return graphemeBreakPropertyData(forLine: line, using: compiled)
}

// MARK: - Builder DSL

/// Parses `line` with a regex assembled through the RegexBuilder DSL.
///
/// The regex captures one or more hex digits converted with
/// `Unicode.Scalar(hex:)`, an optional second such capture after `..`, and one
/// or more word characters converted with `Unicode.GraphemeBreakProperty(_:)`
/// following a `;` surrounded by whitespace. Any trailing text is matched but
/// not captured.
///
/// - Parameter line: The text to match as a whole.
/// - Returns: The entry covering `lower...upper` (or just `lower` when no upper
///   scalar is present), or `nil` when the line does not match, a capture
///   transform fails, or the upper scalar sorts below the lower one.
private func graphemeBreakPropertyData(
  forLine line: String
) -> GraphemeBreakEntry? {
  let match = line.wholeMatch {
    TryCapture(OneOrMore(.hexDigit)) { Unicode.Scalar(hex: $0) }
    Optionally {
      ".."
      TryCapture(OneOrMore(.hexDigit)) { Unicode.Scalar(hex: $0) }
    }
    OneOrMore(.whitespace)
    ";"
    OneOrMore(.whitespace)
    TryCapture(OneOrMore(.word)) { Unicode.GraphemeBreakProperty($0) }
    ZeroOrMore(.any)
  }
  guard let output = match?.output else {
    return nil
  }

  let (_, lower, upper, property) = output
  // A line without the `..upper` part describes a single scalar.
  let upperBound = upper ?? lower
  // `...` traps on inverted bounds; report such a line as unparseable instead.
  guard lower <= upperBound else {
    return nil
  }
  return GraphemeBreakEntry(lower...upperBound, property)
}