1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//
import _StringProcessing
import RegexBuilder
/*
TODO: We probably want to allow participants to register
multiple variations or strategies.
We have:
1) DSL vs literal
2) HareVM, TortoiseVM, transpile to PEG, transpile to
MatchingEngine
*/
/// Participant whose line parser is the builder-DSL based
/// `graphemeBreakPropertyData(forLine:)` defined in this file.
struct RegexDSLParticipant: Participant {
  static var name: String {
    return "Regex DSL"
  }

  /// Produces a function that will parse a grapheme break entry from a line.
  static func graphemeBreakProperty() throws -> (String) -> GraphemeBreakEntry? {
    return { line in
      graphemeBreakPropertyData(forLine: line)
    }
  }
}
/// Participant whose line parser is the string-pattern based
/// `graphemeBreakPropertyDataLiteral(forLine:)` defined in this file.
struct RegexLiteralParticipant: Participant {
  static var name: String {
    return "Regex Literal"
  }

  /// Produces a function that will parse a grapheme break entry from a line.
  static func graphemeBreakProperty() throws -> (String) -> GraphemeBreakEntry? {
    let parseLine: (String) -> GraphemeBreakEntry? =
      graphemeBreakPropertyDataLiteral(forLine:)
    return parseLine
  }
}
// MARK: - Regex literal
/// Converts the captures of a matched grapheme break line into an entry.
///
/// - Parameter match: The whole match followed by three captures: the lower
///   bound in hex, the optional upper bound in hex, and the property name.
/// - Returns: The entry covering `lower...upper` (or just `lower` when no
///   upper bound was captured), or `nil` when a bound or the property cannot
///   be converted, or when the upper bound is below the lower bound.
private func extractFromCaptures(
  _ match: (Substring, Substring, Substring?, Substring)
) -> GraphemeBreakEntry? {
  let (_, lowerHex, upperHex, propertyName) = match
  guard let lowerScalar = Unicode.Scalar(hex: lowerHex),
        // A missing upper bound falls back to the lower bound; an upper bound
        // that is present but not convertible still fails the guard.
        let upperScalar = upperHex.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
        let property = Unicode.GraphemeBreakProperty(propertyName),
        // `...` traps on inverted bounds, so reject such input here instead
        // of crashing on a malformed line.
        lowerScalar <= upperScalar
  else {
    return nil
  }
  return GraphemeBreakEntry(lowerScalar...upperScalar, property)
}
/// Matches `line` in its entirety against `regex` and hands the captures to
/// `extractFromCaptures`.
///
/// - Parameters:
///   - line: The line to parse.
///   - regex: A regex component whose output is the whole match plus the
///     lower bound, optional upper bound, and property name captures.
/// - Returns: `nil` when the line does not match as a whole or when
///   `extractFromCaptures` returns `nil` for the captures.
@inline(__always) // get rid of generic please
private func graphemeBreakPropertyData<RP: RegexComponent>(
  forLine line: String,
  using regex: RP
) -> GraphemeBreakEntry? where RP.RegexOutput == (Substring, Substring, Substring?, Substring) {
  guard let wholeLine = line.wholeMatch(of: regex) else {
    return nil
  }
  return extractFromCaptures(wholeLine.output)
}
/// Parses a grapheme break entry from `line` using a regex built from a
/// string pattern.
///
/// The regex is constructed on every call. `try!` is used because the
/// pattern is a fixed literal, so a construction failure would be a
/// programming error.
///
/// - Returns: The parsed entry, or `nil` when the generic
///   `graphemeBreakPropertyData(forLine:using:)` returns `nil`.
private func graphemeBreakPropertyDataLiteral(
  forLine line: String
) -> GraphemeBreakEntry? {
  // Whole match, lower bound, optional upper bound, property name.
  typealias Captures = (Substring, Substring, Substring?, Substring)
  let pattern = #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#
  let compiled = try! Regex(pattern, as: Captures.self)
  return graphemeBreakPropertyData(forLine: line, using: compiled)
}
// MARK: - Builder DSL
/// Parses a grapheme break entry from `line` using the regex builder DSL.
///
/// The whole line must consist of: one or more hex digits (lower bound),
/// optionally `..` followed by one or more hex digits (upper bound),
/// whitespace, `;`, whitespace, one or more word characters (property name),
/// and then any trailing characters.
///
/// - Returns: The entry covering `lower...upper` (or just `lower` when no
///   upper bound is present), or `nil` when the line does not match or when
///   the upper bound is below the lower bound.
private func graphemeBreakPropertyData(
  forLine line: String
) -> GraphemeBreakEntry? {
  line.wholeMatch {
    TryCapture(OneOrMore(.hexDigit)) { Unicode.Scalar(hex: $0) }
    Optionally {
      ".."
      TryCapture(OneOrMore(.hexDigit)) { Unicode.Scalar(hex: $0) }
    }
    OneOrMore(.whitespace)
    ";"
    OneOrMore(.whitespace)
    TryCapture(OneOrMore(.word)) { Unicode.GraphemeBreakProperty($0) }
    ZeroOrMore(.any)
  }.flatMap { match -> GraphemeBreakEntry? in
    let (_, lower, capturedUpper, property) = match.output
    let upper = capturedUpper ?? lower
    // `...` traps on inverted bounds, so reject such input here instead of
    // crashing on a malformed line.
    guard lower <= upper else {
      return nil
    }
    return GraphemeBreakEntry(lower...upper, property)
  }
}
|