File: ParseStrategy+RegexComponentTests.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (333 lines) | stat: -rw-r--r-- 16,272 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// RUN: %target-run-simple-swift
// REQUIRES: executable_test
// REQUIRES: objc_interop

import RegexBuilder

#if canImport(TestSupport)
import TestSupport
#endif

final class ParseStrategyMatchTests: XCTestCase {

    let enUS = Locale(identifier: "en_US")
    let enGB = Locale(identifier: "en_GB")
    let gmt = TimeZone(secondsFromGMT: 0)!
    let pst = TimeZone(secondsFromGMT: -3600*8)!

    /// Verifies that an ISO 8601 timestamp is located and parsed when it is
    /// preceded by emoji made of several Unicode scalars.
    func testDate() {
        let regex = Regex {
            OneOrMore {
                Capture { Date.ISO8601FormatStyle() }
            }
        }

        // The emoji prefix is spelled with Unicode escapes so that the literal survives
        // tools that re-encode non-ASCII bytes:
        //   U+1F481 U+1F3FD                  person tipping hand + medium skin tone
        //   U+1F3F3 U+FE0F U+200D U+1F308    rainbow flag (ZWJ sequence)
        let input = "\u{1F481}\u{1F3FD}\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}2021-07-01T15:56:32Z"

        guard let res = input.firstMatch(of: regex) else {
            XCTFail("Expected to find an ISO 8601 date in <\(input)>")
            return
        }

        // The whole match must start after the emoji prefix.
        XCTAssertEqual(res.output.0, "2021-07-01T15:56:32Z")
        // 2021-07-01 15:56:32 UTC expressed as seconds since 2001-01-01 00:00:00 UTC.
        XCTAssertEqual(res.output.1, Date(timeIntervalSinceReferenceDate: 646847792.0))
    }

    /// Finds the "day month year" portion of the `Date:` line in an HTTP response
    /// header and checks both the matched text and the parsed `Date`.
    func testAPIHTTPHeader() {

        let response = """
        HTTP/1.1 301 Redirect
        Date: Wed, 16 Feb 2022 23:53:19 GMT
        Connection: close
        Location: https://www.apple.com/
        Content-Type: text/html
        Content-Language: en
        """

        // 2022-02-16 00:00:00 GMT as seconds since the reference date.
        let midnightOfHeaderDate = Date(timeIntervalSinceReferenceDate: 666662400.0)

        // Only day, month and year are part of the format, so the weekday and the
        // time of day in the header are not part of the match.
        let dayMonthYear = Regex {
            Capture {
                .date(format: "\(day: .twoDigits) \(month: .abbreviated) \(year: .padded(4))", locale: Locale(identifier: "en_US"), timeZone: TimeZone(identifier: "GMT")!)
            }
        }

        if let found = response.firstMatch(of: dayMonthYear) {
            let (matchedText, parsedDate) = found.output
            XCTAssertEqual(matchedText, "16 Feb 2022")
            XCTAssertEqual(parsedDate, midnightOfHeaderDate)
        } else {
            XCTFail()
        }
    }

// https://github.com/apple/swift-foundation/issues/60
#if FOUNDATION_FRAMEWORK
    /// Extracts dates and accounting-style USD amounts from a bank statement,
    /// first with separate regexes, then with one combined regex, and finally
    /// with the `.numeric` date style.
    func testAPIStatement() {

        let ledger = """
CREDIT    04/06/2020    Paypal transfer        $4.99
DSLIP    04/06/2020    REMOTE ONLINE DEPOSIT  $3,020.85
CREDIT    04/03/2020    PAYROLL                $69.73
DEBIT    04/02/2020    ACH TRNSFR             ($38.25)
DEBIT    03/31/2020    Payment to BoA card    ($27.44)
DEBIT    03/24/2020    IRX tax payment        ($52,249.98)
"""

        // Expected values, one entry per statement row.
        let dateTexts: [Substring] = ["04/06/2020", "04/06/2020", "04/03/2020", "04/02/2020", "03/31/2020", "03/24/2020"]
        let dates = [
            607824000.0, // 2020-04-06 00:00:00 GMT
            607824000.0, // 2020-04-06 00:00:00 GMT
            607564800.0, // 2020-04-03 00:00:00 GMT
            607478400.0, // 2020-04-02 00:00:00 GMT
            607305600.0, // 2020-03-31 00:00:00 GMT
            606700800.0, // 2020-03-24 00:00:00 GMT
        ].map { Date(timeIntervalSinceReferenceDate: $0) }
        // Amounts written in parentheses in the statement are expected to parse as negative.
        let amounts = ["4.99", "3020.85", "69.73", "-38.25", "-27.44", "-52249.98"].map { Decimal(string: $0)! }

        // 1. Currency amounts on their own.
        let amountPattern = Regex {
            Capture {
                .localizedCurrency(code: "USD", locale: enUS).sign(strategy: .accounting)
            }
        }
        let amountHits = ledger.matches(of: amountPattern)
        XCTAssertEqual(amountHits.map { $0.output.0 }, ["$4.99", "$3,020.85", "$69.73", "($38.25)", "($27.44)", "($52,249.98)"])
        XCTAssertEqual(amountHits.map { $0.output.1 }, amounts)

        // 2. Dates on their own, using an explicit MM/dd/yyyy format.
        let datePattern = Regex {
            Capture {
                .date(format:"\(month: .twoDigits)/\(day: .twoDigits)/\(year: .defaultDigits)", locale: enUS, timeZone: gmt)
            }
        }
        let dateHits = ledger.matches(of: datePattern)
        XCTAssertEqual(dateHits.map { $0.output.0 }, dateTexts)
        XCTAssertEqual(dateHits.map { $0.output.1 }, dates)

        // 3. Date, free-form description and amount in a single pattern.
        let anyCharacter = try! Regex(#"."#)
        let rowPattern = Regex {
            Capture {
                .date(format:"\(month: .twoDigits)/\(day: .twoDigits)/\(year: .defaultDigits)", locale: enUS, timeZone: gmt)
            }
            "    "
            OneOrMore(anyCharacter)
            "  "
            Capture {
                .localizedCurrency(code: "USD", locale: enUS).sign(strategy: .accounting)
            }
        }
        let rowHits = ledger.matches(of: rowPattern)
        let rowTexts = rowHits.map { $0.output.0 }
        XCTAssertEqual(rowTexts, [
            "04/06/2020    Paypal transfer        $4.99",
            "04/06/2020    REMOTE ONLINE DEPOSIT  $3,020.85",
            "04/03/2020    PAYROLL                $69.73",
            "04/02/2020    ACH TRNSFR             ($38.25)",
            "03/31/2020    Payment to BoA card    ($27.44)",
            "03/24/2020    IRX tax payment        ($52,249.98)",
        ])
        XCTAssertEqual(rowHits.map { $0.output.1 }, dates)
        XCTAssertEqual(rowHits.map { $0.output.2 }, amounts)

        // 4. Same dates again, this time through the predefined `.numeric` date style.
        let numericPattern = Regex {
            Capture(.date(.numeric, locale: enUS, timeZone: gmt))
        }
        let numericHits = ledger.matches(of: numericPattern)
        XCTAssertEqual(numericHits.map { $0.output.0 }, dateTexts)
        XCTAssertEqual(numericHits.map { $0.output.1 }, dates)
    }

    /// Matches "<date>    <description>  <amount>" rows of a statement whose
    /// descriptions themselves contain date-like and number-like text, and checks
    /// that only the leading date and the trailing ISO-code amount are captured.
    func testAPIStatements2() {
        // Test dates and numbers appearing in unexpected places.
        //
        // The currency code and the amount are separated by U+00A0 NO-BREAK SPACE, written
        // as an escape so the literal cannot be silently corrupted by re-encoding.
        // NOTE(review): U+00A0 is the separator the en_US ISO-code currency pattern is
        // expected to use; confirm against the locale data if this test ever regresses.
        let statement = """
CREDIT   Apr 06/20    Zombie 5.29lb@$3.99/lb       USD\u{00A0}21.11
DSLIP    Apr 06/20    GMT gain                     USD\u{00A0}3,020.85
CREDIT   Apr 03/20    PAYROLL 03/29/20-04/02/20    USD\u{00A0}69.73
DEBIT    Apr 02/20    ACH TRNSFR Apr 02/20         -USD\u{00A0}38.25
DEBIT    Mar 31/20    March Payment to BoA         -USD\u{00A0}52,249.98
"""

        let dot = try! Regex(#"."#)
        let dateCurrencyRegex = Regex {
            // Capture 1: the transaction date, e.g. "Apr 06/20".
            Capture {
                .date(format:"\(month: .abbreviated) \(day: .twoDigits)/\(year: .twoDigits)", locale: enUS, timeZone: gmt)
            }
            "    "
            // Capture 2: the free-form description (not asserted below).
            Capture(OneOrMore(dot))
            "  "
            // Capture 3: the amount, rendered with the ISO currency code.
            Capture {
                .localizedCurrency(code: "USD", locale: enUS).presentation(.isoCode)
            }
        }

        let expectedDates = [
            Date(timeIntervalSinceReferenceDate: 607824000.0), // "2020-04-06 00:00:00.000"
            Date(timeIntervalSinceReferenceDate: 607824000.0), // "2020-04-06 00:00:00.000"
            Date(timeIntervalSinceReferenceDate: 607564800.0), // "2020-04-03 00:00:00.000"
            Date(timeIntervalSinceReferenceDate: 607478400.0), // "2020-04-02 00:00:00.000"
            Date(timeIntervalSinceReferenceDate: 607305600.0), // "2020-03-31 00:00:00.000"
        ]
        let expectedAmounts = [Decimal(string:"21.11")!, Decimal(string:"3020.85")!, Decimal(string:"69.73")!, Decimal(string:"-38.25")!, Decimal(string:"-52249.98")!]

        let matches = statement.matches(of: dateCurrencyRegex)
        XCTAssertEqual(matches.map(\.output.0), [
            "Apr 06/20    Zombie 5.29lb@$3.99/lb       USD\u{00A0}21.11",
            "Apr 06/20    GMT gain                     USD\u{00A0}3,020.85",
            "Apr 03/20    PAYROLL 03/29/20-04/02/20    USD\u{00A0}69.73",
            "Apr 02/20    ACH TRNSFR Apr 02/20         -USD\u{00A0}38.25",
            "Mar 31/20    March Payment to BoA         -USD\u{00A0}52,249.98",
        ])
        XCTAssertEqual(matches.map(\.output.1), expectedDates)
        XCTAssertEqual(matches.map(\.output.3), expectedAmounts)
    }

    /// Parses a single XCTest-style "Test Suite ... started at ..." log line into
    /// suite name, status and timestamp.
    func testAPITestSuites() {
        let logLine = "Test Suite 'MergeableSetTests' started at 2021-07-08 10:19:35.418"

        let suitePattern = Regex {
            "Test Suite '"
            // Suite name: reluctant, so it stops at the first closing quote.
            Capture(OneOrMore(.any, .reluctant))
            "' "
            // Status keyword, converted from Substring to String.
            TryCapture {
                ChoiceOf {
                    "started"
                    "passed"
                    "failed"
                }
            } transform: { status in
                String(status)
            }
            " at "
            // Timestamp: ISO 8601 with a space between date and time, and fractional seconds.
            Capture(.iso8601(timeZone: gmt,
                             includingFractionalSeconds: true,
                             dateTimeSeparator: .space))
            Optionally(".")
        }

        if let parsed = logLine.wholeMatch(of: suitePattern) {
            let (wholeLine, suiteName, status, startedAt) = parsed.output
            XCTAssertEqual(wholeLine, "Test Suite 'MergeableSetTests' started at 2021-07-08 10:19:35.418")
            XCTAssertEqual(suiteName, "MergeableSetTests")
            XCTAssertEqual(status, "started")
            // 2021-07-08 10:19:35.418 GMT as seconds since the reference date.
            XCTAssertEqual(startedAt, Date(timeIntervalSinceReferenceDate: 647432375.418))
        } else {
            XCTFail()
        }
    }
#endif

    /// Exercises `Date.ParseStrategy` as a whole-string regex component across
    /// locales, time zones, predefined styles and custom formats.
    func testVariousDatesAndTimes() {
        /// Asserts that `str` whole-matches `strategy` and yields the date written
        /// in ISO 8601 form in `expected`, or that it does not match when `expected` is `nil`.
        ///
        /// - Parameters:
        ///   - str: The input to match.
        ///   - strategy: The parse strategy used as the regex component.
        ///   - expected: ISO 8601 representation of the expected date, or `nil` if no match is expected.
        ///   - file: Call-site file, forwarded to the XCTest assertions.
        ///   - line: Call-site line, forwarded to the XCTest assertions.
        func verify(_ str: String, _ strategy: Date.ParseStrategy, _ expected: String?, file: StaticString = #filePath, line: UInt = #line) {
            let match = str.wholeMatch(of: strategy) // Regex<Date>.Match?

            guard let expected else {
                XCTAssertNil(match, "<\(str)> should not match, but it did", file: file, line: line)
                return
            }

            guard let match else {
                // Re-run the strategy as a plain parse so the reason for the mismatch is
                // attached to the failure itself instead of being printed separately.
                let information: String
                do {
                    let parsed = try strategy.parse(str)
                    information = "parse(_:) succeeded with \(parsed)"
                } catch {
                    information = "error: \(error)"
                }
                XCTFail("<\(str)> did not match, but it should. Information: \(information)", file: file, line: line)
                return
            }

            let expectedDate = try! Date(expected, strategy: .iso8601)
            XCTAssertEqual(match.0, expectedDate, file: file, line: line)
        }

        verify("03/05/2020", .date(.numeric, locale: enUS, timeZone: gmt), "2020-03-05T00:00:00+00:00")
        verify("03/05/2020", .date(.numeric, locale: enGB, timeZone: gmt), "2020-05-03T00:00:00+00:00")
        verify("03/05/2020, 4:29:24\u{202f}PM", .dateTime(date: .numeric, time: .standard, locale: enUS, timeZone: pst), "2020-03-05T16:29:24-08:00")
        verify("03/05/2020, 16:29:24", .dateTime(date: .numeric, time: .standard, locale: enGB, timeZone: gmt), "2020-05-03T16:29:24+00:00")
        verify("03/05/2020, 4:29:24 PM", .dateTime(date: .numeric, time: .standard, locale: enGB, timeZone: pst), nil) // en_GB uses 24-hour time, therefore it does not parse "PM"
        // Passing in time zone does nothing when the string contains the time zone and matches the style
        verify("03/05/2020, 4:29:24\u{202f}PM PDT", .dateTime(date: .numeric, time: .complete, locale: enUS, timeZone: pst), "2020-03-05T16:29:24-07:00")
        verify("03/05/2020, 16:29:24 GMT-7", .dateTime(date: .numeric, time: .complete, locale: enGB, timeZone: gmt), "2020-05-03T16:29:24-07:00")

        verify("03_05_2020", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)_\(year: .defaultDigits)", locale: enUS, timeZone: gmt), "2020-03-05T00:00:00+00:00")
        verify("03_05_89", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)_\(year: .twoDigits)", locale: enUS, timeZone: gmt), "1989-03-05T00:00:00+00:00")
        verify("03_05_69", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)_\(year: .twoDigits)", locale: enUS, timeZone: gmt), "2069-03-05T00:00:00+00:00")

        verify("03_05_89", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)_\(year: .twoDigits)", locale: enUS, timeZone: pst), "1989-03-05T00:00:00-08:00")
        // The default two-digit start date is Jan 1st, 1970, 00:00:00 in the GMT time zone, which is still
        // Dec 31st, 1969 in the pst time zone, so a missing year and the year "69" both resolve to 1969 there.
        verify("03_05", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)", locale: enUS, timeZone: pst), "1969-03-05T00:00:00-08:00")
        verify("03_05_69", .date(format: "\(month: .twoDigits)_\(day: .twoDigits)_\(year: .twoDigits)", locale: enUS, timeZone: pst), "1969-03-05T00:00:00-08:00")

        verify("03/05/2020", .date(.numeric, locale: enUS, timeZone: pst), "2020-03-05T08:00:00+00:00")
        verify("03/05/2020", .date(.numeric, locale: enGB, timeZone: pst), "2020-05-03T00:00:00-08:00")
    }

    /// Exercises `Date.ISO8601FormatStyle` as a whole-string regex component with
    /// different time zone, separator and date-only configurations.
    func testMatchISO8601String() {
        /// Asserts that `str` whole-matches `strategy` and produces the date spelled
        /// out in `expected`, or that there is no match when `expected` is `nil`.
        func verify(_ str: String, _ strategy: Date.ISO8601FormatStyle, _ expected: String?, file: StaticString = #filePath, line: UInt = #line) {

            let outcome = str.wholeMatch(of: strategy) // Regex<Date>.Match?

            // No expectation means the input must be rejected.
            guard let expected else {
                XCTAssertNil(outcome, "<\(str)> should not match, but it did", file: file, line: line)
                return
            }

            guard let outcome else {
                // Ask the strategy directly how far it gets, to explain the mismatch.
                let wholeRange = str.startIndex ..< str.endIndex
                let message: String
                do {
                    if let consumed = try strategy.consuming(str, startingAt: str.startIndex, in: wholeRange) {
                        message = "upperBound: \(consumed.0.utf16Offset(in: str)), output: \(consumed.1)"
                    } else {
                        message = "no matched result"
                    }
                } catch {
                    message = "error: \(error)"
                }

                XCTFail("<\(str)> did not match, but it should. Information: \(message)", file: file, line: line)
                return
            }

            let expectedDate = try! Date(expected, strategy: .iso8601)
            XCTAssertEqual(outcome.0, expectedDate, file: file, line: line)
        }

        // Default style, and styles with an implied time zone.
        verify("2020-03-05T16:29:24-08:00", .iso8601, "2020-03-05T16:29:24-08:00")
        verify("2020-03-05T16:29:24Z", .iso8601, "2020-03-05T16:29:24+00:00")
        verify("2020-03-05T16:29:24", .iso8601(timeZone: gmt), "2020-03-05T16:29:24+00:00")
        verify("2020-03-05T16:29:24", .iso8601(timeZone: pst), "2020-03-05T16:29:24-08:00")

        // `iso8601(timeZone:)` expects the string to carry no time zone,
        // so a string that does include one is not fully matched.
        verify("2020-03-05T16:29:24-08:00", .iso8601(timeZone: gmt), nil)
        verify("2020-03-05T16:29:24", .iso8601(timeZone: pst), "2020-03-05T16:29:24-08:00") // matches when current == pst

        verify("2020-03-05T16:29:24", .iso8601WithTimeZone(), nil) // The time zone must be present in the string, so this does not match
        verify("2020-03-05T16:29:24-08:00", .iso8601WithTimeZone(), "2020-03-05T16:29:24-08:00")

        // Date separator.
        verify("20200305T16:29:24-08:00",   .iso8601WithTimeZone(dateSeparator: .omitted), "2020-03-05T16:29:24-08:00")
        verify("2020-03-05T16:29:24-08:00", .iso8601WithTimeZone(dateSeparator: .omitted), nil) // Does not match "-" in "2020-03-05"

        // Separator between date and time.
        verify("2020-03-05 16:29:24-08:00", .iso8601WithTimeZone(dateTimeSeparator: .space),    "2020-03-05T16:29:24-08:00")
        verify("2020-03-05T16:29:24-08:00", .iso8601WithTimeZone(dateTimeSeparator: .space),    nil) // Does not match "T"
        verify("2020-03-05 16:29:24-08:00", .iso8601WithTimeZone(dateTimeSeparator: .standard), nil) // Does not match " "

        // Time separator.
        verify("2020-03-05T162924-08:00",   .iso8601WithTimeZone(timeSeparator: .omitted), "2020-03-05T16:29:24-08:00")
        verify("2020-03-05T16:29:24-08:00", .iso8601WithTimeZone(timeSeparator: .omitted), nil) // Does not match ":" in "16:29:24"
        verify("2020-03-05T162924-08:00",   .iso8601WithTimeZone(timeSeparator: .colon),   nil) // Does not match "162924"

        // FIXME 94663783: This passes but shouldn't since the time zone separator doesn't match
        verify("2020-03-05T16:29:24-08:00", .iso8601WithTimeZone(timeZoneSeparator: .omitted), "2020-03-05T16:29:24-08:00")

        // Date-only style.
        verify("2020-03-05",          .iso8601Date(timeZone: gmt), "2020-03-05T00:00:00+00:00")
        verify("2020-03-05T16:29:24", .iso8601Date(timeZone: pst), nil) // Does not match the time part fully

        verify("2020-03-05", .iso8601Date(timeZone: pst), "2020-03-05T00:00:00-08:00")
    }

}