File: regexp_test.py

package info (click to toggle)
python-tatsu 5.17.1%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,516 kB
  • sloc: python: 13,185; makefile: 127
file content (70 lines) | stat: -rw-r--r-- 1,907 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Copyright (c) 2017-2026 Juancarlo Añez (apalala@gmail.com)
# SPDX-License-Identifier: BSD-4-Clause
from __future__ import annotations

import pytest

from tatsu.util.string import regexp


def test_patterns_quotes():
    """Check the quoting of the generated literal for patterns holding quotes.

    The expected literal is double-quoted when the pattern contains a single
    quote, and single-quoted when it contains a double quote.
    """
    # Each pair is (pattern, expected source text of the raw-string literal).
    # Some pairs repeat the same value spelled a different way.
    cases = (
        ("'", 'r"\'"'),
        ('\'', 'r"\'"'),
        # Pattern a'b -> generated literal r"a'b"
        ("a'b", 'r"a\'b"'),
        ('"', "r'\"'"),
        ('"', r"r'" + '"' + "'"),
    )
    for pattern, expected in cases:
        assert regexp(pattern) == expected


def test_backslash_edge_cases() -> None:
    """Backslashes placed before a single quote come through unchanged."""
    # Pattern: one backslash then a quote -> generated literal r"\'"
    one_backslash = regexp(r"\'")
    assert one_backslash == 'r"\\\'"'

    # Pattern: two backslashes then a quote -> generated literal r"\\'"
    two_backslashes = regexp(r"\\'")
    assert two_backslashes == 'r"\\\\\'"'


def test_patterns_newlines():
    """A real newline and an escaped one both render as backslash-n."""
    # Maps each pattern to the source text expected from regexp(); the first
    # key is an actual newline character, the others are backslash sequences.
    expectations = {
        "\n": r"r'\n'",
        r"\n": r"r'\n'",
        r"\\n": r"r'\\n'",
    }
    for pattern, literal in expectations.items():
        assert regexp(pattern) == literal


def test_patterns_expr():
    """Check a character class, a lone backslash, and an escaped backslash."""
    assert regexp('[abc]') == r"r'[abc]'"

    # A single backslash is rejected. Only the call belongs inside the raises
    # block: a comparison on its result could never be evaluated.
    with pytest.raises(ValueError, match=r"Invalid regex passed to regexp\(\)"):
        regexp('\\')

    # An escaped backslash (two characters) is accepted and kept as written.
    assert regexp(r'\\') == r"r'\\'"


def test_patterns_real():
    """Render a longer pattern that mixes escapes with a single quote."""
    pattern = r"(REM\s|')[^\r\n]*(\r?\n|\r)"
    # The \s, \r and \n escapes are kept as written; the literal is
    # double-quoted, so the single quote appears in it unescaped.
    expected = 'r"(REM\\s|\')[^\\r\\n]*(\\r?\\n|\\r)"'
    rendered = regexp(pattern)
    assert rendered == expected


def test_roundtrip_verification() -> None:
    """Evaluating the generated literal gives back the original pattern."""
    # Standard case. eval() is acceptable here because its input is produced
    # by regexp() from a constant written in this test.
    assert eval(regexp("it's")) == "it's"  # noqa: S307

    # Trailing backslash case: regexp() rejects the pattern, so there is no
    # literal to evaluate and only the call goes inside the raises block.
    with pytest.raises(ValueError, match=r"Invalid regex passed to regexp\(\)"):
        regexp("\\")


def test_edge_cases() -> None:
    """Cover a run of quotes, the empty pattern, and a non-string argument."""
    triple_quote = regexp("'''")
    assert triple_quote == 'r"\'\'\'"'

    empty = regexp("")
    assert empty == "r''"

    # An int argument is accepted and rendered as r'123'.
    from_int = regexp(123)
    assert from_int == "r'123'"


def test_regexp_is_runnable():
    """The text produced by regexp() is valid Python that rebuilds the pattern."""
    original = r"\'"

    # Evaluate the generated source; a syntax error here would mean the
    # output is not a usable literal.
    rebuilt = eval(regexp(original))  # noqa: S307

    assert rebuilt == original