1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
# Copyright (c) 2017-2026 Juancarlo Añez (apalala@gmail.com)
# SPDX-License-Identifier: BSD-4-Clause
from __future__ import annotations
import pytest
from tatsu.util.string import regexp
def test_patterns_quotes():
    """Patterns holding one kind of quote come back delimited by the other kind."""
    # (pattern, expected source text) pairs, checked in order.
    cases = [
        # A lone single quote, spelled two ways in source -> r"'"
        ("'", 'r"\'"'),
        ('\'', 'r"\'"'),
        # Input a'b is expected back as r"a'b"
        ("a'b", 'r"a\'b"'),
        # A lone double quote -> r'"' (expected text spelled two ways)
        ('"', "r'\"'"),
        ('"', r"r'" + '"' + "'"),
    ]
    for pattern, expected in cases:
        assert regexp(pattern) == expected
def test_backslash_edge_cases() -> None:
    """Backslashes that precede a single quote are carried over unchanged."""
    # One backslash plus a quote: \' is expected back as r"\'"
    single = regexp(r"\'")
    assert single == 'r"\\\'"'

    # Two backslashes plus a quote: \\' is expected back as r"\\'"
    double = regexp(r"\\'")
    assert double == 'r"\\\\\'"'
def test_patterns_newlines():
    """Check the source text produced for newline-related patterns."""
    for pattern, expected in (
        # An actual newline character and the two-character escape \n are
        # both expected back as r'\n'.
        ("\n", r"r'\n'"),
        (r"\n", r"r'\n'"),
        # An escaped backslash followed by n keeps both backslashes.
        (r"\\n", r"r'\\n'"),
    ):
        assert regexp(pattern) == expected
def test_patterns_expr():
    """Check a character class, a lone backslash, and an escaped backslash.

    The character class and the escaped backslash are expected back as
    raw-string source; the lone backslash is expected to raise ValueError.
    """
    assert regexp('[abc]') == r"r'[abc]'"

    # Only the raising call belongs inside the block: a comparison wrapped
    # around it would never be evaluated once regexp() raises.
    with pytest.raises(ValueError, match=r"Invalid regex passed to regexp\(\)"):
        regexp('\\')

    # NOTE(review): assumed to sit outside the raises block, where it
    # actually runs -- confirm against the intended layout.
    assert regexp(r'\\') == r"r'\\'"
def test_patterns_real():
    """Exercise regexp() on a realistic pattern that contains a single quote."""
    pattern = r"(REM\s|')[^\r\n]*(\r?\n|\r)"
    # The \s, \r and \n escapes are expected to come through untouched, and
    # the embedded ' is expected to sit inside a double-quoted raw string.
    expected = 'r"(REM\\s|\')[^\\r\\n]*(\\r?\\n|\\r)"'
    assert regexp(pattern) == expected
def test_roundtrip_verification() -> None:
    """Evaluate generated source back to its pattern; reject a lone backslash."""
    # Standard case: the generated source must evaluate back to the input.
    assert eval(regexp("it's")) == "it's"  # noqa: S307

    # Lone backslash case: regexp() is expected to raise, so there is no
    # generated source to eval(). Only the raising call sits inside the block,
    # because anything wrapped around it would never be evaluated.
    with pytest.raises(ValueError, match=r"Invalid regex passed to regexp\(\)"):
        regexp("\\")
def test_edge_cases() -> None:
    """Cover a run of quotes, the empty pattern, and a non-string argument."""
    cases = (
        # Three single quotes are expected inside a double-quoted raw string.
        ("'''", 'r"\'\'\'"'),
        # The empty pattern is expected as an empty raw string.
        ("", "r''"),
        # An int argument is expected back as the raw string of its digits.
        (123, "r'123'"),
    )
    for value, expected in cases:
        assert regexp(value) == expected
def test_regexp_is_runnable():
    """Source produced by regexp() must be valid Python yielding the pattern."""
    original = r"\'"
    # eval() both proves the generated text parses as Python and recovers the
    # string it denotes, which has to equal the pattern we started from.
    roundtripped = eval(regexp(original))  # noqa: S307
    assert roundtripped == original
|