1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
"""
Pygments Perl lexer tests
~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import time
import pytest
from pygments.token import Keyword, Name, String, Text
from pygments.lexers.perl import PerlLexer
@pytest.fixture(scope='module')
def lexer():
    """Provide a ``PerlLexer`` instance, created once per test module."""
    perl_lexer = PerlLexer()
    yield perl_lexer
# Test runaway regexes.
# A previous version of the Perl lexer would spend a great deal of
# time backtracking when given particular strings. These tests show that
# the runaway backtracking doesn't happen any more (at least for the given
# cases).
# Test helpers.
def assert_single_token(lexer, s, token):
    """Assert that lexing ``s`` produces exactly one token.

    The single token must carry the whole of ``s`` as its text and have the
    token type ``token``.  Each item produced by
    ``lexer.get_tokens_unprocessed`` is indexed as ``item[1]`` (token type)
    and ``item[2]`` (matched text).
    """
    produced = list(lexer.get_tokens_unprocessed(s))
    assert len(produced) == 1
    only = produced[0]
    assert s == only[2]
    assert token == only[1]
def assert_tokens(lexer, strings, expected_tokens):
    """Assert that the joined ``strings`` lex into the expected pieces.

    ``strings`` are concatenated and fed to ``lexer.get_tokens_unprocessed``.
    The text of each produced token (``item[2]``) must equal the
    corresponding entry of ``strings``, and its token type (``item[1]``)
    must equal the corresponding entry of ``expected_tokens``.

    Both arguments may be any iterable (list, tuple, generator, ...).  They
    are materialised once up front so that a tuple does not compare unequal
    to the parsed list, and a generator is not exhausted by ``join`` before
    the comparison.
    """
    expected_strings = list(strings)
    expected_types = list(expected_tokens)

    tokens = list(lexer.get_tokens_unprocessed(''.join(expected_strings)))

    parsed_strings = [t[2] for t in tokens]
    assert parsed_strings == expected_strings

    parsed_tokens = [t[1] for t in tokens]
    assert parsed_tokens == expected_types
def assert_fast_tokenization(lexer, s, *, max_seconds=10):
    """Assert that ``s`` is tokenized quickly and return the tokens.

    The input is lexed with ``lexer.get_tokens_unprocessed`` and the elapsed
    time is measured with ``time.perf_counter``, which is monotonic and so
    cannot be skewed by system clock adjustments during the run.

    :param lexer: object providing ``get_tokens_unprocessed(text)``.
    :param s: the text to tokenize.
    :param max_seconds: time limit; exceeding it fails the test through
        ``pytest.fail``.
    :return: the list of items produced by the lexer.
    """
    start = time.perf_counter()
    tokens = list(lexer.get_tokens_unprocessed(s))
    elapsed = time.perf_counter() - start
    # The default of 10 seconds is deliberately generous: we don't want
    # false positives when the tests are starved for CPU time.
    if elapsed > max_seconds:
        pytest.fail('tokenization took too long')
    return tokens
# Strings.
def test_single_quote_strings(lexer):
    """Check single-quoted string lexing.

    A single-quoted string containing escapes must lex as one ``String``
    token, and an opening quote followed by 999 backslashes must tokenize
    within the time limit.
    """
    escaped = r"'foo\tbar\\\'baz'"
    runaway = "'" + '\\' * 999
    assert_single_token(lexer, escaped, String)
    assert_fast_tokenization(lexer, runaway)
def test_double_quote_strings(lexer):
    """Check double-quoted string lexing.

    A double-quoted string containing escapes must lex as one ``String``
    token, and an opening quote followed by 999 backslashes must tokenize
    within the time limit.
    """
    escaped = r'"foo\tbar\\\"baz"'
    runaway = '"' + '\\' * 999
    assert_single_token(lexer, escaped, String)
    assert_fast_tokenization(lexer, runaway)
def test_backtick_strings(lexer):
    """Check backtick string lexing.

    A backtick-quoted string containing escapes must lex as one
    ``String.Backtick`` token, and an opening backtick followed by 999
    backslashes must tokenize within the time limit.
    """
    escaped = r'`foo\tbar\\\`baz`'
    runaway = '`' + '\\' * 999
    assert_single_token(lexer, escaped, String.Backtick)
    assert_fast_tokenization(lexer, runaway)
# Regex matches with various delimiters.
def test_match(lexer):
    """Check a bare slash-delimited match.

    The whole match must lex as one ``String.Regex`` token, and a slash
    followed by 999 backslashes must tokenize within the time limit.
    """
    pattern = r'/aa\tbb/'
    runaway = '/' + '\\' * 999
    assert_single_token(lexer, pattern, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_match_with_slash(lexer):
    """Check an ``m`` match delimited by slashes.

    The ``m`` operator and the slash-delimited body (which contains a
    newline) must each lex as ``String.Regex``, and an opened match followed
    by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', '/\n\\t\\\\/']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm/xxx\n' + '\\' * 999)
def test_match_with_bang(lexer):
    """Check an ``m`` match delimited by exclamation marks.

    The ``m`` operator and the delimited body (with an escaped delimiter
    inside) must each lex as ``String.Regex``, and an opened match followed
    by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'!aa\t\!bb!']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm!' + '\\' * 999)
def test_match_with_brace(lexer):
    """Check an ``m`` match delimited by curly braces.

    The ``m`` operator and the delimited body (with an escaped closing
    brace inside) must each lex as ``String.Regex``, and an opened match
    followed by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'{aa\t\}bb}']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm{' + '\\' * 999)
def test_match_with_angle_brackets(lexer):
    """Check an ``m`` match delimited by angle brackets.

    The ``m`` operator and the delimited body (with an escaped closing
    bracket inside) must each lex as ``String.Regex``, and an opened match
    followed by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'<aa\t\>bb>']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm<' + '\\' * 999)
def test_match_with_parenthesis(lexer):
    """Check an ``m`` match delimited by parentheses.

    The ``m`` operator and the delimited body (with an escaped closing
    parenthesis inside) must each lex as ``String.Regex``, and an opened
    match followed by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'(aa\t\)bb)']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm(' + '\\' * 999)
def test_match_with_at_sign(lexer):
    """Check an ``m`` match delimited by at signs.

    The ``m`` operator and the delimited body (with an escaped delimiter
    inside) must each lex as ``String.Regex``, and an opened match followed
    by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'@aa\t\@bb@']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm@' + '\\' * 999)
def test_match_with_percent_sign(lexer):
    """Check an ``m`` match delimited by percent signs.

    The ``m`` operator and the delimited body (with an escaped delimiter
    inside) must each lex as ``String.Regex``, and an opened match followed
    by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'%aa\t\%bb%']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm%' + '\\' * 999)
def test_match_with_dollar_sign(lexer):
    """Check an ``m`` match delimited by dollar signs.

    The ``m`` operator and the delimited body (with an escaped delimiter
    inside) must each lex as ``String.Regex``, and an opened match followed
    by 999 backslashes must tokenize within the time limit.
    """
    pieces = ['m', r'$aa\t\$bb$']
    token_types = [String.Regex] * 2
    assert_tokens(lexer, pieces, token_types)
    assert_fast_tokenization(lexer, 'm$' + '\\' * 999)
# Regex substitutions with various delimiters.
def test_substitution_with_slash(lexer):
    """Check a slash-delimited substitution.

    The complete substitution, including its trailing ``g`` flag, must lex
    as one ``String.Regex`` token, and a substitution whose replacement part
    is 999 backslashes must tokenize within the time limit.
    """
    substitution = 's/aaa/bbb/g'
    runaway = 's/foo/' + '\\' * 999
    assert_single_token(lexer, substitution, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_at_sign(lexer):
    """Check a substitution delimited by at signs.

    The complete substitution, including its trailing ``g`` flag, must lex
    as one ``String.Regex`` token, and a substitution whose replacement part
    is 999 backslashes must tokenize within the time limit.
    """
    substitution = r's@aaa@bbb@g'
    runaway = 's@foo@' + '\\' * 999
    assert_single_token(lexer, substitution, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_percent_sign(lexer):
    """Check a substitution delimited by percent signs.

    The complete substitution, including its trailing ``g`` flag, must lex
    as one ``String.Regex`` token, and a substitution whose replacement part
    is 999 backslashes must tokenize within the time limit.
    """
    substitution = r's%aaa%bbb%g'
    runaway = 's%foo%' + '\\' * 999
    assert_single_token(lexer, substitution, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_brace(lexer):
    """Check the brace-delimited first part of a substitution.

    ``s{aaa}`` must lex as one ``String.Regex`` token, and ``s{`` followed
    by 999 backslashes must tokenize within the time limit.
    """
    opening = r's{aaa}'
    runaway = 's{' + '\\' * 999
    assert_single_token(lexer, opening, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_angle_bracket(lexer):
    """Check the angle-bracket-delimited first part of a substitution.

    ``s<aaa>`` must lex as one ``String.Regex`` token, and ``s<`` followed
    by 999 backslashes must tokenize within the time limit.
    """
    opening = r's<aaa>'
    runaway = 's<' + '\\' * 999
    assert_single_token(lexer, opening, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_square_bracket(lexer):
    """Check the square-bracket-delimited first part of a substitution.

    ``s[aaa]`` must lex as one ``String.Regex`` token, and ``s[`` followed
    by 999 backslashes must tokenize within the time limit.
    """
    opening = r's[aaa]'
    runaway = 's[' + '\\' * 999
    assert_single_token(lexer, opening, String.Regex)
    assert_fast_tokenization(lexer, runaway)
def test_substitution_with_parenthesis(lexer):
    """Check the parenthesis-delimited first part of a substitution.

    ``s(aaa)`` must lex as one ``String.Regex`` token, and ``s(`` followed
    by 999 backslashes must tokenize within the time limit.
    """
    opening = r's(aaa)'
    runaway = 's(' + '\\' * 999
    assert_single_token(lexer, opening, String.Regex)
    assert_fast_tokenization(lexer, runaway)
# Namespaces/modules
def test_package_statement(lexer):
    """Check ``package`` statements.

    For both a plain and a ``::``-qualified name, the statement must lex as
    a keyword, whitespace, and a single ``Name.Namespace`` token.
    """
    expected = [Keyword, Text.Whitespace, Name.Namespace]
    for module_name in ('Foo', 'Foo::Bar'):
        assert_tokens(lexer, ['package', ' ', module_name], expected)
def test_use_statement(lexer):
    """Check ``use`` statements.

    For both a plain and a ``::``-qualified name, the statement must lex as
    a keyword, whitespace, and a single ``Name.Namespace`` token.
    """
    expected = [Keyword, Text.Whitespace, Name.Namespace]
    for module_name in ('Foo', 'Foo::Bar'):
        assert_tokens(lexer, ['use', ' ', module_name], expected)
def test_no_statement(lexer):
    """Check ``no`` statements.

    For both a plain and a ``::``-qualified name, the statement must lex as
    a keyword, whitespace, and a single ``Name.Namespace`` token.
    """
    expected = [Keyword, Text.Whitespace, Name.Namespace]
    for module_name in ('Foo', 'Foo::Bar'):
        assert_tokens(lexer, ['no', ' ', module_name], expected)
def test_require_statement(lexer):
    """Check ``require`` statements.

    A plain or ``::``-qualified name after ``require`` must lex as a single
    ``Name.Namespace`` token, while a double-quoted file path must lex as a
    ``String`` token.
    """
    namespace_form = [Keyword, Text.Whitespace, Name.Namespace]
    for module_name in ('Foo', 'Foo::Bar'):
        assert_tokens(lexer, ['require', ' ', module_name], namespace_form)

    quoted_path = ['require', ' ', '"Foo/Bar.pm"']
    assert_tokens(lexer, quoted_path, [Keyword, Text.Whitespace, String])
|