# -*- coding: utf-8 -*-
"""
Pygments regex lexer tests
~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import time
import unittest
from pygments.token import String
from pygments.lexers.perl import PerlLexer
class RunawayRegexTest(unittest.TestCase):
    """Regression tests for catastrophic backtracking in the Perl lexer.

    A previous version of the Perl lexer would spend a great deal of time
    backtracking when given particular strings. These tests show that the
    runaway backtracking doesn't happen any more (at least for the given
    cases).
    """

    lexer = PerlLexer()

    ### Test helpers.

    def assert_single_token(self, s, token):
        """Show that a given string generates only one token."""
        tokens = list(self.lexer.get_tokens_unprocessed(s))
        self.assertEqual(len(tokens), 1, tokens)
        self.assertEqual(s, tokens[0][2])
        self.assertEqual(token, tokens[0][1])

    def assert_tokens(self, strings, expected_tokens):
        """Show that a given string generates the expected tokens."""
        tokens = list(self.lexer.get_tokens_unprocessed(''.join(strings)))
        self.assertEqual(len(tokens), len(expected_tokens), tokens)
        for index, s in enumerate(strings):
            self.assertEqual(s, tokens[index][2])
            self.assertEqual(expected_tokens[index], tokens[index][1])

    def assert_fast_tokenization(self, s):
        """Show that a given string is tokenized quickly."""
        start = time.time()
        tokens = list(self.lexer.get_tokens_unprocessed(s))
        end = time.time()
        # Isn't 10 seconds kind of a long time? Yes, but we don't want false
        # positives when the tests are starved for CPU time.
        if end - start > 10:
            self.fail('tokenization took too long')
        return tokens

    ### Strings.

    def test_single_quote_strings(self):
        self.assert_single_token(r"'foo\tbar\\\'baz'", String)
        self.assert_fast_tokenization("'" + '\\'*999)

    def test_double_quote_strings(self):
        self.assert_single_token(r'"foo\tbar\\\"baz"', String)
        self.assert_fast_tokenization('"' + '\\'*999)

    def test_backtick_strings(self):
        self.assert_single_token(r'`foo\tbar\\\`baz`', String.Backtick)
        self.assert_fast_tokenization('`' + '\\'*999)

    ### Regex matches with various delimiters.

    def test_match(self):
        self.assert_single_token(r'/aa\tbb/', String.Regex)
        self.assert_fast_tokenization('/' + '\\'*999)

    def test_match_with_slash(self):
        self.assert_tokens(['m', '/\n\\t\\\\/'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m/xxx\n' + '\\'*999)

    def test_match_with_bang(self):
        self.assert_tokens(['m', r'!aa\t\!bb!'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m!' + '\\'*999)

    def test_match_with_brace(self):
        self.assert_tokens(['m', r'{aa\t\}bb}'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m{' + '\\'*999)

    def test_match_with_angle_brackets(self):
        self.assert_tokens(['m', r'<aa\t\>bb>'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m<' + '\\'*999)

    def test_match_with_parenthesis(self):
        self.assert_tokens(['m', r'(aa\t\)bb)'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m(' + '\\'*999)

    def test_match_with_at_sign(self):
        self.assert_tokens(['m', r'@aa\t\@bb@'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m@' + '\\'*999)

    def test_match_with_percent_sign(self):
        self.assert_tokens(['m', r'%aa\t\%bb%'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m%' + '\\'*999)

    def test_match_with_dollar_sign(self):
        self.assert_tokens(['m', r'$aa\t\$bb$'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m$' + '\\'*999)

    ### Regex substitutions with various delimiters.

    def test_substitution_with_slash(self):
        self.assert_single_token('s/aaa/bbb/g', String.Regex)
        self.assert_fast_tokenization('s/foo/' + '\\'*999)

    def test_substitution_with_at_sign(self):
        self.assert_single_token(r's@aaa@bbb@g', String.Regex)
        self.assert_fast_tokenization('s@foo@' + '\\'*999)

    def test_substitution_with_percent_sign(self):
        self.assert_single_token(r's%aaa%bbb%g', String.Regex)
        self.assert_fast_tokenization('s%foo%' + '\\'*999)

    def test_substitution_with_brace(self):
        self.assert_single_token(r's{aaa}', String.Regex)
        self.assert_fast_tokenization('s{' + '\\'*999)

    # NOTE: this method was previously defined twice with identical bodies;
    # the second definition silently shadowed the first, so only one copy
    # ever ran. The duplicate has been removed.
    def test_substitution_with_angle_bracket(self):
        self.assert_single_token(r's<aaa>', String.Regex)
        self.assert_fast_tokenization('s<' + '\\'*999)

    def test_substitution_with_square_bracket(self):
        self.assert_single_token(r's[aaa]', String.Regex)
        self.assert_fast_tokenization('s[' + '\\'*999)

    def test_substitution_with_parenthesis(self):
        self.assert_single_token(r's(aaa)', String.Regex)
        self.assert_fast_tokenization('s(' + '\\'*999)