"""
HTML Lexer Tests
~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

import time

import pytest

from pygments.lexers.html import HtmlLexer
from pygments.token import Token
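
# Generous wall-clock ceiling for each run: a healthy lexer finishes these
# inputs in well under a second, so exceeding it points at pathologically
# slow (e.g. backtracking) behaviour, as the assertion messages below note.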
MAX_HL_TIME = 10


@pytest.fixture(scope='module')
def lexer_html():
    yield HtmlLexer()
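
# For orientation: get_tokens() yields (token_type, text) pairs such as
# (Token.Punctuation, '<') or (Token.Name.Tag, 'div'), and always ends the
# stream with a newline token even when the input lacks one.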


def test_happy_javascript_fragment(lexer_html):
    """valid, even long JavaScript fragments should still lex cleanly"""
    fragment = '<script type="text/javascript">' + 'alert("hi");' * 2000 + '</script>'
    start_time = time.time()
    tokens = list(lexer_html.get_tokens(fragment))
    assert all(token != Token.Error for token, _ in tokens)
    assert time.time() - start_time < MAX_HL_TIME, \
        'The HTML lexer might have an expensive happy-path script case'


def test_happy_css_fragment(lexer_html):
    """valid, even long CSS fragments should still lex cleanly"""
    fragment = '<style>' + '.ui-helper-hidden{display:none}' * 2000 + '</style>'
    start_time = time.time()
    tokens = list(lexer_html.get_tokens(fragment))
    assert all(token != Token.Error for token, _ in tokens)
    assert time.time() - start_time < MAX_HL_TIME, \
        'The HTML lexer might have an expensive happy-path style case'


def test_long_unclosed_javascript_fragment(lexer_html):
    """unclosed, long JavaScript fragments should lex quickly"""
    reps = 2000
    fragment = '<script type="text/javascript">' + 'alert("hi");' * reps
    start_time = time.time()
    tokens = list(lexer_html.get_tokens(fragment))
    assert time.time() - start_time < MAX_HL_TIME, \
        'The HTML lexer might have an expensive error script case'
    tokens_intro = [
        (Token.Punctuation, '<'),
        (Token.Name.Tag, 'script'),
        (Token.Text, ' '),
        (Token.Name.Attribute, 'type'),
        (Token.Operator, '='),
        (Token.Literal.String, '"text/javascript"'),
        (Token.Punctuation, '>'),
    ]
    tokens_body = [
        (Token.Name.Other, 'alert'),
        (Token.Punctuation, '('),
        (Token.Literal.String.Double, '"hi"'),
        (Token.Punctuation, ')'),
        (Token.Punctuation, ';'),
    ]
    # make sure we get the right opening tokens
    assert tokens[:len(tokens_intro)] == tokens_intro
    # and the right body tokens, even though the script is unclosed
    assert tokens[len(tokens_intro):-1] == tokens_body * reps
    # and of course the newline we get for free from get_tokens
    assert tokens[-1] == (Token.Text.Whitespace, '\n')


def test_long_unclosed_css_fragment(lexer_html):
    """unclosed, long CSS fragments should lex quickly"""
    reps = 2000
    fragment = '<style>' + '.ui-helper-hidden{display:none}' * reps
    start_time = time.time()
    tokens = list(lexer_html.get_tokens(fragment))
    assert time.time() - start_time < MAX_HL_TIME, \
        'The HTML lexer might have an expensive error style case'
    tokens_intro = [
        (Token.Punctuation, '<'),
        (Token.Name.Tag, 'style'),
        (Token.Punctuation, '>'),
    ]
    tokens_body = [
        (Token.Punctuation, '.'),
        (Token.Name.Class, 'ui-helper-hidden'),
        (Token.Punctuation, '{'),
        (Token.Keyword, 'display'),
        (Token.Punctuation, ':'),
        (Token.Keyword.Constant, 'none'),
        (Token.Punctuation, '}'),
    ]
    # make sure we get the right opening tokens
    assert tokens[:len(tokens_intro)] == tokens_intro
    # and the right body tokens, even though the style block is unclosed
    assert tokens[len(tokens_intro):-1] == tokens_body * reps
    # and of course the newline we get for free from get_tokens
    assert tokens[-1] == (Token.Text.Whitespace, '\n')


def test_unclosed_fragment_with_newline_recovery(lexer_html):
    """unclosed JavaScript fragments should recover on the next line"""
    fragment = '<script type="text/javascript">' + 'alert("hi");' * 20 + '\n<div>hi</div>'
    tokens = list(lexer_html.get_tokens(fragment))
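    # once the lexer reaches the newline it should abandon the unterminated
    # script and resume ordinary HTML tokenization on the following line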
    recovery_tokens = [
        (Token.Punctuation, '<'),
        (Token.Name.Tag, 'div'),
        (Token.Punctuation, '>'),
        (Token.Text, 'hi'),
        (Token.Punctuation, '<'),
        (Token.Punctuation, '/'),
        (Token.Name.Tag, 'div'),
        (Token.Punctuation, '>'),
        (Token.Text, '\n'),
    ]
    assert tokens[-len(recovery_tokens):] == recovery_tokens