1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
import html.entities
from hypothesis import given
from hypothesis.strategies import sampled_from
from matrix.colors import MatrixHtmlParser
try:
    # Python 3: ``html5`` maps entity names to their replacement text.
    # Only names without a trailing ';' are kept, because the tests below
    # append the ';' themselves when building the reference.
    html_entities = [
        (entity, text, ord(text))
        for entity, text in html.entities.html5.items()
        if not entity.endswith(';')
    ]
except AttributeError:
    # Python 2: no ``html5`` table is available, so fall back to
    # ``name2codepoint`` and build the character from its code point.
    html_entities = [
        (entity, unichr(number), number)
        for entity, number in html.entities.name2codepoint.items()
    ]
@given(sampled_from(html_entities))
def test_html_named_entity_parsing(entitydef):
    """Unescaping the named reference ``&name;`` must yield its character.

    ``entitydef`` is a ``(name, character, codepoint)`` tuple sampled from
    ``html_entities``.
    """
    entity_name, expected = entitydef[0], entitydef[1]
    reference = '&{};'.format(entity_name)
    assert MatrixHtmlParser().unescape(reference) == expected
@given(sampled_from(html_entities))
def test_html_numeric_reference_parsing(entitydef):
    """Unescaping the decimal reference ``&#N;`` must yield its character.

    ``entitydef`` is a ``(name, character, codepoint)`` tuple sampled from
    ``html_entities``; only the character and the code point are used here.
    """
    expected, codepoint = entitydef[1], entitydef[2]
    reference = '&#{};'.format(codepoint)
    assert MatrixHtmlParser().unescape(reference) == expected
@given(sampled_from(html_entities))
def test_html_entityref_reconstruction_from_name(entitydef):
    """Handling a named entity reference must produce one matching substring.

    ``entitydef`` is a ``(name, character, codepoint)`` tuple sampled from
    ``html_entities``. The entity name is passed straight to
    ``handle_entityref`` and the parser's collected substrings are inspected.
    """
    name = entitydef[0]
    parser = MatrixHtmlParser()
    parser.handle_entityref(name)
    substrings = parser.get_substrings()
    # Check the count first so an empty result fails with a readable
    # assertion instead of an IndexError raised by substrings[0].
    assert len(substrings) == 1
    assert substrings[0].text == parser.unescape('&{};'.format(name))
@given(sampled_from(html_entities))
def test_html_charref_reconstruction_from_name(entitydef):
    """Handling a numeric character reference must produce one matching substring.

    ``entitydef`` is a ``(name, character, codepoint)`` tuple sampled from
    ``html_entities``. The code point is passed to ``handle_charref`` and the
    parser's collected substrings are inspected.
    """
    num = entitydef[2]
    parser = MatrixHtmlParser()
    # NOTE(review): ``num`` is an int here, whereas the stdlib HTMLParser
    # passes the reference to handle_charref as a string -- confirm that
    # MatrixHtmlParser accepts both.
    parser.handle_charref(num)
    substrings = parser.get_substrings()
    # Check the count first so an empty result fails with a readable
    # assertion instead of an IndexError raised by substrings[0].
    assert len(substrings) == 1
    assert substrings[0].text == parser.unescape('&#{};'.format(num))
def test_parsing_of_escaped_brackets():
    """Entity-escaped angle brackets inside code must come out as literal text.

    Feeds ``&lt;faketag&gt;`` wrapped in ``<pre><code>`` and expects exactly
    one substring whose text is the unescaped ``<faketag>``.
    """
    parser = MatrixHtmlParser()
    # The brackets have to be escaped in the input: a raw ``<faketag>`` would
    # presumably be treated as markup rather than text (assuming the parser
    # follows stdlib HTMLParser tag handling), so the test would not exercise
    # escaped brackets at all.
    parser.feed('<pre><code>&lt;faketag&gt;</code></pre>')
    substrings = parser.get_substrings()
    # Check the count first so an empty result fails with a readable
    # assertion instead of an IndexError raised by substrings[0].
    assert len(substrings) == 1
    assert substrings[0].text == '<faketag>'
|