"""
A grammar for parsing a tiny HTML-like language, plus a transformer for it.
"""
from parsley import makeGrammar, term, termMaker as t
from itertools import chain
# Parsley grammar source for a tiny HTML-like language.
#
# Rules:
#   name         -- one or more letters/digits, captured as the matched text.
#   tag          -- ``<name attr*>``, nested ``html`` content, then a closing
#                   ``</name>`` whose name must equal the opening one exactly
#                   (``token(n)``); yields ``t.Element(lowercased name,
#                   dict of attributes, children)``.
#   html         -- zero or more ``text`` or ``tag`` items.
#   text         -- a non-empty run of characters up to the next ``<``.
#   attribute    -- ``name = quotedString``, yielded as a ``(key, value)`` pair.
#   quotedString -- text delimited by matching single or double quotes; yields
#                   the text between the quotes.
#
# Continuation lines of a multi-line rule are indented on purpose: Parsley
# treats an unindented line as the start of a new rule, so a body line at
# column 0 would end the rule early.
#
# This is a raw string so that ``'\''`` reaches Parsley with its backslash.
tinyHTMLGrammar = r"""
name = <letterOrDigit+>
tag = ('<' spaces name:n spaces attribute*:attrs '>'
       html:c
       '<' '/' token(n) spaces '>'
       -> t.Element(n.lower(), dict(attrs), c))
html = (text | tag)*
text = <(~('<') anything)+>
attribute = spaces name:k token('=') quotedString:v -> (k, v)
quotedString = (('"' | '\''):q <(~exactly(q) anything)*>:xs exactly(q))
               -> xs
"""
# Compile the grammar text into a callable parser. ``globals()`` is passed as
# the bindings so the ``t.Element(...)`` action in the ``tag`` rule can resolve
# the ``t`` imported at the top of this module.
TinyHTML = makeGrammar(tinyHTMLGrammar, globals(), name="TinyHTML")

# Sample document: nested tags, plain text, and single-quoted attributes.
# Adjacent literals are concatenated at compile time into one string.
testSource = (
    "<html><title>Yes</title><body>"
    "<h1>Man, HTML is <i>great</i>.</h1>"
    "<p>How could you even <b>think</b> otherwise?</p>"
    "<img src='HIPPO.JPG'></img>"
    "<a href='http://twistedmatrix.com'>A Good Website</a>"
    "</body></html>"
)

# Parse the sample starting from the ``html`` rule and print the result.
print(TinyHTML(testSource).html())