File: test_htmlb.py

package info (click to toggle)
qm 1.1.3-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 8,628 kB
  • ctags: 10,249
  • sloc: python: 41,482; ansic: 20,611; xml: 12,837; sh: 485; makefile: 226
file content (45 lines) | stat: -rw-r--r-- 1,319 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

# dom.html_builder tests

from xml.dom.ext.reader import HtmlLib
from xml.dom.ext import XHtmlPrettyPrint
import sys

# Sample HTML input that is parsed with HtmlLib.FromHtml further down in
# this script.  The markup deliberately uses unclosed <P> elements,
# unquoted attribute values (CLASS=source), inline <em> markup and a
# handful of character entity references (&amp; &lt; &gt; &eacute;
# &ouml; &nbsp;).
good_html = """
<html>
<body>
<P>I prefer (all things being equal) regularity/orthogonality and logical
syntax/semantics in a language because there is less to have to remember.
(Of course I <em>know</em> all things are NEVER really equal!)
<P CLASS=source>Guido van Rossum, 6 Dec 91
<P>The details of that silly code are irrelevant.
<P CLASS=source>Tim Peters, 4 Mar 92
&amp; &lt; &gt; &eacute; &ouml; &nbsp;
</body>
</html>
"""

# Sample HTML input whose <b> and <i> elements overlap instead of nesting
# (the bold element is closed, as </B>, while the italic element is still
# open).  In the visible part of this script it is only referenced by the
# commented-out "Bad document" check at the end.
bad_html = """
<html>
Interdigitated <b>bold and <i>italic</B> tags.</i>&amp; &lt; &gt; &eacute; &ouml; &nbsp;
</html>
"""

# Ideally the good document would be tried with both settings of
# ignore_mismatched_end_tags.  At the moment it is not, because HtmlLib
# does not have these two modes of operation.

# Emit a heading line, parse the good sample with HtmlLib.FromHtml, and
# hand the resulting document to XHtmlPrettyPrint, directing it to
# sys.stdout with the ISO-8859-1 encoding.
print("Good document")
b = HtmlLib.FromHtml(good_html)
# Disabled: would append 'eacute' to the document's expand_entities tuple.
#b.expand_entities = b.expand_entities + ('eacute',)
XHtmlPrettyPrint(b, stream=sys.stdout, encoding="ISO-8859-1")

# Sgmlop currently does not complain about mismatched or misplaced tags
# or other aspects of invalidity, so the bad-document check below is
# left commented out.
#  print "Bad document"
#  try:
#      HtmlLib.FromHtml(bad_html)
#  except html_builder.BadHTMLError:
#      print "Exception raised for bad HTML"
#  else:
#      print "*** ERROR: no exception raised for bad HTML"