#!/usr/bin/env python
from unittest import TestCase
import mechanize
import mechanize._form
from mechanize._response import test_html_response
from mechanize._html import content_parser, get_title, Factory
class RegressionTests(TestCase):
    """Regression checks for documents whose <base> tags once raised."""

    def test_close_base_tag(self):
        """A lone closing </base> tag must not raise while listing links."""
        # Any document containing a </base> tag used to cause an exception.
        browser = mechanize.Browser()
        browser.set_html("</base>")
        # Walk every link; the test passes as long as nothing raises.
        for _link in browser.links():
            pass

    def test_bad_base_tag(self):
        """A <base> tag lacking an href must not raise while listing links."""
        # A base tag with a TARGET but no href used to cause an exception.
        markup = "<BASE TARGET='_main'><a href='http://example.com/'>eg</a>"
        browser = mechanize.Browser()
        browser.set_html(markup, url="http://example.com/")
        # Walk every link; the test passes as long as nothing raises.
        for _link in browser.links():
            pass
class EncodingFinderTests(TestCase):
    """Checks which charset EncodingFinder picks from Content-type headers."""

    def make_response(self, encodings):
        """Build a test response with one Content-type header per encoding.

        The headers are emitted in the same order as *encodings*.
        """
        content_type_headers = []
        for encoding in encodings:
            header_value = "text/html; charset=\"%s\"" % encoding
            content_type_headers.append(("Content-type", header_value))
        return mechanize._response.test_response(headers=content_type_headers)

    def _detected_encoding(self, charsets):
        """Return what a finder defaulting to "default" reports for *charsets*."""
        finder = mechanize._html.EncodingFinder("default")
        response = self.make_response(charsets)
        return finder.encoding(response)

    def test_known_encoding(self):
        """A recognised charset is reported as-is."""
        self.assertEqual(self._detected_encoding(["utf-8"]), "utf-8")

    def test_unknown_encoding(self):
        """An unrecognised charset yields the finder's default encoding."""
        self.assertEqual(self._detected_encoding(["bogus"]), "default")

    def test_precedence(self):
        """With two recognised charsets, the first header wins."""
        self.assertEqual(
            self._detected_encoding(["latin-1", "utf-8"]), "latin-1")

    def test_fallback(self):
        """An unrecognised charset is skipped in favour of a later valid one."""
        self.assertEqual(
            self._detected_encoding(["bogus", "utf-8"]), "utf-8")
class TitleTests(TestCase):
    """Checks title extraction from a parsed document."""

    def test_title_parsing(self):
        """Whitespace runs inside <title> collapse to single spaces."""
        document = """\
<html><head>
<title> Title\n Test</title>
</head><body><p>Blah.<p></body></html>
"""
        parsed = content_parser(document)
        self.assertEqual(get_title(parsed), 'Title Test')
class MiscTests(TestCase):
    """Assorted checks for the test-response helper and the Factory parser."""

    def test_util_func(self):
        """Two empty test responses render identical header text."""
        first_headers = str(test_html_response('').info())
        second_headers = str(test_html_response('').info())
        self.assertEqual(first_headers, second_headers)

    def test_link_parsing(self):
        """Link text is whitespace-normalised; empty links yield ''."""

        def first_link_text(markup):
            # Parse *markup* as a page at example.com and return the text of
            # the first link the factory reports.
            parser = Factory()
            parser.set_response(
                test_html_response(markup, url="http://example.com/"))
            found = list(parser.links())
            return found[0].text

        # Anchor whose text spans a tab, a newline and nested inline tags.
        nested_anchor = """\
<html><head><title>Title</title></head><body>
<p><a href="http://example.com/">The quick\tbrown fox jumps
over the <i><b>lazy</b></i> dog </a>
</body></html>
"""
        # Anchor with no text at all.
        empty_anchor = """\
<html><head><title>Title</title></head><body>
<p><a href="http://example.com/"></a>
</body></html>
"""
        # An iframe is reported as a link, with empty text.
        iframe_only = """\
<html><head><title>Title</title></head><body>
<p><iframe src="http://example.com/"></iframe>
</body></html>
"""
        cases = (
            (nested_anchor, u'The quick brown fox jumps over the lazy dog'),
            (empty_anchor, ''),
            (iframe_only, ''),
        )
        for markup, wanted in cases:
            self.assertEqual(first_link_text(markup), wanted)

    def test_title_parsing(self):
        """Factory.title handles odd, script-laden and unterminated titles."""

        def title_of(markup):
            # Feed *markup* through a fresh Factory and return its title.
            parser = Factory()
            parser.set_response(test_html_response(markup))
            return parser.title

        # Bytes input with a '>' inside the title text.
        bytes_document = b"""\
<html><head>
<title>T>itle</title>
</head><body><p>Blah.<p></body></html>
"""
        self.assertEqual(title_of(bytes_document), u'T>itle')

        # A <script> element inside <title> is kept verbatim as title text.
        script_document = """\
<html><head>
<title> Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script>
tle &&
</title>
</head><body><p>Blah.<p></body></html>
"""
        script_title = str(title_of(script_document))
        self.assertEqual(
            script_title,
            'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&')

        # A document that ends right after the opening <title> tag.
        truncated_document = """\
<html><head>
<title>"""
        self.assertEqual(title_of(truncated_document), u'')
if __name__ == "__main__":
    # Executed directly as a script: run this module's test cases.
    from unittest import main as run_module_tests
    run_module_tests()
|