1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
#!/usr/bin/env python
from unittest import TestCase
import mechanize
import mechanize._form
from mechanize._response import test_html_response
class RegressionTests(TestCase):
def test_close_base_tag(self):
# any document containing a </base> tag used to cause an exception
br = mechanize.Browser()
response = test_html_response("</base>")
br.set_response(response)
list(br.links())
def test_bad_base_tag(self):
# a document with a base tag with no href used to cause an exception
for factory in [mechanize.DefaultFactory(), mechanize.RobustFactory()]:
br = mechanize.Browser(factory=factory)
response = test_html_response(
"<BASE TARGET='_main'><a href='http://example.com/'>eg</a>")
br.set_response(response)
list(br.links())
def test_robust_form_parser_uses_beautifulsoup(self):
factory = mechanize.RobustFormsFactory()
self.assertIs(factory.form_parser_class,
mechanize._form.RobustFormParser)
def test_form_parser_does_not_use_beautifulsoup(self):
factory = mechanize.FormsFactory()
self.assertIs(factory.form_parser_class, mechanize._form.FormParser)
def _make_forms_from_bad_html(self, factory):
bad_html = "<! -- : -- >"
factory.set_response(test_html_response(bad_html), "utf-8")
return list(factory.forms())
def test_robust_form_parser_does_not_raise_on_bad_html(self):
self._make_forms_from_bad_html(mechanize.RobustFormsFactory())
def test_form_parser_fails_on_bad_html(self):
self.assertRaises(
mechanize.ParseError,
self._make_forms_from_bad_html, mechanize.FormsFactory())
class CachingGeneratorFunctionTests(TestCase):
def _get_simple_cgenf(self, log):
from mechanize._html import CachingGeneratorFunction
todo = []
for ii in range(2):
def work(ii=ii):
log.append(ii)
return ii
todo.append(work)
def genf():
for a in todo:
yield a()
return CachingGeneratorFunction(genf())
def test_cache(self):
log = []
cgenf = self._get_simple_cgenf(log)
for repeat in range(2):
for ii, jj in zip(cgenf(), range(2)):
self.assertEqual(ii, jj)
self.assertEqual(log, range(2)) # work only done once
def test_interleaved(self):
log = []
cgenf = self._get_simple_cgenf(log)
cgen = cgenf()
self.assertEqual(cgen.next(), 0)
self.assertEqual(log, [0])
cgen2 = cgenf()
self.assertEqual(cgen2.next(), 0)
self.assertEqual(log, [0])
self.assertEqual(cgen.next(), 1)
self.assertEqual(log, [0, 1])
self.assertEqual(cgen2.next(), 1)
self.assertEqual(log, [0, 1])
self.assertRaises(StopIteration, cgen.next)
self.assertRaises(StopIteration, cgen2.next)
class UnescapeTests(TestCase):
def test_unescape_charref(self):
from mechanize._html import unescape_charref
mdash_utf8 = u"\u2014".encode("utf-8")
for ref, codepoint, utf8, latin1 in [
("38", 38, u"&".encode("utf-8"), "&"),
("x2014", 0x2014, mdash_utf8, "—"),
("8212", 8212, mdash_utf8, "—"),
]:
self.assertEqual(unescape_charref(ref, None), unichr(codepoint))
self.assertEqual(unescape_charref(ref, 'latin-1'), latin1)
self.assertEqual(unescape_charref(ref, 'utf-8'), utf8)
def test_unescape(self):
import htmlentitydefs
from mechanize._html import unescape
data = "& < — — —"
mdash_utf8 = u"\u2014".encode("utf-8")
ue = unescape(data, htmlentitydefs.name2codepoint, "utf-8")
self.assertEqual("& < %s %s %s" % ((mdash_utf8,)*3), ue)
for text, expect in [
("&a&", "&a&"),
("a&", "a&"),
]:
got = unescape(text, htmlentitydefs.name2codepoint, "latin-1")
self.assertEqual(got, expect)
class EncodingFinderTests(TestCase):
def make_response(self, encodings):
return mechanize._response.test_response(
headers=[("Content-type", "text/html; charset=\"%s\"" % encoding)
for encoding in encodings])
def test_known_encoding(self):
encoding_finder = mechanize._html.EncodingFinder("default")
response = self.make_response(["utf-8"])
self.assertEqual(encoding_finder.encoding(response), "utf-8")
def test_unknown_encoding(self):
encoding_finder = mechanize._html.EncodingFinder("default")
response = self.make_response(["bogus"])
self.assertEqual(encoding_finder.encoding(response), "default")
def test_precedence(self):
encoding_finder = mechanize._html.EncodingFinder("default")
response = self.make_response(["latin-1", "utf-8"])
self.assertEqual(encoding_finder.encoding(response), "latin-1")
def test_fallback(self):
encoding_finder = mechanize._html.EncodingFinder("default")
response = self.make_response(["bogus", "utf-8"])
self.assertEqual(encoding_finder.encoding(response), "utf-8")
if __name__ == "__main__":
import unittest
unittest.main()
|