File: test_html.py

package info (click to toggle)
python-mechanize 1%3A0.4.10%2Bds-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,316 kB
  • sloc: python: 16,656; makefile: 11; sh: 4
file content (138 lines) | stat: -rw-r--r-- 4,397 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python

from unittest import TestCase

import mechanize
import mechanize._form
from mechanize._response import test_html_response
from mechanize._html import content_parser, get_title, Factory


class RegressionTests(TestCase):
    """Documents with odd <base> tags that used to cause exceptions."""

    def test_close_base_tag(self):
        # Any document containing a closing </base> tag used to raise an
        # exception; walking every link without an error is the check.
        markup = "</base>"
        browser = mechanize.Browser()
        browser.set_html(markup)
        for _link in browser.links():
            pass

    def test_bad_base_tag(self):
        # A <base> tag that has no href attribute used to raise an
        # exception as well.
        markup = "<BASE TARGET='_main'><a href='http://example.com/'>eg</a>"
        page_url = "http://example.com/"
        browser = mechanize.Browser()
        browser.set_html(markup, url=page_url)
        for _link in browser.links():
            pass


class EncodingFinderTests(TestCase):
    """Checks the encoding EncodingFinder reports for Content-type headers."""

    def make_response(self, encodings):
        # Build a test response carrying one Content-type header per
        # requested charset, in the order given.
        content_type_headers = []
        for charset in encodings:
            header_value = "text/html; charset=\"%s\"" % charset
            content_type_headers.append(("Content-type", header_value))
        return mechanize._response.test_response(headers=content_type_headers)

    def _reported_encoding(self, encodings):
        # Shared setup: a finder constructed with "default", queried with a
        # response built from the given charsets.
        finder = mechanize._html.EncodingFinder("default")
        reply = self.make_response(encodings)
        return finder.encoding(reply)

    def test_known_encoding(self):
        # A recognised charset is expected back unchanged.
        self.assertEqual(self._reported_encoding(["utf-8"]), "utf-8")

    def test_unknown_encoding(self):
        # An unrecognised charset is expected to yield the finder's
        # constructor argument.
        self.assertEqual(self._reported_encoding(["bogus"]), "default")

    def test_precedence(self):
        # With two recognised charsets, the first header is expected to win.
        self.assertEqual(
            self._reported_encoding(["latin-1", "utf-8"]), "latin-1")

    def test_fallback(self):
        # An unrecognised first charset is expected to be skipped in favour
        # of the next header.
        self.assertEqual(
            self._reported_encoding(["bogus", "utf-8"]), "utf-8")


class TitleTests(TestCase):
    """Exercises the module-level get_title() on a parsed document."""

    def test_title_parsing(self):
        # The title text spans two lines with surrounding spaces; the
        # expected result is the single-spaced, stripped form.
        document = (
            "<html><head>\n"
            "<title> Title\n Test</title>\n"
            "</head><body><p>Blah.<p></body></html>\n"
        )
        parsed = content_parser(document)
        self.assertEqual(get_title(parsed), 'Title Test')


class MiscTests(TestCase):
    """Assorted checks on test_html_response() and Factory."""

    def test_util_func(self):
        # Two independently built empty responses must render the same
        # header text.
        rendered = [str(test_html_response('').info()) for _ in range(2)]
        self.assertEqual(rendered[0], rendered[1])

    def test_link_parsing(self):

        def first_link_text(markup):
            # Run the markup through a fresh Factory and return the text
            # attribute of the first link it produces.
            link_factory = Factory()
            reply = test_html_response(markup, url="http://example.com/")
            link_factory.set_response(reply)
            found = list(link_factory.links())
            return found[0].text

        # Anchor whose text has irregular whitespace and nested inline tags.
        messy_anchor = ("""\
        <html><head><title>Title</title></head><body>
        <p><a href="http://example.com/">The  quick\tbrown fox jumps
        over the <i><b>lazy</b></i> dog </a>
        </body></html>
        """)

        # Anchor with no text at all.
        empty_anchor = ("""\
        <html><head><title>Title</title></head><body>
        <p><a href="http://example.com/"></a>
        </body></html>
        """)

        # An iframe is the only link-bearing element in the document.
        iframe_only = ("""\
        <html><head><title>Title</title></head><body>
        <p><iframe src="http://example.com/"></iframe>
        </body></html>
        """)

        expectations = (
            (messy_anchor, u'The quick brown fox jumps over the lazy dog'),
            (empty_anchor, ''),
            (iframe_only, ''),
        )
        for markup, expected_text in expectations:
            self.assertEqual(first_link_text(markup), expected_text)

    def test_title_parsing(self):
        def title_of(markup):
            # Run the markup through a fresh Factory and return its title.
            title_factory = Factory()
            reply = test_html_response(markup)
            title_factory.set_response(reply)
            return title_factory.title

        # Bytes input containing a character entity inside the title.
        entity_title = (b"""\
        <html><head>
        <title>T&gt;itle</title>
        </head><body><p>Blah.<p></body></html>
        """)
        self.assertEqual(title_of(entity_title), u'T>itle')

        # A <script> element embedded inside the title, plus entities.
        script_title = ("""\
        <html><head>
        <title>  Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script>
        tle &amp;&#38;
        </title>
        </head><body><p>Blah.<p></body></html>
        """)
        expected_script_title = 'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&'
        self.assertEqual(str(title_of(script_title)), expected_script_title)

        # Document that ends right after the opening <title> tag.
        truncated = ("""\
        <html><head>
        <title>""")
        self.assertEqual(title_of(truncated), u'')


if __name__ == "__main__":
    # Executed as a script: hand control to unittest's command-line runner.
    from unittest import main
    main()