File: test_html2text.py

package info (click to toggle)
python-html2text 2014.9.25-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 404 kB
  • ctags: 143
  • sloc: python: 1,073; sh: 76; makefile: 14
file content (143 lines) | stat: -rw-r--r-- 3,683 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import codecs
import glob
import os
import re
import subprocess
import sys

if sys.version_info[:2] < (2, 7):
    import unittest2 as unittest
else:
    import unittest
import logging

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.DEBUG)

import html2text


def test_module(fn, google_doc=False, **kwargs):
    h = html2text.HTML2Text()
    h.fn = fn

    if google_doc:
        h.google_doc = True
        h.ul_item_mark = '-'
        h.body_width = 0
        h.hide_strikethrough = True

    for k, v in kwargs.items():
        setattr(h, k, v)

    result = get_baseline(fn)
    inf = open(fn)
    actual = h.handle(inf.read())
    inf.close()
    return result, actual


def test_command(fn, *args):
    args = list(args)
    cmd = [sys.executable, '-m', 'html2text.__init__']

    if '--googledoc' in args:
        args.remove('--googledoc')
        cmd += ['-g', '-d', '-b', '0', '-s']

    if args:
        cmd.extend(args)

    cmd += [fn]

    result = get_baseline(fn)
    pid = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, _ = pid.communicate()

    actual = out.decode('utf8')

    if os.name == 'nt':
        # Fix the unwanted CR to CRCRLF replacement
        # during text pipelining on Windows/cygwin
        actual = re.sub(r'\r+', '\r', actual)
        actual = actual.replace('\r\n', '\n')

    return result, actual


def get_dump_name(fn, suffix):
    return '%s-%s_output.md' % (os.path.splitext(fn)[0], suffix)


def get_baseline_name(fn):
    return os.path.splitext(fn)[0] + '.md'


def get_baseline(fn):
    name = get_baseline_name(fn)
    f = codecs.open(name, mode='r', encoding='utf8')
    out = f.read()
    f.close()
    return out


class TestHTML2Text(unittest.TestCase):
    pass


def generate_test(fn):
    def test_mod(self):
        self.maxDiff = None
        result, actual = test_module(fn, **module_args)
        self.assertEqual(result, actual)

    def test_cmd(self):
        # Because there is no command-line option to control unicode_snob
        if not 'unicode_snob' in module_args:
            self.maxDiff = None
            result, actual = test_command(fn, *cmdline_args)
            self.assertEqual(result, actual)

    module_args = {}
    cmdline_args = []
    base_fn = os.path.basename(fn).lower()

    if base_fn.startswith('google'):
        module_args['google_doc'] = True
        cmdline_args.append('--googledoc')

    if base_fn.find('unicode') >= 0:
        module_args['unicode_snob'] = True

    if base_fn.find('flip_emphasis') >= 0:
        module_args['emphasis_mark'] = '*'
        module_args['strong_mark'] = '__'
        cmdline_args.append('-e')

    if base_fn.find('escape_snob') >= 0:
        module_args['escape_snob'] = True
        cmdline_args.append('--escape-all')

    if base_fn.find('table_bypass') >= 0:
        module_args['bypass_tables'] = True
        cmdline_args.append('--bypass-tables')

    if base_fn.startswith('bodywidth'):
        #module_args['unicode_snob'] = True
        module_args['body_width'] = 0
        cmdline_args.append('--body-width=0')

    return test_mod, test_cmd

# Originally from http://stackoverflow.com/questions/32899/\
#    how-to-generate-dynamic-parametrized-unit-tests-in-python
test_dir_name = os.path.dirname(os.path.realpath(__file__))
for fn in glob.glob("%s/*.html" % test_dir_name):
    test_name = 'test_%s' % os.path.splitext(os.path.basename(fn))[0].lower()
    test_m, test_c = generate_test(fn)
    setattr(TestHTML2Text, test_name + "_mod", test_m)
    if test_c:
        setattr(TestHTML2Text, test_name + "_cmd", test_c)

if __name__ == "__main__":
    unittest.main()