"""Fixture-driven tests for html2text.

Every ``*.html`` file located next to this module is converted twice, once
through the ``html2text`` module API and once through its command line, and
each result is compared with the ``.md`` baseline of the same base name.
"""
import codecs
import glob
import os
import re
import subprocess
import sys
# Interpreters older than 2.7 import unittest2 under the name ``unittest``;
# everything else uses the standard-library module.
if sys.version_info[:2] < (2, 7):
    import unittest2 as unittest
else:
    import unittest

import logging

# Emit every record from DEBUG upwards, tagged with level and function name.
logging.basicConfig(
    format='%(levelname)s:%(funcName)s:%(message)s',
    level=logging.DEBUG
)
import html2text
def test_module(fn, google_doc=False, **kwargs):
    """Convert the HTML fixture *fn* through the html2text module API.

    Args:
        fn: Path of the HTML fixture to convert.
        google_doc: When true, switch the converter's ``google_doc`` flag on
            and also set ``ul_item_mark='-'``, ``body_width=0`` and
            ``hide_strikethrough=True``.
        **kwargs: Extra attributes to set on the ``HTML2Text`` instance.
            They are applied last, so they override the settings above.

    Returns:
        A ``(result, actual)`` tuple: the baseline text returned by
        ``get_baseline(fn)`` and the text produced by ``handle``.
    """
    h = html2text.HTML2Text()
    h.fn = fn

    if google_doc:
        h.google_doc = True
        h.ul_item_mark = '-'
        h.body_width = 0
        h.hide_strikethrough = True

    for k, v in kwargs.items():
        setattr(h, k, v)

    result = get_baseline(fn)

    # The context manager closes the fixture even when read() raises, which
    # the previous explicit open()/close() pair did not guarantee.
    with open(fn) as inf:
        html = inf.read()

    actual = h.handle(html)
    return result, actual
def test_command(fn, *args):
    """Convert the HTML fixture *fn* by running html2text as a subprocess.

    The command is ``sys.executable -m html2text.__init__`` followed by the
    given options and finally *fn*.  The pseudo-option ``--googledoc`` is not
    passed through; it is replaced by the flags ``-g -d -b 0 -s``.

    Args:
        fn: Path of the HTML fixture to convert.
        *args: Command-line options for the converter.

    Returns:
        A ``(result, actual)`` tuple: the baseline text returned by
        ``get_baseline(fn)`` and the subprocess's standard output decoded
        as UTF-8.
    """
    options = list(args)
    command = [sys.executable, '-m', 'html2text.__init__']

    if '--googledoc' in options:
        options.remove('--googledoc')
        command.extend(['-g', '-d', '-b', '0', '-s'])

    command.extend(options)
    command.append(fn)

    result = get_baseline(fn)
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    raw_output = process.communicate()[0]
    actual = raw_output.decode('utf8')

    if os.name == 'nt':
        # Fix the unwanted CR to CRCRLF replacement
        # during text pipelining on Windows/cygwin
        collapsed = re.sub(r'\r+', '\r', actual)
        actual = collapsed.replace('\r\n', '\n')

    return result, actual
def get_dump_name(fn, suffix):
    """Return the dump-file path for *fn*.

    The extension of *fn* is dropped and ``-<suffix>_output.md`` is appended
    to what remains.
    """
    stem, _extension = os.path.splitext(fn)
    return '%s-%s_output.md' % (stem, suffix)
def get_baseline_name(fn):
    """Return the path of the baseline file for *fn*.

    The baseline has the same path as *fn* with the extension replaced by
    ``.md``.
    """
    stem, _extension = os.path.splitext(fn)
    return stem + '.md'
def get_baseline(fn):
    """Return the expected conversion output for the fixture *fn*.

    Args:
        fn: Path of the HTML fixture; the baseline path is derived from it
            with ``get_baseline_name``.

    Returns:
        The whole baseline file decoded as UTF-8.
    """
    name = get_baseline_name(fn)
    # The context manager closes the file even when read() raises (for
    # example on invalid UTF-8), which the explicit close() did not.
    with codecs.open(name, mode='r', encoding='utf8') as f:
        return f.read()
class TestHTML2Text(unittest.TestCase):
    """Test case that starts empty.

    Its test methods are attached with ``setattr`` by the module-level loop
    that iterates over the HTML fixtures.
    """
def generate_test(fn):
    """Build the module-API and command-line test methods for one fixture.

    The converter options are chosen from the lower-cased base name of *fn*:

    * starts with ``google``: ``google_doc=True`` / ``--googledoc``
    * contains ``unicode``: ``unicode_snob=True`` (module API only)
    * contains ``flip_emphasis``: ``emphasis_mark='*'``,
      ``strong_mark='__'`` / ``-e``
    * contains ``escape_snob``: ``escape_snob=True`` / ``--escape-all``
    * contains ``table_bypass``: ``bypass_tables=True`` / ``--bypass-tables``
    * starts with ``bodywidth``: ``body_width=0`` / ``--body-width=0``

    Args:
        fn: Path of the HTML fixture.

    Returns:
        A ``(test_mod, test_cmd)`` pair of functions taking ``self``, meant
        to be attached to a ``unittest.TestCase`` subclass.
    """
    base_fn = os.path.basename(fn).lower()
    module_args = {}
    cmdline_args = []

    if base_fn.startswith('google'):
        module_args['google_doc'] = True
        cmdline_args.append('--googledoc')

    if 'unicode' in base_fn:
        module_args['unicode_snob'] = True

    if 'flip_emphasis' in base_fn:
        module_args['emphasis_mark'] = '*'
        module_args['strong_mark'] = '__'
        cmdline_args.append('-e')

    if 'escape_snob' in base_fn:
        module_args['escape_snob'] = True
        cmdline_args.append('--escape-all')

    if 'table_bypass' in base_fn:
        module_args['bypass_tables'] = True
        cmdline_args.append('--bypass-tables')

    if base_fn.startswith('bodywidth'):
        module_args['body_width'] = 0
        cmdline_args.append('--body-width=0')

    def test_mod(self):
        self.maxDiff = None
        result, actual = test_module(fn, **module_args)
        self.assertEqual(result, actual)

    def test_cmd(self):
        # Because there is no command-line option to control unicode_snob,
        # such fixtures cannot be checked here.  Report them as skipped
        # rather than letting the test pass without asserting anything.
        if 'unicode_snob' in module_args:
            self.skipTest('no command-line option to control unicode_snob')
        self.maxDiff = None
        result, actual = test_command(fn, *cmdline_args)
        self.assertEqual(result, actual)

    return test_mod, test_cmd
# Originally from http://stackoverflow.com/questions/32899/\
#    how-to-generate-dynamic-parametrized-unit-tests-in-python
#
# For every HTML fixture in this file's directory, attach a
# "test_<name>_mod" and a "test_<name>_cmd" method to TestHTML2Text, where
# <name> is the lower-cased fixture base name without its extension.
test_dir_name = os.path.dirname(os.path.realpath(__file__))
for fn in glob.glob("%s/*.html" % test_dir_name):
    test_m, test_c = generate_test(fn)
    test_name = 'test_%s' % os.path.splitext(os.path.basename(fn))[0].lower()
    setattr(TestHTML2Text, test_name + "_mod", test_m)
    if not test_c:
        # No command-line variant was produced for this fixture.
        continue
    setattr(TestHTML2Text, test_name + "_cmd", test_c)

if __name__ == "__main__":
    unittest.main()