1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
#! /usr/bin/env python3
# $Id: test_unicode.py 9425 2023-06-30 14:56:47Z milde $
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.
"""
Tests for misc.py "unicode" directive.
"""
from pathlib import Path
import sys
import unittest
if __name__ == '__main__':
    # Direct script execution: put the "docutils root" (``parents[4]`` of
    # this file's resolved path) at the very front of the module search
    # path, so that the imports that follow pick up the local `docutils`
    # package rather than an installed one.
    sys.path[:0] = [str(Path(__file__).resolve().parents[4])]
from docutils.frontend import get_default_settings
from docutils.parsers.rst import Parser
from docutils.utils import new_document
class ParserTestCase(unittest.TestCase):
    """Feed every sample in ``totest`` to the reST parser and compare trees."""

    def test_parser(self):
        """Parse each sample input and compare the document dump.

        ``totest`` maps a group name to a list of
        ``(input, expected)`` pairs.  Every pair runs in its own sub-test
        (labelled ``totest[<group>][<index>]``), so one mismatch does not
        hide the results of the remaining samples.
        """
        rst_parser = Parser()
        base_settings = get_default_settings(Parser)
        # Empty string as warning stream: presumably keeps parser
        # diagnostics off stderr (docutils documents '' as "discard").
        # The expected dumps still contain them as <system_message> nodes.
        base_settings.warning_stream = ''
        for group, samples in totest.items():
            for index, sample in enumerate(samples):
                source, expected = sample
                with self.subTest(id=f'totest[{group!r}][{index}]'):
                    # Fresh document and a private settings copy per sample.
                    doctree = new_document('test data', base_settings.copy())
                    rst_parser.parse(source, doctree)
                    self.assertEqual(expected, doctree.pformat())
# Probe how this interpreter's `chr()` rejects out-of-range arguments.
# Both strings are interpolated into the expected output of the
# "too big" test sample, so that sample matches whatever message text
# the running Python version produces.

# Argument far beyond what fits a C integer.
unichr_exception_string = ''  # stays empty if `chr()` unexpectedly succeeds
try:
    chr(0x111111111111111111)
except OverflowError as err:
    unichr_exception_string = f'code too large ({err})'
except Exception as err:
    unichr_exception_string = str(err)

# Argument that fits an integer but lies outside the Unicode code space.
invalid_char_code = ''  # stays empty if `chr()` unexpectedly succeeds
try:
    chr(0x11111111)
except Exception as err:
    invalid_char_code = f'{type(err).__name__}: {err}'
# Test data: group name -> list of [reST input, expected document dump].
#
# Whitespace inside these literals is significant:
#
# * The expected dumps are compared verbatim with `document.pformat()`,
#   which indents every child node four columns deeper than its parent.
# * The "line" attributes of the expected <system_message> nodes count
#   input lines from 1, including the empty first line after the opening
#   quotes, so the blank lines that separate paragraphs from the
#   substitution definitions must be present in the inputs.
totest = {}

totest['unicode'] = [
# Accepted notations for character codes: "0x", "\u", XML-style numeric
# character reference, "U" and "U+".  |bne| lists two codes in one
# definition.  The no-break space is spelled as a character reference so
# that the directive receives an argument (expected result: U+00A0).
["""
Insert an em-dash (|mdash|), a copyright symbol (|copy|), a non-breaking
space (|nbsp|), a backwards-not-equals (|bne|), and a captial omega (|Omega|).

.. |mdash| unicode:: 0x02014
.. |copy| unicode:: \\u00A9
.. |nbsp| unicode:: &#x000A0;
.. |bne| unicode:: U0003D U020E5
.. |Omega| unicode:: U+003A9
""",
"""\
<document source="test data">
    <paragraph>
        Insert an em-dash (
        <substitution_reference refname="mdash">
            mdash
        ), a copyright symbol (
        <substitution_reference refname="copy">
            copy
        ), a non-breaking
        space (
        <substitution_reference refname="nbsp">
            nbsp
        ), a backwards-not-equals (
        <substitution_reference refname="bne">
            bne
        ), and a captial omega (
        <substitution_reference refname="Omega">
            Omega
        ).
    <substitution_definition names="mdash">
        \u2014
    <substitution_definition names="copy">
        \u00A9
    <substitution_definition names="nbsp">
        \u00A0
    <substitution_definition names="bne">
        =
        \u20e5
    <substitution_definition names="Omega">
        \u03a9
"""],
# Malformed use: missing argument, argument consisting only of a comment,
# text that is not a valid code (passed through unchanged), and the
# directive outside of a substitution definition.
["""
Bad input:

.. |empty| unicode::
.. |empty too| unicode:: .. comment doesn't count as content
.. |not hex| unicode:: 0xHEX
.. |not all hex| unicode:: UABCX
.. unicode:: not in a substitution definition
""",
"""\
<document source="test data">
    <paragraph>
        Bad input:
    <system_message level="3" line="4" source="test data" type="ERROR">
        <paragraph>
            Error in "unicode" directive:
            1 argument(s) required, 0 supplied.
        <literal_block xml:space="preserve">
            unicode::
    <system_message level="2" line="4" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "empty" empty or invalid.
        <literal_block xml:space="preserve">
            .. |empty| unicode::
    <system_message level="2" line="5" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "empty too" empty or invalid.
        <literal_block xml:space="preserve">
            .. |empty too| unicode:: .. comment doesn't count as content
    <substitution_definition names="not\\ hex">
        0xHEX
    <substitution_definition names="not\\ all\\ hex">
        UABCX
    <system_message level="3" line="8" source="test data" type="ERROR">
        <paragraph>
            Invalid context: the "unicode" directive can only be used within a substitution definition.
        <literal_block xml:space="preserve">
            .. unicode:: not in a substitution definition
"""],
# Trailing ".. comments" are dropped; plain text and codes may be mixed.
# The "\n\" endings in the expected dump keep trailing spaces of text
# nodes visible in the source.
["""
Testing comments and extra text.

Copyright |copy| 2003, |BogusMegaCorp (TM)|.

.. |copy| unicode:: 0xA9 .. copyright sign
.. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122
   .. with trademark sign
""",
"""\
<document source="test data">
    <paragraph>
        Testing comments and extra text.
    <paragraph>
        Copyright \n\
        <substitution_reference refname="copy">
            copy
         2003, \n\
        <substitution_reference refname="BogusMegaCorp (TM)">
            BogusMegaCorp (TM)
        .
    <substitution_definition names="copy">
        \u00A9
    <substitution_definition names="BogusMegaCorp\\ (TM)">
        BogusMegaCorp
        \u2122
"""],
# Codes that `chr()` rejects.  The interpreter-specific error texts are
# filled in through the two "%s" placeholders.
["""
.. |too big for int| unicode:: 0x111111111111111111
.. |too big for unicode| unicode:: 0x11111111
""",
"""\
<document source="test data">
    <system_message level="3" line="2" source="test data" type="ERROR">
        <paragraph>
            Invalid character code: 0x111111111111111111
            ValueError: %s
        <literal_block xml:space="preserve">
            unicode:: 0x111111111111111111
    <system_message level="2" line="2" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "too big for int" empty or invalid.
        <literal_block xml:space="preserve">
            .. |too big for int| unicode:: 0x111111111111111111
    <system_message level="3" line="3" source="test data" type="ERROR">
        <paragraph>
            Invalid character code: 0x11111111
            %s
        <literal_block xml:space="preserve">
            unicode:: 0x11111111
    <system_message level="2" line="3" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "too big for unicode" empty or invalid.
        <literal_block xml:space="preserve">
            .. |too big for unicode| unicode:: 0x11111111
""" % (unichr_exception_string, invalid_char_code)]
]
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()
|