1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
# ----------------------------------------------------------------------------
# "THE BEER-WARE LICENSE" (Revision 42):
# Daniel Kratzert <dkratzert@gmx.de> wrote this file. As long as you retain
# this notice you can do whatever you want with this stuff. If we meet some day,
# and you think this stuff is worth it, you can buy me a beer in return.
# ----------------------------------------------------------------------------
import unittest
import gemmi
from packaging.version import Version
from finalcif.cif.text import quote, utf8_to_str, retranslate_delimiter, delimit_string, charcters, string_to_utf8
class TestText(unittest.TestCase):
    """Tests for the CIF text helpers imported from ``finalcif.cif.text``.

    Covers quoting (``quote``), delimiting and re-translating special
    characters (``delimit_string``, ``utf8_to_str``, ``retranslate_delimiter``,
    ``string_to_utf8``) and storing delimited values in a gemmi CIF block.
    """

    def setUp(self) -> None:
        # Each test starts with a fresh, empty block inside a new document.
        document = gemmi.cif.Document()
        self.block: gemmi.cif.Block = document.add_new_block('new-block')

    def _assert_delimited_pair(self, value: str) -> None:
        """Store ``delimit_string(value)`` as ``_foobar`` and check the pair read back.

        The pair found in the block is expected to carry *value* unchanged.
        These tests expect ``find_pair`` to yield a tuple with
        gemmi >= 0.5.7 and a list with older versions, so the expected
        container is chosen from the installed gemmi version.
        """
        tag = '_foobar'
        self.block.set_pair(tag, delimit_string(value))
        if Version(gemmi.__version__) >= Version('0.5.7'):
            expected = (tag, value)
        else:
            expected = [tag, value]
        self.assertEqual(expected, self.block.find_pair(tag))

    def test_quote_short(self):
        # A short text is expected to be wrapped in single quotes.
        q = quote('Hello this is a test for a quoted text')
        self.assertEqual("'Hello this is a test for a quoted text'", q)

    def test_quote_long(self):
        # A text wider than 80 characters is expected as a wrapped,
        # semicolon-delimited text field.
        q = quote('This is a moch longer text, because I want to see what this method does with text over 80 '
                  'characters wide. Let\'s add also some special characters; ?!"§$%&/()=`? Oh yeah!#++-_.,:;')
        quoted = (";This is a moch longer text, because I want to see what this method does with\n"
                  "text over 80 characters wide. Let's add also some special characters;\n"
                  "?!\"§$%&/()=`? Oh yeah!#++-_.,:;\n"
                  ";")
        self.assertEqual(quoted, q)

    def test_set_pair_delimited_empty(self):
        self._assert_delimited_pair('')

    def test_set_pair_delimited_question(self):
        self._assert_delimited_pair('?')

    def test_set_pair_delimited_number(self):
        self._assert_delimited_pair('1.123')

    def test_set_pair_delimited_with_newline(self):
        self._assert_delimited_pair('abc\ndef foo')

    def test_delimit_ut8_to_cif_str(self):
        s = utf8_to_str('100 °C')
        self.assertEqual(r'100 \%C', s)

    def test_cif_str_to_utf8(self):
        r = retranslate_delimiter(r'100 \%C')
        self.assertEqual('100 °C', r)

    def test_retranslate_sentence(self):
        r = retranslate_delimiter(r"Crystals were grown from thf at -20 \%C.")
        self.assertEqual('Crystals were grown from thf at -20 °C.', r)

    def test_delimit_umlaut(self):
        self.assertEqual(r'\"a\"o\"u\,c', delimit_string('äöüç'))

    def test__backwards_delimit_umlaut(self):
        self.assertEqual('ä ö ü ç', retranslate_delimiter(r'\"a \"o \"u \,c'))

    def test_retranslate_all(self):
        # Round-trip every known character through delimit_string() and back.
        for char in charcters:
            # NOTE(review): the two literals below look alike; presumably they
            # are distinct code points excluded from the round trip -- verify.
            if char in ('Å', 'Å'):
                continue
            # subTest reports every failing character instead of stopping at
            # the first one.
            with self.subTest(char=char):
                self.assertEqual(char, retranslate_delimiter(delimit_string(char)))

    def test_translate_wrong_cif_umlauts(self):
        # This can fail if äöü are next to each other but this is unlikely
        self.assertEqual('ä ö ü', string_to_utf8(r'a\" o\" u\"'))

    def test_translate_wrong_cif_umlauts_next_to_each_other(self):
        # Pins the current result for wrongly written umlauts that directly
        # follow each other: the outcome is not 'äöü'.
        self.assertEqual(r'aöü\"', string_to_utf8(r'a\"o\"u\"'))
class TestHeavyUtf8(unittest.TestCase):
    """Encode and decode a string full of symbols from outside the CIF character set."""

    def setUp(self) -> None:
        # Input: an utf-8 string with characters that CIF does not know.
        self.txt = (
            "∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), "
            "ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), "
            "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"
        )
        # Expected encoding of self.txt, split at the same places as the input.
        # NOTE(review): the earlier comment described this as "a mix of CIF and
        # html entities", but the literal holds raw Unicode for the symbols
        # that are not translated -- confirm against utf8_to_str().
        self.quoted = (
            r"∮ E⋅da = Q, n \\rightarrow \\infty, ∑ f(i) = ∏ g(i), "
            r"∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, \a ∧ ¬\b = ¬(¬\a ∨ \b), "
            r"ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a \\neq b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), "
            r"2H~2~ + O~2~ ⇌ 2H~2~O, R = 4.7 k\W, ⌀ 200 mm"
        )

    def test_encode_heavy_utf8(self):
        # Encoding the input must give exactly the expected quoted string.
        encoded = utf8_to_str(self.txt)
        self.assertEqual(self.quoted, encoded)

    def test_encode_and_decode_utf8(self):
        # Encoding and immediately decoding must give back the input.
        encoded = utf8_to_str(self.txt)
        decoded = retranslate_delimiter(encoded)
        self.assertEqual(self.txt, decoded)
|