File: test_unicode_util.py

package info (click to toggle)
pypy 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 107,216 kB
  • sloc: python: 1,201,787; ansic: 62,419; asm: 5,169; cpp: 3,017; sh: 2,534; makefile: 545; xml: 243; lisp: 45; awk: 4
file content (73 lines) | stat: -rwxr-xr-x 2,075 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
import py
import codecs
from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode, tryencode

# Small digraph source whose node names include non-ASCII (Greek) letters;
# the tests below use it as round-trip data for the string helpers.
SOURCE1 = (
    u"digraph G{\n"
    u"λ -> b\n"
    u"b -> μ\n"
    u"}\n"
)

# Base name of the scratch file that test_file writes and reads back.
FILENAME = "test.dot"

class TestUnicodeUtil(object):
    """Tests for the forcestr / forceunicode / tryencode string helpers."""

    def test_idempotent(self):
        """A forcestr -> forceunicode round trip gives back the input."""
        for text in (u"a", u"λ", SOURCE1):
            assert forceunicode(forcestr(text)) == text

        # Same round trip in the other direction, starting from a plain str.
        x = "a"
        assert forcestr(forceunicode(x)) == x

        # utf-8 encoded.
        # fragile, does not consider RAW_ENCODING
        # x = "\xef\xbb\xbf\xce\xbb"
        # assert forcestr(forceunicode(x)) == x

    def test_does_not_double_encode(self):
        """Applying a helper to its own output leaves the value unchanged."""
        x = u"λ"
        x_e = forcestr(x)
        assert forcestr(x_e) == x_e

        x_u = forceunicode(x_e)
        assert forceunicode(x_u) == x_u

    def test_file(self):
        """SOURCE1 written with RAW_ENCODING can be read back intact."""
        udir = py.path.local.make_numbered_dir(prefix='usession-dot-', keep=3)
        full_filename = str(udir.join(FILENAME))

        # Context managers guarantee the handles are closed even when a
        # write/read or an assertion fails part-way through.
        with codecs.open(full_filename, 'wb', RAW_ENCODING) as f:
            f.write(SOURCE1)

        # Read through the plain built-in open() and convert the result
        # with forceunicode.
        with open(full_filename) as f1:
            assert forceunicode(f1.read()) == SOURCE1

        # Read through a decoding codecs reader: no conversion needed.
        with codecs.open(full_filename, 'r', RAW_ENCODING) as f3:
            c = f3.read()
        assert c == SOURCE1

    def test_only_unicode_encode(self):
        """tryencode keeps an int as int and returns str for text inputs."""
        # Each input is paired with its expected result type so the two
        # cannot silently fall out of step.
        cases = [
            (1, int),
            (u"a", str),
            ("miau", str),
            (u"λ", str),
        ]
        for value, expected_type in cases:
            assert isinstance(tryencode(value), expected_type)

    def test_forceunicode_should_not_fail(self):
        """forceunicode must not raise on a malformed byte sequence."""
        garbage = "\xef\xff\xbb\xbf\xce\xbb\xff\xff"   # garbage with a lambda
        forceunicode(garbage)                          # should not raise

    def test_forcestr_should_not_fail(self):
        """forcestr must not raise on arbitrary unicode input."""
        garbage = u"\xef\xff\xbb\xbf\xce\xbb\xff\xff"  # garbage
        forcestr(garbage)                              # should not raise