File: test_encoding.py

package info (click to toggle)
python-clevercsv 0.7.5%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 872 kB
  • sloc: python: 5,076; ansic: 763; makefile: 81
file content (88 lines) | stat: -rw-r--r-- 2,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-

"""Unit tests for encoding detection

Author: G.J.J. van den Burg
License: See the LICENSE file.

This file is part of CleverCSV.

"""

import os
import platform
import tempfile
import unittest

from clevercsv.encoding import get_encoding
from clevercsv.write import writer


class EncodingTestCase(unittest.TestCase):
    """Check that ``get_encoding`` reports the encoding a file was written in.

    Each entry of ``cases`` is a dict with:

    - ``table``: the rows written to the temporary CSV file,
    - ``encoding``: the text encoding used to write the file, which is also
      the value ``test_encoding_chardet`` expects to be detected,
    - ``cchardet_encoding``: the name expected from cchardet-based
      detection; no active test in this class reads it.
    """

    cases = [
        {
            "table": [["Å", "B", "C"], [1, 2, 3], [4, 5, 6]],
            "encoding": "ISO-8859-1",
            "cchardet_encoding": "WINDOWS-1252",
        },
        {
            "table": [["A", "B", "C"], [1, 2, 3], [4, 5, 6]],
            "encoding": "ascii",
            "cchardet_encoding": "ASCII",
        },
        {
            "table": [["亜唖", "娃阿", "哀愛"], [1, 2, 3], ["挨", "姶", "葵"]],
            "encoding": "ISO-2022-JP",
            "cchardet_encoding": "ISO-2022-JP",
        },
    ]

    def setUp(self):
        """Start every test with an empty list of temporary file paths."""
        self._tmpfiles = []

    def tearDown(self):
        """Delete every temporary file registered during the test."""
        for f in self._tmpfiles:
            os.unlink(f)

    def _build_file(self, table, encoding):
        """Write ``table`` to a new temporary CSV file and return its path.

        The rows are written with the "excel" dialect to a file opened in
        text mode with the given ``encoding``. The path is recorded in
        ``self._tmpfiles`` so that ``tearDown`` removes the file.
        """
        tmpfd, tmpfname = tempfile.mkstemp(
            prefix="ccsv_",
            suffix=".csv",
        )
        # Register the path before anything else can fail: mkstemp has
        # already created the file on disk, so it has to be cleaned up even
        # when opening or writing raises.
        self._tmpfiles.append(tmpfname)
        # The context manager closes the handle on success and on failure.
        with os.fdopen(tmpfd, "w", newline=None, encoding=encoding) as tmpfp:
            w = writer(tmpfp, dialect="excel")
            w.writerows(table)
        return tmpfname

    def test_encoding_chardet(self):
        """Compare ``get_encoding(..., try_cchardet=False)`` to each case."""
        for case in self.cases:
            table = case["table"]
            encoding = case["encoding"]
            # One subTest per case so a failure names the encoding involved
            # and the remaining cases still run.
            with self.subTest(encoding=encoding):
                tmpfname = self._build_file(table, encoding)
                detected = get_encoding(tmpfname, try_cchardet=False)
                self.assertEqual(encoding, detected)

# The cchardet test below is disabled entirely, as cchardet isn't packaged.
# Its Windows-only skip was meant to be temporary, until
# https://github.com/faust-streaming/cChardet/pull/15 is resolved.
#    @unittest.skipIf(
#        platform.system() == "Windows",
#        reason="No faust-cchardet wheels for Windows (yet)",
#    )
#    def test_encoding_cchardet(self):
#        for case in self.cases:
#            table = case["table"]
#            encoding = case["encoding"]
#            with self.subTest(encoding=encoding):
#                out_encoding = case["cchardet_encoding"]
#                tmpfname = self._build_file(table, encoding)
#                detected = get_encoding(tmpfname, try_cchardet=True)
#                self.assertEqual(out_encoding, detected)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()