File: test_unicode.py

package info (click to toggle)
fiona 1.10.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,632 kB
  • sloc: python: 12,616; makefile: 214; sh: 45
file content (177 lines) | stat: -rw-r--r-- 5,963 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""Tests of path and field encoding."""

import os
import shutil
import sys
import tempfile
import unittest

import pytest

import fiona
from fiona.errors import SchemaError
from fiona.model import Feature


class TestUnicodePath(unittest.TestCase):
    """Open datasets stored under a directory whose name is not ASCII."""

    def setUp(self):
        """Copy the test data into a temporary directory named "français"."""
        tempdir = tempfile.mkdtemp()
        # Register removal right away: unittest does not call tearDown() when
        # setUp() raises, but it does run registered cleanups, so the
        # temporary tree is removed even if copytree() below fails.
        self.addCleanup(shutil.rmtree, tempdir)
        self.dir = os.path.join(tempdir, "français")
        shutil.copytree(os.path.join(os.path.dirname(__file__), "data"), self.dir)

    def test_unicode_path(self):
        """A shapefile addressed by its full non-ASCII path can be opened."""
        path = os.path.join(self.dir, "coutwildrnp.shp")
        with fiona.open(path) as c:
            assert len(c) == 67

    def test_unicode_path_layer(self):
        """A layer can be opened by name from the non-ASCII directory."""
        path = self.dir
        layer = "coutwildrnp"
        with fiona.open(path, layer=layer) as c:
            assert len(c) == 67

    # The body used to be wrapped in ``if sys.version_info < (3,)``, which
    # made the test pass on Python 3 without executing anything. Skipping
    # explicitly keeps the same condition but reports it honestly.
    @unittest.skipIf(
        sys.version_info >= (3,),
        "only exercised on Python 2 (original version guard)",
    )
    def test_utf8_path(self):
        """Open the shapefile by path; only run on Python 2."""
        path = os.path.join(self.dir, "coutwildrnp.shp")
        with fiona.open(path) as c:
            assert len(c) == 67


class TestUnicodeStringField(unittest.TestCase):
    """Write and read back shapefiles with non-ASCII field names and values."""

    def setUp(self):
        """Create a scratch directory for the datasets written by a test."""
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory and everything inside it."""
        shutil.rmtree(self.tempdir)

    @pytest.mark.xfail(reason="OGR silently fails to convert strings")
    def test_write_mismatch(self):
        """TOFIX: values that do not fit the declared encoding are not rejected

        Background, as recorded by the original authors: when a latin-1
        output file is requested and a property value cannot be represented
        in latin-1, OGR raises no error and writes the utf-8 encoded bytes
        instead. This may be specific to shapefiles. The write therefore
        succeeds, and the problem only shows up when the data is read back
        as latin-1.
        """
        target = os.path.join(self.tempdir, "test-write-fail.shp")
        point_schema = {
            "geometry": "Point",
            "properties": {"label": "str", "num": "int"},
        }
        records = [
            {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [0, 0]},
                "properties": {"label": "徐汇区", "num": 0},
            }
        ]

        with fiona.open(
            target,
            "w",
            driver="ESRI Shapefile",
            schema=point_schema,
            encoding="latin1",
        ) as dst:
            dst.writerecords(records)

        with fiona.open(self.tempdir, encoding="latin1") as src:
            first = next(iter(src))
            # This is the assertion that is expected to fail.
            assert first.properties["label"] == "徐汇区"

    def test_write_utf8(self):
        """A utf-8 shapefile keeps non-ASCII field names and values."""
        target = os.path.join(self.tempdir, "test-write.shp")
        point_schema = {
            "geometry": "Point",
            "properties": {"label": "str", "verit\xe9": "int"},
        }
        record = Feature.from_dict(
            type="Feature",
            geometry={"type": "Point", "coordinates": [0, 0]},
            properties={"label": "Ba\u2019kelalan", "verit\xe9": 0},
        )

        with fiona.open(
            target,
            "w",
            "ESRI Shapefile",
            schema=point_schema,
            encoding="utf-8",
        ) as dst:
            dst.writerecords([record])

        with fiona.open(self.tempdir, encoding="utf-8") as src:
            first = next(iter(src))
            assert first.properties["label"] == "Ba\u2019kelalan"
            assert first.properties["verit\xe9"] == 0

    @pytest.mark.iconv
    def test_write_gb18030(self):
        """Can write a simplified Chinese shapefile"""
        target = os.path.join(self.tempdir, "test-write-gb18030.shp")
        point_schema = {
            "geometry": "Point",
            "properties": {"label": "str", "num": "int"},
        }
        record = Feature.from_dict(
            type="Feature",
            geometry={"type": "Point", "coordinates": [0, 0]},
            properties={"label": "徐汇区", "num": 0},
        )

        with fiona.open(
            target,
            "w",
            driver="ESRI Shapefile",
            schema=point_schema,
            encoding="gb18030",
        ) as dst:
            dst.writerecords([record])

        with fiona.open(self.tempdir, encoding="gb18030") as src:
            first = next(iter(src))
            assert first.properties["label"] == "徐汇区"
            assert first.properties["num"] == 0

    @pytest.mark.iconv
    def test_gb2312_field_wrong_encoding(self):
        """Creating a field whose name the encoding cannot represent fails

        Per the original note: the ESRI Shapefile driver falls back to
        ISO-8859-1 when no encoding is given, and that encoding cannot
        represent the field name used here. This used to go unnoticed until
        a feature was written, at which point a KeyError surfaced. A more
        useful error is now raised when the collection is opened.

        See GH#595.
        """
        field_name = "区县名称"
        open_options = {
            "schema": {
                "properties": {field_name: "int"},
                "geometry": "Point",
            },
            "driver": "ESRI Shapefile",
        }
        record = Feature.from_dict(
            properties={field_name: 123},
            geometry={"type": "Point", "coordinates": [1, 2]},
        )

        # With an explicit encoding the write goes through.
        with_encoding = os.path.join(self.tempdir, "test1.shp")
        with fiona.open(
            with_encoding, "w", encoding="GB2312", **open_options
        ) as collection:
            collection.write(record)

        # Without one, opening for write must raise SchemaError.
        without_encoding = os.path.join(self.tempdir, "test2.shp")
        with pytest.raises(SchemaError):
            fiona.open(without_encoding, "w", **open_options)