File: file.py

package info (click to toggle)
python-geopandas 0.8.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 12,004 kB
  • sloc: python: 14,226; makefile: 150; sh: 14
file content (359 lines) | stat: -rw-r--r-- 12,624 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
from distutils.version import LooseVersion

import io
import numpy as np
import pandas as pd

import fiona
import pyproj
from shapely.geometry import mapping
from shapely.geometry.base import BaseGeometry

try:
    from fiona import Env as fiona_env
except ImportError:
    from fiona import drivers as fiona_env

from geopandas import GeoDataFrame, GeoSeries


# Adapted from pandas.io.common
from urllib.request import urlopen as _urlopen
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative


_FIONA18 = LooseVersion(fiona.__version__) >= LooseVersion("1.8")
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")


def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False


def _read_file(filename, bbox=None, mask=None, rows=None, **kwargs):
    """
    Returns a GeoDataFrame from a file or URL.

    .. versionadded:: 0.7.0 mask, rows

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter features by given bounding box, GeoSeries, GeoDataFrame or a
        shapely geometry. CRS mis-matches are resolved if given a GeoSeries
        or GeoDataFrame. Cannot be used with mask.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    **kwargs :
        Keyword args to be passed to the `open` or `BytesCollection` method
        in the fiona library when opening the file. For more information on
        possible keywords, type:
        ``import fiona; help(fiona.open)``

    Examples
    --------
    >>> df = geopandas.read_file("nybb.shp")

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    if _is_url(filename):
        req = _urlopen(filename)
        path_or_bytes = req.read()
        reader = fiona.BytesCollection
    elif isinstance(filename, io.TextIOBase):
        path_or_bytes = filename.read()
        reader = fiona.open
    else:
        path_or_bytes = filename
        reader = fiona.open

    with fiona_env():
        with reader(path_or_bytes, **kwargs) as features:

            # In a future Fiona release the crs attribute of features will
            # no longer be a dict, but will behave like a dict. So this should
            # be forwards compatible
            crs = (
                features.crs["init"]
                if features.crs and "init" in features.crs
                else features.crs_wkt
            )

            # handle loading the bounding box
            if bbox is not None:
                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
                    bbox = tuple(bbox.to_crs(crs).total_bounds)
                elif isinstance(bbox, BaseGeometry):
                    bbox = bbox.bounds
                assert len(bbox) == 4
            # handle loading the mask
            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
                mask = mapping(mask.to_crs(crs).unary_union)
            elif isinstance(mask, BaseGeometry):
                mask = mapping(mask)
            # setup the data loading filter
            if rows is not None:
                if isinstance(rows, int):
                    rows = slice(rows)
                elif not isinstance(rows, slice):
                    raise TypeError("'rows' must be an integer or a slice.")
                f_filt = features.filter(
                    rows.start, rows.stop, rows.step, bbox=bbox, mask=mask
                )
            elif any((bbox, mask)):
                f_filt = features.filter(bbox=bbox, mask=mask)
            else:
                f_filt = features
            # get list of columns
            columns = list(features.schema["properties"])
            if kwargs.get("ignore_geometry", False):
                return pd.DataFrame(
                    [record["properties"] for record in f_filt], columns=columns
                )

            return GeoDataFrame.from_features(
                f_filt, crs=crs, columns=columns + ["geometry"]
            )


def read_file(*args, **kwargs):
    import warnings

    warnings.warn(
        "geopandas.io.file.read_file() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_file() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    return _read_file(*args, **kwargs)


def to_file(*args, **kwargs):
    import warnings

    warnings.warn(
        "geopandas.io.file.to_file() is intended for internal "
        "use only, and will be deprecated. Use GeoDataFrame.to_file() "
        "or GeoSeries.to_file() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    return _to_file(*args, **kwargs)


def _to_file(
    df,
    filename,
    driver="ESRI Shapefile",
    schema=None,
    index=None,
    mode="w",
    crs=None,
    **kwargs
):
    """
    Write this GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:
    >>> import fiona
    >>> fiona.supported_drivers

    Parameters
    ----------
    df : GeoDataFrame to be written
    filename : string
        File path or file handle to write to.
    driver : string, default 'ESRI Shapefile'
        The OGR format driver used to write the vector file.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the schema based on each column's dtype
    index : bool, default None
        If True, write index into one or more columns (for MultiIndex).
        Default None writes the index into one or more columns only if
        the index is named, is a MultiIndex, or has a non-integer data
        type. If False, no index is written.

        .. versionadded:: 0.7
            Previously the index was not written.
    mode : string, default 'w'
        The write mode, 'w' to overwrite the existing file and 'a' to append.
        Not all drivers support appending. The drivers that support appending
        are listed in fiona.supported_drivers or
        https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py
    crs : pyproj.CRS, default None
        If specified, the CRS is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the crs based on crs df attribute.
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    The *kwargs* are passed to fiona.open and can be used to write
    to multi-layer data, store data within archives (zip files), etc.
    The path may specify a fiona VSI scheme.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    if index is None:
        # Determine if index attribute(s) should be saved to file
        index = list(df.index.names) != [None] or type(df.index) not in (
            pd.RangeIndex,
            pd.Int64Index,
        )
    if index:
        df = df.reset_index(drop=False)
    if schema is None:
        schema = infer_schema(df)
    if crs:
        crs = pyproj.CRS.from_user_input(crs)
    else:
        crs = df.crs
    with fiona_env():
        crs_wkt = None
        try:
            gdal_version = fiona.env.get_gdal_release_name()
        except AttributeError:
            gdal_version = "2.0.0"  # just assume it is not the latest
        if LooseVersion(gdal_version) >= LooseVersion("3.0.0") and crs:
            crs_wkt = crs.to_wkt()
        elif crs:
            crs_wkt = crs.to_wkt("WKT1_GDAL")
        with fiona.open(
            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
        ) as colxn:
            colxn.writerecords(df.iterfeatures())


def infer_schema(df):
    from collections import OrderedDict

    # TODO: test pandas string type and boolean type once released
    types = {"Int64": "int", "string": "str", "boolean": "bool"}

    def convert_type(column, in_type):
        if in_type == object:
            return "str"
        if in_type.name.startswith("datetime64"):
            # numpy datetime type regardless of frequency
            return "datetime"
        if str(in_type) in types:
            out_type = types[str(in_type)]
        else:
            out_type = type(np.zeros(1, in_type).item()).__name__
        if out_type == "long":
            out_type = "int"
        if not _FIONA18 and out_type == "bool":
            raise ValueError(
                'column "{}" is boolean type, '.format(column)
                + "which is unsupported in file writing with fiona "
                "< 1.8. Consider casting the column to int type."
            )
        return out_type

    properties = OrderedDict(
        [
            (col, convert_type(col, _type))
            for col, _type in zip(df.columns, df.dtypes)
            if col != df._geometry_column_name
        ]
    )

    if df.empty:
        raise ValueError("Cannot write empty DataFrame to file.")

    # Since https://github.com/Toblerity/Fiona/issues/446 resolution,
    # Fiona allows a list of geometry types
    geom_types = _geometry_types(df)

    schema = {"geometry": geom_types, "properties": properties}

    return schema


def _geometry_types(df):
    """
    Determine the geometry types in the GeoDataFrame for the schema.
    """
    if _FIONA18:
        # Starting from Fiona 1.8, schema submitted to fiona to write a gdf
        # can have mixed geometries:
        # - 3D and 2D shapes can coexist in inferred schema
        # - Shape and MultiShape types can (and must) coexist in inferred
        #   schema
        geom_types_2D = df[~df.geometry.has_z].geometry.geom_type.unique()
        geom_types_2D = [gtype for gtype in geom_types_2D if gtype is not None]
        geom_types_3D = df[df.geometry.has_z].geometry.geom_type.unique()
        geom_types_3D = ["3D " + gtype for gtype in geom_types_3D if gtype is not None]
        geom_types = geom_types_3D + geom_types_2D

    else:
        # Before Fiona 1.8, schema submitted to write a gdf should have
        # one single geometry type whenever possible:
        # - 3D and 2D shapes cannot coexist in inferred schema
        # - Shape and MultiShape can not coexist in inferred schema
        geom_types = _geometry_types_back_compat(df)

    if len(geom_types) == 0:
        # Default geometry type supported by Fiona
        # (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
        return "Unknown"

    if len(geom_types) == 1:
        geom_types = geom_types[0]

    return geom_types


def _geometry_types_back_compat(df):
    """
    for backward compatibility with Fiona<1.8 only
    """
    unique_geom_types = df.geometry.geom_type.unique()
    unique_geom_types = [gtype for gtype in unique_geom_types if gtype is not None]

    # merge single and Multi types (eg Polygon and MultiPolygon)
    unique_geom_types = [
        gtype
        for gtype in unique_geom_types
        if not gtype.startswith("Multi") or gtype[5:] not in unique_geom_types
    ]

    if df.geometry.has_z.any():
        # declare all geometries as 3D geometries
        unique_geom_types = ["3D " + type for type in unique_geom_types]
    # by default, all geometries are 2D geometries

    return unique_geom_types