File: test_merge.py

package info (click to toggle)
python-geopandas 1.1.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,752 kB
  • sloc: python: 26,021; makefile: 147; sh: 25
file content (230 lines) | stat: -rw-r--r-- 9,908 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import warnings

import pandas as pd

from shapely.geometry import Point

from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_21

import pytest
from geopandas.testing import assert_geodataframe_equal
from pandas.testing import assert_index_equal


class TestMerging:
    """Check that pandas ``merge``/``concat`` keep geopandas types and metadata."""

    def setup_method(self):
        """Build the small geo and plain pandas objects shared by the tests."""
        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
        self.series = pd.Series([1, 2, 3])
        self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
        self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})

    def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
        """Assert that ``gdf`` has the expected active geometry column name and CRS.

        Parameters
        ----------
        gdf : GeoDataFrame
            Object whose metadata is checked.
        geometry_column_name : str, default "geometry"
            Expected value of ``gdf._geometry_column_name``.
        crs : optional
            Expected value of ``gdf.crs`` (compared with ``==``).
        """
        assert gdf._geometry_column_name == geometry_column_name
        assert gdf.crs == crs

    def test_merge(self):
        """``GeoDataFrame.merge`` returns a GeoDataFrame and keeps its metadata."""
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")

        # check result is a GeoDataFrame
        assert isinstance(res, GeoDataFrame)

        # check geometry property gives GeoSeries
        assert isinstance(res.geometry, GeoSeries)

        # check metadata
        self._check_metadata(res)

        # test that crs and other geometry name are preserved
        self.gdf.crs = "epsg:4326"
        self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
            "points"
        )
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res, "points", self.gdf.crs)

    def test_concat_axis0(self):
        """Row-wise ``pd.concat`` of frames and of series keeps the geo types."""
        # frame
        res = pd.concat([self.gdf, self.gdf])
        assert res.shape == (6, 2)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)
        exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
        assert_geodataframe_equal(exp, res)

        # series
        res = pd.concat([self.gdf.geometry, self.gdf.geometry])
        assert res.shape == (6,)
        assert isinstance(res, GeoSeries)
        assert isinstance(res.geometry, GeoSeries)

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_crs(self):
        """Row-wise concat resolves, warns about, or rejects the inputs' CRS."""
        # CRS not set for both GeoDataFrame
        res = pd.concat([self.gdf, self.gdf])
        self._check_metadata(res)

        # CRS set for both GeoDataFrame, same CRS
        res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
        self._check_metadata(res1, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
            self._check_metadata(res2, crs="epsg:4326")

        # CRS set for both GeoDataFrame, different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # same CRS
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res3 = pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
            )
            self._check_metadata(res3, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
            )

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_unaligned_cols(self):
        """Row-wise concat of frames with different geometry columns must not warn."""
        # https://github.com/geopandas/geopandas/issues/2679
        gdf = self.gdf.set_crs("epsg:4326").assign(
            geom=self.gdf.geometry.set_crs("epsg:4327")
        )
        both_geom_cols = gdf[["geom", "geometry"]]
        single_geom_col = gdf[["geometry"]]
        # Turn every warning into an error so an unexpected one fails the test
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, single_geom_col])
        # Check order of mismatch doesn't matter
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([single_geom_col, both_geom_cols])

        # Side effect of this fix, explicitly provided all none geoseries
        # will not be warned for (ideally this would still warn)
        explicit_all_none_case = gdf[["geometry"]].assign(
            geom=GeoSeries([None for _ in range(len(gdf))])
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, explicit_all_none_case])

        # Check concat with partially None col is not affected by the special casing
        # for all None no CRS handling
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            partial_none_case = self.gdf[["geometry"]].copy()
            partial_none_case.iloc[0] = None
            pd.concat([single_geom_col, partial_none_case])

    def test_concat_axis0_crs_wkt_mismatch(self):
        """Concat accepts CRS objects that compare equal but hash differently."""
        pyproj = pytest.importorskip("pyproj")

        # https://github.com/geopandas/geopandas/issues/326#issuecomment-1727958475
        wkt_template = """GEOGCRS["WGS 84",
        ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,ORDER[1],
        ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,ORDER[2],
        ANGLEUNIT["degree",0.0174532925199433]],
        USAGE[SCOPE["Horizontal component of 3D system."],
        AREA["World.{}"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"""
        wkt_v1 = wkt_template.format("")
        wkt_v2 = wkt_template.format(" ")  # add additional whitespace
        crs1 = pyproj.CRS.from_wkt(wkt_v1)
        crs2 = pyproj.CRS.from_wkt(wkt_v2)
        # pyproj CRS __hash__ is based on the WKT string, so these two objects
        # are distinct members of a set even though they compare equal with ==
        assert len({crs1, crs2}) == 2
        assert crs1 == crs2
        expected = pd.concat([self.gdf, self.gdf]).set_crs(crs1)
        res = pd.concat([self.gdf.set_crs(crs1), self.gdf.set_crs(crs2)])
        assert_geodataframe_equal(expected, res)

    def test_concat_axis1(self):
        """Column-wise concat of a GeoDataFrame and a DataFrame is a GeoDataFrame."""
        res = pd.concat([self.gdf, self.df], axis=1)

        assert res.shape == (3, 4)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)

    def test_concat_axis1_multiple_geodataframes(self):
        """Column-wise concat rejects duplicated geometry column names."""
        # https://github.com/geopandas/geopandas/issues/1230
        # Expect that concat should fail gracefully if duplicate column names belonging
        # to geometry columns are introduced.
        if PANDAS_GE_21:
            # _constructor_from_mgr changes mean we now get the concat specific error
            # message in this case too
            expected_err = (
                "Concat operation has resulted in multiple columns using the geometry "
                "column name 'geometry'."
            )
        else:
            expected_err = (
                "GeoDataFrame does not support multiple columns using the geometry"
                " column name 'geometry'"
            )
        with pytest.raises(ValueError, match=expected_err):
            pd.concat([self.gdf, self.gdf], axis=1)

        # Check case is handled if custom geometry column name is used
        df2 = self.gdf.rename_geometry("geom")
        expected_err2 = (
            "Concat operation has resulted in multiple columns using the geometry "
            "column name 'geom'."
        )
        with pytest.raises(ValueError, match=expected_err2):
            pd.concat([df2, df2], axis=1)

        if HAS_PYPROJ:
            # Check that two geometry columns is fine, if they have different names
            res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
            # check metadata comes from first df
            self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_concat_axis1_geoseries(self):
        """Column-wise concat of two GeoSeries has no active geometry column."""
        gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
        result = pd.concat([gseries2, self.gseries], axis=1)
        # Note this is not consistent with concat([gdf, gdf], axis=1) where the
        # left metadata is set on the result. This is deliberate for now.
        assert type(result) is GeoDataFrame
        assert result._geometry_column_name is None
        assert_index_equal(pd.Index([0, 1]), result.columns)

        # Same checks when the left series carries an explicit name
        gseries2.name = "foo"
        result2 = pd.concat([gseries2, self.gseries], axis=1)
        assert type(result2) is GeoDataFrame
        assert result2._geometry_column_name is None
        assert_index_equal(pd.Index(["foo", 0]), result2.columns)