1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
import warnings
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_21
import pytest
from geopandas.testing import assert_geodataframe_equal
from pandas.testing import assert_index_equal
class TestMerging:
    """Tests for ``merge`` and ``pd.concat`` with GeoDataFrame / GeoSeries inputs."""

    def setup_method(self):
        """Build the small geo and non-geo fixtures used by the tests."""
        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
        self.series = pd.Series([1, 2, 3])
        self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
        self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})

    def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
        """Assert ``gdf`` has the expected geometry column name and CRS.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame whose metadata is checked.
        geometry_column_name : str, default "geometry"
            Expected value of ``gdf._geometry_column_name``.
        crs : optional
            Expected value of ``gdf.crs`` (compared with ``==``).
        """
        assert gdf._geometry_column_name == geometry_column_name
        assert gdf.crs == crs

    def test_merge(self):
        """Merging a GeoDataFrame with a DataFrame keeps type and metadata."""
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")

        # check result is a GeoDataFrame
        assert isinstance(res, GeoDataFrame)

        # check geometry property gives GeoSeries
        assert isinstance(res.geometry, GeoSeries)

        # check metadata
        self._check_metadata(res)

        # test that crs and other geometry name are preserved
        self.gdf.crs = "epsg:4326"
        self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
            "points"
        )
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res, "points", self.gdf.crs)

    def test_concat_axis0(self):
        """Row-wise concat of frames and of series returns geo-aware objects."""
        # frame
        res = pd.concat([self.gdf, self.gdf])
        assert res.shape == (6, 2)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)
        exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
        assert_geodataframe_equal(exp, res)

        # series
        res = pd.concat([self.gdf.geometry, self.gdf.geometry])
        assert res.shape == (6,)
        assert isinstance(res, GeoSeries)
        assert isinstance(res.geometry, GeoSeries)

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_crs(self):
        """Row-wise concat resolves, warns about, or rejects CRS combinations."""
        # CRS not set for both GeoDataFrame
        res = pd.concat([self.gdf, self.gdf])
        self._check_metadata(res)

        # CRS set for both GeoDataFrame, same CRS
        res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
        self._check_metadata(res1, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
            self._check_metadata(res2, crs="epsg:4326")

        # CRS set for both GeoDataFrame, different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # same CRS
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res3 = pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
            )
            self._check_metadata(res3, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
            )

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_unaligned_cols(self):
        """Concat of frames with mismatched geometry columns must not warn."""
        # https://github.com/geopandas/geopandas/issues/2679
        gdf = self.gdf.set_crs("epsg:4326").assign(
            geom=self.gdf.geometry.set_crs("epsg:4327")
        )
        both_geom_cols = gdf[["geom", "geometry"]]
        single_geom_col = gdf[["geometry"]]

        # Escalate any warning to an error so a spurious CRS warning fails the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, single_geom_col])

        # Check order of mismatch doesn't matter
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([single_geom_col, both_geom_cols])

        # Side effect of this fix, explicitly provided all none geoseries
        # will not be warned for (ideally this would still warn)
        explicit_all_none_case = gdf[["geometry"]].assign(
            geom=GeoSeries([None for _ in range(len(gdf))])
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, explicit_all_none_case])

        # Check concat with partially None col is not affected by the special casing
        # for all None no CRS handling
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            partial_none_case = self.gdf[["geometry"]].copy()
            partial_none_case.iloc[0] = None
            pd.concat([single_geom_col, partial_none_case])

    def test_concat_axis0_crs_wkt_mismatch(self):
        """CRS objects that are equal but hash differently still concat cleanly."""
        pyproj = pytest.importorskip("pyproj")

        # https://github.com/geopandas/geopandas/issues/326#issuecomment-1727958475
        wkt_template = """GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[SCOPE["Horizontal component of 3D system."],
AREA["World.{}"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"""
        wkt_v1 = wkt_template.format("")
        wkt_v2 = wkt_template.format(" ")  # add additional whitespace
        crs1 = pyproj.CRS.from_wkt(wkt_v1)
        crs2 = pyproj.CRS.from_wkt(wkt_v2)

        # pyproj crs __hash__ is based on WKT strings, which means these are
        # distinct members of a set but still compare equal with ==
        assert len({crs1, crs2}) == 2
        assert crs1 == crs2

        expected = pd.concat([self.gdf, self.gdf]).set_crs(crs1)
        res = pd.concat([self.gdf.set_crs(crs1), self.gdf.set_crs(crs2)])
        assert_geodataframe_equal(expected, res)

    def test_concat_axis1(self):
        """Column-wise concat of a GeoDataFrame and a DataFrame stays geo-aware."""
        res = pd.concat([self.gdf, self.df], axis=1)

        assert res.shape == (3, 4)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)

    def test_concat_axis1_multiple_geodataframes(self):
        """Column-wise concat raises on duplicate geometry column names."""
        # https://github.com/geopandas/geopandas/issues/1230
        # Expect that concat should fail gracefully if duplicate column names belonging
        # to geometry columns are introduced.
        if PANDAS_GE_21:
            # _constructor_from_mgr changes mean we now get the concat specific error
            # message in this case too
            expected_err = (
                "Concat operation has resulted in multiple columns using the geometry "
                "column name 'geometry'."
            )
        else:
            expected_err = (
                "GeoDataFrame does not support multiple columns using the geometry"
                " column name 'geometry'"
            )
        with pytest.raises(ValueError, match=expected_err):
            pd.concat([self.gdf, self.gdf], axis=1)

        # Check case is handled if custom geometry column name is used
        df2 = self.gdf.rename_geometry("geom")
        expected_err2 = (
            "Concat operation has resulted in multiple columns using the geometry "
            "column name 'geom'."
        )
        with pytest.raises(ValueError, match=expected_err2):
            pd.concat([df2, df2], axis=1)

        if HAS_PYPROJ:
            # Check that two geometry columns is fine, if they have different names
            res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
            # check metadata comes from first df
            self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_concat_axis1_geoseries(self):
        """Column-wise concat of two GeoSeries sets no active geometry column."""
        gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
        result = pd.concat([gseries2, self.gseries], axis=1)
        # Note this is not consistent with concat([gdf, gdf], axis=1) where the
        # left metadata is set on the result. This is deliberate for now.
        assert type(result) is GeoDataFrame
        assert result._geometry_column_name is None
        assert_index_equal(pd.Index([0, 1]), result.columns)

        gseries2.name = "foo"
        result2 = pd.concat([gseries2, self.gseries], axis=1)
        assert type(result2) is GeoDataFrame
        # Check the frame built from the named series; the first ``result`` was
        # already verified above.
        assert result2._geometry_column_name is None
        assert_index_equal(pd.Index(["foo", 0]), result2.columns)
|