1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
from shapely.geometry import Point
from geopandas.tests.util import _NATURALEARTH_LOWRES
from geopandas.tests.util import _NATURALEARTH_CITIES
from geopandas import read_file, GeoSeries
# Derive list of valid query predicates based on underlying index backend;
# we have to create a non-empty instance of the index to get these
index = GeoSeries([Point(0, 0)]).sindex
predicates = sorted(p for p in index.valid_query_predicates if p is not None)
geom_types = ("mixed", "points", "polygons")
def generate_test_df():
world = read_file(_NATURALEARTH_LOWRES)
capitals = read_file(_NATURALEARTH_CITIES)
countries = world.to_crs("epsg:3395")[["geometry"]]
capitals = capitals.to_crs("epsg:3395")[["geometry"]]
mixed = capitals.append(countries) # get a mix of geometries
points = capitals
polygons = countries
# filter out invalid geometries
data = {
"mixed": mixed[mixed.is_valid],
"points": points[points.is_valid],
"polygons": polygons[polygons.is_valid],
}
# ensure index is pre-generated
for data_type in data.keys():
data[data_type].sindex.query(data[data_type].geometry.values[0])
return data
class BenchIntersection:
param_names = ["input_geom_type", "tree_geom_type"]
params = [
geom_types,
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
# cache bounds so that bound creation is not counted in benchmarks
self.bounds = {
data_type: [g.bounds for g in self.data[data_type].geometry]
for data_type in self.data.keys()
}
def time_intersects(self, input_geom_type, tree_geom_type):
tree = self.data[tree_geom_type].sindex
for bounds in self.bounds[input_geom_type]:
tree.intersection(bounds)
class BenchIndexCreation:
param_names = ["tree_geom_type"]
params = [
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
def time_index_creation(self, tree_geom_type):
"""Time creation of spatial index.
Note: requires running a single query to ensure that
lazy-building indexes are actually built.
"""
# Note: the GeoDataFram._sindex_generated attribute will
# be removed by GH#1444 but is kept here (in the benchmarks
# so that we can compare pre GH#1444 to post GH#1444 if needed
self.data[tree_geom_type]._sindex_generated = None
self.data[tree_geom_type].geometry.values._sindex = None
tree = self.data[tree_geom_type].sindex
# also do a single query to ensure the index is actually
# generated and used
tree.query(self.data[tree_geom_type].geometry.values[0])
class BenchQuery:
param_names = ["predicate", "input_geom_type", "tree_geom_type"]
params = [
predicates,
geom_types,
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
def time_query_bulk(self, predicate, input_geom_type, tree_geom_type):
self.data[tree_geom_type].sindex.query(
self.data[input_geom_type].geometry.values,
predicate=predicate,
)
def time_query(self, predicate, input_geom_type, tree_geom_type):
tree = self.data[tree_geom_type].sindex
for geom in self.data[input_geom_type].geometry.values:
tree.query(geom, predicate=predicate)
|