File: sindex.py

package info (click to toggle)
python-geopandas 1.1.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,752 kB
  • sloc: python: 26,021; makefile: 147; sh: 25
file content (102 lines) | stat: -rw-r--r-- 3,449 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from shapely.geometry import Point
from geopandas.tests.util import _NATURALEARTH_LOWRES
from geopandas.tests.util import _NATURALEARTH_CITIES
from geopandas import read_file, GeoSeries


# Derive list of valid query predicates based on underlying index backend;
# we have to create a non-empty instance of the index to get these
index = GeoSeries([Point(0, 0)]).sindex
predicates = sorted(p for p in index.valid_query_predicates if p is not None)

geom_types = ("mixed", "points", "polygons")


def generate_test_df():
    world = read_file(_NATURALEARTH_LOWRES)
    capitals = read_file(_NATURALEARTH_CITIES)
    countries = world.to_crs("epsg:3395")[["geometry"]]
    capitals = capitals.to_crs("epsg:3395")[["geometry"]]
    mixed = capitals.append(countries)  # get a mix of geometries
    points = capitals
    polygons = countries
    # filter out invalid geometries
    data = {
        "mixed": mixed[mixed.is_valid],
        "points": points[points.is_valid],
        "polygons": polygons[polygons.is_valid],
    }
    # ensure index is pre-generated
    for data_type in data.keys():
        data[data_type].sindex.query(data[data_type].geometry.values[0])
    return data


class BenchIntersection:
    param_names = ["input_geom_type", "tree_geom_type"]
    params = [
        geom_types,
        geom_types,
    ]

    def setup(self, *args):
        self.data = generate_test_df()
        # cache bounds so that bound creation is not counted in benchmarks
        self.bounds = {
            data_type: [g.bounds for g in self.data[data_type].geometry]
            for data_type in self.data.keys()
        }

    def time_intersects(self, input_geom_type, tree_geom_type):
        tree = self.data[tree_geom_type].sindex
        for bounds in self.bounds[input_geom_type]:
            tree.intersection(bounds)


class BenchIndexCreation:
    param_names = ["tree_geom_type"]
    params = [
        geom_types,
    ]

    def setup(self, *args):
        self.data = generate_test_df()

    def time_index_creation(self, tree_geom_type):
        """Time creation of spatial index.

        Note: requires running a single query to ensure that
        lazy-building indexes are actually built.
        """
        # Note: the GeoDataFram._sindex_generated attribute will
        # be removed by GH#1444 but is kept here (in the benchmarks
        # so that we can compare pre GH#1444 to post GH#1444 if needed
        self.data[tree_geom_type]._sindex_generated = None
        self.data[tree_geom_type].geometry.values._sindex = None
        tree = self.data[tree_geom_type].sindex
        # also do a single query to ensure the index is actually
        # generated and used
        tree.query(self.data[tree_geom_type].geometry.values[0])


class BenchQuery:
    param_names = ["predicate", "input_geom_type", "tree_geom_type"]
    params = [
        predicates,
        geom_types,
        geom_types,
    ]

    def setup(self, *args):
        self.data = generate_test_df()

    def time_query_bulk(self, predicate, input_geom_type, tree_geom_type):
        self.data[tree_geom_type].sindex.query(
            self.data[input_geom_type].geometry.values,
            predicate=predicate,
        )

    def time_query(self, predicate, input_geom_type, tree_geom_type):
        tree = self.data[tree_geom_type].sindex
        for geom in self.data[input_geom_type].geometry.values:
            tree.query(geom, predicate=predicate)