1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
|
import time
from collections import defaultdict
import pandas as pd
from shapely.geometry import Point
import geopandas
def _get_throttle_time(provider):
"""
Amount of time to wait between requests to a geocoding API, for providers
that specify rate limits in their terms of service.
"""
import geopy.geocoders
# https://operations.osmfoundation.org/policies/nominatim/
if provider == geopy.geocoders.Nominatim:
return 1
else:
return 0
def geocode(strings, provider=None, **kwargs):
    """
    Forward-geocode address strings into a GeoDataFrame of point locations.

    Parameters
    ----------
    strings : list or Series
        Addresses to geocode.
    provider : str or geopy geocoder class, optional
        Geocoding service to use. When omitted, 'photon' is used (see
        Photon's terms of service at https://photon.komoot.io).
        May be given either as the string name used by geopy (as listed in
        geopy.geocoders.SERVICE_TO_GEOCODER) or as a geopy geocoder class
        (e.g., geopy.geocoders.Photon).
    **kwargs
        Forwarded to the geocoder constructor. Some providers require
        additional arguments such as access keys; see each geocoder's
        specific parameters in geopy.geocoders.

    Returns
    -------
    GeoDataFrame
        One row per input, with ``geometry`` and ``address`` columns.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Geocoding requires geopy. Install it using 'pip install geopy'. See also
    https://github.com/geopy/geopy

    Examples
    --------
    >>> df = geopandas.tools.geocode(  # doctest: +SKIP
    ...     ["boston, ma", "1600 pennsylvania ave. washington, dc"]
    ... )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05863 42.35899)                          Boston, MA, United States
    1  POINT (-77.03651 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    service = "photon" if provider is None else provider
    # True selects the forward (address -> point) direction in _query.
    return _query(
        strings, True, service, _get_throttle_time(service), **kwargs
    )
def reverse_geocode(points, provider=None, **kwargs):
    """
    Reverse-geocode points into a GeoDataFrame of the matching addresses.

    Parameters
    ----------
    points : list or Series of Shapely Point objects
        Locations to look up; the x coordinate is longitude and the
        y coordinate is latitude.
    provider : str or geopy geocoder class, optional
        Geocoding service to use. When omitted, 'photon' is used (see
        Photon's terms of service at https://photon.komoot.io).
        May be given either as the string name used by geopy (as listed in
        geopy.geocoders.SERVICE_TO_GEOCODER) or as a geopy geocoder class
        (e.g., geopy.geocoders.Photon).
    **kwargs
        Forwarded to the geocoder constructor. Some providers require
        additional arguments such as access keys; see each geocoder's
        specific parameters in geopy.geocoders.

    Returns
    -------
    GeoDataFrame
        One row per input, with ``geometry`` and ``address`` columns.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Reverse geocoding requires geopy. Install it using 'pip install geopy'.
    See also https://github.com/geopy/geopy

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> df = geopandas.tools.reverse_geocode(  # doctest: +SKIP
    ...     [Point(-71.0594869, 42.3584697), Point(-77.0365305, 38.8977332)]
    ... )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05941 42.35837)       29 Court Sq, Boston, MA 02108, United States
    1  POINT (-77.03641 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    service = "photon" if provider is None else provider
    # False selects the reverse (point -> address) direction in _query.
    return _query(
        points, False, service, _get_throttle_time(service), **kwargs
    )
def _query(data, forward, provider, throttle_time, **kwargs):
    """Run a geopy geocoder over every element of ``data``.

    Parameters
    ----------
    data : list-like, Series or GeoSeries
        Address strings when ``forward`` is true, otherwise points whose
        ``x`` is longitude and ``y`` is latitude.
    forward : bool
        True to geocode addresses, False to reverse-geocode points.
    provider : str or geopy geocoder class
        Service name understood by geopy, or the geocoder class itself.
    throttle_time : int or float
        Seconds to sleep after each request.
    **kwargs
        Passed to the geocoder constructor.

    Returns
    -------
    GeoDataFrame
        Result of ``_prepare_geocode_result`` for the collected responses.
    """
    from geopy.geocoders import get_geocoder_for_service
    from geopy.geocoders.base import GeocoderQueryError

    # Normalise the input to a Series so that iteration yields (label, value).
    if forward:
        series = data if isinstance(data, pd.Series) else pd.Series(data)
    elif isinstance(data, geopandas.GeoSeries):
        series = data
    else:
        series = geopandas.GeoSeries(data)

    geocoder_cls = provider
    if isinstance(geocoder_cls, str):
        geocoder_cls = get_geocoder_for_service(geocoder_cls)
    coder = geocoder_cls(**kwargs)

    def _lookup(item):
        if forward:
            return coder.geocode(item)
        # geopy expects (lat, lon), i.e. (y, x)
        return coder.reverse((item.y, item.x), exactly_one=True)

    results = {}
    for label, item in series.items():
        try:
            outcome = _lookup(item)
        except (GeocoderQueryError, ValueError):
            # A failed query is recorded as an empty result, not raised.
            outcome = (None, None)
        results[label] = outcome
        time.sleep(throttle_time)

    return _prepare_geocode_result(results)
def _prepare_geocode_result(results):
    """Convert the geocode results to a GeoDataFrame.

    Parameters
    ----------
    results : dict
        Maps each index label to a geocoding outcome: either ``None`` (no
        match) or a value that unpacks as ``(address, (lat, lon))``, where
        the coordinate pair may itself be ``None``.

    Returns
    -------
    GeoDataFrame
        Indexed by the keys of ``results``, with ``geometry`` and
        ``address`` columns and CRS EPSG:4326. Entries without a location
        get an empty point; entries without a match get a ``None`` address.
    """
    geometries = []
    addresses = []
    for outcome in results.values():
        if outcome is None:
            address, loc = None, None
        else:
            address, loc = outcome
        # loc is (lat, lon) and Point wants (x=lon, y=lat)
        geometries.append(Point() if loc is None else Point(loc[1], loc[0]))
        addresses.append(address)

    # Both columns are spelled out explicitly so that an empty ``results``
    # still yields a frame with a ``geometry`` column for the CRS to apply
    # to, instead of a column-less frame.
    data = {"geometry": geometries, "address": addresses}
    return geopandas.GeoDataFrame(data, index=list(results), crs="EPSG:4326")
|