1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
from collections import defaultdict
import time
import numpy as np
import pandas as pd
from shapely.geometry import Point
import geopandas
def _get_throttle_time(provider):
    """
    Amount of time to wait between requests to a geocoding API, for providers
    that specify rate limits in their terms of service.

    Parameters
    ----------
    provider : str or geopy geocoder class
        Either a service name understood by
        ``geopy.geocoders.get_geocoder_for_service`` or the geocoder class
        itself.

    Returns
    -------
    int
        Seconds to wait after each request: 1 for Nominatim, 0 for every
        other provider.
    """
    import geopy.exc
    import geopy.geocoders

    if isinstance(provider, str):
        # The provider may still be a service name at this point; resolve it
        # so that "nominatim" is throttled exactly like the Nominatim class.
        try:
            provider = geopy.geocoders.get_geocoder_for_service(provider)
        except geopy.exc.GeocoderNotFound:
            # Unknown service name: no rate limit is known for it, so do not
            # throttle and do not raise from this helper.
            return 0

    # https://operations.osmfoundation.org/policies/nominatim/
    if provider == geopy.geocoders.Nominatim:
        return 1
    return 0
def geocode(strings, provider=None, **kwargs):
    """
    Geocode a set of strings and get a GeoDataFrame of the resulting points.

    Parameters
    ----------
    strings : list or Series of addresses to geocode
    provider : str or geopy.geocoder
        Specifies geocoding service to use. If none is provided,
        will use 'geocodefarm' with a rate limit applied (see the geocodefarm
        terms of service at:
        https://geocode.farm/geocoding/free-api-documentation/ ).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder class
        (e.g., geopy.geocoders.GeocodeFarm) may be used. The geocoder is
        constructed from it with ``**kwargs``.

        Some providers require additional arguments such as access keys
        See each geocoder's specific parameters in geopy.geocoders

    Returns
    -------
    GeoDataFrame
        The result of ``_query`` run in forward (address -> point) mode.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Geocoding requires geopy. Install it using 'pip install geopy'. See also
    https://github.com/geopy/geopy

    Examples
    --------
    >>> df = geocode(['boston, ma', '1600 pennsylvania ave. washington, dc'])  # doctest: +SKIP
    >>> df  # doctest: +SKIP
                                                 address  \\
    0                                    Boston, MA, USA
    1  1600 Pennsylvania Avenue Northwest, President'...

                             geometry
    0  POINT (-71.0597732 42.3584308)
    1  POINT (-77.0365305 38.8977332)
    """
    if provider is not None:
        # Caller picked the service; look up the delay that goes with it.
        delay = _get_throttle_time(provider)
    else:
        # Default service and its request spacing, see
        # https://geocode.farm/geocoding/free-api-documentation/
        provider, delay = "geocodefarm", 0.25

    return _query(strings, True, provider, delay, **kwargs)
def reverse_geocode(points, provider=None, **kwargs):
    """
    Reverse geocode a set of points and get a GeoDataFrame of the resulting
    addresses.

    Parameters
    ----------
    points : list or Series of Shapely Point objects.
        x coordinate is longitude
        y coordinate is latitude
    provider : str or geopy.geocoder (opt)
        Specifies geocoding service to use. If none is provided,
        will use 'geocodefarm' with a rate limit applied (see the geocodefarm
        terms of service at:
        https://geocode.farm/geocoding/free-api-documentation/ ).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder class
        (e.g., geopy.geocoders.GeocodeFarm) may be used. The geocoder is
        constructed from it with ``**kwargs``.

        Some providers require additional arguments such as access keys
        See each geocoder's specific parameters in geopy.geocoders

    Returns
    -------
    GeoDataFrame
        The result of ``_query`` run in reverse (point -> address) mode.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Reverse geocoding requires geopy. Install it using 'pip install geopy'.
    See also https://github.com/geopy/geopy

    Examples
    --------
    >>> df = reverse_geocode([Point(-71.0594869, 42.3584697),
    ...                       Point(-77.0365305, 38.8977332)])  # doctest: +SKIP
    >>> df  # doctest: +SKIP
                                                 address  \\
    0             29 Court Square, Boston, MA 02108, USA
    1  1600 Pennsylvania Avenue Northwest, President'...

                             geometry
    0  POINT (-71.0594869 42.3584697)
    1  POINT (-77.0365305 38.8977332)
    """
    use_default = provider is None
    if use_default:
        # https://geocode.farm/geocoding/free-api-documentation/
        provider = "geocodefarm"

    # The default service has a fixed request spacing; any other provider
    # gets whatever delay the throttle helper reports for it.
    pause = 0.25 if use_default else _get_throttle_time(provider)

    return _query(points, False, provider, pause, **kwargs)
def _query(data, forward, provider, throttle_time, **kwargs):
    """
    Generic wrapper for calls over lists to geopy Geocoders.

    Parameters
    ----------
    data : list or Series
        Address strings when ``forward`` is true; otherwise objects exposing
        ``x`` (longitude) and ``y`` (latitude) attributes. Anything that is
        not already a pandas Series is wrapped in one.
    forward : bool
        True to geocode (``coder.geocode``), False to reverse geocode
        (``coder.reverse``).
    provider : str or geopy geocoder class
        A service name resolved with ``get_geocoder_for_service``, or the
        class itself. It is instantiated with ``**kwargs``.
    throttle_time : float
        Seconds to sleep after every request.
    **kwargs
        Forwarded to the geocoder constructor.

    Returns
    -------
    GeoDataFrame
        Whatever ``_prepare_geocode_result`` builds from the collected
        results.

    Notes
    -----
    Results are keyed by the index labels of ``data``, so if a label occurs
    more than once only the last result for that label is kept.
    """
    # GeocoderQueryError is defined in geopy.exc; import it from there
    # rather than depending on geopy.geocoders.base re-importing it.
    from geopy.exc import GeocoderQueryError
    from geopy.geocoders import get_geocoder_for_service

    if not isinstance(data, pd.Series):
        data = pd.Series(data)

    if isinstance(provider, str):
        provider = get_geocoder_for_service(provider)

    coder = provider(**kwargs)
    results = {}
    for i, s in data.items():
        try:
            if forward:
                location = coder.geocode(s)
            else:
                # geopy expects (lat, lon); the point stores lon in x, lat in y
                location = coder.reverse((s.y, s.x), exactly_one=True)
        except (GeocoderQueryError, ValueError):
            location = None

        # geopy returns None when the service finds no match. Store the same
        # (address, location) placeholder used for failed queries so every
        # value handed on can be unpacked as a pair.
        results[i] = (None, None) if location is None else location
        time.sleep(throttle_time)

    return _prepare_geocode_result(results)
def _prepare_geocode_result(results):
    """
    Helper function for the geocode function

    Takes a dict where keys are index entries, values are tuples containing:
    (address, (lat, lon))

    A value of ``None`` (no result for that entry) is accepted as well and
    handled like ``(None, None)``.

    Returns a GeoDataFrame indexed by the dict keys, with a "geometry" column
    of points (an empty Point where the location is missing) and an "address"
    column (NaN where the address is missing), using CRS EPSG:4326.
    """
    geometries = []
    addresses = []
    index = []

    for label, result in results.items():
        # A bare None cannot be unpacked; treat it as "nothing found".
        address, loc = (None, None) if result is None else result

        # loc is lat, lon and we want lon, lat
        geometries.append(Point() if loc is None else Point(loc[1], loc[0]))
        addresses.append(np.nan if address is None else address)
        index.append(label)

    # Both columns are passed explicitly so they exist even when ``results``
    # is empty; "geometry" stays first, as before.
    df = geopandas.GeoDataFrame(
        {"geometry": geometries, "address": addresses},
        index=index,
        crs="EPSG:4326",
    )
    return df
|