import json
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
# Listing pages to crawl: domestic banks, foreign banks, and savings banks.
BASE_URLS = [
"https://www.iban.es/bancos/",
"https://www.iban.es/bancos-extranjeros/",
"https://www.iban.es/cajas/",
]
# Maps the Spanish row labels found on each bank's detail page to the
# field names used in the generated registry record. Rows whose label is
# not listed here are ignored.
titles = {
"Código de entidad (código de banco)": "bank_code",
"Código BIC Banco (Codigo SWIFT)": "bic",
"Denominación legal del Banco": "name",
"Nombre Comercial (abreviado)": "short_name",
}
def get_bank_details(url):
    """Scrape one bank detail page and return its registry record.

    Args:
        url: Absolute URL of a bank detail page on iban.es.

    Returns:
        A dict with ``country_code``/``primary`` plus whichever fields
        from ``titles`` were present in the page's detail table.

    Raises:
        requests.HTTPError: If the page returns an error status.
        requests.Timeout: If the server does not respond within 30s.
    """
    # timeout: requests has no default timeout and would otherwise hang
    # forever on a stalled connection; raise_for_status so an error page
    # does not silently produce an empty record.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    record = {"country_code": "ES", "primary": True}
    for row in soup.select("div.about-content-text table tr"):
        cells = row.find_all("td")
        # Detail rows are exactly (label, value); skip anything else.
        if len(cells) != 2:
            continue
        title, value = cells[0].text.strip(), cells[1].text.strip()
        key = titles.get(title)
        if key:
            record[key] = value
    return record
def process():
    """Crawl every listing page and collect detail records for all banks.

    Returns:
        A list of record dicts (one per bank) as produced by
        :func:`get_bank_details`.

    Raises:
        requests.HTTPError: If a listing or detail page returns an error.
        requests.Timeout: If a request does not complete within 30s.
    """
    result = []
    for url in BASE_URLS:
        # timeout + raise_for_status: avoid hanging forever and avoid
        # scraping an error page as if it were a listing.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        paths = [str(a["href"]) for a in soup.select("h6.portfolio-title a")]
        print(f"Fetched {len(paths)} bank records")
        # Generator instead of a throwaway intermediate list.
        result.extend(get_bank_details(urljoin(url, path)) for path in paths)
    return result
if __name__ == "__main__":
    # Spanish bank names contain accented characters; write them as real
    # UTF-8 text (ensure_ascii=False) instead of \uXXXX escapes, and pin
    # the file encoding rather than relying on the platform default.
    with open("schwifty/bank_registry/generated_es.json", "w", encoding="utf-8") as fp:
        json.dump(process(), fp, indent=2, ensure_ascii=False)