1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
import json
import re
from time import sleep
import requests
from bs4 import BeautifulSoup
def split_bank_name(s):
# The patterns that might suggest the start of the short name.
patterns = [
r"IN FORMA ABBREVIATA",
r"IN BREVE",
r"IN SIGLA",
r"ABBR\.?",
r"O IN FORMA ABBREVIATA",
r"OVVERO",
]
for pattern in patterns:
# Special case for the OVVERO pattern
if pattern == r"OVVERO":
match = re.search(rf"{pattern} ([^\(]*?) O", s)
else:
match = re.search(rf"{pattern} (.*?)(?=\s*(\(|,|$))", s)
if match:
# Short name is found in the match.
short_name = match.group(1).strip().rstrip(")")
# Full name is everything before the match.
full_name = s[: match.start()]
# Further process full_name to remove trailing keywords and extra spaces.
for possible_end in ["O", pattern]:
full_name = re.sub(rf"\s*{possible_end}\s*$", "", full_name).rstrip(" (")
if "OVVERO" in short_name:
short_name = short_name.split("OVVERO")[0].strip()
return full_name, short_name
# If no patterns match and the string contains a parenthesis, split at the first parenthesis.
if "(" in s:
return s.split("(", 1)[0].strip(), None
# If no patterns match and no parenthesis, just return the original string and None.
return s, None
def runtime_test_split_bank_name():
"""Run tests against split_bank_name function.
In case there is pattern which is not covered by the function, add it to the patterns list in
the function.
"""
test_cases = [
(
"CASSA DI RISPARMIO DI FERMO S.P.A. (IN FORMA ABBREVIATA CARIFERMO S.P.A.)",
("CASSA DI RISPARMIO DI FERMO S.P.A.", "CARIFERMO S.P.A."),
),
("CAIXABANK S.A", ("CAIXABANK S.A", None)),
(
"IBL ISTITUTO BANCARIO DEL LAVORO S.P.A. (IN FORMA ABBREVIATA IBL BANCA)",
("IBL ISTITUTO BANCARIO DEL LAVORO S.P.A.", "IBL BANCA"),
),
(
"BANCA VALSABBINA SOCIETA' COOPERATIVA PER AZIONI (IN BREVE LA VALSABBINA)",
("BANCA VALSABBINA SOCIETA' COOPERATIVA PER AZIONI", "LA VALSABBINA"),
),
(
"BANCO DI BRESCIA SAN PAOLO CAB SOCIETA' PER AZIONI (ABBR. BANCO DI BRESCIA S.P.A.)",
("BANCO DI BRESCIA SAN PAOLO CAB SOCIETA' PER AZIONI", "BANCO DI BRESCIA S.P.A."),
),
(
"BANCA DI CIVIDALE SOCIETA' PER AZIONI O IN FORMA ABBREVIATA CIVIBANK S.P.A.",
("BANCA DI CIVIDALE SOCIETA' PER AZIONI", "CIVIBANK S.P.A."),
),
(
(
"MEDIOCREDITO TRENTINO-ALTO ADIGE - S.P.A. "
"(IN LINGUA TEDESCAINVESTITIONSBANK TRENTINO-SUDTIROL - A.G.)"
),
("MEDIOCREDITO TRENTINO-ALTO ADIGE - S.P.A.", None),
),
(
(
"BANCA POPOLARE DELL'ETRURIA E DEL LAZIO - SOCIETA' COOPERATIVA "
"(IN BREVE BANCAETRURIA SOCIETA' COOPERATIVA)"
),
(
"BANCA POPOLARE DELL'ETRURIA E DEL LAZIO - SOCIETA' COOPERATIVA",
"BANCAETRURIA SOCIETA' COOPERATIVA",
),
),
(
(
"CASSA DI RISPARMIO DI ASTI S.P.A. "
"(IN FORMA ABBREVIATA BANCA C.R. ASTI S.P.A.), Filiale di Treviso"
),
("CASSA DI RISPARMIO DI ASTI S.P.A.", "BANCA C.R. ASTI S.P.A."),
),
(
(
"BANCA PICCOLO CREDITO VALTELLINESE, SOCIETA' COOPERATIVA "
"(OVVERO CREDITO VALTELLINESE S.C. O SOLO CREDITO VALTELLINESE)"
),
(
"BANCA PICCOLO CREDITO VALTELLINESE, SOCIETA' COOPERATIVA",
"CREDITO VALTELLINESE S.C.",
),
),
(
(
"BANCA DI CREDITO COOPERATIVO - BANCA DI SIRACUSA "
"IN SIGLA BCC BANCA DI SIRACUSA - SOCIETA' COOPERATIVA"
),
(
"BANCA DI CREDITO COOPERATIVO - BANCA DI SIRACUSA",
"BCC BANCA DI SIRACUSA - SOCIETA' COOPERATIVA",
),
),
]
for input_str, expected_output in test_cases:
assert split_bank_name(input_str) == expected_output
print("test: split_bank_name passed")
def get_banks_registry_data_from_bank_name(bank_name):
sleep(1) # prevent server DoSing
url = "https://www.ibancalculator.com/blz.html"
data = {
"tx_blz_pi1[country]": "IT",
"tx_blz_pi1[searchterms]": bank_name,
"tx_blz_pi1[bankcode]": "",
"tx_blz_pi1[fi]": "fi",
"no_cache": 1,
"Action": "Search",
}
response = requests.post(url, data=data)
soup = BeautifulSoup(response.content, "html.parser")
results_tables = soup.select(".table")
if results_tables:
for row in results_tables[0].select("tr")[1:]:
bank_code = row.select("td")[3].text
bic = row.select("td")[2].text
if bank_code and bic and row.select("td")[0].text == "IT":
bank_name, bank_name_short = split_bank_name(row.select("td")[1].text)
yield {
"country_code": "IT",
"primary": True,
"bic": str(bic).split(",")[0],
"bank_code": str(int(bank_code)).zfill(5),
"name": bank_name,
"short_name": bank_name_short or bank_name,
}
def get_italian_bank_names():
base_url = "https://infostat.bancaditalia.it/GIAVAInquiry-public/ng/"
session = requests.Session()
session.headers.update(
{
"Accept": "application/json, text/plain, */*",
"Content-Type": "application/json",
"Referer": base_url,
"Origin": "https://infostat.bancaditalia.it",
}
)
# Login requests, obtains jwt token and sets cookies required for subsequent requests
print("Logging in...")
session.get(base_url, allow_redirects=True)
session.post(f"{base_url}api/getElements?domainId=INQ_INT_ALBI_SUB1")
# Get banks
print("Getting banks...")
response = session.post(
f"{base_url}api/searchAllIntermediaries",
data=json.dumps(
{
"searchElement": {
"intermediaryBoards": [
{
"boardType": {
"code": "001",
"description": "ALBO DELLE BANCHE",
"type": None,
"startDate": "1936-12-31",
"endDate": "9999-12-31",
},
"inscriptionProtocol": "",
}
],
"establishmentDate": "2023-08-24",
},
"endIndex": 30,
"startIndex": 0,
"rowCount": 30,
"searchOrderItems": [
{
"columnIndex": 1,
"insertedIndexColumn": 1,
"dataField": "abiCode",
"descending": False,
}
],
}
),
allow_redirects=True,
)
response.raise_for_status()
return [x["name"] for x in response.json()]
if __name__ == "__main__":
runtime_test_split_bank_name()
bank_names = sorted(set(get_italian_bank_names()))
bic_to_bank = {}
for i, bank_name in enumerate(bank_names):
print(f"{i}/{len(bank_names)}", "- ", bank_name)
banks = get_banks_registry_data_from_bank_name(bank_name)
for bank in banks:
bic_to_bank[bank["bic"]] = bank
with open("schwifty/bank_registry/generated_it.json", "w") as fp:
json.dump(list(bic_to_bank.values()), fp, indent=2)
|