File: get_bank_registry_it.py

package info (click to toggle)
python-schwifty 2024.09.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,072 kB
  • sloc: python: 3,057; makefile: 209; sh: 9
file content (230 lines) | stat: -rw-r--r-- 8,112 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import json
import re
from time import sleep

import requests
from bs4 import BeautifulSoup


def split_bank_name(s):
    """Split a raw bank name into its full name and its abbreviated name.

    The string is searched for one of several marker phrases (``IN FORMA ABBREVIATA``,
    ``IN BREVE``, ``IN SIGLA``, ``ABBR.``, ``OVVERO``) that introduce a short name. The text
    before the marker becomes the full name, the text following it the short name.

    Args:
        s: The bank name to split.

    Returns:
        A ``(full_name, short_name)`` tuple. ``short_name`` is ``None`` when no marker phrase is
        found; in that case everything from the first ``(`` onwards is dropped from the full
        name, and a string without any parenthesis is returned unchanged.
    """
    # The patterns that might suggest the start of the short name.
    patterns = [
        r"IN FORMA ABBREVIATA",
        r"IN BREVE",
        r"IN SIGLA",
        r"ABBR\.?",
        # Any string matching this entry is already matched by "IN FORMA ABBREVIATA" above; the
        # leading "O" is removed from the full name by the clean-up loop below.
        r"O IN FORMA ABBREVIATA",
        r"OVVERO",
    ]

    for pattern in patterns:
        # Special case for the OVVERO pattern
        if pattern == r"OVVERO":
            # The short name ends at the standalone conjunction "O". The word boundary prevents
            # stopping in front of a word that merely starts with "O" (e.g. "OVEST").
            match = re.search(rf"{pattern} ([^\(]*?) O\b", s)
        else:
            # The short name runs up to the next "(" or "," or the end of the string.
            match = re.search(rf"{pattern} (.*?)(?=\s*(\(|,|$))", s)

        if match:
            # Short name is found in the match.
            short_name = match.group(1).strip().rstrip(")")
            # Full name is everything before the match.
            full_name = s[: match.start()]
            # Further process full_name to remove trailing keywords and extra spaces. The "O" must
            # be a word of its own, otherwise the last letter of names ending in "O" (e.g.
            # "... DI MILANO IN SIGLA ...") would be cut off.
            for possible_end in [r"\bO", pattern]:
                full_name = re.sub(rf"\s*{possible_end}\s*$", "", full_name).rstrip(" (")
            if "OVVERO" in short_name:
                short_name = short_name.split("OVVERO")[0].strip()
            return full_name, short_name

    # If no patterns match and the string contains a parenthesis, split at the first parenthesis.
    if "(" in s:
        return s.split("(", 1)[0].strip(), None

    # If no patterns match and no parenthesis, just return the original string and None.
    return s, None


def runtime_test_split_bank_name():
    """Run tests against split_bank_name function.

    In case there is pattern which is not covered by the function, add it to the patterns list in
    the function.

    Raises:
        AssertionError: If at least one test case does not produce the expected result. The
            message lists every failing input together with the expected and the actual output.
    """
    test_cases = [
        (
            "CASSA DI RISPARMIO DI FERMO S.P.A. (IN FORMA ABBREVIATA CARIFERMO S.P.A.)",
            ("CASSA DI RISPARMIO DI FERMO S.P.A.", "CARIFERMO S.P.A."),
        ),
        ("CAIXABANK S.A", ("CAIXABANK S.A", None)),
        (
            "IBL ISTITUTO BANCARIO DEL LAVORO S.P.A. (IN FORMA ABBREVIATA IBL BANCA)",
            ("IBL ISTITUTO BANCARIO DEL LAVORO S.P.A.", "IBL BANCA"),
        ),
        (
            "BANCA VALSABBINA SOCIETA' COOPERATIVA PER AZIONI (IN BREVE LA VALSABBINA)",
            ("BANCA VALSABBINA SOCIETA' COOPERATIVA PER AZIONI", "LA VALSABBINA"),
        ),
        (
            "BANCO DI BRESCIA SAN PAOLO CAB SOCIETA' PER AZIONI (ABBR. BANCO DI BRESCIA S.P.A.)",
            ("BANCO DI BRESCIA SAN PAOLO CAB SOCIETA' PER AZIONI", "BANCO DI BRESCIA S.P.A."),
        ),
        (
            "BANCA DI CIVIDALE SOCIETA' PER AZIONI O IN FORMA ABBREVIATA CIVIBANK S.P.A.",
            ("BANCA DI CIVIDALE SOCIETA' PER AZIONI", "CIVIBANK S.P.A."),
        ),
        (
            (
                "MEDIOCREDITO TRENTINO-ALTO ADIGE - S.P.A. "
                "(IN LINGUA TEDESCAINVESTITIONSBANK TRENTINO-SUDTIROL - A.G.)"
            ),
            ("MEDIOCREDITO TRENTINO-ALTO ADIGE - S.P.A.", None),
        ),
        (
            (
                "BANCA POPOLARE DELL'ETRURIA E DEL LAZIO - SOCIETA' COOPERATIVA "
                "(IN BREVE BANCAETRURIA SOCIETA' COOPERATIVA)"
            ),
            (
                "BANCA POPOLARE DELL'ETRURIA E DEL LAZIO - SOCIETA' COOPERATIVA",
                "BANCAETRURIA SOCIETA' COOPERATIVA",
            ),
        ),
        (
            (
                "CASSA DI RISPARMIO DI ASTI S.P.A. "
                "(IN FORMA ABBREVIATA BANCA C.R. ASTI S.P.A.), Filiale di Treviso"
            ),
            ("CASSA DI RISPARMIO DI ASTI S.P.A.", "BANCA C.R. ASTI S.P.A."),
        ),
        (
            (
                "BANCA PICCOLO CREDITO VALTELLINESE, SOCIETA' COOPERATIVA "
                "(OVVERO CREDITO VALTELLINESE S.C. O SOLO CREDITO VALTELLINESE)"
            ),
            (
                "BANCA PICCOLO CREDITO VALTELLINESE, SOCIETA' COOPERATIVA",
                "CREDITO VALTELLINESE S.C.",
            ),
        ),
        (
            (
                "BANCA DI CREDITO COOPERATIVO - BANCA DI SIRACUSA "
                "IN SIGLA BCC BANCA DI SIRACUSA - SOCIETA' COOPERATIVA"
            ),
            (
                "BANCA DI CREDITO COOPERATIVO - BANCA DI SIRACUSA",
                "BCC BANCA DI SIRACUSA - SOCIETA' COOPERATIVA",
            ),
        ),
    ]

    # Check every case explicitly instead of using ``assert``: assert statements are removed when
    # Python runs with -O, and a bare assert does not tell which input broke.
    failures = []
    for input_str, expected_output in test_cases:
        actual_output = split_bank_name(input_str)
        if actual_output != expected_output:
            failures.append(
                f"{input_str!r}: expected {expected_output!r}, got {actual_output!r}"
            )

    if failures:
        raise AssertionError("split_bank_name failed for:\n" + "\n".join(failures))

    print("test: split_bank_name passed")


def get_banks_registry_data_from_bank_name(bank_name, timeout=30):
    """Search ibancalculator.com for ``bank_name`` and yield the matching Italian bank records.

    The function sleeps for one second before every request to throttle the lookups. Only the
    first element matching the ``.table`` selector is evaluated, and its first row is skipped.

    Args:
        bank_name: Search term sent to the lookup form.
        timeout: Seconds to wait for the server before ``requests`` raises a timeout error.

    Yields:
        One dict per usable result row with the keys ``country_code``, ``primary``, ``bic``,
        ``bank_code``, ``name`` and ``short_name``. Rows without a BIC, without a bank code or
        with a country other than ``IT`` are skipped.
    """
    sleep(1)  # prevent server DoSing

    url = "https://www.ibancalculator.com/blz.html"
    data = {
        "tx_blz_pi1[country]": "IT",
        "tx_blz_pi1[searchterms]": bank_name,
        "tx_blz_pi1[bankcode]": "",
        "tx_blz_pi1[fi]": "fi",
        "no_cache": 1,
        "Action": "Search",
    }

    # Without an explicit timeout ``requests`` would wait indefinitely on a stalled connection.
    response = requests.post(url, data=data, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    results_tables = soup.select(".table")
    if not results_tables:
        return

    for row in results_tables[0].select("tr")[1:]:
        cells = row.select("td")
        # Rows with fewer than four cells cannot carry country, name, BIC and bank code.
        if len(cells) < 4:
            continue

        # Columns as read here: 0 = country, 1 = bank name, 2 = BIC, 3 = bank code.
        country, raw_name, bic, bank_code = (cell.text for cell in cells[:4])
        if not (bank_code and bic and country == "IT"):
            continue

        # Distinct local names keep the ``bank_name`` search term from being overwritten.
        full_name, short_name = split_bank_name(raw_name)
        yield {
            "country_code": "IT",
            "primary": True,
            # The BIC cell may hold a comma separated list; only the first entry is kept.
            "bic": str(bic).split(",")[0],
            # Normalise the bank code to five digits with leading zeros.
            "bank_code": str(int(bank_code)).zfill(5),
            "name": full_name,
            "short_name": short_name or full_name,
        }


def get_italian_bank_names(timeout=60):
    """Fetch bank names from the Banca d'Italia GIAVA inquiry service.

    Args:
        timeout: Seconds to wait for each HTTP request before ``requests`` raises a timeout
            error.

    Returns:
        A list with the ``name`` field of every entry in the JSON search response.

    Raises:
        requests.HTTPError: If the search request returns an error status.
    """
    base_url = "https://infostat.bancaditalia.it/GIAVAInquiry-public/ng/"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
        "Referer": base_url,
        "Origin": "https://infostat.bancaditalia.it",
    }
    # NOTE(review): "startIndex"/"endIndex"/"rowCount" look like a paging window of 30 rows -
    # confirm that the endpoint really returns every bank with these values.
    # NOTE(review): "establishmentDate" is a fixed date - confirm whether it has to be updated
    # when the registry is regenerated.
    search_request = {
        "searchElement": {
            "intermediaryBoards": [
                {
                    "boardType": {
                        "code": "001",
                        "description": "ALBO DELLE BANCHE",
                        "type": None,
                        "startDate": "1936-12-31",
                        "endDate": "9999-12-31",
                    },
                    "inscriptionProtocol": "",
                }
            ],
            "establishmentDate": "2023-08-24",
        },
        "endIndex": 30,
        "startIndex": 0,
        "rowCount": 30,
        "searchOrderItems": [
            {
                "columnIndex": 1,
                "insertedIndexColumn": 1,
                "dataField": "abiCode",
                "descending": False,
            }
        ],
    }

    # The context manager closes the session and its pooled connections, also on errors.
    with requests.Session() as session:
        session.headers.update(headers)

        # Login requests, obtains jwt token and sets cookies required for subsequent requests
        print("Logging in...")
        session.get(base_url, allow_redirects=True, timeout=timeout)
        session.post(
            f"{base_url}api/getElements?domainId=INQ_INT_ALBI_SUB1",
            timeout=timeout,
        )

        # Get banks
        print("Getting banks...")
        response = session.post(
            f"{base_url}api/searchAllIntermediaries",
            data=json.dumps(search_request),
            allow_redirects=True,
            timeout=timeout,
        )
        response.raise_for_status()
        return [x["name"] for x in response.json()]


if __name__ == "__main__":
    # Verify the name splitter first, before any network request is made.
    runtime_test_split_bank_name()

    # Duplicate names are dropped and the remainder is processed in sorted order.
    bank_names = sorted(set(get_italian_bank_names()))
    total = len(bank_names)

    # Records are keyed by BIC, so a later record with the same BIC replaces an earlier one.
    bic_to_bank = {}
    for i, bank_name in enumerate(bank_names):
        print(f"{i}/{total}", "- ", bank_name)
        bic_to_bank.update(
            (bank["bic"], bank) for bank in get_banks_registry_data_from_bank_name(bank_name)
        )

    # Write the collected records as a JSON list.
    with open("schwifty/bank_registry/generated_it.json", "w") as fp:
        json.dump(list(bic_to_bank.values()), fp, indent=2)