File: get_bank_registry_de.py

package info (click to toggle)
python-schwifty 2024.09.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,072 kB
  • sloc: python: 3,057; makefile: 209; sh: 9
file content (74 lines) | stat: -rwxr-xr-x 2,066 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


# Width, in characters, of each column in one line of the fixed-width input.
# parse() walks this dict in insertion order and slices every line at the
# running offset, so the order of the entries below is the column order and
# must not be changed. The widths add up to 168 characters per record.
FIELD_LENGTHS = {
    "bank_code": 8,
    "feature": 1,  # process() treats the value "1" as marking a primary record
    "name": 58,
    "postal_code": 5,
    "place": 35,
    "short_name": 27,
    "pan": 5,
    "bic": 11,
    "check_digit_method": 2,  # exported by process() as "checksum_algo"
    "record_number": 6,
    "mod_number": 1,
    "tbd": 1,  # NOTE(review): meaning of this column is not evident from this file
    "successor_bank_code": 8,
}

# Page that get_download_url() fetches first; it is also the base against
# which the links found by that function are resolved.
URL = "https://www.bundesbank.de/de/aufgaben/unbarer-zahlungsverkehr/serviceangebot/bankleitzahlen"


def get_download_url():
    """Locate the absolute URL of the current bank-code text file.

    Fetches the page at ``URL``, follows the first link whose ``href``
    contains ``"download-bankleitzahlen"``, and on that second page looks for
    the first link whose ``href`` contains ``"blz-aktuell-txt-data.txt"``.

    Returns:
        The absolute URL of the text file as a string.

    Raises:
        requests.HTTPError: If either page answers with an error status.
        RuntimeError: If one of the expected links is not on its page (for
            example after a redesign of the website).
    """

    def find_link(page_url, marker):
        """Return the absolute target of the first link on page_url containing marker."""
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(page_url, timeout=30)
        # Fail on 4xx/5xx instead of searching an error page for links.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        atag = soup.find(href=lambda ref: ref and marker in ref)
        if atag is None:
            raise RuntimeError(f"No link containing {marker!r} found on {page_url}")
        # A relative href is relative to the page it appears on.
        return urljoin(page_url, atag.get("href"))

    download_page = find_link(URL, "download-bankleitzahlen")
    return find_link(download_page, "blz-aktuell-txt-data.txt")


def get_raw():
    """Download the bank-code text file and return its content as ``str``.

    The URL is obtained from get_download_url(); the response body is decoded
    as Latin-1.

    Raises:
        requests.HTTPError: If the download answers with an error status, so
            that an error page is never handed on as if it were bank data.
    """
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(get_download_url(), timeout=30)
    response.raise_for_status()
    return response.content.decode(encoding="latin1")


def parse(raw, field_lengths=None):
    """Split fixed-width text into one dict per non-blank line.

    Args:
        raw: Text holding one record per line, lines separated by a line feed.
        field_lengths: Mapping of field name to column width, listed in
            column order. Defaults to the module-level ``FIELD_LENGTHS``.

    Yields:
        A dict mapping every field name to the whitespace-stripped slice of
        the line at that column. Columns lying beyond the end of a short
        line come out as empty strings.
    """
    if field_lengths is None:
        field_lengths = FIELD_LENGTHS

    # Split on line feeds only: str.splitlines() would also break on other
    # characters (e.g. U+0085) that Latin-1 decoded data may contain.
    for line in raw.split("\n"):
        # Lines that are empty or hold only whitespace (such as the lone
        # carriage return left over from a blank CRLF line) carry no record.
        if not line.strip():
            continue
        record = {}
        offset = 0
        for field, length in field_lengths.items():
            record[field] = line[offset : offset + length].strip()
            offset += length
        yield record


def process(records):
    """Convert parsed records into the sorted list of registry entries.

    Records are ordered by the concatenation of their ``bank_code`` and
    ``bic`` values. Records with an empty ``bank_code`` or an empty ``bic``
    are left out. Each kept entry carries ``bank_code``, ``name``,
    ``short_name`` and ``bic`` from the record plus:

    * ``primary``: True when the record's ``feature`` equals ``"1"``
    * ``country_code``: always ``"DE"``
    * ``checksum_algo``: the record's ``check_digit_method``

    The number of kept entries is printed before the list is returned.
    """
    kept = ("bank_code", "name", "short_name", "bic")

    def sort_key(item):
        return "{}{}".format(item["bank_code"], item["bic"])

    ordered = list(records)
    ordered.sort(key=sort_key)

    registry = []
    for rec in ordered:
        if rec["bank_code"]:
            entry = {key: rec[key] for key in rec if key in kept}
            entry.update(
                primary=rec["feature"] == "1",
                country_code="DE",
                checksum_algo=rec["check_digit_method"],
            )
            if entry["bic"]:
                registry.append(entry)

    print(f"Fetched {len(registry)} bank records")
    return registry


if __name__ == "__main__":
    # Build the registry before opening the output file: opening with "w"
    # truncates it immediately, so a failed download or parse would otherwise
    # leave an empty file where the previous registry used to be.
    registry = process(parse(get_raw()))
    with open("schwifty/bank_registry/generated_de.json", "w") as fp:
        json.dump(registry, fp, indent=2)