File: process-localization-extract.py

package info (click to toggle)
cookidoo-api 0.15.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,044 kB
  • sloc: python: 7,485; sh: 14; makefile: 3
file content (36 lines) | stat: -rwxr-xr-x 1,128 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
# NOTE: the shebang must stay on the very first line of the file; the OS only
# honours "#!" at the start of an executable script.
"""Process the extract for a localization table from the cookidoo web application."""

# Instructions found here: cookidoo-api/docs/localization.md

import json
import os

from bs4 import BeautifulSoup

# Both paths are built relative to the directory containing this script.
script_dir = os.path.dirname(__file__)
input_file_path = os.path.join(script_dir, "../raw/localization-extract.html")
output_file_path = os.path.join(script_dir, "../cookidoo_api/localization.json")

# Load the HTML content
with open(input_file_path, encoding="utf-8") as file:
    html_content = file.read()

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Extract the data: one entry per dropdown item that carries a checkbox input.
localization_data = []
for li in soup.find_all("li", {"class": "core-dropdown-list__item"}):
    # Look the checkbox up once and read the URL from that same element, so the
    # filter and the extracted value can never refer to different <input> tags.
    checkbox = li.find("input", {"type": "checkbox"})
    if checkbox is None:
        continue
    localization_data.append(
        {
            # Missing attributes raise KeyError on purpose: a malformed extract
            # should fail loudly rather than produce a partial table.
            "country_code": li["data-filter"],
            "language": li["data-lang"],
            "url": checkbox["value"],
        }
    )
print(f"Successfully extract {len(localization_data)} entries")

# Save the extracted data to JSON (non-ASCII characters are written as-is).
with open(output_file_path, "w", encoding="utf-8") as file:
    json.dump(localization_data, file, ensure_ascii=False, indent=4)