File: update_test.py

package info (click to toggle)
python-recipe-scrapers 15.9.0-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 246,580 kB
  • sloc: python: 13,214; makefile: 3
file content (50 lines) | stat: -rw-r--r-- 1,517 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import argparse
import json
import pathlib

import requests
from reorder_json_keys import reorder_json_keys

from recipe_scrapers import SCRAPERS, scrape_html


def update_testcase(json_file: pathlib.Path) -> None:
    """Update testcase by downloading the latest version of the html
    and run the scraper on it to create a new version of the json file.

    Assumes the json file is in the standard file tree: the html fixture
    lives next to the json file under the same name with a ``.testhtml``
    suffix, and the name of the containing directory is used as the host.

    Args:
        json_file (pathlib.Path): The original json file. It must contain a
            "canonical_url" key, which is the address that gets downloaded.

    Raises:
        KeyError: If the json file has no "canonical_url" entry.
        requests.HTTPError: If the download answers with an error status;
            in that case neither the html nor the json file is modified.

    """
    json_file = json_file.absolute()
    html_file = json_file.with_suffix(".testhtml")
    orig_data = json.loads(json_file.read_text(encoding="utf-8"))
    url = orig_data["canonical_url"]
    host = html_file.parent.name

    response = requests.get(url, timeout=10)
    # Fail before touching any file: an error page (404, 500, ...) must not
    # replace the stored fixture.
    response.raise_for_status()
    html_data = response.content.decode()
    # Write with an explicit encoding so the fixture does not depend on the
    # locale of the machine running this script (the json is utf-8 as well).
    html_file.write_text(html_data, encoding="utf-8")

    # The directory name doubles as the host name handed to the scraper.
    supported_only = host in SCRAPERS
    actual = scrape_html(
        html=html_data,
        org_url=host,
        online=False,
        supported_only=supported_only,
    )
    new_data = actual.to_json()

    json_file.write_text(
        json.dumps(new_data, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
    reorder_json_keys(str(json_file), quiet=True)


if __name__ == "__main__":
    # Command-line entry point: takes the path of a single test json file
    # as its only positional argument and passes it to update_testcase.
    cli = argparse.ArgumentParser(
        description="Fetch the latest version of a recipe and update the test data",
    )
    cli.add_argument("json_file", help="The json file describing the test")

    target = pathlib.Path(cli.parse_args().json_file)
    update_testcase(target)