File: python_snippet_updater.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (135 lines) | stat: -rw-r--r-- 4,726 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import sys
import logging
from pathlib import Path
import argparse
import re
from typing import Dict

# Module-level logger; this file does not attach a handler or set a level unless the lines below are uncommented.
_LOGGER = logging.getLogger(__name__)

# Uncomment the following lines to show debug info
# _LOGGER.setLevel(logging.DEBUG)
# console_handler = logging.StreamHandler()
# _LOGGER.addHandler(console_handler)

# Snippet registry filled by get_snippet(): "<source file name minus its last 3 chars>.<snippet name>" -> snippet text.
snippets = {}
# Set to True by update_snippet() when a markdown snippet block differs from the text built from its source snippet.
not_up_to_date = False

# Glob patterns handed to Path(path).rglob() to locate the Python files that are scanned for snippets.
target_snippet_sources = ["samples/*.py", "samples/**/*.py"]
# Glob patterns handed to Path(path).rglob() to locate the markdown files whose snippet blocks are refreshed.
target_md_files = ["README.md"]


def check_snippets() -> Dict:
    """Return the module-level snippet registry.

    The returned object is the ``snippets`` dict itself (not a copy), keyed by
    snippet identifier and holding the snippet text stored by ``get_snippet``.
    """
    registry = snippets
    return registry


def check_not_up_to_date() -> bool:
    """Return the current value of the module-level ``not_up_to_date`` flag.

    ``update_snippet`` sets the flag to ``True`` when it finds a markdown
    snippet block that differs from the text generated from its source.
    """
    stale = not_up_to_date
    return stale


def get_snippet(file: str) -> None:
    """Collect every ``# [START name] ... # [END name]`` snippet from a source file.

    Each snippet body is dedented, stripped of its leading newline and trailing
    whitespace, and stored in the module-level ``snippets`` dict under the key
    ``"<file name minus its last 3 chars>.<name>"`` (i.e. without ``.py``).
    When a key already exists a warning is logged and the entry is overwritten.

    :param file: Path of the file to scan (anything accepted by ``pathlib.Path``).
    :raises UnicodeDecodeError: If the file cannot be decoded as UTF-8.
    """
    file_obj = Path(file)
    with open(file_obj, "r", encoding="utf8") as f:
        content = f.read()
    pattern = r"# \[START(?P<name>[A-Z a-z0-9_]+)\](?P<body>[\s\S]+?)# \[END[A-Z a-z0-9_]+\]"
    # The file-name part of the identifier is the same for every snippet in the file.
    file_name = str(file_obj.name)[:-3]

    for match in re.finditer(pattern, content):
        name = match.group("name").strip()
        body = match.group("body")
        # Remove extra spaces.
        # A sample code snippet could look like:
        # \n
        #         # [START trio]
        #         from azure.core.pipeline.transport import TrioRequestsTransport
        #
        #         async with AsyncPipeline(TrioRequestsTransport(), policies=policies) as pipeline:
        #             return await pipeline.run(request)
        #         # [END trio]
        # \n
        # On one hand, the leading spaces may vary, e.g. a snippet inside a class is
        # indented deeper than one at module level. On the other hand, we cannot strip
        # all leading spaces because indentation is part of Python syntax.
        # The algorithm:
        # First, count the spaces at the start of the first line after the START marker
        # (the captured body begins with the newline that ends the START line).
        # Then remove that many spaces from the beginning of every line. To touch only
        # the beginning of a line, and only once per line, use replace('\n' + spaces, '\n').
        first_line = body[1:]
        spaces = first_line[: len(first_line) - len(first_line.lstrip(" "))]
        snippet = body.replace("\n" + spaces, "\n")
        # Drop the first character (the newline after the START marker) and all trailing
        # whitespace. rstrip() already removes trailing newlines, so no further trimming
        # is needed; indexing the last character here would raise IndexError when the
        # markers enclose only whitespace and the snippet is empty.
        snippet = snippet[1:].rstrip()

        identifier = ".".join([file_name, name])
        if identifier in snippets:
            _LOGGER.warning(f'Found duplicated snippet name "{identifier}".')
            _LOGGER.warning(file)
        _LOGGER.debug(f"Found snippet: {file_obj.name}.{name}")
        snippets[identifier] = snippet


def update_snippet(file: str) -> None:
    """Refresh the ``<!-- SNIPPET:name -->`` blocks of a markdown file in place.

    Every block of the form::

        <!-- SNIPPET:name-->

        ```python
        ...
        ```

        <!-- END SNIPPET -->

    is compared with the text generated from ``snippets[name]``. Stale blocks are
    replaced, the module-level ``not_up_to_date`` flag is set to ``True``, and the
    file is written back. A file whose blocks are all current is left untouched.

    :param file: Path of the markdown file (anything accepted by ``pathlib.Path``).
    :raises SystemExit: With status 1 if a block names a snippet that is not in ``snippets``.
    :raises UnicodeDecodeError: If the file cannot be decoded as UTF-8.
    """
    # Declared up front so the assignment inside the loop clearly targets the module flag.
    global not_up_to_date

    file_obj = Path(file)
    with open(file_obj, "r", encoding="utf8") as f:
        content = f.read()
    pattern = r"(?P<content>(?P<header><!-- SNIPPET:(?P<name>[A-Z a-z0-9_.]+)-->)[\n]+```python\n[\s\S]*?\n<!-- END SNIPPET -->)"

    # Blocks are located in the text as read; replacements accumulate in ``updated``.
    updated = content
    for match in re.finditer(pattern, content):
        body = match.group("content").strip()
        header = match.group("header").strip()
        name = match.group("name").strip()
        _LOGGER.debug(f"Found name: {name}")
        if name not in snippets:
            _LOGGER.error(f'In {file}, failed to find snippet name "{name}".')
            # sys.exit() rather than the site-provided exit(), which is not guaranteed to exist.
            sys.exit(1)
        target_code = "".join([header, "\n\n```python\n", snippets[name], "\n```\n\n", "<!-- END SNIPPET -->"])
        if body != target_code:
            _LOGGER.warning(f'Snippet "{name}" is not up to date.')
            not_up_to_date = True
            updated = updated.replace(body, target_code)

    # Only rewrite the file when a block actually changed.
    if updated != content:
        with open(file_obj, "w", encoding="utf8") as f:
            f.write(updated)


if __name__ == "__main__":
    # Script entry point:
    #   1. collect snippets from the sample sources under the given path,
    #   2. refresh the snippet blocks of the markdown targets under that path,
    #   3. exit with status 1 if any block had to be changed.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path",
        nargs="?",
        help=("The targeted path for update."),
    )
    args = parser.parse_args()
    # Use the parsed value instead of sys.argv[1]: the positional is optional, so
    # indexing argv directly raises IndexError when it is omitted.
    path = args.path
    if path is None:
        parser.error("the following arguments are required: path")

    _LOGGER.info(f"Path: {path}")
    for source in target_snippet_sources:
        for py_file in Path(path).rglob(source):
            try:
                get_snippet(py_file)
            except UnicodeDecodeError:
                # Best effort: skip files that cannot be decoded, but say so.
                _LOGGER.warning(f"Skipping {py_file}: unable to decode file as UTF-8.")
    for key in snippets:
        _LOGGER.debug(f"Found snippet: {key}")
    for target in target_md_files:
        for md_file in Path(path).rglob(target):
            try:
                update_snippet(md_file)
            except UnicodeDecodeError:
                # Best effort: skip files that cannot be decoded, but say so.
                _LOGGER.warning(f"Skipping {md_file}: unable to decode file as UTF-8.")
    if not_up_to_date:
        _LOGGER.error(
            f'Error: code snippets are out of sync. Please run Python python_snippet_updater.py "{path}" to fix it.'
        )
        sys.exit(1)
    _LOGGER.info(f"README.md under {path} is up to date.")