File: dot_url_resolve.py

package info (click to toggle)
graphviz 14.0.5-1
  • links: PTS
  • area: main
  • in suites: experimental
  • size: 139,388 kB
  • sloc: ansic: 141,938; cpp: 11,957; python: 7,766; makefile: 4,043; yacc: 3,030; xml: 2,972; tcl: 2,495; sh: 1,388; objc: 1,159; java: 560; lex: 423; perl: 243; awk: 156; pascal: 139; php: 58; ruby: 49; cs: 31; sed: 1
file content (170 lines) | stat: -rwxr-xr-x 4,705 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3

"""
Resolve external image references in a Graphviz source.

Nodes in a graph can have an associated image, `my_node[image="foo.png"]`, but
the image string must be a path to a local file. Using a URL to point to a
remotely hosted image is not supported natively. This script resolves such
external references allowing the use of such references:

  echo 'graph { a[image="https://graphviz.org/Resources/app.png"]; }' \
    | dot_url_resolve.py \
    | dot -Tpng -o my_output.png

This script does not have a sophisticated understanding of the Graphviz
language. It simply treats anything that looks like a string containing a URL as
something that should be downloaded.
"""

import argparse
import hashlib
import io
import logging
import re
import sys
import tempfile
import urllib.request
from pathlib import Path
from typing import Optional, TextIO


def _translate(
    source: str,
    translations: dict[str, Path],
    local_store: Path,
    log: Optional[logging.Logger],
) -> str:
    """
    convert a remote URL to a local path, downloading if necessary

    If `source` is not a remote URL, it is returned as-is.

    Args:
        source: URL to resolve
        translations: accumulated mapping from URLs to local paths
        local_store: directory to write downloaded files to
        log: optional progress sink

    Returns:
        local path corresponding to where the URL was downloaded to
    """

    # does this look like a remote URL?
    if re.match(r"https?:", source, flags=re.IGNORECASE):
        # have we not yet downloaded this ?
        local = translations.get(source)
        if local is None:
            # generate a unique local filename to write to
            digest = hashlib.sha256(source.encode("utf-8")).hexdigest()
            extension = Path(source).suffix
            dest = local_store / f"{digest}{extension}"

            # download the file
            if log is not None:
                log.info(f"downloading {source} → {dest}")
            urllib.request.urlretrieve(source, dest)
            translations[source] = dest

        return str(translations[source])

    return source


def resolve(
    inp: TextIO, outp: TextIO, local_store: Path, log: Optional[logging.Logger] = None
) -> dict[str, Path]:
    """
    process Graphviz source, converting remote URLs to local paths

    The input is copied to the output one character at a time. The content of
    each double-quoted string is buffered and passed through `_translate`
    before being written, so strings that are URLs are replaced by local paths.
    A backslash inside a string escapes the character that follows it, so an
    escaped quote (`\\"`) does not terminate the string.

    Args:
        inp: source to read from
        outp: destination to write to
        local_store: directory to write downloaded files to
        log: optional progress sink

    Returns:
        a mapping from URLs discovered to paths to which they were downloaded
    """

    # translations from original URLs to local paths
    downloaded: dict[str, Path] = {}

    in_string = False
    # was the previous character in the current string an unescaped backslash?
    escaped = False
    pending = io.StringIO()
    while True:
        c = inp.read(1)

        if in_string:
            # does this terminate a string we were accruing? end of input also
            # flushes whatever was buffered for an unterminated string
            if c == "" or (c == '"' and not escaped):
                accrued = pending.getvalue()
                pending = io.StringIO()

                outp.write(_translate(accrued, downloaded, local_store, log))

                in_string = False
                escaped = False
            else:
                pending.write(c)
                # a backslash escapes the next character unless it is itself
                # escaped ("\\\\" is a literal backslash pair, not an escape)
                escaped = c == "\\" and not escaped
                continue

        elif c == '"':
            in_string = True

        if c == "":
            break

        # emits ordinary characters as well as opening and closing quotes
        outp.write(c)

    return downloaded


def main(args: list[str]) -> int:
    """
    entry point

    Args:
        args: full command line, including the program name as `args[0]`

    Returns:
        process exit status; 0 on success
    """

    # parse command line options
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "input",
        nargs="?",
        type=argparse.FileType("rt"),
        default=sys.stdin,
        help="Graphviz source to read",
    )
    parser.add_argument(
        "output",
        nargs="?",
        type=argparse.FileType("wt"),
        default=sys.stdout,
        help="Graphviz source to write",
    )
    parser.add_argument(
        "--local-dir",
        help="path to write resolved files to (default: temporary location)",
    )
    parser.add_argument(
        "--quiet", "-q", action="store_true", help="suppress progress messages"
    )
    options = parser.parse_args(args[1:])

    # use a temporary directory if we were not given one
    if options.local_dir is None:
        # deliberately not cleaned up: the emitted source refers to files in it
        local_dir = Path(tempfile.mkdtemp())
    else:
        local_dir = Path(options.local_dir)
        # create the requested directory on demand so the first download does
        # not fail with FileNotFoundError
        local_dir.mkdir(parents=True, exist_ok=True)

    # setup logging
    log = logging.getLogger()
    log.setLevel(logging.WARNING if options.quiet else logging.INFO)
    handler = logging.StreamHandler(sys.stderr)
    log.addHandler(handler)

    resolve(options.input, options.output, local_dir, log)

    # flush rather than close: the output may be sys.stdout, which must stay
    # open for the rest of the process
    options.output.flush()

    return 0


if __name__ == "__main__":
    # run as a script: hand the full argv to main and use its return value as
    # the process exit status
    exit_status = main(sys.argv)
    sys.exit(exit_status)