File: import_transforms.py

package info (click to toggle)
asdf-transform-schemas 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,444 kB
  • sloc: python: 381; makefile: 111
file content (96 lines) | stat: -rw-r--r-- 3,123 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
Script that imports transform schemas from asdf-standard.
This file can be removed once the format of the new schemas
is finalized.
"""
import argparse
import glob
import os
import re

import pkg_resources

# Each of the three patterns below matches one whole top-level line
# (through its trailing newline) so that the line can be replaced outright.
# ID_PATTERN additionally captures the schema name: the text between a "/"
# and the "-" that follows it in the id URI.
TAG_PATTERN = re.compile(r"^tag: .*?\n", re.MULTILINE | re.DOTALL)
ID_PATTERN = re.compile(r"^id: .*?/([^/-]+)-.*?\n", re.MULTILINE | re.DOTALL)
METASCHEMA_PATTERN = re.compile(r"^\$schema: .*?\n", re.MULTILINE | re.DOTALL)

# References to schemas outside of the transforms directory.  The group
# captures everything after "../" up to and including the version number.
EXTERNAL_REF_PATTERN = re.compile(r'\$ref: "?\.\./(.*?[0-9]\.[0-9]\.[0-9])"?')

# Any $ref that hasn't been converted to an http URI by the
# previous regex will be a reference to another transform.
# The group captures the referenced name without its version suffix.
# Every version component accepts the full 0-9 digit range, matching
# EXTERNAL_REF_PATTERN, so references to a 9.x.y schema are rewritten too.
INTERNAL_REF_PATTERN = re.compile(r'\$ref: "?(?!http)(.*?)-[0-9]\.[0-9]\.[0-9]"?')

# These (found in the examples) will be updated to absolute tags:
BANG_TRANSFORM = re.compile(r"!transform/([^ \n-]*)-[^ \n]*")
BANG_UNIT = re.compile(r"!unit/([^ \n]*)")
BANG_CORE = re.compile(r"!core/([^ \n]*)")

# URI prefix (both id and tag) of new schemas.
URI_PREFIX = "http://asdf-format.org/schemas/transform"

# Initial version of new schemas.
VERSION = "2.0.0"

# For now just leave this at draft-01, but when we remove tag
# we'll need to bring in draft-02.
METASCHEMA = "http://stsci.edu/schemas/yaml-schema/draft-01"


def parse_args(argv=None):
    """
    Parse the command-line arguments of the import script.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse.  When omitted (the default), argparse
        reads them from ``sys.argv[1:]``, so existing no-argument calls
        behave exactly as before.

    Returns
    -------
    argparse.Namespace
        Namespace with ``source_path`` and ``dest_path`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Import transform schemas from asdf-standard.",
    )
    parser.add_argument(
        "source_path",
        help="directory that is searched for *.yaml schema files",
    )
    parser.add_argument(
        "dest_path",
        help="directory that the converted schema files are written to",
    )
    return parser.parse_args(argv)


# The command line is parsed at module level; the rest of the script
# reads ``args.source_path`` and ``args.dest_path``.
args = parse_args()


# Collect every YAML file directly inside the source directory and group
# the files by schema name.  A filename is split on "-": the first piece is
# the name, and the last piece minus its extension is the version string.
paths = list(glob.glob(os.path.join(args.source_path, "*.yaml")))
paths_by_name = {}
for path in paths:
    filename = os.path.basename(path)
    dash_pieces = filename.split("-")
    name = dash_pieces[0]
    version = dash_pieces[-1].rsplit(".", 1)[0]
    # Each entry is a (path, version) pair, appended in glob order.
    paths_by_name.setdefault(name, []).append((path, version))


# For every schema name keep only the path whose version sorts highest.
# Each candidate is a (path, version) pair; the pairs are ordered by their
# parsed version and the path of the last (greatest) one is taken.
latest_paths = [
    sorted(candidates, key=lambda entry: pkg_resources.parse_version(entry[-1]))[-1][0]
    for candidates in paths_by_name.values()
]


# Rewrite the latest version of every schema and store the result in the
# destination directory under the new version number.
for path in latest_paths:
    # Read (and later write) the YAML explicitly as UTF-8 so the output does
    # not depend on the platform's default locale encoding.
    with open(path, encoding="utf-8") as f:
        content = f.read()

    filename = os.path.basename(path)
    new_filename = filename.split("-")[0] + f"-{VERSION}.yaml"

    # Fail with a message that names the offending file rather than an
    # AttributeError on None when the schema has no recognizable id line.
    id_match = ID_PATTERN.search(content)
    if id_match is None:
        raise ValueError(f"No schema id found in {path}")
    schema_name = id_match.group(1)
    new_uri = f"{URI_PREFIX}/{schema_name}-{VERSION}"

    # The id and tag lines both point at the same new URI.
    new_id = f"id: {new_uri}\n"
    content = ID_PATTERN.sub(new_id, content)

    new_tag = f"tag: {new_uri}\n"
    content = TAG_PATTERN.sub(new_tag, content)

    content = METASCHEMA_PATTERN.sub(f"$schema: {METASCHEMA}\n", content)

    # Order matters: external refs are turned into http URIs first, so the
    # internal pattern (which skips refs starting with "http") leaves them alone.
    content = EXTERNAL_REF_PATTERN.sub(r"$ref: http://stsci.edu/schemas/asdf/\1", content)
    content = INTERNAL_REF_PATTERN.sub(rf"$ref: \1-{VERSION}", content)

    # Short "!transform/..." tags become absolute tags at the new version.
    content = BANG_TRANSFORM.sub(rf"!<{URI_PREFIX}/\1-{VERSION}>", content)

    # TODO: Update these once core schemas start using http:// URIs:
    content = BANG_UNIT.sub(r"!<tag:stsci.edu:asdf/unit/\1>", content)
    content = BANG_CORE.sub(r"!<tag:stsci.edu:asdf/core/\1>", content)

    output_path = os.path.join(args.dest_path, new_filename)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)