File: adapt_sitemap.py

package info (click to toggle)
mdanalysis 2.9.0-12
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 115,708 kB
  • sloc: python: 86,757; ansic: 8,156; makefile: 215; sh: 138
file content (91 lines) | stat: -rwxr-xr-x 2,681 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
#
#
# Adjust path in sitemap.xml

import argparse
from xml.etree import ElementTree

# defaults for MDAnalysis, see https://github.com/MDAnalysis/mdanalysis/pull/1890
# and https://github.com/MDAnalysis/MDAnalysis.github.io/issues/78

# Default value of the --replace option: the URL prefix written into the
# <loc> elements of the output sitemap.
RELEASE_URL = "https://www.mdanalysis.org/docs/"
# Default value of the --search option: the URL prefix looked for in the
# <loc> elements of the input sitemap.
DEVELOP_URL = "https://www.mdanalysis.org/mdanalysis/"

# change if sitemaps.org updates their schema
# Maps the "sitemaps" prefix used in the find()/findall() paths to the
# namespace URI; the same URI is used as default namespace when writing.
NAMESPACE = {"sitemaps": "http://www.sitemaps.org/schemas/sitemap/0.9"}


def replace_loc(tree, search, replace, namespace=NAMESPACE):
    """Replace *search* with *replace* in every ``<loc>`` of a sitemap tree.

    Parameters
    ----------
    tree : xml.etree.ElementTree.ElementTree
        Parsed sitemap; its ``<loc>`` elements are modified in place.
    search : str
        Substring to look for in the text of each ``<loc>`` element.
    replace : str
        Text that replaces every occurrence of *search*.
    namespace : dict
        Prefix-to-URI mapping that must contain the ``"sitemaps"`` prefix
        used to look up the ``url`` and ``loc`` elements.

    Returns
    -------
    xml.etree.ElementTree.ElementTree
        The same *tree* object that was passed in.

    Raises
    ------
    ValueError
        If the root has no ``sitemaps:url`` children, or if a ``url``
        element has no ``sitemaps:loc`` child with text content.
    """
    root = tree.getroot()
    urls = root.findall("sitemaps:url", namespace)
    if not urls:
        raise ValueError(
            "No sitemaps:url element found: check if the namespace in the XML file "
            "is still xmlns='{0[sitemaps]}'".format(namespace)
        )
    for url in urls:
        loc = url.find("sitemaps:loc", namespace)
        # find() returns None when there is no matching child, and an empty
        # <loc/> has text None; both cases are reported as a ValueError
        # instead of leaking an AttributeError to the caller.
        if loc is None or loc.text is None:
            raise ValueError(
                "No sitemaps:loc element found: check if the namespace in the XML file "
                "is still xmlns='{0[sitemaps]}'".format(namespace)
            )
        loc.text = loc.text.replace(search, replace)
    return tree


if __name__ == "__main__":
    # Command-line entry point: parse a sitemap file, rewrite the URL prefix
    # in its <loc> elements and write the result to a separate output file.
    parser = argparse.ArgumentParser(
        description="Change top level loc in sitemap.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "sitemap",
        metavar="FILE",
        # The input is only read; the altered XML goes to --output.
        help="path to the input sitemap.xml file",
    )
    parser.add_argument(
        "--output",
        "-o",
        metavar="FILE",
        default="sitemap_release.xml",
        help="write altered XML to output FILE",
    )
    parser.add_argument(
        "--search",
        "-s",
        metavar="URL",
        default=DEVELOP_URL,
        help="search this URL in the loc elements",
    )
    parser.add_argument(
        "--replace",
        "-r",
        metavar="URL",
        default=RELEASE_URL,
        help="replace the searched URL with this URL in the loc elements",
    )
    args = parser.parse_args()

    # Pass the path rather than a text-mode file object so that the XML
    # parser reads the raw bytes and applies the encoding from the XML
    # declaration instead of the locale's default text encoding.
    tree = ElementTree.parse(args.sitemap)

    tree = replace_loc(tree, args.search, args.replace)

    # Binary mode: ElementTree encodes the document itself (utf-8 here).
    with open(args.output, "wb") as xmlfile:
        tree.write(
            xmlfile,
            encoding="utf-8",
            xml_declaration=True,
            # Emit elements without a prefix by declaring the sitemap
            # namespace as the document's default namespace.
            default_namespace=NAMESPACE["sitemaps"],
        )

    print(
        "adapt_sitemap.py: Created output file {} with change in loc:".format(
            args.output
        )
    )
    print("adapt_sitemap.py: {0} --> {1}".format(args.search, args.replace))