1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
#!/usr/bin/env python3
import re
import sys
import os
import codecs
canonical_url = 'https://slurm.schedmd.com/'
include_pat = r'(<!--\s*#include\s*virtual\s*=\s*"([^"]+)"\s*-->)'
include_regex = re.compile(include_pat)
canonical_pat = r'(<!--\s*#canonical\s*-->)'
canonical_regex = re.compile(canonical_pat)
page_title_pat = r'(<!--\s*#pagetitle\s*-->)'
page_title_regex = re.compile(page_title_pat)
url_pat = r'(\s+href\s*=\s*")([^"#]+)(#[^"]+)?(")'
url_regex = re.compile(url_pat)
first_header_pat = r'<[hH]1>\s*(<a name="top">)?\s*(?P<title>[a-zA-Z0-9_ ()\'/-]+)[:]*.*\s*[</a>]?\s*</[hH]1>'
first_header_regex = re.compile(first_header_pat)
version_pat = r'(@SLURM_VERSION@)'
version_regex = re.compile(version_pat)
title = ''
dirname = ''
newfilename = ''
def include_virtual(matchobj):
global dirname
if dirname:
filename = dirname + '/' + matchobj.group(2)
else:
filename = matchobj.group(2)
if os.access(filename, os.F_OK):
#print('Including file', filename)
lines = open(filename, 'r').read()
return lines
else:
return matchobj.group(0)
def canonical_rewrite(matchobj):
global newfilename
return '<link rel="canonical" href="' + canonical_url + newfilename + '">'
def page_title_rewrite(matchobj):
global title
return '<title>Slurm Workload Manager - ' + title + '</title>'
def url_rewrite(matchobj):
global dirname
if dirname:
localpath = dirname + '/' + matchobj.group(2)
else:
localpath = matchobj.group(2)
if matchobj.group(2)[-6:] == '.shtml' and os.access(localpath, os.F_OK):
location = matchobj.group(2)
if matchobj.group(3) is None:
newname = location[:-6] + '.html'
else:
newname = location[:-6] + '.html' + matchobj.group(3)
#print('Rewriting', location, 'to', newname)
return matchobj.group(1) + newname + matchobj.group(4)
else:
return matchobj.group(0)
def version_rewrite(matchobj):
global version
return version
# Make sure all of the files on the command line have the .shtml extension.
version = sys.argv[1]
files = []
for f in sys.argv[2:]:
if f[-6:] == '.shtml':
files.append(f)
else:
#print('Skipping file ', f, ' (extension is not .shtml)')
pass
for filename in files:
dirname, basefilename = os.path.split(filename)
newfilename = basefilename[:-6] + '.html'
print('Converting', filename, '->', newfilename)
shtml = codecs.open(filename, 'r', encoding='utf-8')
html = codecs.open(newfilename, 'w', encoding='utf-8')
for line in shtml.readlines():
result = first_header_regex.match(line)
if result:
title = result.group('title')
break
shtml.seek(0)
for line in shtml.readlines():
line = include_regex.sub(include_virtual, line)
line = page_title_regex.sub(page_title_rewrite, line)
line = version_regex.sub(version_rewrite, line)
line = canonical_regex.sub(canonical_rewrite, line)
line = url_regex.sub(url_rewrite, line)
html.write(line)
html.close()
shtml.close()
|