File: man2html.py

package info (click to toggle)
slurm-wlm-contrib 24.11.5-4
links: PTS, VCS
area: contrib
in suites: forky, sid
size: 50,600 kB
sloc: ansic: 529,598; exp: 64,795; python: 17,051; sh: 9,411; javascript: 6,528; makefile: 4,030; perl: 3,762; pascal: 131
file content (311 lines) | stat: -rwxr-xr-x 10,737 bytes
parent folder | download | duplicates (3)
#!/usr/bin/env python3

import codecs
import os
import re
import subprocess
import sys

# Base URL that canonical_rewrite() prefixes to the generated page's file name.
canonical_url = 'https://slurm.schedmd.com/'

# Server-side include directive, e.g. <!--#include virtual="header.txt"-->.
# group(1) is the whole directive, group(2) the quoted file name.
include_pat = r'(<!--\s*#include\s*virtual\s*=\s*"([^"]+)"\s*-->)'
include_regex = re.compile(include_pat)

# Placeholder replaced by the <link rel="canonical"> element.
canonical_pat = r'(<!--\s*#canonical\s*-->)'
canonical_regex = re.compile(canonical_pat)

# Placeholder replaced by the <title> element.
page_title_pat = r'(<!--\s*#pagetitle\s*-->)'
page_title_regex = re.compile(page_title_pat)

# href attribute: group(1) is ' href="', group(2) the target up to any '#',
# group(3) the optional '#fragment', group(4) the closing quote.
url_pat = r'(\s+href\s*=\s*")([^"#]+)(#[^"]+)?(")'
url_regex = re.compile(url_pat)

# First-level heading at the start of a line; group(2) is the heading text,
# which becomes the page title.
# The tag letter is written [hH]: the earlier "[h|H]" also accepted a literal
# "|".  The hyphen sits last in the character class so it is a literal "-";
# written as "/-\\" it formed a range from "/" to "\" that let "<", ">" and
# ":" into the title while rejecting an actual hyphen.
first_header_pat = r"(<[hH]1>\s*([a-zA-Z0-9_ ()'/\\.-]+)\s*</[hH]1>)"
first_header_regex = re.compile(first_header_pat)

# Placeholder replaced by the version string given on the command line.
version_pat = r'(@SLURM_VERSION@)'
version_regex = re.compile(version_pat)

# Base id name -> number of repeats seen so far; used by insert_tag() to keep
# generated anchor ids unique.
ids = {}

# Text of the page's first <H1>, used by page_title_rewrite().
title = ''
# Directory against which include_virtual() and url_rewrite() resolve paths.
dirname = ''

# Insert tags for options
#   Two styles are processed.
#       <DT><B>pppppp</B><DD>
#           has tag <a id="OPT_pppppp"></a>
#       <DT><B>--pppppp</B> or <DT><B>-P</B>, <B>--pppppp</B>
#           has tag <a id="OPT_pppppp"></a>
#   <H2>hhhh</h2> also has tag <a id="SECTION_hhhh"></a> inserted
def _unique_id(base):
    """Return base, suffixed with "_N" on its Nth repeat.

    Usage is tracked in the module-level ``ids`` dict so that every anchor
    generated while ``ids`` is not reset gets a distinct id.
    """
    if base in ids:
        ids[base] += 1
        return "%s_%d" % (base, ids[base])
    ids[base] = 0
    return base


def insert_tag(lineIn):
    """Add a self-linking anchor to a man2html heading or option line.

    Two kinds of line are rewritten; anything else is returned unchanged:

    * ``<Hn>text</Hn>`` with n other than 1 becomes a lower-case ``<hn>``
      element containing an anchor whose id is ``SECTION_`` plus the heading
      text with spaces replaced by hyphens.
    * ``<DT><B>...`` or ``<DT><I>...`` becomes ``<dt>...</dt><dd>`` with an
      anchor whose id is ``OPT_`` plus the option name: the text after the
      first ``--`` on the line, or else the text after the opening tag, up to
      the closing ``</B>`` / ``</I>``.

    Whatever follows the closing heading tag or the ``<DD>`` on the same line
    (normally just the newline) is kept.

    Args:
        lineIn: one line of man2html output.

    Returns:
        The rewritten line, or ``lineIn`` itself when it is not one of the
        forms above or is malformed.
    """
    # Slices, unlike lineIn[3], do not raise on a line shorter than four
    # characters such as "<H\n".
    if lineIn[0:2] == "<H" and lineIn[3:4] == ">":
        header_num = lineIn[2]
        # Don't make links to h1 tags - no need, since already at top of page
        if header_num == "1":
            return lineIn

        close_tag = "</H%s>" % header_num
        posEnd = lineIn.find(close_tag)
        if posEnd == -1:
            return lineIn

        contents = lineIn[4:posEnd]
        tail = lineIn[posEnd + len(close_tag):]
        id_name = "SECTION_%s" % _unique_id(contents.replace(' ', '-'))
        return '<h%s>%s<a class="slurm_link" id="%s" href="#%s"></a></h%s>%s' % (
            header_num, contents, id_name, id_name, header_num, tail)

    if lineIn[0:4] != "<DT>":
        return lineIn
    open_tag = lineIn[4:7]
    if open_tag == "<B>":
        close_tag = "</B>"
    elif open_tag == "<I>":
        close_tag = "</I>"
    else:
        return lineIn

    posBgn = lineIn.find("--")
    if posBgn == -1:
        # 1st form: the name starts right after "<DT><B>" / "<DT><I>"
        posBgn = 7
    else:
        posBgn += 2

    posEnd = lineIn.find(close_tag, posBgn)
    if posEnd == -1:
        # poorly constructed
        return lineIn

    contentsEnd = lineIn.find("<DD>")
    if contentsEnd == -1:
        # No <DD> on this line: the term is the whole line apart from its
        # line ending (a blind [4:-1] slice would eat a real character when
        # the line has no newline).
        body = lineIn.rstrip("\r\n")
        contents = body[4:]
        tail = lineIn[len(body):]
    else:
        contents = lineIn[4:contentsEnd]
        tail = lineIn[contentsEnd + len("<DD>"):]

    id_name = "OPT_%s" % _unique_id(lineIn[posBgn:posEnd].replace(' ', '-'))
    link_and_contents = '%s<a class="slurm_link" id="%s" href="#%s"></a>' % (
        contents, id_name, id_name)
    return "<dt>" + link_and_contents + "</dt><dd>" + tail


def llnl_references(line):
    """Replace one known man2html-generated reference with its local form.

    The rules are tried in order and only the first one that changes the
    line is applied; the line is returned unchanged when none matches.
    Links to slurm.schedmd.com documents become relative links, and links
    to man pages that are not part of this document set (logrotate,
    path_resolution) are reduced to plain text.
    """
    # (text produced by man2html, replacement), in priority order.
    substitutions = (
        ("Refer to mc_support.html",
         'Refer to <a href="mc_support.html">mc_support</a>'),
        ('<A HREF="https://slurm.schedmd.com/mc_support.html">https://slurm.schedmd.com/mc_support.html</A>',
         'the <a href="mc_support.html">mc_support</a> document'),
        ('<A HREF="https://slurm.schedmd.com/dist_plane.html.">https://slurm.schedmd.com/dist_plane.html.</A>',
         'the <a href="dist_plane.html">dist_plane</a> document'),
        ('&lt;<A HREF="https://slurm.schedmd.com/mpi_guide.html">https://slurm.schedmd.com/mpi_guide.html</A>&gt;',
         '<a href="mpi_guide.html">mpi_guide</a>'),
        ('(<A HREF="https://slurm.schedmd.com/power_save.html).">https://slurm.schedmd.com/power_save.html).</A>',
         '<a href="power_save.html">power_save</a>'),
        ('<A HREF="https://slurm.schedmd.com/cons_tres.html">https://slurm.schedmd.com/cons_tres.html</A>',
         '<a href="cons_tres.html">cons_tres</a>'),
        ('<A HREF="https://slurm.schedmd.com/cons_tres_share.html">https://slurm.schedmd.com/cons_tres_share.html</A>',
         '<a href="cons_tres_share.html">cons_tres_share</a>'),
        ('<A HREF="https://slurm.schedmd.com/gang_scheduling.html">https://slurm.schedmd.com/gang_scheduling.html</A>',
         '<a href="gang_scheduling.html">gang_scheduling</a>'),
        ('<A HREF="https://slurm.schedmd.com/preempt.html">https://slurm.schedmd.com/preempt.html</A>',
         '<a href="preempt.html">preempt</a>'),
        ('<A HREF="/cgi-bin/man/man2html?8+logrotate">logrotate</A>',
         'logrotate'),
        ('<A HREF="/man/man2html?8+logrotate">logrotate</A>',
         'logrotate'),
        ('<A HREF="/cgi-bin/man/man2html?7+path_resolution">path_resolution</A>',
         'path_resolution'),
        ('<A HREF="/man/man2html?7+path_resolution">path_resolution</A>',
         'path_resolution'),
    )

    for man_text, html_text in substitutions:
        rewritten = line.replace(man_text, html_text)
        if rewritten != line:
            return rewritten
    return line

def relative_reference(lineIn):
    """Turn man2html CGI links into relative links to sibling .html pages.

    ``<A HREF="/cgi-bin/man/man2html?1+sbatch">`` (or the ``/man/man2html``
    variant) becomes ``<A HREF="sbatch.html">``; every such link on the line
    is rewritten and all other text, including unrelated anchors, is kept.

    A line that mentions the man2html path without a ``?`` query is part of
    the footer man2html generates: it is replaced by ``""`` when it contains
    "Return to Main Contents" and by ``"<i>man2html</i> "`` otherwise.

    Args:
        lineIn: one line of man2html output.

    Returns:
        The rewritten line; ``lineIn`` unchanged when it holds no man2html
        reference or the reference is not inside a well-formed
        ``<A HREF="...">``.
    """
    ref_anchor = "<A HREF="
    len_ref_anchor = len(ref_anchor)

    for full_ref in ("/cgi-bin/man/man2html", "/man/man2html"):
        len_ref = len(full_ref)
        pos_ref = lineIn.find(full_ref)
        if pos_ref == -1:
            continue

        # A one-character slice is "" (not an IndexError) when the path is
        # the very last thing on the line.
        if lineIn[pos_ref + len_ref:pos_ref + len_ref + 1] != "?":
            if "Return to Main Contents" in lineIn:
                return ""
            return "<i>man2html</i> "

        pieces = []
        cursor = 0
        while pos_ref != -1:
            # Take the anchor nearest to the reference; searching forward
            # from the cursor would pick up an unrelated earlier link and
            # drop everything between it and this one.
            pos_anchor = lineIn.rfind(ref_anchor, cursor, pos_ref)
            # Skip "?N+"; this assumes a single-character section number.
            name_start = pos_ref + len_ref + 3
            pos_quote = lineIn.find('"', name_start)
            if pos_anchor == -1 or pos_quote == -1:
                # poorly constructed: leave the line alone
                return lineIn
            pieces.append(lineIn[cursor:pos_anchor + len_ref_anchor])
            pieces.append('"')
            pieces.append(lineIn[name_start:pos_quote])
            pieces.append(".html")
            cursor = pos_quote
            pos_ref = lineIn.find(full_ref, cursor)
        pieces.append(lineIn[cursor:])

        lineOt = "".join(pieces)
        if lineOt != lineIn:
            return lineOt

    return lineIn


def include_virtual(matchobj):
    """Expand one ``<!--#include virtual="file"-->`` directive.

    Substitution callback for ``include_regex``.  The file named in group 2
    is looked up relative to the module-level ``dirname`` (or used as given
    when ``dirname`` is empty).

    Returns:
        The file's contents, or the directive itself (group 0) when the
        file does not exist.
    """
    included = matchobj.group(2)
    if dirname:
        filename = dirname + '/' + included
    else:
        filename = included

    if not os.access(filename, os.F_OK):
        return matchobj.group(0)

    # The context manager closes the handle promptly; the text is decoded as
    # UTF-8, the encoding the generated page is written in.
    with open(filename, 'r', encoding='utf-8') as fragment:
        return fragment.read()

def canonical_rewrite(matchobj):
    """Build the canonical <link> element for the page being written.

    Substitution callback for ``canonical_regex``; the match itself is not
    used.  The target is the module-level ``canonical_url`` followed by the
    module-level ``newfilename``.
    """
    return '<link rel="canonical" href="%s%s" />' % (canonical_url, newfilename)

def page_title_rewrite(matchobj):
    """Build the <title> element for the page being written.

    Substitution callback for ``page_title_regex``; the match itself is not
    used.  The page name comes from the module-level ``title``.
    """
    return '<title>Slurm Workload Manager - %s</title>' % title

def url_rewrite(matchobj):
    """Point an href at the .html counterpart of an existing .shtml file.

    Substitution callback for ``url_regex``.  The link is rewritten only
    when its target ends in ``.shtml`` and that file exists, looked up
    relative to the module-level ``dirname`` when one is set; any
    ``#fragment`` is carried over.  Every other link is returned untouched.
    """
    prefix, target, fragment, closing = matchobj.group(1, 2, 3, 4)
    on_disk = dirname + '/' + target if dirname else target

    if not (target.endswith('.shtml') and os.access(on_disk, os.F_OK)):
        return matchobj.group(0)

    renamed = target[:-len('.shtml')] + '.html'
    if fragment is not None:
        renamed += fragment
    return prefix + renamed + closing

def version_rewrite(matchobj):
    """Return the module-level ``version`` string.

    Substitution callback for ``version_regex``; the match itself is not
    used.
    """
    replacement = version
    return replacement

# Script body.
#
# Usage: man2html.py <version> <header file> <footer file> <man page>...
#
# Each man page is first run through the external "man2html" program into an
# intermediate "<name>.mhtml" file in the current directory.  Each of those is
# then wrapped in the header and footer, post-processed line by line, written
# to "<name>.html" and removed.

files = []          # intermediate .mhtml files, in command-line order
source_dirs = []    # directory of the man page each entry of `files` came from
version = sys.argv[1]
for f in sys.argv[4:]:
    src_dir, basefilename = os.path.split(f)
    # splitext() drops only the last extension and leaves a name without any
    # "." intact (slicing at rfind() == -1 would cut off its last character).
    mhtmlname = os.path.splitext(basefilename)[0] + ".mhtml"
    # Hand the page to man2html through file handles rather than a shell
    # command line, so paths containing spaces or shell metacharacters work.
    # As before, man2html's exit status is not checked.
    with open(f, 'rb') as man_page, open(mhtmlname, 'wb') as mhtml:
        subprocess.run(["man2html"], stdin=man_page, stdout=mhtml)
    print(">>>>>>> " + mhtmlname)
    files.append(mhtmlname)
    source_dirs.append(src_dir)

# Wrapper lines emitted by man2html that must not be copied into the page.
dropped_lines = (
    "Content-type: text/html\n",
    "Content-type: text/html; charset=UTF-8\n",
)
dropped_prefixes = ("<HTML>", "</HEAD>", "</HTML>", "</BODY>")

# `dirname` is set per page so that include_virtual() and url_rewrite()
# resolve paths against the directory of the page being converted, not the
# directory of the last file on the command line.
for dirname, filename in zip(source_dirs, files):
    newfilename = filename[:-6] + '.html'
    print('Converting', filename, '->', newfilename)

    # Per-page state: anchor ids only need to be unique within one page, and
    # a page without an <H1> must not inherit the previous page's title.
    ids.clear()
    title = ''

    with codecs.open(filename, 'r', encoding='utf-8') as shtml:
        page_lines = shtml.readlines()

    # The first <H1> of the page provides the title.
    for line in page_lines:
        result = first_header_regex.match(line)
        if result:
            title = result.group(2)
            break

    # Header and footer are decoded as UTF-8, matching the output encoding.
    with open(sys.argv[2], 'r', encoding='utf-8') as header_file:
        header = header_file.read()
    header = header.replace(".shtml", ".html")
    header = version_regex.sub(version_rewrite, header)
    header = page_title_regex.sub(page_title_rewrite, header)
    header = canonical_regex.sub(canonical_rewrite, header)

    with open(sys.argv[3], 'r', encoding='utf-8') as footer_file:
        footer = footer_file.read()
    footer = footer.replace(".shtml", ".html")
    footer = version_regex.sub(version_rewrite, footer)

    with codecs.open(newfilename, 'w', encoding='utf-8') as html:
        html.write(header)
        for line in page_lines:
            # Remove html header/footer created by man2html
            if line in dropped_lines or line.startswith(dropped_prefixes):
                continue
            line = include_regex.sub(include_virtual, line)
            # Special case some html references
            line = llnl_references(line)
            # Insert tags for some options
            line = insert_tag(line)
            # Make man2html links relative ones
            line = relative_reference(line)

            line = url_regex.sub(url_rewrite, line)
            html.write(line)
        html.write(footer)

    os.remove(filename)