File: ns-html2rst

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (57 lines) | stat: -rwxr-xr-x 1,636 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
import re
import subprocess
import sys


def _simplify_html(html):
    """Rewrite HTML constructs that do not convert cleanly to ReST.

    Args:
        html: The HTML document as a string.

    Returns:
        The HTML with declaration ``<div>`` elements turned into ``<pre>``,
        class-bearing ``<pre>`` tags stripped of their attributes, and anchor
        tags removed from inside ``<code>`` elements.
    """
    # Treat <div class="declaration">...</div> as <pre>...</pre>
    html = re.sub(
        r'<div\s+class="declaration">(.*?)</div>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Strip all attributes from <pre>...</pre> containing class="..."
    # The resulting classes confound ReST
    html = re.sub(
        r'<pre\s[^>]*class=[^>]*>(.*?)</pre>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Remove links from <code>...</code>, which doesn't have a rendering in
    # ReST.  Each <code> element is handled on its own so that every link
    # inside it is removed (not just the first one) and so that a match can
    # never extend past </code> and swallow a link outside the element.
    def strip_anchors(match):
        inner = re.sub(r'</?a(?:\s[^>]*)?>', '', match.group(1))
        return '<code>' + inner + '</code>'

    return re.sub(r'<code>(.*?)</code>', strip_anchors, html,
                  flags=re.DOTALL)


def _fix_heading_underlines(rst):
    """Replace lines made up solely of apostrophes with carets.

    HACKETY HACK HACK: Our html documents apparently contain some
    bogus heading level nesting.  Just fix up the one we know about
    so that ReST doesn't complain later.

    Args:
        rst: The ReST text produced by pandoc.

    Returns:
        The text with every all-apostrophe line rewritten as a line of
        ``^`` characters of the same length.
    """
    return re.sub(r"^'+$",
                  lambda m: '^' * len(m.group(0)),
                  rst, flags=re.MULTILINE)


def run():
    """Convert Cocoa HTML documentation on stdin into ReST on stdout.

    If any command-line argument is given, the usage text is printed and the
    process exits with status 0.  Otherwise the HTML read from standard input
    is simplified, converted by an external ``pandoc`` process, patched up,
    and written to standard output.

    Raises:
        SystemExit: After printing usage (status 0), or with pandoc's exit
            status when pandoc fails.
        OSError: If the ``pandoc`` executable cannot be started.
    """
    if len(sys.argv) > 1:
        print("""
ns-html2rst - Convert Cocoa HTML documentation into ReST

usage: ns-html2rst < NSString.html > NSString.rst
        """)
        sys.exit(0)

    html = _simplify_html(sys.stdin.read())

    # Let pandoc do most of the hard work.  The pipes are opened in text
    # mode: under Python 3 the HTML is a str, and a binary-mode pipe would
    # reject it (and hand back bytes that the str-based fix-up below cannot
    # process).  UTF-8 is requested explicitly because that is the encoding
    # pandoc uses for its input and output.
    result = subprocess.run(
        ['pandoc', '--reference-links', '-f', 'html', '-t', 'rst'],
        input=html,
        stdout=subprocess.PIPE,
        encoding='utf-8')

    # pandoc's stderr is inherited, so its diagnostics have already been
    # shown; propagate the failure instead of emitting a bogus document.
    if result.returncode != 0:
        sys.exit(result.returncode)

    sys.stdout.write(_fix_heading_underlines(result.stdout))


# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run()