File: util.py

package info (click to toggle)
sphinx-needs 5.1.0+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,924 kB
  • sloc: python: 21,132; javascript: 187; makefile: 89; sh: 29; xml: 10
file content (58 lines) | stat: -rw-r--r-- 1,821 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from io import StringIO
from xml.etree import ElementTree

# Prefix-to-URI mapping handed to ElementTree ``find``/``findall`` so that
# ``html:``-prefixed path steps match elements in the XHTML namespace.
NS = {"html": "http://www.w3.org/1999/xhtml"}


class HtmlNeed:
    """Helper class to parse HTML needs.

    Wraps the ElementTree element of one need and exposes the need's id and
    title as read-only properties. Both properties return ``None`` when the
    expected markup is not present.
    """

    def __init__(self, need):
        """Keep a reference to the element to inspect.

        :param need: ``xml.etree.ElementTree.Element`` holding the need markup.
        """
        self.need = need

    def _find(self, path):
        """Return the first descendant matching *path*, or ``None``.

        The path is resolved against the XHTML namespace first. If nothing
        matches, it is retried with an empty namespace URI so that
        ``html:tag`` also matches elements carrying no namespace.
        """
        found = self.need.find(path, NS)
        if found is None:
            found = self.need.find(path, {"html": ""})
        return found

    @property
    def id(self):
        """Text of the ``reference internal`` anchor, or ``None`` if absent."""
        anchor = self._find(".//html:a[@class='reference internal']")
        # Guard against a missing anchor instead of failing on ``None.text``.
        return None if anchor is None else anchor.text

    @property
    def title(self):
        """Text of the first child of the ``needs_title`` span, or ``None``.

        ``None`` is returned both when the span is missing and when it has
        no child element.
        """
        title_span = self._find(".//html:span[@class='needs_title']")
        # Explicit None/length checks: truth-testing an Element is deprecated
        # and only ever meant "has children".
        if title_span is None or len(title_span) == 0:
            return None
        return title_span[0].text  # first child is the span_data element


def extract_needs_from_html(html):
    """Parse an HTML page and return the need tables found in it.

    :param html: Page markup as a string. Apart from the entities handled
        below it has to be well-formed XML.
    :return: List of ``HtmlNeed`` objects, one for every ``<table>`` whose
        ``class`` attribute contains the substring ``need``.
    :raises xml.etree.ElementTree.ParseError: If the markup cannot be parsed.
    """
    # ``&copy;`` is an HTML entity that XML does not define, so drop the
    # reference (and the literal sign). Only this entity is removed:
    # stripping every "&" would also break references such as ``&nbsp;`` or
    # ``&lt;``, which the parser resolves on its own.
    html = html.replace("&copy;", "").replace("©", "")

    parser = ElementTree.XMLParser(encoding="utf-8")

    # XML has no nbsp definition, which comes from HTML.
    # So we need to add it
    parser.entity["nbsp"] = " "

    document = ElementTree.ElementTree().parse(StringIO(html), parser=parser)
    tables = document.findall(".//html:table", NS)

    # Sphinx <3.0 start html-code with:
    #    <html xmlns="http://www.w3.org/1999/xhtml">
    # Sphinx >= 3.0 starts it with:
    #    <html>
    # So above search will not work for Sphinx >= 3.0 and we try a new one
    if not tables:
        tables = document.findall(".//html:table", {"html": ""})

    return [HtmlNeed(table) for table in tables if "need" in table.get("class", "")]