File: _title_parser.py

package info (click to toggle)
sphinxext-opengraph 0.12.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,200 kB
  • sloc: python: 1,130; makefile: 11; sh: 8
file content (34 lines) | stat: -rw-r--r-- 829 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from __future__ import annotations

from html.parser import HTMLParser


def get_title(title: str) -> tuple[str, str]:
    """Extract the plain text from an HTML title string.

    Returns a pair ``(all_text, outside_text)``: every piece of text found
    in *title*, and only the text located outside of HTML tags.
    """
    parser = HTMLTextParser()
    parser.feed(title)
    # Force the parser to process anything it is still buffering.
    parser.close()

    all_text, outside_text = parser.text, parser.text_outside_tags
    return all_text, outside_text


class HTMLTextParser(HTMLParser):
    """Parse HTML into text.

    After markup has been fed (and the parser closed), ``text`` holds every
    piece of character data and ``text_outside_tags`` holds only the data
    seen while no element was open.
    """

    # Void elements never have an end tag, so they must not change the
    # nesting depth; otherwise all text after e.g. ``<br>`` would be
    # treated as nested inside it.
    _VOID_ELEMENTS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr',
    })

    def __init__(self) -> None:
        super().__init__()
        # All text found
        self.text = ''
        # Only text outside of html tags
        self.text_outside_tags = ''
        # Number of currently open (non-void) elements
        self.level = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Go one level deeper, unless *tag* is a void element."""
        if tag not in self._VOID_ELEMENTS:
            self.level += 1

    def handle_endtag(self, tag: str) -> None:
        """Go one level up, never dropping below the top level."""
        # A self-closing void tag (``<br/>``) is reported as a start tag
        # followed by an end tag, so void tags are ignored here as well.
        # The ``> 0`` guard keeps a stray end tag from making the depth
        # negative, which would hide all subsequent top-level text.
        if tag not in self._VOID_ELEMENTS and self.level > 0:
            self.level -= 1

    def handle_data(self, data: str) -> None:
        """Record *data*, and also keep it separately when at top level."""
        self.text += data
        if self.level == 0:
            self.text_outside_tags += data