File: parser.py

package info (click to toggle)
python-tld 0.13-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,008 kB
  • sloc: python: 12,972; sh: 119; makefile: 19
file content (83 lines) | stat: -rw-r--r-- 2,394 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from typing import Dict, Optional

from data import RAW_TLD_NAMES_DATA

from tld.base import BaseTLDSourceParser
from tld.exceptions import TldIOError
from tld.trie import Trie
from tld.utils import tld_names, update_tld_names_container


class GAEMozillaTLDSourceParser(BaseTLDSourceParser):
    """TLD source parser that reads the public suffix list from memory.

    Instead of opening a file, the list is taken from the
    ``RAW_TLD_NAMES_DATA`` string imported from the ``data`` module.
    NOTE(review): the ``GAE`` prefix presumably refers to Google App
    Engine -- confirm with the author.
    """

    uid: str = "gae_mozilla"
    source_url: str = "https://publicsuffix.org/list/public_suffix_list.dat"
    # Key under which the parsed trie is registered in ``tld_names``.
    local_path: str = "gae_mozilla"

    @classmethod
    def get_tld_names(
        cls, fail_silently: bool = False, retry_count: int = 0
    ) -> Optional[Dict[str, Trie]]:
        """Parse the in-memory suffix list and register the resulting trie.

        :param fail_silently: If True, return ``None`` on failure instead
            of raising.
        :param retry_count: Number of retries already performed; once it
            exceeds 1 the method gives up.
        :return: The shared ``tld_names`` container, or ``None`` when
            parsing failed and ``fail_silently`` is set.
        :raises TldIOError: If the retry limit is exceeded and
            ``fail_silently`` is False.
        """
        if retry_count > 1:
            if fail_silently:
                return None
            raise TldIOError

        _tld_names = tld_names

        # If already loaded, return
        if (
            cls.local_path in _tld_names
            and _tld_names[cls.local_path] is not None
        ):
            return _tld_names

        try:
            trie = Trie()
            trie_add = trie.add  # Performance opt
            # Entries seen after the private-domains marker are flagged
            # as private.
            private_section = False

            # The data is a plain string, so there is no file handle to
            # close once parsing is done.
            for line in RAW_TLD_NAMES_DATA.split("\n"):
                if "===BEGIN PRIVATE DOMAINS===" in line:
                    private_section = True

                # Puny code TLD names are listed inside comments; take the
                # token that follows the comment marker.
                if "// xn--" in line:
                    line = line.split()[1]

                # Strip before testing so whitespace-only lines (e.g. a
                # stray "\r" from CRLF data) are skipped rather than being
                # added to the trie as an empty entry.
                entry = line.strip()
                if not entry or entry.startswith("/"):
                    continue

                trie_add(entry, private=private_section)

            update_tld_names_container(cls.local_path, trie)
        except IOError:
            # Grab the file
            cls.update_tld_names(fail_silently=fail_silently)
            # Run again with ``retry_count`` incremented in order to avoid
            # infinite loops
            return cls.get_tld_names(
                fail_silently=fail_silently, retry_count=retry_count + 1
            )
        except Exception:
            if fail_silently:
                return None
            # Bare ``raise`` re-raises the active exception unchanged.
            raise

        return _tld_names