File: param_allowlist.py

package info (click to toggle)
url-normalize 2.2.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 268 kB
  • sloc: python: 935; makefile: 16; sh: 8
file content (49 lines) | stat: -rw-r--r-- 1,372 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Copyright (c) 2023. All rights reserved.
"""URL query parameter allowlist module."""

from __future__ import annotations

# Built-in per-domain allowlist of query parameter names.
# Keys are bare, lowercase domain names (no "www." prefix, no port), i.e. the
# normalized form that get_allowed_params() computes before its lookup; values
# list the query parameter names allowed for that domain.
# Used by get_allowed_params() when the caller passes no allowlist override.
DEFAULT_ALLOWLIST = {
    "google.com": ["q", "ie"],
    "baidu.com": ["wd", "ie"],
    "bing.com": ["q"],
    "youtube.com": ["v", "search_query"],
}


def get_allowed_params(
    host: str | None = None,
    allowlist: dict | list | None = None,
) -> set[str]:
    """Get allowed parameters for a given domain.

    Before the lookup the host is normalized: it is lowercased, a leading
    ``www.`` is removed and a trailing ``:port`` is dropped. A bracketed
    IPv6 literal (e.g. ``[::1]:8080``) keeps its brackets and address and
    only loses the port, so it can be matched by a key such as ``"[::1]"``.

    Params:
        host: Domain name to check (e.g. 'google.com'). May carry a
            ``www.`` prefix and/or a ``:port`` suffix.
        allowlist: Optional override for default allowlist
            If provided as a list, it will be used as is for every host
            (the host is not consulted at all).
            If provided as a dictionary, it should map normalized domain
            names to lists of allowed parameters. An empty dictionary
            means "nothing is allowed"; it does not fall back to the
            default allowlist.
            If None, the default allowlist will be used.

    Returns:
        Set of allowed parameter names for the domain; an empty set when
        the host is missing/empty or the domain is not in the allowlist.

    """
    # A flat list applies globally, so it wins even when no host is given.
    if isinstance(allowlist, list):
        return set(allowlist)

    if not host:
        return set()

    # Normalize host by removing www and port
    domain = host.lower()
    if domain.startswith("www."):
        domain = domain[4:]

    # The colons inside "[...]" belong to an IPv6 address, not to a port
    # separator, so for a bracketed literal cut right after the closing "]".
    closing = domain.find("]") if domain.startswith("[") else -1
    if closing != -1:
        domain = domain[: closing + 1]
    else:
        domain = domain.split(":")[0]

    # Use default allowlist if none provided (an empty dict is a valid override)
    if allowlist is None:
        allowlist = DEFAULT_ALLOWLIST

    # Return allowed parameters for the domain, or an empty set if not found
    return set(allowlist.get(domain, []))