File: escape.py

package info (click to toggle)
python-clevercsv 0.7.5%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 872 kB
  • sloc: python: 5,076; ansic: 763; makefile: 81
file content (64 lines) | stat: -rw-r--r-- 1,487 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-

"""
Common functions for dealing with escape characters.

Author: Gertjan van den Burg
Date: 2018-11-06
"""

import codecs
import unicodedata


def is_potential_escapechar(char, encoding, block_char=None):
    """Decide whether a character could serve as an escape character.

    A character qualifies when its Unicode general category is
    "Punctuation, Other" (``Po``) and it is *not* one of the blocked
    characters.

    Parameters
    ----------
    char : str
        The character to check.

    encoding : str
        Name of the encoding used to encode and then decode ``char`` before
        its Unicode category is looked up.

    block_char : iterable
        Characters in the "Punctuation, Other" category that must never be
        treated as an escape character. If None, the default set is used,
        equal to::

        ["!", "?", '"', "'", ".", ",", ";", ":", "%", "*", "&", "#"]

    Returns
    -------
    is_escape : bool
        True if the character is a potential escape character, False
        otherwise.

    """
    # Round-trip the character through the given encoding; this raises if
    # the character cannot be represented in that encoding.
    encoded = bytes(char, encoding)
    symbol = codecs.decode(encoded, encoding=encoding)

    # Anything outside "Punctuation, Other" is rejected right away.
    if unicodedata.category(symbol) != "Po":
        return False

    excluded = block_char
    if excluded is None:
        # Common punctuation that is in "Po" but is ordinary text in
        # practice rather than an escape character.
        excluded = ["!", "?", '"', "'", ".", ",", ";", ":", "%", "*", "&", "#"]

    return symbol not in excluded