File: helpers.py

package info (click to toggle)
python-pattern 2.6%2Bgit20180818-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 93,888 kB
  • sloc: python: 28,119; xml: 15,085; makefile: 194
file content (45 lines) | stat: -rw-r--r-- 1,443 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division

from builtins import str, bytes, dict, int
from builtins import map, zip, filter
from builtins import object, range

#--- STRING FUNCTIONS ------------------------------------------------------------------------------
# Latin-1 (ISO-8859-1) encoding is identical to Windows-1252 except for the code points 128-159:
# Latin-1 assigns control codes in this range, Windows-1252 has characters, punctuation, symbols
# assigned to these code points.


def decode_string(v, encoding="utf-8"):
    """ Returns the given value as a Unicode string (if possible).
    """
    if isinstance(encoding, str):
        encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore"))
    if isinstance(v, bytes):
        for e in encoding:
            try:
                return v.decode(*e)
            except:
                pass
        return v
    return str(v)


def encode_string(v, encoding="utf-8"):
    """ Returns the given value as a Python byte string (if possible).
    """
    if isinstance(encoding, str):
        encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore"))
    if isinstance(v, str):
        for e in encoding:
            try:
                return v.encode(*e)
            except:
                pass
        return v
    return bytes(v)

decode_utf8 = decode_string
encode_utf8 = encode_string