File: imap_utf7.py

package info (click to toggle)
imap-tools 1.10.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,624 kB
  • sloc: python: 4,709; makefile: 5
file content (70 lines) | stat: -rw-r--r-- 2,093 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
Encode and decode modified UTF-7 strings, as described in RFC 3501.

The variant used by IMAP4rev1 differs from standard UTF-7, therefore the built-in Python UTF-7 codec can't be used.
The main difference is the shift character, used to switch from ASCII to base64 encoding context.
In modified UTF-7 this is "&" instead of "+", because "+" is commonly used in mailbox names.
Full description: RFC 3501, section 5.1.3.
"""

import binascii
from typing import MutableSequence

# Integer code points of the shift delimiters; utf7_decode compares them against
# the integers produced by iterating over its bytes input.
AMPERSAND_ORD = ord('&')  # opens a base64 (shifted) sequence
HYPHEN_ORD = ord('-')  # closes a base64 (shifted) sequence


# ENCODING
# --------
def _modified_base64(value: str) -> bytes:
    return binascii.b2a_base64(value.encode('utf-16be')).rstrip(b'\n=').replace(b'/', b',')


def _do_b64(_in: MutableSequence[str], r: MutableSequence[bytes]):
    if _in:
        r.append(b'&' + _modified_base64(''.join(_in)) + b'-')
    _in.clear()


def utf7_encode(value: str) -> bytes:
    """
    Encode *value* to modified UTF-7 bytes.

    Characters in the range 0x20-0x7e other than "&" are emitted as-is,
    "&" is emitted as b'&-', and every other character is buffered and
    flushed through _do_b64 as a single shifted sequence as soon as a
    directly representable character (or the end of the input) is reached.
    """
    chunks = []
    pending = []
    for ch in value:
        is_ampersand = ch == '&'
        if is_ampersand or 0x20 <= ord(ch) <= 0x7e:
            # A literal character ends any shifted run collected so far.
            _do_b64(pending, chunks)
            chunks.append(b'&-' if is_ampersand else ch.encode())
        else:
            pending.append(ch)
    # Flush whatever is still buffered at the end of the input.
    _do_b64(pending, chunks)
    return b''.join(chunks)


# DECODING
# --------
def _modified_unbase64(value: bytearray) -> str:
    return binascii.a2b_base64(value.replace(b',', b'/') + b'===').decode('utf-16be')


def utf7_decode(value: bytes) -> str:
    """
    Decode modified UTF-7 bytes to text.

    Bytes outside a shifted sequence are converted with chr(). A sequence
    starts at "&" and ends at the next "-": b'&-' yields a literal "&",
    anything else between the delimiters is decoded by _modified_unbase64.
    A sequence still open at the end of the input is decoded as well.
    """
    pieces = []
    # Non-empty only while inside a shifted sequence; holds b'&' + payload.
    shifted = bytearray()
    for byte in value:
        if not shifted:
            if byte == AMPERSAND_ORD:
                shifted.append(byte)
            else:
                pieces.append(chr(byte))
            continue
        if byte != HYPHEN_ORD:
            # Still inside the sequence (a nested "&" is collected too).
            shifted.append(byte)
            continue
        # Closing "-": drop the leading "&" marker and decode the payload.
        payload = shifted[1:]
        pieces.append(_modified_unbase64(payload) if payload else '&')
        shifted = bytearray()
    if shifted:
        # Unterminated sequence: decode what was collected after the "&".
        pieces.append(_modified_unbase64(shifted[1:]))
    return ''.join(pieces)