File: testsupport.py

package info (click to toggle)
python-sabyenc 5.4.3-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 39,044 kB
  • sloc: cpp: 12,297; sh: 1,121; python: 652; ansic: 191; makefile: 13
file content (190 lines) | stat: -rw-r--r-- 5,914 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/python3 -OO
# Copyright 2007-2019 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

###################
# SUPPORT FUNCTIONS
###################
import binascii
import re
import pickle
import chardet
import sabyenc3


def correct_unknown_encoding(str_or_bytes_in):
    """Files created on Windows but unpacked/repaired on
    linux can result in invalid filenames. Try to fix this
    encoding by going to bytes and then back to unicode again.
    Last resort we use chardet package
    """
    # If already string, back to bytes
    if not isinstance(str_or_bytes_in, bytes):
        str_or_bytes_in = str_or_bytes_in.encode("utf-8", "surrogateescape")

    # Try simple bytes-to-string
    try:
        return str_or_bytes_in.decode("utf-8")
    except UnicodeDecodeError:
        try:
            # Try using 8-bit ASCII, if came from Windows
            return str_or_bytes_in.decode("ISO-8859-1")
        except ValueError:
            # Last resort we use the slow chardet package
            return str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"])


def read_and_split(filename, chunk_size=14):
    # Default to chunks of 16K, as used in SSL
    with open("tests/yencfiles/%s" % filename, "rb") as yencfile:
        data_raw = yencfile.read()
        n = 2**chunk_size
        data_chunks = [data_raw[i : i + n] for i in range(0, len(data_raw), n)]
    return data_raw, data_chunks


def read_pickle(filename):
    with open(filename, "rb") as yencfile:
        try:
            data_chunks, data_bytes = pickle.load(yencfile, encoding="bytes")
        except:
            # Reset the pointer and try again
            yencfile.seek(0)
            data_chunks, data_bytes, lines = pickle.load(yencfile, encoding="bytes")
    return b"".join(data_chunks), data_chunks


def sabyenc3_wrapper(data_chunks):
    """CRC's are"""
    decoded_data, filename, crc_correct = sabyenc3.decode_usenet_chunks(data_chunks)
    return decoded_data, correct_unknown_encoding(filename), crc_correct


def python_yenc(data_plain):
    """Use the older decoder to verify the new one"""
    data = []

    # Remove the NNTP-double-dot style
    new_lines = data_plain.split(b"\r\n")
    for i in range(len(new_lines)):
        if new_lines[i][:2] == b"..":
            new_lines[i] = new_lines[i][1:]
    if new_lines[-1] == b".":
        new_lines = new_lines[1:-1]
    data.extend(new_lines)

    # Parse the yEnc headers
    yenc, data = parse_yenc_data(data)
    ybegin, ypart, yend = yenc

    # Now we get the true flat data
    flat_yenc_data = b"".join(data)

    # Remove the escaped-chars
    for i in (0, 9, 10, 13, 27, 32, 46, 61):
        j = b"=%c" % (i + 64)
        flat_yenc_data = flat_yenc_data.replace(j, b"%c" % i)

    # Use the much faster translate function to do fast-subtract of 42
    from_bytes = b"".join([b"%c" % i for i in range(256)])
    to_bytes = b"".join([b"%c" % ((i + 256 - 42) % 256) for i in range(256)])
    translate_table = bytes.maketrans(from_bytes, to_bytes)
    decoded_data = flat_yenc_data.translate(translate_table)

    # Let's get the CRC
    crc = binascii.crc32(decoded_data)
    partcrc = "%08X" % (crc & 2**32 - 1)

    if ypart:
        crcname = "pcrc32"
    else:
        crcname = "crc32"

    if crcname in yend:
        _partcrc = "0" * (8 - len(yend[crcname])) + yend[crcname].upper()
    else:
        _partcrc = None

    return decoded_data, ybegin["name"], _partcrc == partcrc


def parse_yenc_data(data):
    ybegin = None
    ypart = None
    yend = None

    # Check head
    for i in range(min(40, len(data))):
        try:
            if data[i].startswith(b"=ybegin "):
                splits = 3
                if data[i].find(b" part=") > 0:
                    splits += 1
                if data[i].find(b" total=") > 0:
                    splits += 1

                ybegin = get_yenc_data(data[i], splits)

                if data[i + 1].startswith(b"=ypart "):
                    ypart = get_yenc_data(data[i + 1])
                    data = data[i + 2 :]
                    break
                else:
                    data = data[i + 1 :]
                    break
        except IndexError:
            break

    # Check tail
    for i in range(-1, -11, -1):
        try:
            if data[i].startswith(b"=yend "):
                yend = get_yenc_data(data[i])
                data = data[:i]
                break
        except IndexError:
            break

    return ((ybegin, ypart, yend), data)


def get_yenc_data(line, splits=None):
    # Example: =ybegin part=1 line=128 size=123 name=-=DUMMY=- abc.par
    YSPLIT_RE = re.compile(b"([a-zA-Z0-9]+)=")

    fields = {}

    if splits:
        parts = YSPLIT_RE.split(line, splits)[1:]
    else:
        parts = YSPLIT_RE.split(line)[1:]

    if len(parts) % 2:
        return fields

    for i in range(0, len(parts), 2):
        key, value = parts[i], parts[i + 1]
        fields[correct_unknown_encoding(key)] = correct_unknown_encoding(value.strip())

    return fields


def yenc_subtract(char, subtract):
    """Wrap-around for below 0"""
    char_diff = char - subtract
    if char_diff < 0:
        return 256 + char_diff
    return char_diff