File: encoding_tests.py

package info (click to toggle)
python-lamson 1.0pre11-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 3,508 kB
  • ctags: 1,036
  • sloc: python: 5,772; xml: 177; makefile: 19
file content (296 lines) | stat: -rw-r--r-- 10,071 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
from __future__ import with_statement
from nose.tools import *
import re
import os
from lamson import encoding, mail
import mailbox
import email
from email import encoders
from email.utils import parseaddr
from mock import *
import chardet


BAD_HEADERS = [
    u'"\u8003\u53d6\u5206\u4eab" <Ernest.Beard@msa.hinet.net>'.encode('utf-8'),
    '"=?windows-1251?B?RXhxdWlzaXRlIFJlcGxpY2E=?="\n\t<wolfem@barnagreatlakes.com>',
    '=?iso-2022-jp?B?Zmlicm91c19mYXZvcmF0ZUB5YWhvby5jby5qcA==?=<fibrous_favorate@yahoo.co.jp>',

    '=?windows-1252?Q?Global_Leadership_in_HandCare_-_Consumer,\n\t_Professional_and_Industrial_Products_OTC_:_FLKI?=',
    '=?windows-1252?q?Global_Leadership_in_Handcare_-_Consumer, _Auto,\n\t_Professional_&_Industrial_Products_-_OTC_:_FLKI?=',
    'I am just normal.',
    '=?koi8-r?B?WW91ciBtYW6ScyBzdGFtaW5hIHdpbGwgY29tZSBiYWNrIHRvIHlvdSBs?=\n\t=?koi8-r?B?aWtlIGEgYm9vbWVyYW5nLg==?=',
    '=?koi8-r?B?WW91IGNhbiBiZSBvbiB0b3AgaW4gYmVkcm9vbSBhZ2FpbiCWIGp1c3Qg?=\n\t=?koi8-r?B?YXNrIHVzIGZvciBhZHZpY2Uu?=',
    '"=?koi8-r?B?5MXMz9DSz8na18/E09TXzw==?=" <daniel@specelec.com>',
    '=?utf-8?b?IumrlOiCsuWckuWNgOermSDihpIg6ZW35bqa6Yar6Zmi56uZIOKGkiDmlofljJbk?=\n =?utf-8?b?uInot6/nq5kiIDx2Z3hkcmp5Y2lAZG5zLmh0Lm5ldC50dz4=?=',
    '=?iso-8859-1?B?SOlhdnkgTel05WwgVW7uY/hk?=\n\t=?iso-8859-1?Q?=E9?=',
]

DECODED_HEADERS = encoding.header_from_mime_encoding(BAD_HEADERS)

NORMALIZED_HEADERS = [encoding.header_to_mime_encoding(x) for x in DECODED_HEADERS]


def test_MailBase():
    the_subject = u'p\xf6stal'
    m = encoding.MailBase()
    
    m['To'] = "testing@localhost"
    m['Subject'] = the_subject

    assert m['To'] == "testing@localhost"
    assert m['TO'] == m['To']
    assert m['to'] == m['To']

    assert m['Subject'] == the_subject
    assert m['subject'] == m['Subject']
    assert m['sUbjeCt'] == m['Subject']
    
    msg = encoding.to_message(m)
    m2 = encoding.from_message(msg)

    assert_equal(len(m), len(m2))

    for k in m:
        assert m[k] == m2[k], "%s: %r != %r" % (k, m[k], m2[k])
    
    for k in m.keys():
        assert k in m
        del m[k]
        assert not k in m

def test_header_to_mime_encoding():
    for i, header in enumerate(DECODED_HEADERS):
        assert_equal(NORMALIZED_HEADERS[i], encoding.header_to_mime_encoding(header))

def test_dumb_shit():
    # this is a sample of possibly the worst case Mutt can produce
    idiot = '=?iso-8859-1?B?SOlhdnkgTel05WwgVW7uY/hk?=\n\t=?iso-8859-1?Q?=E9?='
    should_be = u'H\xe9avy M\xe9t\xe5l Un\xeec\xf8d\xe9'
    assert_equal(encoding.header_from_mime_encoding(idiot), should_be)

def test_header_from_mime_encoding():
    assert not encoding.header_from_mime_encoding(None)
    assert_equal(len(BAD_HEADERS), len(encoding.header_from_mime_encoding(BAD_HEADERS)))
    
    for i, header in enumerate(BAD_HEADERS):
        assert_equal(DECODED_HEADERS[i], encoding.header_from_mime_encoding(header))


def test_to_message_from_message_with_spam():
    mb = mailbox.mbox("tests/spam")
    fails = 0
    total = 0

    for msg in mb:
        try:
            m = encoding.from_message(msg)
            out = encoding.to_message(m)
            assert repr(out)

            m2 = encoding.from_message(out)

            for k in m:
                if '@' in m[k]:
                    assert_equal(parseaddr(m[k]), parseaddr(m2[k]))
                else:
                    assert m[k].strip() == m2[k].strip(), "%s: %r != %r" % (k, m[k], m2[k])

                assert not m[k].startswith(u"=?")
                assert not m2[k].startswith(u"=?")
                assert m.body == m2.body, "Bodies don't match" 

                assert_equal(len(m.parts), len(m2.parts), "Not the same number of parts.")

                for i, part in enumerate(m.parts):
                    assert part.body == m2.parts[i].body, "Part %d isn't the same: %r \nvs\n. %r" % (i, part.body, m2.parts[i].body)
            total += 1
        except encoding.EncodingError, exc:
            fails += 1

    assert fails/total < 0.01, "There were %d failures out of %d total." % (fails, total)


def test_to_file_from_file():
    mb = mailbox.mbox("tests/spam")
    msg = encoding.from_message(mb[0])

    outfile = "run/encoding_test.msg"

    with open(outfile, 'w') as outfp:
        encoding.to_file(msg, outfp)

    with open(outfile) as outfp:
        msg2 = encoding.from_file(outfp)
    
    outdata = open(outfile).read()

    assert_equal(len(msg), len(msg2))
    os.unlink(outfile)


def test_guess_encoding_and_decode():
    for header in DECODED_HEADERS:
        try:
            encoding.guess_encoding_and_decode('ascii', header.encode('utf-8'))
        except encoding.EncodingError:
            pass


def test_attempt_decoding():
    for header in DECODED_HEADERS:
        encoding.attempt_decoding('ascii', header.encode('utf-8'))


def test_properly_decode_header():
    for i, header in enumerate(BAD_HEADERS):
        parsed = encoding.properly_decode_header(header)
        assert_equal(DECODED_HEADERS[i], parsed)


def test_headers_round_trip():
    # round trip the headers to make sure they convert reliably back and forth
    for header in BAD_HEADERS:
        original = encoding.header_from_mime_encoding(header)

        assert original
        assert "=?" not in original and "?=" not in original, "Didn't decode: %r" % (encoding.SCANNER.scan(header),)

        encoded = encoding.header_to_mime_encoding(original)
        assert encoded

        return_original = encoding.header_from_mime_encoding(encoded)
        assert_equal(original, return_original)

        return_encoded = encoding.header_to_mime_encoding(return_original)
        assert_equal(encoded, return_encoded)


def test_MIMEPart():
    text1 = encoding.MIMEPart("text/plain")
    text1.set_payload("The first payload.")
    text2 = encoding.MIMEPart("text/plain")
    text2.set_payload("The second payload.")

    image_data = open("tests/lamson.png").read()
    img1 = encoding.MIMEPart("image/png")
    img1.set_payload(image_data)
    img1.set_param('attachment','', header='Content-Disposition')
    img1.set_param('filename','lamson.png', header='Content-Disposition')
    encoders.encode_base64(img1)
    
    multi = encoding.MIMEPart("multipart/mixed")
    for x in [text1, text2, img1]:
        multi.attach(x)

    mail = encoding.from_message(multi)

    assert mail.parts[0].body == "The first payload."
    assert mail.parts[1].body == "The second payload."
    assert mail.parts[2].body == image_data

    encoding.to_message(mail)


@patch('chardet.detect', new=Mock())
@raises(encoding.EncodingError)
def test_guess_encoding_fails_completely():
    chardet.detect.return_value = {'encoding': None, 'confidence': 0.0}
    encoding.guess_encoding_and_decode('ascii', 'some data', errors='strict')


def test_attach_text():
    mail = encoding.MailBase()
    mail.attach_text("This is some text.", 'text/plain')

    msg = encoding.to_message(mail)
    assert msg.get_payload(0).get_payload() == "This is some text."
    assert encoding.to_string(mail)

    mail.attach_text("<html><body><p>Hi there.</p></body></html>", "text/html")
    msg = encoding.to_message(mail)
    assert len(msg.get_payload()) == 2
    assert encoding.to_string(mail)


def test_attach_file():
    mail = encoding.MailBase()
    png = open("tests/lamson.png").read()
    mail.attach_file("lamson.png", png, "image/png", "attachment")
    msg = encoding.to_message(mail)

    payload = msg.get_payload(0)
    assert payload.get_payload(decode=True) == png
    assert payload.get_filename() == "lamson.png", payload.get_filename()



def test_content_encoding_headers_are_maintained():
    inmail = encoding.from_file(open("tests/signed.msg"))

    ctype, ctype_params = inmail.content_encoding['Content-Type']

    assert_equal(ctype, 'multipart/signed')

    # these have to be maintained
    for key in ['protocol', 'micalg']:
        assert key in ctype_params

    # these get removed
    for key in encoding.CONTENT_ENCODING_REMOVED_PARAMS:
        assert key not in ctype_params

    outmsg = encoding.to_message(inmail)
    ctype, ctype_params = encoding.parse_parameter_header(outmsg, 'Content-Type')
    for key in ['protocol', 'micalg']:
        assert key in ctype_params, key


def test_odd_content_type_with_charset():
    mail = encoding.MailBase()
    mail.body = u"p\xf6stal".encode('utf-8')
    mail.content_encoding['Content-Type'] = ('application/plain', {'charset': 'utf-8'})

    msg = encoding.to_string(mail)
    assert msg

def test_specially_borked_lua_message():
    assert encoding.from_file(open("tests/borked.msg"))

def raises_TypeError(*args):
    raise TypeError()

@patch('lamson.encoding.MIMEPart.__init__')
@raises(encoding.EncodingError)
def test_to_message_encoding_error(mp_init):
    mp_init.side_effect = raises_TypeError
    test = encoding.from_file(open("tests/borked.msg"))
    msg = encoding.to_message(test)

def raises_UnicodeError(*args):
    raise UnicodeError()

@raises(encoding.EncodingError)
def test_guess_encoding_and_decode_unicode_error():
    data = Mock()
    data.__str__ = Mock()
    data.__str__.return_value = u"\0\0"
    data.decode.side_effect = raises_UnicodeError
    encoding.guess_encoding_and_decode("ascii", data)
    
def test_attempt_decoding_with_bad_encoding_name():
    assert_equal("test", encoding.attempt_decoding("asdfasdf", "test"))

@raises(encoding.EncodingError)
def test_apply_charset_to_header_with_bad_encoding_char():
    encoding.apply_charset_to_header('ascii', 'X', 'bad')

def test_odd_roundtrip_bug():
    decoded_addrs=[u'"\u0414\u0435\u043b\u043e\u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u043e" <daniel@specelec.com>',
                   u'"\u8003\u53d6\u5206\u4eab" <Ernest.Beard@msa.hinet.net>',
                   u'"Exquisite Replica"\n\t<wolfem@barnagreatlakes.com>',]

    for decoded in decoded_addrs:
        encoded = encoding.header_to_mime_encoding(decoded)
        assert '<' in encoded and '"' in encoded, "Address wasn't encoded correctly:\n%s" % encoded