# Tests for flanker's encodedword header decoding and charsets.convert_to_unicode.
# coding:utf-8
from nose.tools import eq_
from mock import *
from flanker.mime.message.headers import encodedword
from flanker.mime.message import utils
from flanker.mime.message import errors, charsets
from flanker.addresslib.address import parse
def encoded_word_test():
    # Checks that encodedword._RE_ENCODED_WORD exposes the 'charset',
    # 'encoding' and 'encoded' named groups for B- and Q-encoded words.
    def t(value):
        # Return the (charset, encoding, encoded) groups matched in `value`.
        m = encodedword._RE_ENCODED_WORD.match(value)
        # match() returns None when the pattern does not apply; fail with the
        # offending input rather than an AttributeError on None.
        assert m is not None, 'not matched as an encoded-word: %r' % (value,)
        return m.group('charset', 'encoding', 'encoded')

    # Base64 ("B") encoding, lower-case charset label.
    r = t('=?utf-8?B?U2ltcGxlIHRleHQuIEhvdyBhcmUgeW91PyDQmtCw0Log0YLRiyDQv9C+0LY=?=')
    eq_(r[0], 'utf-8')
    eq_(r[1], 'B')
    eq_(r[2], 'U2ltcGxlIHRleHQuIEhvdyBhcmUgeW91PyDQmtCw0Log0YLRiyDQv9C+0LY=')

    # Quoted-printable style ("Q") encoding, upper-case charset label.
    r = t('=?UTF-8?Q?=D1=80=D1=83=D1=81=D1=81=D0=BA=D0=B8=D0=B9?=')
    eq_(r[0], 'UTF-8')
    eq_(r[1], 'Q')
    eq_(r[2], '=D1=80=D1=83=D1=81=D1=81=D0=BA=D0=B8=D0=B9')

    # Lower-case "q" encoding marker is reported as written.
    r = t('=?iso-8859-1?q?this=20is=20some=20text?=')
    eq_(r[0], 'iso-8859-1')
    eq_(r[1], 'q')
    eq_(r[2], 'this=20is=20some=20text')
def unfold_test():
    # Each row is (expected result, input) for encodedword.unfold: the leading
    # \n / \r characters are expected to go, the whitespace after them to stay.
    cases = (
        ('\t\t\t', '\n\r\t\t\t'),
        ('\t\t\t', '\n\t\t\t'),
        (' ', '\n\r '),
        (' ', '\r\n '),
        (' ', '\n '),
        (' ', '\r '),
        (' \t', '\n\r \t'),
    )
    for expected, folded in cases:
        eq_(expected, encodedword.unfold(folded))
def happy_mime_to_unicode_test():
    # Each row is (expected text, raw header value) for
    # encodedword.mime_to_unicode; the inputs mix B and Q encodings and the
    # utf-8, US-ASCII and iso-8859-1 charset labels.
    cases = [
        (u'Simple text. How are you? Как ты поживаешь?',
         """ =?utf-8?B?U2ltcGxlIHRleHQuIEhvdyBhcmUgeW91PyDQmtCw0Log0YLRiyDQv9C+0LY=?=\n =?utf-8?B?0LjQstCw0LXRiNGMPw==?="""),
        (u'Foo <foo@example.com>',
         ' =?US-ASCII?Q?Foo?= <foo@example.com>'),
        (u'русский и english112 сабж subject',
         '''=?UTF-8?Q?=D1=80=D1=83=D1=81=D1=81=D0=BA=D0=B8=D0=B9?=\n =?UTF-8?Q?_=D0=B8?= english112 =?UTF-8?Q?=D1=81=D0=B0=D0=B1=D0=B6?= subject'''),
        (u'Héavy Métål Unîcødé',
         '=?iso-8859-1?B?SOlhdnkgTel05WwgVW7uY/hk?=\n\t=?iso-8859-1?Q?=E9?='),
    ]
    for expected, raw in cases:
        eq_(expected, encodedword.mime_to_unicode(raw))
def lying_encodings_mime_to_unicode_test():
    # The first encoded-word is labelled US-ASCII, yet the expected result for
    # it is Cyrillic text: the declared charset does not describe the payload.
    mislabelled = '''=?US-ASCII?Q?=D1=80=D1=83=D1=81=D1=81=D0=BA=D0=B8=D0=B9?=\n english112 =?UTF-8?Q?=D1=81=D0=B0=D0=B1=D0=B6?= subject'''
    decoded = encodedword.mime_to_unicode(mislabelled)
    eq_(u'русский english112 сабж subject', decoded)
def missing_padding_mime_to_unicode_test():
    # Both base64 payloads end without any '=' padding characters; the decoded
    # text is still expected to come out complete.
    unpadded = """ =?utf-8?B?U2ltcGxlIHRleHQuIEhvdyBhcmUgeW91PyDQmtCw0Log0YLRiyDQv9C+0LY?=\n =?utf-8?B?0LjQstCw0LXRiNGMPw?="""
    decoded = encodedword.mime_to_unicode(unpadded)
    eq_(u'Simple text. How are you? Как ты поживаешь?', decoded)
def neutral_headings_test():
    # Header values that contain no encoded-words.

    # Multi-line value: the expected text is the input with its line breaks
    # removed and the tab characters kept.
    received = '''from mail-iy0-f179.google.com (mail-iy0-f179.google.com
\t[209.85.210.179])
\tby mxa.mailgun.org (Postfix) with ESMTP id 2D0D3F01116
\tfor <alex@mailgun.net>; Fri, 17 Dec 2010 12:50:07 +0000 (UTC)'''
    decoded = encodedword.mime_to_unicode(received)
    eq_(u'from mail-iy0-f179.google.com (mail-iy0-f179.google.com\t[209.85.210.179])\tby mxa.mailgun.org (Postfix) with ESMTP id 2D0D3F01116\tfor <alex@mailgun.net>; Fri, 17 Dec 2010 12:50:07 +0000 (UTC)', decoded)

    # Single-line value: expected to come back unchanged, even though the
    # boundary contains '=' runs.
    content_type = '''multipart/mixed; boundary="===============7553021138737466228=="'''
    eq_(content_type, encodedword.mime_to_unicode(content_type))
def outlook_encodings_test():
    # Two koi8-r base64 encoded-words split over two lines (presumably as
    # produced by Outlook, going by the test name).
    # NOTE(review): the continuation line starts at column 0 in this view;
    # confirm the fixture is meant to have no leading whitespace there.
    subject = '''=?koi8-r?B?/NTPINPPz8Ldxc7JxSDTIMTMyc7O2c0g08HC1sXL1M/NINPQxcPJwQ==?=
=?koi8-r?B?zNjOzyDe1M/C2SDQ0s/XxdLJ1Nggy8/EydLP18vJ?='''
    decoded = encodedword.mime_to_unicode(subject)
    eq_(u"Это сообщение с длинным сабжектом специально чтобы проверить кодировки", decoded)
def gmail_encodings_test():
    # Three KOI8-R base64 encoded-words, one per line (presumably as produced
    # by Gmail, going by the test name).
    # NOTE(review): continuation lines start at column 0 in this view;
    # confirm the fixture is meant to have no leading whitespace there.
    subject = ''' =?KOI8-R?B?/NTPINPPz8Ldxc7JxSDTIMTMyc7O2c0g08HC1g==?=
=?KOI8-R?B?xcvUz80g09DFw8nBzNjOzyDe1M/C2SDQ0s/XxdLJ1A==?=
=?KOI8-R?B?2CDLz8TJ0s/Xy8k=?='''
    decoded = encodedword.mime_to_unicode(subject)
    eq_(u"Это сообщение с длинным сабжектом специально чтобы проверить кодировки", decoded)
def aol_encodings_test():
    # Seven utf-8 Q-encoded words, one per line (presumably as produced by
    # AOL, going by the test name).
    # NOTE(review): continuation lines start at column 0 in this view;
    # confirm the fixture is meant to have no leading whitespace there.
    subject = ''' =?utf-8?Q?=D0=AD=D1=82=D0=BE_=D1=81=D0=BE=D0=BE=D0=B1=D1=89=D0=B5=D0=BD?=
=?utf-8?Q?=D0=B8=D0=B5_=D1=81_=D0=B4=D0=BB=D0=B8=D0=BD=D0=BD=D1=8B=D0=BC?=
=?utf-8?Q?_=D1=81=D0=B0=D0=B1=D0=B6=D0=B5=D0=BA=D1=82=D0=BE=D0=BC_=D1=81?=
=?utf-8?Q?=D0=BF=D0=B5=D1=86=D0=B8=D0=B0=D0=BB=D1=8C=D0=BD=D0=BE_=D1=87?=
=?utf-8?Q?=D1=82=D0=BE=D0=B1=D1=8B_=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80?=
=?utf-8?Q?=D0=B8=D1=82=D1=8C_=D0=BA=D0=BE=D0=B4=D0=B8=D1=80=D0=BE=D0=B2?=
=?utf-8?Q?=D0=BA=D0=B8?='''
    decoded = encodedword.mime_to_unicode(subject)
    eq_(u"Это сообщение с длинным сабжектом специально чтобы проверить кодировки", decoded)
def yahoo_encodings_test():
    # Three utf-8 base64 encoded-words, one per line, with the value itself
    # starting on a line break (presumably as produced by Yahoo, going by the
    # test name).
    # NOTE(review): every line of the literal starts at column 0 in this view;
    # confirm the fixture is meant to have no leading whitespace there.
    subject = '''
=?utf-8?B?0K3RgtC+INGB0L7QvtCx0YnQtdC90LjQtSDRgSDQtNC70LjQvdC90YvQvCA=?=
=?utf-8?B?0YHQsNCx0LbQtdC60YLQvtC8INGB0L/QtdGG0LjQsNC70YzQvdC+INGH0YI=?=
=?utf-8?B?0L7QsdGLINC/0YDQvtCy0LXRgNC40YLRjCDQutC+0LTQuNGA0L7QstC60Lg=?='''
    decoded = encodedword.mime_to_unicode(subject)
    eq_(u"Это сообщение с длинным сабжектом специально чтобы проверить кодировки", decoded)
def hotmail_encodings_test():
    # Seven short koi8-r base64 encoded-words, up to two per line (presumably
    # as produced by Hotmail, going by the test name).
    # NOTE(review): continuation lines start at column 0 in this view;
    # confirm the fixture is meant to have no leading whitespace there.
    subject = ''' =?koi8-r?B?/NTPINPPz8LdxQ==?= =?koi8-r?B?zsnFINMgxMzJzg==?=
=?koi8-r?B?ztnNINPBwtbFyw==?= =?koi8-r?B?1M/NINPQxcPJwQ==?=
=?koi8-r?B?zNjOzyDe1M/C2Q==?= =?koi8-r?B?INDSz9fF0snU2A==?=
=?koi8-r?B?IMvPxMnSz9fLyQ==?='''
    decoded = encodedword.mime_to_unicode(subject)
    eq_(u"Это сообщение с длинным сабжектом специально чтобы проверить кодировки", decoded)
def various_encodings_test():
    # Each row is (expected text, raw header value) for
    # encodedword.mime_to_unicode, checked in the order listed.
    cases = [
        # Encoded-word inside a quoted display name.
        (u'"考取分享" <foo@example.com>',
         '"=?utf-8?b?6ICD5Y+W5YiG5Lqr?=" <foo@example.com>'),
        # Several addresses, encoded and plain display names mixed.
        (u"Жека <ev@mailgun.net>, Концевой <eugueny@gmail.com>",
         """=?UTF-8?B?0JbQtdC60LA=?= <ev@mailgun.net>, =?UTF-8?B?0JrQvtC90YbQtdCy0L7QuQ==?= <eugueny@gmail.com>"""),
        (u"Концевой <ev@host.com>, Bob <bob@host.com>, Винс <vince@host.com>",
         "=?utf-8?b?0JrQvtC90YbQtdCy0L7QuQ==?= <ev@host.com>, Bob <bob@host.com>, =?utf-8?b?0JLQuNC90YE=?= <vince@host.com>"),
        # Same two encoded-words separated by "\n " and then by "\r\n ".
        (u'Проверяем русские сабжекты и юникод ☠',
         '=?UTF-8?B?0J/RgNC+0LLQtdGA0Y/QtdC8INGA0YPRgdGB0LrQuNC1INGB0LDQsdC2?=\n =?UTF-8?B?0LXQutGC0Ysg0Lgg0Y7QvdC40LrQvtC0IOKYoA==?='),
        (u'Проверяем русские сабжекты и юникод ☠',
         '=?UTF-8?B?0J/RgNC+0LLQtdGA0Y/QtdC8INGA0YPRgdGB0LrQuNC1INGB0LDQsdC2?=\r\n =?UTF-8?B?0LXQutGC0Ysg0Lgg0Y7QvdC40LrQvtC0IOKYoA==?='),
        # Q-encoded value with non-ASCII bytes in the middle.
        (u'Evaneos-Concepción.pdf',
         '=?utf-8?Q?Evaneos-Concepci=C3=B3n.pdf?='),
        # Same bytes under the gb2312 and gb18030 labels; unicode inputs.
        (u'Hey There撞匕',
         u'=?gb2312?Q?Hey_There=D7=B2=D8=B0?='),
        (u'Hey There撞匕',
         u'=?gb18030?Q?Hey_There=D7=B2=D8=B0?='),
    ]
    for expected, raw in cases:
        eq_(expected, encodedword.mime_to_unicode(raw))

    # Round trip: decoding the parsed address's ace_display_name is expected
    # to give back its display_name.
    address = parse(u'Тест длинного дисплей нейма <test@example.com>')
    eq_(address.display_name, encodedword.mime_to_unicode(address.ace_display_name))
@patch.object(utils, '_guess_and_convert', Mock(side_effect=errors.EncodingError()))
def test_convert_to_utf8_unknown_encoding():
    # utils._guess_and_convert is replaced by a mock that raises EncodingError
    # for the duration of this test.
    # Each row is (expected text, charset label, input) for
    # charsets.convert_to_unicode.
    cases = (
        (u"abc\u20acdef", "windows-874", b"abc\x80def"),
        (u"qwe", 'X-UNKNOWN', u'qwe'),
        (u"qwe", 'ru_RU.KOI8-R', 'qwe'),
        (u"qwe", '"utf-8"; format="flowed"', 'qwe'),
    )
    for expected, charset, raw in cases:
        eq_(expected, charsets.convert_to_unicode(charset, raw))
@patch.object(encodedword, 'unfold', Mock(side_effect=Exception))
def test_error_reporting():
    # encodedword.unfold is replaced by a mock that raises Exception;
    # mime_to_unicode is expected to return its input unchanged.
    raw = "Sasha"
    decoded = encodedword.mime_to_unicode(raw)
    eq_("Sasha", decoded)
|