1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
|
"""8-bit string definitions for Python 2/3 compatibility

Defines the following which allow for dealing with Python 3 breakages:

    STR_IS_BYTES
    STR_IS_UNICODE

        Easily checked booleans for type identities

    _NULL_8_BYTE

        An 8-bit byte with NULL (0) value

    as_8_bit( x, encoding='utf-8')

        Returns the value as the 8-bit version

    unicode -- always pointing to the unicode type
    bytes -- always pointing to the 8-bit bytes type
"""
import sys
# Start from the Python 2 assumption that the native ``str`` type is the
# 8-bit type; the version branch further down resets this on Python 3.
STR_IS_BYTES = True

# Re-bind ``bytes`` at module level so it can always be imported from here,
# whatever the interpreter version.
if sys.version_info[:2] < (2, 6):
    # no bytes, traditional setup...
    bytes = str
else:
    bytes = bytes

# Likewise make ``long`` importable from this module on every interpreter.
try:
    long = long
except NameError:
    # No separate ``long`` builtin is available; fall back to ``int``.
    long = int
if sys.version_info[:2] < (3, 0):
    # traditional setup, with bytes defined...
    unicode = unicode
    _NULL_8_BYTE = '\000'

    def as_8_bit(x, encoding='utf-8'):
        """Return x as an 8-bit string

        x -- value to convert; unicode values are encoded with encoding,
            anything else is passed through bytes()
        encoding -- codec used when x is a unicode value

        returns an instance of the 8-bit string type
        """
        if isinstance(x, unicode):
            return x.encode(encoding)
        return bytes(x)

    integer_types = int, long

    def as_str(x, encoding='utf-8'):
        """Produce a native string (i.e. different on python 2 and 3)

        x -- value to convert; 8-bit strings are returned unchanged,
            unicode values are encoded with encoding, anything else
            goes through str()
        encoding -- codec used when x is a unicode value
        """
        if isinstance(x, bytes):
            return x
        elif isinstance(x, unicode):
            return x.encode(encoding)
        else:
            return str(x)
else:
    # new setup, str is now unicode...
    STR_IS_BYTES = False
    unicode = str
    _NULL_8_BYTE = bytes('\000', 'latin1')

    def as_8_bit(x, encoding='utf-8'):
        """Return x as an 8-bit (bytes) value

        x -- value to convert
        encoding -- codec used when text has to be encoded

        returns a bytes instance: text is encoded, bytes are returned
        unchanged, a bytearray is copied into bytes, and any other
        object is converted with str() and then encoded
        """
        if isinstance(x, unicode):
            return x.encode(encoding)
        elif isinstance(x, bytes):
            # Note: this can create an 8-bit string that is *not* in encoding,
            # but that is potentially exactly what we wanted, as these can
            # be arbitrary byte-streams being passed to C functions
            return x
        elif isinstance(x, bytearray):
            # str() of a bytearray is its repr ("bytearray(b'...')") here,
            # so copy the raw content instead of falling through below.
            return bytes(x)
        return str(x).encode(encoding)

    integer_types = int,

    def as_str(x, encoding='utf-8'):
        """Produce a native string (i.e. different on python 2 and 3)

        x -- value to convert; text is returned unchanged, bytes and
            bytearray values are decoded with encoding, anything else
            goes through str()
        encoding -- codec used when x has to be decoded
        """
        if isinstance(x, unicode):
            return x
        elif isinstance(x, (bytes, bytearray)):
            # bytearray is decoded like bytes; str() would give its repr
            return x.decode(encoding)
        else:
            return str(x)

# Convenience inverse of STR_IS_BYTES
STR_IS_UNICODE = not STR_IS_BYTES
# Expose the interpreter's size limit under a single name: prefer
# ``sys.maxsize`` and fall back to ``sys.maxint`` when it is missing.
try:
    maxsize = sys.maxsize
except AttributeError:
    maxsize = sys.maxint
def as_unicode(x, encoding='utf-8'):
    """Ensure is a unicode object given default encoding

    x -- value to convert
    encoding -- codec tried first when x is a bytes or bytearray value

    returns x unchanged when it is already unicode; bytes and bytearray
    values are decoded with encoding, falling back to latin-1 if that
    raises UnicodeDecodeError; any other object is passed to unicode()
    """
    if isinstance(x, unicode):
        return x
    elif isinstance(x, (bytes, bytearray)):
        # bytearray is decoded like bytes: where unicode is str, calling
        # it on a bytearray would return the repr rather than the content.
        try:
            return x.decode(encoding)
        except UnicodeDecodeError:
            # latin-1 assigns a character to every byte value, so this
            # second attempt does not raise for undecodable input.
            return x.decode('latin-1')
    else:
        return unicode(x)
|