1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
from __future__ import unicode_literals
from .compat import valid_source_type, chr, map, range, zip
import re
def is_valid_type(source):
return isinstance(source, valid_source_type)
def make_char_list(source):
char_list = []
back_slash = False
hyphen = False
for char in source:
if char == '\\':
if not back_slash:
back_slash = True
char_list.append(92)
continue
elif char == '-':
if back_slash: # \-
del char_list[-1]
else:
hyphen = True
continue
elif hyphen:
start = char_list[-1] + 1
char_list += range(start, ord(char))
char_list.append(ord(char))
back_slash = False
hyphen = False
return char_list
def to_unichr(char_list):
return map(chr, char_list)
def squeeze(from_list, source):
for char in from_list:
pattern = '%s{2,}' % re.escape(char)
source = re.sub(pattern, char, source)
return source
def translate(from_list, to_list, source):
translate_dict = dict(zip(from_list, to_list))
return source.translate(translate_dict)
def tr(string1, string2, source, option=''):
"""Replace or remove specific characters.
If not given option, then replace all characters in string1 with
the character in the same position in string2.
Following options are available:
c Replace all complemented characters in string1 with
the character in the same position in string2.
d Delete all characters in string1.
s Squeeze all characters in string1.
cs Squeeze all the characters in string2 besides "c" replacement.
ds Delete all characters in string1. Squeeze all characters
in string2.
cd Delete all complemented characters in string1.
Params:
<unicode> string1
<unicode> string2
<unicode> source
<basestring> option
Return:
<unicode> translated_source
"""
if not is_valid_type(source):
raise TypeError('source must be unicode')
from_list = make_char_list(string1)
if option == 's':
from_list = to_unichr(from_list)
return squeeze(from_list, source)
elif 'c' in option:
from_list = to_unichr(from_list)
from_list = [ord(c) for c in set(source) - set(from_list)]
if 'd' in option:
to_list = [None for i in from_list]
else:
to_list = [string2[-1] for i in from_list]
source = translate(from_list, to_list, source)
if 's' in option:
source = squeeze(to_list, source)
return source
elif 'd' in option:
to_list = [None for i in from_list]
source = translate(from_list, to_list, source)
if 's' in option:
to_list = make_char_list(string2)
to_list = to_unichr(to_list)
source = squeeze(to_list, source)
return source
else:
to_list = make_char_list(string2)
length_diff = (len(from_list) - len(to_list))
if length_diff:
to_list += [to_list[-1]] * length_diff
to_list = to_unichr(to_list)
return translate(from_list, to_list, source)
|