1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
|
import warnings
import numpy as np
from pandas import Series
import pandas.util.testing as tm
class Methods(object):
    """Timing cases for individual ``Series.str`` accessor methods.

    Every ``time_*`` method performs exactly one accessor call on the
    fixture stored in ``self.s`` and discards the result; only the cost of
    the call matters.  The ``setup``/``time_*``/``goal_time`` layout looks
    like the airspeed velocity (asv) convention -- confirm against the
    benchmark runner actually in use.
    """

    goal_time = 0.2

    def setup(self):
        """Build the string Series shared by all timing methods."""
        # Presumably 10**5 is the number of generated strings -- the exact
        # semantics belong to ``tm.makeStringIndex``; verify there.
        n_rows = 10**5
        string_index = tm.makeStringIndex(n_rows)
        self.s = Series(string_index)

    def time_cat(self):
        """Concatenate all entries into one string separated by commas."""
        separator = ','
        self.s.str.cat(sep=separator)

    def time_center(self):
        """Center every entry in a field of width 100."""
        width = 100
        self.s.str.center(width)

    def time_count(self):
        """Count occurrences of the pattern 'A' in every entry."""
        pattern = 'A'
        self.s.str.count(pattern)

    def time_endswith(self):
        """Test whether every entry ends with 'A'."""
        suffix = 'A'
        self.s.str.endswith(suffix)

    def time_extract(self):
        """Extract the two capture groups surrounding an 'A'."""
        pattern = '(\\w*)A(\\w*)'
        # Any warning raised by the call is recorded instead of shown.
        with warnings.catch_warnings(record=True):
            self.s.str.extract(pattern)

    def time_findall(self):
        """Find all runs of upper-case ASCII letters in every entry."""
        pattern = '[A-Z]+'
        self.s.str.findall(pattern)

    def time_get(self):
        """Fetch the element at position 0 of every entry."""
        position = 0
        self.s.str.get(position)

    def time_len(self):
        """Compute the length of every entry."""
        accessor = self.s.str
        accessor.len()

    def time_match(self):
        """Match the pattern 'A' against every entry."""
        pattern = 'A'
        self.s.str.match(pattern)

    def time_pad(self):
        """Pad every entry on both sides to width 100."""
        width = 100
        self.s.str.pad(width, side='both')

    def time_replace(self):
        """Replace 'A' with two ``\\x01`` control characters."""
        target, replacement = 'A', '\x01\x01'
        self.s.str.replace(target, replacement)

    def time_slice(self):
        """Slice every entry with start 5, stop 15 and step 2."""
        start, stop, step = 5, 15, 2
        self.s.str.slice(start, stop, step)

    def time_startswith(self):
        """Test whether every entry starts with 'A'."""
        prefix = 'A'
        self.s.str.startswith(prefix)

    def time_strip(self):
        """Strip 'A' characters from both ends of every entry."""
        chars = 'A'
        self.s.str.strip(chars)

    def time_rstrip(self):
        """Strip 'A' characters from the right end of every entry."""
        chars = 'A'
        self.s.str.rstrip(chars)

    def time_lstrip(self):
        """Strip 'A' characters from the left end of every entry."""
        chars = 'A'
        self.s.str.lstrip(chars)

    def time_title(self):
        """Convert every entry to title case."""
        accessor = self.s.str
        accessor.title()

    def time_upper(self):
        """Convert every entry to upper case."""
        accessor = self.s.str
        accessor.upper()

    def time_lower(self):
        """Convert every entry to lower case."""
        accessor = self.s.str
        accessor.lower()
class Repeat(object):
    """Timing case for ``Series.str.repeat``.

    Parametrized over ``repeats``: ``'int'`` selects the scalar ``1`` and
    ``'array'`` selects an array of random integers drawn with
    ``np.random.randint(1, 3, N)``.
    """

    goal_time = 0.2
    params = ['int', 'array']
    param_names = ['repeats']

    def setup(self, repeats):
        """Create the string Series and pick the repeat argument.

        ``repeats`` must be one of the keys ``'int'`` or ``'array'``; any
        other value raises ``KeyError`` from the dictionary lookup.
        """
        n_rows = 10**5
        self.s = Series(tm.makeStringIndex(n_rows))
        # Both candidates are built before selecting one, so the random
        # draw happens regardless of which parameter is active.
        candidates = {'int': 1, 'array': np.random.randint(1, 3, n_rows)}
        self.repeat = candidates[repeats]

    def time_repeat(self, repeats):
        """Repeat every entry using the value chosen in ``setup``."""
        count = self.repeat
        self.s.str.repeat(count)
class Contains(object):
    """Timing case for ``Series.str.contains`` with and without regex."""

    goal_time = 0.2
    params = [True, False]
    param_names = ['regex']

    def setup(self, regex):
        """Create the string Series; ``regex`` is not used here."""
        n_rows = 10**5
        string_index = tm.makeStringIndex(n_rows)
        self.s = Series(string_index)

    def time_contains(self, regex):
        """Search every entry for 'A', forwarding the ``regex`` flag."""
        pattern = 'A'
        self.s.str.contains(pattern, regex=regex)
class Split(object):
    """Timing case for ``Series.str.split`` with and without ``expand``."""

    goal_time = 0.2
    params = [True, False]
    param_names = ['expand']

    def setup(self, expand):
        """Create a Series whose entries contain the '--' delimiter.

        ``expand`` is not used here.
        """
        n_rows = 10**5
        raw = Series(tm.makeStringIndex(n_rows))
        # ``str.join`` inserts '--' between the elements of each entry, so
        # the later split has delimiters to work on.
        self.s = raw.str.join('--')

    def time_split(self, expand):
        """Split every entry on '--', forwarding the ``expand`` flag."""
        delimiter = '--'
        self.s.str.split(delimiter, expand=expand)
class Dummies(object):
    """Timing case for ``Series.str.get_dummies``."""

    goal_time = 0.2

    def setup(self):
        """Create a Series whose entries contain the '|' delimiter."""
        n_rows = 10**5
        raw = Series(tm.makeStringIndex(n_rows))
        # ``str.join`` inserts '|' between the elements of each entry.
        self.s = raw.str.join('|')

    def time_get_dummies(self):
        """Build indicator columns by splitting every entry on '|'."""
        delimiter = '|'
        self.s.str.get_dummies(delimiter)
class Encode(object):
    """Timing case for a UTF-8 encode/decode round trip via ``.str``."""

    goal_time = 0.2

    def setup(self):
        """Create the Series from ``tm.makeUnicodeIndex()`` defaults."""
        unicode_index = tm.makeUnicodeIndex()
        self.ser = Series(unicode_index)

    def time_encode_decode(self):
        """Encode every entry to UTF-8 bytes, then decode it back."""
        codec = 'utf-8'
        encoded = self.ser.str.encode(codec)
        encoded.str.decode(codec)
class Slice(object):
    """Timing case for slicing through ``Series.str[...]``."""

    goal_time = 0.2

    def setup(self):
        """Create a Series alternating 'abcdefg' and NaN, 500000 times."""
        pair = ['abcdefg', np.nan]
        values = pair * 500000
        self.s = Series(values)

    def time_vector_slice(self):
        """Take the first five characters of every entry."""
        # GH 2602
        first_five = slice(None, 5)
        self.s.str[first_five]
|