# Benchmarks for pandas JSON reading (read_json) and writing (DataFrame.to_json).
import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, date_range, timedelta_range, concat, read_json
from ..pandas_vb_common import setup, BaseIO # noqa
class ReadJSON(BaseIO):
    """Benchmark ``read_json`` over a grid of ``orient`` and index kinds.

    ``setup`` writes a random float frame to ``fname`` using the requested
    ``orient``; the timed method then reads that file back.
    """

    goal_time = 0.2
    fname = "__test__.json"
    # Parameter grid: first axis is the JSON orient, second the index kind.
    params = (['split', 'index', 'records'], ['int', 'datetime'])
    param_names = ['orient', 'index']

    def setup(self, orient, index):
        """Write a 100000 x 5 random float frame to ``self.fname`` as JSON.

        Parameters
        ----------
        orient : str
            Passed through to ``DataFrame.to_json``.
        index : str
            Either ``'int'`` or ``'datetime'``; selects the frame's index.
        """
        n_rows = 100000
        n_cols = 5
        # Both candidate indexes are built up front; ``index`` picks one.
        index_by_kind = {
            'int': np.arange(n_rows),
            'datetime': date_range('20000101', periods=n_rows, freq='H'),
        }
        column_labels = list(map('float_{}'.format, range(n_cols)))
        values = np.random.randn(n_rows, n_cols)
        frame = DataFrame(values,
                          columns=column_labels,
                          index=index_by_kind[index])
        frame.to_json(self.fname, orient=orient)

    def time_read_json(self, orient, index):
        # ``index`` is not used here; it only selects the fixture that
        # ``setup`` wrote to disk.
        read_json(self.fname, orient=orient)
class ReadJSONLines(BaseIO):
    """Benchmark ``read_json`` on line-delimited JSON (``lines=True``).

    Covers both a single read of the whole file and a chunked read whose
    pieces are stitched together with ``concat``, measured for time and for
    peak memory.
    """

    goal_time = 0.2
    fname = "__test_lines__.json"
    # Single parameter axis: the kind of index on the written frame.
    params = ['int', 'datetime']
    param_names = ['index']

    def setup(self, index):
        """Write a 100000 x 5 random float frame as records-oriented JSON lines.

        Parameters
        ----------
        index : str
            Either ``'int'`` or ``'datetime'``; selects the frame's index.
        """
        n_rows = 100000
        n_cols = 5
        # Both candidate indexes are built up front; ``index`` picks one.
        index_by_kind = {
            'int': np.arange(n_rows),
            'datetime': date_range('20000101', periods=n_rows, freq='H'),
        }
        column_labels = list(map('float_{}'.format, range(n_cols)))
        values = np.random.randn(n_rows, n_cols)
        frame = DataFrame(values,
                          columns=column_labels,
                          index=index_by_kind[index])
        frame.to_json(self.fname, orient='records', lines=True)

    def time_read_json_lines(self, index):
        # Whole file in a single call.
        read_json(self.fname, orient='records', lines=True)

    def time_read_json_lines_concat(self, index):
        # Chunked read (25000 lines per chunk), then reassemble the pieces.
        chunks = read_json(self.fname, orient='records', lines=True,
                           chunksize=25000)
        concat(chunks)

    def peakmem_read_json_lines(self, index):
        # Same operation as ``time_read_json_lines``, under the peakmem prefix.
        read_json(self.fname, orient='records', lines=True)

    def peakmem_read_json_lines_concat(self, index):
        # Same operation as ``time_read_json_lines_concat``, under the
        # peakmem prefix.
        chunks = read_json(self.fname, orient='records', lines=True,
                           chunksize=25000)
        concat(chunks)
class ToJSON(BaseIO):
    """Benchmark ``DataFrame.to_json`` for frames of differing column dtypes.

    ``setup`` builds five frames (floats with an integer index, floats with a
    datetime index, timedelta/int/timestamp columns, int/float columns and
    int/float/string columns). Each frame is written once per ``orient`` and
    once in records-oriented JSON-lines form.
    """

    goal_time = 0.2
    fname = "__test__.json"
    params = ['split', 'columns', 'index']
    param_names = ['orient']

    def setup(self, lines_orient):
        """Build the frames used by the timed methods.

        Parameters
        ----------
        lines_orient : str
            The current ``orient`` parameter value. It is not used while
            building the fixtures.
        """
        n_rows = 10**5
        n_cols = 5

        hourly_index = date_range('20000101', periods=n_rows, freq='H')
        deltas = timedelta_range(start=1, periods=n_rows, freq='s')
        stamps = date_range(start=1, periods=n_rows, freq='s')

        # The order of the random-consuming calls below is kept as-is so the
        # generated values are unchanged for a given seed.
        int_values = np.random.randint(100000000, size=n_rows)
        float_values = np.random.randn(n_rows)
        str_values = tm.makeStringIndex(n_rows)

        self.df = DataFrame(np.random.randn(n_rows, n_cols),
                            index=np.arange(n_rows))
        self.df_date_idx = DataFrame(np.random.randn(n_rows, n_cols),
                                     index=hourly_index)

        td_int_ts_columns = {
            'td_1': deltas,
            'td_2': deltas,
            'int_1': int_values,
            'int_2': int_values,
            'ts_1': stamps,
            'ts_2': stamps,
        }
        self.df_td_int_ts = DataFrame(td_int_ts_columns, index=hourly_index)

        int_float_columns = {
            'int_1': int_values,
            'int_2': int_values,
            'int_3': int_values,
            'float_1': float_values,
            'float_2': float_values,
            'float_3': float_values,
        }
        self.df_int_floats = DataFrame(int_float_columns, index=hourly_index)

        int_float_str_columns = {
            'int_1': int_values,
            'int_2': int_values,
            'float_1': float_values,
            'float_2': float_values,
            'str_1': str_values,
            'str_2': str_values,
        }
        self.df_int_float_str = DataFrame(int_float_str_columns,
                                          index=hourly_index)

    # --- one write per ``orient`` ------------------------------------------

    def time_floats_with_int_index(self, orient):
        self.df.to_json(self.fname, orient=orient)

    def time_floats_with_dt_index(self, orient):
        self.df_date_idx.to_json(self.fname, orient=orient)

    def time_delta_int_tstamp(self, orient):
        self.df_td_int_ts.to_json(self.fname, orient=orient)

    def time_float_int(self, orient):
        self.df_int_floats.to_json(self.fname, orient=orient)

    def time_float_int_str(self, orient):
        self.df_int_float_str.to_json(self.fname, orient=orient)

    # --- JSON-lines output -------------------------------------------------
    # These methods receive ``orient`` from the parameter grid but ignore it:
    # the output is always written with orient='records' and lines=True.

    def time_floats_with_int_idex_lines(self, orient):
        # Method name (including "idex") is the benchmark identifier and is
        # left unchanged.
        self.df.to_json(self.fname, orient='records', lines=True)

    def time_floats_with_dt_index_lines(self, orient):
        self.df_date_idx.to_json(self.fname, orient='records', lines=True)

    def time_delta_int_tstamp_lines(self, orient):
        self.df_td_int_ts.to_json(self.fname, orient='records', lines=True)

    def time_float_int_lines(self, orient):
        self.df_int_floats.to_json(self.fname, orient='records', lines=True)

    def time_float_int_str_lines(self, orient):
        self.df_int_float_str.to_json(self.fname, orient='records',
                                      lines=True)
|