1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
""" This module contains functions for cleaning experimental data.
This includes removing NaN values, removing repeated values, and sorting.
The data can either be in x/y format (i.e., two arrays of equal length) or in
matrix form (i.e., a matrix with two columns).
"""
import numpy as np
# Basic functions to clean x/y data ------------------------------------------
def remove_nans_xy(x, y):
    """Drop every x/y pair in which at least one value is NaN.

    Args:
        x (ndarray): x array
        y (ndarray): y array

    Returns:
        Tuple ``(x, y)`` restricted to the positions where neither input
        is NaN.
    """
    # A pair is discarded as soon as one of its two members is NaN.
    has_nan = np.isnan(x) | np.isnan(y)
    keep = ~has_nan
    return x[keep], y[keep]
def sort_xy(x, y):
    """Sort x/y data by the x values.

    A stable sort is used, so pairs that share the same x value keep
    their original relative order. This makes the result deterministic
    when x contains repeated values, which matters for any later step
    that keeps only the first pair of each run of equal x values.

    Args:
        x (ndarray): x array
        y (ndarray): y array

    Returns:
        Tuple ``(x, y)`` reordered so that x is ascending.
    """
    # The default argsort algorithm does not guarantee the order of ties.
    order = x.argsort(kind="stable")
    return x[order], y[order]
def remove_doubles_xy(x, y, check=True):
    """Given x/y data, remove double values of x.

    For every run of equal consecutive x values only the first pair is
    kept. This function assumes that the data is already sorted by x!
    Empty and single-element inputs are returned unchanged.

    Args:
        x (ndarray): x array
        y (ndarray): y array
        check (bool): check that x is sorted

    Returns:
        x/y data with double values of x removed

    Raises:
        AssertionError: if ``check`` is true and x is not sorted in
            ascending order.
    """
    # Compare neighbours directly instead of subtracting them: this is
    # well defined for empty and single-element arrays, does not wrap
    # around for unsigned integer dtypes, and treats equal infinities as
    # sorted.
    if check and not np.all(x[1:] >= x[:-1]):
        raise AssertionError("x must be sorted in ascending order")

    # Keep the first element and every element differing from its
    # predecessor.
    keep = np.ones(len(x), dtype=bool)
    keep[1:] = x[1:] != x[:-1]
    return x[keep], y[keep]
def clean_xy(x, y):
    """Run the full cleaning pipeline on x/y data.

    The steps are applied in this order: drop pairs containing NaN,
    sort the pairs by x, then drop pairs whose x value repeats.

    Args:
        x (ndarray): x data
        y (ndarray): y data

    Returns:
        Cleaned x/y data
    """
    assert len(x) == len(y)
    without_nans = remove_nans_xy(x, y)
    ordered = sort_xy(*without_nans)
    unique_x, unique_y = remove_doubles_xy(*ordered)
    return unique_x, unique_y
def xy_to_matrix(x, y):
    """Combine separate x and y arrays into a single matrix.

    Args:
        x (ndarray): x data
        y (ndarray): y data

    Returns:
        Matrix of x/y data: x and y are stacked as rows and the result
        is transposed.
    """
    rows = np.vstack([x, y])
    return np.transpose(rows)
# Basic functions to clean x/y data in matrix form ---------------------------
# Assuming that the matrix is in 2-column form
def remove_nans_matrix(matrix):
    """Drop every row whose first or second column holds a NaN.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        2-column matrix with NaNs removed
    """
    first_col = matrix[:, 0]
    second_col = matrix[:, 1]
    # A row is discarded as soon as one of its two entries is NaN.
    has_nan = np.isnan(first_col) | np.isnan(second_col)
    return matrix[~has_nan]
def sort_matrix(matrix, col=0):
    """Sort a 2D matrix by a specific column.

    A stable sort is used, so rows with equal values in the sort column
    keep their original relative order. This makes the result
    deterministic when the column contains repeated values, which
    matters for any later step that keeps only the first row of each
    run of equal values.

    Args:
        matrix (ndarray): 2-column matrix
        col (int): column to sort by

    Returns:
        2-column matrix sorted by the given column
    """
    # The default argsort algorithm does not guarantee the order of ties.
    order = matrix[:, col].argsort(kind="stable")
    return matrix[order]
def remove_doubles_matrix(matrix, col=0, check=True):
    """Remove double values from 2-column matrix.

    For every run of equal consecutive values in the chosen column only
    the first row is kept, so the matrix is expected to be sorted by
    that column. Empty and single-row matrices are returned unchanged.

    Args:
        matrix: 2-column matrix
        col: column to remove doubles from (default 0)
        check (bool): check that the chosen column is sorted

    Returns:
        2-column matrix with double values of given column removed

    Raises:
        AssertionError: if ``check`` is true and the chosen column is
            not sorted in ascending order.
    """
    column = matrix[:, col]

    # Validate the column that is actually de-duplicated. Neighbours are
    # compared directly rather than subtracted so the check is well
    # defined for empty and single-row input and does not wrap around
    # for unsigned integer dtypes.
    if check and not np.all(column[1:] >= column[:-1]):
        raise AssertionError(
            "matrix must be sorted in ascending order by the given column"
        )

    # Keep the first row and every row differing from its predecessor.
    keep = np.ones(len(column), dtype=bool)
    keep[1:] = column[1:] != column[:-1]
    return matrix[keep, :]
def clean_matrix(matrix):
    """Run the full cleaning pipeline on a 2-column matrix.

    The steps are applied in this order: drop rows containing NaN, sort
    the rows by the first column, then drop rows whose first-column
    value repeats.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        Cleaned 2-column matrix
    """
    assert matrix.shape[1] == 2, "Matrix should only have 2 columns."
    without_nans = remove_nans_matrix(matrix)
    ordered = sort_matrix(without_nans)
    return remove_doubles_matrix(ordered)
def matrix_to_xy(matrix):
    """Split a matrix into separate x and y arrays.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        Tuple ``(x, y)``: the first and the second column of the matrix.
    """
    x_values = matrix[:, 0]
    y_values = matrix[:, 1]
    return x_values, y_values
|