File: clean_data.py

package info (click to toggle)
python-qmix 1.0.6-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 9,460 kB
  • sloc: python: 4,312; makefile: 215
file content (208 lines) | stat: -rw-r--r-- 4,050 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
""" This module contains functions for cleaning experimental data.

This includes removing NaN values, removing repeated values, and sorting.

The data can either be in x/y format (i.e., two arrays of equal length) or in
matrix form (i.e., a matrix with two columns).

"""

import numpy as np


# Basic functions to clean x/y data ------------------------------------------

def remove_nans_xy(x, y):
    """Drop every x/y pair in which either value is NaN.

    Args:
        x (ndarray): x array
        y (ndarray): y array

    Returns:
        The x and y arrays restricted to the positions where both are
        not NaN

    """

    # A pair is discarded as soon as one of its two members is NaN.
    has_nan = np.isnan(x) | np.isnan(y)
    keep = ~has_nan

    return x[keep], y[keep]


def sort_xy(x, y):
    """Reorder x/y data so that the x values are ascending.

    Args:
        x (ndarray): x array
        y (ndarray): y array

    Returns:
        The x array in ascending order and the y array reordered with the
        same permutation

    """

    # One permutation, computed from x, is applied to both arrays so that
    # the pairs stay together.
    order = x.argsort()
    x_sorted = x[order]
    y_sorted = y[order]

    return x_sorted, y_sorted


def remove_doubles_xy(x, y, check=True):
    """Given x/y data, remove double values of x.

    Only the first point of each run of equal, adjacent x values is kept.
    This function assumes that the data is already sorted by x!

    Args:
        x (ndarray): x array
        y (ndarray): y array
        check (bool): check that x is sorted

    Returns:
        x/y data with double values of x removed

    Raises:
        AssertionError: if ``check`` is true and x is not sorted in
            ascending order

    """

    # Check to see if x is sorted. Comparing neighbours directly (instead of
    # taking the minimum of their differences) also works for arrays with
    # fewer than two elements, where the difference array is empty, and for
    # unsigned integer dtypes, where a subtraction would wrap around.
    if check:
        assert np.all(x[1:] >= x[:-1]), "x must be sorted in ascending order"

    # Find doubles: the first element is always kept, every later element
    # is kept only if it differs from its predecessor.
    mask = np.ones(len(x), dtype=bool)
    mask[1:] = (x[1:] != x[:-1])

    return x[mask], y[mask]


def clean_xy(x, y):
    """Run the full cleaning pipeline on x/y data.

    The steps are, in order: drop NaNs, sort by x, drop double values of x.

    Args:
        x (ndarray): x data
        y (ndarray): y data

    Returns:
        Cleaned x/y data

    """

    assert len(x) == len(y)

    # Each stage hands its (x, y) pair straight to the next one.
    without_nans = remove_nans_xy(x, y)
    in_order = sort_xy(*without_nans)

    return remove_doubles_xy(*in_order)


def xy_to_matrix(x, y):
    """Combine separate x and y arrays into a single matrix.

    Args:
        x (ndarray): x data
        y (ndarray): y data

    Returns:
        Matrix of x/y data (x stacked on top of y, then transposed)

    """

    # Stack x and y as rows first, then flip rows and columns.
    rows = np.vstack((x, y))

    return rows.T


# Basic functions to clean x/y data in matrix form ---------------------------
# Assuming that the matrix is in 2-column form

def remove_nans_matrix(matrix):
    """Drop every row whose first or second column holds a NaN.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        2-column matrix with NaNs removed

    """

    first, second = matrix[:, 0], matrix[:, 1]

    # A row is discarded as soon as one of its two entries is NaN.
    has_nan = np.isnan(first) | np.isnan(second)

    return matrix[~has_nan]


def sort_matrix(matrix, col=0):
    """Reorder the rows of a 2D matrix by the values in one column.

    Args:
        matrix (ndarray): 2-column matrix
        col (int): column to sort by

    Returns:
        2-column matrix with its rows arranged so that the given column is
        ascending

    """

    key_column = matrix[:, col]
    row_order = key_column.argsort()

    return matrix[row_order]


def remove_doubles_matrix(matrix, col=0, check=True):
    """Remove double values from 2-column matrix.

    Only the first row of each run of equal, adjacent values in the given
    column is kept, so the matrix is expected to be sorted by that column.

    Args:
        matrix: 2-column matrix
        col: column to remove doubles from (default 0)
        check (bool): check that the data in the given column is sorted

    Returns:
        2-column matrix with double values of given column removed

    Raises:
        AssertionError: if ``check`` is true and the given column is not
            sorted in ascending order

    """

    column = matrix[:, col]

    if check:
        # Check the column that is actually being de-duplicated. Comparing
        # neighbours directly (instead of taking the minimum of their
        # differences) also works for matrices with fewer than two rows,
        # where the difference array is empty, and for unsigned integer
        # dtypes, where a subtraction would wrap around.
        assert np.all(column[1:] >= column[:-1]), \
            "Column must be sorted in ascending order"

    # The first row is always kept, every later row is kept only if its
    # value differs from the one in the row before it.
    mask = np.ones(len(column), dtype=bool)
    mask[1:] = (column[1:] != column[:-1])

    return matrix[mask, :]


def clean_matrix(matrix):
    """Run the full cleaning pipeline on 2D matrix data.

    The steps are, in order: drop NaNs, sort by the first column, drop
    double values of the first column.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        Cleaned 2-column matrix

    """

    assert matrix.shape[1] == 2, "Matrix should only have 2 columns."

    # Innermost call runs first: NaN removal, then sorting, then doubles.
    return remove_doubles_matrix(sort_matrix(remove_nans_matrix(matrix)))


def matrix_to_xy(matrix):
    """Split a matrix into separate x and y arrays.

    Args:
        matrix (ndarray): 2-column matrix

    Returns:
        x/y data: the first column as x and the second column as y

    """

    x = matrix[:, 0]
    y = matrix[:, 1]

    return x, y