File: t_DataCleaning_std.py

package info (click to toggle)
persalys 19.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 46,900 kB
  • sloc: xml: 97,263; cpp: 61,701; python: 4,109; sh: 397; makefile: 84
file content (70 lines) | stat: -rw-r--r-- 2,124 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#! /usr/bin/env python

import persalys
import openturns as ot
import openturns.testing
import math

# Input data: five 3-dimensional points. The second point has a NaN in
# column 1 and the last point has an Inf in column 0 and a NaN in column 1;
# column 2 is finite everywhere.
sample = ot.Sample(0, 3)
for point in (
    [4, 2, 4],
    [2, math.nan, 4],
    [2, 3, 7],
    [8, 7, 3],
    [math.inf, math.nan, 7],
):
    sample.add(point)


# Statistics available right after construction. The expected values match
# the per-column mean and median taken over the finite entries only.
cleaner = persalys.DataCleaning(sample)
expected_mean = [4, 4, 5]
expected_median = [3, 3, 4]
openturns.testing.assert_almost_equal(cleaner.getMean(), expected_mean)
openturns.testing.assert_almost_equal(cleaner.getMedian(), expected_median)

# Dropping every point that holds a NaN/Inf leaves the three fully finite
# points, in their original order.
cleaner.removeAllNans()
finite_points = [[4, 2, 4], [2, 3, 7], [8, 7, 3]]
openturns.testing.assert_almost_equal(cleaner.getSample(), finite_points)

# Substitute 0 for every NaN/Inf, whatever the column; no point is dropped.
zero_filled = persalys.DataCleaning(sample)
zero_filled.replaceAllNans([0, 0, 0])
expected_points = [
    [4, 2, 4],
    [2, 0, 4],
    [2, 3, 7],
    [8, 7, 3],
    [0, 0, 7],
]
openturns.testing.assert_almost_equal(zero_filled.getSample(), expected_points)

# Substitute the per-column medians for every NaN/Inf.
median_filled = persalys.DataCleaning(sample)
column_medians = median_filled.getMedian()
median_filled.replaceAllNans(column_medians)
expected_points = [
    [4, 2, 4],
    [2, 3, 4],
    [2, 3, 7],
    [8, 7, 3],
    [3, 3, 7],
]
openturns.testing.assert_almost_equal(median_filled.getSample(), expected_points)

# MAD values are read once computeGeometricMAD() has been run. The expected
# scalar 1.41421 is the Euclidean norm of the expected MAD vector [1, 0, 1]
# written to five decimals.
median_filled.computeGeometricMAD()
expected_mad = [1, 0, 1]
expected_geometric_mad = 1.41421
openturns.testing.assert_almost_equal(median_filled.getMAD(), expected_mad)
openturns.testing.assert_almost_equal(
    median_filled.getGeometricMAD(), expected_geometric_mad
)

# Substitute the per-column means for every NaN/Inf.
mean_filled = persalys.DataCleaning(sample)
column_means = mean_filled.getMean()
mean_filled.replaceAllNans(column_means)
expected_points = [
    [4, 2, 4],
    [2, 4, 4],
    [2, 3, 7],
    [8, 7, 3],
    [4, 4, 7],
]
openturns.testing.assert_almost_equal(mean_filled.getSample(), expected_points)

# MAD values are read once computeGeometricMAD() has been run. The expected
# scalar 1.73205 is the Euclidean norm of the expected MAD vector [1, 1, 1]
# written to five decimals.
mean_filled.computeGeometricMAD()
expected_mad = [1, 1, 1]
expected_geometric_mad = 1.73205
openturns.testing.assert_almost_equal(mean_filled.getMAD(), expected_mad)
openturns.testing.assert_almost_equal(
    mean_filled.getGeometricMAD(), expected_geometric_mad
)

# Column-wise replacement: zero-fill columns 0 and 1 one after the other.
# Column 2 is left alone because it holds no NaN/Inf in the input sample.
column_filled = persalys.DataCleaning(sample)
for column in (0, 1):
    column_filled.replaceNansByColumn(column, 0)
expected_points = [
    [4, 2, 4],
    [2, 0, 4],
    [2, 3, 7],
    [8, 7, 3],
    [0, 0, 7],
]
openturns.testing.assert_almost_equal(column_filled.getSample(), expected_points)

# Mixed column-wise strategy: first drop the points that are non-finite in
# column 0 (only the last point, which holds the Inf), then put -2 in place
# of the NaN still present in column 1.
mixed = persalys.DataCleaning(sample)
mixed.removeNansByColumn(0)
mixed.replaceNansByColumn(1, -2)
expected_points = [
    [4, 2, 4],
    [2, -2, 4],
    [2, 3, 7],
    [8, 7, 3],
]
openturns.testing.assert_almost_equal(mixed.getSample(), expected_points)