1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
#! /usr/bin/env python
import persalys
import openturns as ot
import openturns.testing
import math
# Five 3-component points; the 2nd and 5th rows carry non-finite entries
# (NaN / +Inf) so that the cleaning operations below have something to act on.
rows = (
    [4, 2, 4],
    [2, math.nan, 4],
    [2, 3, 7],
    [8, 7, 3],
    [math.inf, math.nan, 7],
)
sample = ot.Sample(0, 3)
for row in rows:
    sample.add(row)
# Statistics reported for the raw sample: the expected values below equal the
# per-column mean / median taken over the finite entries only.
clean = persalys.DataCleaning(sample)
expected_mean = [4, 4, 5]
expected_median = [3, 3, 4]
openturns.testing.assert_almost_equal(clean.getMean(), expected_mean)
openturns.testing.assert_almost_equal(clean.getMedian(), expected_median)

# Drop every point holding a NaN/Inf: only the three fully finite rows remain.
clean.removeAllNans()
finite_rows = [[4, 2, 4], [2, 3, 7], [8, 7, 3]]
openturns.testing.assert_almost_equal(clean.getSample(), finite_rows)
# Fresh cleaner on the untouched sample; every NaN/Inf is swapped for a zero
# (one replacement value per column).
zeros = [0, 0, 0]
clean = persalys.DataCleaning(sample)
clean.replaceAllNans(zeros)
zero_filled = [
    [4, 2, 4],
    [2, 0, 4],
    [2, 3, 7],
    [8, 7, 3],
    [0, 0, 7],
]
openturns.testing.assert_almost_equal(clean.getSample(), zero_filled)
# Fresh cleaner; NaN/Inf entries are filled with the column medians that the
# cleaner itself reports.
clean = persalys.DataCleaning(sample)
medians = clean.getMedian()
clean.replaceAllNans(medians)
median_filled = [
    [4, 2, 4],
    [2, 3, 4],
    [2, 3, 7],
    [8, 7, 3],
    [3, 3, 7],
]
openturns.testing.assert_almost_equal(clean.getSample(), median_filled)

# MAD figures on the median-filled data: per-column values, then the scalar
# geometric MAD.
clean.computeGeometricMAD()
expected_mad = [1, 0, 1]
expected_geometric_mad = 1.41421
openturns.testing.assert_almost_equal(clean.getMAD(), expected_mad)
openturns.testing.assert_almost_equal(
    clean.getGeometricMAD(), expected_geometric_mad
)
# Fresh cleaner; NaN/Inf entries are filled with the column means that the
# cleaner itself reports.
clean = persalys.DataCleaning(sample)
means = clean.getMean()
clean.replaceAllNans(means)
mean_filled = [
    [4, 2, 4],
    [2, 4, 4],
    [2, 3, 7],
    [8, 7, 3],
    [4, 4, 7],
]
openturns.testing.assert_almost_equal(clean.getSample(), mean_filled)

# MAD figures on the mean-filled data: per-column values, then the scalar
# geometric MAD.
clean.computeGeometricMAD()
expected_mad = [1, 1, 1]
expected_geometric_mad = 1.73205
openturns.testing.assert_almost_equal(clean.getMAD(), expected_mad)
openturns.testing.assert_almost_equal(
    clean.getGeometricMAD(), expected_geometric_mad
)
# Fresh cleaner; zero-fill the NaN/Inf entries one column at a time.
# Only columns 0 and 1 are processed here.
clean = persalys.DataCleaning(sample)
for column in (0, 1):
    clean.replaceNansByColumn(column, 0)
column_filled = [
    [4, 2, 4],
    [2, 0, 4],
    [2, 3, 7],
    [8, 7, 3],
    [0, 0, 7],
]
openturns.testing.assert_almost_equal(clean.getSample(), column_filled)
# Fresh cleaner mixing both per-column strategies: first discard the points
# whose column 0 is NaN/Inf, then put -2 in place of the NaN/Inf left in
# column 1. Four rows are expected to survive.
clean = persalys.DataCleaning(sample)
clean.removeNansByColumn(0)
clean.replaceNansByColumn(1, -2)
mixed_result = [
    [4, 2, 4],
    [2, -2, 4],
    [2, 3, 7],
    [8, 7, 3],
]
openturns.testing.assert_almost_equal(clean.getSample(), mixed_result)
|