1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
|
{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
-- | Student's t-test assesses whether two samples have different
-- means. This module contains several variations of the t-test. It
-- is a parametric test and assumes that the samples are normally
-- distributed.
module Statistics.Test.StudentT
(
studentTTest
, welchTTest
, pairedTTest
, module Statistics.Test.Types
) where
import Statistics.Distribution hiding (mean)
import Statistics.Distribution.StudentT
import Statistics.Sample (mean, varianceUnbiased)
import Statistics.Test.Types
import Statistics.Types (mkPValue,PValue)
import Statistics.Function (square)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Storable as S
import qualified Data.Vector as V
-- | Two-sample Student's t-test for a difference in means. Both
-- samples are assumed to be normally distributed with a common
-- variance. Yields @Nothing@ when either sample holds fewer than two
-- elements.
studentTTest :: (G.Vector v Double)
             => PositionTest  -- ^ one- or two-tailed test
             -> v Double      -- ^ Sample A
             -> v Double      -- ^ Sample B
             -> Maybe (Test StudentT)
studentTTest test sample1 sample2
  | bigEnough sample1 && bigEnough sample2 = Just outcome
  | otherwise                              = Nothing
  where
    -- a sample needs at least two observations to be usable
    bigEnough xs = G.length xs >= 2
    -- statistic and degrees of freedom under the equal-variance flag
    (tValue, dof) = tStatistics True sample1 sample2
    outcome = Test
      { testSignificance = significance test tValue dof
      , testStatistics   = tValue
      , testDistribution = studentT dof
      }
{-# INLINABLE  studentTTest #-}
{-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}
-- | Two-sample Welch's t-test for a difference in means. Both samples
-- are assumed to be normally distributed, but their variances are
-- not assumed to be equal. Yields @Nothing@ when either sample holds
-- fewer than two elements.
welchTTest :: (G.Vector v Double)
           => PositionTest  -- ^ one- or two-tailed test
           -> v Double      -- ^ Sample A
           -> v Double      -- ^ Sample B
           -> Maybe (Test StudentT)
welchTTest test sample1 sample2
  | any tooSmall [sample1, sample2] = Nothing
  | otherwise                       = Just result
  where
    -- fewer than two observations leaves the sample unusable
    tooSmall xs = G.length xs < 2
    -- statistic and degrees of freedom under the unequal-variance flag
    (stat, dof) = tStatistics False sample1 sample2
    result = Test
      { testSignificance = significance test stat dof
      , testStatistics   = stat
      , testDistribution = studentT dof
      }
{-# INLINABLE  welchTTest #-}
{-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}
-- | Paired two-sample t-test, for observations that are matched in a
-- within-subject design. Yields @Nothing@ when fewer than two pairs
-- are supplied.
pairedTTest :: forall v. (G.Vector v (Double, Double), G.Vector v Double)
            => PositionTest        -- ^ one- or two-tailed test
            -> v (Double, Double)  -- ^ paired samples
            -> Maybe (Test StudentT)
pairedTTest test sample =
  if G.length sample < 2
    then Nothing
    else Just Test
           { testSignificance = significance test tValue dof
           , testStatistics   = tValue
           , testDistribution = studentT dof
           }
  where
    -- statistic and degrees of freedom of the per-pair differences
    (tValue, dof) = tStatisticsPaired sample
{-# INLINABLE  pairedTTest #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-}
-------------------------------------------------------------------------------
-- Convert a t statistic into a p-value for the requested kind of test,
-- using the Student's t distribution with the given degrees of freedom.
significance :: PositionTest  -- ^ one- or two-tailed
             -> Double        -- ^ t statistics
             -> Double        -- ^ degree of freedom
             -> PValue Double -- ^ p-value
significance test t df = mkPValue tailProbability
  where
    -- cumulative distribution function of the reference distribution
    lowerTail = cumulative (studentT df)
    -- The t distribution is symmetric, so every case is expressed
    -- through the lower tail: the two-sided test doubles the smaller
    -- tail, and the upper tail at t equals the lower tail at -t.
    tailProbability = case test of
      SamplesDiffer -> 2 * lowerTail (negate (abs t))
      AGreater      -> lowerTail (negate t)
      BGreater      -> lowerTail t
-- Calculate the t statistic and the degrees of freedom for two samples.
-- The flag selects between the pooled-variance formulas (equal
-- variances assumed) and the unequal-variance (Welch) formulas.
tStatistics :: (G.Vector v Double)
            => Bool      -- variance equality
            -> v Double
            -> v Double
            -> (Double, Double)
tStatistics varequal sample1 sample2 = (tValue, dof)
  where
    -- difference of means scaled by its standard error
    tValue = (meanA - meanB) / sqrt stdErrSq

    -- squared standard error of the difference of means
    stdErrSq
      | varequal  = pooledVar * (1 / sizeA + 1 / sizeB)
      | otherwise = welchTerm

    -- degrees of freedom; the unequal-variance case uses the
    -- Welch-Satterthwaite approximation
    dof
      | varequal  = pooledDof
      | otherwise = square welchTerm
                  / ( square varA / (square sizeA * (sizeA - 1))
                    + square varB / (square sizeB * (sizeB - 1)) )

    -- shared intermediates
    pooledDof = sizeA + sizeB - 2
    pooledVar = ((sizeA - 1) * varA + (sizeB - 1) * varB) / pooledDof
    welchTerm = varA / sizeA + varB / sizeB

    -- per-sample summaries
    sizeA = fromIntegral $ G.length sample1
    sizeB = fromIntegral $ G.length sample2
    meanA = mean sample1
    meanB = mean sample2
    varA  = varianceUnbiased sample1
    varB  = varianceUnbiased sample2
{-# INLINE tStatistics #-}
-- Calculate the t statistic and the degrees of freedom for a paired
-- sample.
--
-- The statistic is the mean of the per-pair differences divided by its
-- standard error @sqrt (var / n)@, where @var@ is the unbiased variance
-- of the differences. This is algebraically identical to the one-pass
-- form @sum d / sqrt ((n * sum d^2 - (sum d)^2) / (n - 1))@, but avoids
-- subtracting two large, nearly equal sums. That subtraction loses
-- precision and can leave a slightly negative radicand (hence NaN) when
-- the differences are almost constant. It also keeps this helper
-- consistent with 'tStatistics', which uses the same estimators.
--
-- The caller is expected to supply at least two pairs; with fewer the
-- degrees of freedom are not positive.
tStatisticsPaired :: (G.Vector v (Double, Double))
                  => v (Double, Double)
                  -> (Double, Double)
{-# INLINE tStatisticsPaired #-}
tStatisticsPaired sample = (t, ndf)
  where
    -- per-pair differences: first component minus second
    d :: U.Vector Double
    d = U.map (uncurry (-)) $ G.convert sample
    -- t-statistics
    t = mean d / sqrt (varianceUnbiased d / n)
    -- degree of freedom
    ndf = n - 1
    n = fromIntegral $ G.length sample
|