1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
|
{-# LANGUAGE ViewPatterns #-}
-- |
-- Module : Statistics.Test.WilcoxonT
-- Copyright : (c) 2010 Neil Brown
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- The Wilcoxon matched-pairs signed-rank test is non-parametric test
-- which could be used to test whether two related samples have
-- different means.
module Statistics.Test.WilcoxonT (
-- * Wilcoxon signed-rank matched-pair test
-- ** Test
wilcoxonMatchedPairTest
-- ** Building blocks
, wilcoxonMatchedPairSignedRank
, wilcoxonMatchedPairSignificant
, wilcoxonMatchedPairSignificance
, wilcoxonMatchedPairCriticalValue
, module Statistics.Test.Types
-- * References
-- $references
) where
--
--
--
-- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . flip wilcoxonMatchedPairSignedRank
-- The samples are zipped together: if one is longer than the other, both are truncated
-- The value returned is the pair (T+, T-). T+ is the sum of positive ranks (the
-- These values mean little by themselves, and should be combined with the 'wilcoxonSignificant'
-- function in this module to get a meaningful result.
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
-- to the length of the shorter sample.
import Data.Function (on)
import Data.List (findIndex)
import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Prelude hiding (sum)
import Statistics.Function (sortBy)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Internal (rank, splitByTags)
import Statistics.Test.Types
import Statistics.Types -- (CL,pValue,getPValue)
import Statistics.Distribution
import Statistics.Distribution.Normal
-- | Calculate (n,T⁺,T⁻) values for both samples. Where /n/ is reduced
-- sample where equal pairs are removed.
wilcoxonMatchedPairSignedRank :: (Ord a, Num a, U.Unbox a) => U.Vector (a,a) -> (Int, Double, Double)
wilcoxonMatchedPairSignedRank ab
= (nRed, sum ranks1, negate (sum ranks2))
where
-- Positive and negative ranks
(ranks1, ranks2) = splitByTags
$ U.zip tags (rank ((==) `on` abs) diffs)
-- Sorted list of differences
diffsSorted = sortBy (comparing abs) -- Sort the differences by absolute difference
$ U.filter (/= 0) -- Remove equal elements
$ U.map (uncurry (-)) ab -- Work out differences
nRed = U.length diffsSorted
-- Sign tags and differences
(tags,diffs) = U.unzip
$ U.map (\x -> (x>0 , x)) -- Attach tags to distribution elements
$ diffsSorted
-- | The coefficients for x^0, x^1, x^2, etc, in the expression
-- \prod_{r=1}^s (1 + x^r). See the Mitic paper for details.
--
-- We can define:
-- f(1) = 1 + x
-- f(r) = (1 + x^r)*f(r-1)
-- = f(r-1) + x^r * f(r-1)
-- The effect of multiplying the equation by x^r is to shift
-- all the coefficients by r down the list.
--
-- This list will be processed lazily from the head.
coefficients :: Int -> [Integer]
coefficients 1 = [1, 1] -- 1 + x
coefficients r = let coeffs = coefficients (r-1)
(firstR, rest) = splitAt r coeffs
in firstR ++ add rest coeffs
where
add (x:xs) (y:ys) = x + y : add xs ys
add xs [] = xs
add [] ys = ys
-- This list will be processed lazily from the head.
summedCoefficients :: Int -> [Double]
summedCoefficients n
| n < 1 = error "Statistics.Test.WilcoxonT.summedCoefficients: nonpositive sample size"
| n > 1023 = error "Statistics.Test.WilcoxonT.summedCoefficients: sample is too large (see bug #18)"
| otherwise = map fromIntegral $ scanl1 (+) $ coefficients n
-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
-- This function can perform a one-tailed or two-tailed test. If the first
-- parameter to this function is 'TwoTailed', the test is performed two-tailed to
-- check if the two samples differ significantly. If the first parameter is
-- 'OneTailed', the check is performed one-tailed to decide whether the first sample
-- (i.e. the first sample you passed to 'wilcoxonMatchedPairSignedRank') is
-- greater than the second sample (i.e. the second sample you passed to
-- 'wilcoxonMatchedPairSignedRank'). If you wish to perform a one-tailed test
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
wilcoxonMatchedPairSignificant
:: PositionTest -- ^ How to compare two samples
-> PValue Double -- ^ The p-value at which to test (e.g. @mkPValue 0.05@)
-> (Int, Double, Double) -- ^ The (n,T⁺, T⁻) values from 'wilcoxonMatchedPairSignedRank'.
-> Maybe TestResult -- ^ Return 'Nothing' if the sample was too
-- small to make a decision.
wilcoxonMatchedPairSignificant test pVal (sampleSize, tPlus, tMinus) =
case test of
-- According to my nearest book (Understanding Research Methods and Statistics
-- by Gary W. Heiman, p590), to check that the first sample is bigger you must
-- use the absolute value of T- for a one-tailed check:
AGreater -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
return $ significant $ abs tMinus <= fromIntegral crit
BGreater -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
return $ significant $ abs tPlus <= fromIntegral crit
-- Otherwise you must use the value of T+ and T- with the smallest absolute value:
--
-- Note that in absence of ties sum of |T+| and |T-| is constant
-- so by selecting minimal we are performing two-tailed test and
-- look and both tails of distribution of T.
SamplesDiffer -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize (mkPValue $ p/2)
return $ significant $ t <= fromIntegral crit
where
t = min (abs tPlus) (abs tMinus)
p = pValue pVal
-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level). Your T value must be less than or
-- equal to the return of this function in order for the test to work out
-- significant. If there is a Nothing return, the sample size is too small to
-- make a decision.
--
-- 'wilcoxonSignificant' tests the return value of 'wilcoxonMatchedPairSignedRank'
-- for you, so you should use 'wilcoxonSignificant' for determining test results.
-- However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
-- The return values of this function are generated using the method
-- detailed in the Mitic's paper. According to that paper, the results
-- may differ from other published lookup tables, but (Mitic claims)
-- the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue ::
Int -- ^ The sample size
-> PValue Double -- ^ The p-value (e.g. @mkPValue 0.05@) for which you want the critical value.
-> Maybe Int -- ^ The critical value (of T), or Nothing if
-- the sample is too small to make a decision.
wilcoxonMatchedPairCriticalValue n pVal
| n < 100 =
case subtract 1 <$> findIndex (> m) (summedCoefficients n) of
Just k | k < 0 -> Nothing
| otherwise -> Just k
Nothing -> error "Statistics.Test.WilcoxonT.wilcoxonMatchedPairCriticalValue: impossible happened"
| otherwise =
case quantile (normalApprox n) p of
z | z < 0 -> Nothing
| otherwise -> Just (round z)
where
p = pValue pVal
m = (2 ** fromIntegral n) * p
-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
wilcoxonMatchedPairSignificance
:: Int -- ^ The sample size
-> Double -- ^ The value of T for which you want the significance.
-> PValue Double -- ^ The significance (p-value).
wilcoxonMatchedPairSignificance n t
= mkPValue p
where
p | n < 100 = (summedCoefficients n !! floor t) / 2 ** fromIntegral n
| otherwise = cumulative (normalApprox n) t
-- | Normal approximation for Wilcoxon T statistics
normalApprox :: Int -> NormalDistribution
normalApprox ni
= normalDistr m s
where
m = n * (n + 1) / 4
s = sqrt $ (n * (n + 1) * (2*n + 1)) / 24
n = fromIntegral ni
-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
-- truncated to the length of the shorter sample.
--
-- For one-tailed test it tests whether first sample is significantly
-- greater than the second. For two-tailed it checks whether they
-- significantly differ
--
-- Check 'wilcoxonMatchedPairSignedRank' and
-- 'wilcoxonMatchedPairSignificant' for additional information.
wilcoxonMatchedPairTest
:: (Ord a, Num a, U.Unbox a)
=> PositionTest -- ^ Perform one-tailed test.
-> U.Vector (a,a) -- ^ Sample of pairs
-> Test () -- ^ Return 'Nothing' if the sample was too
-- small to make a decision.
wilcoxonMatchedPairTest test pairs =
Test { testSignificance = pVal
, testStatistics = t
, testDistribution = ()
}
where
(n,tPlus,tMinus) = wilcoxonMatchedPairSignedRank pairs
(t,pVal) = case test of
AGreater -> (abs tMinus, wilcoxonMatchedPairSignificance n (abs tMinus))
BGreater -> (abs tPlus, wilcoxonMatchedPairSignificance n (abs tPlus ))
-- Since we take minimum of T+,T- we can't get more
-- that p=0.5 and can multiply it by 2 without risk
-- of error.
SamplesDiffer -> let t' = min (abs tMinus) (abs tPlus)
p = wilcoxonMatchedPairSignificance n t'
in (t', mkPValue $ min 1 $ 2 * pValue p)
-- $references
--
-- * \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
-- Mitic, The Mathematica Journal, volume 6, issue 3, 1996
-- (<http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>)
|