File: WilcoxonT.hs

package info (click to toggle)
haskell-statistics 0.16.2.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 640 kB
  • sloc: haskell: 6,819; ansic: 35; python: 33; makefile: 9
file content (245 lines) | stat: -rw-r--r-- 10,579 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
{-# LANGUAGE ViewPatterns #-}
-- |
-- Module    : Statistics.Test.WilcoxonT
-- Copyright : (c) 2010 Neil Brown
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The Wilcoxon matched-pairs signed-rank test is non-parametric test
-- which could be used to test whether two related samples have
-- different means.
module Statistics.Test.WilcoxonT (
    -- * Wilcoxon signed-rank matched-pair test
    -- ** Test
    wilcoxonMatchedPairTest
    -- ** Building blocks
  , wilcoxonMatchedPairSignedRank
  , wilcoxonMatchedPairSignificant
  , wilcoxonMatchedPairSignificance
  , wilcoxonMatchedPairCriticalValue
  , module Statistics.Test.Types
    -- * References
    -- $references
  ) where



--
--
--
-- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . flip wilcoxonMatchedPairSignedRank
-- The samples are zipped together: if one is longer than the other, both are truncated
-- The value returned is the pair (T+, T-).  T+ is the sum of positive ranks (the
-- These values mean little by themselves, and should be combined with the 'wilcoxonSignificant'
-- function in this module to get a meaningful result.
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
-- to the length of the shorter sample.

import Data.Function (on)
import Data.List (findIndex)
import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Prelude hiding (sum)
import Statistics.Function (sortBy)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Internal (rank, splitByTags)
import Statistics.Test.Types
import Statistics.Types -- (CL,pValue,getPValue)
import Statistics.Distribution
import Statistics.Distribution.Normal


-- | Calculate (n,T⁺,T⁻) values for both samples. Where /n/ is reduced
--   sample where equal pairs are removed.
wilcoxonMatchedPairSignedRank :: (Ord a, Num a, U.Unbox a) => U.Vector (a,a) -> (Int, Double, Double)
wilcoxonMatchedPairSignedRank ab
  = (nRed, sum ranks1, negate (sum ranks2))
  where
    -- Positive and negative ranks
    (ranks1, ranks2) = splitByTags
                     $ U.zip tags (rank ((==) `on` abs) diffs)
    -- Sorted list of differences
    diffsSorted = sortBy (comparing abs)    -- Sort the differences by absolute difference
                $ U.filter  (/= 0)          -- Remove equal elements
                $ U.map (uncurry (-)) ab    -- Work out differences
    nRed = U.length diffsSorted
    -- Sign tags and differences
    (tags,diffs) = U.unzip
                 $ U.map (\x -> (x>0 , x))   -- Attach tags to distribution elements
                 $ diffsSorted



-- | The coefficients for x^0, x^1, x^2, etc, in the expression
-- \prod_{r=1}^s (1 + x^r).  See the Mitic paper for details.
--
-- We can define:
-- f(1) = 1 + x
-- f(r) = (1 + x^r)*f(r-1)
--      = f(r-1) + x^r * f(r-1)
-- The effect of multiplying the equation by x^r is to shift
-- all the coefficients by r down the list.
--
-- This list will be processed lazily from the head.
coefficients :: Int -> [Integer]
coefficients 1 = [1, 1] -- 1 + x
coefficients r = let coeffs = coefficients (r-1)
                     (firstR, rest) = splitAt r coeffs
  in firstR ++ add rest coeffs
  where
    add (x:xs) (y:ys) = x + y : add xs ys
    add xs [] = xs
    add [] ys = ys

-- This list will be processed lazily from the head.
summedCoefficients :: Int -> [Double]
summedCoefficients n
  | n < 1     = error "Statistics.Test.WilcoxonT.summedCoefficients: nonpositive sample size"
  | n > 1023  = error "Statistics.Test.WilcoxonT.summedCoefficients: sample is too large (see bug #18)"
  | otherwise = map fromIntegral $ scanl1 (+) $ coefficients n



-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
-- This function can perform a one-tailed or two-tailed test.  If the first
-- parameter to this function is 'TwoTailed', the test is performed two-tailed to
-- check if the two samples differ significantly.  If the first parameter is
-- 'OneTailed', the check is performed one-tailed to decide whether the first sample
-- (i.e. the first sample you passed to 'wilcoxonMatchedPairSignedRank') is
-- greater than the second sample (i.e. the second sample you passed to
-- 'wilcoxonMatchedPairSignedRank').  If you wish to perform a one-tailed test
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
wilcoxonMatchedPairSignificant
  :: PositionTest          -- ^ How to compare two samples
  -> PValue Double         -- ^ The p-value at which to test (e.g. @mkPValue 0.05@)
  -> (Int, Double, Double) -- ^ The (n,T⁺, T⁻) values from 'wilcoxonMatchedPairSignedRank'.
  -> Maybe TestResult      -- ^ Return 'Nothing' if the sample was too
                           --   small to make a decision.
wilcoxonMatchedPairSignificant test pVal (sampleSize, tPlus, tMinus) =
  case test of
    -- According to my nearest book (Understanding Research Methods and Statistics
    -- by Gary W. Heiman, p590), to check that the first sample is bigger you must
    -- use the absolute value of T- for a one-tailed check:
    AGreater      -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
                        return $ significant $ abs tMinus <= fromIntegral crit
    BGreater      -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
                        return $ significant $ abs tPlus <= fromIntegral crit
    -- Otherwise you must use the value of T+ and T- with the smallest absolute value:
    --
    -- Note that in absence of ties sum of |T+| and |T-| is constant
    -- so by selecting minimal we are performing two-tailed test and
    -- look and both tails of distribution of T.
    SamplesDiffer -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize (mkPValue $ p/2)
                        return $ significant $ t <= fromIntegral crit
  where
    t = min (abs tPlus) (abs tMinus)
    p = pValue pVal


-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level).  Your T value must be less than or
-- equal to the return of this function in order for the test to work out
-- significant.  If there is a Nothing return, the sample size is too small to
-- make a decision.
--
-- 'wilcoxonSignificant' tests the return value of 'wilcoxonMatchedPairSignedRank'
-- for you, so you should use 'wilcoxonSignificant' for determining test results.
--  However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
-- The return values of this function are generated using the method
-- detailed in the Mitic's paper. According to that paper, the results
-- may differ from other published lookup tables, but (Mitic claims)
-- the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue ::
     Int                -- ^ The sample size
  -> PValue Double      -- ^ The p-value (e.g. @mkPValue 0.05@) for which you want the critical value.
  -> Maybe Int          -- ^ The critical value (of T), or Nothing if
                        --   the sample is too small to make a decision.
wilcoxonMatchedPairCriticalValue n pVal
  | n < 100   =
      case subtract 1 <$> findIndex (> m) (summedCoefficients n) of
        Just k | k < 0     -> Nothing
               | otherwise -> Just k
        Nothing  -> error "Statistics.Test.WilcoxonT.wilcoxonMatchedPairCriticalValue: impossible happened"
  | otherwise =
     case quantile (normalApprox n) p of
       z | z < 0     -> Nothing
         | otherwise -> Just (round z)
  where
    p = pValue pVal
    m = (2 ** fromIntegral n) * p


-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
wilcoxonMatchedPairSignificance
  :: Int           -- ^ The sample size
  -> Double        -- ^ The value of T for which you want the significance.
  -> PValue Double -- ^ The significance (p-value).
wilcoxonMatchedPairSignificance n t
  = mkPValue p
  where
    p | n < 100   = (summedCoefficients n !! floor t) / 2 ** fromIntegral n
      | otherwise = cumulative (normalApprox n) t


-- | Normal approximation for Wilcoxon T statistics
normalApprox :: Int -> NormalDistribution
normalApprox ni
  = normalDistr m s
  where
    m = n * (n + 1) / 4
    s = sqrt $ (n * (n + 1) * (2*n + 1)) / 24
    n = fromIntegral ni


-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
-- truncated to the length of the shorter sample.
--
-- For one-tailed test it tests whether first sample is significantly
-- greater than the second. For two-tailed it checks whether they
-- significantly differ
--
-- Check 'wilcoxonMatchedPairSignedRank' and
-- 'wilcoxonMatchedPairSignificant' for additional information.
wilcoxonMatchedPairTest
  :: (Ord a, Num a, U.Unbox a)
  => PositionTest     -- ^ Perform one-tailed test.
  -> U.Vector (a,a)   -- ^ Sample of pairs
  -> Test ()          -- ^ Return 'Nothing' if the sample was too
                      --   small to make a decision.
wilcoxonMatchedPairTest test pairs =
  Test { testSignificance = pVal
       , testStatistics   = t
       , testDistribution = ()
       }
  where
    (n,tPlus,tMinus) = wilcoxonMatchedPairSignedRank pairs
    (t,pVal) = case test of
                 AGreater      -> (abs tMinus, wilcoxonMatchedPairSignificance n (abs tMinus))
                 BGreater      -> (abs tPlus,  wilcoxonMatchedPairSignificance n (abs tPlus ))
                 -- Since we take minimum of T+,T- we can't get more
                 -- that p=0.5 and can multiply it by 2 without risk
                 -- of error.
                 SamplesDiffer -> let t' = min (abs tMinus) (abs tPlus)
                                      p  = wilcoxonMatchedPairSignificance n t'
                                  in (t', mkPValue $ min 1 $ 2 * pValue p)


-- $references
--
-- * \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
--   Mitic, The Mathematica Journal, volume 6, issue 3, 1996
--   (<http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>)