File: WilcoxonT.hs

package info (click to toggle)
haskell-statistics 0.10.1.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 304 kB
  • sloc: haskell: 2,225; python: 33; makefile: 2
file content (185 lines) | stat: -rw-r--r-- 8,824 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
-- |
-- Module    : Statistics.Test.WilcoxonT
-- Copyright : (c) 2010 Neil Brown
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The Wilcoxon matched-pairs signed-rank test is non-parametric test
-- which could be used to whether two related samples have different
-- means.
--
-- WARNING: current implementation contain critical bugs
-- <https://github.com/bos/statistics/issues/18>
module Statistics.Test.WilcoxonT (
    -- * Wilcoxon signed-rank matched-pair test
    wilcoxonMatchedPairTest
  , wilcoxonMatchedPairSignedRank
  , wilcoxonMatchedPairSignificant
  , wilcoxonMatchedPairSignificance
  , wilcoxonMatchedPairCriticalValue
    -- * Data types
  , TestType(..)
  , TestResult(..)
  ) where

import Control.Applicative ((<$>))
import Data.Function       (on)
import Data.List           (findIndex)
import Data.Ord            (comparing)
import qualified Data.Vector.Unboxed as U

import Statistics.Types               (Sample)
import Statistics.Function            (sortBy)
import Statistics.Test.Types
import Statistics.Test.Internal


-- | The Wilcoxon matched-pairs signed-rank test.
--
-- The value returned is the pair (T+, T-).  T+ is the sum of positive ranks (the
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
-- These values mean little by themselves, and should be combined with the 'wilcoxonSignificant'
-- function in this module to get a meaningful result.
--
-- The samples are zipped together: if one is longer than the other, both are truncated
-- to the the length of the shorter sample.
--
-- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . flip wilcoxonMatchedPairSignedRank
wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double)
wilcoxonMatchedPairSignedRank a b = (          U.sum ranks1
                                    , negate $ U.sum ranks2
                                    )
  where
    (ranks1, ranks2) = splitByTags
                     $ U.zip tags (rank ((==) `on` abs) diffs)
    (tags,diffs) = U.unzip
                 $ U.map (\x -> (x>0 , x))   -- Attack tags to distribution elements
                 $ U.filter  (/= 0.0)        -- Remove equal elements
                 $ sortBy (comparing abs)    -- Sort the differences by absolute difference
                 $ U.zipWith (-) a b         -- Work out differences


-- | The coefficients for x^0, x^1, x^2, etc, in the expression
-- \prod_{r=1}^s (1 + x^r).  See the Mitic paper for details.
--
-- We can define:
-- f(1) = 1 + x
-- f(r) = (1 + x^r)*f(r-1)
--      = f(r-1) + x^r * f(r-1)
-- The effect of multiplying the equation by x^r is to shift
-- all the coefficients by r down the list.
--
-- This list will be processed lazily from the head.
coefficients :: Int -> [Int]
coefficients 1 = [1, 1] -- 1 + x
coefficients r = let coeffs = coefficients (r-1)
                     (firstR, rest) = splitAt r coeffs
  in firstR ++ add rest coeffs
  where
    add (x:xs) (y:ys) = x + y : add xs ys
    add xs [] = xs
    add [] ys = ys

-- This list will be processed lazily from the head.
summedCoefficients :: Int -> [Double]
summedCoefficients = map fromIntegral . scanl1 (+) . coefficients

-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
-- This function can perform a one-tailed or two-tailed test.  If the first
-- parameter to this function is 'TwoTailed', the test is performed two-tailed to
-- check if the two samples differ significantly.  If the first parameter is
-- 'OneTailed', the check is performed one-tailed to decide whether the first sample
-- (i.e. the first sample you passed to 'wilcoxonMatchedPairSignedRank') is
-- greater than the second sample (i.e. the second sample you passed to
-- 'wilcoxonMatchedPairSignedRank').  If you wish to perform a one-tailed test
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
wilcoxonMatchedPairSignificant ::
     TestType            -- ^ Perform one- or two-tailed test (see description below).
  -> Int                 -- ^ The sample size from which the (T+,T-) values were derived.
  -> Double              -- ^ The p-value at which to test (e.g. 0.05)
  -> (Double, Double)    -- ^ The (T+, T-) values from 'wilcoxonMatchedPairSignedRank'.
  -> Maybe TestResult    -- ^ Return 'Nothing' if the sample was too
                         --   small to make a decision.
wilcoxonMatchedPairSignificant test sampleSize p (tPlus, tMinus) =
  case test of
    -- According to my nearest book (Understanding Research Methods and Statistics
    -- by Gary W. Heiman, p590), to check that the first sample is bigger you must
    -- use the absolute value of T- for a one-tailed check:
    OneTailed -> (significant . (abs tMinus <=) . fromIntegral) <$> wilcoxonMatchedPairCriticalValue sampleSize p
    -- Otherwise you must use the value of T+ and T- with the smallest absolute value:
    TwoTailed -> (significant . (t <=) . fromIntegral) <$> wilcoxonMatchedPairCriticalValue sampleSize (p/2)
  where
    t = min (abs tPlus) (abs tMinus)

-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level).  Your T value must be less than or
-- equal to the return of this function in order for the test to work out
-- significant.  If there is a Nothing return, the sample size is too small to
-- make a decision.
--
-- 'wilcoxonSignificant' tests the return value of 'wilcoxonMatchedPairSignedRank'
-- for you, so you should use 'wilcoxonSignificant' for determining test results.
--  However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
-- The return values of this function are generated using the method detailed in
-- the paper \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
-- Mitic, The Mathematica Journal, volume 6, issue 3, 1996, which can be found
-- here: <http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>.
-- According to that paper, the results may differ from other published lookup tables, but
-- (Mitic claims) the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue ::
     Int                -- ^ The sample size
  -> Double             -- ^ The p-value (e.g. 0.05) for which you want the critical value.
  -> Maybe Int          -- ^ The critical value (of T), or Nothing if
                        --   the sample is too small to make a decision.
wilcoxonMatchedPairCriticalValue sampleSize p
  = case critical of
      Just n | n < 0 -> Nothing
             | otherwise -> Just n
      Nothing -> Just maxBound -- shouldn't happen: beyond end of list
  where
    m = (2 ** fromIntegral sampleSize) * p
    critical = subtract 1 <$> findIndex (> m) (summedCoefficients sampleSize)

-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
wilcoxonMatchedPairSignificance :: Int    -- ^ The sample size
                                -> Double -- ^ The value of T for which you want the significance.
                                -> Double -- ^ The significance (p-value).
wilcoxonMatchedPairSignificance sampleSize rnk
  = (summedCoefficients sampleSize !! floor rnk) / 2 ** fromIntegral sampleSize

-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
-- truncated to the the length of the shorter sample.
--
-- For one-tailed test it tests whether first sample is significantly
-- greater than the second. For two-tailed it checks whether they
-- significantly differ
--
-- Check 'wilcoxonMatchedPairSignedRank' and
-- 'wilcoxonMatchedPairSignificant' for additional information.
wilcoxonMatchedPairTest :: TestType   -- ^ Perform one-tailed test.
                        -> Double     -- ^ The p-value at which to test (e.g. 0.05)
                        -> Sample     -- ^ First sample
                        -> Sample     -- ^ Second sample
                        -> Maybe TestResult
                        -- ^ Return 'Nothing' if the sample was too
                        --   small to make a decision.
wilcoxonMatchedPairTest test p smp1 smp2 =
    wilcoxonMatchedPairSignificant test (min n1 n2) p
  $ wilcoxonMatchedPairSignedRank smp1 smp2
  where
    n1 = U.length smp1
    n2 = U.length smp2