File: ChiSquared.hs

package info (click to toggle)
haskell-statistics 0.10.2.0-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 372 kB
  • ctags: 3
  • sloc: haskell: 2,976; python: 33; makefile: 2
file content (39 lines) | stat: -rw-r--r-- 1,579 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{-# LANGUAGE FlexibleContexts #-}
-- | Pearson's chi squared test.
module Statistics.Test.ChiSquared (
    chi2test
    -- * Data types
  , TestType(..)
  , TestResult(..)
  ) where

import qualified Data.Vector.Generic as G

import Statistics.Distribution
import Statistics.Distribution.ChiSquared
import Statistics.Test.Types


-- | Generic form of Pearson's chi squared test for binned data. The
--   data sample is supplied as tuples (observed quantity, expected
--   number of events). Both must be positive; this is not checked
--   here.
--
--   The test statistic is the sum over all bins of
--   @(observed - expected)^2 / expected@. It is compared against a
--   chi-squared distribution with @length vec - ndf - 1@ degrees of
--   freedom, and the result is @significant (complCumulative d chi2 < p)@.
--
--   Calls 'error' when the number of additional degrees of freedom is
--   negative, when the sample is too short to leave at least one
--   degree of freedom, or when the p-value lies outside the open
--   interval (0,1).
chi2test :: (G.Vector v (Int,Double), G.Vector v Double)
         => Double              -- ^ p-value; must satisfy @0 < p < 1@.
         -> Int                 -- ^ Number of additional degrees of
                                --   freedom. One degree of freedom
                                --   is due to the fact that there are
                                --   N observations in total and is
                                --   accounted for automatically.
         -> v (Int,Double)      -- ^ Observation and expectation.
         -> TestResult
chi2test p ndf vec
  | ndf < 0        = error $ "Statistics.Test.ChiSquare.chi2test: negative NDF " ++ show ndf
    -- n == 0 leaves no degrees of freedom for the reference
    -- distribution, so it is rejected here together with negative n
    -- instead of being passed on to 'chiSquared'.
  | n   <= 0       = error $ "Statistics.Test.ChiSquare.chi2test: too short data sample"
  | p > 0 && p < 1 = significant $ complCumulative d chi2 < p
  | otherwise      = error $ "Statistics.Test.ChiSquare.chi2test: bad p-value: " ++ show p
  where
    -- Degrees of freedom left after the automatic one and the
    -- caller-supplied additional ones have been subtracted.
    n     = G.length vec - ndf - 1
    -- Pearson's statistic: squared deviation scaled by the expectation.
    chi2  = G.sum $ G.map (\(o,e) -> sqr (fromIntegral o - e) / e) vec
    d     = chiSquared n
    sqr x = x * x
{-# INLINE chi2test #-}