File: Binomial.hs

package info (click to toggle)
haskell-statistics 0.5.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 180 kB
  • sloc: haskell: 1,265; makefile: 2
file content (138 lines) | stat: -rw-r--r-- 4,573 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
{-# LANGUAGE DeriveDataTypeable #-}
-- |
-- Module    : Statistics.Distribution.Binomial
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The binomial distribution.  This is the discrete probability
-- distribution of the number of successes in a sequence of /n/
-- independent yes\/no experiments, each of which yields success with
-- probability /p/.

module Statistics.Distribution.Binomial
    (
      BinomialDistribution
    -- * Constructors
    , binomial
    -- * Accessors
    , bdTrials
    , bdProbability
    ) where

import Control.Exception (assert)
import qualified Data.Vector.Unboxed as U
import Data.Int (Int64)
import Data.Typeable (Typeable)
import Statistics.Constants (m_epsilon)
import qualified Statistics.Distribution as D
import Statistics.Distribution.Normal (standard)
import Statistics.Math (choose, logFactorial)

-- | The binomial distribution.
data BinomialDistribution = BD {
      bdTrials      :: {-# UNPACK #-} !Int
    -- ^ Number of trials.
    , bdProbability :: {-# UNPACK #-} !Double
    -- ^ Probability.
    } deriving (Eq, Read, Show, Typeable)

instance D.Distribution BinomialDistribution where
    density    = density
    cumulative = cumulative
    quantile   = quantile

instance D.Variance BinomialDistribution where
    variance = variance

instance D.Mean BinomialDistribution where
    mean = mean

density :: BinomialDistribution -> Double -> Double
density (BD n p) x
    | not (isIntegral x) = integralError "density"
    | n == 0             = 1
    | x < 0 || x > n'    = 0
    | n <= 50 || x < 2   = sign * p'' ** x' * (n `choose` fx) * q'' ** nx'
    | otherwise          = sign * exp (x' * log p'' + nx' * log q'' + lf)
  where sign = oddX * oddNX
        (x',p',q') | x > n' / 2 = (n'-x, q, p)
                   | otherwise  = (x,    p, q)
        oddX | p' < 0 && odd fx     = -1
             | otherwise            = 1
        oddNX | q' < 0 && odd nx    = -1
              | otherwise           = 1
        p'' = abs p'
        q'' = abs q'
        q   = 1 - p
        nx  = n - fx
        nx' = fromIntegral nx
        fx  = floor x'
        n'  = fromIntegral n
        lf  = logFactorial n - logFactorial nx - logFactorial fx

cumulative :: BinomialDistribution -> Double -> Double
cumulative d x
  | isIntegral x = U.sum . U.map (density d . fromIntegral) . U.enumFromTo (0::Int) . floor $ x
  | otherwise    = integralError "cumulative"

isIntegral :: Double -> Bool
isIntegral x = x == floorf x

floorf :: Double -> Double
floorf = fromIntegral . (floor :: Double -> Int64)

quantile :: BinomialDistribution -> Double -> Double
quantile dist@(BD n p) prob
    | isNaN prob = prob
    | p == 1     = n'
    | n' < 1e5   = fst (search 1 y0 z0)
    | otherwise  = let dy = floorf (n' / 1000)
                   in  narrow dy (search dy y0 z0)
  where q  = 1 - p
        n' = fromIntegral n
        y0 = n' `min` floorf (µ + σ * (d + γ * (d * d - 1) / 6) + 0.5)
          where µ  = n' * p
                σ  = sqrt (n' * p * q)
                d = D.quantile standard prob
                γ  = (q - p) / σ
        z0 = cumulative dist y0
        search dy y1 z1 | z0 >= prob' = left y1 z1
                        | otherwise   = right y1
          where
            prob' = prob * (1 - 64 * m_epsilon)
            left y oldZ | y == 0 || z < prob' = (y, oldZ)
                        | otherwise           = left (max 0 y') z
                where z  = cumulative dist y'
                      y' = y - dy
            right y | y' >= n' || z >= prob' = (y', z)
                    | otherwise              = right y'
                where z  = cumulative dist y'
                      y' = y + dy
        narrow dy (y,z) | dy <= 1 || dy' <= n'/1e15 = y
                        | otherwise                 = narrow dy' (search dy y z)
            where dy' = floorf (dy / 100)

mean :: BinomialDistribution -> Double
mean (BD n p) = fromIntegral n * p
{-# INLINE mean #-}

variance :: BinomialDistribution -> Double
variance (BD n p) = fromIntegral n * p * (1 - p)
{-# INLINE variance #-}

binomial :: Int                 -- ^ Number of trials.
         -> Double              -- ^ Probability.
         -> BinomialDistribution
binomial n p =
    assert (n > 0) .
    assert (p > 0 && p < 1) $
    BD n p
{-# INLINE binomial #-}

integralError :: String -> a
integralError f = error ("Statistics.Distribution.Binomial." ++ f ++
                         ": non-integer-valued input")