1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
-- |
-- Module : Statistics.Distribution
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- Types classes for probability distrubutions
module Statistics.Distribution
(
-- * Type classes
Distribution(..)
, DiscreteDistr(..)
, ContDistr(..)
-- ** Distribution statistics
, MaybeMean(..)
, Mean(..)
, MaybeVariance(..)
, Variance(..)
-- ** Random number generation
, ContGen(..)
, DiscreteGen(..)
, genContinous
-- * Helper functions
, findRoot
, sumProbabilities
) where
import Control.Applicative ((<$>), Applicative(..))
import Control.Monad.Primitive (PrimMonad,PrimState)
import qualified Data.Vector.Unboxed as U
import System.Random.MWC
-- | Type class common to all distributions. Only c.d.f. could be
-- defined for both discrete and continous distributions.
class Distribution d where
-- | Cumulative distribution function. The probability that a
-- random variable /X/ is less or equal than /x/,
-- i.e. P(/X/≤/x/).
cumulative :: d -> Double -> Double
-- | One's complement of cumulative distibution:
--
-- > complCumulative d x = 1 - cumulative d x
--
-- It's useful when one is interested in P(/X/≥/x/) and
-- expression on the right side begin to lose precision. This
-- function have default implementation but implementors are
-- encouraged to provide more precise implementation
complCumulative :: d -> Double -> Double
complCumulative d x = 1 - cumulative d x
-- | Discrete probability distribution.
class Distribution d => DiscreteDistr d where
-- | Probability of n-th outcome.
probability :: d -> Int -> Double
-- | Continuous probability distributuion
class Distribution d => ContDistr d where
-- | Probability density function. Probability that random
-- variable /X/ lies in the infinitesimal interval
-- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/
density :: d -> Double -> Double
-- | Inverse of the cumulative distribution function. The value
-- /x/ for which P(/X/≤/x/) = /p/. If probability is outside
-- of [0,1] range function should call 'error'
quantile :: d -> Double -> Double
-- | Type class for distributions with mean. 'maybeMean' should return
-- 'Nothing' if it's undefined for current value of data
class Distribution d => MaybeMean d where
maybeMean :: d -> Maybe Double
-- | Type class for distributions with mean. If distribution have
-- finite mean for all valid values of parameters it should be
-- instance of this type class.
class MaybeMean d => Mean d where
mean :: d -> Double
-- | Type class for distributions with variance. If variance is
-- undefined for some parameter values both 'maybeVariance' and
-- 'maybeStdDev' should return Nothing.
--
-- Minimal complete definition is 'maybeVariance' or 'maybeStdDev'
class MaybeMean d => MaybeVariance d where
maybeVariance :: d -> Maybe Double
maybeVariance d = (*) <$> x <*> x where x = maybeStdDev d
maybeStdDev :: d -> Maybe Double
maybeStdDev = fmap sqrt . maybeVariance
-- | Type class for distributions with variance. If distibution have
-- finite variance for all valid parameter values it should be
-- instance of this type class.
--
-- Minimal complete definition is 'variance' or 'stdDev'
class (Mean d, MaybeVariance d) => Variance d where
variance :: d -> Double
variance d = x * x where x = stdDev d
stdDev :: d -> Double
stdDev = sqrt . variance
-- | Generate discrete random variates which have given
-- distribution.
class Distribution d => ContGen d where
genContVar :: PrimMonad m => d -> Gen (PrimState m) -> m Double
-- | Generate discrete random variates which have given
-- distribution. 'ContGen' is superclass because it's always possible
-- to generate real-valued variates from integer values
class (DiscreteDistr d, ContGen d) => DiscreteGen d where
genDiscreteVar :: PrimMonad m => d -> Gen (PrimState m) -> m Int
-- | Generate variates from continous distribution using inverse
-- transform rule.
genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double
genContinous d gen = do
x <- uniform gen
return $! quantile d x
{-# INLINE genContinous #-}
data P = P {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.
--
-- This method uses a combination of Newton-Raphson iteration and
-- bisection with the given guess as a starting point. The upper and
-- lower bounds specify the interval in which the probability
-- distribution reaches the value /p/.
findRoot :: ContDistr d =>
d -- ^ Distribution
-> Double -- ^ Probability /p/
-> Double -- ^ Initial guess
-> Double -- ^ Lower bound on interval
-> Double -- ^ Upper bound on interval
-> Double
findRoot d prob = loop 0 1
where
loop !(i::Int) !dx !x !lo !hi
| abs dx <= accuracy || i >= maxIters = x
| otherwise = loop (i+1) dx'' x'' lo' hi'
where
err = cumulative d x - prob
P lo' hi' | err < 0 = P x hi
| otherwise = P lo x
pdf = density d x
P dx' x' | pdf /= 0 = P (err / pdf) (x - dx)
| otherwise = P dx x
P dx'' x''
| x' < lo' || x' > hi' || pdf == 0 = let y = (lo' + hi') / 2
in P (y-x) y
| otherwise = P dx' x'
accuracy = 1e-15
maxIters = 150
-- | Sum probabilities in inclusive interval.
sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double
sumProbabilities d low hi =
-- Return value is forced to be less than 1 to guard againist roundoff errors.
-- ATTENTION! this check should be removed for testing or it could mask bugs.
min 1 . U.sum . U.map (probability d) $ U.enumFromTo low hi
{-# INLINE sumProbabilities #-}
|