1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
{-# LANGUAGE DeriveDataTypeable, OverloadedStrings, RecordWildCards #-}
-- |
-- Module : Statistics.Resampling.Bootstrap
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- The bootstrap method for statistical inference.
module Statistics.Resampling.Bootstrap
(
Estimate(..)
, bootstrapBCA
, scale
-- * References
-- $references
) where
import Control.DeepSeq (NFData)
import Control.Exception (assert)
import Control.Monad.Par (runPar, parMap)
import Data.Data (Data)
import Data.Typeable (Typeable)
import Data.Vector.Unboxed ((!))
import Statistics.Distribution (cumulative, quantile)
import Statistics.Distribution.Normal
import Statistics.Resampling (Resample(..), jackknife)
import Statistics.Sample (mean)
import Statistics.Types (Estimator, Sample)
import qualified Data.Vector.Unboxed as U
-- | A point and interval estimate computed via an 'Estimator'.
data Estimate = Estimate {
estPoint :: {-# UNPACK #-} !Double
-- ^ Point estimate.
, estLowerBound :: {-# UNPACK #-} !Double
-- ^ Lower bound of the estimate interval (i.e. the lower bound of
-- the confidence interval).
, estUpperBound :: {-# UNPACK #-} !Double
-- ^ Upper bound of the estimate interval (i.e. the upper bound of
-- the confidence interval).
, estConfidenceLevel :: {-# UNPACK #-} !Double
-- ^ Confidence level of the confidence intervals.
} deriving (Eq, Show, Typeable, Data)
instance NFData Estimate
-- | Multiply the point, lower bound, and upper bound in an 'Estimate'
-- by the given value.
scale :: Double -- ^ Value to multiply by.
-> Estimate -> Estimate
scale f e@Estimate{..} = e {
estPoint = f * estPoint
, estLowerBound = f * estLowerBound
, estUpperBound = f * estUpperBound
}
estimate :: Double -> Double -> Double -> Double -> Estimate
estimate pt lb ub cl =
assert (lb <= ub) .
assert (cl > 0 && cl < 1) $
Estimate { estPoint = pt
, estLowerBound = lb
, estUpperBound = ub
, estConfidenceLevel = cl
}
data T = {-# UNPACK #-} !Double :< {-# UNPACK #-} !Double
infixl 2 :<
-- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both
-- bias and skewness in the resampled distribution.
bootstrapBCA :: Double -- ^ Confidence level
-> Sample -- ^ Sample data
-> [Estimator] -- ^ Estimators
-> [Resample] -- ^ Resampled data
-> [Estimate]
bootstrapBCA confidenceLevel sample estimators resamples =
assert (confidenceLevel > 0 && confidenceLevel < 1)
runPar $ parMap (uncurry e) (zip estimators resamples)
where
e est (Resample resample)
| U.length sample == 1 = estimate pt pt pt confidenceLevel
| otherwise =
estimate pt (resample ! lo) (resample ! hi) confidenceLevel
where
pt = est sample
lo = max (cumn a1) 0
where a1 = bias + b1 / (1 - accel * b1)
b1 = bias + z1
hi = min (cumn a2) (ni - 1)
where a2 = bias + b2 / (1 - accel * b2)
b2 = bias - z1
z1 = quantile standard ((1 - confidenceLevel) / 2)
cumn = round . (*n) . cumulative standard
bias = quantile standard (probN / n)
where probN = fromIntegral . U.length . U.filter (<pt) $ resample
ni = U.length resample
n = fromIntegral ni
accel = sumCubes / (6 * (sumSquares ** 1.5))
where (sumSquares :< sumCubes) = U.foldl' f (0 :< 0) jack
f (s :< c) j = s + d2 :< c + d2 * d
where d = jackMean - j
d2 = d * d
jackMean = mean jack
jack = jackknife est sample
-- $references
--
-- * Davison, A.C; Hinkley, D.V. (1997) Bootstrap methods and their
-- application. <http://statwww.epfl.ch/davison/BMA/>
|