1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric,
FlexibleContexts #-}
-- |
-- Module : Statistics.Sample.Powers
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- Very fast statistics over simple powers of a sample. These can all
-- be computed efficiently in just a single pass over a sample, with
-- that pass subject to stream fusion.
--
-- The tradeoff is that some of these functions are less numerically
-- robust than their counterparts in the 'Statistics.Sample' module.
-- Where this is the case, the alternatives are noted.
module Statistics.Sample.Powers
(
-- * Types
Powers
-- * Constructor
, powers
-- * Descriptive functions
, order
, count
, sum
-- * Statistics of location
, mean
-- * Statistics of dispersion
, variance
, stdDev
, varianceUnbiased
-- * Functions over central moments
, centralMoment
, skewness
, kurtosis
-- * References
-- $references
) where
import Control.Monad.ST
import Data.Aeson (FromJSON, ToJSON)
import Data.Binary (Binary(..))
import Data.Data (Data, Typeable)
import Data.Vector.Binary ()
import Data.Vector.Unboxed ((!))
import GHC.Generics (Generic)
import Numeric.SpecFunctions (choose)
import Prelude hiding (sum)
import Statistics.Function (indexed)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Storable as SV
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as MU
import qualified Statistics.Sample.Internal as S
newtype Powers = Powers (U.Vector Double)
deriving (Eq, Read, Show, Typeable, Data, Generic)
instance FromJSON Powers
instance ToJSON Powers
instance Binary Powers where
put (Powers v) = put v
get = fmap Powers get
-- | O(/n/) Collect the /n/ simple powers of a sample.
--
-- Functions computed over a sample's simple powers require at least a
-- certain number (or /order/) of powers to be collected.
--
-- * To compute the /k/th 'centralMoment', at least /k/ simple powers
-- must be collected.
--
-- * For the 'variance', at least 2 simple powers are needed.
--
-- * For 'skewness', we need at least 3 simple powers.
--
-- * For 'kurtosis', at least 4 simple powers are required.
--
-- This function is subject to stream fusion.
powers :: G.Vector v Double =>
Int -- ^ /n/, the number of powers, where /n/ >= 2.
-> v Double
-> Powers
powers k sample
| k < 2 = error "Statistics.Sample.powers: too few powers"
| otherwise = runST $ do
acc <- MU.replicate l 0
G.forM_ sample $ \x ->
let loop !i !xk
| i == l = return ()
| otherwise = do MU.write acc i . (+ xk) =<< MU.read acc i
loop (i+1) (xk * x)
in loop 0 1
fmap Powers $ U.unsafeFreeze acc
where
l = k + 1
{-# SPECIALIZE powers :: Int -> U.Vector Double -> Powers #-}
{-# SPECIALIZE powers :: Int -> V.Vector Double -> Powers #-}
{-# SPECIALIZE powers :: Int -> SV.Vector Double -> Powers #-}
-- | The order (number) of simple powers collected from a 'sample'.
order :: Powers -> Int
order (Powers pa) = U.length pa - 1
-- | Compute the /k/th central moment of a sample. The central
-- moment is also known as the moment about the mean.
centralMoment :: Int -> Powers -> Double
centralMoment k p@(Powers pa)
| k < 0 || k > order p =
error ("Statistics.Sample.Powers.centralMoment: "
++ "invalid argument")
| k == 0 = 1
| otherwise = (/n) . S.sum . U.map go . indexed . U.take (k+1) $ pa
where
go (i , e) = (k `choose` i) * ((-m) ^ (k-i)) * e
n = U.head pa
m = mean p
-- | Maximum likelihood estimate of a sample's variance. Also known
-- as the population variance, where the denominator is /n/. This is
-- the second central moment of the sample.
--
-- This is less numerically robust than the variance function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
--
-- Requires 'Powers' with 'order' at least 2.
variance :: Powers -> Double
variance = centralMoment 2
-- | Standard deviation. This is simply the square root of the
-- maximum likelihood estimate of the variance.
stdDev :: Powers -> Double
stdDev = sqrt . variance
-- | Unbiased estimate of a sample's variance. Also known as the
-- sample variance, where the denominator is /n/-1.
--
-- Requires 'Powers' with 'order' at least 2.
varianceUnbiased :: Powers -> Double
varianceUnbiased p@(Powers pa)
| n > 1 = variance p * n / (n-1)
| otherwise = 0
where n = U.head pa
-- | Compute the skewness of a sample. This is a measure of the
-- asymmetry of its distribution.
--
-- A sample with negative skew is said to be /left-skewed/. Most of
-- its mass is on the right of the distribution, with the tail on the
-- left.
--
-- > skewness . powers 3 $ U.to [1,100,101,102,103]
-- > ==> -1.497681449918257
--
-- A sample with positive skew is said to be /right-skewed/.
--
-- > skewness . powers 3 $ U.to [1,2,3,4,100]
-- > ==> 1.4975367033335198
--
-- A sample's skewness is not defined if its 'variance' is zero.
--
-- Requires 'Powers' with 'order' at least 3.
skewness :: Powers -> Double
skewness p = centralMoment 3 p * variance p ** (-1.5)
-- | Compute the excess kurtosis of a sample. This is a measure of
-- the \"peakedness\" of its distribution. A high kurtosis indicates
-- that the sample's variance is due more to infrequent severe
-- deviations than to frequent modest deviations.
--
-- A sample's excess kurtosis is not defined if its 'variance' is
-- zero.
--
-- Requires 'Powers' with 'order' at least 4.
kurtosis :: Powers -> Double
kurtosis p = centralMoment 4 p / (v * v) - 3
where v = variance p
-- | The number of elements in the original 'Sample'. This is the
-- sample's zeroth simple power.
count :: Powers -> Int
count (Powers pa) = floor $ U.head pa
-- | The sum of elements in the original 'Sample'. This is the
-- sample's first simple power.
sum :: Powers -> Double
sum (Powers pa) = pa ! 1
-- | The arithmetic mean of elements in the original 'Sample'.
--
-- This is less numerically robust than the mean function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
mean :: Powers -> Double
mean p@(Powers pa)
| n == 0 = 0
| otherwise = sum p / n
where n = U.head pa
-- $references
--
-- * Besset, D.H. (2000) Elements of statistics.
-- /Object-oriented implementation of numerical methods/
-- ch. 9, pp. 311–331.
-- <http://www.elsevier.com/wps/product/cws_home/677916>
--
-- * Anderson, G. (2009) Compute /k/th central moments in one
-- pass. /quantblog/. <http://quantblog.wordpress.com/2009/02/07/compute-kth-central-moments-in-one-pass/>
|