1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
{-# LANGUAGE FlexibleContexts #-}
module Statistics.Test.Internal (
rank
, rankUnsorted
, splitByTags
) where
import Data.Ord
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as M
import Statistics.Function
-- Private data type for unfolding
data Rank v a = Rank {
rankCnt :: {-# UNPACK #-} !Int -- Number of ranks to return
, rankVal :: {-# UNPACK #-} !Double -- Rank to return
, rankNum :: {-# UNPACK #-} !Double -- Current rank
, rankVec :: v a -- Remaining vector
}
-- | Calculate rank of every element of sample. In case of ties ranks
-- are averaged. Sample should be already sorted in ascending order.
--
-- Rank is index of element in the sample, numeration starts from 1.
-- In case of ties average of ranks of equal elements is assigned
-- to each
--
-- >>> rank (==) (fromList [10,20,30::Int])
-- > fromList [1.0,2.0,3.0]
--
-- >>> rank (==) (fromList [10,10,10,30::Int])
-- > fromList [2.0,2.0,2.0,4.0]
rank :: (G.Vector v a, G.Vector v Double)
=> (a -> a -> Bool) -- ^ Equivalence relation
-> v a -- ^ Vector to rank
-> v Double
rank eq vec = G.unfoldr go (Rank 0 (-1) 1 vec)
where
go (Rank 0 _ r v)
| G.null v = Nothing
| otherwise =
case G.length h of
1 -> Just (r, Rank 0 0 (r+1) rest)
n -> go Rank { rankCnt = n
, rankVal = 0.5 * (r*2 + fromIntegral (n-1))
, rankNum = r + fromIntegral n
, rankVec = rest
}
where
(h,rest) = G.span (eq $ G.head v) v
go (Rank n val r v) = Just (val, Rank (n-1) val r v)
{-# INLINE rank #-}
-- | Compute rank of every element of vector. Unlike rank it doesn't
-- require sample to be sorted.
rankUnsorted :: ( Ord a
, G.Vector v a
, G.Vector v Int
, G.Vector v Double
, G.Vector v (Int, a)
)
=> v a
-> v Double
rankUnsorted xs = G.create $ do
-- Put ranks into their original positions
-- NOTE: backpermute will do wrong thing
vec <- M.new n
for 0 n $ \i ->
M.unsafeWrite vec (index ! i) (ranks ! i)
return vec
where
n = G.length xs
-- Calculate ranks for sorted array
ranks = rank (==) sorted
-- Sort vector and retain original indices of elements
(index, sorted)
= G.unzip
$ sortBy (comparing snd)
$ indexed xs
{-# INLINE rankUnsorted #-}
-- | Split tagged vector
splitByTags :: (G.Vector v a, G.Vector v (Bool,a)) => v (Bool,a) -> (v a, v a)
splitByTags vs = (G.map snd a, G.map snd b)
where
(a,b) = G.unstablePartition fst vs
{-# INLINE splitByTags #-}
|