1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
{-# LANGUAGE Rank2Types #-}
-- from base
import qualified Control.Exception as E
import Control.Monad (when, liftM2)
import Data.List (delete, sort, nub)
import Text.Printf (printf)
import Text.Show.Functions ()
-- from hspec
import Test.Hspec (hspec, describe, it, pendingWith, shouldBe, Spec)
import Test.Hspec.QuickCheck (prop)
-- from HUnit
import Test.HUnit (Assertion, assertFailure)
-- from QuickCheck
import Test.QuickCheck (Property, Arbitrary(..), Gen, forAll)
-- from this package
import Data.Clustering.Hierarchical
import qualified Data.Clustering.Hierarchical.Internal.DistanceMatrix as DM
import qualified Data.Clustering.Hierarchical.Internal.Optimal as O
main :: IO ()
main = hspec $ do
test_cutAt
test_dendrogram
test_cutAt :: Spec
test_cutAt =
describe "cutAt" $ do
let dendro :: Dendrogram Char
dendro = Branch 0.8 d_0_8_left d_0_8_right
d_0_8_left = Branch 0.5 d_0_5_left d_0_5_right
d_0_5_left = Branch 0.2 d_0_2_left d_0_2_right
d_0_2_left = Leaf 'A'
d_0_2_right = Leaf 'B'
d_0_5_right = Leaf 'C'
d_0_8_right = Leaf 'D'
let testFor threshold expected =
it (printf "works for 'dendro' with threshold %0.1f" threshold) $
dendro `cutAt` threshold `shouldBe` expected
testFor 0.9 [dendro]
testFor 0.8 [dendro]
testFor 0.7 [d_0_8_left, d_0_8_right]
testFor 0.5 [d_0_8_left, d_0_8_right]
testFor 0.4 [d_0_5_left, d_0_5_right, d_0_8_right]
testFor 0.2 [d_0_5_left, d_0_5_right, d_0_8_right]
testFor 0.1 [d_0_2_left, d_0_2_right, d_0_5_right, d_0_8_right]
test_dendrogram :: Spec
test_dendrogram = do
describe "Optimal's singleLinkage" $ do
basicDendrogramTests O.singleLinkage
prop "really is single linkage" $
propCorrectLinkage O.singleLinkage singleLink
describe "Optimal's completeLinkage" $ do
basicDendrogramTests O.completeLinkage
prop "really is complete linkage" $
propCorrectLinkage O.completeLinkage completeLink
describe "DistanceMatrix's singleLinkage" $ do
basicDendrogramTests DM.singleLinkage
prop "really is single linkage" $
propCorrectLinkage DM.singleLinkage singleLink
describe "DistanceMatrix's completeLinkage" $ do
basicDendrogramTests DM.completeLinkage
prop "really is complete linkage" $
propCorrectLinkage DM.completeLinkage completeLink
describe "DistanceMatrix's upgma" $ do
basicDendrogramTests DM.upgma
prop "really is UPGMA" $
propCorrectLinkage DM.upgma upgma
describe "DistanceMatrix's fakeAverageLinkage" $ do
basicDendrogramTests DM.fakeAverageLinkage
describe "Optimal and DistanceMatrix" $ do
let test f1 f2 = forAll nonNullLists $ \ps ->
f1 ps euclideanDist ==== f2 ps euclideanDist
prop "agree on singleLinkage" $ test O.singleLinkage DM.singleLinkage
it "agree on completeLinkage" $
pendingWith "This doesn't work because CLINK doesn't \
\always give the best complete linkage."
basicDendrogramTests :: (forall a. [a] -> (a -> a -> Distance) -> Dendrogram a) -> Spec
basicDendrogramTests f = do
it "fails for an empty input" $
assertErrors (f [] (\_ _ -> zero))
it "works for one element" $
Leaf () == f [()] undefined
prop "always returns the elements we gave" $
forAll nonNullLists $ \points ->
elements (f points euclideanDist) `isPermutationOf` points
prop "works for examples where all elements have the same distance" $
\fixedDist ->
forAll nonNullLists $ \xs' ->
let xs = nub xs'
okay :: Dendrogram Char -> [Char] -> Maybe [Char]
okay (Leaf z) ys | z `elem` ys = Just (delete z ys)
okay (Branch d l r) ys | d ~= fixedDist = okay l ys >>= okay r
okay _ _ = Nothing
dist x y | x == y = error "shouldn't calculate (dist x x)"
| otherwise = fixedDist
in okay (f xs dist) xs == Just []
----------------------------------------------------------------------
type P = (Double, Double)
propCorrectLinkage :: ([P] -> (P -> P -> Distance) -> Dendrogram P)
-> (D P -> [P] -> [P] -> Distance)
-> Property
propCorrectLinkage f link =
forAll nonNullLists $ \xs -> correctLinkage link d (f xs d)
where d = euclideanDist
type D a = a -> a -> Distance
correctLinkage :: (D a -> [a] -> [a] -> Distance) -> D a -> Dendrogram a -> Bool
correctLinkage link dist = go
where
go (Branch d l r) = go l && go r &&
link dist (elements l) (elements r) ~= d
go (Leaf _) = True
singleLink, completeLink, upgma :: D a -> [a] -> [a] -> Distance
singleLink dist xs ys = minimum [x `dist` y | x <- xs, y <- ys]
completeLink dist xs ys = maximum [x `dist` y | x <- xs, y <- ys]
upgma dist xs ys = sum [x `dist` y | x <- xs, y <- ys] /
fromIntegral (length xs * length ys)
----------------------------------------------------------------------
nonNullLists :: Arbitrary a => Gen [a]
nonNullLists = liftM2 (:) arbitrary arbitrary
isPermutationOf :: Ord a => [a] -> [a] -> Bool
isPermutationOf xs ys = sort xs == sort ys
euclideanDist :: P -> P -> Double
euclideanDist (x1,y1) (x2,y2) = sqrt $ sq (x1-x2) + sq (y1-y2)
where sq x = x * x
(~=) :: Double -> Double -> Bool
a ~= b = abs (a - b) < 1e-5
zero :: Double
zero = 0
assertErrors :: a -> Assertion
assertErrors x = do
b <- E.catch (E.evaluate x >> return True)
(\(E.ErrorCall _) -> return False {- Ok -})
when b $ assertFailure "Didn't raise an 'error'."
-- | Compare two dendrograms without being concerned about
-- permutations.
(====) :: Eq a => Dendrogram a -> Dendrogram a -> Bool
Leaf x1 ==== Leaf x2 = x1 == x2
Branch d1 l1 r1 ==== Branch d2 l2 r2 = d1 ~= d2 && ((l1 ==== l2 && r1 ==== r2) ||
(l1 ==== r2 && r1 ==== l2))
_ ==== _ = False
|