File: runtests.hs

package info (click to toggle)
haskell-hierarchical-clustering 0.4.7-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 112 kB
  • sloc: haskell: 488; makefile: 4
file content (175 lines) | stat: -rw-r--r-- 6,166 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
{-# LANGUAGE Rank2Types #-}

-- from base
import qualified Control.Exception as E
import Control.Monad (when, liftM2)
import Data.List (delete, sort, nub)
import Text.Printf (printf)
import Text.Show.Functions ()

-- from hspec
import Test.Hspec (hspec, describe, it, pendingWith, shouldBe, Spec)
import Test.Hspec.QuickCheck (prop)

-- from HUnit
import Test.HUnit (Assertion, assertFailure)

-- from QuickCheck
import Test.QuickCheck (Property, Arbitrary(..), Gen, forAll)

-- from this package
import Data.Clustering.Hierarchical
import qualified Data.Clustering.Hierarchical.Internal.DistanceMatrix as DM
import qualified Data.Clustering.Hierarchical.Internal.Optimal as O


main :: IO ()
main = hspec $ do
         test_cutAt
         test_dendrogram

test_cutAt :: Spec
test_cutAt =
    describe "cutAt" $ do
      let dendro      :: Dendrogram Char
          dendro      = Branch 0.8 d_0_8_left d_0_8_right
          d_0_8_left  =   Branch 0.5 d_0_5_left d_0_5_right
          d_0_5_left  =     Branch 0.2 d_0_2_left d_0_2_right
          d_0_2_left  =       Leaf 'A'
          d_0_2_right =       Leaf 'B'
          d_0_5_right =     Leaf 'C'
          d_0_8_right =   Leaf 'D'

      let testFor threshold expected =
              it (printf "works for 'dendro' with threshold %0.1f" threshold) $
                 dendro `cutAt` threshold `shouldBe` expected

      testFor 0.9 [dendro]
      testFor 0.8 [dendro]
      testFor 0.7 [d_0_8_left, d_0_8_right]
      testFor 0.5 [d_0_8_left, d_0_8_right]
      testFor 0.4 [d_0_5_left, d_0_5_right, d_0_8_right]
      testFor 0.2 [d_0_5_left, d_0_5_right, d_0_8_right]
      testFor 0.1 [d_0_2_left, d_0_2_right, d_0_5_right, d_0_8_right]

test_dendrogram :: Spec
test_dendrogram = do
    describe "Optimal's singleLinkage" $ do
      basicDendrogramTests O.singleLinkage
      prop "really is single linkage" $
        propCorrectLinkage O.singleLinkage singleLink

    describe "Optimal's completeLinkage" $ do
      basicDendrogramTests O.completeLinkage
      prop "really is complete linkage" $
        propCorrectLinkage O.completeLinkage completeLink

    describe "DistanceMatrix's singleLinkage" $ do
      basicDendrogramTests DM.singleLinkage
      prop "really is single linkage" $
        propCorrectLinkage DM.singleLinkage singleLink

    describe "DistanceMatrix's completeLinkage" $ do
      basicDendrogramTests DM.completeLinkage
      prop "really is complete linkage" $
        propCorrectLinkage DM.completeLinkage completeLink

    describe "DistanceMatrix's upgma" $ do
      basicDendrogramTests DM.upgma
      prop "really is UPGMA" $
        propCorrectLinkage DM.upgma upgma

    describe "DistanceMatrix's fakeAverageLinkage" $ do
      basicDendrogramTests DM.fakeAverageLinkage

    describe "Optimal and DistanceMatrix" $ do
      let test f1 f2 = forAll nonNullLists $ \ps ->
                       f1 ps euclideanDist ==== f2 ps euclideanDist
      prop "agree on singleLinkage"   $ test O.singleLinkage DM.singleLinkage
      it "agree on completeLinkage" $
         pendingWith "This doesn't work because CLINK doesn't \
                     \always give the best complete linkage."


basicDendrogramTests :: (forall a. [a] -> (a -> a -> Distance) -> Dendrogram a) -> Spec
basicDendrogramTests f = do
  it "fails for an empty input" $
     assertErrors (f [] (\_ _ -> zero))
  it "works for one element" $
     Leaf () == f [()] undefined
  prop "always returns the elements we gave" $
     forAll nonNullLists $ \points ->
       elements (f points euclideanDist) `isPermutationOf` points
  prop "works for examples where all elements have the same distance" $
     \fixedDist ->
     forAll nonNullLists $ \xs' ->
         let xs = nub xs'

             okay :: Dendrogram Char -> [Char] -> Maybe [Char]
             okay (Leaf z)       ys | z `elem` ys    = Just (delete z ys)
             okay (Branch d l r) ys | d ~= fixedDist = okay l ys >>= okay r
             okay _ _ = Nothing

             dist x y | x == y    = error "shouldn't calculate (dist x x)"
                      | otherwise = fixedDist

         in okay (f xs dist) xs == Just []

----------------------------------------------------------------------

type P = (Double, Double)

propCorrectLinkage :: ([P] -> (P -> P -> Distance) -> Dendrogram P)
                   -> (D P -> [P] -> [P] -> Distance)
                   -> Property
propCorrectLinkage f link =
    forAll nonNullLists $ \xs -> correctLinkage link d (f xs d)
        where d = euclideanDist

type D a = a -> a -> Distance

correctLinkage :: (D a -> [a] -> [a] -> Distance) -> D a -> Dendrogram a -> Bool
correctLinkage link dist = go
    where
      go (Branch d l r) = go l && go r &&
                          link dist (elements l) (elements r) ~= d
      go (Leaf _) = True

singleLink, completeLink, upgma :: D a -> [a] -> [a] -> Distance
singleLink   dist xs ys = minimum [x `dist` y | x <- xs, y <- ys]
completeLink dist xs ys = maximum [x `dist` y | x <- xs, y <- ys]
upgma        dist xs ys = sum [x `dist` y | x <- xs, y <- ys] /
                          fromIntegral (length xs * length ys)

----------------------------------------------------------------------

nonNullLists :: Arbitrary a => Gen [a]
nonNullLists = liftM2 (:) arbitrary arbitrary

isPermutationOf :: Ord a => [a] -> [a] -> Bool
isPermutationOf xs ys = sort xs == sort ys

euclideanDist :: P -> P -> Double
euclideanDist (x1,y1) (x2,y2) = sqrt $ sq (x1-x2) + sq (y1-y2)
    where sq x = x * x

(~=) :: Double -> Double -> Bool
a ~= b = abs (a - b) < 1e-5

zero :: Double
zero = 0

assertErrors :: a -> Assertion
assertErrors x = do
    b <- E.catch (E.evaluate x >> return True)
                 (\(E.ErrorCall _) -> return False {- Ok -})
    when b $ assertFailure "Didn't raise an 'error'."


-- | Compare two dendrograms without being concerned about
-- permutations.
(====) :: Eq a => Dendrogram a -> Dendrogram a -> Bool
Leaf x1         ==== Leaf x2         = x1 == x2
Branch d1 l1 r1 ==== Branch d2 l2 r2 = d1 ~= d2 && ((l1 ==== l2 && r1 ==== r2) ||
                                                    (l1 ==== r2 && r1 ==== l2))
_ ==== _ = False