File: DirichletProcess.hs

package info (click to toggle)
bali-phy 4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 15,392 kB
  • sloc: cpp: 120,442; xml: 13,966; haskell: 9,975; python: 2,936; yacc: 1,328; perl: 1,169; lex: 912; sh: 343; makefile: 26
file content (131 lines) | stat: -rw-r--r-- 5,094 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
module Probability.Distribution.DirichletProcess where

import Probability.Random
import Control.Monad.IO.Class

import Probability.Distribution.List
import Probability.Distribution.Normal
import Probability.Distribution.Bernoulli
import Probability.Distribution.Beta
import Probability.Distribution.Categorical
import Probability.Distribution.Exponential
import Probability.Distribution.Uniform
import Numeric.Log -- for log1p

import Foreign.Vector

import Control.DeepSeq
import MCMC -- for GibbsSampleCategorical

-- PROBLEM: Does the new stick' yield the same results in the example BES analysis?
--          It seems like stick' allows rather more categories to exist at a time.
--          With the original "stick", its usually just 1 or 2 categories.

-- Select one element from the (possibly infinite) list of values.
-- This version performs `n` bernoulli choices to select category `n`.
stick :: [Double] -> [a] -> Random a
stick (p:ps) (x:xs) = do keep <- sample $ bernoulli p
                         if keep == 1 then
                             return x
                         else
                             stick ps xs

-- This version performs 1 exponential sample to select category n.
--stick' ps xs = exponential 1.0 <&> negate <&> go_log ps xs  where
--    go_log (p:ps) (x:xs) q  = let q' = q - log1p(-p)
--                              in if q' > 0.0
--                                 then x
--                                 else go_log ps xs q'


--
dpm_lognormal n alpha mean_dist noise_dist = dpm n alpha sample_dist
    where sample_dist = do mean <- sample $ mean_dist
                           sigma_over_mu <- sample $ noise_dist
                           let sample_log_normal = do z <- sample $ normal 0 1
                                                      return $ mean*safe_exp (z*sigma_over_mu)
                           return sample_log_normal

-- In theory we could implement `dpm` in terms of `dp`:
--   dpm n alpha sample_dist = sequence $ dp n alpha sample_dist
-- I think the problem with that is that it might not be lazy in n.
-- I need the take to be at the end:
--   liftM (take n) $ sequence $ dp alpha sample_dist

dpm n alpha sample_dist = lazy $ do

  dists  <- sequence $ repeat $ sample $ sample_dist

  breaks <- sequence $ repeat $ sample $ beta 1 alpha

-- stick selects a distribution from the list, and join then samples from the distribution
  sample $ iid n (join $ stick breaks dists)

dp :: Int -> Double -> Random a -> Random [a]
dp n alpha dist = lazy $ do

  atoms  <- sequence $ repeat $ dist

  breaks <- sequence $ repeat $ sample $ beta 1 alpha

  sample $ iid n (stick breaks atoms)

---

normalize v = map (/total) v where total=sum v

do_crp alpha n d = do_crp'' alpha n bins (replicate bins 0) where bins=n+d
do_crp'' alpha 0 bins counts = return []
do_crp'' alpha n bins counts = let inc (c:cs) 0 = (c+1:cs)
                                   inc (c:cs) i = c:(inc cs (i-1))
                                   p alpha counts = normalize (map f counts)
                                   nzeros = length (filter (==0) counts)
                                   f 0 = alpha/fromIntegral nzeros
                                   f i = fromIntegral i
                               in 
                               do c <- sample $ categorical (p alpha counts)
                                  cs <- do_crp'' alpha (n-1) bins (inc counts c) 
                                  return (c:cs)

foreign import bpcall "Distribution:CRP_density" builtin_crp_density :: Double -> Int -> Int -> EVector Int -> LogDouble
crp_density alpha n d z = builtin_crp_density alpha n d (toVector z)
foreign import bpcall "Distribution:sample_CRP" sample_crp_vector :: Double -> Int -> Int -> IO (EVector Int)
sample_crp alpha n d = do v <- sample_crp_vector alpha n d
                          return $ sizedVectorToList v n
ran_sample_crp alpha n d = liftIO $ sample_crp alpha n d

triggeredModifiableList n value effect = let raw_list = mapn n modifiable value
                                             effect' = unsafePerformIO $ effect raw_list
                                             triggered_list = mapn n (withEffect effect') raw_list
                                         in triggered_list

crp_effect n d x = addMove 1 $ TransitionKernel (\c -> mapM_ (\l-> runTK c $ gibbsSampleCategorical (x!!l) (n+d)) [0..n-1])

safe_exp x = if (x < (-20)) then
               exp (-20)
             else if (x > 20) then
               exp 20
             else
               exp x


data CRP = CRP Double Int Int

instance Dist CRP where
    type Result CRP = [Int]
    dist_name _ = "crp"

instance IOSampleable CRP where
    sampleIO (CRP alpha n d) = sample_crp alpha n d

instance HasPdf CRP where
    pdf (CRP alpha n d) = crp_density alpha n d

instance HasAnnotatedPdf CRP where
    annotated_densities dist = make_densities $ pdf dist

instance Sampleable CRP where
    sample dist@(CRP alpha n d) = RanDistribution3 dist (crp_effect n d) (triggeredModifiableList n) (ran_sample_crp alpha n d)

crp = CRP