File: Simulate.hs

package info (click to toggle)
bali-phy 4.0~beta16%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,192 kB
  • sloc: cpp: 119,288; xml: 13,482; haskell: 9,722; python: 2,930; yacc: 1,329; perl: 1,169; lex: 904; sh: 343; makefile: 26
file content (78 lines) | stat: -rw-r--r-- 1,721 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import Probability.Distribution.PairwiseAlignment
import Bio.Alignment
import Bio.Alphabet
import Effect
import IModel
import MCMC
import Probability
import SModel
import Tree
import Tree.Newick
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import qualified Data.JSON as J
import Probability.Logger
import qualified Data.Text.IO as T
import Data.Array
import System.Environment
import System.IO

-- Read the tree from a file
getTree args = do
  let (filename:_) = args

  rtree <- dropInternalLabels <$> readBranchLengthTree filename

  return rtree

getStartLength args = read startLength' :: Int
    where (_:startLength':_) = args

-- Sample substitution model parameters and define the substitution model
getSmodel = do
  kappaPur <- sample $ logNormal (log 2) 0.25

  kappaPyr <- sample $ logNormal (log 2) 0.25

  pi <- sample $ symmetricDirichletOn (letters dna) 2

  return $ tn93' dna kappaPur kappaPyr pi

-- Get the indel model
getImodel = do
  let mu = 0.05
      lambda = mu
      meanLength = 5

  return $ LongIndels mu lambda meanLength

model rootedTree startLength = do

  smodel <- getSmodel

  imodel <- getImodel

  -- Sample the sequences and their alignment
  alignment <- sample $ IndelsOnTree rootedTree imodel startLength

  -- Sample ancestral sequence STATES
  sequences <- sample $ phyloCTMC rootedTree alignment smodel 1

  -- Return the AlignedCharacterData
  return $ align alignment sequences


main = do
  -- 1. Read the tree and get the starting sequence length
  args <- getArgs

  rootedTree <- getTree args

  let startLength = getStartLength args

  alignedSequences <- runRandomLazy $ model rootedTree startLength

  T.putStr $ toFasta $ alignedSequences