File: skyline.R

package info (click to toggle)
r-cran-ape 5.8-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,676 kB
  • sloc: ansic: 7,676; cpp: 116; sh: 17; makefile: 2
file content (149 lines) | stat: -rw-r--r-- 3,286 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
## skyline.R (2002-09-12)

##   Methods to construct skyline objects (data underlying skyline plot)

## Copyright 2002 Korbinian Strimmer

## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.

skyline <- function(x, ...) UseMethod("skyline")

# input: phylogenetic tree
skyline.phylo <- function(x, ...)
{
  if (!inherits(x, "phylo"))
    stop("object \"x\" is not of class \"phylo\"")

  skyline(coalescent.intervals(x), ...)
}

# input: coalescent intervals and epsilon
skyline.coalescentIntervals <- function(x, epsilon=0, ...)
{
  if (!inherits(x, "coalescentIntervals"))
    stop("object \"x\" is not of class \"coalescentIntervals\"")

  if (epsilon < 0)
  {
    eps <- find.skyline.epsilon(x, ...)
  }
  else
    eps <- epsilon

  skyline(collapsed.intervals(x, epsilon=eps), ...)
}


# input: collapsed intervals
skyline.collapsedIntervals <- function(x, old.style=FALSE, ...)
{
  if (!inherits(x, "collapsedIntervals"))
    stop("object \"x\" is not of class \"collapsedIntervals\"")

  link <- x$collapsed.interval
  params <- x$collapsed.interval.count
  l <- x$lineages
  w <- x$interval.length

  b <- choose(l,2) # binomial coefficients

  sg <- rep(0,params)   # sizes of collapsed intervals
  cg <- rep(0,params)   # coalescent events in interval

  if(old.style)
    ng <- rep(0,params) # lineages at beginning of an in interval
  else
  {
    ng <- rep(0,params) # sum of classic skp estimates in an interval
    m.classic <- w*b
  }

  for (i in 1:params)
  {
    group <- link==i
    sgr <- w[group]
    sg[[i]] <- sum(sgr)
    cg[[i]] <- length(sgr)

    if(old.style)
      ng[[i]] <- l[group][[1]]
    else
      ng[[i]] <- sum(m.classic[group])
  }

  # generalized skp estimate
  t <- cumsum(sg)
  if (old.style)
    m <- sg*(ng*(ng-cg)/(2.0*cg) )
  else
    m <- ng/cg

  # log-likelihood
  logL <- sum(log(b/m[link]) - b/m[link]*w)

  # AICc corrected log-likelihood
  K <- x$collapsed.interval.count
  S <- x$interval.count
  if (S-K > 1)
    logL.AICc <- logL - K- K*(K+1)/(S-K-1)
  else
    logL.AICc <- NA

  obj <- list(
    time=t,
    interval.length=sg,
    population.size=m,
    parameter.count=length(t),
    epsilon = x$epsilon,
    logL = logL,
    logL.AICc = logL.AICc
  )
  class(obj) <- "skyline"
  return(obj)
}

# grid search for finding optimal epsilon parameter
find.skyline.epsilon <- function(ci, GRID=1000, MINEPS=1e-6, ...)
{
  # Why MINEPS?
  # Because most "clock-like" trees are not properly
  # clock-like for a variety of reasons, i.e. the heights
  # of the tips are not exactly zero.

  cat("Searching for the optimal epsilon... ")

  # a grid search is a naive way but still effective of doing this ...

  size <- ci$interval.count
  besteps <- ci$total.depth
  eps <- besteps

  cli <- collapsed.intervals(ci,eps)
  skpk <- skyline(cli, ...)
  bestaicc <- skpk$ logL.AICc
  params <- skpk$parameter.count

  delta <- besteps/GRID

  eps <- eps-delta
  while(eps > MINEPS)
  {
    cli <- collapsed.intervals(ci,eps)
    skpk <- skyline(cli, ...)
    aicc <- skpk$ logL.AICc
    params <- skpk$parameter.count

    if (aicc > bestaicc && params < size-1)
    {
      besteps <- eps
      bestaicc <- aicc
    }
    eps <- eps-delta
  }

   cat("epsilon =", besteps, "\n")

  besteps
}