File: residuals.R

package info (click to toggle)
r-bioc-glmgampoi 1.2.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 704 kB
  • sloc: cpp: 523; ansic: 114; sh: 13; makefile: 2
file content (82 lines) | stat: -rw-r--r-- 3,534 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

#' Extract Residuals of Gamma Poisson Model
#'
#' @param object a fit of type `glmGamPoi`. It is usually produced with a call to
#'   `glm_gp()`.
#' @param type the type of residual that is calculated. See details for more information.
#'   Default: `"deviance"`.
#' @param ... currently ignored.
#'
#' @details
#' This method can calculate a range of different residuals:
#' \describe{
#'   \item{deviance}{The deviance for the Gamma-Poisson model is
#'     \deqn{dev = 2 * (1/theta * log((1 + m * theta) / (1 + y * theta)) - y log((m + y * theta) / (y + y * m * theta)))}
#'   and the residual accordingly is
#'     \deqn{res = sign(y - m) sqrt(dev).}
#'   }
#'   \item{pearson}{The Pearson residual is \eqn{res = (y - m) / sqrt(m + m^2 * theta)}}
#'   \item{randomized_quantile}{The randomized quantile residual was originally developed
#'   by Dunn & Smyth, 1995. Please see that publication or [statmod::qresiduals()] for more
#'   information.}
#'   \item{working}{The working residuals are \eqn{res = (y - m) / m}.}
#'   \item{response}{The response residuals are \eqn{res = y - m}}
#' }
#'
#' @return a matrix with the same size as `fit$data`. If `fit$data` contains a `DelayedArray` than the
#'   result will be a `DelayedArray` as well.
#'
#' @seealso [glm_gp()] and `stats::residuals.glm()
#' @export
residuals.glmGamPoi <- function(object, type = c("deviance", "pearson", "randomized_quantile", "working", "response"), ...){
  type <- match.arg(type, c("deviance", "pearson", "randomized_quantile", "working", "response"))
  Y <- assay(object$data)
  if(type == "deviance"){
    make_resid_hdf5_mat <- is_on_disk.glmGamPoi(object)
    if(! make_resid_hdf5_mat){
      compute_gp_deviance_residuals_matrix(Y, object$Mu, object$overdispersions)
    }else{
      delayed_matrix_apply_block(Y, object$Mu, object$overdispersions, compute_gp_deviance_residuals_matrix)
    }
  }else if(type == "pearson"){
    (Y - object$Mu) / sqrt(object$Mu + multiply_vector_to_each_column(object$Mu^2, object$overdispersions))
  }else if(type == "randomized_quantile"){
    make_resid_hdf5_mat <- is_on_disk.glmGamPoi(object)
    if(! make_resid_hdf5_mat){
      qres.gampoi(Y, object$Mu, object$overdispersions)
    }else{
      delayed_matrix_apply_block(Y, object$Mu, object$overdispersions, qres.gampoi)
    }
  }else if(type == "working"){
    (Y - object$Mu) / object$Mu
  }else if(type == "response"){
    Y - object$Mu
  }
}


qres.gampoi <- function(Y, Mu, dispersion){
  tmp <- vapply(seq_len(ncol(Y)), function(idx){
    a <- pnbinom(Y[, idx] - 1, mu = Mu[, idx], size = 1/dispersion)
    b <- pnbinom(Y[, idx], mu = Mu[, idx], size = 1/dispersion)
    rbind(a, b)
  }, FUN.VALUE = matrix(0, ncol = nrow(Y), nrow = 2))

  u <- runif(n = length(Y), min = pmin(tmp[1,,], tmp[2,,]), max = pmax(tmp[1,,], tmp[2,,]))
  res <- qnorm(u)

  # Get rid of infinite value by sampling on the log scale
  # This is technically wrong, but shouldn't matter.
  inf_res <- which(is.infinite(res))
  a_alt <- pnbinom(Y[inf_res] - 1, mu = Mu[inf_res], size = 1/dispersion[(inf_res - 1) %% nrow(Y) + 1],
                   log.p = TRUE, lower.tail = Mu[inf_res] > Y[inf_res])
  b_alt <- pnbinom(Y[inf_res], mu = Mu[inf_res], size = 1/dispersion[(inf_res - 1) %% nrow(Y) + 1],
                   log.p = TRUE, lower.tail = Mu[inf_res] > Y[inf_res])
  u_alt <- runif(n = length(inf_res), min = pmin(a_alt, b_alt), max = pmax(a_alt, b_alt))
  res[inf_res] <- qnorm(u_alt, log.p = TRUE, lower.tail = Mu[inf_res] > Y[inf_res])

  array(res, dim(Y))
}