File: gpbinom.R

package info (click to toggle)
r-cran-poissonbinomial 1.2.7-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 736 kB
sloc: cpp: 719; makefile: 3
file content (338 lines) | stat: -rw-r--r-- 13,762 bytes
#'@name GenPoissonBinomial-Distribution
#'
#'@title The Generalized Poisson Binomial Distribution
#'
#'@description
#'Density, distribution function, quantile function and random generation for
#'the generalized Poisson binomial distribution with probability vector
#'\code{probs}.
#'
#'@param x           Either a vector of observed sums or NULL. If NULL,
#'                   probabilities of all possible observations are
#'                   returned.
#'@param p           Vector of probabilities for computation of quantiles.
#'@param n           Number of observations. If \code{length(n) > 1}, the
#'                   length is taken to be the number required.
#'@param probs       Vector of probabilities of success of each Bernoulli
#'                   trial.
#'@param val_p       Vector of values that each trial produces with probability
#'                   in \code{probs}.
#'@param val_q       Vector of values that each trial produces with probability
#'                   in \code{1 - probs}.
#'@param method      Character string that specifies the method of computation
#'                   and must be one of \code{"DivideFFT"}, \code{"Convolve"}, 
#'                   \code{"Characteristic"}, \code{"Normal"} or
#'                   \code{"RefinedNormal"} (abbreviations are allowed).
#'@param wts         Vector of non-negative integer weights for the input
#'                   probabilities.
#'@param log,log.p   Logical value indicating if results are given as
#'                   logarithms.
#'@param lower.tail  Logical value indicating if results are \eqn{P[X \leq x]}
#'                   (if \code{TRUE}; default) or \eqn{P[X > x]} (if 
#'                   \code{FALSE}).
#'@param generator   Character string that specifies the random number
#'                   generator and must either be \code{"Sample"} or
#'                   \code{"Bernoulli"} (abbreviations are allowed).
#'
#'@details
#'See the references for computational details. The \emph{Divide and Conquer}
#'(\code{"DivideFFT"}) and \emph{Direct Convolution} (\code{"Convolve"})
#'algorithms are derived and described in Biscarri, Zhao & Brunner (2018). They
#'have been modified for use with the generalized Poisson binomial
#'distribution. The
#'\emph{Discrete Fourier Transformation of the Characteristic Function}
#'(\code{"Characteristic"}) is derived in Zhang, Hong & Balakrishnan (2018),
#'the \emph{Normal Approach} (\code{"Normal"}) and the
#'\emph{Refined Normal Approach} (\code{"RefinedNormal"}) are described in Hong
#'(2013). They were slightly adapted for the generalized Poisson binomial
#'distribution.
#'
#'In some special cases regarding the values of \code{probs}, the \code{method}
#'parameter is ignored (see Introduction vignette).
#'
#'Random numbers can be generated in two ways. The \code{"Sample"} method
#'uses \code{R}'s \code{sample} function to draw random values according to
#'their probabilities that are calculated by \code{dgpbinom}. The
#'\code{"Bernoulli"} procedure ignores the \code{method} parameter and
#'simulates Bernoulli-distributed random numbers according to the probabilities
#'in \code{probs} and sums them up. It is a bit slower than the \code{"Sample"}
#'generator, but may yield better results, as it can produce observations
#'that the \code{"Sample"} procedure cannot generate, because
#'\code{dgpbinom} may compute 0-probabilities, due to rounding, if
#'\code{probs} is long and/or contains a lot of very small values.
#'
#'@return
#'\code{dgpbinom} gives the density, \code{pgpbinom} computes the distribution
#'function, \code{qgpbinom} gives the quantile function and \code{rgpbinom}
#'generates random deviates.
#'
#'For \code{rgpbinom}, the length of the result is determined by \code{n}. For
#'the other functions, it is the length of \code{x} or \code{p}, respectively
#'(or the number of all possible observations, if \code{x} is \code{NULL}).
#'
#'@section References:
#'Hong, Y. (2013). On computing the distribution function for the Poisson
#'    binomial distribution. \emph{Computational Statistics & Data Analysis},
#'    \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006}
#'
#'Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method
#'    for computing the Poisson binomial distribution.
#'    \emph{Computational Statistics and Data Analysis}, \strong{31}, pp.
#'    216–222. \doi{10.1016/j.csda.2018.01.007}
#'    
#'Zhang, M., Hong, Y. and Balakrishnan, N. (2018). The generalized 
#'    Poisson-binomial distribution and the computation of its distribution
#'    function. \emph{Journal of Statistical Computation and Simulation},
#'    \strong{88}(8), pp. 1515-1527. \doi{10.1080/00949655.2018.1440294}
#'    
#'@examples
#'set.seed(1)
#'pp <- c(1, 0, runif(10), 1, 0, 1)
#'qq <- seq(0, 1, 0.01)
#'va <- rep(5, length(pp))
#'vb <- 1:length(pp)
#'
#'dgpbinom(NULL, pp, va, vb, method = "DivideFFT")
#'pgpbinom(75:100, pp, va, vb, method = "DivideFFT")
#'qgpbinom(qq, pp, va, vb, method = "DivideFFT")
#'rgpbinom(100, pp, va, vb, method = "DivideFFT")
#'
#'dgpbinom(NULL, pp, va, vb, method = "Convolve")
#'pgpbinom(75:100, pp, va, vb, method = "Convolve")
#'qgpbinom(qq, pp, va, vb, method = "Convolve")
#'rgpbinom(100, pp, va, vb, method = "Convolve")
#'
#'dgpbinom(NULL, pp, va, vb, method = "Characteristic")
#'pgpbinom(75:100, pp, va, vb, method = "Characteristic")
#'qgpbinom(qq, pp, va, vb, method = "Characteristic")
#'rgpbinom(100, pp, va, vb, method = "Characteristic")
#'
#'dgpbinom(NULL, pp, va, vb, method = "Normal")
#'pgpbinom(75:100, pp, va, vb, method = "Normal")
#'qgpbinom(qq, pp, va, vb, method = "Normal")
#'rgpbinom(100, pp, va, vb, method = "Normal")
#'
#'dgpbinom(NULL, pp, va, vb, method = "RefinedNormal")
#'pgpbinom(75:100, pp, va, vb, method = "RefinedNormal")
#'qgpbinom(qq, pp, va, vb, method = "RefinedNormal")
#'rgpbinom(100, pp, va, vb, method = "RefinedNormal")
#'
#'@export
dgpbinom <- function(x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", log = FALSE){
  # Probability masses of the generalized Poisson binomial distribution at the
  # values in 'x' (or at every value of the complete range, if 'x' is NULL).
  # Returns a numeric vector with one entry per evaluated value; entries are
  # log-probabilities if 'log' is TRUE.
  
  # argument validation; the checker hands back the method name used for dispatch
  method <- check.args.GPB(x, probs, val_p, val_q, wts, method)
  
  # transformed view of the inputs: reduced parameters ('n', 'probs', 'diffs')
  # together with the complete and the inner range of observations
  tr <- transformGPB(x, probs, val_p, val_q, wts)
  
  # NULL requests the masses over the complete range
  if(is.null(x)) x <- tr$compl.range
  
  # every entry starts as 0; only values inside the inner range are overwritten
  dens <- double(length(x))
  
  # positions of 'x' that lie in the complete range and also in the inner range
  in.compl <- which(x %in% tr$compl.range)
  hits <- in.compl[x[in.compl] %in% tr$inner.range]
  
  if(length(hits)){
    if(tr$n == 0){
      # no trial left after the transformation: every value of the inner range
      # receives probability 1
      dens[hits] <- 1
    }else{
      # shift observations so that the inner range starts at 0
      shifted <- x[hits] - tr$inner.range[1]
      step <- tr$diffs[1]
      
      if(all(tr$diffs == step)){
        # all differences are equal: an ordinary Poisson binomial distribution
        # scaled by 'step'; only multiples of 'step' can carry mass
        on.grid <- which(shifted %% step == 0)
        dens[hits[on.grid]] <- dpbinom((shifted %/% step)[on.grid], tr$probs, method = method)
      }else{
        # general case: dispatch to the routine selected by 'method'; the
        # second value vector is all zeros because the observations were shifted
        zeros <- rep(0, tr$n)
        dens[hits] <- switch(method,
                             DivideFFT = dgpb_dc(shifted, tr$probs, tr$diffs, zeros),
                             Convolve = dgpb_conv(shifted, tr$probs, tr$diffs, zeros),
                             Characteristic = dgpb_dftcf(shifted, tr$probs, tr$diffs, zeros),
                             Normal = dgpb_na(shifted, tr$probs, tr$diffs, zeros, FALSE),
                             RefinedNormal = dgpb_na(shifted, tr$probs, tr$diffs, zeros, TRUE))
      }
    }
  }
  
  # switch to the log scale on request
  if(log) dens <- log(dens)
  
  dens
}

#'@rdname GenPoissonBinomial-Distribution
#'@export
pgpbinom <- function(x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){
  # Cumulative probabilities of the generalized Poisson binomial distribution
  # at the values in 'x' (or at every value of the complete range, if 'x' is
  # NULL): P[X <= x] for 'lower.tail = TRUE', P[X > x] otherwise. Returns a
  # numeric vector, on the log scale if 'log.p' is TRUE.
  
  # argument validation; the checker hands back the method name used for dispatch
  method <- check.args.GPB(x, probs, val_p, val_q, wts, method)
  
  # transformed view of the inputs: reduced parameters ('n', 'probs', 'diffs')
  # together with the complete and the inner range of observations
  tr <- transformGPB(x, probs, val_p, val_q, wts)
  
  # NULL requests the probabilities over the complete range
  if(is.null(x)) x <- tr$compl.range
  
  # constant results on either side of the inner range
  below <- as.numeric(!lower.tail)
  above <- as.numeric(lower.tail)
  
  # start with the value that applies below the inner range
  cum <- rep(below, length(x))
  
  # positions of 'x' that lie in the complete range
  in.compl <- which(x %in% tr$compl.range)
  
  if(length(in.compl)){
    vals <- x[in.compl]
    
    # positions of 'x' that also lie in the inner range
    hits <- in.compl[vals %in% tr$inner.range]
    
    if(length(hits)){
      if(tr$n == 0){
        # no trial left after the transformation: the inner range is treated
        # as a single certain observation
        cum[hits] <- above
      }else{
        # shift observations so that the inner range starts at 0
        shifted <- x[hits] - tr$inner.range[1]
        step <- tr$diffs[1]
        
        if(all(tr$diffs == step)){
          # all differences are equal: an ordinary Poisson binomial
          # distribution scaled by 'step'
          cum[hits] <- ppbinom(shifted %/% step, tr$probs, method = method, lower.tail = lower.tail)
        }else{
          # general case: dispatch to the routine selected by 'method'; the
          # second value vector is all zeros because the observations were shifted
          zeros <- rep(0, tr$n)
          cum[hits] <- switch(method,
                              DivideFFT = pgpb_dc(shifted, tr$probs, tr$diffs, zeros, lower.tail),
                              Convolve = pgpb_conv(shifted, tr$probs, tr$diffs, zeros, lower.tail),
                              Characteristic = pgpb_dftcf(shifted, tr$probs, tr$diffs, zeros, lower.tail),
                              Normal = pgpb_na(shifted, tr$probs, tr$diffs, zeros, FALSE, lower.tail),
                              RefinedNormal = pgpb_na(shifted, tr$probs, tr$diffs, zeros, TRUE, lower.tail))
        }
      }
    }
    
    # values of the complete range that exceed the inner range
    cum[in.compl[which(vals > max(tr$inner.range))]] <- above
  }
  
  # values beyond the complete range
  cum[x > max(tr$compl.range)] <- above
  
  # switch to the log scale on request
  if(log.p) cum <- log(cum)
  
  cum
}

#'@rdname GenPoissonBinomial-Distribution
#'@importFrom stats stepfun
#'@export
qgpbinom <- function(p, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){
  # Quantiles of the generalized Poisson binomial distribution for the
  # probabilities in 'p' (log-probabilities, if 'log.p' is TRUE). Returns one
  # quantile per entry of 'p'; stops with an error if 'p' is NULL or contains
  # values outside the admissible range.
  
  # validate the distribution parameters (no observations to check here)
  method <- check.args.GPB(NULL, probs, val_p, val_q, wts, method)
  
  # 'p' must hold probabilities, or non-positive numbers on the log scale
  if(log.p){
    if(is.null(p) || any(is.na(p) | p > 0))
      stop("'p' must contain real numbers between -Inf and 0!")
  }else{
    if(is.null(p) || any(is.na(p) | p < 0 | p > 1))
      stop("'p' must contain real numbers between 0 and 1!")
  }
  
  # transformed parameters and the inner range of observations
  tr <- transformGPB(NULL, probs, val_p, val_q, wts)
  
  # cumulative probabilities over the complete range of the transformed
  # parameters (weights were already passed to the transformation)
  cdf <- pgpbinom(NULL, tr$probs, tr$val_p, tr$val_q, NULL, method, lower.tail)
  
  # smallest and largest observation of the inner range
  inner <- tr$inner.range
  lo <- min(inner)
  hi <- max(inner)
  
  # back to the probability scale
  if(log.p) p <- exp(p)
  
  # step function mapping probabilities to observations: the knots are the
  # cumulative probabilities over the inner range, the levels the observations
  cdf.inner <- cdf[inner - lo + 1]
  quant <- if(lower.tail){
    stepfun(cdf.inner, c(inner, hi), right = TRUE)
  }else{
    # knots and levels are reversed for the upper tail (presumably because the
    # upper-tail probabilities decrease and 'stepfun' needs ascending knots)
    stepfun(rev(cdf.inner), c(hi, rev(inner)), right = TRUE)
  }
  
  out <- quant(p)
  
  # probabilities of exactly 0 or 1 map to the bounds of the inner range
  out[p == lower.tail]  <- hi
  out[p == !lower.tail] <- lo
  
  out
}

#'@rdname GenPoissonBinomial-Distribution
#'@importFrom stats runif rbinom
#'@export
rgpbinom <- function(n, probs, val_p, val_q, wts = NULL, method = "DivideFFT", generator = "Sample"){
  # Random generation for the generalized Poisson binomial distribution.
  # Returns a vector of 'n' random sums (or 'length(n)', if 'n' is a vector).
  # Stops with an error if 'n' is NULL or 'generator' matches none of the
  # implemented procedures.
  
  ## preliminary checks
  method <- check.args.GPB(NULL, probs, val_p, val_q, wts, method)
  
  # 'n' must be supplied
  if(is.null(n)) stop("'n' must not be NULL!")
  
  # a vector-valued 'n' requests 'length(n)' observations
  if(length(n) > 1) n <- length(n)
  
  ## expand 'probs', 'val_p' and 'val_q' according to the counts in 'wts'
  # if 'wts' is NULL, set it to be a vector of ones
  if(is.null(wts))
    wts <- rep(1, length(probs))
  
  # expand 'probs', 'val_p', 'val_q'
  probs <- rep(probs, wts)
  val_p <- rep(val_p, wts)
  val_q <- rep(val_q, wts)
  
  # make sure that the value of 'generator' matches one of the implemented procedures
  generator <- match.arg(generator, c("Sample", "Bernoulli"))
  
  # generate random numbers
  if(generator == "Sample"){
    # all attainable sums, from the smallest to the largest one
    support <- sum(pmin(val_p, val_q)):sum(pmax(val_p, val_q))
    
    # draw positions instead of values: 'sample' would interpret a support that
    # consists of a single number k >= 1 as the sequence 1:k; for longer
    # supports this is exactly what 'sample' does internally
    res <- support[sample.int(length(support), n, TRUE, dgpbinom(NULL, probs, val_p, val_q, NULL, method))]
  }else{
    # sum up simulated Bernoulli trials; 'method' is not used here
    res <- rgpb_bernoulli(n, probs, val_p, val_q)
  }
  
  # return results
  return(res)
}