1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
|
#' Cross-validation bandwidths for spatial kernel density estimates
#'
#' Isotropic fixed or global (for adaptive) bandwidth selection for standalone 2D density/intensity
#' based on either unbiased least squares cross-validation (LSCV) or likelihood (LIK) cross-validation.
#'
#' This function implements the bivariate, edge-corrected versions of fixed-bandwidth least squares cross-validation and likelihood cross-validation
#' as outlined in Sections 3.4.3 and 3.4.4 of Silverman (1986) in order to select an optimal fixed smoothing bandwidth. With \code{type = "adaptive"} it may also be used to select the global bandwidth
#' for adaptive kernel density estimates, making use of multi-scale estimation (Davies and Baddeley, 2018) via \code{\link{multiscale.density}}.
#' Note that for computational reasons, the leave-one-out procedure is not performed on the pilot density in the adaptive setting; it
#' is only performed on the final stage estimate. Current development efforts include extending this functionality, see \code{\link{SLIK.adapt}}. See also `Warning' below.
#'
#' Where \code{LSCV.density} is based on minimisation of an unbiased estimate of the mean integrated squared error (MISE) of the density, \code{LIK.density} is based on
#' maximisation of the cross-validated leave-one-out average of the log-likelihood of the density estimate with respect to \eqn{h}.
#'
#' In both functions, the argument \code{zero.action} can be used to control the level of severity in response to small bandwidths that result (due to numerical error) in at least one density value being zero or less.
#' When \code{zero.action = -1}, the function strictly forbids bandwidths that would result in one or more \emph{pixel} values of a kernel estimate of the original data (i.e. anything over the whole region) being zero or less---this is the most restrictive truncation. With \code{zero.action = 0} (default), the function
#' automatically forbids bandwidths that yield erroneous values at the leave-one-out data point locations only. With \code{zero.action = 1}, the minimum machine value (see \code{.Machine$double.xmin} at the prompt) is
#' used to replace these individual leave-one-out values. When \code{zero.action = 2}, the minimum value of the valid (greater than zero) leave-one-out values is used to replace any erroneous leave-one-out values.
#'
#'
#'
#' @aliases LIK.density
#'
#' @rdname CV
#'
#' @param pp An object of class \code{\link[spatstat.geom]{ppp}} giving the observed
#' 2D data to be smoothed.
#' @param hlim An optional vector of length 2 giving the limits of the
#' optimisation routine with respect to the bandwidth. If unspecified, the
#' function attempts to choose this automatically.
#' @param hseq An optional increasing sequence of bandwidth values at which to
#' manually evaluate the optimisation criterion. Used only in the case
#' \code{(!auto.optim && is.null(hlim))}.
#' @param resolution Spatial grid size; the optimisation will be based on a
#' [\code{resolution} \eqn{\times}{x} \code{resolution}] density estimate.
#' @param edge Logical value indicating whether to edge-correct the density
#' estimates used.
#' @param auto.optim Logical value indicating whether to automate the numerical
#' optimisation using \code{\link{optimise}}. If \code{FALSE}, the optimisation
#' criterion is evaluated over \code{hseq} (if supplied), or over a seqence of
#' values controlled by \code{hlim} and \code{seqres}.
#' @param seqres Optional resolution of an increasing sequence of bandwidth
#' values. Only used if \code{(!auto.optim && is.null(hseq))}.
#' @param parallelise Numeric argument to invoke parallel processing, giving
#' the number of CPU cores to use when \code{!auto.optim} \bold{and} \code{type = "fixed"}. Experimental. Test
#' your system first using \code{parallel::detectCores()} to identify the
#' number of cores available to you.
#' @param verbose Logical value indicating whether to provide function progress
#' commentary.
#' @param type A character string; \code{"fixed"} (default) performs classical leave-one-out
#' cross-validation for the fixed-bandwidth estimator. Alternatively, \code{"adaptive"} utilises
#' multiscale adaptive kernel estimation (Davies & Baddeley, 2018) to run the cross-validation
#' in an effort to find a suitable global bandwidth for the adaptive estimator. Note that data points are not `left out' of
#' the pilot density estimate when using this option (this capability is currently in development). See also the entry for \code{...}.
#' @param zero.action A numeric integer, either \code{-1}, \code{0} (default), \code{1} or \code{2} controlling how the function should behave in response to numerical errors at very small bandwidths, when such a bandwidth results in one or more zero or negative density values during the leave-one-out computations. See `Details'.
#' @param ... Additional arguments controlling pilot density estimation and multi-scale bandwidth-axis
#' resolution when \code{type = "adaptive"}. Relevant arguments are \code{hp}, \code{pilot.density},
#' \code{gamma.scale}, and \code{trim} from \code{\link{bivariate.density}}; and \code{dimz} from
#' \code{\link{multiscale.density}}. If \code{hp} is missing and required, the function makes a (possibly recursive)
#' call to \code{LSCV.density} to set this using fixed-bandwidth LSCV. The remaining defaults are \code{pilot.density = pp},
#' \code{gamma.scale = "geometric"}, \code{trim = 5}, and \code{dimz = resolution}.
#'
#' @return A single numeric value of the estimated bandwidth (if
#' \code{auto.optim = TRUE}). Otherwise, a \eqn{[}\code{seqres} \eqn{x} 2\eqn{]} matrix
#' giving the bandwidth sequence and corresponding CV
#' function value.
#'
#' @section Warning: Leave-one-out CV for bandwidth selection in kernel
#' density estimation is notoriously unstable in practice and has a tendency to
#' produce rather small bandwidths, particularly for spatial data. Satisfactory bandwidths are not guaranteed
#' for every application; \code{zero.action} can curb adverse numeric effects for very small bandwidths during the optimisation procedures. This method can also be computationally expensive for
#' large data sets and fine evaluation grid resolutions. The user may also need to
#' experiment with adjusting \code{hlim} to find a suitable minimum.
#'
#' @author T. M. Davies
#'
#' @seealso \code{\link{SLIK.adapt}} and functions for bandwidth selection in package
#' \code{spatstat}: \code{\link[spatstat.explore]{bw.diggle}};
#' \code{\link[spatstat.explore]{bw.ppl}}; \code{\link[spatstat.explore]{bw.scott}};
#' \code{\link[spatstat.explore]{bw.frac}}.
#'
#' @references
#'
#' Davies, T.M. and Baddeley A. (2018), Fast computation of
#' spatially adaptive kernel estimates, \emph{Statistics and Computing}, \bold{28}(4), 937-956.
#'
#' Silverman, B.W. (1986), \emph{Density Estimation for Statistics
#' and Data Analysis}, Chapman & Hall, New York.
#'
#' Wand, M.P. and Jones,
#' C.M., 1995. \emph{Kernel Smoothing}, Chapman & Hall, London.
#'
#' @examples
#'
#' data(pbc)
#' pbccas <- split(pbc)$case
#'
#' LIK.density(pbccas)
#' LSCV.density(pbccas)
#'
#' \donttest{
#' #* FIXED
#'
#' # custom limits
#' LIK.density(pbccas,hlim=c(0.01,4))
#' LSCV.density(pbccas,hlim=c(0.01,4))
#'
#' # disable edge correction
#' LIK.density(pbccas,hlim=c(0.01,4),edge=FALSE)
#' LSCV.density(pbccas,hlim=c(0.01,4),edge=FALSE)
#'
#' # obtain objective function
#' hcv <- LIK.density(pbccas,hlim=c(0.01,4),auto.optim=FALSE)
#' plot(hcv);abline(v=hcv[which.max(hcv[,2]),1],lty=2,col=2)
#'
#' #* ADAPTIVE
#' LIK.density(pbccas,type="adaptive")
#' LSCV.density(pbccas,type="adaptive")
#'
#' # change pilot bandwidth used
#' LIK.density(pbccas,type="adaptive",hp=2)
#' LSCV.density(pbccas,type="adaptive",hp=2)
#' }
#'
#' @export
LSCV.density <- function(pp,hlim=NULL,hseq=NULL,resolution=64,edge=TRUE,auto.optim=TRUE,
type=c("fixed","adaptive"),seqres=30,parallelise=NULL,
zero.action=0,verbose=TRUE,...){
if(!inherits(pp,"ppp")) stop("data object 'pp' must be of class \"ppp\"")
W <- Window(pp)
if(is.null(hlim)){
ppu <- pp
marks(ppu) <- NULL
md <- min(nndist(unique(ppu)))
hlim <- c(md,max(md*50,min(diff(W$xrange),diff(W$yrange))/6))
} else {
hlim <- checkran(hlim,"'hlim'")
}
if(!zero.action%in%((-1):2)) stop("invalid 'zero.action'")
typ <- type[1]
if(typ=="fixed"){
if(auto.optim){
if(verbose) cat("Searching for optimal h in ",prange(hlim),"...",sep="")
result <- suppressWarnings(optimise(LSCV.density.spatial.single,interval=hlim,pp=pp,res=resolution,edge=edge,za=zero.action)$minimum)
if(verbose) cat("Done.\n")
} else {
if(is.null(hseq)) hseq <- seq(hlim[1],hlim[2],length=seqres)
hn <- length(hseq)
if(is.null(parallelise)){
lscv.vec <- rep(NA,hn)
if(verbose) pb <- txtProgressBar(1,hn)
for(i in 1:hn){
lscv.vec[i] <- LSCV.density.spatial.single(hseq[i],pp,resolution,edge,za=zero.action)
if(verbose) setTxtProgressBar(pb,i)
}
if(verbose) close(pb)
} else {
ncores <- detectCores()
if(verbose) cat(paste("Evaluating criterion on",parallelise,"/",ncores,"cores..."))
if(parallelise>ncores) stop("cores requested exceeds available count")
registerDoParallel(cores=parallelise)
lscv.vec <- foreach(i=1:hn,.packages="spatstat",.combine=c) %dopar% {
return(LSCV.density.spatial.single(hseq[i],pp,resolution,edge,zero.action))
}
if(verbose) cat("Done.\n")
}
result <- cbind(hseq,lscv.vec)
dimnames(result)[[2]] <- c("h","CV")
}
} else if(typ=="adaptive"){
ellip <- list(...)
if(is.null(ellip$hp)){
if(verbose) cat("Selecting pilot bandwidth...")
hp <- LSCV.density(pp,verbose=FALSE,zero.action=zero.action)
if(verbose) cat(paste("Done.\n [ Found hp =",hp,"]\n"))
} else {
hp <- ellip$hp
}
if(is.null(ellip$pilot.density)){
pilot.density <- pp
} else {
pilot.density <- ellip$pilot.density
}
if(is.null(ellip$gamma.scale)){
gamma.scale <- "geometric"
} else {
gamma.scale <- ellip$gamma.scale
}
if(is.null(ellip$trim)){
trim <- 5
} else {
trim <- ellip$trim
}
if(is.null(ellip$dimz)){
dimz <- resolution
} else {
dimz <- ellip$dimz
}
if(verbose) cat("Computing multi-scale estimate...")
hhash <- mean(hlim)
msobject <- multiscale.density(pp,h0=hhash,hp=hp,h0fac=hlim/hhash,edge=ifelse(edge,"uniform","none"),resolution=resolution,dimz=dimz,gamma.scale=gamma.scale,trim=trim,intensity=TRUE,pilot.density=pilot.density,verbose=FALSE)
if(verbose) cat("Done.\n")
h0range <- range(as.numeric(names(msobject$z)))
if(auto.optim){
if(verbose) cat("Searching for optimal h0 in ",prange(h0range),"...",sep="")
h0opt <- suppressWarnings(optimise(ms.loo,interval=h0range,object=msobject,za=zero.action)$minimum)
if(verbose) cat("Done.\n")
return(h0opt)
} else {
if(is.null(hseq)) hseq <- seq(h0range[1],h0range[2],length=seqres)
hn <- length(hseq)
lscv.vec <- rep(NA,hn)
if(verbose) pb <- txtProgressBar(1,hn)
for(i in 1:hn){
lscv.vec[i] <- ms.loo(hseq[i],msobject,za=zero.action)
if(verbose) setTxtProgressBar(pb,i)
}
if(verbose) close(pb)
result <- cbind(hseq,lscv.vec)
dimnames(result)[[2]] <- c("h0","CV")
}
} else stop("invalid 'type'")
return(result)
}
|