1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
#' Calculate sensitivity, specificity and predictive values
#'
#' These functions calculate the sensitivity, specificity or predictive values
#' of a measurement system compared to a reference results (the truth or a gold
#' standard). The measurement and "truth" data must have the same two possible
#' outcomes and one of the outcomes must be thought of as a "positive" results.
#'
#' The sensitivity is defined as the proportion of positive results out of the
#' number of samples which were actually positive. When there are no positive
#' results, sensitivity is not defined and a value of \code{NA} is returned.
#' Similarly, when there are no negative results, specificity is not defined
#' and a value of \code{NA} is returned. Similar statements are true for
#' predictive values.
#'
#' The positive predictive value is defined as the percent of predicted
#' positives that are actually positive while the negative predictive value is
#' defined as the percent of negative positives that are actually negative.
#'
#' Suppose a 2x2 table with notation
#'
#' \tabular{rcc}{ \tab Reference \tab \cr Predicted \tab Event \tab No Event
#' \cr Event \tab A \tab B \cr No Event \tab C \tab D \cr }
#'
#' The formulas used here are: \deqn{Sensitivity = A/(A+C)} \deqn{Specificity =
#' D/(B+D)} \deqn{Prevalence = (A+C)/(A+B+C+D)} \deqn{PPV = (sensitivity *
#' Prevalence)/((sensitivity*Prevalence) + ((1-specificity)*(1-Prevalence)))}
#' \deqn{NPV = (specificity * (1-Prevalence))/(((1-sensitivity)*Prevalence) +
#' ((specificity)*(1-Prevalence)))}
#'
#' See the references for discussions of the statistics.
#'
#' @aliases sensitivity sensitivity.default sensitivity.table
#' sensitivity.matrix specificity specificity.default specificity.table
#' specificity.matrix posPredValue posPredValue.default posPredValue.table
#' posPredValue.matrix negPredValue negPredValue.default negPredValue.table
#' negPredValue.matrix
#' @param data for the default functions, a factor containing the discrete
#' measurements. For the \code{table} or \code{matrix} functions, a table or
#' matric object, respectively.
#' @param reference a factor containing the reference values
#' @param positive a character string that defines the factor level
#' corresponding to the "positive" results
#' @param negative a character string that defines the factor level
#' corresponding to the "negative" results
#' @param prevalence a numeric value for the rate of the "positive" class of
#' the data
#' @param na.rm a logical value indicating whether \code{NA} values should be
#' stripped before the computation proceeds
#' @param ... not currently used
#' @return A number between 0 and 1 (or NA).
#' @author Max Kuhn
#' @seealso \code{\link{confusionMatrix}}
#' @references Kuhn, M. (2008), ``Building predictive models in R using the
#' caret package, '' \emph{Journal of Statistical Software},
#' (\doi{10.18637/jss.v028.i05}).
#'
#' Altman, D.G., Bland, J.M. (1994) ``Diagnostic tests 1: sensitivity and
#' specificity,'' \emph{British Medical Journal}, vol 308, 1552.
#'
#' Altman, D.G., Bland, J.M. (1994) ``Diagnostic tests 2: predictive values,''
#' \emph{British Medical Journal}, vol 309, 102.
#' @keywords manip
#' @examples
#'
#' \dontrun{
#' ###################
#' ## 2 class example
#'
#' lvs <- c("normal", "abnormal")
#' truth <- factor(rep(lvs, times = c(86, 258)),
#' levels = rev(lvs))
#' pred <- factor(
#' c(
#' rep(lvs, times = c(54, 32)),
#' rep(lvs, times = c(27, 231))),
#' levels = rev(lvs))
#'
#' xtab <- table(pred, truth)
#'
#' sensitivity(pred, truth)
#' sensitivity(xtab)
#' posPredValue(pred, truth)
#' posPredValue(pred, truth, prevalence = 0.25)
#'
#' specificity(pred, truth)
#' negPredValue(pred, truth)
#' negPredValue(xtab)
#' negPredValue(pred, truth, prevalence = 0.25)
#'
#'
#' prev <- seq(0.001, .99, length = 20)
#' npvVals <- ppvVals <- prev * NA
#' for(i in seq(along.with = prev))
#' {
#' ppvVals[i] <- posPredValue(pred, truth, prevalence = prev[i])
#' npvVals[i] <- negPredValue(pred, truth, prevalence = prev[i])
#' }
#'
#' plot(prev, ppvVals,
#' ylim = c(0, 1),
#' type = "l",
#' ylab = "",
#' xlab = "Prevalence (i.e. prior)")
#' points(prev, npvVals, type = "l", col = "red")
#' abline(h=sensitivity(pred, truth), lty = 2)
#' abline(h=specificity(pred, truth), lty = 2, col = "red")
#' legend(.5, .5,
#' c("ppv", "npv", "sens", "spec"),
#' col = c("black", "red", "black", "red"),
#' lty = c(1, 1, 2, 2))
#'
#' ###################
#' ## 3 class example
#'
#' library(MASS)
#'
#' fit <- lda(Species ~ ., data = iris)
#' model <- predict(fit)$class
#'
#' irisTabs <- table(model, iris$Species)
#'
#' ## When passing factors, an error occurs with more
#' ## than two levels
#' sensitivity(model, iris$Species)
#'
#' ## When passing a table, more than two levels can
#' ## be used
#' sensitivity(irisTabs, "versicolor")
#' specificity(irisTabs, c("setosa", "virginica"))
#' }
#'
#' @export sensitivity
sensitivity <-
function(data, ...){
UseMethod("sensitivity")
}
#' @rdname sensitivity
#' @importFrom stats complete.cases
#' @export
"sensitivity.default" <-
function(data, reference, positive = levels(reference)[1], na.rm = TRUE, ...)
{
if(!is.factor(reference) | !is.factor(data))
stop("inputs must be factors")
## todo: relax the =2 constraint and let ngative length be > 2
if(length(unique(c(levels(reference), levels(data)))) != 2)
stop("input data must have the same two levels")
if(na.rm)
{
cc <- complete.cases(data) & complete.cases(reference)
if(any(!cc))
{
data <- data[cc]
reference <- reference[cc]
}
}
numer <- sum(data %in% positive & reference %in% positive)
denom <- sum(reference %in% positive)
sens <- ifelse(denom > 0, numer / denom, NA)
sens
}
#' @rdname sensitivity
#' @export
"sensitivity.table" <-
function(data, positive = rownames(data)[1], ...)
{
## "truth" in columns, predictions in rows
if(!all.equal(nrow(data), ncol(data))) stop("the table must have nrow = ncol")
if(!all.equal(rownames(data), colnames(data))) stop("the table must the same groups in the same order")
if(nrow(data) > 2)
{
tmp <- data
data <- matrix(NA, 2, 2)
colnames(data) <- rownames(data) <- c("pos", "neg")
posCol <- which(colnames(tmp) %in% positive)
negCol <- which(!(colnames(tmp) %in% positive))
data[1, 1] <- sum(tmp[posCol, posCol])
data[1, 2] <- sum(tmp[posCol, negCol])
data[2, 1] <- sum(tmp[negCol, posCol])
data[2, 2] <- sum(tmp[negCol, negCol])
data <- as.table(data)
positive <- "pos"
rm(tmp)
}
numer <- sum(data[positive, positive])
denom <- sum(data[, positive])
sens <- ifelse(denom > 0, numer / denom, NA)
sens
}
#' @rdname sensitivity
#' @export
"sensitivity.matrix" <-
function(data, positive = rownames(data)[1], ...)
{
data <- as.table(data)
sensitivity.table(data)
}
|