1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prec_rec.R
\name{recall}
\alias{recall}
\alias{recall.default}
\alias{recall.table}
\alias{precision}
\alias{precision.default}
\alias{precision.table}
\alias{precision.matrix}
\alias{F_meas}
\alias{F_meas.default}
\alias{F_meas.table}
\title{Calculate recall, precision and F values}
\usage{
recall(data, ...)
\method{recall}{table}(data, relevant = rownames(data)[1], ...)
\method{recall}{default}(data, reference, relevant = levels(reference)[1], na.rm = TRUE, ...)
precision(data, ...)
\method{precision}{default}(data, reference, relevant = levels(reference)[1], na.rm = TRUE, ...)
\method{precision}{table}(data, relevant = rownames(data)[1], ...)
F_meas(data, ...)
\method{F_meas}{default}(
data,
reference,
relevant = levels(reference)[1],
beta = 1,
na.rm = TRUE,
...
)
\method{F_meas}{table}(data, relevant = rownames(data)[1], beta = 1, ...)
}
\arguments{
\item{data}{for the default functions, a factor containing the discrete
measurements. For the \code{table} function, a table.}
\item{...}{not currently used}
\item{relevant}{a character string that defines the factor level
corresponding to the "relevant" results}
\item{reference}{a factor containing the reference values (i.e. truth)}
\item{na.rm}{a logical value indicating whether \code{NA} values should be
stripped before the computation proceeds}
\item{beta}{a numeric value used to weight precision and recall. A value of
1 is traditionally used and corresponds to the harmonic mean of the two
values; other values treat recall as \code{beta} times as important as
precision.}
}
\value{
A number between 0 and 1 (or \code{NA}).
}
\description{
These functions calculate the recall, precision or F values of a measurement
system for finding/retrieving relevant documents compared to reference
results (the truth regarding relevance). The measurement and "truth" data
must have the same two possible outcomes and one of the outcomes must be
thought of as the "relevant" result.
}
\details{
The recall (aka sensitivity) is defined as the proportion of relevant
results out of the number of samples which were actually relevant. When
there are no relevant results, recall is not defined and a value of
\code{NA} is returned.
The precision is the proportion of truly relevant results out of the total
number of predicted relevant results and characterizes the "purity in
retrieval performance" (Buckland and Gey, 1994).
The measure "F" is a combination of precision and recall (see below).
Suppose a 2x2 table with notation
\tabular{rcc}{ \tab Reference \tab \cr Predicted \tab Relevant \tab
Irrelevant \cr Relevant \tab A \tab B \cr Irrelevant \tab C \tab D \cr }
The formulas used here are: \deqn{recall = A/(A+C)} \deqn{precision =
A/(A+B)} \deqn{F_i = (1+i^2)*precision*recall/((i^2 * precision)+recall)}
where \eqn{i} is the value of \code{beta}.
See the references for discussions of the statistics.
}
\examples{
###################
## Data in Table 2 of Powers (2007)
lvs <- c("Relevant", "Irrelevant")
tbl_2_1_pred <- factor(rep(lvs, times = c(42, 58)), levels = lvs)
tbl_2_1_truth <- factor(c(rep(lvs, times = c(30, 12)),
rep(lvs, times = c(30, 28))),
levels = lvs)
tbl_2_1 <- table(tbl_2_1_pred, tbl_2_1_truth)
precision(tbl_2_1)
precision(data = tbl_2_1_pred, reference = tbl_2_1_truth, relevant = "Relevant")
recall(tbl_2_1)
recall(data = tbl_2_1_pred, reference = tbl_2_1_truth, relevant = "Relevant")
tbl_2_2_pred <- factor(rep(lvs, times = c(76, 24)), levels = lvs)
tbl_2_2_truth <- factor(c(rep(lvs, times = c(56, 20)),
rep(lvs, times = c(12, 12))),
levels = lvs)
tbl_2_2 <- table(tbl_2_2_pred, tbl_2_2_truth)
precision(tbl_2_2)
precision(data = tbl_2_2_pred, reference = tbl_2_2_truth, relevant = "Relevant")
recall(tbl_2_2)
recall(data = tbl_2_2_pred, reference = tbl_2_2_truth, relevant = "Relevant")
}
\references{
Kuhn, M. (2008), ``Building predictive models in R using the
caret package,'' \emph{Journal of Statistical Software},
(\doi{10.18637/jss.v028.i05}).
Buckland, M., & Gey, F. (1994). The relationship between Recall and
Precision. \emph{Journal of the American Society for Information Science},
45(1), 12-19.
Powers, D. (2007). Evaluation: From Precision, Recall and F Factor to ROC,
Informedness, Markedness and Correlation. Technical Report SIE-07-001,
Flinders University.
}
\seealso{
\code{\link{confusionMatrix}}
}
\author{
Max Kuhn
}
\keyword{manip}
|