File: generateCalibrationData.Rd

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 8,264 kB
sloc: ansic: 65; sh: 13; makefile: 5
file content (79 lines) | stat: -rw-r--r-- 3,279 bytes
parent folder | download | duplicates (3)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generateCalibration.R
\name{generateCalibrationData}
\alias{generateCalibrationData}
\alias{CalibrationData}
\title{Generate classifier calibration data.}
\usage{
generateCalibrationData(obj, breaks = "Sturges", groups = NULL, task.id = NULL)
}
\arguments{
\item{obj}{(\link{Prediction} | list of \link{Prediction} | \link{ResampleResult} | list of \link{ResampleResult} | \link{BenchmarkResult})\cr
Single prediction object, list of them, single resample result, list of them, or a benchmark result.
If a list is passed (e.g., results produced by different learners you want to compare),
name its elements with the names you want to see in the plots, e.g. learner
shortnames or ids.}

\item{breaks}{(\code{character(1)} | \link{numeric})\cr
If \code{character(1)}, the algorithm to use in generating probability bins.
See \link{hist} for details.
If \link{numeric}, the cut points for the bins.
Default is \dQuote{Sturges}.}

\item{groups}{(\code{integer(1)})\cr
The number of bins to construct.
If specified, \code{breaks} is ignored.
Default is \code{NULL}.}

\item{task.id}{(\code{character(1)})\cr
Task in the \link{BenchmarkResult} to generate calibration data for; ignored if \code{obj} is not a \link{BenchmarkResult}.
Default is the first task.}
}
\value{
\link{CalibrationData}. A \link{list} containing:
\item{proportion}{\link{data.frame} with columns:
\itemize{
\item \code{Learner} Name of learner.
\item \code{bin} Bins calculated according to the \code{breaks} or \code{groups} argument.
\item \code{Class} Class labels (for binary classification only the positive class).
\item \code{Proportion} Proportion of observations from class \code{Class} among all
observations with posterior probabilities of class \code{Class} within the
interval given in \code{bin}.
}}
\item{data}{\link{data.frame} with columns:
\itemize{
\item \code{Learner} Name of learner.
\item \code{truth} True class label.
\item \code{Class} Class labels (for binary classification only the positive class).
\item \code{Probability} Predicted posterior probability of \code{Class}.
\item \code{bin} Bin corresponding to \code{Probability}.
}}
\item{task}{(\link{TaskDesc})\cr
Task description.}
}
\description{
A calibrated classifier is one where the predicted probability of a class closely matches the
rate at which that class occurs. For example, among data points that are assigned a predicted probability
of 0.8 for class A, approximately 80 percent of such points should belong to class A if the classifier
is well calibrated. This is estimated empirically by grouping data points with similar predicted
probabilities for each class, and plotting the rate of each class within each bin against the
predicted probability bins.
}
\references{
Vuk, Miha, and Curk, Tomaz. \dQuote{ROC Curve, Lift Chart, and Calibration Plot.} Metodoloski zvezki. Vol. 3. No. 1 (2006): 89-108.
}
\seealso{
Other generate_plot_data: 
\code{\link{generateCritDifferencesData}()},
\code{\link{generateFeatureImportanceData}()},
\code{\link{generateFilterValuesData}()},
\code{\link{generateLearningCurveData}()},
\code{\link{generatePartialDependenceData}()},
\code{\link{generateThreshVsPerfData}()},
\code{\link{plotFilterValues}()}

Other calibration: 
\code{\link{plotCalibration}()}
}
\concept{calibration}
\concept{generate_plot_data}