## Code chunks in knitr purl format ("## ----label, options----" headers) for a caret walkthrough on the Sonar data.
## ----loadLibs, include = FALSE-----------------------
library(MASS)
library(caret)
library(mlbench)
data(Sonar)
library(pls)
library(klaR)
library(knitr)
## Chunk defaults handed to knitr's opts_chunk; grouped here by the
## kind of output they concern.
opts_chunk$set(
  ## Text output.
  ## NOTE(review): `digits` is not a chunk option I recognise; the
  ## options() call below is what sets print digits. Confirm it is used.
  comment = "#>", collapse = TRUE, digits = 3, tidy = FALSE,
  background = "#FFFF00",
  ## Figures.
  fig.align = "center",
  ## Conditions raised while a chunk runs.
  warning = FALSE, message = FALSE
)

## Session-wide print settings: line width of 55 and 3 digits.
options(width = 55, digits = 3)

## Make theme_bw() the default plotting theme.
theme_set(theme_bw())
#' Count the entries listed in one field of a package's DESCRIPTION
#'
#' Only the count is returned. An earlier, disabled code path that
#' formatted and sorted the entries (and rewrote ">=" for typesetting)
#' has been dropped because it never affected the result.
#'
#' @param what Name of the DESCRIPTION field to read, e.g. "Suggests".
#' @param pkg Name of the package whose DESCRIPTION is inspected.
#'   Defaults to "caret", the package this function was hard-wired to.
#' @return An integer: the number of comma-separated entries in the
#'   field, or 0L when the package or the field is not available.
getInfo <- function(what = "Suggests", pkg = "caret") {
  desc <- packageDescription(pkg)
  ## packageDescription() warns and returns NA (not a list) when the
  ## package is not installed; that used to be counted as one entry.
  if (!is.list(desc)) {
    return(0L)
  }
  text <- desc[[what]]
  ## A field that is absent from DESCRIPTION comes back as NULL, which
  ## previously failed in strsplit(...)[[1]] with a subscript error.
  if (is.null(text)) {
    return(0L)
  }
  ## Fields may wrap over several lines; treat a line break as a separator.
  text <- gsub("\n", ", ", text, fixed = TRUE)
  eachPkg <- strsplit(text, ", ", fixed = TRUE)[[1]]
  ## Drop commas left behind where an entry ended with ",\n".
  eachPkg <- gsub(",", "", eachPkg, fixed = TRUE)
  length(eachPkg)
}
## ----install, eval = FALSE---------------------------
# install.packages("caret", dependencies = c("Depends", "Suggests"))
## ----SonarSplit--------------------------------------
library(caret)
library(mlbench)
data(Sonar)
## Seed the random number generator ahead of the partitioning call.
set.seed(107)

## Arguments: `y` supplies the outcome data, `p` is the share of the
## data placed in the training set, and `list = FALSE` sets the format
## of the result.
inTrain <- createDataPartition(y = Sonar$Class, p = .75, list = FALSE)

## inTrain holds a set of integers: the rows of Sonar that belong in
## the training set.
str(inTrain)
## ----SonarDatasets-----------------------------------
## Rows indexed by inTrain form the training set; every remaining row
## goes to the test set.
training <- Sonar[inTrain, ]
testing <- Sonar[-inTrain, ]

## Show how many rows ended up in each piece.
nrow(training)
nrow(testing)
## ----plsTune1, eval = FALSE--------------------------
# plsFit <- train(
# Class ~ .,
# data = training,
# method = "pls",
# ## Center and scale the predictors for the training
# ## set and all future samples.
# preProc = c("center", "scale")
# )
## ----pls_fit-----------------------------------------
## Resampling set-up shared by the model fits: repeated
## cross-validation with three repeats, class probabilities switched
## on, and twoClassSummary as the summary function.
ctrl <- trainControl(
  method = "repeatedcv",
  repeats = 3,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

## Seed the random number generator immediately before fitting.
set.seed(123)
plsFit <- train(
  Class ~ .,
  data = training,
  method = "pls",
  ## Center and scale the predictors. The argument name is written in
  ## full instead of relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  ## Number of candidate tuning values to evaluate.
  tuneLength = 15,
  trControl = ctrl,
  ## Candidate models are compared on the "ROC" metric.
  metric = "ROC"
)

## Print the fitted object.
plsFit
## ----pls-plot----------------------------------------
## Plot the fitted train() object (presumably its resampled performance
## across the tuning values; confirm against caret's ggplot method).
ggplot(plsFit)
## ----plsPred-----------------------------------------
## Predictions for the held-out rows using predict()'s default type.
plsClasses <- predict(
  plsFit,
  newdata = testing
)
str(plsClasses)

## Same rows again, this time asking for type = "prob".
plsProbs <- predict(
  plsFit,
  newdata = testing,
  type = "prob"
)
head(plsProbs)
## ----plsCM-------------------------------------------
## Cross-tabulate the test-set predictions against the observed
## classes. Both arguments are named so that predictions and truth
## cannot be swapped by accident.
confusionMatrix(data = plsClasses, reference = testing$Class)
## ----rdaFit------------------------------------------
## To illustrate, a custom grid is used: five gamma values
## (0, 0.25, ..., 1), each paired with a single lambda of 0.75.
rdaGrid <- data.frame(gamma = (0:4) / 4, lambda = 3 / 4)

## Seed the random number generator immediately before fitting.
set.seed(123)
rdaFit <- train(
  Class ~ .,
  data = training,
  method = "rda",
  ## Evaluate exactly the candidates listed in rdaGrid.
  tuneGrid = rdaGrid,
  trControl = ctrl,
  metric = "ROC"
)

## Print the fitted object.
rdaFit

## Predict the held-out rows and cross-tabulate against the observed
## classes; arguments are named so their roles cannot be swapped.
rdaClasses <- predict(rdaFit, newdata = testing)
confusionMatrix(data = rdaClasses, reference = testing$Class)
## ----rs----------------------------------------------
## Pool the resampling results of the two fits under the labels
## "pls" and "rda", then summarise them.
resamps <- resamples(
  list(
    pls = plsFit,
    rda = rdaFit
  )
)
summary(resamps)
## ----BA----------------------------------------------
## Plot the pooled resampling results; `what = "BlandAltman"` selects
## the kind of plot that is drawn.
xyplot(resamps, what = "BlandAltman")
## ----diffs-------------------------------------------
## Apply diff() to the pooled resamples and summarise the result
## (presumably pairwise between-model differences; confirm against the
## caret documentation).
diffs <- diff(resamps)
summary(diffs)
##