1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
## TODO use foreach to parallelize
#' @rdname predict.train
#' @importFrom utils flush.console
#' @export
extractProb <- function(models,
testX = NULL,
testY = NULL,
unkX = NULL,
unkOnly = !is.null(unkX) & is.null(testX),
verbose = FALSE)
{
objectNames <- names(models)
if(is.null(objectNames)) objectNames <- paste("Object", 1:length(models), sep = "")
if(any(unlist(lapply(models, function(x) is.null(x$modelInfo$prob)))))
stop("only classification models that produce probabilities are allowed")
obsLevels <- levels(models[[1]])
if(!unkOnly) {
trainX <- models[[1]]$trainingData[,!(colnames(models[[1]]$trainingData) %in% ".outcome"), drop = FALSE]
trainY <- models[[1]]$trainingData$.outcome
}
if(verbose)
{
cat("Number of training samples:", length(trainY), "\n")
cat("Number of test samples: ", length(testY), "\n\n")
}
predProb <- predClass <- obs <- modelName <- dataType <- objName <- NULL
if(!is.null(testX))
{
if(!is.data.frame(testX)) testX <- as.data.frame(testX)
hasNa <- apply(testX, 1, function(data) any(is.na(data)))
if(verbose) cat("There were ", sum(hasNa), "rows with missing values\n\n"); flush.console()
}
for(i in seq(along = models))
{
if(verbose) cat("starting ", models[[i]]$method, "\n"); flush.console()
if(!unkOnly) {
tempTrainProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
trainX,
models[[i]]$preProcess)
tempTrainPred <- apply(tempTrainProb, 1, which.max)
tempTrainPred <- colnames(tempTrainProb)[tempTrainPred]
tempTrainPred <- factor(tempTrainPred, levels = obsLevels)
if(verbose) cat(models[[i]]$method, ":", length(tempTrainPred), "training predictions were added\n"); flush.console()
predProb <- if(is.null(predProb)) tempTrainProb else rbind(predProb, tempTrainProb)
predClass <- c(predClass, as.character(tempTrainPred))
obs <- c(obs, as.character(trainY))
modelName <- c(modelName, rep(models[[i]]$method, length(tempTrainPred)))
objName <- c(objName, rep(objectNames[[i]], length(tempTrainPred)))
dataType <- c(dataType, rep("Training", length(tempTrainPred)))
# Test Data
if(!is.null(testX) & !is.null(testY)) {
if(!is.data.frame(testX)) testX <- as.data.frame(testX)
tempX <- testX
tempY <- testY
tempX$.outcome <- NULL
tempTestProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)
tempTestPred <- apply(tempTestProb, 1, which.max)
tempTestPred <- colnames(tempTestProb)[tempTestPred]
tempTestPred <- factor(tempTestPred, levels = obsLevels)
if(verbose) cat(models[[i]]$method, ":", length(tempTestPred), "test predictions were added\n")
predProb <- if(is.null(predProb)) tempTestProb else rbind(predProb, tempTestProb)
predClass <- c(predClass, as.character(tempTestPred))
obs <- c(obs, as.character(testY))
modelName <- c(modelName, rep(models[[i]]$method, length(tempTestPred)))
objName <- c(objName, rep(objectNames[[i]], length(tempTestPred)))
dataType <- c(dataType, rep("Test", length(tempTestPred)))
}
}
# Unknown Data
if(!is.null(unkX))
{
if(!is.data.frame(unkX)) unkX <- as.data.frame(unkX)
tempX <- unkX
tempX$.outcome <- NULL
tempUnkProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)
tempUnkPred <- apply(tempUnkProb, 1, which.max)
tempUnkPred <- colnames(tempUnkProb)[tempUnkPred]
tempUnkPred <- factor(tempUnkPred, levels = obsLevels)
if(verbose) cat(models[[i]]$method, ":", length(tempUnkPred), "unknown predictions were added\n")
predProb <- if(is.null(predProb)) tempUnkProb else rbind(predProb, tempUnkProb)
predClass <- c(predClass, as.character(tempUnkPred))
obs <- c(obs, rep(NA, length(tempUnkPred)))
modelName <- c(modelName, rep(models[[i]]$method, length(tempUnkPred)))
objName <- c(objName, rep(objectNames[[i]], length(tempUnkPred)))
dataType <- c(dataType, rep("Unknown", length(tempUnkPred)))
}
if(verbose) cat("\n")
}
predClass <- factor(predClass, levels = obsLevels)
obs <- factor(obs, levels = obsLevels)
out <- data.frame(predProb)
out$obs <- obs
out$pred <- predClass
out$model <- modelName
out$dataType <- dataType
out$object <- objName
out
}
|