1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
#' Spatial Sign Preprocessing
#'
#' `step_spatialsign` is a *specification* of a recipe
#' step that will convert numeric data into a projection on to a
#' unit sphere.
#'
#' @inheritParams step_pca
#' @inheritParams step_center
#' @param na_rm A logical: should missing data be removed from the
#' norm computation?
#' @param columns A character string of variable names that will
#' be populated (eventually) by the `terms` argument.
#' @template step-return
#' @family multivariate transformation steps
#' @export
#' @details The spatial sign transformation projects the variables
#' onto a unit sphere and is related to global contrast
#' normalization. The spatial sign of a vector `w` is
#' `w/norm(w)`.
#'
#' The variables should be centered and scaled prior to the
#' computations.
#'
#' # Tidying
#'
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with column
#' `terms` (the columns that will be affected) is returned.
#'
#' @section Case weights:
#'
#' This step performs an unsupervised operation that can utilize case weights.
#' As a result, only frequency weights are allowed. For more
#' information, see the documentation in [case_weights] and the examples on
#' `tidymodels.org`.
#'
#' Unlike most, this step requires the case weights to be available when new
#' samples are processed (e.g., when `bake()` is used or `predict()` with a
#' workflow). To tell recipes that the case weights are required at bake time,
#' use
#' `recipe %>% update_role_requirements(role = "case_weights", bake = TRUE)`.
#' See [update_role_requirements()] for more information.
#'
#' @references Serneels, S., De Nolf, E., and Van Espen, P.
#' (2006). Spatial sign preprocessing: a simple way to impart
#' moderate robustness to multivariate estimators. *Journal of
#' Chemical Information and Modeling*, 46(3), 1402-1409.
#'
#' @examplesIf rlang::is_installed("modeldata")
#' data(biomass, package = "modeldata")
#'
#' biomass_tr <- biomass[biomass$dataset == "Training", ]
#' biomass_te <- biomass[biomass$dataset == "Testing", ]
#'
#' rec <- recipe(
#' HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
#' data = biomass_tr
#' )
#'
#' ss_trans <- rec %>%
#' step_center(carbon, hydrogen) %>%
#' step_scale(carbon, hydrogen) %>%
#' step_spatialsign(carbon, hydrogen)
#'
#' ss_obj <- prep(ss_trans, training = biomass_tr)
#'
#' transformed_te <- bake(ss_obj, biomass_te)
#'
#' plot(biomass_te$carbon, biomass_te$hydrogen)
#'
#' plot(transformed_te$carbon, transformed_te$hydrogen)
#'
#' tidy(ss_trans, number = 3)
#' tidy(ss_obj, number = 3)
step_spatialsign <-
function(recipe,
...,
role = "predictor",
na_rm = TRUE,
trained = FALSE,
columns = NULL,
skip = FALSE,
id = rand_id("spatialsign")) {
add_step(
recipe,
step_spatialsign_new(
terms = enquos(...),
role = role,
na_rm = na_rm,
trained = trained,
columns = columns,
skip = skip,
id = id,
case_weights = NULL
)
)
}
step_spatialsign_new <-
function(terms, role, na_rm, trained, columns, skip, id, case_weights) {
step(
subclass = "spatialsign",
terms = terms,
role = role,
na_rm = na_rm,
trained = trained,
columns = columns,
skip = skip,
id = id,
case_weights = case_weights
)
}
#' @export
prep.step_spatialsign <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
if (isFALSE(were_weights_used)) {
wts <- NULL
}
step_spatialsign_new(
terms = x$terms,
role = x$role,
na_rm = x$na_rm,
trained = TRUE,
columns = col_names,
skip = x$skip,
id = x$id,
case_weights = were_weights_used
)
}
#' @export
bake.step_spatialsign <- function(object, new_data, ...) {
check_new_data(names(object$columns), object, new_data)
col_names <- object$columns
if (isTRUE(object$case_weights)) {
wts_col <- purrr::map_lgl(new_data, hardhat::is_case_weights)
wts <- getElement(new_data, names(which(wts_col)))
wts <- as.double(wts)
} else {
wts <- 1
}
res <- as.matrix(new_data[, col_names])
res <- res / sqrt(rowSums((sqrt(1/wts) * res)^2, na.rm = object$na_rm))
res <- tibble::as_tibble(res)
new_data[, col_names] <- res
new_data
}
print.step_spatialsign <-
function(x, width = max(20, options()$width - 26), ...) {
title <- "Spatial sign on "
print_step(x$columns, x$terms, x$trained, title, width,
case_weights = x$case_weights)
invisible(x)
}
#' @rdname tidy.recipe
#' @export
tidy.step_spatialsign <- function(x, ...) {
res <- simple_terms(x, ...)
res$id <- x$id
res
}
|