File: mold.R

package info (click to toggle)
r-cran-hardhat 1.2.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,656 kB
  • sloc: sh: 13; makefile: 2
file content (180 lines) | stat: -rw-r--r-- 5,136 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#' Mold data for modeling
#'
#' @description
#'
#' `mold()` applies the appropriate processing steps required to get training
#' data ready to be fed into a model. It does this through the use of various
#' _blueprints_ that understand how to preprocess data that come in various
#' forms, such as a formula or a recipe.
#'
#' All blueprints have consistent return values with the others, but each is
#' unique enough to have its own help page. Click through below to learn
#' how to use each one in conjunction with `mold()`.
#'
#' * XY Method - [default_xy_blueprint()]
#'
#' * Formula Method - [default_formula_blueprint()]
#'
#' * Recipes Method - [default_recipe_blueprint()]
#'
#' @param x An object. See the method specific implementations linked in the
#' Description for more information.
#'
#' @param ... Not used.
#'
#' @return
#'
#' A named list containing 4 elements:
#'
#'  - `predictors`: A tibble containing the molded predictors to be used in the
#'  model.
#'
#'  - `outcome`: A tibble containing the molded outcomes to be used in the
#'  model.
#'
#'  - `blueprint`: A method specific `"hardhat_blueprint"` object for use when
#'  making predictions.
#'
#'  - `extras`: Either `NULL` if the blueprint returns no extra information,
#'  or a named list containing the extra information.
#'
#' @examples
#' # See the method specific documentation linked in Description
#' # for the details of each blueprint, and more examples.
#'
#' # XY
#' mold(iris[, "Sepal.Width", drop = FALSE], iris$Species)
#'
#' # Formula
#' mold(Species ~ Sepal.Width, iris)
#'
#' # Recipe
#' library(recipes)
#' mold(recipe(Species ~ Sepal.Width, iris), iris)
#' @export
mold <- function(x, ...) {
  UseMethod("mold")
}

#' @export
mold.default <- function(x, ...) {
  abort_unknown_mold_class(x)
}

#' @rdname default_xy_blueprint
#' @export
mold.data.frame <- function(x, y, ..., blueprint = NULL) {
  validate_empty_dots(...)

  if (is.null(blueprint)) {
    blueprint <- default_xy_blueprint()
  }

  validate_is_xy_blueprint(blueprint)

  run_mold(blueprint, x = x, y = y)
}

#' @rdname default_xy_blueprint
#' @export
mold.matrix <- mold.data.frame

#' @rdname default_formula_blueprint
#' @export
mold.formula <- function(formula, data, ..., blueprint = NULL) {
  validate_empty_dots(...)

  if (is.null(blueprint)) {
    blueprint <- default_formula_blueprint()
  }

  validate_is_formula_blueprint(blueprint)

  blueprint <- update_blueprint(blueprint = blueprint, formula = formula)

  run_mold(blueprint, data = data)
}

#' @rdname default_recipe_blueprint
#' @export
mold.recipe <- function(x, data, ..., blueprint = NULL) {
  validate_empty_dots(...)

  validate_recipes_available()

  if (is.null(blueprint)) {
    blueprint <- default_recipe_blueprint()
  }

  validate_is_recipe_blueprint(blueprint)

  blueprint <- update_blueprint(blueprint = blueprint, recipe = x)

  run_mold(blueprint, data = data)
}

# ------------------------------------------------------------------------------

#' `mold()` according to a blueprint
#'
#' @description
#' This is a developer facing function that is _only_ used if you are creating
#' your own blueprint subclass. It is called from [mold()] and dispatches off
#' the S3 class of the `blueprint`. This gives you an opportunity to mold the
#' data in a way that is specific to your blueprint.
#'
#' `run_mold()` will be called with different arguments depending on the
#' interface to `mold()` that is used:
#'
#' - XY interface:
#'   - `run_mold(blueprint, x = x, y = y)`
#'
#' - Formula interface:
#'   - `run_mold(blueprint, data = data)`
#'   - Additionally, the `blueprint` will have been updated to contain the
#'   `formula`.
#'
#' - Recipe interface:
#'   - `run_mold(blueprint, data = data)`
#'   - Additionally, the `blueprint` will have been updated to contain the
#'   `recipe`.
#'
#' If you write a blueprint subclass for [new_xy_blueprint()],
#' [new_recipe_blueprint()], or [new_formula_blueprint()] then your `run_mold()`
#' method signature must match whichever interface listed above will be used.
#'
#' If you write a completely new blueprint inheriting only from
#' [new_blueprint()] and write a new [mold()] method (because you aren't using
#' an xy, formula, or recipe interface), then you will have full control over
#' how `run_mold()` will be called.
#'
#' @param blueprint A preprocessing blueprint.
#'
#' @param ... Not used. Required for extensibility.
#'
#' @return
#' `run_mold()` methods return the object that is then immediately returned from
#' `mold()`. See the return value section of [mold()] to understand what the
#' structure of the return value should look like.
#'
#' @name run-mold
#' @order 1
#' @export
#' @examples
#' bp <- default_xy_blueprint()
#'
#' outcomes <- mtcars["mpg"]
#' predictors <- mtcars
#' predictors$mpg <- NULL
#'
#' run_mold(bp, x = predictors, y = outcomes)
run_mold <- function(blueprint, ...) {
  UseMethod("run_mold")
}

#' @export
run_mold.default <- function(blueprint, ...) {
  class <- class(blueprint)[[1L]]
  message <- glue("No `run_mold()` method provided for an object of type <{class}>.")
  abort(message)
}