File: shrink.Rd

package info (click to toggle)
r-cran-hardhat 1.2.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,656 kB
  • sloc: sh: 13; makefile: 2
file content (57 lines) | stat: -rw-r--r-- 1,610 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shrink.R
\name{shrink}
\alias{shrink}
\title{Subset only required columns}
\usage{
shrink(data, ptype)
}
\arguments{
\item{data}{A data frame containing the data to subset.}

\item{ptype}{A data frame prototype containing the required columns.}
}
\value{
A tibble containing the required columns.
}
\description{
\code{shrink()} subsets \code{data} to only contain the required columns specified by
the prototype, \code{ptype}.
}
\details{
\code{shrink()} is called by \code{\link[=forge]{forge()}} before \code{\link[=scream]{scream()}} and before the actual
processing is done.
}
\examples{
# ---------------------------------------------------------------------------
# Setup

train <- iris[1:100, ]
test <- iris[101:150, ]

# ---------------------------------------------------------------------------
# shrink()

# mold() is run at model fit time
# and a formula preprocessing blueprint is recorded
x <- mold(log(Sepal.Width) ~ Species, train)

# Inside the result of mold() are the prototype tibbles
# for the predictors and the outcomes
ptype_pred <- x$blueprint$ptypes$predictors
ptype_out <- x$blueprint$ptypes$outcomes

# Pass the test data, along with a prototype, to
# shrink() to extract the prototype columns
shrink(test, ptype_pred)

# To extract the outcomes, just use the
# outcome prototype
shrink(test, ptype_out)

# shrink() makes sure that the columns
# required by `ptype` actually exist in the data
# and errors nicely when they don't
test2 <- subset(test, select = -Species)
try(shrink(test2, ptype_pred))
}