# File: Dummies.R
# Package: r-cran-recipes 1.0.4+dfsg-1 (area: main; suite: bookworm)
# Package size: 3,636 kB; package sloc: sh: 37; makefile: 2
# This file: 124 lines, 3,526 bytes
## ----setup, include=FALSE-----------------------------------------------------
# Chunk defaults for the rendered document.
# NOTE(review): `digits` does not look like a standard knitr chunk option; the
# printing precision is set by options(digits = 3) just below -- confirm.
knitr::opts_chunk$set(
  message = FALSE, digits = 3,
  collapse = TRUE, comment = "#>"
)

# Print numbers with three significant digits.
options(digits = 3)

# Attach recipes; every chunk that follows relies on it.
library(recipes)

## ----iris-base-rec------------------------------------------------------------
library(recipes)

# Keep a copy of Species in `original`; later chunks display it next to the
# columns generated from Species.
iris <- mutate(iris, original = Species)

# One-sided formula: every column of `iris` goes on the right-hand side.
iris_rec <- recipe(~ ., data = iris)
summary(iris_rec)

## ----iris-ref-cell------------------------------------------------------------
# Add a dummy-variable step for Species and prepare it on the iris data.
ref_cell <- prep(step_dummy(iris_rec, Species), training = iris)
summary(ref_cell)

# Distinct combinations of `original` and the Species* columns, i.e. a row for
# each factor level
ref_cell %>%
  bake(new_data = NULL, original, starts_with("Species")) %>%
  distinct()

## ----defaults-----------------------------------------------------------------
# Remember the contrast settings currently in force so they can be reinstated.
param <- getOption("contrasts")
param

## ----iris-helmert-------------------------------------------------------------
# Same settings, except that the "unordered" entry now names contr.helmert
go_helmert <- replace(param, "unordered", "contr.helmert")
options(contrasts = go_helmert)

# Rebuild the dummy variables while the new parameterization is active
helmert <- prep(step_dummy(iris_rec, Species), training = iris)
summary(helmert)

helmert %>%
  bake(new_data = NULL, original, starts_with("Species")) %>%
  distinct()

# Yuk; reinstate the settings saved in `param`
options(contrasts = param)

## ----iris-2int----------------------------------------------------------------
# Interaction between the two sepal measurements
iris_int <- prep(
  step_interact(iris_rec, ~ Sepal.Width:Sepal.Length),
  training = iris
)
summary(iris_int)

## ----mm-int-------------------------------------------------------------------
# Design matrix from model.matrix() for Species crossed with Sepal.Length
as.data.frame(model.matrix(~ Species * Sepal.Length, data = iris)) %>%
  # keep only rows 1, 51 and 101
  slice(c(1, 51, 101)) %>%
  as.data.frame()

## ----nope, eval = FALSE-------------------------------------------------------
#  # Must I do this?
#  iris_rec %>%
#    step_interact( ~ Species_versicolor:Sepal.Length +
#                     Species_virginica:Sepal.Length)

## ----iris-sel-----------------------------------------------------------------
# Create the dummy columns first, then let a selector inside the interaction
# formula pick them up instead of spelling out each column name.
iris_int <- prep(
  iris_rec %>%
    step_dummy(Species) %>%
    step_interact(~ starts_with("Species"):Sepal.Length),
  training = iris
)
summary(iris_int)

## ----sel-input, eval = FALSE--------------------------------------------------
#  starts_with("Species")

## ----sel-output, eval = FALSE-------------------------------------------------
#  (Species_versicolor + Species_virginica)

## ----int-form-----------------------------------------------------------------
# Print the prepared recipe itself
iris_int

## ----iris-dont----------------------------------------------------------------
# Interaction that uses the Species factor directly, with no dummy step before
# it (the chunk label suggests this is the pattern to avoid).
iris_int <- prep(
  step_interact(iris_rec, ~ Species:Sepal.Length),
  training = iris
)
summary(iris_int)

## ----one-hot------------------------------------------------------------------
# Same display as the earlier chunks, but with one_hot = TRUE in step_dummy()
prep(step_dummy(iris_rec, Species, one_hot = TRUE), training = iris) %>%
  bake(new_data = NULL, original, starts_with("Species")) %>%
  distinct()

## ----one-hot-two--------------------------------------------------------------
# One-hot columns under the contrast settings currently in force
hot_reference <- 
  iris_rec %>% 
  step_dummy(Species, one_hot = TRUE) %>%
  prep(training = iris) %>%
  bake(new_data = NULL, original, starts_with("Species")) %>%
  distinct()

hot_reference

# Switch to the Helmert settings built above (`go_helmert`)
options(contrasts = go_helmert)

# Same recipe again, now prepared while the Helmert settings are active
hot_helmert <- 
  iris_rec %>% 
  step_dummy(Species, one_hot = TRUE) %>%
  prep(training = iris) %>%
  bake(new_data = NULL, original, starts_with("Species")) %>%
  distinct()

hot_helmert

# Reinstate the settings saved in `param`, as the iris-helmert chunk does, so
# the script does not leave the global contrasts option changed when it ends
options(contrasts = param)