File: recode_into.Rd

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,300 kB
  • sloc: sh: 13; makefile: 2
file content (126 lines) | stat: -rw-r--r-- 3,590 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/recode_into.R
\name{recode_into}
\alias{recode_into}
\title{Recode values from one or more variables into a new variable}
\usage{
recode_into(
  ...,
  data = NULL,
  default = NA,
  overwrite = TRUE,
  preserve_na = FALSE,
  verbose = TRUE
)
}
\arguments{
\item{...}{A sequence of two-sided formulas, where the left hand side (LHS)
is a logical matching condition that determines which values match this case.
The LHS of this formula is also called "recode pattern" (e.g., in messages).
The right hand side (RHS) indicates the replacement value.}

\item{data}{Optional, name of a data frame. This can be used to avoid writing
the data name multiple times in \code{...}. See 'Examples'.}

\item{default}{Indicates the default value that is chosen when no match in
the formulas in \code{...} is found. If not provided, \code{NA} is used as default
value.}

\item{overwrite}{Logical, if \code{TRUE} (default) and more than one recode pattern
applies to the same case, already recoded values will be overwritten by subsequent
recode patterns. If \code{FALSE}, previously recoded cases will not be altered by later
recode patterns that would apply to those cases again. A warning message is
printed to alert users to such situations and to help avoid unintentional recodings.}

\item{preserve_na}{Logical, if \code{TRUE} and \code{default} is not \code{NA}, missing
values in the original variable will be set back to \code{NA} in the recoded
variable (unless overwritten by other recode patterns). If \code{FALSE}, missing
values in the original variable will be recoded to \code{default}. Setting
\code{preserve_na = TRUE} prevents unintentional overwriting of missing values
with \code{default}, which means that you won't find valid values where the
original data only had missing values. See 'Examples'.}

\item{verbose}{Toggle warnings.}
}
\value{
A vector with recoded values.
}
\description{
This function recodes values from one or more variables into a new variable.
It is a convenient way to avoid nested \code{\link[=ifelse]{ifelse()}} statements, and
works similarly to \code{dplyr::case_when()}.
}
\examples{
x <- 1:30
recode_into(
  x > 15 ~ "a",
  x > 10 & x <= 15 ~ "b",
  default = "c"
)

x <- 1:10
# default behaviour: second recode pattern "x > 5" overwrites
# some of the formerly recoded cases from pattern "x >= 3 & x <= 7"
recode_into(
  x >= 3 & x <= 7 ~ 1,
  x > 5 ~ 2,
  default = 0,
  verbose = FALSE
)

# setting "overwrite = FALSE" will not alter formerly recoded cases
recode_into(
  x >= 3 & x <= 7 ~ 1,
  x > 5 ~ 2,
  default = 0,
  overwrite = FALSE,
  verbose = FALSE
)

set.seed(123)
d <- data.frame(
  x = sample(1:5, 30, TRUE),
  y = sample(letters[1:5], 30, TRUE),
  stringsAsFactors = FALSE
)

# from different variables into new vector
recode_into(
  d$x \%in\% 1:3 & d$y \%in\% c("a", "b") ~ 1,
  d$x > 3 ~ 2,
  default = 0
)

# no need to write name of data frame each time
recode_into(
  x \%in\% 1:3 & y \%in\% c("a", "b") ~ 1,
  x > 3 ~ 2,
  data = d,
  default = 0
)

# handling of missing values
d <- data.frame(
  x = c(1, NA, 2, NA, 3, 4),
  y = c(1, 11, 3, NA, 5, 6)
)
# first NA in x is overwritten by valid value from y
# we have no known value for second NA in x and y,
# thus we get one NA in the result
recode_into(
  x <= 3 ~ 1,
  y > 5 ~ 2,
  data = d,
  default = 0,
  preserve_na = TRUE
)
# first NA in x is overwritten by valid value from y
# default value is used for second NA
recode_into(
  x <= 3 ~ 1,
  y > 5 ~ 2,
  data = d,
  default = 0,
  preserve_na = FALSE
)
}