% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_match.R
\name{data_match}
\alias{data_match}
\alias{data_filter}
\title{Return filtered or sliced data frame, or row indices}
\usage{
data_match(x, to, match = "and", return_indices = FALSE, drop_na = TRUE, ...)
data_filter(x, filter, ...)
}
\arguments{
\item{x}{A data frame.}
\item{to}{A data frame specifying the conditions that rows of \code{x} are
matched against (see 'Examples'). Note that if
\code{match} is a value other than \code{"and"}, the original row order might be
changed. See 'Details'.}
\item{match}{String, indicating with which logical operation matching
conditions should be combined. Can be \code{"and"} (or \code{"&"}), \code{"or"} (or \code{"|"})
or \code{"not"} (or \code{"!"}).}
\item{return_indices}{Logical, if \code{TRUE}, return the vector of rows that
can be used to filter the original data frame. If \code{FALSE} (default),
returns directly the filtered data frame instead of the row indices.}
\item{drop_na}{Logical, if \code{TRUE}, missing values (\code{NA}s) are removed before
filtering the data. This is the default behaviour, however, sometimes when
row indices are requested (i.e. \code{return_indices=TRUE}), it might be useful
to preserve \code{NA} values, so returned row indices match the row indices of
the original data frame.}
\item{...}{Not used.}
\item{filter}{A logical expression indicating which rows to keep, or a numeric
vector indicating the row indices of rows to keep. Can also be a string
representation of a logical expression, e.g. \code{filter = "x > 4"}. This might
be useful when used in packages to avoid defining undefined global variables.}
}
\value{
A filtered data frame, or the row indices that match the specified conditions.
}
\description{
Return a filtered (or sliced) data frame or row indices of a data frame that
match a specific condition. \code{data_filter()} works like \code{data_match()}, but works
with logical expressions or row indices of a data frame to specify matching
conditions.
}
\details{
For \code{data_match()}, if \code{match} is either \code{"or"} or \code{"not"}, the
original row order from \code{x} might be changed. If preserving row order is
required, use \code{data_filter()} instead.
\if{html}{\out{<div class="sourceCode">}}\preformatted{# mimics subset() behaviour, preserving original row order
head(data_filter(mtcars[c("mpg", "vs", "am")], vs == 0 | am == 1))
#> mpg vs am
#> Mazda RX4 21.0 0 1
#> Mazda RX4 Wag 21.0 0 1
#> Datsun 710 22.8 1 1
#> Hornet Sportabout 18.7 0 0
#> Duster 360 14.3 0 0
#> Merc 450SE 16.4 0 0
# re-sorting rows
head(data_match(mtcars[c("mpg", "vs", "am")],
data.frame(vs = 0, am = 1),
match = "or"))
#> mpg vs am
#> Mazda RX4 21.0 0 1
#> Mazda RX4 Wag 21.0 0 1
#> Hornet Sportabout 18.7 0 0
#> Duster 360 14.3 0 0
#> Merc 450SE 16.4 0 0
#> Merc 450SL 17.3 0 0
}\if{html}{\out{</div>}}
While \code{data_match()} works with data frames to match conditions against,
\code{data_filter()} is basically a wrapper around \verb{subset(subset = <filter>)}.
However, unlike \code{subset()}, it preserves label attributes and is useful when
working with labelled data.
}
\examples{
data_match(mtcars, data.frame(vs = 0, am = 1))
data_match(mtcars, data.frame(vs = 0, am = c(0, 1)))
# observations where "vs" is NOT 0 AND "am" is NOT 1
data_match(mtcars, data.frame(vs = 0, am = 1), match = "not")
# equivalent to
data_filter(mtcars, vs != 0 & am != 1)
# observations where EITHER "vs" is 0 OR "am" is 1
data_match(mtcars, data.frame(vs = 0, am = 1), match = "or")
# equivalent to
data_filter(mtcars, vs == 0 | am == 1)
# slice data frame by row indices
data_filter(mtcars, 5:10)
# Define a custom function containing data_filter() and pass variable names
# to it using curly brackets
my_filter <- function(data, variable) {
data_filter(data, {variable} <= 20)
}
my_filter(mtcars, "mpg")
# Pass complete filter-condition as string
my_filter <- function(data, condition) {
data_filter(data, {condition})
}
my_filter(mtcars, "am != 0")
# string can also be used directly as argument
data_filter(mtcars, "am != 0")
}
\seealso{
\itemize{
\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}}
\item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}}
\item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, \code{\link[=data_to_wide]{data_to_wide()}}, \code{\link[=data_rotate]{data_rotate()}}
\item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}}
\item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}}
\item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}}
\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}}
\item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}}
}
}