% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/join.R
\name{filter-joins}
\alias{filter-joins}
\alias{semi_join}
\alias{semi_join.data.frame}
\alias{anti_join}
\alias{anti_join.data.frame}
\title{Filtering joins}
\usage{
semi_join(x, y, by = NULL, copy = FALSE, ...)
\method{semi_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never"))
anti_join(x, y, by = NULL, copy = FALSE, ...)
\method{anti_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never"))
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}
\item{by}{A join specification created with \code{\link[=join_by]{join_by()}}, or a character
vector of variables to join by.
If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.
To join on different variables between \code{x} and \code{y}, use a \code{\link[=join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
To join by multiple variables, use a \code{\link[=join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.
\code{\link[=join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[=join_by]{?join_by} for details on
these types of joins.
For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[=cross_join]{cross_join()}}.}
\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}. This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}
\item{...}{Other parameters passed on to methods.}
\item{na_matches}{Should two \code{NA} or two \code{NaN} values match?
\itemize{
\item \code{"na"}, the default, treats two \code{NA} or two \code{NaN} values as equal, like
\code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}.
\item \code{"never"} treats two \code{NA} or two \code{NaN} values as different, and will
never match them together or to any other values. This is similar to joins
for database sources and to \code{base::merge(incomparables = NA)}.
}}
}
\value{
An object of the same type as \code{x}. The output has the following properties:
\itemize{
\item Rows are a subset of the input, but appear in the same order.
\item Columns are not modified.
\item Data frame attributes are preserved.
\item Groups are taken from \code{x}. The number of groups may be reduced.
}
}
\description{
Filtering joins filter rows from \code{x} based on the presence or absence
of matches in \code{y}:
\itemize{
\item \code{semi_join()} returns all rows from \code{x} with a match in \code{y}.
\item \code{anti_join()} returns all rows from \code{x} with\strong{out} a match in \code{y}.
}
}
\section{Methods}{
These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.
Methods available in currently loaded packages:
\itemize{
\item \code{semi_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("semi_join")}.
\item \code{anti_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("anti_join")}.
}
}
\examples{
# "Filtering" joins keep cases from the LHS
band_members \%>\% semi_join(band_instruments)
band_members \%>\% anti_join(band_instruments)
# To suppress the message about joining variables, supply `by`
band_members \%>\% semi_join(band_instruments, by = join_by(name))
# This is good practice in production code
}
\seealso{
Other joins:
\code{\link{cross_join}()},
\code{\link{mutate-joins}},
\code{\link{nest_join}()}
}
\concept{joins}