% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rank.R
\name{row_number}
\alias{row_number}
\alias{min_rank}
\alias{dense_rank}
\title{Integer ranking functions}
\usage{
row_number(x)
min_rank(x)
dense_rank(x)
}
\arguments{
\item{x}{A vector to rank.

By default, the smallest values will get the smallest ranks. Use \code{\link[=desc]{desc()}}
to reverse the direction so the largest values get the smallest ranks.

Missing values will be given rank \code{NA}. Use \code{coalesce(x, Inf)} or
\code{coalesce(x, -Inf)} if you want to treat them as the largest or smallest
values, respectively.

To rank by multiple columns at once, supply a data frame.}
}
\value{
An integer vector.
}
\description{
Three ranking functions inspired by SQL2003. They differ primarily in how
they handle ties:
\itemize{
\item \code{row_number()} gives every input a unique rank, so that \code{c(10, 20, 20, 30)}
would get ranks \code{c(1, 2, 3, 4)}. It's equivalent to
\code{rank(ties.method = "first")}.
\item \code{min_rank()} gives every tie the same (smallest) value so that
\code{c(10, 20, 20, 30)} gets ranks \code{c(1, 2, 2, 4)}. It's the way that ranks
are usually computed in sports and is equivalent to
\code{rank(ties.method = "min")}.
\item \code{dense_rank()} works like \code{min_rank()}, but doesn't leave any gaps,
so that \code{c(10, 20, 20, 30)} gets ranks \code{c(1, 2, 2, 3)}.
}
}
\examples{
x <- c(5, 1, 3, 2, 2, NA)
row_number(x)
min_rank(x)
dense_rank(x)
# Ranking functions can be used in `filter()` to select top/bottom rows
df <- data.frame(
grp = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
x = c(3, 2, 1, 1, 2, 2, 1, 1, 1),
y = c(1, 3, 2, 3, 2, 2, 4, 1, 2),
id = 1:9
)
# Always gives exactly 1 row per group
df \%>\% group_by(grp) \%>\% filter(row_number(x) == 1)
# May give more than 1 row per group if there are ties
df \%>\% group_by(grp) \%>\% filter(min_rank(x) == 1)
# Rank by multiple columns (to break ties) by selecting them with `pick()`
df \%>\% group_by(grp) \%>\% filter(min_rank(pick(x, y)) == 1)
# See slice_min() and slice_max() for another way to tackle the same problem
# You can use row_number() without an argument to refer to the "current"
# row number.
df \%>\% group_by(grp) \%>\% filter(row_number() == 1)
# It's easiest to see what this does with mutate():
df \%>\% group_by(grp) \%>\% mutate(grp_id = row_number())
}
\seealso{
Other ranking functions:
\code{\link{ntile}()},
\code{\link{percent_rank}()}
}
\concept{ranking functions}