File: count.Rd

package info (click to toggle)
r-cran-dplyr 1.1.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,292 kB
  • sloc: cpp: 1,403; sh: 17; makefile: 7
file content (116 lines) | stat: -rw-r--r-- 4,044 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count-tally.R
\name{count}
\alias{count}
\alias{count.data.frame}
\alias{tally}
\alias{add_count}
\alias{add_tally}
\title{Count the observations in each group}
\usage{
count(x, ..., wt = NULL, sort = FALSE, name = NULL)

\method{count}{data.frame}(
  x,
  ...,
  wt = NULL,
  sort = FALSE,
  name = NULL,
  .drop = group_by_drop_default(x)
)

tally(x, wt = NULL, sort = FALSE, name = NULL)

add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated())

add_tally(x, wt = NULL, sort = FALSE, name = NULL)
}
\arguments{
\item{x}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr).}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group
by.}

\item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights.
Can be \code{NULL} or a variable:
\itemize{
\item If \code{NULL} (the default), counts the number of rows in each group.
\item If a variable, computes \code{sum(wt)} for each group.
}}

\item{sort}{If \code{TRUE}, will show the largest groups at the top.}

\item{name}{The name of the new column in the output.

If omitted, it will default to \code{n}. If there's already a column called \code{n},
it will use \code{nn}. If there are columns called \code{n} and \code{nn}, it'll use
\code{nnn}, and so on, adding \code{n}s until it gets a new name.}

\item{.drop}{Handling of factor levels that don't appear in the data, passed
on to \code{\link[=group_by]{group_by()}}.

For \code{count()}: if \code{FALSE}, it will include counts for empty groups (i.e. for
levels of factors that don't exist in the data).

\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} For \code{add_count()}: deprecated since it
can't actually affect the output.}
}
\value{
An object of the same type as \code{x}. \code{count()} and \code{add_count()}
group transiently, so the output has the same groups as the input.
}
\description{
\code{count()} lets you quickly count the unique values of one or more variables:
\code{df \%>\% count(a, b)} is roughly equivalent to
\code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}.
\code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent
to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts,
switching the summary from \code{n = n()} to \code{n = sum(wt)}.

\code{add_count()} and \code{add_tally()} are equivalent to \code{count()} and \code{tally()}
but use \code{mutate()} instead of \code{summarise()} so that they add a new column
with group-wise counts.
}
\examples{
# count() is a convenient way to get a sense of the distribution of
# values in a dataset
starwars \%>\% count(species)
starwars \%>\% count(species, sort = TRUE)
starwars \%>\% count(sex, gender, sort = TRUE)
starwars \%>\% count(birth_decade = round(birth_year, -1))

# use the `wt` argument to perform a weighted count. This is useful
# when the data has already been aggregated once
df <- tribble(
  ~name,    ~gender,   ~runs,
  "Max",    "male",       10,
  "Sandra", "female",      1,
  "Susan",  "female",      4
)
# counts rows:
df \%>\% count(gender)
# counts runs:
df \%>\% count(gender, wt = runs)

# When factors are involved, `.drop = FALSE` can be used to retain factor
# levels that don't appear in the data
df2 <- tibble(
  id = 1:5,
  type = factor(c("a", "c", "a", NA, "a"), levels = c("a", "b", "c"))
)
df2 \%>\% count(type)
df2 \%>\% count(type, .drop = FALSE)

# Or, using `group_by()`:
df2 \%>\% group_by(type, .drop = FALSE) \%>\% count()

# tally() is a lower-level function that assumes you've done the grouping
starwars \%>\% tally()
starwars \%>\% group_by(species) \%>\% tally()

# both count() and tally() have add_ variants that work like
# mutate() instead of summarise()
df \%>\% add_count(gender, wt = runs)
df \%>\% add_tally(wt = runs)
}