File: data_summary.Rd

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,300 kB
  • sloc: sh: 13; makefile: 2
file content (67 lines) | stat: -rw-r--r-- 2,057 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_summary.R
\name{data_summary}
\alias{data_summary}
\alias{data_summary.data.frame}
\title{Summarize data}
\usage{
data_summary(x, ...)

\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE)
}
\arguments{
\item{x}{A (grouped) data frame.}

\item{...}{One or more named expressions that define the new variable name
and the function to compute the summary statistic. Example:
\code{mean_sepal_width = mean(Sepal.Width)}. The expression can also be provided
as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}. The
summary function \code{n()} can be used to count the number of observations.}

\item{by}{Optional character vector, indicating the names of one or more
variables in the data frame. If supplied, the data will be split by these
variables and summary statistics will be computed for each group.}

\item{remove_na}{Logical. If \code{TRUE}, missing values are omitted from the
grouping variable. If \code{FALSE} (default), missing values are included as a
level in the grouping variable.}
}
\value{
A data frame with the requested summary statistics.
}
\description{
This function can be used to compute summary statistics for a
data frame or a matrix.
}
\examples{
# mean and standard deviation of "Sepal.Width" for the whole data frame
data(iris)
data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
# the same summary expressions, with the data split by "Species"
data_summary(
  iris,
  MW = mean(Sepal.Width),
  SD = sd(Sepal.Width),
  by = "Species"
)

# same as above, but passing a data frame grouped with data_group()
# instead of using the "by" argument
d <- data_group(iris, "Species")
data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))

# multiple grouping variables are passed to "by" as a character vector
data(mtcars)
data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))

# expressions can also be supplied as character strings
# of the form "name = expression"
data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear"))

# count observations within groups using the summary function n()
data_summary(mtcars, observations = n(), by = c("am", "gear"))

# first and last observations of "mpg" within groups,
# extracted by indexing the variable inside the expression
data_summary(
  mtcars,
  first = mpg[1],
  last = mpg[length(mpg)],
  by = c("am", "gear")
)
}