File: rowVars.Rd

package info (click to toggle)
r-cran-matrixstats 1.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,104 kB
  • sloc: ansic: 7,300; sh: 11; makefile: 2
file content (147 lines) | stat: -rw-r--r-- 4,428 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rowVars.R
\name{rowVars}
\alias{rowVars}
\alias{colVars}
\title{Variance estimates for each row (column) in a matrix}
\usage{
rowVars(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE,
  center = NULL, dim. = dim(x), ..., useNames = TRUE)

colVars(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE,
  center = NULL, dim. = dim(x), ..., useNames = TRUE)
}
\arguments{
\item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified,
an N * K \code{\link[base]{vector}}.}

\item{rows}{A \code{\link[base]{vector}} indicating subset of rows to
operate over. If \code{\link[base]{NULL}}, no subsetting is done.}

\item{cols}{A \code{\link[base]{vector}} indicating subset of columns to
operate over. If \code{\link[base]{NULL}}, no subsetting is done.}

\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are
excluded.}

\item{refine}{If \code{\link[base:logical]{TRUE}}, \code{center} is \code{NULL}, and
\code{x} is \code{\link[base]{numeric}}, then extra effort is used to
calculate the average with greater numerical precision, otherwise not.}

\item{center}{(optional; a vector of length N (K)) If the row (column)
means are already estimated, they can be pre-specified using this argument.
This avoids re-estimating them.
\emph{Warning: It is important that a non-biased sample mean estimate is passed.
If not, then the variance estimate of the spread will also be biased.}
If \code{NULL} (default), the row/column means are estimated internally.}

\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of
length two specifying the dimension of \code{x}, also when not a
\code{\link[base]{matrix}}.  \emph{Comment:} The reason for this argument
being named with a period at the end is purely technical (we get a run-time
error if we try to name it \code{dim}).}

\item{...}{Additional arguments passed to \code{rowMeans()} and
\code{rowSums()}.}

\item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names
attributes of the result are set, otherwise not.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of
length N (K).
}
\description{
Variance estimates for each row (column) in a matrix.
}
\section{Providing center estimates}{

The sample variance is estimated as

  \eqn{n/(n-1) * mean((x - center)^2)},

where \eqn{center} is estimated as the sample mean, by default.
In matrixStats (< 0.58.0),

  \eqn{n/(n-1) * (mean(x^2) - center^2)}

was used.  Both formulas give the same result \emph{when} \code{center} is the
sample mean estimate.

Argument \code{center} can be used to provide an already existing estimate.
It is important that the sample mean estimate is passed.
If not, then the variance estimate of the spread will be biased.

For the time being, in order to lower the risk of such mistakes,
argument \code{center} is occasionally validated against the sample-mean
estimate.  If a discrepancy is detected, an informative error is
provided to prevent incorrect variance estimates from being used.
For performance reasons, this check is only performed once every 50 times.
The frequency can be controlled by R option
\code{matrixStats.vars.formula.freq}, whose default can be set by
environment variable \code{R_MATRIXSTATS_VARS_FORMULA_FREQ}.
}

\examples{
# Example script: row-wise and column-wise summaries of a small random
# matrix, followed by a comparison of two standard-deviation estimates
# on a larger matrix.

# Fix the RNG seed so the printed output is reproducible
set.seed(1)

# A 5-by-4 matrix of standard-normal draws
x <- matrix(rnorm(20), nrow = 5, ncol = 4)
print(x)

# Row averages
print(rowMeans(x))
print(rowMedians(x))

# Column averages
print(colMeans(x))
print(colMedians(x))


# Row variabilities
print(rowVars(x))
print(rowSds(x))
print(rowMads(x))
print(rowIQRs(x))

# Column variabilities
print(colVars(x))
print(colSds(x))
print(colMads(x))
print(colIQRs(x))

# Row ranges: three ways of obtaining the per-row (min, max) pairs
print(rowRanges(x))
print(cbind(rowMins(x), rowMaxs(x)))
print(cbind(rowOrderStats(x, which = 1), rowOrderStats(x, which = ncol(x))))

# Column ranges: same three approaches, per column
print(colRanges(x))
print(cbind(colMins(x), colMaxs(x)))
print(cbind(colOrderStats(x, which = 1), colOrderStats(x, which = nrow(x))))


# A larger 50-by-40 matrix for comparing two estimates of the spread
x <- matrix(rnorm(2000), nrow = 50, ncol = 40)

# Row standard deviations
# s1 is based on differences of x along each row; for independent draws
# with a common variance, a difference has twice that variance, hence
# the division by sqrt(2).  s2 is the direct estimate.
d <- rowDiffs(x)
s1 <- rowSds(d) / sqrt(2)
s2 <- rowSds(x)
print(summary(s1 - s2))

# Column standard deviations (same comparison, column-wise)
d <- colDiffs(x)
s1 <- colSds(d) / sqrt(2)
s2 <- colSds(x)
print(summary(s1 - s2))
}
\seealso{
See \code{rowMeans()} and \code{rowSums()} in
\code{\link[base]{colSums}}().
}
\author{
Henrik Bengtsson
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}