File: dataDiagnosis.R

package info (click to toggle)
r-cran-semtools 0.5.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,204 kB
  • sloc: makefile: 2
file content (319 lines) | stat: -rw-r--r-- 10,548 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
### Sunthud Pornprasertmanit & Terrence D. Jorgensen
### Last updated: 2 October 2024
### Higher-order moments. Initial version from the simsem package.



##' Finding skewness
##'
##' Finding skewness (\eqn{g_{1}}) of an object
##'
##' The skewness computed by default is \eqn{g_{1}}, the third standardized
##' moment of the empirical distribution of `object`.
##' The population parameter skewness \eqn{\gamma_{1}} formula is
##'
##' \deqn{\gamma_{1} = \frac{\mu_{3}}{\mu^{3/2}_{2}},}
##'
##' where \eqn{\mu_{i}} denotes the \eqn{i} order central moment.
##'
##' The skewness formula for sample statistic \eqn{g_{1}} is
##'
##' \deqn{g_{1} = \frac{k_{3}}{k^{2}_{2}},}
##'
##' where \eqn{k_{i}} are the \eqn{i} order *k*-statistic.
##'
##' The standard error of the skewness is
##'
##' \deqn{Var(\hat{g}_1) = \frac{6}{N}}
##'
##' where \eqn{N} is the sample size.
##'
##'
##' @importFrom stats pnorm
##'
##' @param object A vector used to find a skewness
##' @param population `TRUE` to compute the parameter formula. `FALSE`
##' to compute the sample statistic formula.
##' @return A value of a skewness with a test statistic if the population is
##' specified as `FALSE`
##' @author Sunthud Pornprasertmanit (\email{psunthud@@gmail.com})
##' @seealso \itemize{
##'   \item [kurtosis()] Find the univariate excessive kurtosis
##'    of a variable
##'   \item [mardiaSkew()] Find Mardia's multivariate skewness
##'    of a set of variables
##'   \item [mardiaKurtosis()] Find the Mardia's multivariate
##'    kurtosis of a set of variables
##'  }
##' @references Weisstein, Eric W. (n.d.). *Skewness*. Retrived from
##'  *MathWorld*--A Wolfram Web Resource:
##'  <http://mathworld.wolfram.com/Skewness.html>
##' @examples
##'
##' skew(1:5)
##'
##' @export
skew <- function(object, population = FALSE) {
	if(any(is.na(object))) {
		object <- object[!is.na(object)]
		warning("Missing observations are removed from a vector.")
	}
	if(population) {
	  out <- centralMoment(object, 3) / (centralMoment(object, 2)^(3/2))
	} else {
		est <- kStat(object, 3) / (kStat(object, 2)^(3/2))
		se <- sqrt(6/length(object))
		z <- est/se
		p <- (1 - pnorm(abs(z)))*2
		out <- c("skew (g1)"=est, se=se, z=z, p=p)
	}

  class(out) <- c("lavaan.vector", "numeric")
  out
}



##' Finding excessive kurtosis
##'
##' Finding excessive kurtosis (\eqn{g_{2}}) of an object
##'
##' The excessive kurtosis computed by default is \eqn{g_{2}}, the fourth
##' standardized moment of the empirical distribution of `object`.
##' The population parameter excessive kurtosis \eqn{\gamma_{2}} formula is
##'
##' \deqn{\gamma_{2} = \frac{\mu_{4}}{\mu^{2}_{2}} - 3,}
##'
##' where \eqn{\mu_{i}} denotes the \eqn{i} order central moment.
##'
##' The excessive kurtosis formula for sample statistic \eqn{g_{2}} is
##'
##' \deqn{g_{2} = \frac{k_{4}}{k^{2}_{2}} - 3,}
##'
##' where \eqn{k_{i}} are the \eqn{i} order *k*-statistic.
##'
##' The standard error of the excessive kurtosis is
##'
##' \deqn{Var(\hat{g}_{2}) = \frac{24}{N}}
##'
##' where \eqn{N} is the sample size.
##'
##'
##' @importFrom stats pnorm
##'
##' @param object A vector used to find a excessive kurtosis
##' @param population `TRUE` to compute the parameter formula. `FALSE`
##' to compute the sample statistic formula.
##' @return A value of an excessive kurtosis with a test statistic if the
##' population is specified as `FALSE`
##' @author Sunthud Pornprasertmanit (\email{psunthud@@gmail.com})
##' @seealso \itemize{
##'   \item [skew()] Find the univariate skewness of a variable
##'   \item [mardiaSkew()] Find the Mardia's multivariate
##'    skewness of a set of variables
##'   \item [mardiaKurtosis()] Find the Mardia's multivariate kurtosis
##'    of a set of variables
##' }
##' @references Weisstein, Eric W. (n.d.). *Kurtosis.* Retrieved from
##' *MathWorld*--A Wolfram Web Resource:
##' <http://mathworld.wolfram.com/Kurtosis.html>
##'
##' @examples
##'
##' kurtosis(1:5)
##'
##' @export
kurtosis <- function(object, population = FALSE) {
	if(any(is.na(object))) {
		object <- object[!is.na(object)]
		warning("Missing observations are removed from a vector.")
	}
	if(population) {
	  out <- (centralMoment(object, 4) / (centralMoment(object, 2)^2)) - 3
	} else {
		est <- kStat(object, 4) / (kStat(object, 2)^(2))
		se <- sqrt(24/length(object))
		z <- est/se
		p <- (1 - pnorm(abs(z)))*2
		out <- c("Excess Kur (g2)"=est, se=se, z=z, p=p)
	}

  class(out) <- c("lavaan.vector", "numeric")
  out
}



##' Finding Mardia's multivariate skewness
##'
##' Finding Mardia's multivariate skewness of multiple variables
##'
##' The Mardia's multivariate skewness formula (Mardia, 1970) is
##'  \deqn{ b_{1, d} = \frac{1}{n^2}\sum^n_{i=1}\sum^n_{j=1}\left[
##'  \left(\bold{X}_i - \bold{\bar{X}} \right)^{'} \bold{S}^{-1}
##'  \left(\bold{X}_j - \bold{\bar{X}} \right) \right]^3, }
##' where \eqn{d} is the number of variables, \eqn{X} is the target dataset
##' with multiple variables, \eqn{n} is the sample size, \eqn{\bold{S}} is
##' the sample covariance matrix of the target dataset, and \eqn{\bold{\bar{X}}}
##' is the mean vectors of the target dataset binded in \eqn{n} rows.
##' When the population multivariate skewness is normal, the
##' \eqn{\frac{n}{6}b_{1,d}} is asymptotically distributed as \eqn{\chi^2}
##' distribution with \eqn{d(d + 1)(d + 2)/6} degrees of freedom.
##'
##'
##' @importFrom stats cov pchisq
##'
##' @param dat The target matrix or data frame with multiple variables
##' @param use Missing data handling method from the [stats::cov()]
##' function.
##' @return A value of a Mardia's multivariate skewness with a test statistic
##' @author Sunthud Pornprasertmanit (\email{psunthud@@gmail.com})
##' @seealso \itemize{
##'   \item [skew()] Find the univariate skewness of a variable
##'   \item [kurtosis()] Find the univariate excessive
##'     kurtosis of a variable
##'   \item [mardiaKurtosis()] Find the Mardia's multivariate
##'     kurtosis of a set of variables
##' }
##' @references Mardia, K. V. (1970). Measures of multivariate skewness and
##'   kurtosis with applications. *Biometrika, 57*(3), 519--530.
##'   \doi{10.2307/2334770}
##' @examples
##'
##' library(lavaan)
##' mardiaSkew(HolzingerSwineford1939[ , paste0("x", 1:9)])
##'
##' @export
mardiaSkew <- function(dat, use = "everything") {
	centeredDat <- scale(dat, center=TRUE, scale=FALSE)
	invS <- solve(cov(dat, use = use))
	FUN <- function(vec1, vec2, invS) {
		as.numeric(t(as.matrix(vec1)) %*% invS %*% as.matrix(vec2))
	}
	FUN2 <- function(vec1, listVec2, invS) {
		sapply(listVec2, FUN, vec1=vec1, invS=invS)
	}
	indivTerm <- sapply(as.list(data.frame(t(centeredDat))), FUN2,
	                    listVec2=as.list(data.frame(t(centeredDat))), invS=invS)

	b1d <- sum(indivTerm^3, na.rm = TRUE) / (nrow(dat)^2)
	d <- ncol(dat)
	chi <- nrow(dat) * b1d / 6
	df <- d * (d + 1) * (d + 2) / 6
	p <- pchisq(chi, df = df, lower.tail = FALSE)

	out <- c(b1d = b1d, chi = chi, df=df, p=p)
	class(out) <- c("lavaan.vector", "numeric")
	return(out)
}



##' Finding Mardia's multivariate kurtosis
##'
##' Finding Mardia's multivariate kurtosis of multiple variables
##'
##' The Mardia's multivariate kurtosis formula (Mardia, 1970) is
##'  \deqn{ b_{2, d} = \frac{1}{n}\sum^n_{i=1}\left[ \left(\bold{X}_i -
##'  \bold{\bar{X}} \right)^{'} \bold{S}^{-1} \left(\bold{X}_i -
##'  \bold{\bar{X}} \right) \right]^2, }
##' where \eqn{d} is the number of variables, \eqn{X} is the target
##' dataset with multiple variables, \eqn{n} is the sample size, \eqn{\bold{S}}
##' is the sample covariance matrix of the target dataset, and
##' \eqn{\bold{\bar{X}}} is the mean vectors of the target dataset binded in
##' \eqn{n} rows. When the population multivariate kurtosis is normal, the
##' \eqn{b_{2,d}} is asymptotically distributed as normal distribution with the
##' mean of \eqn{d(d + 2)} and variance of \eqn{8d(d + 2)/n}.
##'
##'
##' @importFrom stats cov pnorm
##'
##' @param dat The target matrix or data frame with multiple variables
##' @param use Missing data handling method from the [stats::cov()]
##' function.
##' @return A value of a Mardia's multivariate kurtosis with a test statistic
##' @author Sunthud Pornprasertmanit (\email{psunthud@@gmail.com})
##' @seealso \itemize{
##'  \item [skew()] Find the univariate skewness of a variable
##'  \item [kurtosis()] Find the univariate excessive kurtosis
##'   of a variable
##'  \item [mardiaSkew()] Find the Mardia's multivariate skewness
##'   of a set of variables
##' }
##' @references Mardia, K. V. (1970). Measures of multivariate skewness and
##'  kurtosis with applications. *Biometrika, 57*(3), 519--530.
##'  \doi{10.2307/2334770}
##' @examples
##'
##' library(lavaan)
##' mardiaKurtosis(HolzingerSwineford1939[ , paste0("x", 1:9)])
##'
##' @export
mardiaKurtosis <- function(dat, use = "everything") {
	centeredDat <- scale(dat, center=TRUE, scale=FALSE)
	invS <- solve(cov(dat, use = use))
	FUN <- function(vec, invS) {
		as.numeric(t(as.matrix(vec)) %*% invS %*% as.matrix(vec))
	}
	indivTerm <- sapply(as.list(data.frame(t(centeredDat))), FUN, invS=invS)

	b2d <- sum(indivTerm^2, na.rm = TRUE) / nrow(dat)
	d <- ncol(dat)
	m <- d * (d + 2)
	v <- 8 * d * (d + 2) / nrow(dat)
	z <- (b2d - m)/sqrt(v)
	p <- pnorm(-abs(z)) * 2

	out <- c(b2d = b2d, z = z, p=p)
	class(out) <- c("lavaan.vector", "numeric")
	return(out)
}



## ----------------
## Hidden Functions
## ----------------

## centralMoment
## Calculate central moments of a variable
## Arguments:
##	 x: vector of a variable
## 	 ord: order of the moment
## 	 weight: weight variable
centralMoment <- function(x, ord) {
  if(ord < 2) stop("Central moment can be calculated for order 2 or more in an integer.")
  wm <- mean(x)
  result <- sum((x - wm)^(ord))/length(x)
  return(result)
}
## Example
## centralMoment(1:5, 2)

## kStat
## Calculate the k-statistic (i.e., unbiased estimator of a cumulant) of a variable
## Arguments:
##	  x: vector of a variable
## 	ord: order of the k-statistics
kStat <- function(x, ord) {
  # Formula from mathworld wolfram
  n <- length(x)
  if(ord == 1) {
    return(mean(x))
  } else if (ord == 2) {
    return(centralMoment(x, 2) * n / (n - 1))
  } else if (ord == 3) {
    return(centralMoment(x, 3) * n^2 / ((n - 1) * (n - 2)))
  } else if (ord == 4) {
    num1 <- n^2
    num2 <- (n + 1) * centralMoment(x, 4)
    num3 <- 3 * (n - 1) * centralMoment(x, 2)^2
    denom <- (n - 1) * (n - 2) * (n - 3)
    return((num1 * (num2 - num3))/denom)
  } else {
    stop("Order can be 1, 2, 3, or 4 only.")
  }
}
## Example
## kStat(1:5, 4)