1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
# Project: 2 * t Contingency Table
# adapted from Angie Zhu's getTable2xtNull.R
# Filename: Contingency2xt.R
# Last modified: 03.23.2012
# This function computes or simulates the null distribution of
# the Kruskal-Wallis statistics in a 2 x t contingency table.
# Treatment | 1 2 ... t | Total
# ---------------------------------------
# Response | |
# a | A_1 A_2 ... A_t | m
# b | B_1 B_2 ... B_t | n
# ---------------------------------------
# Total | d_1 d_2 ... d_t | N
# Arguments:
# Avec: integer vector of length tnum, containing the
# "a" reponses
# Bvec: integer vector of length tnum, containing the
# "b" reponses
# method: "asymptotic", "simulated", "exact",
# indicating the method of p-value calculation,
# dist: FALSE (default) means that the exact or
# estimated null distribution is not requested,
# otherwise it is.
# tab0: TRUE (default), when method = "simulated",
# the null distribution is given in tabular form,
# when tab0 = FALSE and method = "simulated", the
# null distribution is given as a single vector
# of all simulated values.
# Nsim: number of simulations to run, default 10^6
#
#
# Output: a list of class "kSamples" with components
# test.name = "2 x t Contingency Table"
# t = number of columns in the table
# KW.cont = 2 (or 3 vector) containing the observed KW
# value, its asymptotic (and the simulated
# or exact) P-value
# null.dist = a 2 x M matrix giving the M unique
# ordered KW values of the null
# distribution in the first column, and
# the corresponding probabilities
# (simulated or exact) in the second
# column. When method = "simulated" and
# tab0 = FALSE null.dist consists of a
# single vector of all simulated values.
# When dist = NULL or method = "asymptotic", only
# NULL is returned.
# method = the method used
# Nsim = number of simulations employed
contingency2xt <- function(Avec, Bvec, method=c("asymptotic","simulated","exact"),
dist = FALSE, tab0 = TRUE, Nsim=1e6) {
method <- match.arg(method)
m <- sum(Avec)
n <- sum(Bvec)
tnum <- length(Avec)
N <- m + n
dvec <- Avec + Bvec
KW.obs <- (N * (N - 1) / (m * n)) * (sum(Avec^2 / dvec) -
m^2/N )
pValue.asy <- 1 - pchisq(KW.obs,tnum-1)
if(dist == TRUE) Nsim <- min(Nsim,1e8)
# limits the size of null.dist
# whether method = "exact" or = "simulated"
if(method == "asymptotic"){
KW.cont <- c(KW.obs,pValue.asy)
null.dist <- NULL
}else{
ncomb <- choose(m + tnum - 1, tnum - 1)
if (ncomb <= Nsim && method == "exact") {
if(dist){
ans <- numeric(2+2*ncomb)
}else{
ans <- numeric(2)
}
out <- .C("contingency2xtExact0",
Avec = as.integer(Avec), Bvec = as.integer(Bvec),
tnum = as.integer(tnum),
ncomb = as.integer(ncomb),
getDist = as.integer(dist),
ans = as.double(ans), PACKAGE = "kSamples")
KW.obs <- out$ans[1]
KW.obs <- (N * (N - 1) / (m * n)) * (KW.obs -
m^2 / N)
KW.obs <- round(KW.obs, 6)
pValue <- out$ans[2]
if(dist){
out <- out$ans[-(1:2)]
out <- matrix(out, nrow=ncomb, ncol=2, byrow=FALSE)
out <- out[out[, 2] > 0, ]
prob <- out[ , 2]
KW <- (N * (N - 1) / (m * n)) * (
out[ , 1] - m^2 / N)
KW <- round(KW, 6)
KW.u <- sort(unique(KW))
k.u <- length(KW.u)
prob.u <- sapply(1:k.u, function(j) {
sum( prob[KW == KW.u[j]] ) })
null.dist <- cbind(KW.u,prob.u)
dimnames(null.dist) <- list(NULL,c("KW","prob"))
}else{
null.dist <- NULL
}
KW.cont <- c(KW.obs,pValue.asy,pValue)
}else{
method <- "simulated"
}
if( method == "simulated" ){
if( Nsim < 100) Nsim <- 100
if(dist){
ans <- numeric(2+Nsim)
}else{
ans <- numeric(2)
}
out <- .C("contingency2xtSim0",
Avec = as.integer(Avec), Bvec = as.integer(Bvec),
tnum = as.integer(tnum),
Nsim = as.integer(Nsim),
getDist = as.integer(dist),
ans = as.double(ans))
KW.obs <- out$ans[1]
KW.obs <- (N * (N - 1) / (m * n)) * (KW.obs - m^2 / N)
KW.obs <- round(KW.obs, 6)
pValue <- out$ans[2]
if(dist){
KW <- (N * (N - 1) / (m * n)) *
(round(out$ans[3:(Nsim+2)],6) - m^2 / N)
if(tab0==TRUE){
tab <- table(KW)/Nsim
null.dist <-
cbind(as.numeric(names(tab)),
as.vector(tab))
dimnames(null.dist) <-
list(NULL,c("KW","prob"))}else{
null.dist <- KW
}
}else{
null.dist <- NULL
}
KW.cont <- c(KW.obs,pValue.asy,pValue)
}
}
if(method=="asymptotic"){
names(KW.cont) <- c("observed KW",
" asympt. P-value")
}
if(method=="exact"){
names(KW.cont) <- c("observed KW",
" asympt. P-value","exact P-Value")
}
if(method=="simulated"){
names(KW.cont) <- c("observed KW",
" asympt. P-value","sim. P-Value")
}
object <- list(test.name =paste("2 x t Contingency Table"),
t = tnum, KW.cont = KW.cont,
null.dist = null.dist,
method = method, Nsim = Nsim)
class(object) <- "kSamples"
object
}
|