File: ot2kSim.R

package info (click to toggle)
r-cran-openmx 2.21.13%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,716 kB
  • sloc: cpp: 36,559; ansic: 13,821; fortran: 2,001; sh: 1,440; python: 350; perl: 21; makefile: 11
file content (150 lines) | stat: -rw-r--r-- 5,661 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
library(ggplot2)
library(OpenMx)
library(rpf)
library(pROC)

mkmodel <- function(numItems, numPeople, genCond, fitCond, numBad) {
  spec <- list()
  
  trueFit <- rep(FALSE, numBad)
  trueFit <- c(trueFit, rep(TRUE, numItems - length(trueFit)))
  
  if (genCond == "2pl") {
    spec[1:numItems] <- rpf.grm()
    correct <- sapply(spec, rpf.rparam)
    correct['a', trueFit==TRUE] <- 1
    data <- rpf.sample(numPeople, spec, correct)
    
    if (fitCond == "1pl") {
      spec[1:numItems] <- rpf.grm()
      ip.mat <- mxMatrix(name="ItemParam", nrow=2, ncol=numItems,
                         values=c(1,0),
                         free=c(FALSE, TRUE),
                         dimnames=list(rownames(correct), colnames(data)))
    } else { stop(fitCond) }
  }
  
  if (genCond == "3pl") {
    spec[1:numItems] <- rpf.drm()
    correct <- sapply(spec, rpf.rparam)
    correct['g',trueFit==TRUE] <- logit(0)
    correct['u',trueFit==TRUE] <- logit(1)
    correct['u',trueFit==FALSE] <- logit(.5)
    correct['u',trueFit==FALSE] <- logit(1)
    data <- rpf.sample(numPeople, spec, correct)

    if (fitCond == "2pl") {
      spec[1:numItems] <- rpf.drm()
      ip.mat <- mxMatrix(name="ItemParam", nrow=4, ncol=numItems,
                         values=c(1,0, logit(0), logit(1)),
                         free=c(TRUE, TRUE, FALSE, FALSE),
                         dimnames=list(rownames(correct), colnames(data)))
    } else { stop(fitCond) }
  }
  
  if (genCond == "nom2") {
    spec[1:numItems] <- rpf.nrm(outcomes=4)
    correct <- sapply(spec, rpf.rparam)
    half <- numItems/2
    correct[c('alf2', 'alf3'), trueFit==TRUE] <- 0
    data <- rpf.sample(numPeople, spec, correct)
    
    if (fitCond == "grm") {
      spec[1:numItems] <- rpf.grm(outcomes=4)
      ip.mat <- mxMatrix(name="ItemParam", nrow=4, ncol=numItems,
                         values=rpf.rparam(spec[[1]]),
                         free=TRUE,
                         dimnames=list(names(rpf.rparam(spec[[1]])), colnames(data)))
    } else { stop(fitCond) }
  }
  
  m.mat <- mxMatrix(name="mean", nrow=1, ncol=1, values=0, free=FALSE, dimnames=list("a","a"))
  cov.mat <- mxMatrix(name="cov", nrow=1, ncol=1, values=1, free=FALSE, dimnames=list("a","a"))
  
  m1 <- mxModel(model="ot2k", ip.mat, m.mat, cov.mat,
                mxData(observed=data, type="raw"),
                mxExpectationBA81(ItemSpec=spec, ItemParam="ItemParam",
                                  mean="mean", cov="cov"),
                mxFitFunctionML(),
                mxComputeEM('expectation', 'scores', mxComputeNewtonRaphson()))
  list(model=m1, trueFit=trueFit)
}

trial <- function(numItems, numPeople, genCond, fitCond, numBad) {
  got <- mkmodel(numItems, numPeople, genCond, fitCond, numBad)
  model <- got$model
  trueFit <- got$trueFit
  model <- mxRun(model, silent=TRUE)
  
  grp <- list(spec=model$expectation$ItemSpec,
              param=model$ItemParam$values,
              mean=model$mean$values,
              cov=model$cov$values,
              data=model$data$observed)
  
  result <- expand.grid(method=c("pearson", "rms"), alt=c(TRUE, FALSE), item=1:numItems, trueFit=NA, pval=NA)
  result <- result[!(result$method=="rms" & result$alt),]
  for (ix in 1:numItems) {
    result[result$item == ix, 'trueFit'] <- trueFit[ix]
  }

  got <- rpf.SitemFit(grp, method="pearson")
  mask <- result$method=="pearson" & !result$alt
  result[mask,'pval'] <- sapply(got, function(x) x$pval)
  
  got <- rpf.SitemFit(grp, method="pearson", alt=TRUE)
  mask <- result$method=="pearson" & result$alt
  result[mask,'pval'] <- sapply(got, function(x) x$pval)
  
  got <- rpf.SitemFit(grp, method="rms")
  mask <- result$method=="rms" & !result$alt
  result[mask,'pval'] <- sapply(got, function(x) x$pval)
  result
}

result <- NULL
for (replication in 1:20) {
  result <- rbind(result, trial(20, 500, "nom2", "grm", 20))
  result <- rbind(result, trial(20, 500, "nom2", "grm", 0))
#  result <- rbind(result, trial(10, 1000, "3pl", "2pl", 1))
}

alphaPlot <- function(result) {
  aResult <- expand.grid(method=unique(result$method), alt=unique(result$alt), alpha=seq(.01,.1, .005), got=0)
  aResult$label <- paste(aResult$method, aResult$alt, sep="+")
  # need at least 10/min(alpha) replications for good accuracy
  minRows <- 10/min(aResult$alpha)
  for (r in 1:nrow(aResult)) {
    mre <- subset(result, method==aResult$method[r] & alt==aResult$alt[r] & trueFit)
    if (aResult$alpha[r] == min(aResult$alpha) &&
          nrow(mre) && nrow(mre) < minRows) {
      warning(paste("Only", nrow(mre),"rows for", aResult$label[r], "need", minRows))
    }
    aResult$got[r] <- sum(mre$pval < aResult$alpha[r]) / nrow(mre)
  }
  aResult <- aResult[is.finite(aResult$got),]
  ggplot(aResult, aes(alpha, got, color=label)) + geom_line() +
    geom_abline(intercept=0, slope=1, color="green") +
    labs(x="expected alpha", y="empirical alpha") + ylim(0,max(aResult$got))
}

if (0) {
  numItems = 20
  numPeople = 1000
  genCond = "3pl"
  fitCond = "2pl"
  roc(trueFit ~ pval, subset(result, method=="pearson" & !alt), plot=TRUE, ci=TRUE)
  roc(trueFit ~ pval, subset(result, method=="rms" & !alt), plot=TRUE, ci=TRUE)
  roc(trueFit ~ pval, subset(result, method=="pearson" & alt), plot=TRUE, ci=TRUE)  # difference at small # of items
  alphaPlot(result)
}

# bad items make other items bad so there are two feasible ways to compose the items,
# 100% bad, 100% good
# 1 bad and the rest good

# rms has no advantage in all good vs all bad, dichotomous

# rms has less power but better alpha level (who cares?)

# pearson & alt=TRUE has slightly less power than pearson & alt=FALSE with less than 10 items