File: diag-1dim.Rnw

package info (click to toggle)
r-cran-rpf 1.0.11%2Bdfsg-2
links: PTS, VCS
area: main
in suites: bookworm
size: 1,484 kB
sloc: cpp: 5,364; sh: 114; ansic: 41; makefile: 2
file content (172 lines) | stat: -rw-r--r-- 5,158 bytes
parent folder | download | duplicates (3)
%\VignetteEngine{knitr::knitr}
%\VignetteIndexEntry{How to perform common IRT diagnostics}
\documentclass{article}
\begin{document}

<<setup, cache=FALSE, include=FALSE>>=
library(rpf)
library(ggplot2)
library(reshape2)
library(gridExtra)
@ 

\section{How to perform common IRT diagnostics}

Here's how to create an item characteristic curve plot for any
one-dimensional item. The item parameters are drawn at random, so the
curves change every time the documentation is rebuilt.

<<fig.height=2.5>>=
# Draw the category response curves of a single item.
#
# item:  item specification handed to rpf.prob()
# param: parameter vector for `item`
# width: the curves are evaluated on seq(-width, width, .1)
#
# Returns a ggplot object with one line per response category.
plot.icc <- function(item, param, width=3) {
  grid <- seq(-width, width, .1)
  # Transpose so that each row corresponds to one grid point
  prob <- t(rpf.prob(item, param, grid))
  curves <- as.data.frame(melt(prob, varnames=c("theta", "category")))
  # Replace melt()'s row labels with the actual grid values; the grid
  # is recycled once for every category
  curves$theta <- grid
  curves$category <- as.factor(curves$category)
  ggplot(curves, aes(theta, value)) +
    geom_line(aes(color=category)) +
    ylim(0, 1) +
    xlim(-width, width)
}

# One item built with rpf.gpcm(5) and one with rpf.drm(), each paired
# with randomly drawn parameters
i1 <- rpf.gpcm(5)
i1.p <- rpf.rparam(i1)
i2 <- rpf.drm()
i2.p <- rpf.rparam(i2)

# Place the two plots side by side
grid.arrange(plot.icc(i1, i1.p), plot.icc(i2, i2.p), ncol = 2)
@ 

Now let us look at some real data.

<<fig.height=3.5, cache=TRUE>>=
# Item table; its slope, b1..b4 and name columns are used below
data(ms.items)

# Ten items that all share the same rpf.gpcm(5) specification
spec <- rep(list(rpf.gpcm(5)), 10)

# Plot the information function of every item together with their total.
#
# spec:   list of item specifications, one per item
# param:  table of item parameters; row ix holds the parameters of
#         spec[[ix]] in its first rpf.numParam(spec[[ix]]) columns
# i.name: optional item labels, at least one per item; defaults to
#         'i1', 'i2', ...
# width:  information is evaluated on seq(-width, width, .1)
#
# Returns a ggplot object with one line per item plus a 'total' line.
plot.info <- function(spec, param, i.name, width=3) {
  if (missing(i.name)) {
    i.name <- paste0('i', seq_along(spec))
  }
  # A factor would index the list below by its integer codes rather
  # than by its labels, so always work with plain strings.
  i.name <- as.character(i.name)
  if (length(i.name) < length(spec)) {
    stop("i.name must supply a label for every item in spec", call. = FALSE)
  }
  grid <- seq(-width, width, .1)
  info <- list(score=grid)
  total <- numeric(length(grid))
  # seq_along() copes with an empty spec, where 1:length(spec) would
  # iterate over c(1, 0)
  for (ix in seq_along(spec)) {
    id <- i.name[ix]
    s <- spec[[ix]]
    info[[id]] <- rpf.info(s, param[ix, seq_len(rpf.numParam(s))], grid)
    total <- total + info[[id]]
  }
  info$total <- total
  info <- as.data.frame(info)
  long <- melt(info, id.vars=c('score'), variable.name="item")
  long$item <- factor(long$item)
  ggplot(long, aes(score, value, group=item)) +
    geom_line(size=1.1, aes(linetype=item, color=item)) +
    ylab("information")
}

# Keep only the slope and b1..b4 columns, one row per item
param <- ms.items[, c("slope", "b1", "b2", "b3", "b4")]

plot.info(spec, param, ms.items[, "name"])
@ 

<<fig.height=3, cache=TRUE>>=
# Load ms.people; the code below reads its `score` column and the
# response columns named in ms.items$name
data(ms.people)

# Compare observed category proportions with the model-implied category
# probabilities for one item.
#
# spec1:     item specification; its @outcomes slot gives the number of
#            response categories
# param:     parameters for the item; only the first
#            rpf.numParam(spec1) entries are used
# espt:      data frame with a numeric `score` column and a column
#            named by `item.name` holding the responses
# item.name: name of the response column in `espt`
# width:     model curves are evaluated on seq(-width, width, .1) and
#            both panels are limited to [-width, width]
# data.bins: number of equal-width score bins used to summarise the data
#
# Returns a ggplot object with a 'data' panel and a 'model' panel.
data.vs.model <- function(spec1, param, espt, item.name, width=3, data.bins=10) {
  grid <- seq(-width, width, .1)
  n.out <- spec1@outcomes

  # Model-implied curves in long format
  prob <- t(rpf.prob(spec1, param[seq_len(rpf.numParam(spec1))], grid))
  icc <- as.data.frame(melt(prob, varnames=c("theta", 'category')))
  icc$theta <- grid
  # Reverse the category order of the model curves
  icc$category <- as.ordered(1+max(icc$category)-icc$category)  #parscale reverses stuff
  icc$type <- 'model'

  # Cut the observed scores into equal-width bins
  breaks <- seq(min(espt$score, na.rm=TRUE),
                max(espt$score, na.rm=TRUE),
                length.out=data.bins+1)
  bin.f <- cut(espt$score, breaks, include.lowest = TRUE)
  bin <- as.integer(bin.f)
  est <- tabulate(bin, nlevels(bin.f))
  if (any(est < 10)) {
    warning("Some bins have less than 10 samples; try fewer data.bins")
  }

  resp <- espt[[item.name]]
  if (is.null(resp)) {
    stop("espt has no column named '", item.name, "'", call. = FALSE)
  }
  # Tabulate against a fixed set of levels so that a category that is
  # absent from one bin still gets a (zero) cell in that bin.
  if (!is.factor(resp)) {
    resp <- factor(resp)
  }
  if (nlevels(resp) != n.out) {
    stop("item '", item.name, "' has ", nlevels(resp),
         " response categories but the model expects ", n.out,
         call. = FALSE)
  }

  # Column 1 is the bin midpoint; the remaining columns hold the
  # observed proportion of each category within the bin.
  eout <- array(dim=c(data.bins, n.out+1))
  for (px in seq_len(data.bins)) {
    counts <- table(resp[bin==px], useNA="no")
    eout[px, 2:(n.out+1)] <- counts / sum(counts)
  }
  eout[,1] <- (head(breaks, -1) + tail(breaks, -1)) / 2

  # Reshape to the same long layout as the model curves
  edf <- melt(as.data.frame(eout), id.vars=c('V1'),
              variable.name="category")
  edf$category <- ordered(unclass(edf$category))
  edf$theta <- edf$V1
  edf$V1 <- NULL
  edf$type <- 'data'

  both <- rbind(edf, icc)
  both$type <- factor(both$type)

  ggplot(both, aes(theta, value)) +
    geom_line(aes(color=category, linetype=category)) +
    facet_wrap(~type) +
    ylim(0,1) + xlim(-width,width) +
    labs(y="probability", x="score")
}

# Locate one item by name, then compare its data with the model
name <- "msCause"
item.x <- match(name, ms.items$name)
param <- ms.items[item.x, c("slope", paste0("b", 1:4))]
data.vs.model(spec[[item.x]], param, ms.people, name, data.bins = 12) +
  labs(title = paste0(name, ", slope = ", param[1]))
@ 

Let us plot one more. For msShared, categories 2 and 4 are never the
most likely choice. You can see this visually by confirming that the
curves for categories 2 and 4 are always beneath the curve of some
other category.

<<fig.height=3, cache=TRUE>>=
# Same comparison of data and model for a second item
name <- "msShared"
item.x <- match(name, ms.items$name)
param <- ms.items[item.x, c("slope", paste0("b", 1:4))]
data.vs.model(spec[[item.x]], param, ms.people, name, data.bins = 12) +
  labs(title = paste0(name, ", slope = ", param[1]))
@ 

Item-wise covariance of residuals can be informative.

<<>>=
param <- ms.items[, c('slope', paste0('b', 1:4))]
res <- rpf.1dim.residual(spec, param, ms.people[, ms.items$name], ms.people$score)
# Keep only the rows without any missing residual
res <- res[complete.cases(res), , drop=FALSE]
res.cov <- cov(res)
dimnames(res.cov) <- list(ms.items$id, ms.items$id)
# Print the residual covariance matrix itself; wrapping it in a second
# cov() would report the covariance between its columns instead
round(res.cov, 1)
@ 

Let's take another dataset and look at item fit. This will show the
most overfitting and underfitting items.

<<>>=
data(science.items)
data(science.people)

scores <- science.people$trait

# Parameter table: a constant first column of 1 followed by b1 and b2
params <- cbind(1, science.items[, paste0("b", 1:2)])
rownames(params) <- as.character(science.items$name)

# All 25 items share the same rpf.gpcm(3) specification
items <- rep(list(rpf.gpcm(3)), 25)

# Response columns selected by item name
responses <- science.people[, as.character(science.items$name)]
rownames(responses) <- science.people$name

# Fit statistics with the last argument set to 2, listed from the
# largest outfit to the smallest
fit <- rpf.1dim.fit(items, params, responses, scores, 2)
head(fit[order(-fit$outfit), ])
tail(fit[order(-fit$outfit), ])
@ 

And we can do the same with people.

<<>>=
# Same call as for the items, but with the last argument set to 1
fit <- rpf.1dim.fit(items, params, responses, scores, 1)
head(fit[order(-fit$outfit), ])
tail(fit[order(-fit$outfit), ])
@ 


\end{document}