File: datcheck.R

package info (click to toggle)
r-cran-erm 1.0-6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,952 kB
  • sloc: f90: 401; ansic: 103; makefile: 8
file content (143 lines) | stat: -rwxr-xr-x 6,112 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
datcheck <- function(X, W, mpoints, groupvec, model){
  if(is.data.frame(X)){
    X <- as.matrix(X)   # X as data frame allowed
  }                  

  if(is.null(colnames(X))){                                 #determine item names
    if(mpoints > 1){
      mpind <- paste("t",rep(1:mpoints,each=(ncol(X)/mpoints),1),sep="") #time points
      itemind <- paste("I",1:(ncol(X)/mpoints),sep="")
      colnames(X) <- paste(itemind,mpind)
    } else {
      colnames(X) <- paste("I",1:ncol(X),sep="")                         #item labels
    }
  }
  if(is.null(rownames(X))) rownames(X) <- paste0("P", seq_len(nrow(X)))   #person labels

#----------------------- check groupvec --------------------------

  if((length(groupvec) > 1L) && (length(groupvec) != nrow(X))){
    stop("Wrong specification of groupvec!")
  }

  if(min(groupvec) != 1L){
    stop("Group specification must start with 1!")
  }

  if(length(unique(groupvec)) != (max(groupvec))){
    stop("Group vector is incorrectly specified (perhaps a category is missing)!")   # rh 2011-03-03
  }

  if((max(groupvec) > 1L) && (mpoints == 1)){
    stop(paste0("\n", prettyPaste("Model not identifiable! Group contrasts can only be imposed for repeated measurement designs.")))
  }

#  if ((length(groupvec) > 1) && any(is.na(X))) {
#    stop("Model with repeated measures, group specification and NAs cannot be computed!") }

#----------------------- check X --------------------------------
  allna.vec <- apply(X,2,function(y) {all(is.na(y))})                 #eliminate items with all NA's
  if (any(allna.vec)) {stop("There are items with full NA responses which must be deleted!")}

  allna.vec <- apply(X,1,function(y) {all(is.na(y))})                 #eliminate items with all NA's
  if (any(allna.vec)) {stop("There are persons with full NA responses which must be deleted!")}

  allna.vec <- apply(X,1,function(y) {sum(is.na(y))})
  if (any(allna.vec == (ncol(X)-1L))) {stop("Subjects with only 1 valid response must be removed!")}

  ri.min <- apply(X,2,min,na.rm=TRUE)                                 #if no 0 responses
  if(any(ri.min > 0)){
    warning(paste0(
      "\n",
      prettyPaste("The following items have no 0-responses:"),
      "\n",
      paste(colnames(X)[ri.min > 0], collapse=" "),
      "\n",
      prettyPaste("Responses are shifted such that lowest category is 0.")
    ), call. = FALSE, immediate.=TRUE)
  }
  X <- t(apply(X,1,function(y) {y-ri.min}))                           #shift down to 0

  ri <- apply(X,2,sum,na.rm=TRUE)                                     #item raw scores
  n.NA <- colSums(apply(X,2,is.na))                                   #number of NA's per column
  maxri <- (dim(X)[1]*(apply(X,2,max,na.rm=TRUE)))-n.NA               #maximum item raw scores with NA
  TFcol <- ((ri==maxri) | (ri==0))
  X.n <- X[,!TFcol]                                                   #new matrix with excluded items
  item.ex <- (seq_len(ncol(X)))[TFcol]                                     #excluded items
  if(length(item.ex) > 0) {
    if(mpoints == 1){
      warning(paste0(
        "\n",
        prettyPaste("The following items were excluded due to complete 0/full responses:"),
        "\n",
        paste(colnames(X)[item.ex], collapse=" ")
      ), call. = FALSE, immediate.=TRUE)
    } else {
      stop(paste0(
        "\n",
        "The following items show complete 0/full responses:",
        "\n",
        paste(colnames(X)[item.ex], collapse=" "),
        "\n",
        prettyPaste("Estimation cannot be performed! Delete the corresponding items for the other measurement points as well!")
      ), call. = FALSE)
    }
  }

  if ((model=="PCM") || (model=="LPCM")) {                         #check if there are missing categories for PCM (for RSM doesn't matter)
    tablist <- apply(X,2,function(x) list(as.vector(table(x))))
    tablen <- sapply(tablist,function(x) length(x[[1]]))
    xmax <- apply(X,2,max)+1
    indwrong <- which(tablen != xmax)
    if(length(indwrong) > 0){
      warning(paste0(
        "\n",
        prettyPaste("The following items do not have responses on each category:"),
        "\n",
        paste(colnames(X)[indwrong], collapse=" "),
        "\n",
        prettyPaste("Estimation may not be feasible. Please check data matrix!")
      ), call. = FALSE, immediate.=TRUE)
    }
  }


#-------------------------- ill conditioned for RM and LLTM --------------
  if ((model=="RM") || (model=="LLTM")) {
    if (length(table(X.n)) != 2L) stop("Dichotomous data matrix required!")
    k.t   <- dim(X.n)[2L]/mpoints                                    #check for each mpoint separately
    t.ind <- rep(seq_len(mpoints), 1L, each=k.t)
    X.nlv <- split(t(X.n),t.ind)                                  #split X due to mpoints
    cn.lv <- split(colnames(X.n),t.ind)
    X.nl  <- lapply(X.nlv,matrix,ncol=k.t,byrow=TRUE)
    for(i in seq_len(length(X.nl))) colnames(X.nl[[i]]) <- cn.lv[[i]]

    for(l in seq_len(mpoints)){                                       #check within mpoints
      X.nll <- X.nl[[l]]
      k <- ncol(X.nll)
      adj <- matrix(0, ncol=k, nrow=k)
      for(i in seq_len(k)) for(j in seq_len(k)) {
        adj[i,j]<- 1*any(X.nll[,i] > X.nll[,j], na.rm = TRUE)
      }
      cd  <- component.dist(adj, connected = "strong")
      cm  <- cd$membership
      cmp <- max(cm)
      if(cmp > 1L) {
        cmtab <- table(cm)
        maxcm.n <- as.numeric(names(cmtab)[cmtab!=max(cmtab)])
        suspcol <- (seq_len(length(cm)))[tapply(cm, seq_len(length(cm)), function(x){ any(maxcm.n == x) })]
        n.suspcol <- colnames(X.nll)[suspcol]
        stop(paste0(
          "\n",
          prettyPaste("Estimation stopped due to ill-conditioned data matrix X! Suspicious items:"),
          "\n",
          paste(n.suspcol, collapse=" ")
        ), call. = FALSE)
      }
    }
  }
#----------------------- end ill-conditioned check -------------------------------

  return(list(X = X.n, groupvec = groupvec))

}