File: subclassify.R

package info (click to toggle)
matchit 1.0-1-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 268 kB
  • ctags: 4
  • sloc: sh: 24; makefile: 12
file content (124 lines) | stat: -rw-r--r-- 4,443 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
subclassify <- function(formula,data,in.sample,pscore,nearest=TRUE,
                        match.matrix,subclass=0,sub.by="treat",
                        counter=TRUE, full = FALSE, full.options=list()){

  data <- eval(data,parent.frame())
  treata <- model.frame(formula,data)[,1,drop=FALSE]
  treat <- as.vector(treata[,1])
  names(treat) <- row.names(treata)

  if(full) { # full matching with propensity score
    if(counter){cat("Full Matching...")}
    full <- full.options
    if(!is.list(full.options)){
      warning("full.options must be a list; assuming defaults for full matching",call.=FALSE)
    } 
    if(is.null(full$min.controls)){
      full$min.controls <- 0
    }
    if(is.null(full$max.controls)){
      full$max.controls <- Inf
    }
    if(is.null(full$omit.fraction)){
      full$omit.fraction <- NULL
    }
    if(is.null(full$tol)){
      full$tol <- 0.01
    }
    if(is.null(full$subclass.indices)){
      full$subclass.indices <- NULL
    }
    notin <- names(full.options)[which(!names(full.options)%in%c("min.controls","max.controls",
                                "omit.fraction","omit.fraction",
                                "tol", "subclass.indices"))]
    if(!is.null(notin)){
      warning(paste(notin,collapse=" "), " in full.options invalid and ignored for full matching",call.=FALSE)
    }
    n1 <- length(treat[treat==1])
    n0 <- length(treat[treat==0])
    p1 <- pscore[treat==1]
    p0 <- pscore[treat==0]
    distance <- matrix(0, ncol=n0, nrow=n1)
    rownames(distance) <- row.names(treata)[treat==1]
    colnames(distance) <- row.names(treata)[treat==0]
    for (i in 1:n1)
      distance[i,] <- abs(p1[i]-p0)
    full <- as.matrix(fullmatch(distance,subclass.indices = full$subclass.indices,
                                min.controls = full$min.controls,
                                max.controls = full$max.controls,
                                omit.fraction = full$omit.fraction,
                                tol = full$tol))
    psclass <- full[pmatch(row.names(treata), row.names(full)),]
    psclass <- as.numeric(as.factor(psclass))
    names(psclass) <- row.names(treata)
    q <- NULL
    if(counter){cat("Done\n")}
  }
  else if(subclass) {
    if(counter){cat("Subclassifying...")}  
    n <- length(treat)
    if(nearest){
      match.matrix <- match.matrix[in.sample[treat==1],,drop=F]
      t.units <- row.names(match.matrix)[!is.na(match.matrix)]
      
      c.units <- na.omit(as.vector(as.matrix(match.matrix)))
      matched <-c(t.units,c.units)
      matched <- names(treat)%in%matched
    } else{
      matched <- rep(TRUE,n)}
    names(matched) <- names(treat)
    m1 <- matched[treat==1]
    m0 <- matched[treat==0]
    p1 <- pscore[treat==1][m1]
    p0 <- pscore[treat==0][m0]
    
    if(length(subclass)!=1 | (length(subclass)==1 &
               identical(subclass<1,TRUE))) {
      subclass <- sort(subclass)
      if (subclass[1]==0) subclass <- subclass[-1]
      if (subclass[length(subclass)]==1) subclass <- subclass[-length(subclass)]
      if(sub.by=="treat") {
        q <- c(0,quantile(p1,probs=c(subclass)),1)
      }
      else if(sub.by=="control") {
        q <- c(0,quantile(p0,probs=c(subclass)),1)
      }
      else if(sub.by=="all") {
        q <- c(0,quantile(pscore,probs=c(subclass)),1)
      }
      ##          else {stop("Must specify a valid sub.by",call.=FALSE)
      ##             }
    }
    else {
      ##        if(subclass<=0){stop("Subclass must be a positive vector",call.=FALSE)}
      sprobs <- seq(0,1,length=(round(subclass)+1))
      sprobs <- sprobs[2:(length(sprobs)-1)]
      if(sub.by=="treat")            {
        q <- c(0,quantile(p1,probs=sprobs,na.rm=TRUE),1)
      }
      else if(sub.by=="control") {
        q <- c(0,quantile(p0,probs=sprobs,na.rm=TRUE),1)
      }
      else if(sub.by=="all") {
        q <- c(0,quantile(pscore,probs=sprobs,na.rm=TRUE),1)
      }
      ##      else {stop("Must specify a valid sub.by",call.=FALSE)}
    }
    
    qbins <- length(q)-1
    psclass <- rep(0,n)
    names(psclass) <- names(treat)
    for (i in 1:qbins){
      q1 <- q[i]
      q2 <- q[i+1]
      psclass <- psclass+i*as.numeric(pscore<q2 & pscore>=q1)}
    
    ## Make sure not to assign subclass to discarded units
    psclass[in.sample==0] <- 0
    psclass[!matched] <- 0
    if(counter){cat("Done\n")}
  }  
  else {psclass <- NULL; q=NULL}

  return(list(psclass = psclass, q.cut = q))
}