File: MCMCprobit.R

package info (click to toggle)
r-cran-mcmcpack 1.7-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,492 kB
  • sloc: cpp: 21,271; ansic: 4,372; makefile: 2; sh: 1
file content (320 lines) | stat: -rw-r--r-- 12,730 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
##########################################################################
## sample from the posterior distribution of a probit
## model in R using linked C++ code in Scythe
##
## ADM and KQ 5/21/2002
## Modified to meet new developer specification 7/26/2004 KQ
## Modified for new Scythe and rngs 7/26/2004 KQ
## Modified to handle marginal likelihood calculation 1/27/2006 KQ
##
## This software is distributed under the terms of the GNU GENERAL
## PUBLIC LICENSE Version 2, June 1991.  See the package LICENSE
## file for more information.
##
## Copyright (C) 2003-2007 Andrew D. Martin and Kevin M. Quinn
## Copyright (C) 2007-present Andrew D. Martin, Kevin M. Quinn,
##    and Jong Hee Park
##########################################################################

#' Markov Chain Monte Carlo for Probit Regression
#'
#' This function generates a sample from the posterior distribution of a probit
#' regression model using the data augmentation approach of Albert and Chib
#' (1993). The user supplies data and priors, and a sample from the posterior
#' distribution is returned as an mcmc object, which can be subsequently
#' analyzed with functions provided in the coda package.
#'
#' \code{MCMCprobit} simulates from the posterior distribution of a probit
#' regression model using data augmentation. The simulation proper is done in
#' compiled C++ code to maximize efficiency.  Please consult the coda
#' documentation for a comprehensive list of functions that can be used to
#' analyze the posterior sample.
#'
#' The model takes the following form:
#'
#' \deqn{y_i \sim \mathcal{B}ernoulli(\pi_i)}
#'
#' Where the inverse link function:
#'
#' \deqn{\pi_i = \Phi(x_i'\beta)}
#'
#' We assume a multivariate Normal prior on \eqn{\beta}:
#'
#' \deqn{\beta \sim \mathcal{N}(b_0,B_0^{-1})}
#'
#' See Albert and Chib (1993)
#' for estimation details.
#'
#'
#' @param formula Model formula.
#'
#' @param data Data frame.
#'
#' @param burnin The number of burn-in iterations for the sampler.
#'
#' @param mcmc The number of Gibbs iterations for the sampler.
#'
#' @param thin The thinning interval used in the simulation.  The number of
#' Gibbs iterations must be divisible by this value.
#'
#' @param verbose A switch which determines whether or not the progress of the
#' sampler is printed to the screen.  If \code{verbose} is greater than 0 the
#' iteration number and the betas are printed to the screen every
#' \code{verbose}th iteration.
#'
#' @param seed The seed for the random number generator.  If NA, the Mersenne
#' Twister generator is used with default seed 12345; if an integer is passed
#' it is used to seed the Mersenne twister.  The user can also pass a list of
#' length two to use the L'Ecuyer random number generator, which is suitable
#' for parallel computation.  The first element of the list is the L'Ecuyer
#' seed, which is a vector of length six or NA (if NA a default seed of
#' \code{rep(12345,6)} is used).  The second element of list is a positive
#' substream number. See the MCMCpack specification for more details.
#'
#' @param beta.start The starting value for the \eqn{\beta} vector.  This
#' can either be a scalar or a column vector with dimension equal to the number
#' of betas.  If this takes a scalar value, then that value will serve as the
#' starting value for all of the betas. The default value of NA will use the
#' maximum likelihood estimate of \eqn{\beta} as the starting value.
#'
#' @param b0 The prior mean of \eqn{\beta}.  This can either be a scalar
#' or a column vector with dimension equal to the number of betas. If this
#' takes a scalar value, then that value will serve as the prior mean for all
#' of the betas.
#'
#' @param B0 The prior precision of \eqn{\beta}.  This can either be a
#' scalar or a square matrix with dimensions equal to the number of betas.  If
#' this takes a scalar value, then that value times an identity matrix serves
#' as the prior precision of \eqn{\beta}. Default value of 0 is
#' equivalent to an improper uniform prior on \eqn{\beta}.
#'
#' @param bayes.resid Should latent Bayesian residuals (Albert and Chib, 1995)
#' be returned? Default is FALSE meaning no residuals should be returned.
#' Alternatively, the user can specify an array of integers giving the
#' observation numbers for which latent residuals should be calculated and
#' returned. TRUE will return draws of latent residuals for all observations.
#'
#' @param marginal.likelihood How should the marginal likelihood be calculated?
#' Options are: \code{none} in which case the marginal likelihood will not be
#' calculated, \code{Laplace} in which case the Laplace approximation (see Kass
#' and Raftery, 1995) is used, or \code{Chib95} in which case Chib (1995)
#' method is used.
#'
#' @param ... further arguments to be passed
#'
#' @return An mcmc object that contains the posterior sample.  This object can
#' be summarized by functions provided by the coda package.
#'
#' @export
#'
#' @seealso \code{\link[coda]{plot.mcmc}},\code{\link[coda]{summary.mcmc}},
#' \code{\link[stats]{glm}}
#'
#' @references Albert, J. H. and S. Chib. 1993. ``Bayesian Analysis of Binary
#' and Polychotomous Response Data.'' \emph{J. Amer. Statist. Assoc.} 88,
#' 669-679
#'
#' Albert, J. H. and S. Chib. 1995. ``Bayesian Residual Analysis for Binary
#' Response Regression Models.'' \emph{Biometrika.} 82, 747-759.
#'
#' Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011.  ``MCMCpack:
#' Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical Software}.
#' 42(9): 1-21.  \doi{10.18637/jss.v042.i09}.
#'
#' Siddhartha Chib. 1995. ``Marginal Likelihood from the Gibbs Output.''
#' \emph{Journal of the American Statistical Association}. 90: 1313-1321.
#' <doi: 10.1080/01621459.1995.10476635>
#'
#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin.  2007.  \emph{Scythe
#' Statistical Library 1.0.} \url{http://scythe.wustl.edu.s3-website-us-east-1.amazonaws.com/}.
#'
#' Martyn Plummer, Nicky Best, Kate Cowles, and Karen Vines. 2006.  ``Output
#' Analysis and Diagnostics for MCMC (CODA)'', \emph{R News}. 6(1): 7-11.
#' \url{https://CRAN.R-project.org/doc/Rnews/Rnews_2006-1.pdf}.
#'
#' @keywords models
#'
#' @examples
#'
#'    \dontrun{
#'    data(birthwt)
#'    out1 <- MCMCprobit(low~as.factor(race)+smoke, data=birthwt,
#'    	b0 = 0, B0 = 10, marginal.likelihood="Chib95")
#'    out2 <- MCMCprobit(low~age+as.factor(race), data=birthwt,
#'    	b0 = 0, B0 = 10,  marginal.likelihood="Chib95")
#'    out3 <- MCMCprobit(low~age+as.factor(race)+smoke, data=birthwt,
#'    	b0 = 0, B0 = 10,  marginal.likelihood="Chib95")
#'    BayesFactor(out1, out2, out3)
#'    plot(out3)
#'    summary(out3)
#'    }
#'
"MCMCprobit" <-
  function(formula, data=NULL, burnin = 1000, mcmc = 10000,
           thin = 1, verbose = 0, seed = NA, beta.start = NA,
           b0 = 0, B0 = 0, bayes.resid=FALSE,
           marginal.likelihood = c("none", "Laplace", "Chib95"), ...) {

    ## checks
    check.offset(list(...))
    check.mcmc.parameters(burnin, mcmc, thin)

    cl <- match.call()


    ## seeds
    seeds <- form.seeds(seed)
    lecuyer <- seeds[[1]]
    seed.array <- seeds[[2]]
    lecuyer.stream <- seeds[[3]]

    ## form response and model matrices
    holder <- parse.formula(formula, data=data)
    Y <- holder[[1]]
    X <- holder[[2]]
    xnames <- holder[[3]]
    K <- ncol(X)  # number of covariates

    ## starting values and priors
    beta.start <- coef_start(beta.start, K, formula,
                             family=binomial(link="probit"), data)
    mvn.prior <- form.mvn.prior(b0, B0, K)
    b0 <- mvn.prior[[1]]
    B0 <- mvn.prior[[2]]

    ## get marginal likelihood argument
    marginal.likelihood  <- match.arg(marginal.likelihood)
    B0.eigenvalues <- eigen(B0)$values
    if (min(B0.eigenvalues) < 0){
      stop("B0 is not positive semi-definite.\nPlease respecify and call again.\n")
    }
    if (isTRUE(all.equal(min(B0.eigenvalues), 0))){
      if (marginal.likelihood != "none"){
        warning("Cannot calculate marginal likelihood with improper prior\n")
        marginal.likelihood <- "none"
      }
    }
    logmarglike <- NULL


    ## residuals setup
    resvec <- NULL
    if (is.logical(bayes.resid) && bayes.resid==TRUE){
      resvec <- matrix(1:length(Y), length(Y), 1)
    }
    else if (!is.logical(bayes.resid)){
      resvec <- matrix(bayes.resid, length(bayes.resid), 1)
      if (min(resvec %in% 1:length(Y)) == 0){
        cat("Elements of bayes.resid are not valid row numbers.\n")
        stop("Check data and call MCMCprobit() again.\n")
      }
    }

    ## y \in {0, 1} error checking
    if (sum(Y!=0 & Y!=1) > 0) {
      cat("Elements of Y equal to something other than 0 or 1.\n")
      stop("Check data and call MCMCprobit() again.\n")
    }

    ## if Chib95 is true
    chib <- 0
    if (marginal.likelihood == "Chib95"){
      chib <- 1
    }

    posterior <- NULL

    if (is.null(resvec)){
      ## define holder for posterior density sample
      sample <- matrix(data=0, mcmc/thin, dim(X)[2] )

      ## call C++ code to draw sample
      auto.Scythe.call(output.object="posterior", cc.fun.name="cMCMCprobit",
                       sample.nonconst=sample, Y=Y, X=X,
                       burnin=as.integer(burnin),
                       mcmc=as.integer(mcmc), thin=as.integer(thin),
                       lecuyer=as.integer(lecuyer),
                       seedarray=as.integer(seed.array),
                       lecuyerstream=as.integer(lecuyer.stream),
                       verbose=as.integer(verbose), betastart=beta.start,
                       b0=b0, B0=B0, logmarglikeholder.nonconst = as.double(0.0),
                       chib = as.integer(chib))

      ## get marginal likelihood if Chib95
      if (marginal.likelihood == "Chib95"){
        logmarglike <- posterior$logmarglikeholder
      }
      ## marginal likelihood calculation if Laplace
      if (marginal.likelihood == "Laplace"){
        theta.start <- beta.start
        optim.out <- optim(theta.start, logpost.probit, method="BFGS",
                           control=list(fnscale=-1),
                           hessian=TRUE, y=Y, X=X, b0=b0, B0=B0)

        theta.tilde <- optim.out$par
        beta.tilde <- theta.tilde[1:K]

        Sigma.tilde <- solve(-1*optim.out$hessian)

        logmarglike <- (length(theta.tilde)/2)*log(2*pi) +
          log(sqrt(det(Sigma.tilde))) +
            logpost.probit(theta.tilde, Y, X, b0, B0)

      }

      ## put together matrix and build MCMC object to return
      output <- form.mcmc.object(posterior, names=xnames,
                                 title="MCMCprobit Posterior Sample",
                                 y=Y, call=cl,
                                 logmarglike=logmarglike)

    }
    else{
      # define holder for posterior density sample
      sample <- matrix(data=0, mcmc/thin, dim(X)[2]+length(resvec) )

      ## call C++ code to draw sample
      auto.Scythe.call(output.object="posterior", cc.fun.name="MCMCprobitres",
                       sample.nonconst=sample, Y=Y, X=X, resvec=resvec,
                       burnin=as.integer(burnin),
                       mcmc=as.integer(mcmc), thin=as.integer(thin),
                       lecuyer=as.integer(lecuyer),
                       seedarray=as.integer(seed.array),
                       lecuyerstream=as.integer(lecuyer.stream),
                       verbose=as.integer(verbose), betastart=beta.start,
                       b0=b0, B0=B0,  logmarglikeholder.nonconst= as.double(0.0),
                       chib = as.integer(chib))

      ## get marginal likelihood if Chib95
      if (marginal.likelihood == "Chib95"){
        logmarglike <- posterior$logmarglikeholder
      }
      ## marginal likelihood calculation if Laplace
      if (marginal.likelihood == "Laplace"){
        theta.start <- beta.start
        optim.out <- optim(theta.start, logpost.probit, method="BFGS",
                           control=list(fnscale=-1),
                           hessian=TRUE, y=Y, X=X, b0=b0, B0=B0)

        theta.tilde <- optim.out$par
        beta.tilde <- theta.tilde[1:K]

        Sigma.tilde <- solve(-1*optim.out$hessian)

        logmarglike <- (length(theta.tilde)/2)*log(2*pi) +
          log(sqrt(det(Sigma.tilde))) +
            logpost.probit(theta.tilde, Y, X, b0, B0)

      }

      ## put together matrix and build MCMC object to return
      xnames <- c(xnames, paste("epsilonstar", as.character(resvec), sep="") )

      output <- form.mcmc.object(posterior, names=xnames,
                                 title="MCMCprobit Posterior Sample",
                                 y=Y, call=cl, logmarglike=logmarglike)

    }
    return(output)

  }