File: ssllrm.Rd

package info (click to toggle)
r-cran-gss 2.1-3-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,740 kB
  • ctags: 1,400
  • sloc: fortran: 5,241; ansic: 1,388; makefile: 1
file content (131 lines) | stat: -rw-r--r-- 5,396 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
\name{ssllrm}
\alias{ssllrm}
\title{Fitting Smoothing Spline Log-Linear Regression Models}
\description{
    Fit smoothing spline log-linear regression models.  The symbolic
    model specification via \code{formula} follows the same rules as in
    \code{\link{lm}}.
}
\usage{
ssllrm(formula, response, type=NULL, data=list(), weights, subset,
       na.action=na.omit, alpha=1, id.basis=NULL, nbasis=NULL,
       seed=NULL, random=NULL, prec=1e-7, maxiter=30, skip.iter=FALSE)
}
\arguments{
    \item{formula}{Symbolic description of the model to be fit.}
    \item{response}{Formula listing response variables.}
    \item{type}{List specifying the type of spline for each variable.
        See \code{\link{mkterm}} for details.}
    \item{data}{Optional data frame containing the variables in the
	model.}
    \item{weights}{Optional vector of weights to be used in the
	fitting process.}
    \item{subset}{Optional vector specifying a subset of observations
	to be used in the fitting process.}
    \item{na.action}{Function which indicates what should happen when
	the data contain NAs.}
    \item{alpha}{Parameter modifying GCV or Mallows' CL; larger absolute
        values yield smoother fits; negative value invokes a stable and
	more accurate GCV/CL evaluation algorithm but may take two to
	five times as long.  Ignored when \code{method="m"} is
	specified.}
    \item{id.basis}{Index designating selected "knots".}
    \item{nbasis}{Number of "knots" to be selected.  Ignored when
	\code{id.basis} is supplied.}
    \item{seed}{Seed to be used for the random generation of "knots".
        Ignored when \code{id.basis} is supplied.}
    \item{random}{Input for parametric random effects in nonparametric
        mixed-effect models.  See \code{\link{mkran}} for details.}
    \item{prec}{Precision requirement for internal iterations.}
    \item{maxiter}{Maximum number of iterations allowed for
        internal iterations.}
    \item{skip.iter}{Flag indicating whether to use initial values of
        theta and skip theta iteration.  See \code{\link{ssanova}} for
	notes on skipping theta iteration.}
}
\details{
    The model is specified via \code{formula} and \code{response}, where
    \code{response} lists the response variables.  For example,
    \code{ssllrm(~y1*y2*x,~y1+y2)} prescribes a model of the form
    \deqn{
      log f(y1,y2|x) = g_{1}(y1) + g_{2}(y2) + g_{12}(y1,y2)
              + g_{x1}(x,y1) + g_{x2}(x,y2) + g_{x12}(x,y1,y2) + C(x)
    }
    with the terms denoted by \code{"y1"}, \code{"y2"}, \code{"y1:y2"},
    \code{"y1:x"}, \code{"y2:x"}, and \code{"y1:y2:x"}; the term(s) not
    involving response(s) are removed and the constant \code{C(x)} is
    determined by the fact that a conditional density integrates (adds)
    to one on the \code{y} axis.

    The model terms are sums of unpenalized and penalized
    terms. Attached to every penalized term there is a smoothing
    parameter, and the model complexity is largely determined by the
    number of smoothing parameters.

    A subset of the observations are selected as "knots."  Unless
    specified via \code{id.basis} or \code{nbasis}, the number of
    "knots" \eqn{q} is determined by \eqn{max(30,10n^{2/9})}, which is
    appropriate for the default cubic splines for numerical vectors.
}
\note{
    The responses, or y-variables, must be factors, and there must be at
    least one numerical x.  For \code{response}, there is no difference
    between \code{~y1+y2} and \code{~y1*y2}.
    
    The results may vary from run to run. For consistency, specify
    \code{id.basis} or set \code{seed}.
}
\value{
    \code{ssllrm} returns a list object of class \code{"ssllrm"}.

    The method \code{\link{predict.ssllrm}} can be used to evaluate
    \code{f(y|x)} at arbitrary x, or contrasts of \code{log{f(y|x)}}
    such as the odds ratio along with standard errors.  The method
    \code{\link{project.ssllrm}} can be used to calculate the
    Kullback-Leibler projection for model selection.
}
\author{Chong Gu, \email{chong@stat.purdue.edu}}
\references{
    Gu, C. and Ma, P. (2011), Nonparametric regression with
    cross-classified responses.  \emph{The Canadian Journal of
      Statistics}, \bold{39}, 591--609.

    Chong Gu (2014), Smoothing Spline ANOVA Models: R Package gss.
    \emph{Journal of Statistical Software}, 58(5), 1--25. URL
    \url{http://www.jstatsoft.org/v58/i05/}.
}
\examples{
## Simulate data
## test(x) is the logit of the success probability p computed below
test <- function(x)
        {.3*(1e6*(x^11*(1-x)^6)+1e4*(x^3*(1-x)^10))-2}
## 101 equally spaced covariate values on [0,1]
x <- (0:100)/100
## Logistic transform of test(x): p = exp(test(x))/(1+exp(test(x)))
p <- 1-1/(1+exp(test(x)))
## One binomial draw of size 3 per element of x (rbinom takes length(x) as n)
y <- rbinom(x,3,p)
## First response: ordered factor built from the binomial counts
y1 <- as.ordered(y)
## Second response: factor built from a separate rbinom call with size 1
y2 <- as.factor(rbinom(x,1,p))
## Fit model
## The first formula gives the model terms, the second lists the responses
fit <- ssllrm(~y1*y2*x,~y1+y2)

## Evaluate f(y|x)
## The second argument supplies the x values; the third supplies the
## (y1,y2) pairs, here y1 = 0,...,3 each paired with y2 = 0
est <- predict(fit,data.frame(x=x),
               data.frame(y1=as.factor(0:3),y2=as.factor(rep(0,4))))
## f(y|x) at all y values (fit$qd.pt)
est <- predict(fit,data.frame(x=x))

## Evaluate contrast of log f(y|x)
## odds has one entry per row of the y data frame (presumably the contrast
## coefficients); the entries sum to zero
est <- predict(fit,data.frame(x=x),odds=c(-1,.5,.5,0),
               data.frame(y1=as.factor(0:3),y2=as.factor(rep(0,4))),se=TRUE)
## Log odds ratio log{f(0,0|x)/f(3,1|x)}; the y pairs passed are (0,0) and (3,1)
est <- predict(fit,data.frame(x=x),odds=c(1,-1),
               data.frame(y1=as.factor(c(0,3)),y2=as.factor(c(0,1))),se=TRUE)

## KL projection
## NOTE(review): the term y2:x is listed twice in include; confirm whether
## a different term was intended
kl <- project(fit,include=c("y2:x","y1:y2","y1:x","y2:x"))

## Clean up
\dontrun{rm(test,x,p,y,y1,y2,fit,est,kl)
dev.off()}
}
\keyword{smooth}
\keyword{models}
\keyword{regression}