1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
|
# design.trans FEH 4 Oct 90
# Contains individual functions for creating sub-design matrices from
# vectors, for use with design().
# code name
# 1 asis leave variable coded as-is, get default name, label,
# limits, values
# 2 pol polynomial expansion
# 3 lsp linear spline
# 4 rcs restricted cubic spline
# 5 catg category
# 7 scored scored ordinal variable
# 8 strat stratification factor
#10 matrx matrix factor - used to keep groups of variables together
# as one factor
#
# des.args generic function for retrieving arguments
# set.atr generic function to set attributes of sub design matrix
# options sets default options
# [.Design subsets variables, keeping attributes
# gparms retrieve parms for design or fit object. Not used by any
# of these routines, but used by analyst to force a new
# fit to use same parms as previous fit for a given factor.
# value.chk
# Check a given list of values for a factor for validity,
# or if list is NA, return list of possible values
# var.inner - stripped down terms.inner, returns character strings
#
# Default label is attr(x,"label") or argument name if label= omitted.
# First argument can be as follows, using asis as an example:
# asis(x, ...) name="x", label=attr(x,"label") or "x"
# if NULL
# asis(w=abs(q), ...) name="w", label=attr(x,"label") or "w"
# asis(age=xx) name="age", label=label attr or "age"
# asis(x,label="Age, yr") name="x", label="Age, yr"
# asis(age=x,label= name="age", label="Age in Years"
# "Age in Years")
# matrx(dx=cbind(dx1=dx1,dx2=dx2)) name="dx", individual names
# dx1 and dx2
# For matrx, default label is list of column names.
# An additional argument, name, can be used to instead specify the name of the
# variable. This is used when the functions are implicitly called from within
# design().
#
# The routines define dimnames for the returned object with column
# names = expanded list of names based on original name.
# assume.code is added to attributes of returned matrix. Is 1-8
# corresponding to transformation routines asis-strat above, 10 for matrx.
# Adds attribute nonlinear, one element/column of expanded design matrix.
# nonlinear=T if column is a nonlinear expansion of original variable,
# F if linear part or not applicable
# (e.g. dummy variable for category -> F). For matrx, all are linear.
#
# System options used: nknots for default number of knots in restr. cubic spline
# and poly.degree, default degree of polynomials
# Second argument to routines is the parameters (parms) of the
# transformation (except for asis), defined as follows:
#
# poly order of polynomial, e.g. 2 for quadratic
# lsp list of knots
# rcs number of knots if parms=1 element (-> compute default
# knot locations), actual knot locations if >2 elements
# (2 knots not allowed for restr. cubic spline)
# catg list of value labels corresponding to values 1,2,3,...
# scored list of unique values of the variable
# strat list of value labels corresponding to values 1,2,3
#
# For catg and strat, parms are omitted if the variable is character or
# is already an S category variable.
#
# Argument retrieval: After variable and optional parms, other variables
# may be named or positional, in the following order: label, name.
# For matrx, parms are not allowed.
#
# Function to return list with elements name, parms, label.
# corresponding to arguments in call to asis, etc. parms=NULL if
# parms.allowed=F. Reason for going to this trouble is that first arg to
# asis, etc. is allowed to be a named argument to set a new name for it.
# With ordinary argument fetching, remaining arguments would have to be
# named. This logic allows them to be named or positional in order:
# parms (if allowed), label.
#
# If options(Design.attr) is non-null, looks up attributes in elements
# in Design.attr corresponding to the name of the current variable.
# This is used to get predicted values when the original fitting
# function (e.g., rcs) derived parms of the transformation from the data.
#
des.args <- function(x,parms.allowed,call.args) {
nam <- names(x)
if(!length(nam)) nam <- rep("",5)
name <- nam[1]
if(name=="") {
form <- formula(call("~",as.name("...y..."),call.args[[2]]))
name <- var.inner(form)
}
pa <- parms.allowed
argu <- function(x,karg, arg.name, parms.all, nm) {
if(!parms.all) karg <- karg-1
k <- charmatch(arg.name,nm,0) #k>0 : named arg found
## Added karg <= length(x) 9Apr02 for R; R doesn't return NULL
## like S+
if(k>0) x[[k]] else
if(length(nm) < karg || nm[karg]!="") NULL else
if(karg <= length(x)) x[[karg]] else NULL
}
if(parms.allowed) parms <- argu(x,2,"parms",pa,nam) else {
parms <- NULL
if(charmatch("parms",nam,0)>0)
stop(paste("parms not allowed for",as.character(call.args[1])))
}
nm <- argu(x,5,"name",pa,nam)
if(!is.null(nm)) name <- nm
if(!is.null(.Options$Design.attr)) {
atr <- .Options$Design.attr
i <- charmatch(name, atr$name, 0)
if(is.null(i))stop("program logic error for options(factor.number)")
parmi <- atr$parms[[name]]
return(list(name=atr$name[i],parms=parmi,label=atr$label[i],
units=atr$units[i])) # added units 9Jun99
}
label <- argu(x,3,"label",pa,nam)
atx <- attributes(x[[1]]) # 9Jun99
if(is.null(label)) label <- atx$label # 9Jun99 attr(x[[1]],"label")
if(is.null(label)) label <- name
list(name=name,parms=parms,label=label,units=atx$units) #9Jun99
}
# Function to list all attributes of new sub-design matrix
set.atr <- function(xd,x,z,colnames,assume,code,parms,nonlinear) {
#Note: x argument isn't used
## Added z$units 9Jun99
if(is.matrix(xd))
list(dim=dim(xd),dimnames=list(NULL,colnames),class="Design",
name=z$name, label=z$label, assume=assume, assume.code=code,
parms=parms,
nonlinear=nonlinear,colnames=colnames,units=z$units)
else list(dim=dim(xd),class="Design",
name=z$name, label=z$label, assume=assume, assume.code=code,
parms=parms,
nonlinear=nonlinear,colnames=colnames,units=z$units)
}
# asis transformation - no transformation
asis<-function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx, FALSE, cal)
xd <- xx[[1]]
if(is.factor(xd)) attr(xd,"class") <- NULL
if(!(is.numeric(xd) | is.logical(xd))) stop(paste(z$name,"is not numeric"))
storage.mode(xd) <- "single"
attributes(xd) <- set.atr(xd,xd,z,z$name,"asis",1,NULL,FALSE)
xd
}
# matrx transformation - no transformation, keep original vars as matrix
# column names as parameter names, parms=column medians
matrx <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx, FALSE, cal)
xd <- xx[[1]]
nc <- ncol(xd)
if(!is.matrix(xd)) stop(paste(z$name,"is not a matrix"))
storage.mode(xd) <- "single"
colname <- dimnames(xd)[[2]]
if(length(colname)==0) colname <- paste(z$name,'[',1:nc,']',sep="") else
if(z$label==z$name) z$label <- paste(colname,collapse=",")
parms <- single(nc)
for(i in 1:nc)parms[i] <- median(xd[,i], na.rm=TRUE)
attributes(xd) <- set.atr(xd,NULL,z,colname,"matrix",10,parms,rep(FALSE,nc))
xd
}
# Polynomial expansion
pol <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx,TRUE,cal)
x <- xx[[1]]
if(!is.numeric(x)) stop(paste(z$name,"is not numeric"))
poly.degree <- .Options$poly.degree
if(is.null(poly.degree)) poly.degree <- 2
if(!is.null(z$parms)) poly.degree <- z$parms
if(poly.degree<2)stop("order for polynomial must be 2,3,...")
xd <- matrix(single(1),nrow=length(x),ncol=poly.degree)
nam <- z$name
name <- character(poly.degree)
name[1] <- nam
xd[,1] <- x
for(j in 2:poly.degree) {
name[j] <- paste(nam,"^",j,sep="")
xd[,j] <- x^j }
attributes(xd) <- set.atr(xd,x,z,name,"polynomial",2,poly.degree,
c(FALSE,rep(TRUE,poly.degree-1)))
xd
}
# Linear spline expansion
lsp <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx,TRUE,cal)
x <- xx[[1]]
if(!is.numeric(x)) stop(paste(z$name,"is not numeric"))
parms <- z$parms
if(is.null(parms) || any(is.na(parms)))
stop("must specify knots for linear spline")
suffix <- NULL
nam <- z$name
lp <- length(parms)
xd <- matrix(single(1),nrow=length(x),ncol=lp+1)
name <- character(lp+1)
xd[,1] <- x
name[1] <- nam
for(j in 1:lp) {
suffix <- paste(suffix,"'",sep="")
name[j+1] <- paste(nam,suffix,sep="")
xd[,j+1] <- pmax(x-parms[j],0)
}
attributes(xd) <- set.atr(xd,x,z,name,"lspline",3,parms,c(FALSE,rep(TRUE,lp)))
xd
}
# Restricted cubic spline expansion
rcs <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx,TRUE,cal)
x <- xx[[1]]
if(!is.numeric(x)) stop(paste(z$name,"is not numeric"))
nknots <- .Options$nknots
if(is.null(nknots)) nknots <- 5
parms <- z$parms
if(is.null(parms)) parms <- nknots
if(length(parms)==1) {
nknots <- parms
knots <- NULL }
else { nknots <- length(parms)
knots <- parms }
if(is.null(knots)) {
xd <- rcspline.eval(x,nk=nknots,inclx=TRUE)
knots <- attr(xd,"knots")
} else
xd <- rcspline.eval(x,knots=knots,inclx=TRUE)
parms <- knots
nknots <- length(parms)
name <- character(nknots-1)
nam <- z$name
name[1] <- nam
suffix <- NULL
for(j in 1:(nknots-2)) {
suffix <- paste(suffix,"'",sep="")
name[j+1] <- paste(nam,suffix,sep="")
}
attributes(xd) <- set.atr(xd,x,z,name,"rcspline",4,parms,
c(FALSE,rep(TRUE,nknots-2)))
xd
}
# Category variable
catg <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx,TRUE,cal)
nam <- z$name
y <- xx[[1]]
parms <- z$parms
if(is.category(y) & !is.factor(y)) oldClass(y) <- "factor"
if(is.null(parms) & is.category(y)) parms <- levels(y)
if(is.null(parms)) {
if(is.character(y)) parms <- sort(unique(y[y!="" & y!=" "]))
else parms <- as.character(sort(unique(y[!is.na(y)])))
}
if(!is.factor(y))x <- factor(y, levels=parms) else x <- y
if((is.character(y) && any(y!="" & y!=" " & is.na(x))) ||
(is.numeric(y) & any(!is.na(y) & is.na(x))))
stop(paste(nam,"has non-allowable values"))
if(all(is.na(x)))stop(paste(nam,"has no non-missing observations"))
lp <- length(parms)
if(lp<2)stop(paste(nam,"has <2 category levels"))
attributes(x) <- list(levels=parms,class=c("factor","Design"),
name=nam,label=z$label,assume="category",assume.code=5,
parms=parms,nonlinear=rep(FALSE,lp-1),
colnames=paste(nam,"=",parms[-1],sep=""))
x
}
# Scored expansion parms=unique values
scored <- function(...) {
cal <- sys.call()
xx <- list(...)
z <- des.args(xx,TRUE,cal)
parms <- z$parms
nam <- z$name
x <- xx[[1]]
if(is.category(x)) {
levx <- as.single(levels(x))
if(any(is.na(levx))) stop(paste("levels for",nam,"not numeric"))
if(is.null(parms)) parms <- levx
# .Options$warn <- -1 #suppress warning about NAs 14Sep00
oldopt <- options(warn=-1)
on.exit(options(oldopt))
x <- levx[x] }
if(!is.numeric(x))stop(paste(nam,"is not a numeric variable"))
y <- sort(unique(x[!is.na(x)]))
if(is.null(parms)) parms <- y
parms <- sort(parms)
n.unique <- length(parms)
if(n.unique<3) stop("scored specified with <3 levels")
lp <- length(parms)-1
#Form contrast matrix of the form linear | dummy | dummy ...
xd <- matrix(single(1),nrow=length(y),ncol=lp)
xd[,1] <- y
name <- character(lp)
name[1] <- nam
i <- 1
for(k in parms[3:length(parms)]) {
i <- i+1
name[i] <- paste(nam,"=",k,sep="")
xd[,i] <- y==k }
dimnames(xd) <- list(NULL, name)
x <- ordered(x)
if(!.SV4.) oldClass(x) <- c("ordered","factor","Design") # 17Jul01
attributes(x) <- c(attributes(x),
list(name=nam,label=z$label,assume="scored",assume.code=7,
parms=parms,
nonlinear=c(FALSE,rep(TRUE,lp-1)),colnames=name,contrasts=xd))
x
}
# strat parms=value labels
strat <- function(...) {
cal <- sys.call()
xx <- list(...)
y <- xx[[1]]
z <- des.args(xx,TRUE,cal)
if(is.category(y) & !is.factor(y)) oldClass(y) <- "factor"
parms <- z$parms
if(is.null(parms)) parms <- levels(y)
if(is.null(parms)) {
if(is.character(y)) parms <- sort(unique(y[y!="" & y!=" "]))
else parms <- as.character(sort(unique(y[!is.na(y)])))}
nam <- z$name
if(!is.factor(y)) x <- factor(y,levels=parms) else x <- y
if((is.character(y) & any(y!="" & y!=" " & is.na(x))) ||
(is.numeric(y) & any(!is.na(y) & is.na(x))))
stop(paste(nam," has a non-allowable value"))
name <- nam
attributes(x) <- list(levels=parms,class=c("factor","Design"),
name=nam, label=z$label, assume="strata", assume.code=8,
parms=parms, nonlinear=FALSE)
x
}
#Function to subscript a variable, keeping attributes
#Is similar to [.smooth, but does not keep attribute NAs
"[.Design" <- function(x, ..., drop = FALSE) {
ats <- attributes(x)
ats$dimnames <- NULL
ats$dim <- NULL
ats$names <- NULL
oldClass(x) <- NULL
y <- x[..., drop = drop]
attributes(y) <- c(attributes(y), ats)
y }
#Function to get parms of factor in fit or design object "fit" with name
#given by second argument (without quotes)
gparms <- function(fit,...) {
name <- as.character(sys.call())[3]
atr <- fit$Design
if(!length(atr)) atr <- getOldDesign(fit) # 13Apr01
atr$parms[[name]]
}
#value.chk - if x=NA, returns list of possible values of factor i defined
# in object f's attributes. For continuous factors, returns n values
# in default prediction range. Use n=0 to return trio of effect
# limits. Use n<0 to return pretty(plotting range,nint=-n).
# If type.range="full" uses the full range instead of default plot rng.
# If x is not NA, checks that list to see that each value is allowable
# for the factor type, and returns x
# Last argument is object returned from Getlim (see Design.Misc)
# First argument is Design list
value.chk <- function(f, i, x, n, limval, type.range="plot") {
as <- f$assume.code[i]
name <- f$name[i]
parms <- f$parms[[name]]
isna <- length(x)==1 && is.na(x)
values <- limval$values[[name]]
charval <- !is.null(values) && is.character(values)
if(isna & as!=7) {
if(is.null(limval) || match(name, dimnames(limval$limits)[[2]], 0)==0 ||
is.na(limval$limits["Adjust to",name]))
stop(paste("variable",name,"does not have limits defined by datadist"))
limits <- limval$limits[,name]
lim <- if(type.range=="full") limits[6:7] else limits[4:5]
}
if(as<5 | as==6) {
if(isna) {
if(is.null(values)) {
if(n==0) x <- limits[1:3] else {
if(n>0) x <- seq(oldUnclass(lim[1]), #handles chron
oldUnclass(lim[2]),length=n)
else x <- pretty(oldUnclass(lim[1:2]), n=-n)
oldClass(x) <- oldClass(lim)
}
} else x <- values
} else {
if(is.character(x) && !charval)
stop(paste("character value not allowed for variable",
name)) #Allow any numeric value
if(charval) {
j <- match(x, values, 0)
if(any(j==0))
stop(paste("illegal values for categorical variable:",
paste(x[j==0],collapse=" "),"\nPossible levels:",
paste(values,collapse=" ")))
}
}
}
else if(as==5|as==8) {
# if(isna) x <- lim[1]:lim[2] else
if(isna) x <- parms else {
j <- match(x, parms, 0) #match converts x to char if needed
if(any(j==0))
stop(paste("illegal levels for categorical variable:",
paste(x[j==0],collapse=" "),"\nPossible levels:",
paste(parms,collapse=" ")))
x
}
}
else if(as==7) {
if(isna) x <- parms
else if(is.character(x))stop(paste("character value not allowed for",
"variable",name))
else {
j <- match(x, parms, 0)
if(any(j==0))
stop(paste("illegal levels for categorical variable:",
paste(x[j==0],collapse=" "),"\n","Possible levels:",
paste(parms,collapse=" ")))
}
}
invisible(x)
}
# This is a stripped-down version of terms.inner
# Returns the inner-most variable expression
# Moved to Hmisc 2Apr01
if(FALSE) var.inner <- function(formula)
{
if(!inherits(formula,"formula")) formula <- attr(formula,"formula")
if(length(formula) > 2)
formula[[2]] <- NULL
maxch <- 100
z <- .C("all_names",
list(formula),
as.integer(FALSE),
labels = character(maxch),
n = as.integer(maxch),
expr = character(maxch),
as.logical(TRUE),
NAOK = TRUE)
z$labels[1:z$n]
}
#ia.operator.s - restricted interaction operators for use with Design
#F. Harrell 8 Nov 91
#Set up proper attributes for a restricted interaction for a model
#such as y ~ rcs(x1) + rcs(x2) + x1 %ia% x2 or x1 %ia% rcs(x2)
#or rcs(x1) %ia% x2
"%ia%" <- function(x1, x2) {
a1 <- attributes(x1)
a2 <- attributes(x2)
nam <- as.character(sys.call())[-1]
redo <- function(x,nam) {
if(is.null(attr(x,"assume.code"))) {
if(!is.null(oldClass(x)) && oldClass(x)[1]=="ordered")
x <- scored(x, name=nam)
else if(is.character(x) | is.category(x)) x <- catg(x, name=nam)
else if(is.matrix(x)) x <- matrx(x, name=nam)
else x <- asis(x, name=nam) }
ass <- attr(x,"assume.code")
nam <- attr(x,"name")
if(ass==5) {
colnames <- attr(x,"colnames")
len <- length(attr(x,"parms"))-1 }
else if(ass==8) {
prm <- attr(x,"parms")
colnames <- paste(nam,"=",prm[-1],sep="")
len <- length(prm)-1 }
else if(ass==7) {
prm <- attr(x,"parms")
colnames <- c(nam,paste(nam,"=",prm[-(1:2)],sep=""))
len <- length(prm)-1 }
else {
if(is.null(ncol(x))) {
len <- 1
colnames <- nam } else {
colnames <- dimnames(x)[[2]]
len <- ncol(x) }
}
attr(x,"colnames") <- colnames
attr(x,"len") <- len
if(ass==8) attr(x,"nonlinear") <- rep(FALSE, len)
x }
x1 <- redo(x1,nam[1])
x2 <- redo(x2,nam[2])
a1 <- attributes(x1)
a2 <- attributes(x2)
n1 <- a1$colnames
n2 <- a2$colnames
nl1 <- a1$nonlinear
nl2 <- a2$nonlinear
as1 <- a1$assume.code
as2 <- a2$assume.code
l1 <- a1$len
l2 <- a2$len
if(any(nl1) & any(nl2)) nc <- l1+l2-1
else nc <- l1*l2
if(is.matrix(x1)) nr <- nrow(x1)
else nr <- length(x1)
x <- matrix(single(1),nrow=nr,ncol=nc)
name <- character(nc)
parms <- matrix(integer(1),nrow=2,ncol=nc+1)
nonlinear <- logical(nc)
k <- 0
if(!is.factor(x1)) x1 <- as.matrix(x1)
if(!is.factor(x2)) x2 <- as.matrix(x2)
for(i in 1:l1) {
if(as1==5 | as1==8) x1i <- oldUnclass(x1)==(i+1)
else x1i <- x1[,i]
for(j in 1:l2) {
#Remove doubly nonlinear terms
if(nl1[i] & nl2[j]) break
k <- k + 1
if(as2==5 | as2==8) x2j <- oldUnclass(x2)==(j+1)
else x2j <- x2[,j]
x[,k] <- x1i * x2j
name[k] <- paste(n1[i],"*",n2[j])
parms[,k+1] <- c(nl1[i],nl2[j])
nonlinear[k] <- nl1[i] | nl2[j]
} }
dimnames(x) <- list(NULL, name)
attr(x,"ia") <- c(a1$name, a2$name)
attr(x,"parms") <- parms
attr(x,"nonlinear") <- nonlinear
attr(x,"assume.code") <- 9
attr(x,"name") <- paste(a1$name,"*",a2$name)
attr(x,"label") <- attr(x,"name")
attr(x,"colnames") <- name
attr(x,"class") <- "Design"
storage.mode(x) <- "single"
x
}
|