File: Rtest

package info (click to toggle)
survival 2.29-1
links: PTS
area: main
in suites: etch, etch-m68k
size: 3,204 kB
ctags: 1,077
sloc: asm: 8,713; ansic: 6,928; sh: 22; makefile: 2
file content (1256 lines) | stat: -rw-r--r-- 41,100 bytes
parent folder | download | duplicates (2)
#
# Set up for the test
#
#dyn.load("../loadmod.o")
#attach("../.Data")
library(survival)

options(na.action="na.exclude")

#
# Compute some answers "by hand"
#     For test1 and test2 data
byhand1 <- function(coef, newx) {
    s <- exp(coef)
    loglik <- 2*coef - (log(3*s+3) + 2*log(s+3))
    u <- (6 + 3*s - s^2) / ((s+1)*(s+3))
    imat <- s/(s+1)^2 + 6*s/(s+3)^2

    x <- c(1,1,1,0,0,0)
    status <- c(1,0,1,1,0,1)
    xbar <- c(s/(s+1), s/(s+3), 0, 0)
    haz <- c(1/(3*s+3), 2/(s+3), 0, 1 )
    ties <- c(1,1,2,2,3,4)
    wt <- c(s,s,s,1,1,1)
    mart <- c(1,0,1,1,0,1) -  wt* (cumsum(haz))[ties]

    score <- rep(6,0)
    for (i in 1:6) {
	j <- ties[i]
	score[i] <-  -wt[i]*(cumsum((x[i]-xbar) * haz))[j]
	score[i] <- score[i] + wt[i]* ((x[i]-xbar)*status[i])[j]
	}

    scho <- c(1/(s+1), (3-s)/(3+s), 0)

    surv  <- exp(-cumsum(haz)* exp(coef*newx))
    varhaz.g <- cumsum(c(1/(3*s+3)^2, 2/(s+3)^2, 0, 1 ))

    varhaz.d <- cumsum((newx-xbar) * haz)

    varhaz <- (varhaz.g + varhaz.d0^2/ imat) * exp(2*coef*newx)

    names(xbar) <- names(haz) <- 1:4
    names(surv) <- names(varhaz) <- 1:4
    list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz,
	     mart=mart,  score=score,
		scho=scho, surv=surv, var=varhaz,
		varhaz.g=varhaz.g, varhaz.d=varhaz.d)
    }


byhand2 <- function(coef, newx) {
    s <- exp(coef)

    loglik <- 4*coef - log(s+1) - log(s+2) - 3*log(3*s+2) - 2*log(3*s+1)
    u <- 1/(s+1) +  1/(3*s+1) + 4/(3*s+2) -
		 ( s/(s+2) +3*s/(3*s+2) + 3*s/(3*s+1))
    imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 +
	    3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2

    hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) )
    xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1),
		3*s/(3*s+2))

    var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2))
    var.d <- cumsum( (xbar-newx)*hazard)

    surv <- exp(-cumsum(hazard) * exp(coef*newx))
    varhaz <- (var.g + var.d^2/imat)* exp(2*coef*newx)

    list(loglik=loglik, u=u, imat=imat, hazard=hazard,
	    xbar=xbar, surv=surv, varhaz=varhaz, var.g=var.g, var.d=var.d)
    }

byhand3 <- function(coef) {
    #Hard coded -- what is found in the Agreg.3 comments file
    s <- as.vector(exp(coef))  #kill the names attr

    imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 +
	    3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2

    hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) )
    xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1),
		3*s/(3*s+2))


    newx <- c(0,0,1,1,1,0,0, 2,2,2,2)
    wt <- exp(coef*newx)
    indx <- c(1,2,4,5,6,1,2,3,4,5,6)

    var.g <- hazard*hazard /c(1,1,1,1,1,2)
    surv <- exp(-cumsum(hazard[indx]*wt))
    var1 <- cumsum(var.g[indx]*wt*wt)
    d    <- cumsum( (xbar[indx] - newx)* hazard[indx] * wt)
    var2 <- d^2/imat
    names(surv) <- names(var1) <-names(var2) <- NULL
    list(time= c(2,3,7,8,9,12,13,16,17,18,19),
	 surv=surv, std= sqrt(var1 + var2), var.g=var1, var.d=var2, d=d,
		hazard=hazard, wt=wt)
    }

# Create the simplest test data set
#
test1 <- data.frame(time=  c(4, 3,1,1,2,2,3),
                    status=c(1,NA,1,0,1,1,0),
                    x=     c(0, 2,1,1,1,0,0))
#
#   Do the test on the simple data set
#

fit <-coxph(Surv(time, status) ~x, test1, method='breslow')
fit
fit0 <-coxph(Surv(time, status) ~x, test1, iter=0)
fit0$coef
coxph(Surv(time, status) ~x, test1, iter=1, method='breslow')$coef
coxph(Surv(time, status) ~x, test1, iter=2, method='breslow')$coef
coxph(Surv(time, status) ~x, test1, iter=3, method='breslow')$coef

coxph(Surv(time, status) ~ x, test1, method='efron')
coxph(Surv(time, status) ~ x, test1, method='exact')

resid(fit0)
resid(coxph(Surv(time, status) ~1, test1))
resid(fit0, 'scor')
resid(fit0, 'scho')

resid(fit)
resid(fit, 'scor')
resid(fit, 'scho')

predict(fit, type='lp')
predict(fit, type='risk')
predict(fit, type='expected')
predict(fit, type='terms') 
predict(fit, type='lp', se.fit=T)
predict(fit, type='risk', se.fit=T)
predict(fit, type='expected', se.fit=T)
predict(fit, type='terms', se.fit=T)

summary(survfit(fit))
summary(survfit(fit, list(x=2)))
#
# Create the simple data for agreg
#

test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8),
		    stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17),
		    event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0),
		    x    =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) )
#
#   Do the test on the simple agreg data set
#
fit <-coxph(Surv(start, stop, event)~ x, test2, method='breslow')
fit
fit0 <-coxph(Surv(start, stop, event)~ x, test2, iter=0)
fit0$coef
coxph(Surv(start, stop, event)~ x, test2, iter=1, method='breslow')$coef
coxph(Surv(start, stop, event)~ x, test2, iter=2, method='breslow')$coef
coxph(Surv(start, stop, event)~ x, test2, iter=3, method='breslow')$coef

coxph(Surv(start, stop, event) ~ x, test2, method='efron')
coxph(Surv(start, stop, event) ~ x, test2, method='exact')

resid(fit0)
resid(fit0, 'scor')
resid(fit0, 'scho')

resid(fit)
resid(fit, 'scor')
resid(fit, 'scho')

resid(coxph(Surv(start, stop, event)~ x, test2, iter=0, init=log(2)), 'score')

sfit <-survfit(fit)
sfit
summary(sfit)

sfit.km <- survfit(Surv(start, stop, event)~ x, data=test2)
sfit.km
summary(sfit.km)

# make a doubled data set
temp <- rbind(test2, test2)
temp <- data.frame(temp, x2=c(test2$x, test2$x^2), 
		         ss=c(rep(0, nrow(test2)), rep(1, nrow(test2))))
fitx <- coxph(Surv(start, stop, event) ~ x2 * strata(ss), data=temp,
	      method='breslow')

sfit <- survfit(fitx, c(fitx$means[1], 0) ) 
sfit
summary(sfit)
#
# Even though everyone in strata 1 has x2==0, I won't get the same survival
#  curve above if survfit is called without forcing predicted x2 to be
#  zero-- otherwise I am asking for a different baseline than the
#  simple model did.  In this particular case coef[2] is nearly zero, so
#  the curves are the same, but the variances differ.
#

# This mimics 'byhand3' and the documentation
fit <- coxph(Surv(start, stop, event) ~x, test2, method='breslow')
tdata <- data.frame( start=c(0,20, 6,0,5),
		     stop =c(5,23,10,5,15),
		     event=rep(0,5),
		     x=c(0,0,1,0,2) )

temp <- survfit(fit, tdata, individual=T)
temp2 <- byhand3(fit$coef)
all.equal(temp$surv, temp2$surv)
all.equal(temp2$std, temp$std.err)

temp2
#
# Test out the survival curve and variance, in gory detail
#
xdata <- data.frame(rbind(test1,test1), ss = rep(1:2, rep(nrow(test1),2)))
fit <- coxph(Surv(time, status)~x + strata(ss), xdata, method='breslow')
sfit <- survfit(fit, list(x=0))	 #type='aalen' is default 

# From the hand worked notes
bb <- as.vector(exp(fit$coef))
realhaz <- cumsum(c(1/(3*bb+3), 2/(bb+3), 1))
all.equal(sfit$surv, exp(-rep(realhaz,2)))

realvar <- cumsum(c( (1/(3*bb+3))^2, 2/(bb+3)^2, 1))
dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
	        (bb/(bb+3))*(2/(bb+3)),  0*1))
realvar <- realvar + dd^2 * fit$var
all.equal(sfit$std, sqrt(rep(realvar,2)))
summary(sfit)

# Get the Kalbfleisch-Prentice survival with Greenwood variance
#  
sfit <- survfit(fit, list(x=0), type='kalb')	 

# second term is the solution to an equation, in the neighborhood of .6
tfun <- function(alpha) (bb/(1-alpha^bb) + 1/(1-alpha) - (bb+3))^2
## not in R
## temp <- nlminb(0.6, tfun, lower=.1, upper=.9)
temp <- optimize(tfun, lower=.1, upper=.9,tol=sqrt(.Machine$double.eps))

realkm <- cumprod(c((1- bb/(3*bb+3))^(1/bb), temp$minimum, 0))
all.equal(sfit$surv, rep(realkm,2))

realvar <- cumsum(c( 1/((3*bb+3)*(2*bb+3)), 2/((bb+3)*2), 1))
dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
	        (bb/(bb+3))*(2/(bb+3)),  0*1))
realvar <- realvar + dd^2 * fit$var
all.equal(sfit$std, sqrt(rep(realvar,2)))
summary(sfit)


#
# Repeat with the Efron approximation for Cox, Efron estimates
#
fit <- coxph(Surv(time, status)~x + strata(ss), xdata)
sfit <- survfit(fit, list(x=0))	 
bb <- as.vector(exp(fit$coef))

realhaz <- cumsum(c(1/(3*bb+3), 1/(bb+3) + 2/(bb+5), 1))
all.equal(sfit$surv, exp(-rep(realhaz,2)))

realvar <- cumsum(c( (1/(3*bb+3))^2, 1/(bb+3)^2 + 4/(bb+5)^2, 1))
dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
	        (bb/(bb+3))*(1/(bb+3))+ (bb/(bb+5))*(2/(bb+5)),  0*1))
realvar <- realvar + dd^2 * fit$var
all.equal(sfit$std, sqrt(rep(realvar,2)))
summary(sfit)


rm(bb, sfit, realhaz, realvar, fit, realkm, xdata)
#
# The AML data, from Miller, "Survival Analysis", page 49.
#
aml <- list(time=   c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161,
			   5,  5,  8,  8, 12, 16, 23, 27, 30, 33, 43, 45),
	    status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1),
	    x     = as.factor(c(rep("Maintained", 11),
				rep("Nonmaintained", 12) )))

aml <- data.frame(aml)
#
# These results can be found in Miller
#
fit <- coxph(Surv(aml$time, aml$status) ~ aml$x, method='breslow')
fit
resid(fit, type='mart')
resid(fit, type='score')
resid(fit, type='scho')

fit <- survfit(Surv(aml$time, aml$status) ~ aml$x)
fit
summary(fit)
survdiff(Surv(aml$time, aml$status)~ aml$x)

#
# Test out the weighted K-M
#
#  First, equal case weights- shouldn't change the survival, but will
#    halve the variance
temp2 <-survfit(Surv(aml$time, aml$status), type='kaplan', weight=rep(2,23))
temp  <-survfit(Surv(time, status)~1, aml)
temp$surv/temp2$surv
(temp$std.err/temp2$std.err)^2

# Risk weights-- use a null Cox model
tfit <- coxph(Surv(aml$time, aml$status) ~ offset(log(1:23)))
sfit <- survfit(tfit, type='aalen')

# Now compute it by hand
#  Ties are a challenge
atime <- sort(aml$time)
denom <- rev(cumsum(rev((1:23)[order(aml$time)])))
denom <- denom[match(unique(atime), atime)]
deaths <- tapply(aml$status, aml$time, sum)
chaz <- cumsum(deaths/denom)
all.equal(sfit$surv, as.vector(exp(-chaz[deaths>0])))
cvar <- cumsum(deaths/denom^2)
all.equal(sfit$std^2, as.vector(cvar[deaths>0]))
summary(sfit)

# And the Efron result
summary(survfit(tfit))

# Lots of ties, so its a good test case
x1 <- coxph(Surv(time, status)~x, aml, method='efron')
x1
x2 <- coxph(Surv(rep(0,23),time, status) ~x, aml, method='efron')
x1$coef - x2$coef

rm(x1, x2, atime, denom, deaths, chaz,cvar, tfit, sfit, temp, temp2, fit)
#
# Trivial test of stratified residuals
#   Make a second strata = replicate of the first, and I should get the
#   exact same answers

temp <- as.matrix(test1)
n    <- nrow(temp)
ndead<- sum(test1$status[!is.na(test1$status)])
temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame
tstrat <- rep(1:2, c(n,n))

fit1 <- coxph(Surv(time, status) ~x, test1)
fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp)

all.equal(resid(fit1) , (resid(fit2))[1:n])
all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n])
all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead])

#AG model
temp <- as.matrix(test2)
n    <- nrow(temp)
ndead<- sum(test2$event[!is.na(test2$event)])
temp <- data.frame(rbind(temp, temp))
tstrat <- rep(1:2, c(n,n))

fit1 <- coxph(Surv(start, stop, event) ~x, test2)
fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp)

all.equal(resid(fit1) , (resid(fit2))[1:n])
all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n])
all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead])
#
# A test to exercise the "infinity" check on 2 variables
#
test3  <-  data.frame(futime=1:12, fustat=c(1,0,1,0,1,0,0,0,0,0,0,0),
		   x1=rep(0:1,6), x2=c(rep(0,6), rep(1,6)))
coxph(Surv(futime, fustat) ~ x1 + x2, test3)
# Create a "counting process" version of the simplest test data set
#
test1b<- list(start= c(0, 3,  0,  0, 5,  0, 6,14,  0,  0, 10,20,30, 0),
	      stop = c(3,10, 10,  5,20,  6,14,20, 30,  10,20,30,40, 10),
	      status=c(0, 1,  0,  0, 1,  0, 0, 1,  0,   0, 0, 0, 1,  0),
	      x=     c(1, 1,  1,  1, 1,  0, 0, 0,  0,   0, 0, 0, 0,  NA),
	      id =   c(3, 3,  4,  5, 5,  6, 6, 6,  7,   1, 1, 1, 1,   2))
#
#  Check out the various residuals under an Efron approximation
#
fit0 <- coxph(Surv(time, status)~ x, test1, iter=0)
fit  <- coxph(Surv(time, status) ~x, test1)
fit0b <- coxph(Surv(start, stop, status) ~ x, test1b, iter=0)
fitb  <- coxph(Surv(start, stop, status) ~x, test1b)
fitc  <- coxph(Surv(time, status) ~ offset(fit$coef*x), test1)
fitd  <- coxph(Surv(start, stop, status) ~ offset(fit$coef*x), test1b)

resid(fit0)
resid(fit0b, collapse=test1b$id)
resid(fit)
resid(fitb, collapse=test1b$id)
resid(fitc)
resid(fitd, collapse=test1b$id)
all.equal(resid(fitc), resid(fit))

resid(fit0, type='score')
resid(fit0b, type='score', collapse=test1b$id)
resid(fit, type='score')
resid(fitb, type='score', collapse=test1b$id)

resid(fit0, type='scho')
resid(fit0b, type='scho', collapse=test1b$id)
resid(fit, type='scho')
resid(fitb, type='scho', collapse=test1b$id)

summary(survfit(fit0, list(x=0)))
summary(survfit(fit, list(x=0)))
summary(survfit(fitb,list(x=0)))
summary(survfit(fit))
# Tests of the weighted Cox model
#
# Similar data set to test1, but add weights,
#                                    a double-death/censor tied time
#                                    a censored last subject
# The latter two are cases covered only feebly elsewhere.
testw1 <- data.frame(time=  c(1,1,2,2,2,2,3,4,5),
		    status= c(1,0,1,1,1,0,0,1,0),
		    x=      c(2,0,1,1,0,1,0,1,0),
		    wt =    c(1,2,3,4,3,2,1,2,1))

fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt,
		    method='breslow', iter=0)
fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow')
fit0
summary(fit)
resid(fit0, type='mart')
resid(fit0, type='score')
resid(fit0, type='scho')
fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0)
resid(fit0, 'mart')
resid(coxph(Surv(time, status) ~1, testw1, weights=wt))  #Null model

resid(fit, type='mart')
resid(fit, type='score')
resid(fit, type='scho')

fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='efron')
fit
resid(fit, type='mart')
resid(fit, type='score')
resid(fit, type='scho')
# Tests of the weighted Cox model, AG form of the data
#   Same solution as doweight1.s
#
testw2 <- data.frame(id  =  c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9),
		     begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0),
		     time=  c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50),
		    status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0),
		    x=      c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0),
		    wt =    c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1))

fit0 <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt,
		    method='breslow', iter=0)
fit  <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='breslow')
fit0
summary(fit)
resid(fit0, type='mart', collapse=testw2$id)
resid(fit0, type='score', collapse=testw2$id)
resid(fit0, type='scho')

resid(fit, type='mart', collapse=testw2$id)
resid(fit, type='score', collapse=testw2$id)
resid(fit, type='scho')
fit0 <- coxph(Surv(begin, time, status) ~x,testw2, weights=wt, iter=0)
resid(fit0, 'mart', collapse=testw2$id)
resid(coxph(Surv(begin, time, status) ~1, testw2, weights=wt)
		      , collapse=testw2$id)  #Null model

fit  <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='efron')
fit
resid(fit, type='mart', collapse=testw2$id)
resid(fit, type='score', collapse=testw2$id)
resid(fit, type='scho')
#
# The test data set from Turnbull, JASA 1974, 169-73.
#
#  status  0=right censored
#          1=exact
#          2=left censored
#

turnbull <- data.frame( time  =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4),
			status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2),
			  n   =c(12,3,2, 6,2,4, 2,0,2, 3,3,5))
#
# Compute the K-M for the Turnbull data
#      via a slow EM calculation
#

emsurv <- function(time, status, wt, verbose=T) {
    left.cen <- (status==2)
    if (!any(left.cen)) stop("No left censored data!")
    if (!any(status==1))stop("Must have some exact death times")

    tempy <- Surv(time[!left.cen], status[!left.cen])
    ww <- wt[!left.cen]
    tempx <- factor(rep(1, sum(!left.cen)))
    tfit <- survival:::survfit.km(tempx, tempy, casewt=ww)
    if (verbose)
       cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)),
		       "\n")

    stimes <- tfit$time[tfit$n.event>0]
    ltime <- time[left.cen]
    lwt   <- wt[left.cen]
    tempx <- factor(rep(1, length(stimes) + sum(!left.cen)))
    tempy <- Surv(c(time[!left.cen], stimes),
		  c(status[!left.cen], rep(1, length(stimes))))
    for (iter in 1:4) {
	wt2 <- stimes*0
	ssurv <- tfit$surv[tfit$n.event>0]
	sjump <- diff(c(1, ssurv))
	for (j in 1:(length(ltime))) {
	    k <- sum(ltime[j]>=stimes)   #index of the death time
	    if (k==0)
		stop("Left censored observation before the first death")
	    wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1)
	    }
	tfit <- survival:::survfit.km(tempx, tempy, casewt=c(ww, wt2))
	if (verbose) {
	   cat("Iteration", iter, "survival=",
		 format(round(tfit$surv[tfit$n.event>0],3)),  "\n")
	   cat("             weights=", format(round(wt2,3)), "\n")
	   }
	}
    survfit(tempy ~ tempx, weights=c(ww, wt2))
    }

temp <-emsurv(turnbull$time, turnbull$status, turnbull$n)
print(summary(temp))
#
# Read in the ovarian data
#

ovarian <- read.table("data.ovarian", row.names=NULL,
	    col.names= c("futime", "fustat", "age", "resid.ds", "rx", "ecog.ps"))
#
# Test the coxph program on the Ovarian data
#

xx  <-  order(ovarian$futime)   #put data in same order as SAS green book
temp <- ovarian[xx,]
attach(temp)

# List the data
temp

summary(survfit(Surv(futime, fustat)), censor=T)

# Various models
coxph(Surv(futime, fustat)~ age)
coxph(Surv(futime, fustat)~ resid.ds)
coxph(Surv(futime, fustat)~ rx)
coxph(Surv(futime, fustat)~ ecog.ps)

coxph(Surv(futime, fustat)~ resid.ds + rx + ecog.ps)
coxph(Surv(futime, fustat)~ age + rx + ecog.ps)
coxph(Surv(futime, fustat)~ age + resid.ds + ecog.ps)
coxph(Surv(futime, fustat)~ age + resid.ds + rx)

# Residuals
fit <- coxph(Surv(futime, fustat)~ age + resid.ds + rx + ecog.ps )
resid(fit)
resid(fit, 'dev')
resid(fit, 'scor')
resid(fit, 'scho')

fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + strata(rx))
summary(fit)
summary(survfit(fit))
sfit <- survfit(fit, list(age=c(30,70), ecog.ps=c(2,3)))  #two columns
sfit
summary(sfit)
detach(2)


# Test the robust=T option of coxph
fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + rx, ovarian, robust=T)
rr <- resid(fit, type='dfbeta')
all.equal(as.vector(t(rr) %*% rr), as.vector(fit$var))
temp  <-  scan("data.jasa", what=list(id=0, b.mo=0, b.d=0, b.y=0,
                                       a.mo=0, a.d=0, a.y=0,
                                       t.mo=0, t.d=0, t.y=0,
                                       f.mo=0, f.d=0, f.y=0,
                                       fu.stat=0, surg=0, mismatch=0,
                                       hla.a2=0, mscore=0, reject=0))

temp3  <-  mdy.date(temp$b.mo, temp$b.d, temp$b.y,nineteen=TRUE)
temp4  <-  mdy.date(temp$a.mo, temp$a.d, temp$a.y,nineteen=TRUE)
temp5  <-  mdy.date(temp$t.mo, temp$t.d, temp$t.y,nineteen=TRUE)
temp6  <-  mdy.date(temp$f.mo, temp$f.d, temp$f.y,nineteen=TRUE)

# Make sure that a particular idiocy is turned off: turning logicals into 
#       factors!
as.data.frame.logical <- as.data.frame.vector
jasa <- data.frame( birth.dt=temp3, accept.dt=temp4, tx.date=temp5,
                    fu.date=temp6, fustat=temp$fu.stat,
                    surgery = temp$surg,  age= temp4-temp3,
                    futime = 1+temp6 - temp4, wait.time= 1+temp5 - temp4,
                    transplant = c(!is.na(temp5)),
                    mismatch=temp$mismatch, hla.a2=temp$hla.a2,
                    mscore = temp$mscore, reject=temp$reject)

row.names(jasa) <- temp$id

# The "1+" above causes us to match the analysis in Kalbfleisch and Prentice.
#  Someone accepted and transplanted on the same day is assumed to have one
#  day under the non-transplant risk.

rm(temp, temp3, temp4, temp5, temp6)


data(ratetables)
expect <- survexp(futime ~ ratetable(age=(accept.dt - birth.dt), sex=1,year=accept.dt,race="white"), jasa, cohort=F, ratetable=survexp.usr)

survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) 
# Do a Stanford heart transplant data the way that K&P do
#
# Input - a data frame containing the raw data
# Output- a data frame with multiple obs for the transplanted subjects
#
#  There are more efficient ways to do this than the "for" loop below, but
#    this script is much more readable than any of them.
#
stan1 <- function(jasa) {
    id <- row.names(jasa)
    tx  <-   1*(jasa$transplant)
    covar  <-  cbind(jasa$age/365.25 -48,
		  (jasa$accept.dt - mdy.date(10,1,1967))/365.25,
		  jasa$surgery)
    n  <-  length(tx)     #number in study
    ntx  <-  sum(tx)      #number transplanted
    # Per paragraph 2, p138, the patient who died on the day of transplant is
    #   treated differently, for all others deaths are assumed to occur "earlier
    #   in the day" than transplants.
    special <- id ==38
    wait  <-  jasa$wait.time
    wait[special]  <-  wait[special] - .5

    age <- year <- surgery <- transplant <- id2 <- double(n+ntx)
    start <- stop <- event <- double(n+ntx)
    ii <- 1
    for (i in 1:n) {
	age[ii] <- covar[i,1]
	year[ii] <- covar[i,2]
	surgery[ii] <- covar[i,3]
	transplant[ii] <- 0
	id2[ii] <- id[i]

	if (tx[i])  { #transplanted  - 2 lines if data
	    start[ii] <- 0
	    stop[ii]  <- wait[i]
	    event[ii] <- 0

	    ii <- ii+1
	    start[ii] <- wait[i]
	    stop[ii]  <- jasa$futime[i]
	    event[ii] <- jasa$fustat[i]
	    age[ii] <- covar[i,1]
	    year[ii] <- covar[i,2]
	    surgery[ii] <- covar[i,3]
	    transplant[ii] <- 1
	    id2[ii] <- id[i]
	    }
	else {                             # one line of data
	    start[ii] <-0
	    stop[ii] <- jasa$futime[i]
	    event[ii]<- jasa$fustat[i]
	    }
	ii <- ii+1
	}

    data.frame(start, stop, event, age, year, surgery,
		    transplant=factor(transplant), id=id2)
    }

jasa1  <-  stan1(jasa)
attach(jasa1)
# Now fit the 6 models found in Kalbfleisch and Prentice, p139
#options(contrasts=c("contr.treatment", "contr.treatment"))
sfit.1  <-  coxph(Surv(start, stop, event)~ (age + surgery)*transplant,
				method='breslow')
sfit.2  <-  coxph(Surv(start, stop, event)~ year*transplant,
				method='breslow')
sfit.3  <-  coxph(Surv(start, stop, event)~ (age + year)*transplant,
				method='breslow')
sfit.4  <-  coxph(Surv(start, stop, event)~ (year +surgery) *transplant,
				method='breslow')
sfit.5  <-  coxph(Surv(start, stop, event)~ (age + surgery)*transplant + year ,
				method='breslow')
sfit.6  <-  coxph(Surv(start, stop, event)~ age*transplant + surgery + year,
				method='breslow')

summary(sfit.1)
sfit.2
summary(sfit.3)
sfit.4
sfit.5
sfit.6

# Survival curve for the "average" subject
summary(survfit(sfit.1))

# Survival curve for a subject of age 50, with prior surgery, tx at 6 months
data <- data.frame(start=c(0,183), stop=c(183,3*365), event=c(1,1),
		   age=c(50,50),  surgery=c(1,1), transplant=c(0,1))
summary(survfit(sfit.1, data, individual=T))

# These should all give the same answer
j.age <- jasa$age/365.25 -48
fit1 <- coxph(Surv(futime, fustat) ~ j.age, data=jasa)
fit2 <- coxph(Surv(futime, fustat) ~ j.age, jasa, init=fit1$coef, iter=0)
fit3 <- coxph(Surv(start, stop, event) ~ age)
fit4 <- coxph(Surv(start, stop, event) ~ offset((age-fit3$means)*fit1$coef))
s1 <- survfit(fit1, fit3$means) 
s2 <- survfit(fit2, fit3$means)
s3 <- survfit(fit3)
s4 <- survfit(fit4)

all.equal(s1$surv, s2$surv)
all.equal(s1$surv, s3$surv)
all.equal(s1$surv, s4$surv)

# Still the same answer, fit multiple strata at once
#  Strata 1 has independent coefs of strata 2, so putting in
#    the other data should not affect it
ll <- length(start)
ss <- rep(0:1, c(ll,ll))
fit <- coxph(Surv(rep(start,2), rep(stop,2), rep(event,2)) ~
			rep(age,2)*strata(ss) + I(rep(age,2)^2*ss) )
fit$coef[1] - fit3$coef
s4 <- survfit(fit, c(fit3$means, 0,0))
all.equal(s4$surv[1:(s4$strata[1])],  s3$surv)
detach("jasa1")
# A subset of some local data on lung cancer patients.  A useful internal test
#  because it has multiple strata and lots of missing values.
#

# inst = enrolling institution
# sex  1=male  2=female
# ph.ecog  physician's estimate of the ECOG performace score.  0=fully active,
#              4=bedridden
# ph.karno  physician's estimate of the Karnofsky score, a competitor to the
#              ECOG ps.
# pat.karno  patient's assesment of his/her Karnofsky score
# meal.cal   # calories consumed at meals (exclude beverages and snacks)
# wt.loss    weight loss in the last 6 months


# 12/98: this is now part of the Splus distribution, but they call it
#  "lung"
#cancer <- lung
## In R it's data(cancer) or data(lung)
data(cancer)
# Test out all of the routines on a more complex data set
#
temp <- survfit(Surv(time, status) ~ ph.ecog, cancer)
summary(temp, times=c(30*1:11, 365*1:3))
print(temp[2:3])

temp <- survfit(Surv(time, status), cancer, type='fleming',
		   conf.int=.9, conf.type='log-log', error='tsiatis')
summary(temp, times=30 *1:5)

temp <- survdiff(Surv(time, status) ~ inst, cancer, rho=.5)
print(temp, digits=6)

temp <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss 
	      + sex + age + meal.cal + strata(inst), cancer)
summary(temp)
cox.zph(temp)
cox.zph(temp, transform='identity')

coxph(Surv(rep(0,length(time)), time, status) ~ ph.ecog + ph.karno + pat.karno
		+ wt.loss + sex + age + meal.cal + strata(inst), cancer)
bladder <- read.table('data.bladder4',
	     col.names=c('id', 'rx', 'number', 'size', 'start', 'stop',
				'event', 'enum'))

bladder2 <- bladder[bladder$start< bladder$stop, ]
bladder$start <- NULL
bladder <- bladder[bladder$enum<5,]
#
# Fit the models found in Wei et. al.
#
#options(contrasts='contr.treatment')
wfit <- coxph(Surv(stop, event) ~ (rx + size + number)* strata(enum) +
		 cluster(id), bladder, method='breslow')
wfit

# Check the rx coefs versus Wei, et al, JASA 1989
rx <- c(1,4,5,6)  # the treatment coefs above
cmat <- diag(4); cmat[1,] <- 1;          #contrast matrix
wfit$coef[rx] %*% cmat                   # the coefs in their paper (table 5)
t(cmat) %*% wfit$var[rx,rx] %*% cmat  # var matrix (eqn 3.2)

# Anderson-Gill fit
fita <- coxph(Surv(start, stop, event) ~ rx + size + number + cluster(id),
		  bladder2,  method='breslow')
fita

# Prentice fits.  Their model 1 a and b are the same
fit1p  <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
		subset=(enum==1), method='breslow')
fit2pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
		subset=(enum==2), method='breslow')
fit2pb <- coxph(Surv(stop-start,  event) ~ rx + size + number, bladder2,
		   subset=(enum==2), method='breslow')
fit3pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
		subset=(enum==3), method='breslow')
 #and etc.
fit1p
fit2pa
fit2pb
fit3pa
#
# A test of  NULL models
#
fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0)
fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder)

all.equal(fit1$loglik[2], fit2$loglik)
all.equal(fit1$resid, fit2$resid)


fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0)
fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2)

all.equal(fit1$loglik[2], fit2$loglik[2])
all.equal(fit1$resid, fit2$resid)
#  Tests of expected survival

#
# Simple case 1: a single male subject, born 6/6/36 and entered on study 6/6/55.
#
#  Compute the 1, 5, 10 and 12 year expected survival

temp1 <- mdy.date(6,6,36)
temp2 <- mdy.date(6,6,55)
##exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
##		    ratetable=survexp.uswhite,times=c(366, 1827, 3653, 4383))
exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1,race="white"),
		    ratetable=survexp.usr,times=c(366, 1827, 3653, 4383))

# Well, almost easy.  The uswhite table assumes that someone age 19 will have
#   seen 5 leap years-- but this lad has only seen 4.  Thus the routine sees
#   him as 1 day shy of his birthday.
#   (The age 19 category starts at 6940, but he is 6939 days old at entry).
# Thus his passage through the table is a bit more complicated

# First epoch:  1 day  at age 18, 1954    6/6/55
#             365 days at age 19, 1955    6/7/55 - 6/5/56

# Second      365 days at age 20, 1956    6/6/56 - 6/5/57
#             365 days at age 21, 1957    6/6/57 - 6/5/58
#             366 days at age 22, 1958    6/6/58 - 6/6/59
#             365 days at age 23, 1959    6/7/59 - 6/5/60

# Third       365 days at age 24, 1960    6/6/60 - 6/5/61
#             365 days at age 25, 1961    6/6/61 - 6/5/62
#             366 days at age 26, 1962    6/6/62 - 6/6/63
#             365 days at age 27, 1963    6/7/63 - 6/5/64
#             365 days at age 28, 1964    6/6/64 - 6/5/64

# Fourth      365 days at age 29, 1965    6/6/65 - 6/5/66
#             365 days at age 30, 1966    6/6/66 - 6/5/67

# remember, a first subscript of "1" is for age 0

##xx <- survexp.uswhite[,1,]

## Edited to account for change to survexp.usr, which has more categories
## before age 1.
xx <- survexp.usr[,1,"white",]
check <- c(       (.6*xx["18","1950"] + .4*xx["18","1960"])    +
             365* (.5*xx["19","1950"] + .5*xx["19","1960"]) ,

	     365* (.4*xx["20","1950"] + .6*xx["20","1960"]) +
	     365* (.3*xx["21","1950"] + .7*xx["21","1960"]) +
	     366* (.2*xx["22","1950"] + .8*xx["23","1960"]) +
	     365* (.1*xx["23","1950"] + .9*xx["24","1960"]) ,

	     365* (   xx["24","1960"]              ) +
	     365* (.9*xx["25","1960"] + .1*xx["25","1970"]) +
	     366* (.8*xx["26","1960"] + .2*xx["26","1970"]) +
	     365* (.7*xx["27","1960"] + .3*xx["27","1970"]) +
	     365* (.6*xx["28","1960"] + .4*xx["28","1970"]) ,

	     365* (.5*xx["29","1960"] + .5*xx["29","1970"]) +
	     365* (.4*xx["30","1960"] + .6*xx["30","1970"]) )

print(exp1$surv)
print(exp(-cumsum(check))) ###?FIXME

# This does not pass the "all.equal" test.  Because of leap year (again),
#  the internal S code does not do exactly what is stated above.  US
#  rate tables are special: the entry for age 20 in 1950 is the probability
#  that someone who becomes 20 years old in 1950 will reach his 21st birthday.
#  In order to fit this into the general "cutpoints" calculations of
#  person-years, dates are adjusted so that everyone appears to have a
#  birthday on Jan 1.  But because of leap years, a birthday then moves to
#  Dec 31 sometimes --  this happens in each of the 366 day intervals above,
#  e.g., 1 day at age 22 with 1957 rates, 365 at age 22 with 1958 rates.
#  Upshot: they only agree to 6 decimals.  Close enough for me!

rm(temp1, temp2, exp1, check, xx)
#  Tests of expected survival

#
# Simple case 2: make sure that the averages are correct, for Ederer method
#
#  Compute the 1, 5, 10 and 12 year expected survival

temp1 <- mdy.date(1:6,6:11,36:41)
temp2 <- mdy.date(6:1,11:6,55:50)
age <- temp2 - temp1

exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
		       times=c(366, 1827, 3653, 4383))
exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6),
			times=c(366, 1827, 3653, 4383))

print(all.equal(exp1$surv, apply(exp2$surv, 1, mean)))

#
# Check that adding more time points doesn't change things
#
exp3 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
		times=sort(c(366, 1827, 3653, 4383, 30*(1:100))))

exp1$surv
exp3$surv[match(exp1$time, exp3$time, nomatch=0)]


#
# Now test Hakulinen's method, assuming an analysis date of 3/1/57
#
futime <- mdy.date(3,1,57) - temp2
xtime  <- sort(c(futime, 30, 60, 185, 365))

exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1),
		times=xtime, conditional=F)
exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6),
			times=futime)

wt <- rep(1,6)
con <- double(6)
for (i in 1:6) {
    con[i] <- sum(exp2$surv[i,i:6])/sum(wt[i:6])
    wt <- exp2$surv[i,]
    }

exp1$surv[match(futime, xtime)]
cumprod(con)                  #should be equal


#
# Now for the conditional method
#
exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1),
		times=xtime, conditional=T)

cond <- exp2$surv
for (i in 6:2) cond[i,]  <-  (cond[i,]/cond[i-1,])  #conditional survival
for (i in 1:6) con[i]  <-  exp(mean(log(cond[i, i:6])))

all.equal(exp1$surv[match(futime, xtime)], cumprod(con))
cumprod(con)

rm(con, cond, exp1, exp2, wt, temp1, temp2, age)
rm(exp3, futime, xtime)
#
# Test out expected survival, when the parent pop is another Cox model
#
fit <- coxph(Surv(time, status) ~x, test1, method='breslow')

dummy <- data.frame(time=c(.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5),
		    status=c(1,0,1,0,1,0,1,1,1), x=(-4:4)/2)

efit <- survexp(time ~ ratetable(x=x), dummy, ratetable=fit, cohort=F)

#
# Now, compare to the true answer, which is known to us
#
ss <- exp(fit$coef)
haz <- c( 1/(3*ss+3), 2/(ss+3), 1) #truth at time 0,1,2,4+
chaz <- cumsum(c(0,haz))
chaz2 <- chaz[c(1,2,2,3,3,3,3,4,4)]

risk <- exp(fit$coef*dummy$x)
efit2 <- exp(-risk*chaz2)

all.equal(as.vector(efit), as.vector(efit2))  #ignore mismatched name attrib

#
# Now test the direct-adjusted curve (Ederer)
#
efit <- survexp( ~ ratetable(x=x), dummy, ratetable=fit, se=F)
direct <- survfit(fit, newdata=dummy)$surv

chaz <- chaz[-1]                  #drop time 0
d2 <- exp(outer(-chaz, risk))
all.equal(as.vector(direct), as.vector(d2))   #this tests survfit

all.equal(as.vector(efit$surv), as.vector(apply(direct,1,mean)))  #direct


#
# Now test out the Hakulinen method (Bonsel's method)
#  By construction, we have a large correlation between x and censoring
#
# In theory, hak1 and hak2 would be the same.  In practice, like a KM and
#   F-H, they differ when n is small.
#
efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F)

surv  <- wt <- rep(1,9)
tt <- c(1,2,4)
hak1 <- hak2 <- NULL
for (i in 1:3) {
    wt[dummy$time < tt[i]]  <- 0
    hak1 <- c(hak1,  exp(-sum(haz[i]*risk*surv*wt)/sum(surv*wt)))
    hak2 <- c(hak2,  sum(exp(-haz[i]*risk)*surv*wt)/sum(surv*wt))
    surv <- surv * exp(-haz[i]*risk)
    }

all.equal(as.vector(efit$surv), as.vector(cumprod(hak2)))

#
#  Now do the conditional estimate
#
efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F,
			conditional=T)
wt <- rep(1,9)
cond <- NULL
for (i in 1:3) {
    wt[dummy$time < tt[i]]  <- 0
    cond <- c(cond,  exp(-sum(haz[i]*risk*wt)/sum(wt)))
    }

all.equal(as.vector(efit$surv), as.vector(cumprod(cond)))

rm(wt, cond, efit, tt, surv, hak1, hak2)
rm(fit, dummy, ss, efit2, chaz, chaz2, risk)
rm(d2, direct)
#
# Run a test that can be verified using SAS's LIFEREG
#
fit1w <- survreg(Surv(time, status) ~x, test1, dist='weibull')
fit1w
summary(fit1w)

fit1e <- survreg(Surv(time, status) ~x, test1, dist='exp')
fit1e
summary(fit1e)

fit1l <- survreg(Surv(time, status) ~x, test1, dist='loglogistic')
fit1l
summary(fit1l)

fit1g <- survreg(Surv(time, status) ~x, test1, dist='lognormal')
summary(fit1g)
#
#  Do a test with the ovarian data
#
fitfw <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian,
	link='log', dist='extreme')
fitfw

fitfl <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian,
	link='log', dist='logistic')
fitfl

flem2 <- scan("fleming.data2", what=list(ltime=0, rtime=0))
flsurv<- Surv(flem2$ltime, flem2$rtime, type='interval2')

fitfw2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='weibull')
summary(fitfw2)

fitfl2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='loglogistic')
summary(fitfl2)

fitfg2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='lognormal')
summary(fitfg2)
#
# A simple test of the singular=ok option
#
test1 <- data.frame(time=  c(4, 3,1,1,2,2,3),
		    status=c(1,NA,1,0,1,1,0),
		    x=     c(0, 2,1,1,1,0,0))

temp <- rep(0:3, rep(7,4))

stest <- data.frame(start  = 10*temp,
		    stop   = 10*temp + test1$time,
		    status = rep(test1$status,4),
		    x      = c(test1$x+ 1:7, rep(test1$x,3)),
		    epoch  = rep(1:4, rep(7,4)))

fit1 <- coxph(Surv(start, stop, status) ~ x * factor(epoch), stest)
fit1$coef    # elements 2:4 should be NA
#
# Test some more features of surv.diff
#
#  First, what happens when one group is a dummy
#


#
# The AML data, with a third group of early censorings "tacked on"
#
aml3 <- list(time=   c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161,
			   5,  5,  8,  8, 12, 16, 23, 27, 30, 33, 43, 45,
			   1,  2,  2,  3,  3,  3,  4),
	    status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1,
				0,0,0,0,0,0,0),
	    x     = as.factor(c(rep("Maintained", 11),
				rep("Nonmaintained", 12), rep("Dummy",7) )))

aml3 <- data.frame(aml3)

# These should give the same result (chisq, df), but the second has an
#  extra group
survdiff(Surv(time, status) ~x, aml)
survdiff(Surv(time, status) ~x, aml3)


#
# Now a test of the stratified log-rank
#   There are no tied times within institution, so the coxph program
#   can be used to give a complete test
#
fit <- survdiff(Surv(time, status) ~ pat.karno + strata(inst), cancer)

#options(contrasts='contr.treatment')
cfit <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst),
		cancer, iter=0)

tdata <- na.omit(cancer[,c('time', 'status', 'pat.karno', 'inst')])

temp1 <- tapply(tdata$status-1, list(tdata$pat.karno, tdata$inst), sum)
temp1 <- ifelse(is.na(temp1), 0, temp1)
temp2 <- tapply(cfit$resid,  list(tdata$pat.karno, tdata$inst), sum)
temp2 <- ifelse(is.na(temp2), 0, temp2)

temp2 <- temp1 - temp2

#Now temp1=observed, temp2=expected
all.equal(c(temp1), c(fit$obs))
all.equal(c(temp2), c(fit$exp))

all.equal(fit$var[-1,-1], solve(cfit$var))

rm(tdata, temp1, temp2)
# 
# Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55.
#

temp1 <- mdy.date(6,6,36)
temp2 <- mdy.date(6,6,55)# Now compare the results from person-years
#
temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))),
	labels=c('0-18', paste(18:30, 19:31, sep='-')))
temp.yr  <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964)
temp.time <- 3700   #total days of fu
py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days

# The subject should appear in 20 cells
#    6/6/55 - 12/31/55, 209 days, age 19-20, 1955
#    1/1/56 -  6/ 4/56, 156 days, age 19-20, 1956
#    6/5/56 - 12/31/56, 210 days, age 20-21, 1956   (a leap year, and his
#			birthday computes one day earlier)
#    1/1/57 -  6/ 5/57, 156 days, age 20-21, 1957
#    6/6/57 - 12/31/57, 209 days, age 21-22, 1957
# and etc
#   with 203 days "off table", ie, beyond the last cell of the table
#
# It is a nuisance, but tcut follows 'cut' in that we give the ENDS of
#  the intervals, whereas the survival tables use the starts of intervals.
#  Thus this breakdown does not match that in doexpect.s
#
xx <- matrix(0, nrow=14, ncol=11)
xx[cbind(3:11, 3:11)] <- 156
xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2))
dimnames(xx) <- list(c('0-18', paste(18:30, 19:31, sep='-')), 1954:1964)
all.equal(xx, py1$pyears)
all.equal(203, py1$offtable)
all.equal(1*(xx>0), py1$n)

#
# Now with expecteds
#
py2 <- pyears(temp.time ~ temp.age + temp.yr
		+ ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"),
	     scale=1, ratetable=survexp.usr ) #output in days
all.equal(xx, py2$pyears)
all.equal(203, py2$offtable)
all.equal(1*(xx>0), py2$n)


py3 <-  pyears(temp.time ~ temp.age + temp.yr
		+ ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"),
	     scale=1, ratetable=survexp.usr , expect='pyears')
all.equal(py2$n, py3$n)
all.equal(py2$pyear, py3$pyear)
all.equal(py3$n, 1*(py3$expect>0))

# Now, compute the py3 result "by hand".  Since there is only one person
#   it can be derived from py2.
#
xx1 <- py2$expect[py2$n>0]   		# the hazard over each interval
cumhaz <- cumsum(c(0, xx1[-length(xx1)]))     # the cumulative hazard	
xx2 <- py3$expect[py3$n>0]   		# the expected number of person days
xx3 <- py3$pyears[py3$n>0]   		# the potential number of person days

# This is the integral of the curve "exp(-haz *t)" over the interval
integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1
# They might not be exactly equal, since the C code tracks changes in the
#   rate tables that occur -within- an interval.  So try for 7 digits
all.equal(round(integral,4)/10000, round(xx2,4)/10000)

# Cut off the bottom of the table, instead of the side
temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))),
	labels=c('0-18', paste(18:26, 19:27, sep='-')))

py4 <- eval(py3$call)
all.equal(py4$pyear, py3$pyear[1:10,])
all.equal(py4$expect, py3$expect[1:10,])

rm(xx1, xx2, xx3, cumhaz, temp1, temp2)
rm(temp.age, temp.yr, temp.time, xx)
#
# Test out subscripting in the case of a coxph survival curve
#
fit <- coxph(Surv(time, status) ~ age + sex + meal.cal + strata(ph.ecog),
		data=cancer)

surv1 <- survfit(fit)
temp <- surv1[2:3]

which <- cumsum(surv1$strata)
zed   <- (which[1]+1):(which[3])
all.equal(surv1$surv[zed], temp$surv)
all.equal(surv1$time[zed], temp$time)

#
# Now a result with a matrix of survival curves
#
dummy <- data.frame(age=c(30,40,60), sex=c(1,2,2), meal.cal=c(500, 1000, 1500))
surv2 <- survfit(fit, newdata=dummy)

zed <- 1:which[1]
all.equal(surv2$surv[zed,1], surv2[1,1]$surv)
all.equal(surv2$surv[zed,2], surv2[1,2]$surv)
all.equal(surv2$surv[zed,3], surv2[1,3]$surv)
all.equal(surv2$surv[zed, ], surv2[1,1:3]$surv)
all.equal(surv2$surv[zed],   (surv2[1]$surv)[,1])
all.equal(surv2$surv[zed, ], surv2[1, ]$surv)