### An simulation example ### Author: Lei Pang, Judy Wang & Wenbin Lu ### Department of Statistics, North Carolina State University ### Created on: Sep 30, 2010 ### Function list: ### IS.func(): for IS estimation ### KS.func(): for Kernel-Smoothing estimation ### Boot.func(): for BT-boot estimation ### Other functions: ### weight.func(), update.beta(), Sigma.func(), update.Gamma() ### update.beta.h(), Sigma.func.h(), update.Gamma.h(), BT.func() ### dist.ex(), dist.logi(), processor() ### Parameter list: ### Y or y: pmin(t,c), censored time data, n*1 array; ### di: I(t<=c), censoring indicator, n*1 array; ### X or x: Covariates, n*p array; ### beta: coefficient, p*1 array; ### Gamma: covariance matrix, p*p array; ### Sigma: covariance matrix, p*p array; ### tau: quantile level, scalar ### W: inverse weights, n*1 array ### h: bandwidth for Kernel-Smoothing, scalar library(survival) library(quantreg) ###****************IS main function begins*******************### IS.func = function(y, x, di, tau, tol=0.01, maxiter=20) { # y is a n*1 vector: observed time = min(ti, ci) # x are the design matrix/vector, not including the intercept # di: censoring indicator vector # first, all data should be orderd based on order(Y) n=dim(x)[1] p=dim(x)[2] ord=order(y) y=y[ord] di=di[ord] x=x[ord,] W=weight.func(y,di) #weight vector; to be used as input for other functions #(1) initial beta and Gamma, using only uncensored observations #### BT's estimator, as the initial beta value rq0 = rq(y~x-1, weights=W, tau=tau) beta0 = rq0$coeff summ0 = summary(rq0, se="nid", covariance=TRUE) Gamma0 = summ0$cov ##initial Gamma; could be (1/n)In # update beta and Gamma beta.old = as.vector(beta0) Gamma.old = Gamma0 i=0; eps=1 ###iteration step while(eps >= tol & i < maxiter) { beta.new = update.beta(x, y, tau, beta=beta.old, Gamma=Gamma.old, W=W) #update beta first eps1 = max(abs(beta.new - beta.old)) #sqrt(sum((beta.new - beta.old)^2)) #test convergence beta.old = beta.new S=Sigma.func(beta.old, Gamma.old, y, di, x, tau, W) #update Sigma Gamma.new = update.Gamma(x, y, tau, beta.old, Gamma.old, S, W=W) #update Gamma eps2 = max(abs(Gamma.new - Gamma.old)) #test convergence Gamma.old = Gamma.new eps = max(eps1, eps2) i = i+1 } S=Sigma.func(beta.new, Gamma.new, y, di, x, tau, W) Gamma.new = update.Gamma(x, y, tau, beta.new, Gamma.new, S, W=W) beta.new = update.beta(x, y, tau, beta=beta.new, Gamma=Gamma.new, W=W) #95% confidence interval se = sqrt(diag(Gamma.new)) #get estimated standard deviations lb = beta.new - qnorm(0.975)*se #get Wald-type 95% confidence interval ub = beta.new + qnorm(0.975)*se se0 = sqrt(diag(Gamma0)) Coef = cbind(beta.new, lb, ub, se) colnames(Coef) = c("beta.CSRQ", "lb", "ub", "std.CSRQ") out = list(Gamma0=Gamma0,Gamma.new=Gamma.new, Coef=Coef, iter=i) return(out) ##returns the BT covariance matrix, the IS covariance matrix, ##coefficient estimates, lower and upper bounds, standard error } ###****************IS main function ends*******************### ###****************KS main function begins*******************### KS.func= function(y, x, di, tau, h) { # kernel-smoothing variance estimation # y is a n*1 vector: observed time = min(ti, ci) # x are the design matrix/vector, not including the intercept # di: censoring indicator vector # first, all data should be orderd based on order(Y) n=dim(x)[1] p=dim(x)[2] ord=order(y) y=y[ord] di=di[ord] x=x[ord,] W=weight.func(y,di) #weight vector; to be used as input for other functions #(1) initial beta and Gamma, using only uncensored observations #### BT's estimator rq0 = rq(y~x-1, weights=W, tau=tau) beta0 = rq0$coeff summ0 = summary(rq0, se="nid", covariance=TRUE) Gamma0 = summ0$cov # update beta and Gamma beta.old = as.vector(beta0) Gamma.old = Gamma0 beta.new = update.beta.h(x, y, tau, beta=beta.old, h, W=W) beta.old = beta.new S=Sigma.func.h(beta.old, h, y, di, x, tau, W) Gamma.new = update.Gamma.h(x, y, tau, beta.old, Gamma.old, S, W=W, h) #95% confidence interval se = sqrt(diag(Gamma.new)) lb = beta.new - qnorm(0.975)*se ub = beta.new + qnorm(0.975)*se se0 = sqrt(diag(Gamma0)) Coef = cbind(beta.new, lb, ub, se) colnames(Coef) = c("beta.CSRQ", "lb", "ub", "std.CSRQ") out = list(Gamma0=Gamma0,Gamma.new=Gamma.new, Coef=Coef) return(out) } BT.func = function(y, x, di, tau) { # get BT-estimator # y is a n*1 vector: observed time = min(ti, ci) # x are the design matrix/vector, not including the intercept # di: censoring indicator vector # first, all data should be orderd based on order(Y) n=dim(x)[1] p=dim(x)[2] ord=order(y) y=y[ord] di=di[ord] x=x[ord,] W=weight.func(y,di) #weight vector; to be used as input for other functions #(1) initial beta and Gamma, using only uncensored observations #### BT's estimator rq0 = rq(y~x-1, weights=W, tau=tau) beta0 = rq0$coeff return(beta0) } ###**************BT-boot main function begins************### Boot.func = function(y, x, di, tau, nboot, seed) { #bootstrap method: BT-boot n = length(y) bhat = BT.func(y, x, di, tau) set.seed(seed) bhat.b = NULL for(b in 1:nboot) { ind = sample(1:n, n, replace=TRUE) y.b = y[ind] x.b = x[ind,] di.b = di[ind] bhat.b = rbind(bhat.b, BT.func(y.b, x.b, di.b, tau)) } ### bootstrap se se = apply(bhat.b, 2, sd) ### bootstrap 95% se lb = bhat-qnorm(0.975)*se ub = bhat+qnorm(0.975)*se lb2 = apply(bhat.b, 2, quantile, 0.025) ub2 = apply(bhat.b, 2, quantile, 0.975) Coef = cbind(bhat, lb, ub, se, lb2, ub2) Gamma = cov(bhat.b) colnames(Coef) = c("bhat", "lb", "ub", "se", "lb2", "ub2") out = list(Gamma=Gamma, Coef=Coef) return(out) } weight.func = function(y,di) { #weight.func() is an function to calculate di/G.hat(yi); #It takes It is called in all the main functions; ind=order(y) #order the data set by the value of y y=y[ind] di=di[ind] n=length(y) s = numeric(length(y)) l = survfit(Surv(y,1-di)~1) s[duplicated(y)==F] = l$surv index=seq(1,length(y),1) s[duplicated(y)==T]=s[index[duplicated(y)==T]-1] if (s[length(y)]==0) { s[length(y)]=1 } #assign any nonzero value to s[n] to avoid NaN for (j in 1:(n-1)) { if (s[j+1]==0) { s[j+1]=s[j] } } weight=di/s ### avoid zero denominator weight2 = rep(0,n) weight2[ind] = weight return(weight2) #weights returned are in the original order } update.beta = function(X, y, tau, beta, Gamma, W) { #this is an one step update for beta in the IS algorithm #returns an updated beta #W= weight = di/\hat G(yi), where di is the censoring indicator and G is the survival function beta.old = beta si = sqrt(pmax(0.0001,diag(X%*%Gamma%*%t(X)))) #score function Phi = pnorm((X%*%beta.old-y)/si) ### smoothed score function S = t(X*W)%*%(tau - Phi) #print(S) #the derivative of the score function wrt beta phi = as.vector(dnorm((X%*%beta.old-y)/si)/si) D = -t(W * phi * X) %*% X if(abs(det(D)) > 0.001) ############ stop the iteration if det(D) is too close to 0. { D.inv = solve(D) beta.new = beta.old - D.inv%*%S } else { beta.new = beta.old } return(beta.new) } ###*************Sigma starts here*******************### Sigma.func =function(beta, Gamma, y, di, x, tau, W) ###calculate the matrix Sigma for IS { Phi = 1*(x%*%beta>y) #pnorm((x%*%beta-y)/si) n=length(y) Index=matrix(1,n,n) Index[upper.tri(Index)]=0 # i=1*(y>=t) #I(Y>=s) dim n vector ys=seq(from=n,to=1) #this is the y(s)*n in the equation j=as.vector(W*(Phi-tau)) #I(Yj<=t(X)*beta)-tau dim n vector A.y=(t(x)%*%diag(j)%*%Index)%*%diag(1/ys) #p*n matrix multiply n*1 vector, divided by a scalar -> a,p*1 vector int.part1=A.y%*%diag(1-di) #p*n matrix multiply a n*n matrix. M=matrix(1,n,n) M[lower.tri(M)]=0 V=M*((1-di)/ys) int.part2=A.y%*%V int=int.part1 - int.part2 #integral calculation is done vec=as.vector((W*(Phi-tau))) #temporary variable p1=t(x)%*%diag(vec) #part1 of varphi's formula p2=int Varphi=p1+p2 #return a p*n matrix =(varphi_1,...,varphi_n) Sigma=Varphi%*%t(Varphi) #p*n multiply n*p, Sigma is a p*p matrix return(Sigma) } ###*************Sigma ends here*******************### update.Gamma = function(X, Y, tau, beta, Gamma, Sigma, W) { #this is an one step update for the Gamma (or H) matrix si = sqrt(pmax(0.0001,diag(X%*%Gamma%*%t(X)))) Phi = pnorm((X%*%beta-Y)/si) S = t(X)%*%(Phi-tau) #the derivative of the score function wrt beta phi = as.vector(dnorm((X%*%beta-Y)/si)/si) D = t(W * phi * X) %*% X invD=solve(D) Gamma.new=invD%*%Sigma%*%t(invD) return(Gamma.new) } update.beta.h = function(X, y, tau, beta, h, W) { # update beta for kernel-smoothed methods; # W= weight = di/\hat G(yi), where di is the censoring indicator and G is the survival function beta.old = beta si = h # score Phi = pnorm((X%*%beta.old-y)/si) # smoothed score function S = t(X*W)%*%(tau - Phi) # the derivative of the score function wrt beta phi = as.vector(dnorm((X%*%beta.old-y)/si)/si) D = -t(W * phi * X) %*% X if(abs(det(D)) > 0.001) ############ stop the iteration if det(D) is too close to 0. { D.inv = solve(D) beta.new = beta.old - D.inv%*%S } else { beta.new = beta.old } return(beta.new) } Sigma.func.h =function(beta, h, y, di, x, tau, W) ###calculate the matrix Sigma for kernel-smoothing ###similar to Sigma.func() { Phi = 1*(x%*%beta>y) n=length(y) Index=matrix(1,n,n) Index[upper.tri(Index)]=0 # i=1*(y>=t) #I(Y>=s) dim n vector ys=seq(from=n,to=1) #this is the y(s)*n in the equation j=as.vector(W*(Phi-tau)) #I(Yj<=t(X)*beta)-tau dim n vector A.y=(t(x)%*%diag(j)%*%Index)%*%diag(1/ys) #p*n matrix multiply n*1 vector, divided by a scalar -> a,p*1 vector int.part1=A.y%*%diag(1-di) #p*n matrix multiply a n*n matrix. M=matrix(1,n,n) M[lower.tri(M)]=0 V=M*((1-di)/ys) int.part2=A.y%*%V int=int.part1 - int.part2 #integral calculation is done vec=as.vector((W*(Phi-tau))) #temporary variable p1=t(x)%*%diag(vec) #part1 of varphi's formula p2=int Varphi=p1+p2 #return a p*n matrix =(varphi_1,...,varphi_n) Sigma=Varphi%*%t(Varphi) ##p*n multiply n*p, resulting in Sigma, a p*p matrix return(Sigma) #return the Sigma matrix } update.Gamma.h = function(X, Y, tau, beta, Gamma, Sigma, W, h) { # update the Gamma matrix for kernel-smoothing si = h Phi = pnorm((X%*%beta-Y)/si) S = t(X)%*%(tau - Phi) #the derivative of the score function wrt beta phi = as.vector(dnorm((X%*%beta-Y)/si)/si) D = -t(W * phi * X) %*% X invD=solve(D) Gamma.new=invD%*%Sigma%*%t(invD) return(Gamma.new) } dist.logi=function(x,mu,sigma) #the logistic distribution function { return(sigma*log(x/(1-x))+mu) } dist.ex=function(x,mu,sigma) #the extreme distribution function { return(sigma*log(-log(1-x))+mu) } #generate tables from simulation result #CSRQ is the array name holding simulation result, and beta is the true parameter #beta has 3 dimension processor=function(CSRQ,beta) { bias = apply((CSRQ[,1,]-beta),1,mean)*1000 mse = apply((CSRQ[,1,]-beta)^2,1,mean)*1000 sd = apply(CSRQ[,1,],1,sd) se = apply(CSRQ[,4,],1,mean) #mean of CSRQ sd estimate rat = se/sd sese = apply(CSRQ[,4,],1,sd) covp.0=mean((beta[1]> CSRQ[1,2,])*(beta[1]< CSRQ[1,3,])) covp.1=mean((beta[2]> CSRQ[2,2,])*(beta[2]< CSRQ[2,3,])) covp.2=mean((beta[3]> CSRQ[3,2,])*(beta[3]< CSRQ[3,3,])) covp=c(covp.0,covp.1,covp.2) data=as.vector(rbind(bias,mse,rat,covp)) return(data) #return a table with bias, mse, se/sd and covp }