#------------------------------------------------------------------------# # modified on : October 5, 2011 at 15:26 # created on : September 25, 2009 at 22:20 # author : Huixia Judy Wang # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # Functions for "Quantile regression for competing risks data with missing cause of failure" # by Sun, Y., Wang, H., and Gilbert, P. B. (2011) # R: 2.8.1. #------------------------------------------------------------------------# library(quantreg) library(survival) library(abind) Omni.func = function(X, Z, delta, Jdelta, tau, M=10^5) { # assume no missing cause # competing risk # for simulated data: log transformation, 2 covariates # cause of interest: 1 # minimize the equivalent L1 loss function (similar to Peng and Fine) # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Jdelta: Jstar*delta, where Jstar=1 for cause 1, 2 for cause 2 # tau : quantile level of interest n=length(X) ## ##### estimate G(T): survival function of the censoring variable ## delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # minimize the L1 objective function Vi = 1*(Jdelta==1)/(Ghat+0.000001) # augmented response and design matrix logX2 = c(log(X), M, M) Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) wi = c(Vi, 1, 1) bhat = rq(logX2~Z2-1, weights=wi, tau=1/2)$coef return(bhat) } logL.Pi.func = function(Ri, delta, W, phi) { # -loglikelihood function of phi (logistic regression) # used to estimate the parameter phi involved in the logistic regression for prop of nonmissingness of causes # W = design matrix logit.r = W%*%phi r = as.matrix(exp(logit.r)/(1+exp(logit.r))) logl = sum(Ri*(delta==1)*log(r)) + sum((1-Ri)*log(1-r)) return(-logl) } logL.rho.func = function(Delta, W, theta) { # -loglikelihood function of rho=P(J=1|delta,W) through logistic regression # W = design matrix # Delta=J*delta*Ri: 0 if censored or missing; 1 if cause1 failure nonmissing; 2 if cause 2 failure nonmissing logit.rho = W%*%theta r = as.matrix(exp(logit.rho)/(1+exp(logit.rho))) #logl = sum((delta==1 & Ri==1 & Ji==1)*log(r)) + sum((delta==1 & Ri==1 & Ji==2)*log(1-r)) logl = sum((Delta==1)*log(r)) + sum((Delta==2)*log(1-r)) return(-logl) } IPW.func = function(X, Z, A, delta, Delta, Ri, tau, M=10^5) { # Inverse probability weighting (simple version): only use unmissing data # for simulated data: log transformation, 2 covariates # cause of interest: 1 # minimize the equivalent L1 loss function (similar to Peng and Fine) # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # A : deisgn matrix of auxillary covariate # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Ri : missing indicator, 1 for uncensored (failure) and observed cause or censored, 0 otherwise # tau : quantile level of interest # Delta = delta*J*Ri: 1 if cause 1, failure (uncensored) and nonmissing cause; 2 if cuase 2 failure nonmissing cause; # 0 otherwise (censored, or failure but missing cause) n=length(X) # estimate G(T): survival function of the censoring variable delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # Estimate Pi(Qi, phi), where Qi=(delta, Z, A) W = cbind(Z, A, X) phi0 = c(1,1,1,0,0) phi = optim(phi0, logL.Pi.func, Ri=Ri, delta=delta, W=W)$par logit.r = W%*%phi # estimated gamma(Wi, phi)=gamma.wi gamma.wi = as.matrix(exp(logit.r)/(1+exp(logit.r))) Pi.Q = gamma.wi*delta + (1-delta) # minimize the L1 objective function Vi = as.vector(1*(Delta==1)/pmax(Ghat*Pi.Q,0.000001)) #theta_{1,i} # augemented response vector and design matrix logX2 = c(log(X), M, M) Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) wi = c(Vi, 1, 1) bhat = rq(logX2~Z2-1, weights=wi, tau=1/2)$coef return(bhat) } ###################################################### ### augmented IPW estimator (DEFAULT: SANN) # ### minimize equivalent L1 loss function by optim() # ###################################################### AIPW.func = function(X, Z, A, delta, Delta, Ri, tau, b0, method="SANN", M=10^5) { # Augmented Inverse probability weighting # for simulated data: log transformation, 2 covariates # cause of interest: 1 #### Use the R optim function directly to minimize |score|: option "SANN" # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # A : deisgn matrix of auxillary covariate # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Ri : missing indicator, 1 for uncensored (failure) and observed cause or censored, 0 otherwise # tau : quantile level of interest # Delta = delta*J*Ri: 1 if cause 1, failure (uncensored) and nonmissing cause; 2 if cuase 2 failure nonmissing cause; # 0 otherwise (censored, or failure but missing cause) n=length(X) # estimate G(T): survival function of the censoring variable delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # Estimate Pi(Qi, phi), where Qi=(delta, Z, A) W = cbind(Z, A, X) phi0 = c(1,1,1,0,1) phi = optim(phi0, logL.Pi.func, Ri=Ri, delta=delta, W=W)$par logit.r = W%*%phi # estimated gamma(Wi, phi)=gamma.wi gamma.wi = as.matrix(exp(logit.r)/(1+exp(logit.r))) Pi.Q = gamma.wi*delta + (1-delta) # Estimate rho(Wi) W = cbind(Z, A) theta0 = c(1,0,0,0) theta = optim(theta0, logL.rho.func, Delta=Delta, W=W)$par logit.r = W%*%theta rho = as.matrix(exp(logit.r)/(1+exp(logit.r))) Vi = as.vector( 1*(Delta==1)/pmax(Ghat*Pi.Q,0.000001) + (1-Ri/Pi.Q)*delta*rho/pmax(Ghat,0.000001)) X2 = c(log(X), M, M) # augmented response variable wi = c(Vi, 1, 1) # weight, could be negative Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) # augmented design matrix # minimize sum_i wi*|X2 - Z2i*beta| by optim bhat=optim.wL1(b0, X2, Z2, wi, method=method) return(bhat) } optim.wL1 = function(b0, y, x, w, method = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN")) { # use optim to minimize sum_i w_i|y_i-x_i^T\beta| for beta # b0: initial value p = ncol(x) n = length(y) obj = function(beta, y, x, w) { sum(w*abs(y-x%*%beta)) } bhat=optim(b0, obj, y=y,x=x,w=w, method=method)$par return(bhat) } #========================================================# # variance of IPW estimator # #========================================================# var.IPW.func <- function(X, Z, A, delta, Delta, Ri, tau, M=10^5) { # Inverse probability weighting (simple version): only use unmissing data # for simulated data: log transformation, 2 covariates # cause of interest: 1 # minimize the equivalent L1 loss function (similar to Peng and Fine) # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # A : deisgn matrix of auxillary covariate # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Ri : missing indicator, 1 for uncensored (failure) and observed cause or censored, 0 otherwise # tau : quantile level of interest # Delta = delta*J*Ri: 1 if cause 1, failure (uncensored) and nonmissing cause; 2 if cuase 2 failure nonmissing cause; # 0 otherwise (censored, or failure but missing cause) ###(1) estimate bhat n=length(X) # M=10^5 # estimate G(T): survival function of the censoring variable delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # Estimate Pi(Qi, phi), where Qi=(delta, Z, A) W = cbind(Z, A, X) phi0 = c(1,1,1,0,1) phi = optim(phi0, logL.Pi.func, Ri=Ri, delta=delta, W=W)$par logit.r = W%*%phi # estimated gamma(Wi, phi)=gamma.wi gamma.wi = as.matrix(exp(logit.r)/(1+exp(logit.r))) Pi.Q = gamma.wi*delta + (1-delta) # derivative of Pi wrt phi Pi.prime = W * as.vector(delta * gamma.wi*(1-gamma.wi)) # Fisher Information matrix tmp = as.vector(gamma.wi*(1-gamma.wi)*(Ri*delta+1-Ri)) W2 = W*tmp Imat = t(W2)%*%W/n # minimize the L1 objective function Vi = as.vector(1*(Delta==1)/pmax(Ghat*Pi.Q,0.000001)) #theta_{1,i} # augemented response vector and design matrix logX2 = c(log(X), M, M) Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) wi = c(Vi, 1, 1) bhat = rq(logX2~Z2-1, weights=wi, tau=1/2)$coef ###### (2) Estimate Gamma ## part1 of Gamma res = log(X)-Z%*%bhat Gamma1 = Z* as.vector(Vi*(res<=0)-tau) Gamma1 = t(Gamma1)%*%Gamma1/n ## part2 of Gamma # sort X and other quantities correspondingly idx = order(X) X.sort = X[idx] res.sort = res[idx] Z.sort = Z[idx,] Vi.sort = Vi[idx] delta.sort = delta[idx] ZItheta.sort = Z.sort * Vi.sort * (res.sort<=0) num = apply(ZItheta.sort, 2, function(x) c(sum(x), sum(x)-cumsum(x))[1:length(x)]) denom = n:1 tmp = num/denom Gamma2 = tmp*(1-delta.sort) Gamma2 = -t(Gamma2)%*%Gamma2/n ## part3 of Gamma tmp = Pi.prime * as.vector(1/Pi.Q * Vi * (res<0)) w2hat = -t(Z)%*%tmp/n Gamma3 = -w2hat %*% solve(Imat) %*% t(w2hat) GAMMA = Gamma1 + Gamma2 + Gamma3 En = eigen(GAMMA) En = En$vectors %*% diag(sqrt(pmax(En$values,0.00001))) %*% solve(En$vectors) # Step2: solve for b statisfying S1-En=0 # equivalent to minimize a L1 loss function logX3 = c(log(X), M, M, M) wi2 = c(Vi, 1, 1, 1) ee = apply(En, 2, function(x) { Z3 = rbind(Z2, 2*sqrt(n)*x) rq(logX3~Z3-1, weights=wi2, tau=1/2)$coef }) Dn = ee-bhat # Step 3 En2 = solve(En) #Cov = Dn %*% En2 %*% GAMMA %*% En2 %*% Dn Cov = Dn%*%t(Dn) return(list(Cov=Cov, bhat=bhat)) } ### for simulated data design 1, using the true A(beta_0) matrix f1.func = function(t, z, gamma, p0, p1) { prob = p0*(z[2]==0)+p1*(z[2]==1) mu = t(z)%*%gamma dlnorm(t, mu, 1)*prob } Abeta.func = function(Z, b0, gamma=c(0.5,-0.5), p0=0.8, p1=0.6) { n= nrow(Z) tt = cbind(exp(Z%*%b0), Z[,-1]) tmp = apply(tt, 1, function(x) f1.func(x[1], x[-1], gamma, p0, p1)) 1/n * t(Z) %*% (Z * as.vector(tmp)) } #========================================================# # variance of Peng-Pine estimator # # (no missing causes) # #========================================================# var.Omni.func <- function(X, Z, delta, Jstar, tau,M=10^5) { # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Jstar : cause 1 or 2 # tau : quantile level of interest ###(1) estimate bhat n=length(X) #M=10^5 Jdelta = Jstar*delta ## ##### estimate G(T): survival function of the censoring variable ## delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # minimize the L1 objective function Vi = 1*(Jdelta==1)/(Ghat+0.000001) # augmented response and design matrix logX2 = c(log(X), M, M) Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) wi = c(Vi, 1, 1) bhat = rq(logX2~Z2-1, weights=wi, tau=1/2)$coef ###### (2) Estimate Gamma ## part1 of Gamma res = log(X)-Z%*%bhat Gamma1 = Z* as.vector(Vi*(res<=0)-tau) Gamma1 = t(Gamma1)%*%Gamma1/n ## part2 of Gamma # sort X and other quantities correspondingly idx = order(X) X.sort = X[idx] res.sort = res[idx] Z.sort = Z[idx,] Vi.sort = Vi[idx] Jdelta.sort = Jdelta[idx] ZItheta.sort = Z.sort * Vi.sort * (res.sort<=0) num = apply(ZItheta.sort, 2, function(x) c(sum(x), sum(x)-cumsum(x))[1:length(x)]) denom = n:1 tmp = num/denom Gamma2 = tmp*(Jdelta.sort==0) Gamma2 = -t(Gamma2)%*%Gamma2/n GAMMA = Gamma1 + Gamma2 En = eigen(GAMMA) En = En$vectors %*% diag(sqrt(pmax(En$values,0.00001))) %*% solve(En$vectors) # Step2: solve for b statisfying S1-En=0 # equivalent to minimize a L1 loss function logX3 = c(log(X), M, M, M) wi2 = c(Vi, 1, 1, 1) ee = apply(En, 2, function(x) { Z3 = rbind(Z2, 2*sqrt(n)*x) rq(logX3~Z3-1, weights=wi2, tau=1/2)$coef }) Dn = ee-bhat # Step 3 En2 = solve(En) Cov = Dn%*%t(Dn) return(list(Cov=Cov, bhat=bhat)) } #========================================================# # variance of AIPW estimator # #========================================================# var.AIPW.func <- function(X, Z, A, delta, Delta, Ri, tau, b0, method="SANN", M=10^5) { # Augmented Inverse probability weighting # for simulated data: log transformation, 2 covariates # cause of interest: 1 #### Use the R optim function directly to minimize |score|: option "SANN" # Inputs # X : observed time = T^C, a n*1 vector, on the original scale # Z : design matrix, the first column is all 1's corresponding to the intercept # A : deisgn matrix of auxillary covariate # delta : censoring indicator, 1 for uncensored data, 0 for censored observation # Ri : missing indicator, 1 for uncensored (failure) and observed cause or censored, 0 otherwise # tau : quantile level of interest # Delta = delta*J*Ri: 1 if cause 1, failure (uncensored) and nonmissing cause; 2 if cuase 2 failure nonmissing cause; # 0 otherwise (censored, or failure but missing cause) ###(1) estimate bhat n=length(X) #M=10^5 # estimate G(T): survival function of the censoring variable delta2 = 1-delta fit <- survfit(Surv(X, delta2)~1) idx = order(X) Ghat = rep(0,n) Ghat[idx] = summary(fit, times=X[idx])$surv # Estimate Pi(Qi, phi), where Qi=(delta, Z, A) W = cbind(Z, A, X) phi0 = c(1,1,1,0,1) phi = optim(phi0, logL.Pi.func, Ri=Ri, delta=delta, W=W)$par logit.r = W%*%phi # estimated gamma(Wi, phi)=gamma.wi gamma.wi = as.matrix(exp(logit.r)/(1+exp(logit.r))) Pi.Q = gamma.wi*delta + (1-delta) # derivative of Pi wrt phi Pi.prime = W * as.vector(delta * gamma.wi*(1-gamma.wi)) # Estimate rho(Wi) W = cbind(Z, A) theta0 = c(1,0,0,0) theta = optim(theta0, logL.rho.func, Delta=Delta, W=W)$par logit.r = W%*%theta rho = as.matrix(exp(logit.r)/(1+exp(logit.r))) Vi = as.vector( 1*(Delta==1)/pmax(Ghat*Pi.Q,0.000001) + (1-Ri/Pi.Q)*delta*rho/pmax(Ghat,0.000001)) #M=10^10 X2 = c(log(X), M, M) # augmented response variable wi = c(Vi, 1, 1) # weight, could be negative Z = as.matrix(Z) Z2 = rbind(Z, -apply(Z*Vi,2,sum), apply(2*Z*tau,2,sum)) # augmented design matrix # minimize sum_i wi*|X2 - Z2i*beta| by optim bhat=optim.wL1(b0, X2, Z2, wi, method=method) ###### (2) Estimate Gamma ## part1 of Gamma res = log(X)-Z%*%bhat Gamma1 = Z* as.vector(Vi*(res<=0)-tau) Gamma1 = t(Gamma1)%*%Gamma1/n ## part2 of Gamma # sort X and other quantities correspondingly idx = order(X) X.sort = X[idx] res.sort = res[idx] Z.sort = Z[idx,] Vi.sort = Vi[idx] delta.sort = delta[idx] ZItheta.sort = Z.sort * Vi.sort * (res.sort<=0) num = apply(ZItheta.sort, 2, function(x) c(sum(x), sum(x)-cumsum(x))[1:length(x)]) denom = n:1 tmp = num/denom Gamma2 = tmp*(1-delta.sort) Gamma2 = -t(Gamma2)%*%Gamma2/n GAMMA = Gamma1 + Gamma2 # Step1 En = eigen(GAMMA) En = En$vectors %*% diag(sqrt(pmax(0.00001,En$values))) %*% solve(En$vectors) #t(En)%*%En # Step2: solve for b statisfying S1-En=0 # equivalent to minimize a L1 loss function logX3 = c(log(X), M, M, M) wi2 = c(Vi, 1, 1, 1) ee = apply(En, 2, function(x) { Z3 = rbind(Z2, 2*sqrt(n)*x) # minimize sum_i wi*|X2 - Z2i*beta| by optim optim.wL1(b0, logX3, Z3, wi2, method=method) }) Dn = ee-bhat # Step 3 En2 = solve(En) #Cov = Dn %*% En2 %*% GAMMA %*% En2 %*% Dn Cov = Dn%*%t(Dn) return(list(Cov=Cov, bhat=bhat)) }