# CREATED ON : Wednesday, May 23, 2007 # MODIFIED ON : Monday, August 31, 2009 07:58 # AUTHOR : HUIXIA JUDY WANG # R version : R 2.3.1, quantreg V4.01, SparseM V0.71 # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # FUNCTIONS : QUANTILE RANK SCORE TEST FOR LEFT FIXED CENSORED DATA WITH A RANDOM INTERCEPT EFFECT # main functions: cQRS.func and cQRS.ci.func # A simulated data is used to illustrate the method at the end of this file. library(quantreg) Power.c <- function(y, x, yc, tau) { # approximation of Powell's estimates, using only the observations predicted to be uncensored to estimate parameter untill the convergence # This modification is used to by pass the cases with different yc at diffrent observations, in which rq.fit.fcen is unstable # y: the observed response variable (censored) # x: design vector, the first column of x are all 1's (corresponding to the intercept) # tau: quantile level of interest # yc: the censoring variable maxerr=1e-10; maxiter=500 n = length(y) if(length(yc)==1) yc=rep(yc,n) if(length(yc)!=n) warning("the dimension of yc is incorrect") ind = which(y>yc) x = as.matrix(x) rq0 = rq(y[ind]~x[ind,]-1, tau=tau) alpha0 = as.vector(rq0$coeff) err=1 alphas=alpha.new=alpha0 iter=1 while(err>maxerr & iter<=maxiter) { alpha0 = alpha.new yhat = x%*%alpha0 ind = which(yhat > yc) alpha.new = rq(y[ind]~x[ind,]-1,tau=tau)$coeff err = sqrt(sum((alpha.new-alpha0)^2)) alphas = rbind(alphas, alpha.new) iter=iter+1 } return(alpha.new) } delta.cQRS.func <- function(Indic, psi, subject, p, tau) { # Indic: index of residuals predicted to be unceosred at the quantile level tau, i.e. I(x*a.hat>yc) # psi = tau-I(uhat<0): the score function # p: dimension of covariates an = psi + (1-tau) ind = which(Indic==1) subject2 = subject[ind,] an2 = an[ind] K <- apply(subject2, 2, sum) # the number of repeats for each subject delta <- (t(1-an2)%*%subject2%*%t(subject2)%*%(1-an2) - crossprod(1-an2))/(sum(apply(as.matrix(K),1,function(x)x*(x-1)))-p) return(delta) } cQRS.func = function(y, x, z, subject, tau, yc=0, approx=FALSE) { # cQRS.func: Rank Score test for Censored Quantile regression (dependent data) # y: the observed response variable (censored) # x: covariates under the null model # z: the covariate whose effect is to be tested # tau: quantile level of interest # yc: the censoring variable # some remarks: # delta is estimated by using residuals obtained under H0 # when delta is not known # when yc!=0, rq and rq.fit.fcen is not stable # when yc!=0 and is a constant, then can force yc=0 by let y2=y-yc, this only affect the intercept estimation. # when yc!=0 and is not a constant, then need use Powell.c if(is.factor(subject)==TRUE) subject <- model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject <- model.matrix(~-1+factor(subject)) n = length(y) q <- qr(z)$rank x = cbind(1,x) p <- qr(x)$rank uyc = unique(yc) if(length(uyc)==1 & approx==FALSE) { y2 = y -yc rq0 = rq(y2~x-1, tau=tau, method="fcen", yc=rep(0,n)) cCoeff0 = as.vector(rq0$coeff) cCoeff0[1] = cCoeff0[1]+yc rq1 = rq(y2~z+x-1, tau=tau, method="fcen", yc=rep(0,n)) cCoeff1 = as.vector(rq1$coeff) cCoeff1[2] = cCoeff1[2]+yc } if(length(uyc)>1 | approx==TRUE) { cCoeff0 = Power.c(y=y, x=x, yc=yc, tau) cCoeff1 = Power.c(y=y, x=cbind(z, x), yc=yc, tau) } if(length(yc)!=n) yc = rep(yc, n) # under H0 yfit0 = x%*%cCoeff0 uhat0 = y - pmax(yfit0, yc) Indic = as.vector(1*(yfit0 > yc)) xx = Indic*x H = xx%*%solve(t(xx)%*%xx)%*%t(xx) indic.zstar = Indic*(z-H%*%z) psi0 <- tau - 1*(uhat0 <=0) cS <- 1/sqrt(n)*t(psi0)%*%indic.zstar cP1 <- crossprod(indic.zstar) cP2 <- t(indic.zstar)%*%subject%*%t(subject)%*%indic.zstar - cP1 # assuming independence cVi = tau*(1-tau)*1/n* cP1 cTi = cS%*%solve(cVi)%*%t(cS) cQRSi = 1-pchisq(cTi,q) # using the estimated delta based on uhat0 (obtained under H0) cdelta <- delta.cQRS.func(Indic, psi0, subject, p, tau) cV <- cVi + 1/n*cP2 * as.numeric(-tau^2+cdelta) cT <- cS%*%solve(cV)%*%t(cS) cQRS <- 1-pchisq(cT, q) out = list(cQRSi=cQRSi, cQRS=cQRS, cdelta = cdelta, cS=cS, cVi=cVi, cV=cV, cTi=cTi, cT=cT, a=cCoeff1[3], b=cCoeff1[1]) return(out) } cQRS.ci.func <- function (y, xx, subject, tau = 0.5, level=0.05, yc, maxiter=100) { # Construct the confidence intervals of cofficients except the intercept, by inversion of the rank score test # Assume common yc for different observations # xx: the design vector, does not include the column of 1's # the estimated coefficients if(is.factor(subject)==TRUE) subject <- model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject <- model.matrix(~-1+factor(subject)) X = cbind(1, xx) n = nrow(X) p <- ncol(X) cutoff <- qnorm(1 - level/2) y2 = y-yc #inital rough confidence intervals rq1=rq(y2~xx, method="fcen", tau=tau) coeff=summary(rq1)$coeff coefficients = matrix(0, p, 3) coefficients[,1] = coeff[,1] coefficients[,2] = coeff[,1] - 2*coeff[,2] coefficients[,3] = coeff[,1] + 2*coeff[,2] vnames = rownames(coeff) cnames <- c("coefficients", "lower bd", "upper bd") dimnames(coefficients) <- list(vnames, cnames) # Fine tuning the CIs of coefficients except the intercept for (j in 2:p) { x = X[, -c(1,j)] z = X[, j] # lower bound incre = (coefficients[p,3]-coefficients[p,2])/100 lb0 = coefficients[j,2] ub0 = coefficients[j,3] b = coefficients[j,1] tmp1 = cQRS.func(y=y, x=x, z=z, subject=subject, tau=tau, yc=yc) pval=tmp1$cQRS if(pval0) { lb0 = max(0.001, lb0) ub0 = max(0.002, ub0) } if(pval=level) { lb0 = min(-0.001, lb0) ub0 = max(0.001, ub0) } coefficients[j,2] = ci.tuning.lb.cQRS.func(x, z, y, subject, tau, cutoff, b0=lb0, incre, yc, maxiter) coefficients[j,3] = ci.tuning.ub.cQRS.func(x, z, y, subject, tau, cutoff, b0=ub0, incre, yc, maxiter) } return(coefficients) } ci.tuning.lb.cQRS.func <- function(x, z, y, subject, tau, cutoff, b0, incre, yc, maxiter=100) { new.b = old.b = b0 tmp = as.data.frame(cQRS.func(y=y-z*b0, x, z, subject, tau, yc=yc-z*b0)) (b = tmp$b) Sign1 = sign(tmp$b) new.t = sqrt(tmp$cT) while(b<0 | new.t < cutoff) { b0 = b0-incre*10 tmp = as.data.frame(cQRS.func(y=y-z*b0, x, z, subject, tau, yc=yc-z*b0)) b = tmp$b Sign1 = sign(tmp$b) new.t = sqrt(tmp$cT) } new.b = old.b = b0 new.t = old.t = sqrt(tmp$cT) eps = old.t - cutoff Sign2 = sign(eps) ts = new.t i=1 while(b>0 & sign(eps)==Sign2 & i< maxiter) { old.b = new.b old.t = new.t new.b = old.b + incre new.yc = yc-z*new.b new.y = y-z*new.b tmp = as.data.frame(cQRS.func(y=new.y, x, z, subject, tau, yc=new.yc)) b = tmp$b new.t = sqrt(tmp$cT) eps = new.t - cutoff + 0.01 i=i+1 ts = c(ts, new.t) } out = new.b return(out) } ci.tuning.ub.cQRS.func <- function(x, z, y, subject, tau, cutoff, b0, incre, yc, maxiter=100) { new.b = old.b = b0 tmp = as.data.frame(cQRS.func(y=y-z*b0, x, z, subject, tau, yc=yc-z*b0)) b = tmp$b Sign1 = sign(tmp$b) new.t = sqrt(tmp$cT) while(Sign1==1 | new.t < cutoff) { b0 = b0+incre*10 tmp = as.data.frame(cQRS.func(y=y-z*b0, x, z, subject, tau, yc=yc-z*b0)) b = tmp$b Sign1 = sign(tmp$b) new.t = sqrt(tmp$cT) } new.b = old.b = b0 new.t = old.t = sqrt(tmp$cT) eps = old.t - cutoff Sign2 = sign(eps) ts = new.t i=1 while(b<0 & sign(eps)==Sign2 & i< maxiter) { old.b = new.b old.t = new.t new.b = old.b - incre new.yc = yc-z*new.b new.y = y-z*new.b tmp = as.data.frame(cQRS.func(y=new.y, x, z, subject, tau, yc=new.yc)) b = tmp$b new.t = sqrt(tmp$cT) eps = new.t - cutoff + 0.01 i=i+1 ts = c(ts, new.t) } out = new.b return(out) } QRS = function(x, z, y, subject, tau=0.5) { # Quantile Rank Score Test for noncensored data with a randon effect (this function is included for comparative purpose) # Arguments: # x: the covariate for the nuisance parameters, and x does not contain the intercept # z: the covariate for the parameter(s) of interest # y: the response variable # subject: indicates the ID or number of arrays/subjects # tau: the quantile level # Value # QRSi : p-value of the quantile rank score test which ignores the dependence entirely, such that delta = tau^2 # QRS0 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H0 # QRS1 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H1 # sn : the quantile rank score # Qni, Qn0, Qn1: the variance-covariance of sn for methods QRSi, QRS0 and QRS1, respectively # Tni, Tn0, Tn1: the test statistic values for methods QRSi, QRS0 and QRS1, respectively # beta: the quantile estimate for z if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1] if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1] if(is.factor(subject)==TRUE) subject = model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject = model.matrix(~-1+factor(subject)) q = qr(z)$rank x = cbind(1,x) p = qr(x)$rank n = length(y) if(p==1) dsol0 = dsolu.br(x, y, tau = tau, interp = FALSE) if(p>1) dsol0 = dsolu.br(x, y, tau = tau, interp = TRUE) an0 = dsol0$dsol bn0 = an0- (1-tau) if(p==1) zstar = as.matrix(qr.resid(qr(x[,-1]), z)) if(p>1) zstar = as.matrix(qr.resid(qr(x), z)) sn = 1/sqrt(n)*crossprod(zstar,bn0) P1 = crossprod(zstar) P2 = t(zstar)%*%subject%*%t(subject)%*%zstar - P1 Qni = 1/n*P1*tau*(1-tau) Tni = t(sn)%*%solve(Qni)%*%sn QRSi = 1-pchisq(Tni,q) if(p==1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = FALSE) if(p>1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = TRUE) an1 = dsol1$dsol beta = dsol1$coeff[-(1:p)] bn1 = an1 - (1-tau) delta0 = delta.est(an0, subject,p,tau) delta1 = delta.est(an1, subject,p,tau) Qn0 = Qni + 1/n*P2 * as.numeric(-tau^2+delta0) Tn0 = t(sn)%*%solve(Qn0)%*%sn QRS0 = 1-pchisq(Tn0, q) Qn1 = Qni + 1/n*P2 * as.numeric(-tau^2+delta1) Tn1 = t(sn)%*%solve(Qn1)%*%sn QRS1 = 1-pchisq(Tn1, q) out = list( delta0 = delta0, delta1 = delta1, QRSi = QRSi, QRS0 = QRS0, QRS1 = QRS1, beta=beta, Tni = Tni, Tn0 = Tn0, Tn1=Tn1, sn = sn, Qni = Qni, Qn0 = Qn0, Qn1=Qn1, P1=P1, P2=P2,n=n) return(out) } delta.est = function(an, subject, p, tau) { # subject is a matrix of n by nsubj K = apply(subject, 2, sum) # the number of repeats within each subject delta = (t(1-an)%*%subject%*%t(subject)%*%(1-an) - crossprod(1-an))/(sum(apply(as.matrix(K),1,function(x)x*(x-1)))-p) delta = min(max(tau^2, delta), tau) return(delta) } dsolu.br = function (x, y, tau = 0.5, interp = TRUE) { # obtain the dual solution at a single quantile level # the first column of x contains all 1's # if interp=FALSE, then do not include the intercept # this function was modified based on the function "rq.fit.br" in quantreg package tol = .Machine$double.eps^(2/3) eps = tol big = .Machine$double.xmax x = as.matrix(x) if(interp==FALSE) x = x[,-1] x = as.matrix(x) p = ncol(x) n = nrow(x) nsol = 2 ndsol = 2 if (tau < 0 || tau > 1) warning("Stop. tau can only be (0,1)") if(tau>0 & tau<1) { lci1 = FALSE qn = rep(0, p) cutoff = 0 } Z = .Fortran("rqbr", as.integer(n), as.integer(p), as.integer(n + 5), as.integer(p + 3), as.integer(p + 4), as.double(x), as.double(y), as.double(tau), as.double(tol), flag = as.integer(1), coef = double(p), resid = double(n), integer(n), double((n + 5) * (p + 4)), double(n), as.integer(nsol), as.integer(ndsol), sol = double((p + 3) * nsol), dsol = double(n * ndsol), lsol = as.integer(0), h = integer(p * nsol), qn = as.double(qn), cutoff = as.double(cutoff), ci = double(4 * p), tnmat = double(4 * p), as.double(big), as.logical(lci1), PACKAGE = "quantreg") dsol = Z$dsol[1:n] return(list(dsol=dsol, coeff=Z$coef)) } ####### a simulated data set set.seed(1234567) tau=0.5 I=50; J=8 n = I*J subject = rep(1:I, each=J) z = rep(c(0,1),each=n/2) x = rnorm(n,0,1) ai = rep(rnorm(I, 0, 1), each=J) eij = rnorm(n, 0, 1) u = (ai + eij - qnorm(tau)*sqrt(1 + 1)) ystar = 1 + x + 1*z + u yc = 0.2 mean(ystar<=yc) y <- pmax(yc, ystar) ##### test the effect of z # the Ominiscient method using ystar omni = as.data.frame(QRS(x, z, ystar, subject, tau=tau)) # naive method ignoring the censoring naive = as.data.frame(QRS(x, z, y, subject, tau=tau)) # method proposed in Wang and Fygenson WF = as.data.frame(cQRS.func(y=y, x=x, z=z, subject=subject, tau=tau, yc)) # results from rq assuming independent data, suppose ystar is known omni.iid = summary(rq(ystar~x+z, tau=tau), se="nid") ##### obtain the estimation and 95\% confidence intervals WF.ci = cQRS.ci.func(y, cbind(x,z), subject, tau = tau, level=0.05, yc, maxiter=50)