##################################################################### library(quantreg) Wald.Real = function(y, x, subject, h, tau, alpha=0.05) { # works also for unbalanced design n = ncol(subject) zstar = as.matrix(x) # estimation under full model rq1 = rq(y~x-1, tau=tau) beta = rq1$coeff ehat = rq1$resid ehat = ehat - quantile(ehat, tau) psi = tau - 1*(ehat<0) B = diag(Ku(ehat/h)) p = ncol(x) nt = max(apply(subject, 2, sum)) #Lambda: meat part of the sandwich formula # S: bread part of the sandwich formula psi2 = rep(0, n*nt) zstar2 = matrix(0, nrow=n*nt, ncol=p) B2 = matrix(0, ncol=(n*nt), nrow=n*nt) for(i in 1:n) { ind = which(subject[,i]==1) ind2 = ((i-1)*nt+1): (i*nt) psi2[ind2[1:length(ind)]] = psi[ind] zstar2[ind2[1:length(ind)],] = zstar[ind,] diag(B2)[ind2[1:length(ind)]] = diag(B)[ind] } S = 1/(n*h)*t(zstar2)%*%B2%*%zstar2 Obj = cbind(zstar2, psi2) Obj = array(Obj, dim=c(nt, n, p+1)) # the first [,,1:p] are zstar.m, and the last one is psi.m Lambda = 1/n*matrix(apply( apply(Obj, 2, function(xx) { z1 = xx[,1:p] ppsi = xx[,p+1] out = t(z1)%*%ppsi%*%t(ppsi)%*%z1 return(out) }), 1, sum), ncol=p) # the bread part of the sandwitch formula inv.S = solve(S) COV = 1/n*(inv.S%*%Lambda%*%inv.S) se = sqrt(diag(COV)) t.wald = beta/se pval.wald = 2*(1-pnorm(abs(t.wald))) lb = beta - qnorm(1-alpha/2)* se ub = beta + qnorm(1-alpha/2)* se stat = cbind(beta, se, t.wald, pval.wald, lb, ub) return(list(stat=stat, COV=COV)) } ################################################### Main functions ################# Ku = function(u) { # kernel 1*(abs(u)<=sqrt(5))*3/(4*sqrt(5))*(1-u^2/5) } Gu = function(u) { # CDF (based on kernel Ku) u = pmin(u, sqrt(5)) 3/(4*sqrt(5))*(u - u^3/15 + 2*sqrt(5)/3)*(abs(u)<=sqrt(5)) } psi.h = function(u, hn, tau) { # the smoothed quantile score function # unsmoothed when hn=0, out = I(u<0) - tau if(hn==0) out = 1*(u<0) - tau if(hn>0) out = 1-Gu(u/hn) - tau return(out) } rho.h = function(u, hn, tau) { # smoothed check function -u*(psi.h(u,hn,tau)) } Lag.Wu = function(z, maxit=50, itertrace=0) { # using the modified Newton-Raphson's method of Wu to # solve for Lagrange multiplier lam such that g(lam) = sum_{i=1}^n z_i(beta) /[1 + lambda^T z_i(beta) ] = 0 # INPUTS # z : a n*p matrix, score function, depends on beta # itertrace : =1/0 track or don't track the iteration process # maxit : maximum iteration # # OUTPUTS # loglr : =-2logEL ratio = -2 \sum_{i=1}^n log(n \hat p_i) (So want to minimize loglr) # maxit=50; itertrace=1 z = as.matrix(z) n = nrow(z) p = ncol(z) if(p==1){ L<--1/max(z) R<--1/min(z) gsize <-1 tol <-1e-8 k = 0 if(itertrace) cat("lam, logl, loglr, gsize, k", "\n") while(gsize>tol & k<=maxit){ lam = (L+R)/2 glam = sum(z/(1+lam*z)) # decreases with lam if(glam>0) L = lam if(glam<0) R = lam # glam<0 so need increase glam, decrease lam, so reduce the right end of lam: R gsize = abs(glam) k = k + 1 phat <- 1/as.vector((1+z*lam)*n) logl = sum(log(phat)) loglr = -2 * sum(log(n*phat)) glam = apply(z/as.vector((1+z*lam)),2,sum) gsize = mean(abs(glam)) if(itertrace) print(c(lam, logl, loglr, gsize, k)) } } # check if 1 + lam^Tz_i > 1/n for all i #arg = 1 + z%*%lam #sum(arg < 0) # for higher dimensional z if(p>1){ lam = rep(0, p) gsize = 1 tol = 1e-08 D1 = rep(0, p) D2 = matrix(0, p, p) eval = 1 k = 0 # number of iteration if(itertrace) cat("lam, logl, loglr, eval=|hessian|, gsize=|g(lam)|, iter", "\n") while(eval > tol & k<=maxit){ arg <- as.vector(1 + z%*%lam) #n*1 vector D1 = apply(z / arg, 2, mean) tmp = t(apply(z, 1, function(x) x%*%t(x))) # dim=n * (p^2) tmp = tmp/(arg^2) DD <- - matrix(apply(tmp, 2, mean), ncol=p) # Delta <- solve(D2, D1, tol=1e-15) eig <- eigen(DD)$values if(max(abs(eig))/min(abs(eig))>=1e+07){ lam <- rep(0,p) k <- maxit + 1 } if(max(abs(eig))/min(abs(eig))<1e+07){ #D2<-solve(DD, D1) D2 = solve(DD)%*% D1 eval <- max(abs(D2)) delta <- D2 dd <- min(1+(z%*%(lam-delta))) #check if all (1+z lambda) > 1/n for all i while(dd<1/n){ delta <- delta/2 dd <- min(1+(z%*%(lam-delta))) } lam <-lam-delta } k <- k+1 phat <- 1/as.vector((1+z%*%lam)*n) logl = sum(log(phat)) loglr = -2 * sum(log(n*phat)) glam = apply(z/as.vector((1+z%*%lam)),2,sum) gsize = mean(abs(glam)) if(itertrace) print(c(lam, logl, loglr, eval, gsize, k)) } } if(k>=maxit) loglr<-1000 # diverge, or for such beta, g(lam) has no solution return(loglr) } ELLDrq.Real = function(beta, x, y, ID, uID, tau=0.5, hn=0.1) { ########## calculate the logratio for given beta ########## for unbalanced design in the real data analysis (slower than the function in simulation study for balanced designs) # beta=c(1,1); tau=0.5; hn=0.1 maxit=50; itertrace=0 x = as.matrix(x) N = length(y) n = length(uID) p = ncol(x) if(is.na(beta)) beta = as.vector(rep(0,p)) if(length(beta) != p) warning("Need beta to be the same dimension with the number of covariates") ### z is n by p matrix! E = psi.h(y-x%*%beta, hn, tau) z = NULL for(i in 1:n) { ind = which(ID==uID[i]) z = rbind(z, t(t(x[ind,])%*% E[ind])) } # solve for lambda and pi loglr = Lag.Wu(z, maxit, itertrace) return(loglr) } #==========================================================# # EL hypothesis testing, H0: beta(j) = theta (logitudinal) # # test the j-th component # #==========================================================# test.ELLDrq.Real = function(x, y, ID, uID, comp, theta, tau=0.5, hn=0.1) { x = as.matrix(x) p = ncol(x) if(p<=1) warning("For univariate case, this can be done by simply calculating loglr with function ELLDrq") j = comp # LOG LIKELIHOOD UNDER THE FULL MODEL beta0 = rq(y~x-1, tau=tau)$coef EL.F = optim(par=beta0, fn=ELLDrq.Real, x=x, y=y, ID=ID, uID=uID, tau=tau, hn=hn, method="Nelder-Mead") beta.full = EL.F$par logel.full = EL.F$value # LOG LIKELIHOOD UNDER THE RESTRICTED MODEL xstar = x[,-j] ystar = y - x[,j]*theta beta.R = rep(0, p) beta.R[-j] = optim(par=beta.full[-j], fn=ELLDrq.Real, x=xstar, y=ystar, ID=ID, uID=uID, tau=tau, hn=hn, method="Nelder-Mead")$par beta.R[j] = theta logel.R = ELLDrq.Real(beta.R, x, y, ID=ID, uID=uID, tau=tau, hn=hn) loglr = -logel.full + logel.R pval = 1-pchisq(loglr,1) out = c(beta0[j], loglr, pval) names(out) = c("coef EL", "logelr", "pval") return(out) } test.boot.Real = function(x, y, ID, uID, tau, hn, nboot=500, alpha=0.05) { tmp.boot= NULL n = length(uID) set.seed(12345678) x = as.matrix(x) p = ncol(x) subject = model.matrix(~-1+factor(ID)) m = max(apply(subject, 2, sum)) # (x,y) bootstrap b.hat = rq(y~x-1, tau=tau)$coef beta.b = NULL for(j in 1:nboot) { sub.b = sample(1:n, replace=T) ind.b = as.vector(sapply(sub.b, function(x) which(ID ==x))) if(typeof(ind.b)=="list") ind.b = do.call("c",ind.b) y.b = y[ind.b] x.b = x[ind.b,] beta.b = rbind(beta.b, rq(y.b~x.b-1,tau=tau)$coef) } se = apply(beta.b,2,sd) pval = 2*(1-pnorm(abs(b.hat/se))) Q025 = apply(beta.b, 2, quantile, 0.025) Q975 = apply(beta.b, 2, quantile, 0.975) Q05 = apply(beta.b, 2, quantile, 0.05) Q95 = apply(beta.b, 2, quantile, 0.95) lb = b.hat - qnorm(1-alpha/2)*se ub = b.hat + qnorm(1-alpha/2)*se tmp.boot = cbind(b.hat, lb, ub, se, pval, Q025, Q975, Q05, Q95) return(tmp.boot) } ########################################################################### ### EL confidence interval (longitudinal) ### obtain the CI for each compoenent of the pars separately ########################################################################### CI.ELLDrq.Real = function(x, y, ID, uID, tau=0.5, hn=0.1, alpha=0.05) { # ONLY FOR P>1 # tau=0.5; hn=0.1; maxit=50; itertrace=0 x = as.matrix(x) N = length(y) p = ncol(x) Cut = qchisq(1-alpha, 1) # LOG LIKELIHOOD UNDER THE FULL MODEL rq1 = summary(rq(y~x-1, tau=tau), se="iid")$coef EL.F = optim(par=rq1[,1], fn=ELLDrq.Real, x=x, y=y, ID=ID, uID=uID, tau=tau, hn=hn, method="Nelder-Mead") beta.full = EL.F$par logel.full = EL.F$value coeff = matrix(0, nrow=p, ncol=3) for(j in 1:p) { b0 = beta.full[j] se = rq1[j, 2] # search for the right end of the interval t1 = b0 t2 = b0 + 5*se dif1 = t2 - t1 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - x[,j] * tt xstar = x[,-j] #### in order to reduce the computation, obtain the estimate of beta[-j] using rq beta.R = rep(0, p) beta.R[j] = tt beta.R[-j] = rq(ystar~xstar-1, tau=tau)$coef logel.R = ELLDrq.Real(beta.R, x, y, ID=ID, uID=uID, tau=tau, hn=hn) loglr = -logel.full + logel.R if(loglr > Cut) t2 = tt if(loglr < Cut) t1 = tt dif1 = t2-t1 # print(c(t1, t2)) } coeff[j, 1] = b0 coeff[j, 3] = (t1+t2)/2 # search for the left end of the interval t1 = b0 t2 = b0 - 5*se dif1 = t1 - t2 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - x[,j] * tt xstar = x[,-j] #### in order to reduce the computation, obtain the estimate of beta[-j] using rq beta.R = rep(0, p) beta.R[j] = tt beta.R[-j] = rq(ystar~xstar-1, tau=tau)$coef logel.R = ELLDrq.Real(beta.R, x, y, ID=ID, uID=uID, tau=tau, hn=hn) loglr = -logel.full + logel.R if(loglr > Cut) t2 = tt if(loglr < Cut) t1 = tt dif1 = t1-t2 # print(c(t1, t2)) } coeff[j, 2] = (t1+t2)/2 } colnames(coeff) = c("coef EL","lower bd","upper bd") return(coeff) } ############# bartlet correction factor calculation ############ bartlett = function(x, y, ID, uID, tau=0.5, hn) { # unbalanced design # bhat is any consistent estimator of beta such as rq or EL estimate # x includes the intercept beta = rq(y~x-1, tau=tau)$coef x = as.matrix(x) N = length(y) n = length(uID) p = ncol(x) ### z is n by p(>1) matrix! E = psi.h(y-x%*%beta, hn, tau) z = NULL for(i in 1:n) { ind = which(ID==uID[i]) z = rbind(z, t(t(x[ind,])%*% E[ind])) } Vn = t(z)%*%z/n alpha.iikk = 1/n * sum((diag(z %*% solve(Vn) %*% t(z)))^2) alpha.ikm2 = 1/(n^2) * sum(( z %*% solve(Vn) %*% t(z))^3) bart.b = p^(-1) *( alpha.iikk/2 - alpha.ikm2/3) return(bart.b) } CI.ELLDrq.Bartlet.Real = function(x, y, ID, uID, tau=0.5, hn=0.1, alpha=0.05) { # ONLY FOR P>1 # tau=0.5; hn=0.1; maxit=50; itertrace=0 x = as.matrix(x) N = length(y) n = length(uID) p = ncol(x) Cut = qchisq(1-alpha, 1) b = bartlett(x, y, ID, uID, tau=0.5, hn) Cut = Cut*(1+b/n) # LOG LIKELIHOOD UNDER THE FULL MODEL rq1 = summary(rq(y~x-1, tau=tau), se="iid")$coef EL.F = optim(par=rq1[,1], fn=ELLDrq.Real, x=x, y=y, ID=ID, uID=uID, tau=tau, hn=hn, method="Nelder-Mead") beta.full = EL.F$par logel.full = EL.F$value coeff = matrix(0, nrow=p, ncol=3) for(j in 1:p) { b0 = beta.full[j] se = rq1[j, 2] # search for the right end of the interval t1 = b0 t2 = b0 + 5*se dif1 = t2 - t1 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - x[,j] * tt xstar = x[,-j] #### in order to reduce the computation, obtain the estimate of beta[-j] using rq beta.R = rep(0, p) beta.R[j] = tt beta.R[-j] = rq(ystar~xstar-1, tau=tau)$coef logel.R = ELLDrq.Real(beta.R, x, y, ID=ID, uID=uID, tau=tau, hn=hn) loglr = -logel.full + logel.R if(loglr > Cut) t2 = tt if(loglr < Cut) t1 = tt dif1 = t2-t1 # print(c(t1, t2)) } coeff[j, 1] = b0 coeff[j, 3] = (t1+t2)/2 # search for the left end of the interval t1 = b0 t2 = b0 - 5*se dif1 = t1 - t2 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - x[,j] * tt xstar = x[,-j] #### in order to reduce the computation, obtain the estimate of beta[-j] using rq beta.R = rep(0, p) beta.R[j] = tt beta.R[-j] = rq(ystar~xstar-1, tau=tau)$coef logel.R = ELLDrq.Real(beta.R, x, y, ID=ID, uID=uID, tau=tau, hn=hn) loglr = -logel.full + logel.R if(loglr > Cut) t2 = tt if(loglr < Cut) t1 = tt dif1 = t1-t2 # print(c(t1, t2)) } coeff[j, 2] = (t1+t2)/2 } colnames(coeff) = c("coef EL","lower bd","upper bd") return(coeff) }