# CREATED ON : Wednesday, January 3, 2007 at 18:04 # MODIFIED ON : Friday, May 15, 2009 at 12:01 # AUTHOR : HUIXIA JUDY WANG # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # R version : R 2.3.1 # FUNCTIONS : QUANTILE RANK SCORE TEST FOR Clustered data library(quantreg) set.seed(1237991) n = 50 m = 10 out = NULL for(i in 1:100) { ai = rnorm(n) eij = rnorm(n*m) x = runif(n*m) z = rep(c(rep(1, 25), rep(0, 25)), each=m) y = 1 + x + 0*z + rep(ai, each=m) + eij subject = rep(1:n, each=m) out = rbind(out, as.data.frame(QRS(x, z, y, subject, tau=0.5))) } # type I errors apply(out[,3:5], 2, function(x) mean(x<0.05)) set.seed(1237991) n = 50 m = 10 out = NULL for(i in 1:100) { ai = rnorm(n) eij = rnorm(n*m) x = runif(n*m) z = rep(c(rep(1, 25), rep(0, 25)), each=m) y = 1 + x + 1*z + rep(ai, each=m) + eij subject = rep(1:n, each=m) out = rbind(out, as.data.frame(QRS(x, z, y, subject, tau=0.5))) } # type I errors apply(out[,3:5], 2, function(x) mean(x<0.05)) ### ###### Function QRS: Quantile rank score test for clustered data ###### QRS0 controls the type I error better, but QRS1 is more powerful ###### Here I assumed that $P(u_{ij}<0, u_{ij'}<0)$ is the same for any pairs within a cluster ###### Another way is to relax this assumption, and just estimate the covariance matrix of the score test statistic ###### sn=1/\sqrt{n} \sum_{ij} z_{ij}* \psi(uhat_{ij}), \psi(u)=\tau-I(u<0), Z*=(z_{ij}*) is orthogonal to (1, X). ###### by 1/n sum_{i=1}^n Zi*^T \psi(uhat_i) Zi*, where Zi* is m by p matrix (orthogonized Z) corresponding to the ith cluster ###### uhat_i is the estimated residual vector corresponding to the ith cluster. ### QRS = function(x, z, y, subject, tau=0.5) { # Quantile Rank Score Test based on gene-specific delta # Arguments: # x: the covariate for the nuisance parameters, and x does not contain the intercept # z: the covariate for the parameter(s) of interest # y: the response variable # subject: indicates the ID of subjects/clusters # tau: the quantile level # Value # delta = P(u_{ij}<0, u_{ij'}<0) # QRSi : p-value of the quantile rank score test which ignores the dependence entirely, such that delta = tau^2 # QRS0 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H0 # QRS1 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H1 # sn : the quantile rank score # Qni, Qn0, Qn1: the variance-covariance of sn for methods QRSi, QRS0 and QRS1, respectively # Tni, Tn0, Tn1: the test statistic values for methods QRSi, QRS0 and QRS1, respectively # beta: the quantile estimate for z if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1] if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1] if(is.factor(subject)==TRUE) subject = model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject = model.matrix(~-1+factor(subject)) q = qr(z)$rank x = cbind(1,x) p = qr(x)$rank n = length(y) if(p==1) dsol0 = dsolu.br(x, y, tau = tau, interp = FALSE) if(p>1) dsol0 = dsolu.br(x, y, tau = tau, interp = TRUE) an0 = dsol0$dsol bn0 = an0- (1-tau) if(p==1) zstar = as.matrix(qr.resid(qr(x[,-1]), z)) if(p>1) zstar = as.matrix(qr.resid(qr(x), z)) sn = 1/sqrt(n)*crossprod(zstar,bn0) P1 = crossprod(zstar) P2 = t(zstar)%*%subject%*%t(subject)%*%zstar - P1 Qni = 1/n*P1*tau*(1-tau) Tni = t(sn)%*%solve(Qni)%*%sn QRSi = 1-pchisq(Tni,q) if(p==1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = FALSE) if(p>1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = TRUE) an1 = dsol1$dsol beta = dsol1$coeff[-(1:p)] bn1 = an1 - (1-tau) delta0 = delta.est(an0, subject,p,tau) delta1 = delta.est(an1, subject,p,tau) Qn0 = Qni + 1/n*P2 * as.numeric(-tau^2+delta0) Tn0 = t(sn)%*%solve(Qn0)%*%sn QRS0 = 1-pchisq(Tn0, q) Qn1 = Qni + 1/n*P2 * as.numeric(-tau^2+delta1) Tn1 = t(sn)%*%solve(Qn1)%*%sn QRS1 = 1-pchisq(Tn1, q) out = list( delta0 = delta0, delta1 = delta1, QRSi = QRSi, QRS0 = QRS0, QRS1 = QRS1, beta=beta, Tni = Tni, Tn0 = Tn0, Tn1=Tn1, sn = sn, Qni = Qni, Qn0 = Qn0, Qn1=Qn1, P1=P1, P2=P2,n=n) return(out) } delta.est = function(an, subject, p, tau) { # subject is a matrix of n by nsubj K = apply(subject, 2, sum) # the number of repeats within each subject delta = (t(1-an)%*%subject%*%t(subject)%*%(1-an) - crossprod(1-an))/(sum(apply(as.matrix(K),1,function(x)x*(x-1)))-p) delta = min(max(tau^2, delta), tau) return(delta) } dsolu.br = function (x, y, tau = 0.5, interp = TRUE) { # obtain the dual solution at a single quantile level # the first column of x contains all 1's # if interp=FALSE, then do not include the intercept # this function was modified based on the function "rq.fit.br" in quantreg package tol = .Machine$double.eps^(2/3) eps = tol big = .Machine$double.xmax x = as.matrix(x) if(interp==FALSE) x = x[,-1] x = as.matrix(x) p = ncol(x) n = nrow(x) nsol = 2 ndsol = 2 if (tau < 0 || tau > 1) warning("Stop. tau can only be (0,1)") if(tau>0 & tau<1) { lci1 = FALSE qn = rep(0, p) cutoff = 0 } Z = .Fortran("rqbr", as.integer(n), as.integer(p), as.integer(n + 5), as.integer(p + 3), as.integer(p + 4), as.double(x), as.double(y), as.double(tau), as.double(tol), flag = as.integer(1), coef = double(p), resid = double(n), integer(n), double((n + 5) * (p + 4)), double(n), as.integer(nsol), as.integer(ndsol), sol = double((p + 3) * nsol), dsol = double(n * ndsol), lsol = as.integer(0), h = integer(p * nsol), qn = as.double(qn), cutoff = as.double(cutoff), ci = double(4 * p), tnmat = double(4 * p), as.double(big), as.logical(lci1), PACKAGE = "quantreg") dsol = Z$dsol[1:n] return(list(dsol=dsol, coeff=Z$coef)) }