# The following are R codes published in the Biometrics online supplementary materials # CREATED ON : Wednesday, January 3, 2007 at 18:04 # MODIFIED ON : Thursday, June 14, 2007 at 16:24 # AUTHOR : HUIXIA JUDY WANG # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # R version : R 2.3.1 # FUNCTIONS : QUANTILE RANK SCORE TEST FOR REPEATED MEASUREMENTS (QRS) # QRSc: QUANTILE RANK SCORE TEST ASSUMING EQUAL DELTA ACROSS GENES # ENAHNCED QUANTILE RANK SCORE TEST (EQRS) FOR MICROARRAY DATA # The two packages "quantreg" and "qvalue" needed can be installed from R cran library(quantreg) library(qvalue) #################################################### # A subset of the Obese Mice study # #################################################### # ObserMice.Rdata can be obtained from http://www4.stat.ncsu.edu/~wang/research/EQRS/ # As a demonstration, this example uses only the first 500 genes, while in the paper, total 22,690 genes were analyzed. # The intensity measurements in this example data are at log base 2 scale. load("ObeseMice.Rdata") Data1 = Obese.Mice[,3:7]; Data2 = Obese.Mice[,8:12] geneid = unique(Obese.Mice$AffyID) narray = ncol(Data1) + ncol(Data2) G=500 Mice.QRS = NULL; V=NULL for(i in 1:G) { index = which(Obese.Mice$AffyID == geneid[i]) tmp.Data1 = as.vector(as.matrix(Data1[index,])) tmp.Data2 = as.vector(as.matrix(Data2[index,])) y = c(as.vector(tmp.Data1), tmp.Data2) n = length(y) nprobe = n/narray tmp.probe = rep(1:nprobe, narray) tmp.group = c(rep(1, length(tmp.Data1)), rep(-1, length(tmp.Data2))) x = cbind(model.matrix(~-1+factor(tmp.probe))[,-1]) z = model.matrix(~-1+factor(tmp.group))[,-1] subject = rep(1:narray, each=nprobe) temp = as.data.frame(QRS(x, z, y, subject, tau=0.5)) Mice.QRS = rbind(Mice.QRS, temp) # to save some computation time, only 100 variances of deltahat are calculated. if(i<=100) V = c(V, var.deltahat(x, z, y, subject, nprobe, tau=0.5)) } Mice.QRS = cbind(geneid[1:G], Mice.QRS) Mice.QRS$QRSc = QRSc(Mice.QRS, tau=0.5, q=1) Mice.EQRS = EQRS(Mice.QRS, tau=0.5, sigma2=mean(V), PoD="QRSc", qval.cut=0.05, niter=1) Mice.EQRS = EQRS(Mice.QRS, tau=0.5, sigma2=mean(V), PoD="QRS1", qval.cut=0.05, niter=1) Mice.EQRS = EQRS(Mice.QRS, tau=0.5, sigma2=mean(V), PoD="FC", qval.cut=0.05, FC.cut=1.5, niter=1) # The p-values obtained from EQRS can be used as input to FDR controlling precudures ################################################### FUNCTIONS ############################################## EQRS = function(QRS.obj, tau, sigma2, PoD=c("QRSc", "QRS1","FC"), qval.cut=0.05, FC.cut=1.5, niter=1) { # Enhanced Quantile Rank Score test #Arguments: # QRS.obj: a data frame with G rows, and each row represents the output of function "QRS" for one gene. # tau: the quantile level of interest, some value between 0 and 1. # sigma2: the estimated sigma^2=Var(\hat{\delta}|\delta_g) through the bootstrap method # PoD: Either "QRSc", "QRS1" or "FC", the option to form the initial PoD class. # qval.cut: The q-value cutoff for determining the PoD class. This is required when PoD="QRS1" is specified. # FC.cut: The fold change cutoff for determining the PoD class. This is required when PoD="FC" is specified. # niter: the maximum number of iterations allowed. When niter=1, no iteration is performed. #Value: # delta.tilde: calibrated delta estimation # EQRS: the p-value of the enhanced quantile rank score test n = as.numeric(QRS.obj$n) B = as.numeric(QRS.obj$P2)/n A = as.numeric(QRS.obj$P1)/n*tau*(1-tau) - B*tau^2 delta0 = QRS.obj$delta0 sn = abs(QRS.obj$sn) G = length(sn) if(PoD=="QRSc") { qval = qvalue(QRS.obj$QRSc)$qvalue ind.PoD = which(qval<=qval.cut) } if(PoD=="QRS1") { QRS1 = QRS.obj$QRS1 qval = qvalue(QRS1)$qvalue ind.PoD = which(qval<=qval.cut) } if(PoD=="FC") { beta = QRS.obj$beta ind.PoD = which(abs(beta) >= log2(FC.cut)) } PoDs = NULL PoDs[[1]] = ind.PoD err =100 commonDEGs = length(ind.PoD) i=1 while(err>0 & i<=niter) { ind.NoD = (1:G)[-ind.PoD] deltahat1 = delta0[ind.PoD] deltahat0 = delta0[ind.NoD] sn0 = sn[ind.NoD] sn1 = sn[ind.PoD] G1 = length(ind.PoD) s2 = var(deltahat0) theta2 = max(s2 - sigma2, 0) eta = theta2 / s2 mu = mean(deltahat0) x1 = sn1^2 lm1 = lm(pmax(deltahat1 - mu,0) ~ x1) tmp1 = (1-eta)*mu + eta*(deltahat1-lm1$fitted) delta.tilde = as.vector(delta0) delta.tilde[ind.PoD] = tmp1 delta.tilde[ind.NoD] = (1-eta)*mu + eta*deltahat0 Sn = QRS.obj$sn Qn = A + B*delta.tilde Tn = Sn^2/Qn EQRS = 1-pchisq(Tn, 1) EQRS.q = qvalue(EQRS)$qvalue ind.PoD = which(EQRS.q1) dsol0 = dsolu.br(x, y, tau = tau, interp = TRUE) an0 = dsol0$dsol bn0 = an0- (1-tau) if(p==1) zstar = as.matrix(qr.resid(qr(x[,-1]), z)) if(p>1) zstar = as.matrix(qr.resid(qr(x), z)) sn = 1/sqrt(n)*crossprod(zstar,bn0) P1 = crossprod(zstar) P2 = t(zstar)%*%subject%*%t(subject)%*%zstar - P1 Qni = 1/n*P1*tau*(1-tau) Tni = t(sn)%*%solve(Qni)%*%sn QRSi = 1-pchisq(Tni,q) if(p==1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = FALSE) if(p>1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = TRUE) an1 = dsol1$dsol beta = dsol1$coeff[-(1:p)] bn1 = an1 - (1-tau) delta0 = delta.est(an0, subject,p,tau) delta1 = delta.est(an1, subject,p,tau) Qn0 = Qni + 1/n*P2 * as.numeric(-tau^2+delta0) Tn0 = t(sn)%*%solve(Qn0)%*%sn QRS0 = 1-pchisq(Tn0, q) Qn1 = Qni + 1/n*P2 * as.numeric(-tau^2+delta1) Tn1 = t(sn)%*%solve(Qn1)%*%sn QRS1 = 1-pchisq(Tn1, q) out = list( delta0 = delta0, delta1 = delta1, QRSi = QRSi, QRS0 = QRS0, QRS1 = QRS1, beta=beta, Tni = Tni, Tn0 = Tn0, Tn1=Tn1, sn = sn, Qni = Qni, Qn0 = Qn0, Qn1=Qn1, P1=P1, P2=P2,n=n) return(out) } var.deltahat = function(x, z, y, subject, nprobe, tau=0.5) { # Estimate the variance of delta estimation by using the boostrap method # Arguments: # x: the covariate for the nuisance parameters, and x does not contain the intercept # z: the covariate for the parameter(s) of interest # y: the response variable # subject: indicates the ID or number of arrays/subjects # tau: the quantile level if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1] if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1] if(is.factor(subject)==TRUE) subject <- model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject <- model.matrix(~-1+factor(subject)) p = qr(cbind(1,x))$rank x = cbind(1,x,z) rq1 = rq(y ~ x -1, tau=tau) uhat = rq1$resid uhat.m = matrix(uhat, nrow=nprobe) yhat = x%*%rq1$coeff delta.b = NULL m = n/nprobe for(i in 1:500) { Array = sample(1:m, replace=TRUE) ub = as.vector(uhat.m[,Array]) yb = yhat + ub an.b = dsolu.br(x, yb, tau = tau, interp = TRUE)$dsol delta.b <- c(delta.b, delta.est(an.b, subject, p, tau)) } return(var(delta.b)) } QRSc = function(QRS.obj, tau=0.5, q) { # the common delta approach # Arguments # QRS.obj: a data frame with G rows, and each row represents the output of function "QRS" for one gene. # tau: the quantile level # q: the dimension of the parameters of interest # Value # QRSc: the p-value of the quantile rank score test assuming common delta deltac = mean(QRS.obj$delta0) Qnc = QRS.obj$Qni + QRS.obj$P2 * (-tau^2+ deltac)/QRS.obj$n if(q>1) Tc = t(QRS.obj$sn)%*%solve(Qnc)%*%QRS.obj$sn else if (q==1) Tc=QRS.obj$sn^2/Qnc QRSc = 1-pchisq(Tc, q) return(QRSc) } #################### Subroutines ##################### delta.est = function(an, subject, p, tau) { # subject is a matrix of n by nsubj K = apply(subject, 2, sum) # the number of repeats within each subject delta = (t(1-an)%*%subject%*%t(subject)%*%(1-an) - crossprod(1-an))/(sum(apply(as.matrix(K),1,function(x)x*(x-1)))-p) delta = min(max(tau^2, delta), tau) return(delta) } dsolu.br = function (x, y, tau = 0.5, interp = TRUE) { # obtain the dual solution at a single quantile level # the first column of x contains all 1's # if interp=FALSE, then do not include the intercept # this function was modified based on the function "rq.fit.br" in quantreg package tol = .Machine$double.eps^(2/3) eps = tol big = .Machine$double.xmax x = as.matrix(x) if(interp==FALSE) x = x[,-1] x = as.matrix(x) p = ncol(x) n = nrow(x) nsol = 2 ndsol = 2 if (tau < 0 || tau > 1) warning("Stop. tau can only be (0,1)") if(tau>0 & tau<1) { lci1 = FALSE qn = rep(0, p) cutoff = 0 } Z = .Fortran("rqbr", as.integer(n), as.integer(p), as.integer(n + 5), as.integer(p + 3), as.integer(p + 4), as.double(x), as.double(y), as.double(tau), as.double(tol), flag = as.integer(1), coef = double(p), resid = double(n), integer(n), double((n + 5) * (p + 4)), double(n), as.integer(nsol), as.integer(ndsol), sol = double((p + 3) * nsol), dsol = double(n * ndsol), lsol = as.integer(0), h = integer(p * nsol), qn = as.double(qn), cutoff = as.double(cutoff), ci = double(4 * p), tnmat = double(4 * p), as.double(big), as.logical(lci1), PACKAGE = "quantreg") dsol = Z$dsol[1:n] return(list(dsol=dsol, coeff=Z$coef)) }