library(MASS) ## y=Y.qt; geno=Geno; x.adj=NA; trait.type="gaussian"; approx.method=3; missing.rm=T vc.score.fun<-function(y=Y.vec, geno=Geno, x.adj=NA, trait.type="binomial", approx.method=3, missing.rm=T, n.resamp=1e5){ ##------------------------------------------------------------------------------------------------------ ## ARGUMENTS: ##------------------------------------------------------------------------------------------------------ ## y = vector of trait values ## geno = matrix of alleles in the same format of "haplo.score". That is, each SNP has 2 adjacent ## columns of alleles, and the order of columns corresponds to the order of SNPs on a chromosome ## x.adj = matrix of non-genetic covariates, EXCLUDING the intercept term ## trait.type = character string defining the type of trait, with values of "gaussian" or "binomial" ## approx.method = a number specifying the type of method used to approximate p-value, with values of ## 1=resampling method, 2=Gamma approximation (i.e., the 2-moment approximation), and ## 3=the 3-moment approximation ## missing.rm = T or F. Option "T" performs complete data analysis, i.e., removing individuals if missing ## in genotypes, trait or covariates. Option "F" imputes the similarity score based on allele ## frequencies, but still remove individuals with missing traits or missing covariates ## n.resamp = this specifies the resampling sample size for approx.method=1. ##------------------------------------------------------------------------------------------------------ ## OTHER NOTEs: ##------------------------------------------------------------------------------------------------------ ## This program assumes that missing values are coded as "NA" ##------------------------------------------------------------------------------------------------------ trait.int <- charmatch(trait.type, c("gaussian", "binomial")) if (is.na(trait.int)) stop("Invalid trait type") if (trait.int == 0) stop("Ambiguous trait type") if (length(y) != nrow(geno)) stop("Dims of y and geno are not compatible") n.loci <- ncol(geno)/2 if (n.loci != (floor(ncol(geno)/2))) stop("Odd number of cols of geno") adjusted <- TRUE ; if (all(is.na(x.adj))) adjusted <- FALSE if (adjusted) { x.adj <- as.matrix(x.adj) if (nrow(x.adj) != length(y)) stop("Dims of y and x.adj are not compatible") } if(missing.rm==T) { ##---remove observation with missing in genotypes, traits, or covariates miss.geno <- rowSums(is.na(geno))>0 miss.y <- is.na(y) miss <- (miss.geno+miss.y)>0 }else{ ##---imputing missing geno new.geno=geno new.geno[is.na(geno)]=(-9) miss <- is.na(y) new.geno<- new.geno[!miss, ] } if (adjusted) miss <- miss | apply(is.na(x.adj), 1, any) y <- as.numeric(y[!miss]) geno <- geno[!miss, ] if (adjusted) x.adj <- x.adj[!miss, , drop = FALSE] n.subj <- length(y) n.ij <- n.subj *(n.subj-1)/2 ##----------------------------- if (!adjusted) { Ey = mean(y) Vy = var(y) newx.adj = as.matrix(rep(1, n.subj)) } if (adjusted) { reg.out = glm(y ~ x.adj, family = trait.type) Ey = reg.out$fitted.values Vy = switch(trait.int, sum(reg.out$resid^2)/reg.out$df.resid, Ey*(1-Ey)) newx.adj = as.matrix( cbind(rep(1, n.subj), x.adj) ) } ## -------- key = (adjusted +trait.int*2)-1 pre.IJ.list = cbind(rep((1:n.subj), each=n.subj), rep((1:n.subj), n.subj)) IJ.list = pre.IJ.list[pre.IJ.list[,1] < pre.IJ.list[,2],] ##------------ ## get SSS ##------------ if(missing.rm==T){ tmp.ct = getSij.ct.fun(IJ.list=IJ.list, Nloci=n.loci, Nsubj=n.subj, geno=geno) SSij = tmp.ct$Sij.ct SSii = tmp.ct$Sii.ct }else{ odd.index = seq(1,2*n.loci,2) even.index = seq(2,2*n.loci,2) tmp.ct <-getSij.ct.imputegenoNA.fun(IJ.list=IJ.list, Nloci=n.loci, Nsubj=n.subj, new.geno=new.geno, geno=geno, Odd.index=odd.index, Even.index=even.index) SSij = tmp.ct$Sij.ct SSii = tmp.ct$Sii.ct } SSS = diag(SSii); SSS[lower.tri(SSS)] = SSij; SSS = t(SSS); SSS[lower.tri(SSS)] = SSij SSd = diag(SSii) SS0 = SSS-SSd dg1 = switch(trait.int, 1, 1/Vy); ## For BT, Delta = diag(dg1) WW = switch(key, diag(rep(1/Vy, n.subj)), diag(rep(1/Vy, n.subj)), diag(rep(Vy, n.subj)), diag(Vy)) ## the above line is because of that for logistic, WW=diag(Vy); in general WW = diag(1/(Vy*dg1^2))) P.mat = WW - WW %*% newx.adj %*% ginv(t(newx.adj) %*% WW %*% newx.adj ) %*% t(newx.adj) %*% WW ##============================== ## calculate VC ##============================== newy = switch(trait.int, (y-Ey)*diag(WW), (y-Ey)) Tvc = 1/2 * newy %*% SSS %*% newy ##-------------- ## get p-values ##-------------- if(approx.method==3) { ## ---- 3-moment approximation CCC.vc = diag(1/WW) * (P.mat %*% SSS %*% P.mat)/2 eg = eigen(CCC.vc, symmetric=T);evalue = eg$values; ev = evalue[round(evalue,10)>0] c1 = sum(evalue); c2=sum(evalue^2); c3=sum(evalue^3); dof = hprime = c2^3/c3^2 pval = 1-pchisq( (Tvc-c1)*sqrt(hprime/c2)+hprime, dof) }else if(approx.method==2) { ## ---- Gamma approximation PS = P.mat %*% SSS Evc = 1/2 * sum(diag( PS )) Itt = 1/2 * sum(diag( PS %*% PS )) Vvc = Itt if(trait.int==1){ Itp = 1/2*sum(diag( (P.mat*Vy) %*% SSS ))/Vy^2 Ipp = 1/2*sum(diag( P.mat*Vy ))/Vy^2 Vvc = Itt - Itp^2/Ipp } out = get.gamma.distn.fun(Evc, Vvc) pval = 1 - pgamma(Tvc, out["aa"], 1/out["bb"]) } else{ ## ---- resampling Method CCC.vc = diag(1/WW) * (P.mat %*% SSS %*% P.mat)/2 eg = eigen(CCC.vc, symmetric=T);evalue = eg$values; ev = evalue[round(evalue,10)>0] rchi = rchisq(length(ev)*n.resamp,1); tmp = matrix(rchi, nrow=length(ev)); emp = colSums(tmp*ev) pval = sum(emp>c(Tvc))/1e5; } return(c("Tvc"=Tvc,"pval"=pval)) } ## VC test: the version allows imputing missing genotypes if choose missing.rm=F adj.missing<-function(k=1, preSS=preSii, Geno.ii=geno.i, Geno.jj=geno.j, P0=p0, P1=p1) { preS=preSS[,k]; pp=P0[k]; qq=P1[k]; geno.ii=Geno.ii[,k]; geno.jj=Geno.jj[,k] keyi.Ana = (geno.ii==(-9)) keyi.ana = (geno.ii==(-8)) keyi.nana = (geno.ii==(-18)) keyj.Ana = (geno.jj==(-9)) keyj.ana = (geno.jj==(-8)) keyj.nana = (geno.jj==(-18)) if(sum(keyi.Ana)>0){ preS[ (keyi.Ana + (geno.jj==( 0)) )==2 ] = 2+2*pp preS[ (keyi.Ana + (geno.jj==( 1)) )==2 ] = 2 preS[ (keyi.Ana + (geno.jj==( 2)) )==2 ] = 2*qq preS[ (keyi.Ana + keyj.Ana )==2 ] = 1+2*pp+pp^2+qq^2 preS[ (keyi.Ana + keyj.ana )==2 ] = 1+pp^2+qq^2 preS[ (keyi.Ana + keyj.nana )==2 ] = 2*(pp+pp^2+qq^2) } if(sum(keyi.ana)>0){ preS[ (keyi.ana + (geno.jj==( 0)) )==2 ] = 2*pp preS[ (keyi.ana + (geno.jj==( 1)) )==2 ] = 2 preS[ (keyi.ana + (geno.jj==( 2)) )==2 ] = 2*(1+qq) preS[ (keyi.ana + keyj.Ana )==2 ] = 1+pp^2+qq^2 preS[ (keyi.ana + keyj.ana )==2 ] = 1+2*qq+pp^2+qq^2 preS[ (keyi.ana + keyj.nana )==2 ] = 2*(qq+pp^2+qq^2) } if(sum(keyi.nana)>0){ preS[ (keyi.nana + (geno.jj==( 0)) )==2 ] = 4*pp preS[ (keyi.nana + (geno.jj==( 1)) )==2 ] = 2 preS[ (keyi.nana + (geno.jj==( 2)) )==2 ] = 4*qq preS[ (keyi.nana + keyj.Ana )==2 ] = 2*(pp+pp^2+qq^2) preS[ (keyi.nana + keyj.ana )==2 ] = 2*(qq+pp^2+qq^2) preS[ (keyi.nana + keyj.nana )==2 ] = 4*(pp^2+qq^2) } if(sum(keyj.Ana)>0){ preS[ (keyj.Ana + (geno.ii==( 0)) )==2 ] = 2+2*pp preS[ (keyj.Ana + (geno.ii==( 1)) )==2 ] = 2 preS[ (keyj.Ana + (geno.ii==( 2)) )==2 ] = 2*qq } if(sum(keyj.ana)>0){ preS[ (keyj.ana + (geno.ii==( 0)) )==2 ] = 2*pp preS[ (keyj.ana + (geno.ii==( 1)) )==2 ] = 2 preS[ (keyj.ana + (geno.ii==( 2)) )==2 ] = 2+2*qq } if(sum(keyj.Ana)>0){ preS[ (keyj.nana + (geno.ii==( 0)) )==2 ] = 4*pp preS[ (keyj.nana + (geno.ii==( 1)) )==2 ] = 2 preS[ (keyj.nana + (geno.ii==( 2)) )==2 ] = 4*qq } return(preS) } getSij.ct.imputegenoNA.fun<-function(IJ.list=ij.list, Nloci=nloci, Nsubj=nsubj, new.geno=New.Geno, geno=geno, Odd.index, Even.index){ ii.vec = IJ.list[,1]; jj.vec=IJ.list[,2] workgeno = 1-as.matrix(geno) p0 = apply(rbind(workgeno[,Odd.index], workgeno[,Even.index]), 2, mean, na.rm=T) p1 = 1-p0 geno.add = new.geno[,Odd.index]+ new.geno[,Even.index] geno.i = geno.add[ii.vec,] geno.j = geno.add[jj.vec,] preSij =((geno.i-geno.j)==0)*4 preSij[( (geno.i==1)+(geno.j==1) )>0] = 2 if.na = colSums(geno.add<0) if(sum(if.na)>0){ lst = (1:Nloci)[if.na>0] for(kk in lst){##print(kk); preSij[,kk] = adj.missing(k=kk, preSS=preSij, Geno.ii=geno.i, Geno.jj=geno.j, P0=p0,P1=p1)} } SSij = rowSums(preSij)/Nloci ## get Sii geno.i = geno.j = geno.add preSii =((geno.i-geno.j)==0)*4 preSii[( (geno.i==1)+(geno.j==1) )>0] = 2 if(sum(if.na)>0){ lst = (1:Nloci)[if.na>0] for(kk in lst){## print(kk); preSii[,kk] = adj.missing(k=kk, preSS=preSii, Geno.ii=geno.i, Geno.jj=geno.j, P0=p0,P1=p1)} } SSii = rowSums(preSii)/Nloci return(list("Sij.ct"=SSij, "Sii.ct"=SSii)) } getSij.ct.fun<-function(IJ.list=ij.list, Nloci=nloci, Nsubj=nsubj, geno=Geno){ ii.vec = IJ.list[,1]; jj.vec=IJ.list[,2] geno.add = matrix(0, ncol=Nloci, nrow=Nsubj) for(i in 1:Nloci){geno.add[,i] = rowSums(geno[,((i-1)*2+1):(2*i)])} geno.i = geno.add[ii.vec,] geno.j = geno.add[jj.vec,] preSij =((geno.i-geno.j)==0)*4 preSij[( (geno.i==1)+(geno.j==1) )>0] = 2 SSij = rowSums(preSij)/Nloci ## get Sii geno.i = geno.j = geno.add preSii =((geno.i-geno.j)==0)*4 preSii[( (geno.i==1)+(geno.j==1) )>0] = 2 SSii = rowSums(preSii)/Nloci return(list("Sij.ct"=SSij, "Sii.ct"=SSii)) } get.gamma.distn.fun<-function(E, V){ aa = E^2/V bb = V/E return(c("aa"=aa,"bb"=bb))}