# CREATED ON : Friday, April 2, 2010 00:16 # MODIFIED ON : June 23, 2010 # AUTHOR : HUIXIA JUDY WANG & JIANHUA HU # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # R version : R 2.8.1 # PAPER : Identification Of Differential Aberrations in Multiple-sample Array CGH Studies library(quantreg) library(qvalue) FAL.func <- function(expdat, n0, n1, G, tau=0.5, lambdas, nboot=100, alpha=0.05, adap.wt, plot.SIC=FALSE, test="QRS", segcut=0) { ### 12/08/2009 #### Adaptive Fused Penalty #### only one step at the probe level #### bootstrap residuals #### Use QRS to test the significance of each segment # segcut=1; nboot=100; alpha=0.05; adap.wt=NA; plot.SIC=TRUE # adap.wt=NA: perform fuased adaptive lasso, otherwise, adap.wt=rep(1,2*(G-1)) (will be specified this way) # plot.SIC=TRUE: then plot SIC versus lambdas to help decide on the suitable range for choosing lam # Adaptive Fused Lasso function based on the residual bootstrap n = n0+n1 N = n*G y = as.vector(t(expdat)) loc = 1:G # adaptive weights, Fedlity and R matrices tmp.mat = design.mat.AF.func(expdat, y, n0, n1, G, tau, adap.wt) Fid = tmp.mat$Fid R = tmp.mat$R wt = tmp.mat$wt #### Use SIC to search for the appropriate lambda within lambdas ahat = bhat = SIC = NULL for(lam in lambdas) { Coef = est.flasso(y, n0, n1, G, lam, tau=tau, wt=wt, Fid, R) uhat = y - Fid%*%Coef tmp.ahat = Coef[1:G] tmp.bhat = Coef[-(1:G)] delta = diff(tmp.ahat) Delta = diff(tmp.bhat) edf = sum(abs(delta)>0.0001) + sum(abs(Delta)>0.0001) ###edf = sum(abs(uhat)<=0.0000001);edf = sum(abs(delta)>0.000001) + sum(abs(Delta)>0.000001) tmp.SIC = log(sum(uhat*(tau-1*(uhat<0)))) + log(N)/(2*N)*edf SIC = c(SIC, tmp.SIC) ahat = rbind(ahat, tmp.ahat) bhat = rbind(bhat, tmp.bhat) } idx = order(SIC)[1] ahat = ahat[idx,] bhat=bhat[idx,] lam = lambdas[idx] cat("lam is", lam, "\n") Coef = c(ahat, bhat) uhat = y - Fid%*%Coef if(plot.SIC){ plot(SIC~lambdas); abline(v=lam, col=2)} b.est = bhat a.est = ahat ##------------ bootstrap to detect the segments (clone level) --------------## bhat.b = ahat.b = NULL expdat.star = matrix(uhat, byrow=T, ncol=G) for(b in 1:nboot) { idx1 = sample(1:n, n, replace=T) expdat.b = expdat.star[idx1,] y.b = as.vector(t(expdat.b)) Coef = est.flasso(y.b, n0, n1, G, lam, tau=tau, wt=wt, Fid, R) tmp.ahat = Coef[1:G] tmp.bhat = Coef[-(1:G)] ## plot(tmp.bhat) ahat.b = rbind(ahat.b, tmp.ahat) bhat.b = rbind(bhat.b, tmp.bhat) ###if(b%%50==0) print(b) } #### ####### 1.1 Change points associated with intercept #### delta = diff(a.est) delta.b = apply(ahat.b, 1, diff) pval.a = apply(cbind(delta, delta.b), 1, function(x) mean(abs(x[-1])>=abs(x[1]))) cutoff = matrix(seq(0.00001, 0.01, 0.00001),ncol=1) FDRa = pmin(1, apply(cutoff, 1, function(x) x*G/max(0.000001,sum(pval.a < x)))) #FDRa <- qvalue(pval.a)$qvalues if(min(FDRa)=alpha) CP.a = c(1,G) # 1.2 Change points associated with slope delta = diff(b.est) delta.b = apply(bhat.b, 1, diff) pval.b = apply(cbind(delta, delta.b), 1, function(x) mean(abs(x[-1])>=abs(x[1]))) cutoff = matrix(seq(0.00001, 0.05, 0.00001),ncol=1) FDRb = pmin(1, apply(cutoff, 1, function(x) x*G/max(0.000001,sum(pval.b < x)))) #FDRb = qvalue(pval.b)$qvalues if(min(FDRb)=alpha) CP.b = c(1,G) ## p-values from the clone-wise test p1.a = pval.a p1.b = pval.b #2.1 combine the very short segments with neighbor segments (baseline effect) -# if(length(CP.a)>1) { length.seg.a = c(CP.a[1]-1,diff(c(CP.a,G+1))) segidx.a = rep(1:(length(CP.a)+1), times=length.seg.a) while(min(length.seg.a)<= segcut) { idx = order(length.seg.a)[1] ind = which(segidx.a==idx) dd = abs(ahat[ind][1] - c(median(ahat[which(segidx.a==idx-1)]), median(ahat[which(segidx.a==idx+1)]))) # print(dd) # combine with the cloest neighboring segment if(order(dd)[1]==1) segidx.a[which(segidx.a>=idx)]=segidx.a[which(segidx.a>=idx)]-1 else segidx.a[which(segidx.a>idx)]=segidx.a[which(segidx.a>idx)]-1 length.seg.a = as.vector(table(segidx.a)) # print(length.seg.a) } CP.a = pmin(cumsum(length.seg.a)+1, G)} #else if (length(CP.a)<=1) {segidx.a=rep(1,G); CP.a=G; length.seg.a=G} #2.2 combine the very short segments with neighbor segments (slope effect) -# if(length(CP.b)>1) { length.seg.b = c(CP.b[1]-1,diff(c(CP.b,G+1))) segidx.b = rep(1:(length(CP.b)+1), times=length.seg.b) while(min(length.seg.b)<= segcut) { idx = order(length.seg.b)[1] # print(idx) ind = which(segidx.b==idx) dd = abs(bhat[ind][1] - c(median(bhat[which(segidx.b==idx-1)]), median(bhat[which(segidx.b==idx+1)]))) #print(dd) # combine with the cloest neighboring segment if(order(dd)[1]==1) segidx.b[which(segidx.b>=idx)]=segidx.b[which(segidx.b>=idx)]-1 else segidx.b[which(segidx.b>idx)]=segidx.b[which(segidx.b>idx)]-1 length.seg.b = as.vector(table(segidx.b)) #print(length.seg.b) } CP.b = pmin(cumsum(length.seg.b)+1, G)} if(length(CP.b)<=1) {segidx.b=rep(1,G); CP.b=G; length.seg.b=G} CP.a = c(1, CP.a) CP.b = c(1, CP.b) ####### #################### Return the segment-wise estimates ####### grp = c(rep(0,n0), rep(1,n1)) Z.csr = as.matrix.csr(model.matrix(~as.factor(grp))) nseg.a = length(CP.a)-1 length.seg.a = diff(CP.a); length.seg.a[nseg.a] = length.seg.a[nseg.a]+1 nseg.b = length(CP.b)-1 length.seg.b = diff(CP.b); length.seg.b[nseg.b] = length.seg.b[nseg.b]+1 if(nseg.a>0) segidx.a = rep(1:nseg.a, times=length.seg.a) if(nseg.b>0) segidx.b = rep(1:nseg.b, times=length.seg.b) if(length(unique(segidx.a))>1) Z1 = as.matrix.csr(model.matrix(~as.factor(segidx.a)-1)) #else if(length(unique(segidx.a))<=1) Z1 = rep(1, G) if(length(unique(segidx.b))>1) Z2 = as.matrix.csr(model.matrix(~as.factor(segidx.b)-1)) #else if(length(unique(segidx.b))<=1) Z2 = rep(1,G) X.csr = cbind(Z.csr[,1] %x% Z1, Z.csr[,2] %x% Z2) # The estimated segment-wise coefficients from the (final) reduced model b0 = rq.fit.sfn(X.csr, y, tau=tau)$coef ahat = b0[1:nseg.a] bhat = b0[-(1:nseg.a)] ####### ################ Use QRS to test the significance of each segment ####### # segment for the second group tmp = paste(segidx.a, ".", segidx.b, sep="") tmp = table(tmp) tmp = tmp[order(as.numeric(names(tmp)))] segidx.c = rep(1:length(tmp), times=tmp) nseg.a = max(segidx.a) nseg.b = max(segidx.b) nseg.c = max(segidx.c) #### TEST IF THE EFFECTs OF A SEGMENT are SIGNIFICANT OR NOT (Use rank score test for each segment, for #### intercept and slope separately (modified on June 11, 2009, 2pm) # for intercept effects pval.a = NULL for(k in 1:nseg.a) { idx = which(segidx.a==k) tmp.dat = expdat[, idx] tmp.y = as.vector(t(tmp.dat)) # subject 1 (clones), subject 2, .... g = length(idx) subject = rep(1:n, each=g) tmp.x = c(rep(0, n0*g), rep(1, n1*g)) tmp.z = rep(1, n*g) if(test=="QRS.g") tmp.out = QRS.g.2grp(tmp.x, tmp.z, tmp.y, subject, tau=tau)$pval if(test=="QRS") tmp.out = QRS(tmp.x, tmp.z, tmp.y, subject, tau=tau)$QRS1 pval.a = c(pval.a, tmp.out) } # slope effects pval.b = NULL for(k in 1:nseg.b) { idx = which(segidx.b==k) tmp.dat = expdat[, idx] tmp.y = as.vector(t(tmp.dat)) # subject 1 (clones), subject 2, .... g = length(idx) subject = rep(1:n, each=g) tmp.z = c(rep(0, n0*g), rep(1, n1*g)) tmp.x = rep(1, n*g) if(test=="QRS.g") tmp.out = QRS.g.2grp(tmp.x, tmp.z, tmp.y, subject, tau=tau)$pval if(test=="QRS") tmp.out = QRS(tmp.x, tmp.z, tmp.y, subject, tau=tau)$QRS1 pval.b = c(pval.b, tmp.out) } # test the effect of intercept + slope (median of the second group) pval.c = NULL for(k in 1:nseg.c) { idx = which(segidx.c==k) tmp.dat = expdat[, idx] tmp.y = as.vector(t(tmp.dat)) # subject 1 (clones), subject 2, .... g = length(idx) subject = rep(1:n, each=g) tmp.x = c(rep(1, n0*g), rep(0, n1*g)) tmp.z = rep(1, n*g) if(test=="QRS.g") tmp.out = QRS.g.2grp(tmp.x, tmp.z, tmp.y, subject, tau=tau)$pval if(test=="QRS") tmp.out = QRS(tmp.x, tmp.z, tmp.y, subject, tau=tau)$QRS1 pval.c = c(pval.c, tmp.out) } mu2 = rep(ahat, times=table(segidx.a)) + rep(bhat, times=table(segidx.b)) #plot(mu2~loc) sigseg.b = which(pval.b < alpha/max(1,nseg.b)) sigseg.a = which(pval.a < alpha/max(1,nseg.a)) sigseg.c = which(pval.c < alpha/max(1,nseg.c)) return(list(lam=lam, a.est=a.est, b.est=b.est, mu2=mu2, ahat=ahat, bhat=bhat, CP.a=CP.a, CP.b=CP.b, p1.a=p1.a, p1.b=p1.b, segidx.a=segidx.a, segidx.b=segidx.b, segidx.c=segidx.c, pval.a = pval.a, pval.b=pval.b, pval.c=pval.c, sigseg.a=sigseg.a, sigseg.b=sigseg.b, sigseg.c=sigseg.c )) } design.mat.AF.func = function(expdat, y, n0, n1, G, tau, adap.wt="adapt") { # obtain the arguments needed for rq.fit.sfn # including Fidelity part and the R part (Penalty=R*lambda) # y: N*G vector, the first G observations are from array 1 (control), the first n0 arrays are from control # and the last n1 arrays are from treated group # n0: number of arrays for control # n1: number of arrays for treated group # G: total number of clones # tau: quantile level of interest, default is 0.5 n = n0+n1 N = n*G if(length(y)!=N) warning("The length of y doesn't equal n*G") grp = c(rep(0,n0), rep(1,n1)) Z.csr = as.matrix.csr(model.matrix(~as.factor(grp))) X.csr = Z.csr %x% as(G,"matrix.diag.csr") ### penalization nlam = 2*(G-1) R <- (as(2,"matrix.diag.csr") %x% as.matrix.csr(diff(diag(G)))) ## adaptive weights if(adap.wt=="adapt") { b0.sfn = rq.fit.sfn(X.csr, y, tau=tau)$coef wt = 1/ pmax(abs(diff(b0.sfn)), 0.00001) wt = wt[-G] } ## adaptive weights: based on the clone-wise estimates else if(adap.wt=="adapt2") { mu1 = apply(expdat[1:n0,], 2, quantile, tau) mu2 = apply(expdat[-(1:n0),], 2, quantile, tau) beta = mu2-mu1 wt = c(1/pmax(abs(diff(mu1)),0.00001),1/pmax(abs(diff(beta)),0.00001)) } else if(adap.wt=="scaled") { b0.sfn = rq.fit.sfn(X.csr, y, tau=tau)$coef wt = 1/(abs(diff(b0.sfn))+0.000001) wt = wt[-G] # match the scale of mu and beta coefficients std = c(sd(diff(b0.sfn[1:G])),sd(diff(b0.sfn[-(1:G)]))) std = rep(std, each=G-1) wt = wt*std } else wt=adap.wt return(list(Fid=X.csr, R=R, wt=wt)) } est.flasso = function(y, n0, n1, G, lam, tau=0.5, wt, Fidelity, R) { # obtain the shrinkage estimators ahat_j and bhat_j for a known lam value. # The estimation is obtained by using sfn: sparse fn # Input # y: observed N*1 response vector, N=(n0+n1)*G # Fidelity: output from design.mat.csr # R: output from design.mat.csr # n0: number of arrays for control # n1: number of arrays for treated group # G: total number of clones # lam: shrinkage parameter. For simplicity, we choose same lam for location and slope parameters # tau: quantile level of interest, default is 0.5 # wt: the adaptive weights # output: estimated ahat_j and bhat_j, j=1,..,G, after fused adaptive lasso penalization n = n0+n1 N = n*G if(length(y)!=N) warning("The length of y doesn't equal n*G") ### penalization lambda = lam*wt # 2*(G-1) length nlam = length(lambda) Penalty <- R* lambda D <- rbind(Fidelity, Penalty) y2 = c(y, rep(0, nlam)) rhs = (1-tau) * (t(Fidelity)%*%rep(1,N)) + 0.5* (t(Penalty)%*%rep(1,nlam)) coeff <- rq.fit.sfn(D, y2, rhs=rhs, tau=tau)$coef return(coeff) } QRS.g.2grp = function(x, z, y, subject, tau=0.5) { # doesn't specify any correlation structure, for now only works for the balanced design, and dim(z)=1 # Arguments: # x: the covariate for the nuisance parameters # z: the covariate for the parameter(s) of interest # y: the response variable # subject: indicates the ID or number of arrays/subjects # tau: the quantile level # Value # pval : p-value of the quantile rank score test # sn : the quantile rank score # Qni, Qn0, Qn1: the variance-covariance of sn for methods QRSi, QRS0 and QRS1, respectively # Tni, Tn0, Tn1: the test statistic values for methods QRSi, QRS0 and QRS1, respectively # beta: the quantile estimate for z if(is.factor(subject)==TRUE) subject = model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject = model.matrix(~-1+factor(subject)) q = qr(z)$rank if(q!=1) warning("do not have this option yet") # refer to Judy\research\WangZhu07\codes\functions-simu-v6.R") p = qr(x)$rank n = length(y) nsubj = ncol(subject) nt = sum(subject[,1]) ehat = rq(y~x-1, tau=tau)$resid ranks = 1*(ehat<0)-tau ranks[abs(ehat)<=0.00001] = 0 zstar = qr.resid(qr(x), z) sn = crossprod(zstar, ranks) psi = ranks psi.m = matrix(psi, ncol=nsubj) zstar.m = matrix(zstar, ncol=nsubj) Obj = rbind(psi.m, zstar.m) Qn.g = sum(apply(Obj, 2, function(x) crossprod(x[-(1:nt)],x[(1:nt)])^2)) tval = sn^2/Qn.g pval = 1 -pchisq(tval, q) out = data.frame(sn=sn, tval=tval, pval=pval) return(out) } QRS = function(x, z, y, subject, tau=0.5) { # Quantile Rank Score Test based on gene-specific delta # Arguments: # x: the covariate for the nuisance parameters, and x does not contain the intercept # z: the covariate for the parameter(s) of interest # y: the response variable # subject: indicates the ID or number of arrays/subjects # tau: the quantile level # Value # QRSi : p-value of the quantile rank score test which ignores the dependence entirely, such that delta = tau^2 # QRS0 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H0 # QRS1 : p-value of the quantile rank score test, where delta is estimated by using the residuals obtained under H1 # sn : the quantile rank score # Qni, Qn0, Qn1: the variance-covariance of sn for methods QRSi, QRS0 and QRS1, respectively # Tni, Tn0, Tn1: the test statistic values for methods QRSi, QRS0 and QRS1, respectively # beta: the quantile estimate for z if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1] if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1] if(is.factor(subject)==TRUE) subject = model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject = model.matrix(~-1+factor(subject)) q = qr(z)$rank x = cbind(1,x) p = qr(x)$rank n = length(y) ###################### modified on July 30, 2008 to test intercept ####################### if(p==1 | sum(z-1)==0) dsol0 = dsolu.br(x, y, tau = tau, interp = FALSE) # when x is only intercept or z is intercept if(p>1 & sum(z-1)!=0) dsol0 = dsolu.br(x, y, tau = tau, interp = TRUE) ###################### modified on July 30, 2008 to test intercept ####################### an0 = dsol0$dsol # I(e>0) bn0 = an0- (1-tau) # tau - I(e<0) #### modified on June 11, 2009 to test intercept, account for too many zero residuals if(sum(z-1)==0){ ehat = rq(y~x[,2]-1, tau=tau)$resid bn0 = 1*(ehat<0)-tau bn0[abs(ehat)<=0.00001] = 0 } ###################### modified on July 30, 2008 to test intercept ####################### if(p==1 | sum(z-1)==0) zstar = lm(z~x[,-1]-1)$resid if(p>1 & sum(z-1)!=0) zstar = lm(z~x-1)$resid ###################### modified on July 30, 2008 to test intercept ####################### sn = 1/sqrt(n)*crossprod(zstar,bn0) P1 = crossprod(zstar) P2 = t(zstar)%*%subject%*%t(subject)%*%zstar - P1 Qni = 1/n*P1*tau*(1-tau) Tni = t(sn)%*%solve(Qni)%*%sn QRSi = 1-pchisq(Tni,q) if(p==1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = FALSE) if(p>1) dsol1 = dsolu.br(cbind(x,z), y, tau = tau, interp = TRUE) an1 = dsol1$dsol beta = dsol1$coeff[-(1:p)] bn1 = an1 - (1-tau) delta0 = delta.est(an0, subject,p,tau) delta1 = delta.est(an1, subject,p,tau) Qn0 = Qni + 1/n*P2 * as.numeric(-tau^2+delta0) Tn0 = t(sn)%*%solve(Qn0)%*%sn QRS0 = 1-pchisq(Tn0, q) Qn1 = Qni + 1/n*P2 * as.numeric(-tau^2+delta1) Tn1 = t(sn)%*%solve(Qn1)%*%sn QRS1 = 1-pchisq(Tn1, q) out = list( delta0 = delta0, delta1 = delta1, QRSi = QRSi, QRS0 = QRS0, QRS1 = QRS1, beta=beta, Tni = Tni, Tn0 = Tn0, Tn1=Tn1, sn = sn, Qni = Qni, Qn0 = Qn0, Qn1=Qn1, P1=P1, P2=P2,n=n) return(out) } delta.est = function(an, subject, p, tau) { # subject is a matrix of n by nsubj K = apply(subject, 2, sum) # the number of repeats within each subject delta = (t(1-an)%*%subject%*%t(subject)%*%(1-an) - crossprod(1-an))/(sum(apply(as.matrix(K),1,function(x)x*(x-1)))-p) delta = min(max(tau^2, delta), tau) return(delta) } dsolu.br = function (x, y, tau = 0.5, interp = TRUE) { # obtain the dual solution at a single quantile level # the first column of x contains all 1's # if interp=FALSE, then do not include the intercept # this function was modified based on the function "rq.fit.br" in quantreg package tol = .Machine$double.eps^(2/3) eps = tol big = .Machine$double.xmax x = as.matrix(x) if(interp==FALSE) x = x[,-1] x = as.matrix(x) p = ncol(x) n = nrow(x) nsol = 2 ndsol = 2 if (tau < 0 || tau > 1) warning("Stop. tau can only be (0,1)") if(tau>0 & tau<1) { lci1 = FALSE qn = rep(0, p) cutoff = 0 } Z = .Fortran("rqbr", as.integer(n), as.integer(p), as.integer(n + 5), as.integer(p + 3), as.integer(p + 4), as.double(x), as.double(y), as.double(tau), as.double(tol), flag = as.integer(1), coef = double(p), resid = double(n), integer(n), double((n + 5) * (p + 4)), double(n), as.integer(nsol), as.integer(ndsol), sol = double((p + 3) * nsol), dsol = double(n * ndsol), lsol = as.integer(0), h = integer(p * nsol), qn = as.double(qn), cutoff = as.double(cutoff), ci = double(4 * p), tnmat = double(4 * p), as.double(big), as.logical(lci1), PACKAGE = "quantreg") dsol = Z$dsol[1:n] return(list(dsol=dsol, coeff=Z$coef)) } plot.AFL.func = function(obj, expdat, n0, n1, G, CP0, main, frame.plot, lwd1, lwd2, cex.bracket, cex) { y = apply(expdat[-(1:n0),],2,median) - apply(expdat[(1:n0),],2,median) loc = 1:G CP = obj$CP.b nseg = length(CP)-1 lengthseg = as.numeric(table(obj$segidx.b)) est = rep(obj$bhat, times= lengthseg) segidx = rep(1:nseg, times= lengthseg) plot(y~loc, type="p", col="grey32", main=main, ylab="Group difference", xlab="Clone order" ,ylim=c(min(y,est)/0.8, max(y,est)/0.8),frame.plot=frame.plot, xaxt = "n", cex=cex) abline(h=0, col="grey") abline(v=CP0-0.5, col="grey", lty=5) est2 = NULL for(s in 1:max(segidx)) { idx = which(segidx==s) lines(est[idx]~loc[idx], col="red", lwd=lwd1) est2 = c(est2, rep(median(est[idx]), times=length(idx))) } if(length(obj$sigseg.b)>0) { for(s in obj$sigseg.b) { idx = which(segidx==s) lines(est[idx]~loc[idx], col="green", lwd=lwd2) } } # plot the intervals (segments) points(est2[c(1,CP[-length(CP)])]~loc[c(1,CP[-length(CP)])], col="blue", pch="[", cex=cex.bracket) points(est2[c(CP[-length(CP)]-1,G)]~loc[c(CP[-length(CP)]-1,G)], col="blue", pch="]", cex=cex.bracket) xat = (CP0 + c(1, CP0[-length(CP0)]))/2 - 0.5 text(xat, max(y,est)/0.82, 1:(length(CP0))) } plot.CP.func <- function(y, est, loc, CP0, CP, segidx, Main, ylab, xlab,frame.plot, lwd, cex.bracket=1,cex=0.5) { # plot the estimated segments against loc # y: the data points (e.g. clone-wise baseline/treatment effects) # est: the segment-wise estimated median # loc: clone location # CP0: the true change point locations # CP: the estimated change points # segidx: the estimated segment index vector # Main: the title of the plot plot(y~loc, type="p", col="grey32", main=Main, ylim=c(min(y,est)/0.8, max(y,est)/0.8), ylab=ylab, xlab=xlab,frame.plot=frame.plot, xaxt = "n", cex=cex) abline(h=0, col="grey") abline(v=CP0-0.5, col="grey", lty=5) est2 = NULL for(s in 1:max(segidx)) { idx = which(segidx==s) lines(est[idx]~loc[idx], col="red", lwd=lwd) est2 = c(est2, rep(median(est[idx]), times=length(idx))) } # plot the intervals (segments) points(est2[c(1,CP[-length(CP)])]~loc[c(1,CP[-length(CP)])], col="blue", pch="[", cex=cex.bracket) points(est2[c(CP[-length(CP)]-1,G)]~loc[c(CP[-length(CP)]-1,G)], col="blue", pch="]", cex=cex.bracket) } TP.FP.CP.func = function(CP0, CP) { ### TP and FP of change point detection # CP and CP0 both include 1 and G tmp.TP = 0 K = length(CP0) for(k in 2:(K-1)) { tmp = abs(CP - CP0[k]) idx = order(tmp)[1] if(tmp[idx]<=0) tmp.TP = tmp.TP+1 } tmp.FP = length(CP)-2 - tmp.TP return(data.frame(TP=tmp.TP, FP=tmp.FP)) } detect.each.seg.func = function(CP0, CP) { K=length(CP0) nseg = K-1 detect = rep(0, nseg) start0 = CP0[-K] end0 = CP0[-1]-1 end0[length(end0)] = CP0[length(CP0)] seg0 = rbind(start0, end0) start = CP[-length(CP)] end = CP[-1]-1 end[length(end)] = CP[length(CP)] seg = rbind(start, end) detect = NULL idxs = NULL for(k in 1:nseg) { tmp = apply(seg - seg0[,k], 2, function(x) sum(abs(x))) idx = order(tmp)[1] if(max(abs(seg[,idx]-seg0[,k]))<=2) tmp.detect=1 else tmp.detect=0 detect = c(detect, tmp.detect) idxs = c(idxs, idx) } FalseP = ncol(seg) - sum(detect) if(length(CP)==2) FalseP=0 return(c(detect, FalseP)) } ROC.CP.func = function(CP0, bhat, dcuts, G) { # CP0: does not include 1, includes G # nCP = length(CP0)-1 # nCP0 = length(CP0)-1 # nCP = length(CP.b) delta = abs(diff(bhat)) FP = TP = NULL for(dcut in dcuts) { # combine the very short segments with neighbor segments (treatment effect) if(max(delta)>=dcut) { CP.b = which(delta >= dcut)+1 ## within 2 locations are ok tmp.TP = 0 K = length(CP0)-1 # no of change points (not including 1 and G) for(k in 1:K) { tmp = abs(CP.b - CP0[k]) idx = order(tmp)[1] if(tmp[idx]<=2) tmp.TP = tmp.TP+1 } tmp.FP = length(CP.b)- tmp.TP } else tmp.TP=tmp.FP=0 FP = c(FP, tmp.FP) TP = c(TP, tmp.TP) } return(list(TP=TP, FP=FP)) } ROC.CP.pval.func = function(CP0, pval, dcuts, G) { ### calculate TP/FP change points by changing the p-value cutoffs # pval: (G-1)*1 vector # CP0: does not include 1, includes G # nCP0 = length(CP0)-1 # nCP = length(CP.b) FP = TP = NULL for(dcut in dcuts) { # combine the very short segments with neighbor segments (treatment effect) if(min(pval)<=dcut) { CP.b = which(pval <= dcut)+1 ## within 2 locations are ok tmp.TP = 0 K = length(CP0)-1 # no of change points (not including 1 and G) for(k in 1:K) { tmp = abs(CP.b - CP0[k]) idx = order(tmp)[1] if(tmp[idx]==0) tmp.TP = tmp.TP+1 } tmp.FP = length(CP.b)- tmp.TP } else tmp.TP=tmp.FP=0 FP = c(FP, tmp.FP) TP = c(TP, tmp.TP) } return(list(TP=TP, FP=FP)) } ROC.seg.pval.func = function(CP0, pval, dcuts, G) { ### calculate TP/FP of identifying segments by changing the p-value cutoffs # Problem: the TP is not increasing! # pval: (G-1)*1 vector # CP0: includes 1 and G FP = TP = NULL for(dcut in dcuts) { # combine the very short segments with neighbor segments (treatment effect) if(min(pval)<=dcut) { CP.b = which(pval <= dcut)+1 tt = detect.each.seg.func(CP0, CP.b) tmp.TP = sum(tt[-length(tt)]) tmp.FP = tt[length(tt)] } else tmp.TP=tmp.FP=0 FP = c(FP, tmp.FP) TP = c(TP, tmp.TP) } return(list(TP=TP, FP=FP)) }