##-------------------------------------------------- ## updated on Sep 22, 2006 ##-------------------------------------------------- ## changes made (Sep 22, 2006): ## . Fix "getcut.fun" in the file of "main.functions.forRDscoreTest.public.r" ## add "base =2 " in calculating info ##-------------------------------------------------- ## changes made (Aug 14, 2006): ## . No changes in the program itself ## . Remove "library(haplo.score)" ## . Append and refine some instructions ##-------------------------------------------------- data.mat = read.table("genodata.mat", header=T) ## read in the example data file digit <- 1 ## no. of digits used in coding alleles Y01.vec<- data.mat[,"Y01"] ## trait values in binary variable Yqt.vec<- data.mat[,"Yqt"] ## trait values in quantitative variable Geno <- data.mat[,-c(1:2)] ## genotype data in Schaid et al. 2002 format; see "help(haplo.score)" for more information ngeno <- nrow(data.mat) ## no. of individuals nhap <- 2*ngeno X.adj <- NA ## if no covariates ## X.adj <- cbind(sex, gender, BMI) ## the ngeno-by-p matrix for covariates. In this case p=3 (sex, gender and BMI); ## these covariates are not given in the example dataset) ###-------------------------------------------------------- ### Full-Dimensional analysis (Schaid et al 2002) ###------------------------------------------------------- library(haplo.stats) ## keep many rare haplotypes in the analysis and use simulated p-values Y01.hap.reg.FD <- haplo.score(y=Y01.vec, geno=Geno, trait.type="binomial", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =1e-6, simulate = TRUE) Yqt.hap.reg.FD <- haplo.score(y=Yqt.vec, geno=Geno, trait.type="gaussian", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =1e-6, simulate = TRUE) ## discard rare haplotypes in the analysis and use chi-squared p-values haplo.score(y=Y01.vec, geno=Geno, trait.type="binomial", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =0.005) haplo.score(y=Yqt.vec, geno=Geno, trait.type="gaussian", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =0.005) ###------------------------------------ ### Reduced-Dimensional analysis ###------------------------------------ source("main.functions.forRDscoreTest.public.r") ## get FUNCTIONS needed for Hap-RD analysis Y01.hap.reg.RD <- haplo.score.RD.unphased.fun(y=Y01.vec,geno=Geno,trait.type = "binomial", miss.val=NA, locus.label=NA, x.adj= X.adj, skip.haplo = 1e-6) Yqt.hap.reg.RD <- haplo.score.RD.unphased.fun(y=Yqt.vec,geno=Geno,trait.type = "gaussian", miss.val=NA, locus.label=NA, x.adj= X.adj, skip.haplo = 1e-6) ## The plot shows the haplotype frequencies estimated by the EM ## algorithm, and the haplotype categories are sorted from high to ## low. Those haplotypes with frequencies EQUAL and MORE than the ## value indicated by the horizontial line are reserved as the ## "core" haplotypes, and those haplotypes with frequencies below ## the line will be re-distributed into their cloest ancestral ## haplotype categories. ## ## In this example, the smallest 6 haplotypes will be clustered ## into the 7 "core" haplotype categories. ###--------------------------------------------------------------------- ### some examples on manipulating the output to see the testing results: ###--------------------------------------------------------------------- ## (1) estimated hap frequencies for the CLUSTERED (i.e., the re-distributed) haplotypes: names( Y01.hap.reg.RD$score.haplo ) = Y01.hap.reg.RD$haplotype; Y01.hap.reg.RD$score.haplo ## (2) global test Y01.hap.reg.RD$score.global.p < 0.05 ## (3) haplotype specific test hap.RD = colnames(Y01.hap.reg.RD$hap.prob.RD) reg.coef <-round(cbind(c(Y01.hap.reg.RD$hap.prob.RD), Y01.hap.reg.RD$score.haplo, Y01.hap.reg.RD$score.haplo.p), 5); rownames(reg.coef) = hap.RD; colnames(reg.coef) = c("prob", "score", "pvalue.chi") reg.coef ################################################################################################## ################################################################################################## ################################################################################################## ## Below is the results from the above analysis: ################################################################################################## ################################################################################################## ################################################################################################## Y01.hap.reg.FD ## -------------------------------------------------------------------------------- ## Global Score Statistics ## -------------------------------------------------------------------------------- ## ## global-stat = 22.63057, df = 11, p-val = 0.01992 ## ## -------------------------------------------------------------------------------- ## Global Simulation p-value Results ## -------------------------------------------------------------------------------- ## ## Global sim. p-val = 0.00783 ## Max-Stat sim. p-val = 0.00212 ## Number of Simulations, Global: 7539 , Max-Stat: 7539 ## ## -------------------------------------------------------------------------------- ## Haplotype-specific Scores ## -------------------------------------------------------------------------------- ## ## loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 Hap-Freq Hap-Score p-val sim p-val ## [1,] 1 0 0 1 0 0 0.09639 -2.66294 0.00775 0.00783 ## [2,] 0 0 0 0 1 0 0.29334 -1.67615 0.09371 0.09338 ## [3,] 1 0 0 1 1 0 0.01347 -0.92335 0.35582 0.32166 ## [4,] 1 0 0 1 1 1 0.00333 -0.66673 0.50494 0.23836 ## [5,] 0 0 0 1 0 0 0.00563 -0.4401 0.65986 0.61109 ## [6,] 0 0 0 0 1 1 0.09883 0.01027 0.9918 0.98621 ## [7,] 1 0 0 0 0 0 0.07827 0.02586 0.97937 0.97679 ## [8,] 1 0 0 0 1 0 0.18602 0.35561 0.72214 0.72025 ## [9,] 0 0 0 0 0 0 0.04603 0.6457 0.51847 0.51811 ## [10,] 0 0 0 1 0 1 0.00118 0.76637 0.44345 1 ## [11,] 1 1 1 0 1 1 0.00415 1.11626 0.26431 0.37697 ## [12,] 1 0 0 0 1 1 1e-05 1.17793 0.23882 0.17124 ## [13,] 1 1 1 0 1 0 0.17335 3.63409 0.00028 0.00027 Yqt.hap.reg.FD ## -------------------------------------------------------------------------------- ## Global Score Statistics ## -------------------------------------------------------------------------------- ## ## global-stat = 15.40115, df = 12, p-val = 0.22023 ## ## -------------------------------------------------------------------------------- ## Global Simulation p-value Results ## -------------------------------------------------------------------------------- ## ## Global sim. p-val = 0.14313 ## Max-Stat sim. p-val = 0.126 ## Number of Simulations, Global: 2131 , Max-Stat: 1000 ## ## -------------------------------------------------------------------------------- ## Haplotype-specific Scores ## -------------------------------------------------------------------------------- ## ## loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 Hap-Freq Hap-Score p-val sim p-val ## [1,] 1 0 0 1 0 0 0.09639 -1.88683 0.05918 0.066 ## [2,] 1 0 0 1 1 0 0.01347 -1.6479 0.09937 0.104 ## [3,] 0 0 0 0 1 0 0.29334 -1.3206 0.18664 0.156 ## [4,] 0 0 0 1 0 0 0.00563 -1.23731 0.21597 0.168 ## [5,] 1 0 0 1 1 1 0.00333 -0.75554 0.44992 0.364 ## [6,] 0 0 0 1 0 1 0.00118 -0.21591 0.82906 0.665 ## [7,] 1 0 0 0 1 0 0.18602 -0.08026 0.93603 0.925 ## [8,] 1 0 0 0 1 1 1e-05 0.036 0.97128 0.431 ## [9,] 0 0 0 0 1 1 0.09883 0.06742 0.94625 0.961 ## [10,] 1 1 1 0 1 1 0.00415 0.33066 0.7409 0.633 ## [11,] 0 0 0 0 0 0 0.04603 0.95484 0.33966 0.335 ## [12,] 1 0 0 0 0 0 0.07827 1.13612 0.25591 0.28 ## [13,] 1 1 1 0 1 0 0.17335 2.65978 0.00782 0.008 haplo.score(y=Y01.vec, geno=Geno, trait.type="binomial", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =0.005) ## -------------------------------------------------------------------------------- ## Global Score Statistics ## -------------------------------------------------------------------------------- ## ## global-stat = 20.92524, df = 9, p-val = 0.01299 ## ## -------------------------------------------------------------------------------- ## Haplotype-specific Scores ## -------------------------------------------------------------------------------- ## ## loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 Hap-Freq Hap-Score p-val ## [1,] 1 0 0 1 0 0 0.09639 -2.66294 0.00775 ## [2,] 0 0 0 0 1 0 0.29334 -1.67615 0.09371 ## [3,] 1 0 0 1 1 0 0.01347 -0.92335 0.35582 ## [4,] 0 0 0 1 0 0 0.00563 -0.4401 0.65986 ## [5,] 0 0 0 0 1 1 0.09883 0.01027 0.9918 ## [6,] 1 0 0 0 0 0 0.07827 0.02586 0.97937 ## [7,] 1 0 0 0 1 0 0.18602 0.35561 0.72213 ## [8,] 0 0 0 0 0 0 0.04603 0.6457 0.51847 ## [9,] 1 1 1 0 1 0 0.17335 3.63409 0.00028 haplo.score(y=Yqt.vec, geno=Geno, trait.type="gaussian", miss.val=NA, locus.label=NA, x.adj=X.adj, skip.haplo =0.005) ## -------------------------------------------------------------------------------- ## Global Score Statistics ## -------------------------------------------------------------------------------- ## ## global-stat = 15.27781, df = 9, p-val = 0.08358 ## ## ## ## -------------------------------------------------------------------------------- ## Haplotype-specific Scores ## -------------------------------------------------------------------------------- ## ## loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 Hap-Freq Hap-Score p-val ## [1,] 1 0 0 1 0 0 0.09639 -1.88683 0.05918 ## [2,] 1 0 0 1 1 0 0.01347 -1.6479 0.09937 ## [3,] 0 0 0 0 1 0 0.29334 -1.3206 0.18664 ## [4,] 0 0 0 1 0 0 0.00563 -1.23731 0.21597 ## [5,] 1 0 0 0 1 0 0.18602 -0.08026 0.93603 ## [6,] 0 0 0 0 1 1 0.09883 0.06742 0.94625 ## [7,] 0 0 0 0 0 0 0.04603 0.95484 0.33966 ## [8,] 1 0 0 0 0 0 0.07827 1.13612 0.25591 ## [9,] 1 1 1 0 1 0 0.17335 2.65978 0.00782 ###------------------------------------ ### Reduced-Dimensional analysis ###------------------------------------ Y01.hap.reg.RD ## $score.global ## [,1] ## [1,] 20.49790 ## ## $df ## [1] 6 ## ## $score.global.p ## [,1] ## [1,] 0.002257099 ## ## $score.haplo ## 000000 000010 000011 100000 100010 100100 ## 0.61223351 -1.67614824 0.01046958 0.02585492 0.15073275 -2.74314033 ## 111010 ## 3.75162866 ## ## $score.haplo.p ## [1] 0.5403832726 0.0937091596 0.9916466400 0.9793730570 0.8801865356 ## [6] 0.0060854673 0.0001756896 ## ## $haplotype ## [1] "000000" "000010" "000011" "100000" "100010" "100100" "111010" ## ## $matB ## 000000 000010 000011 100000 100010 100100 111010 ## 000000 1.0000000 0 0.0000000000 0 0.0000000 0.0000000 0 ## 000010 0.0000000 1 0.0000000000 0 0.0000000 0.0000000 0 ## 000011 0.0000000 0 1.0000000000 0 0.0000000 0.0000000 0 ## 100000 0.0000000 0 0.0000000000 1 0.0000000 0.0000000 0 ## 100010 0.0000000 0 0.0000000000 0 1.0000000 0.0000000 0 ## 100100 0.0000000 0 0.0000000000 0 0.0000000 1.0000000 0 ## 111010 0.0000000 0 0.0000000000 0 0.0000000 0.0000000 1 ## 000100 0.3231771 0 0.0000000000 0 0.0000000 0.6768229 0 ## 100011 0.0000000 0 0.3469436353 0 0.6530564 0.0000000 0 ## 100110 0.0000000 0 0.0000000000 0 0.6586915 0.3413085 0 ## 111011 0.0000000 0 0.0000000000 0 0.0000000 0.0000000 1 ## 000101 0.3231771 0 0.0000000000 0 0.0000000 0.6768229 0 ## 100111 0.0000000 0 0.0003309574 0 0.6586861 0.3409829 0 ## ## $hap.prob.RD ## ## 000000 000010 000011 100000 100010 100100 111010 ## [1,] 0.04822624 0.2933377 0.09883231 0.07827456 0.1970972 0.1067321 0.1775 ## ## $hap.prob.FD ## 000000 000010 000011 100000 100010 100100 ## 4.602532e-02 2.933377e-01 9.882675e-02 7.827456e-02 1.860228e-01 9.638986e-02 ## 111010 000100 100011 100110 111011 000101 ## 1.733486e-01 5.627773e-03 1.286488e-05 1.347343e-02 4.151449e-03 1.182484e-03 ## 100111 ## 3.326453e-03 ## ## $locus.label ## [1] "loc-1" "loc-2" "loc-3" "loc-4" "loc-5" "loc-6" Yqt.hap.reg.RD ## $score.global ## [,1] ## [1,] 12.31810 ## ## $df ## [1] 6 ## ## $score.global.p ## [,1] ## [1,] 0.05523713 ## ## $score.haplo ## [1] 0.69980548 -1.43644053 0.06162877 1.26782710 -0.44351106 -2.28948274 ## [7] 2.31846530 ## ## $score.haplo.p ## [1] 0.48404879 0.15087703 0.95085846 0.20485970 0.65739612 0.02205132 0.02042405 ## ## $haplotype ## [1] "000000" "000010" "000011" "100000" "100010" "100100" "111010" ## ## $matB ## 000000 000010 000011 100000 100010 100100 111010 ## 000000 1.0000000 0 0.0000000000 0 0.0000000 0.0000000 0 ## 000010 0.0000000 1 0.0000000000 0 0.0000000 0.0000000 0 ## 000011 0.0000000 0 1.0000000000 0 0.0000000 0.0000000 0 ## 100000 0.0000000 0 0.0000000000 1 0.0000000 0.0000000 0 ## 100010 0.0000000 0 0.0000000000 0 1.0000000 0.0000000 0 ## 100100 0.0000000 0 0.0000000000 0 0.0000000 1.0000000 0 ## 111010 0.0000000 0 0.0000000000 0 0.0000000 0.0000000 1 ## 000100 0.3231771 0 0.0000000000 0 0.0000000 0.6768229 0 ## 100011 0.0000000 0 0.3469436517 0 0.6530563 0.0000000 0 ## 100110 0.0000000 0 0.0000000000 0 0.6586915 0.3413085 0 ## 111011 0.0000000 0 0.0000000000 0 0.0000000 0.0000000 1 ## 000101 0.3231771 0 0.0000000000 0 0.0000000 0.6768229 0 ## 100111 0.0000000 0 0.0003313412 0 0.6586861 0.3409826 0 ## ## $hap.prob.RD ## ## 000000 000010 000011 100000 100010 100100 111010 ## [1,] 0.04822624 0.2933377 0.09883232 0.07827456 0.1970972 0.1067321 0.1775 ## ## $hap.prob.FD ## 000000 000010 000011 100000 100010 100100 ## 4.602533e-02 2.933377e-01 9.882675e-02 7.827456e-02 1.860228e-01 9.638986e-02 ## 111010 000100 100011 100110 111011 000101 ## 1.733486e-01 5.627773e-03 1.287982e-05 1.347343e-02 4.151435e-03 1.182484e-03 ## 100111 ## 3.326453e-03 ## ## $locus.label ## [1] "loc-1" "loc-2" "loc-3" "loc-4" "loc-5" "loc-6" ##