#---------------------------------------------------------- # A. Purpose of the function: #---------------------------------------------------------- # Fits the model for each value on the grid of lambda values # A finer grid can be chosen for more accuracy # Solution is chosen via BIC # # The data should be called dat, with a response Y and a Geno object. # # The family can be gaussian, binomial, or poisson # # The details of the control options, etc are the same as in Haplo.glm from # the Haplo.stats package. The fit is a penalized version, # but the other options remain the same. # #---------------------------------------------------------- # B. Specific steps: #---------------------------------------------------------- # (1)* Save "GLM_Haplos.R" and this file ("example_code.R") # in the same directory. # * Save the example data files ("myy.bt", "myy.qt", "mygeno", and # "myx.adj" under a subdirectiory "Data". # These example files can be obtained from: # http://www4.stat.ncsu.edu/~jytzeng/Software/PLHap/Data # or there are links to the files on the PLHap page at # http://www4.stat.ncsu.edu/~jytzeng/Software/software_plhap.php # * Then run the lines below. #---------------------------------------------------------- # This requires the haplo.stats package. source("GLM_Haplos.R") #---------------------------------------------------------- # (2) Read in example data file haplo.stat format # code "case" as 1, "control" as 0 #---------------------------------------------------------- geno = read.table("Data/mygeno", header=T) Y.bt = unlist(read.table("Data/myy.bt", header=T)) Y.qt = unlist(read.table("Data/myy.qt", header=T)) X.adj = read.table("Data/myx.adj"); X.adj = X.adj[1:100,] ## contain 2 explanatory variables # the following is asked by "haplo.glm"; please read the help menu for haplo.glm for more details Geno <- setupGeno(geno, miss.val=NA) dat.bt <- data.frame(Geno=Geno, Y=Y.bt) dat.qt <- data.frame(Geno=Geno, Y=Y.qt) #---------------------------------------------------------- # (2') If input file is PLINK format (here we do not have such example file) # then Y = the 6th column of.ped file -1 # (as haplo.stats or haplo.glm code "case" as 1, "control" as 0) # Geno = ped file removing columns 1 to 6 # (however, below I only take the first 20 columns for demo) #---------------------------------------------------------- #ped = read.table("Data/plink.ped") #geno= ped[,-c(1:6)] #Y = ped[,6]-1 #X.adj = read.table("Data/myx.adj"); X.adj = X.adj[1:100,] ## contain 2 explanatory variables ## the following is asked by "haplo.glm"; please read the help menu for haplo.glm for more details #Geno <- setupGeno(geno, miss.val=NA) #dat <- data.frame(Geno=Geno, Y=Y) #---------------------------------------------------------- # (3) Run the following code: #---------------------------------------------------------- lambdas = seq(0.1,5,0.1) # -- Binary Traits -- btfit.pen.reg <- haplo.pen.glm.1(Y ~ Geno, family = binomial, lambdas = lambdas, allele.lev=attributes(Geno)$unique.alleles, data=dat.bt, control = haplo.glm.control(keep.rare.haplo=FALSE)) # -- Continuous Traits -- qtfit.pen.reg <- haplo.pen.glm.1(Y ~ Geno, family = gaussian, lambdas = lambdas, allele.lev=attributes(Geno)$unique.alleles, data=dat.qt, control = haplo.glm.control(keep.rare.haplo=FALSE)) ##----------------------------------------------------- ## below illustate using the binary trait analysis "btfit.pen.reg" ##----------------------------------------------------- btfit.pen.reg # full list of the unique haplotypes # (each row corresponds to a unique haplotype consistent with the observed data) btfit.pen.reg$haplo.unique # estimated frequencies corresponding to the unique haplotypes btfit.pen.reg$haplo.freq # haplotype set as the baseline for the regression # (the number corresponds to the row number from the list of unique haplotypes) btfit.pen.reg$haplo.base # coefficient estimates # (the numbers correspond to the rows in the list of unique haplotypes) btfit.pen.reg$coef #------------------------------------------------- # Result from the example #------------------------------------------------- # * haplotype has the same effect when they have exact same coefficiet # (as this is a penalized method instead of hypo testing # * the output follows the format of "haplo.glm". Can refer to the help # menu in R for haplo.glm for more detailed explanation of output #------------------------------------------------- ##=================== btfit.pen.reg ##=================== ## Call: ## haplo.glm(formula = formula, family = family, ## data = data, na.action = na.action, miss.val = miss.val, ## locus.label = locus.label, allele.lev = allele.lev, ## control = control, method = method, model = model, ## x = TRUE, y = TRUE, contrasts = contrasts) ## ## Coefficients: ## coef se t.stat pval ## (Intercept) -0.0664 0.161 -0.412 0.681 ## Geno.2 0.0000 0.165 0.000 1.000 ## Geno.3 0.2532 0.218 1.164 0.245 ## ## Haplotypes: ## loc.1 loc.2 loc.3 hap.freq ## Geno.2 0 1 0 0.312 ## Geno.3 0 1 1 0.132 ## haplo.base 0 0 0 0.550 ## full list of the unique haplotypes ## (each row corresponds to a unique haplotype consistent with the observed data) btfit.pen.reg$haplo.unique ## loc-1 loc-2 loc-3 ## 1 "0" "0" "0" ## 2 "0" "1" "0" ## 3 "0" "1" "1" ## 4 "1" "0" "1" ## 5 "1" "1" "1" ## estimated frequencies corresponding to the unique haplotypes btfit.pen.reg$haplo.freq ## [1] 0.5500 0.3122 0.1316 0.0000 0.0062 ## haplotype set as the baseline for the regression ## (the number corresponds to the row number from the list of unique haplotypes) # #btfit.pen.reg$haplo.base ## [1] 1 ## coefficient estimates ## (the numbers correspond to the rows in the list of unique haplotypes) btfit.pen.reg$coef # (Intercept) Geno.2 Geno.3 # # -0.066442 0.000000 0.253236 ##============================== qtfit.pen.reg ##============================== ## Call: ## haplo.glm(formula = formula, family = family, ## data = data, na.action = na.action, miss.val = miss.val, ## locus.label = locus.label, allele.lev = allele.lev, ## control = control, method = method, model = model, ## x = TRUE, y = TRUE, contrasts = contrasts) ## ## Coefficients: ## coef se t.stat pval ## (Intercept) 101 1.60 63.3 0 ## Geno.2 0 1.63 0.0 1 ## Geno.3 0 2.13 0.0 1 ## ## Haplotypes: ## loc.1 loc.2 loc.3 hap.freq ## Geno.2 0 1 0 0.312 ## Geno.3 0 1 1 0.132 ## haplo.base 0 0 0 0.550 qtfit.pen.reg$haplo.unique ## loc-1 loc-2 loc-3 ## 1 "0" "0" "0" ## 2 "0" "1" "0" ## 3 "0" "1" "1" ## 4 "1" "1" "1" # estimated frequencies corresponding to the unique haplotypes qtfit.pen.reg$haplo.freq ## [1] 0.5500 0.3121 0.1316 0.0062 # haplotype set as the baseline for the regression # (the number corresponds to the row number from the list of unique haplotypes) qtfit.pen.reg$haplo.base ## [1] 1 # coefficient estimates # (the numbers correspond to the rows in the list of unique haplotypes) ## ## qtfit.pen.reg$coef ## (Intercept) Geno.2 Geno.3 ## 101.188 0.000 0.000