library(haplo.stats) haplo.pen.glm.1 = function (formula = formula(data), family = gaussian, data = sys.parent(), na.action = "na.geno.keep", lambdas = seq(0.1,5,0.1), eps = 10^(-6), miss.val = c(0, NA), locus.label = NA, allele.lev = NULL, control = haplo.glm.control(), method = "glm.fit", model = FALSE, x = FALSE, y = TRUE, contrasts = NULL, ...) { require(haplo.stats) require(MASS) lambdas = sort(lambdas) if (min(lambdas) <=0) {return(cat("ERROR: All values for lambda must be > 0. \n"))} if (eps <=0) {return(cat("ERROR: Eps must be > 0. \n"))} init.fit = haplo.glm(formula = formula, family = family, data = data, na.action = na.action, miss.val = miss.val, locus.label = locus.label, allele.lev = allele.lev, control = control, method = method, model = model, x = TRUE, y = TRUE, contrasts = contrasts) init.est = c(init.fit$coef[1],init.fit$coef[init.fit$haplo.names]) p = length(init.est)-1 cov.est = init.fit$var.mat inv.cov.est = ginv(cov.est)[1:(p+1),1:(p+1)] X.star = chol(inv.cov.est) Y.star = X.star%*%init.est X = init.fit$x y = init.fit$y w = init.fit$haplo.post.info$post haplo.names = init.fit$haplo.names y1 = y X1 = X y = sqrt(w)*y X = diag(sqrt(w))%*%X haplo.x.mat = X[,haplo.names] qp = p*(p-1)/2 if (qp==0) temp.dm = matrix(0,nrow=1,ncol=1) if (qp>0) { v1 = rep(0,qp) c1 = qp c2 = 0 for(w1 in 1:(p-1)) { c1 = c1-w1 c2 = c2+(w1-1) v1 = cbind(c(rep(0,c1),rep(1,w1),rep(0,c2)),v1) } v2 = matrix(0,qp,p) c1 = 1 c2 = p-1 for (w1 in 1:(p-1)) { v2[c1:c2,(w1+1):p] = diag(p-w1) c1 = c2+1 c2 = c2+p-w1-1 } temp.dm = v1 - v2 } dm = temp.dm dm = dm[apply(abs(dm),1,sum)>0,] if (length(dm) == 0) {dm = matrix(0,nrow=1,ncol=total.p)} if (length(dm) == p) {dm = matrix(dm,nrow=1)} abs.wt.beta = abs(init.est[-1]) abs.wt.diff = abs(dm %*% init.est[-1]) a.sc.vec = as.vector(1 / c(abs.wt.beta, abs.wt.diff)) full.mat = NULL beta.t = init.est fit = init.fit gdat1 = fit$haplo.post.info$hap1 gdat2 = fit$haplo.post.info$hap2 prior.coef = ifelse(gdat1 != gdat2, 2, 1) subj.indx = as.numeric(factor(fit$haplo.post.info$indx)) len.subj.indx = length(subj.indx) n.subj = length(unique(subj.indx)) prior.tot = rep(0, n.subj) for(lambda in lambdas) { beta.eps = rep(1,length(abs.wt.beta)) while(max(beta.eps)>eps) { beta.tilde = beta.t[-1] abs.tilde = abs(beta.tilde) abs.diff = as.vector(abs(dm %*% beta.tilde)) if (length(abs.diff)<2) {quad.mat.2 = a.sc.vec[-(1:p)]/(abs.diff+eps)} else {quad.mat.2 = diag(a.sc.vec[-(1:p)]/(abs.diff+eps))} if (length(abs.tilde)<2) {quad.mat.1 = a.sc.vec[1:p]/(abs.tilde+eps)} else {quad.mat.1 = diag(a.sc.vec[1:p]/(abs.tilde+eps))} if (length(abs.diff)==1) {quad.mat.2 = 0} penalty.mat = lambda*(quad.mat.1 + t(dm) %*% quad.mat.2 %*% dm) Sigma.Mat = t(X.star) %*% X.star + rbind(0,cbind(0,penalty.mat)) beta.t = solve(Sigma.Mat) %*% t(X.star) %*% Y.star beta.eps = abs(beta.tilde-beta.t[-1])/(abs(beta.tilde)+eps) } df = sum((unique(abs(round(beta.t,6)))!=0)) fit$fitted.values = X1%*%beta.t switch(as.character(fit$family[1]), Binomial = , binomial = { fit$fitted.values = 1/(1+exp(-fit$fitted.values)) }, Poisson = , poisson = { fit$linear.predictors = fit$fitted.values }) dfit = dglm.fit(fit) prior = prior.coef*fit$haplo.freq[gdat1]*fit$haplo.freq[gdat2]*dfit tmp.sum = .C("groupsum", x = as.double(prior), indx = as.integer(subj.indx), n = as.integer(len.subj.indx), grouptot = as.double(prior.tot), ngroup = as.integer(n.subj), PACKAGE = "haplo.stats") pr.pheno = tmp.sum$grouptot lnlike.new = sum(log(pr.pheno)) bic = -2*lnlike.new+log(sum(w))*df full.mat = rbind(full.mat,c(beta.t,bic,df)) } BIC.loc = which.min(full.mat[,(p+2)]) if (lambdas[BIC.loc] == max(lambdas)) {cat("Note: The chosen lambda is the largest of the input grid. \n You should try larger values of lambda \n to continue the search for the minimum BIC. \n")} if (lambdas[BIC.loc] == min(lambdas)) {cat("Note: The chosen lambda is the smallest of the input grid. \n You should try smaller values of lambda \n to continue the search for the minimum BIC. \n")} BIC.coefs = round(full.mat[BIC.loc, 1:(p+1)],6) names(BIC.coefs) = c("(Intercept)",colnames(haplo.x.mat)) fit$haplo.freq = round(fit$haplo.freq,4) fit$coefficients = BIC.coefs fit$lambda.grid = lambdas fit$BIC = full.mat[, p+2] fit$df = full.mat[, p+3] fit$lambda = lambdas[BIC.loc] fit$init.coefs = init.fit$coef fit$CoefMat = full.mat[, 1:(p+1)] ## CSCS START 3 Feb 2010 ## CSCS Add a new class attribute to the fit object to get a new print function (below). attr(fit, 'class') = c("haplo.pen.glm", class(fit)) ## CSCS END return(fit) } ## CSCS START 3 Feb 2010 ## This function was copied from print.haplo.glm: Revision 1.10 2008/04/04 16:10:18 sinnwell ## We do not want to print the se, t.stat and p-val columns. print.haplo.pen.glm <- function(x, print.all.haplo=FALSE, show.missing=FALSE, digits = max(options()$digits - 4, 3), ...) { if(exists("is.R") && is.function(is.R) && is.R()) { x$call <- deparse(x$call, width.cutoff=40) cat("\n Call: ", x$call, sep="\n") } else { cat("\n Call: \n") dput(x$call) } haplo.df<- function(x){ z <- x$haplo.common df <- as.matrix(x$haplo.unique[z,,drop=FALSE]) y <- x$haplo.freq[z] if(x$haplo.rare.term){ df <- rbind(df, rep("*",ncol(df))) y <- c(y, sum(x$haplo.freq[x$haplo.rare])) } # use dimnames to change row names do not convert from matrix to df dimnames(df)[[1]] <- x$haplo.names df <- rbind(df,x$haplo.unique[x$haplo.base,]) dimnames(df)[[1]][nrow(df)] <- "haplo.base" y <- c(y,x$haplo.freq[x$haplo.base]) data.frame(df,hap.freq=y) } ncoef <- length(x$coef) coef <- x$coef cat("\nCoefficients:\n") print(coef, digits=digits) cat("\nHaplotypes:\n") hap.df <- haplo.df(x) print(hap.df, digits=digits) if(print.all.haplo){ haplo.type <- rep(NA,length(x$haplo.freq)) haplo.type[x$haplo.common] <- "C" haplo.type[x$haplo.rare] <- "*" haplo.type[x$haplo.base] <- "B" df <- data.frame(x$haplo.unique, hap.freq = round(x$haplo.freq, digits), hap.type=haplo.type) cat("\n") printBanner("All Haplotypes") cat("B = base haplotype\n") cat("C = common haplotype\n") cat("* = rare haplotype\n\n") print(df) } if(show.missing) {# & (nrow(miss.tbl) > 1)) { cat("\nSubjects removed by NAs in y or x, or all NA in geno\n") miss.tbl <- apply(1*x$missing, 2, sum) print(miss.tbl) } } ## CSCS END