allsub<-function(x,y,ret=F,print=T,m1=0){ # gets best subset of each size using leaps package # x is matrix or data frame of independent variables # y is vector of responses # ret=T returns printed stuff for plotting # print=T prints output to screen # m1=size of largest model to return, default=# columns in x require(leaps) ok<-complete.cases(x,y) x<-x[ok,] # get rid of na's y<-y[ok] # since regsubsets can't handle na's m<-ncol(x) # number of ind. var. n<-nrow(x) # sample size if(m1==0)m1=m vm<-1:m1 as.matrix(x)->x # in case x is a data frame regsubsets(x,y,method="exhaustive",nvmax=m1)->out.x summary(out.x)$which->z summary(out.x)->z2 Cp<-z2$rss*(n-m-1)/out.x$rss[m+1]-n+2*(vm+1) Cp.0=out.x$rss[1]*(n-m-1)/out.x$rss[m+1]-n+2 Cp.adj<-Cp-2*(m-vm)/(n-m-3) # Gilmour's adjusted Cp Cp.adj.0=Cp.0-2*m/(n-m-3) BIC<-n*log(2*pi)+n+n*log(z2$rss/n)+log(n)*(vm+2) # true BIC BIC.0=n*log(2*pi)+n+n*log(out.x$rss[1]/n)+log(n)*2 MSE=z2$rss/(n-vm-1) MSE.0=out.x$rss[1]/(n-1) RSQ=z2$rsq # RSQ.0=0 RSQ.adj=z2$adjr2 # RSQ.adj.0=0 var.in=matrix(0,nrow=m1,ncol=m) for(i in 1:m1){var.in[i,1:i]=which(z[i,-1])} if(print){ cat("Best Subsets of Size s",fill=T) cat("",fill=T) print(data.frame(p=vm+1,s=vm,var.in)) cat("",fill=T) cat("Model Summary Statistics",fill=T) cat("",fill=T) print(out<-data.frame(p=c(0,vm)+1,s=c(0,vm), Cp=round(c(Cp.0,Cp),3),Cp.adj=round(c(Cp.adj.0,Cp.adj),3), BIC=round(c(BIC.0,BIC),3),MSE=round(c(MSE.0,MSE),3),RSQ=round(c(0,RSQ),3),RSQ.adj=round(c(0,RSQ.adj),3))) cat("",fill=T) cat("p=number of effects in model, p=1 is just the intercept",fill=T) cat("s=number of independent variables in model",fill=T) cat("Cp=usual Cp, note p=s+1 where s=number of x variables in model",fill=T) cat("Cp.adj=adjusted Cp, choose smallest model with Cp.adj < p=s+1",fill=T) cat("BIC=official BIC, number of parameters=s+2, intercept & sigma^2",fill=T) cat("MSE=unbiased estimate of error variance",fill=T) cat("RSQ=usual R^2, 1-SSE/SST",fill=T) cat("RSQ.adj=adjusted R^2, 1-(n-1)(1-R^2)/(n-p)",fill=T) } # ends if(print) if(ret) return(out) }