fsr<-function(x,y,B=500,seed=100,amax=.3,gam0=.05){ # code by Dennis Boos, latest version 4/10 # estimated alpha for forward selection # B= number of bootstrap resamples, amax=max of alpha grid require(leaps) set.seed(seed) ok<-complete.cases(x,y) x<-x[ok,] # get rid of na's y<-y[ok] # since regsubsets can't handle na's m<-ncol(x) n<-nrow(x) p<-2*m # dim of phony enhanced x vp<-1:p # needed to get p-values vm<-1:m as.matrix(x)->x # in case x is a data frame pvm<-rep(0,m) # to create pvm below pvmax<-rep(0,p) # to create pvmax below alpha<-c(seq(.001,.20,by=.001),seq(.21,amax,by=.01)) ng<-length(alpha) # length of grid U<-rep(0,ng) # will contain num. of phonys entering S<-rep(0,ng) # will contain num. of true entering in orig. SP<-rep(0,ng) # will contain num. of true entering in enhanced regsubsets(x,y,method="forward")->out.x pv.orig<-1-pf((out.x$rss[vm]-out.x$rss[vm+1])*(out.x$nn-(vm+1))/out.x$rss[vm+1],1,out.x$nn-(vm+1)) for (i in 1:m){pvm[i]<-max(pv.orig[1:i])} # sequential max of pvalues print(real.seq<-data.frame(var=(out.x$vorder-1)[2:(m+1)],pval=round(pv.orig,4),pvmax=round(pvm,4))) # Get size of selected model for each alpha, just do 2:ng if alpha[1]=0 for (ia in 1:ng){S[ia] <- sum(pvm<=alpha[ia])} # next few to create projection cbind(rep(1,n),x)->x2 # must add on col. of 1's x2%*%solve(t(x2)%*%x2)%*%t(x2)->hx # projection, to create residuals diag(rep(1,n))->id # identity for (ib in 1:B){ # start bootstrap loop x[sample(n),]->xp # create matrix of dummies (id-hx)%*%xp->xp2 # residuals of phonys regressed on x xs<-cbind(x,xp2) # enhanced x matrix regsubsets(x=xs,y,method="forward")->out # pv = p-values of entering var pv<-1-pf((out$rss[vp]-out$rss[vp+1])*(out$nn-(vp+1))/out$rss[vp+1],1,out$nn-(vp+1)) for (i in 1:p){pvmax[i]<-max(pv[1:i])} # sequential max of pvalues vor<-(out$vorder-1)[2:(p+1)] # have to get rid of intercept # Note that we are summing over bootstrap replications for each alpha for (ia in 1:ng){U[ia] <- U[ia]+sum(vor>m & pvmax<=alpha[ia])} for (ia in 1:ng){SP[ia] <-SP[ia]+sum(pvmax<=alpha[ia])} } # ends bootstrap loop U<-U/B # average number of phonys entered SP<-SP/B # average number of var entered ghat.RE<-(m-S)*(U/m)/(1+SP-U) # gammahat_RE, SP-U plays role of S ghat<-(m-S)*(U/m)/(1+S) # gammahat_ER alphamax.RE<-alpha[ghat.RE==max(ghat.RE)] # alpha with largest ghat.RE alphamax<-alpha[ghat==max(ghat)] # alpha with largest ghat ind.RE<-(ghat.RE <= gam0 & alpha<=alphamax.RE)*1 ind<-(ghat <= gam0 & alpha<=alphamax)*1 max(ind.RE*alpha)->alphahat.RE # RE est. of alpha max(ind*alpha)->alphahat.ER # ER est. of alpha plot(alpha,ghat,type="l",lty=2,ylab="Est. Gamma",xlab="alpha") lines(alpha,ghat.RE,col="red") title("Est. gamma.RE (red) and gamma.ER (dash) Curves") print(res<-data.frame(m,n,gam0,amax,B,seed,alphahat.RE,alphahat.ER)) # now get the selected model, size1=size of model including intercept if(alphahat.ER>0) size1<-sum(pvm<=alphahat.ER)+1 else size1<-1 colnames(x)<-colnames(x,do.NULL=F,prefix="") # corrects for no colnames x<-x[,colnames(x)[(out.x$vorder-1)[2:size1]]] if (size1==1) {mod <- lm(y~1)} else {mod <- lm(y~x)} cat("",fill=T) cat("Variables Selected for alphahat.ER =",colnames(x),fill=T) return(list(S=S,U=U,SP=SP,alpha=alpha,ghat.RE=ghat.RE,ghat=ghat, real.seq=real.seq,res=res,mod=mod,size.ER=size1-1)) }