BayesSemiPar<-function(y,x, runs=5000,burn=1000,update=100, a1=0.1,b1=0.1,a2=0.1,b2=0.1,inprob=1){ ################ model ###############: # y~N(int+x%*%beta,inverse variance = taue) # beta[j]<-delta[j]*alpha[j] # delta[j]~Bern(inprob) # alpha[j]~N(0,inv.var=taua) # int~flat # taue~gamma(a1,b1) # taua~gamma(a2,b2) ########################################################: n<-length(y) p<-ncol(x) #initial values: int<-mean(y) beta<-alpha<-inout<-rep(1,p) taua<-1 taue<-1/var(y) #keep track of stuff: keep.beta<-matrix(0,runs,p) keep.int<-keep.taue<-dev<-rep(0,runs) #START THE MCMC: for(i in 1:runs){ taue<-rgamma(1,n/2+a1,sum((y-int-x%*%beta)^2)/2+b1) taua<-rgamma(1,p/2+a2,sum(alpha^2)/2+b2) int<-rnorm(1,mean(y-x%*%beta),1/sqrt(n*taue)) #update alpha z<-x%*%diag(inout) V<-solve(taue*t(z)%*%z+taua*diag(p)) M<-taue*t(z)%*%(y-int) alpha<-V%*%M+t(chol(V))%*%rnorm(p) beta<-alpha*inout #update inclusion indicators: if(inprob<1){ r<-y-int-x%*%beta for(j in 1:p){ r<-r+x[,j]*beta[j] log.p.in<-log(inprob)-0.5*taue*sum((r-x[,j]*alpha[j])^2) log.p.out<-log(1-inprob)-0.5*taue*sum(r^2) p.in<-exp(log.p.in-log.p.out)/(1+exp(log.p.in-log.p.out)) inout[j]<-rbinom(1,1,p.in) beta[j]<-inout[j]*alpha[j] r<-r-x[,j]*beta[j] } } if(inprob==1){inout<-rep(1,p);beta<-alpha} keep.beta[i,]<-beta keep.int[i]<-int keep.taue[i]<-taue dev[i]<--2*sum(dnorm(y,int+x%*%beta,1/sqrt(taue),log=T)) if(i%%update==0){ plot(y,main=i) lines(int+x%*%beta) } } beta.mn<-apply(keep.beta[burn:runs,],2,mean) n.terms<-apply(keep.beta!=0,1,sum) fitted<-mean(keep.int[burn:runs])+x%*%beta.mn sigma<-median(1/sqrt(keep.taue[burn:runs])) dhat<--2*sum(dnorm(y,fitted,sigma,log=T)) dbar<-mean(dev[burn:runs]) pD<-dbar-dhat DIC<-dbar+pD list(beta=keep.beta[burn:runs,], beta.mn=beta.mn, int=keep.int[burn:runs], taue=keep.taue[burn:runs], n.terms=n.terms, fitted=fitted,dev=dev, dbar=dbar,DIC=DIC,pD=pD)} ############### Output ##################: # beta,int,taue,n.terms = posterior samples # beta.mn = posterior mean of beta # fitted = posterior mean of int + x%*%beta #######################################################: #Load the motorcylce data library(splines) library(MASS) y<-mcycle$accel t<-mcycle$times y<-(y-mean(y))/sd(y) t<-t/max(t) #Plot the basis functions: B<-ns(t,10) plot(t,t,col=0,ylim=range(B),xlab="t",ylab="Bj(t)") for(j in 1:ncol(B)){lines(t,B[,j])} #Fit with 5 knots: fit5<-BayesSemiPar(y,ns(t,5)) #Fit with 10 knots: fit10<-BayesSemiPar(y,ns(t,10)) #Fit with 15 knots: fit15<-BayesSemiPar(y,ns(t,15)) #Fit with 20 knots: fit20<-BayesSemiPar(y,ns(t,20)) #Fit with 50 knots: fit50<-BayesSemiPar(y,ns(t,50)) #Fit with 25 potential knots and select them with SSVS: fitSSVS<-BayesSemiPar(y,ns(t,25),inprob=0.5) hist(fit1$n.terms[1000:5000],xlab="Number of active knots",main="") boxplot(fitSSVS$beta~col(fitSSVS$beta),outline=F,xlab="j",ylab="b[j]") #ploted the fitted values: plot(t,y) lines(t,fit10$fitted) lines(t,fitSSVS$fitted,lwd=2) legend("topleft",c("J=10","SSVS"),lwd=1:2,inset=0.05) y<-c(0.529, 0.502, 0.473, 0.529, 0.473, 0.473, 0.473, 0.502 0.473 0.473, 0.502 0.473, 0.473, 0.473, 0.417, 0.473, 0.417, 0.529, 0.473, 0.473, 0.529 0.253, 0.417, 0.417, 0.336, 0.198, 0.057, 0.473, 0.057, -0.136, -0.578 -0.607, -0.303, 0.084, 0.084, -0.523, -0.359, -0.026, 0.084, -0.523, -0.748 0.417, -1.135, -0.692, -0.941, -1.357, -1.079, -0.247, -1.243, -2.019, -1.580 -1.522, -1.632, -1.799, -0.523, -2.019, -1.243, -0.968, -2.104, -2.019, -1.911 -2.244, -1.580, -1.715, -2.019, -2.019, -2.131, -1.799, -1.439, -1.164, -0.578 -0.804, -0.663, -0.968, -0.388, -0.026, 0.417, -1.688, 0.084, -0.829, 0.198 -0.415, 0.028, 0.725, 0.611, 0.777, 0.084, 1.305, 1.499, 0.169, 1.278 2.081, 0.696, 1.665, 1.526, 1.499, 0.860, 1.472, 0.556, 2.081, 0.198 -0.607, 1.969, 1.249, 1.193, -0.247, 1.000, 1.499, 0.750, 0.640, 0.502 0.084, 0.253, 1.166, 0.307, 1.137, 0.529, 0.307, 0.833, 0.502, 0.529 0.750, 0.750, -0.026, 0.224, 0.253, 0.529, 0.750, 0.224, 0.473, 0.750 0.473, 0.750)