library(mvtnorm) #define functions makeprobs<-function(v,N){ #compute the stick-breaking weights probs<-v probs[2:N]<-probs[2:N]*cumprod(1-v[2:N-1]) probs} BayesNPReg<-function(y,X,runs=5000,N=20){ #y: vector of observations #X: matrix of predictors #N: Number of terms in the finite approx n<-length(y) p<-ncol(X) tXXinv<-solve(t(X)%*%X) #Generate initial values beta<-rep(0,p) theta<-rep(0,N) tau1<-tau2<-1 D<-1 v<-rbeta(N,1,D);v[N]<-1 probs<-makeprobs(v,N) g<-sample(1:N,n,replace=T,prob=probs) #keep track of stuff keepbeta<-keepgamma<-matrix(0,runs,p) keepD<-TruncProb<-rep(0,runs) x.grid<-seq(-10,10,0.1) density<-matrix(0,runs,length(x.grid)) #START THE MCMC! for(i in 1:runs){ #update beta: VAR<-tXXinv/tau1 MN<-tau1*VAR%*%t(X)%*%(y-theta[g]) beta<-as.vector(rmvnorm(1,MN,VAR)) #update the variances tau1<-rgamma(1,n/2+0.01,rate=sum((y-theta[g]-X%*%beta)^2)/2+0.01) tau2<-rgamma(1,N/2+0.01,rate=sum(theta^2)/2+0.01) #update theta resids<-y-X%*%beta for(j in 1:N){ VAR<-1/(tau1*sum(g==j)+tau2) MN<-VAR*sum(tau1*resids[g==j]) theta[j]<-rnorm(1,MN,sqrt(VAR)) } #update g,h for(s in 1:n){ g[s]<-sample(1:N,1,prob=probs*dnorm(resids[s],theta,1/sqrt(tau1))) } #new v for(j in 1:(N-1)){v[j]<-rbeta(1,sum(g==j)+1,sum(g>j)+D)} v[N]<-1 probs<-makeprobs(v,N) #new D: rgamma(1,N-1+0.1,rate=0.1-sum(log(1-v[-N]))) #keep track of stuff keepbeta[i,]<-beta keepD[i]<-D TruncProb[i]<-probs[N] for(j in 1:N){ density[i,]<-density[i,]+probs[j]*dnorm(x.grid,theta[j],1/sqrt(tau1)) } if(i%%100==0){ par(mfrow=c(1,1)) h<-hist(y-X%*%beta,breaks=25,main=i) lines(x.grid,max(h$count)*density[i,]/max(density[i,]),lwd=2,col=4) } } list(beta=keepbeta,D=keepD,TruncProb=TruncProb,grid=x.grid,density=density)} #A simulated example: set.seed(0820) n<-200 p<-5 x<-matrix(rnorm(n*p),n,p) x[,1]<-1 g<-rbinom(n,1,.8) y<-x[,1]+2*x[,2]+g*rnorm(n)+(1-g)*rnorm(n,3,3) print(summary(lm(y~x[,-1]))) fit<-BayesNPReg(y,x) print("Posterior summary of the betas") print(apply(fit$beta[1000:5000,],2,quantile,c(0.025,0.5,0.975))) #plot the estimated density: d<-apply(fit$density[1000:5000,],2,mean) plot(fit$grid,d,ylim=range(d),type="l", ylab="post mean dense",xlab="y")