#This file contains R code to perform MCMC sampling for #the model with a single continuous response with infomative #missingness. An example using simulated data is given below. info.miss.car<-function(Y,X,ADJ, runs=5000,burn=1000,update=10, eps=0.1,sd.beta=10){ #INPUTS: #Y = nsites x nsubs matrix of responses, missing = NA #X = nsubs x p matrix of subject-level covariates (no intercept) #ADJ[s,t] = I(site s adjacent to site t); ADJ[s,s]=0 #runs = number of MCMC samples #burn = number to discard as burn-in #update = how often to display current iteration #eps,sd.beta = hyperparameters #MODEL: #Y[,s] = a[1]+b[1]*theta[,s]+N(0,1/taue) #MISS[,s] = is.na(Y[,s]) = I(Ystar[,s]>0) #Ystar[,s] = a[2] + b[2]*theta[,s] + N(0,1) #theta[,s] ~ x[s,]%*%beta+CAR(ADJ,rho,taus) #taue,taus ~ Gamma(eps,eps) #a,b,beta ~ N(0,sd.beta^2) #rho~U(0,1) library(mvtnorm) nsites<-nrow(Y) nsub<-ncol(Y) p<-ncol(X) m<-apply(ADJ,2,sum) MISS<-is.na(Y) nMISS<-apply(MISS,2,sum) #INITIAL VALUES: Y[MISS]<-mean(na.omit(as.vector(Y))) beta<-rep(0,p) theta<-matrix(0,nsites,nsub) Ystar<-MISS-.5 a<-rep(0,2) b<-rep(1,2) taue<-taus<-0.01 rho<-0.95 #Store MCMC output: keepbeta<-matrix(0,runs,p) dimnames(keepbeta)[[2]]<-dimnames(X)[[2]] keepers<-matrix(0,runs,7) dimnames(keepers)[[2]]<-c("a1","a2","b1","b2","taue","taus","rho") mntheta<-sdtheta<-0*theta Q<-diag(m)-rho*ADJ xbeta<-X%*%beta for(i in 1:runs){ #some book-keeping v<-taue*b[1]^2+b[2]^2 VAR<-solve(v*diag(nsites) + taus*Q) tcholVAR<-t(chol(VAR)) sigma<-1/sqrt(taue) SSE<-SSS<-rep(0,nsub) Q.vect<-apply(Q,2,sum) #update subject-level parameters: for(s in 1:nsub){ #Fill in missing values: if(nMISS[s]>0){ mn<-a[1]+b[1]*theta[,s] Y[MISS[,s],s]<-mn[MISS[,s]]+sigma*rnorm(nMISS[s]) } #update latent spatial effects, theta[,s] MN<-taus*xbeta[s]*Q.vect+ taue*b[1]*(Y[,s]-a[1]) + b[2]*(Ystar[,s]-a[2]) theta[,s]<-VAR%*%MN+tcholVAR%*%rnorm(nsites) #update latent probit parameters, Ystar Ystar[,s]<-rtnorm(MISS[,s],a[2]+b[2]*theta[,s]) SSE[s]<-sum((Y[,s]-a[1]-b[1]*theta[,s])^2) SSS[s]<-t(theta[,s]-xbeta[s])%*%Q%*%(theta[,s]-xbeta[s]) } #Update variances: taue<-rgamma(1,nsub*nsites/2+eps,sum(SSE)/2+eps) taus<-rgamma(1,nsub*nsites/2+eps,sum(SSS)/2+eps) #update the probit regression parameters, a,b: #a[1] MN<-taue*sum(Y-b[1]*theta) VAR<-1/(nsites*nsub*taue+1/sd.beta^2) a[1]<-rnorm(1,MN*VAR,sqrt(VAR)) #a[2],b[2] MN<-rep(0,2) PREC<-diag(2)/sd.beta^2 for(s in 1:nsub){ xxx<-cbind(1,theta[,s]) PREC<-PREC+t(xxx)%*%xxx MN<-MN+t(xxx)%*%Ystar[,s] } VAR<-solve(PREC) ab<-rmvnorm(1,VAR%*%MN,VAR) a[2]<-ab[1];b[2]<-ab[2] #update subjct fixed effects, beta: MN<-rep(0,p);PREC<-diag(p)/sd.beta^2 for(s in 1:nsub){ xxx<-matrix(X[s,],nsites,p,byrow=T) txxxQ<-t(xxx)%*%Q PREC<-PREC+taus*txxxQ%*%xxx MN<-MN+taus*txxxQ%*%theta[,s] } beta<-as.vector(rmvnorm(1,solve(PREC)%*%MN,solve(PREC))) xbeta<-X%*%beta #update CAR spatial association parameters, rho canrho<-rbeta(1,50*rho,50*(1-rho)) if(canrho>0.0001 & canrho<0.9999){ canQ<-diag(m)-canrho*ADJ R<-dbeta(rho,50*canrho,50*(1-canrho),log=T)- dbeta(canrho,50*rho,50*(1-rho),log=T)+ 0.5*nsub*determinant(canQ,logarithm=T)$modulus- 0.5*nsub*determinant(Q,logarithm=T)$modulus for(s in 1:nsub){ r<-theta[,s]-xbeta[s] R<-R-0.5*taus*(t(r)%*%canQ%*%r-t(r)%*%Q%*%r) } if(!is.na(R)){if(runif(1)burn){ mntheta<-mntheta+theta/(runs-burn) sdtheta<-sdtheta+theta*theta/(runs-burn) } keepbeta[i,]<-beta keepers[i,]<-c(a,b,taue,taus,rho) #Plot the current iteration: if(i%%update==0){ par(mfrow=c(2,2)) plot(Y[,1],col=MISS[,1]+1,main="Data(point) v fit(line) for sub 1") lines(a[1]+b[1]*theta[,1]) legend("topright",c("observed","missing"),pch=1,col=1:2, inset=0.05,cex=0.7) plot(keepbeta[1:i,1],type="l",ylim=range(keepbeta[1:i,]), xlab="MCMC iteration",main="beta[j]"); if(p>1){for(j in 2:p){lines(keepbeta[1:i,j],col=j)}} plot(keepers[1:i,2],type="l",main="a[2] - probit mean",xlab="MCMC iteration") plot(keepers[1:i,4],type="l",main="b[2] - probit slope",xlab="MCMC iteration") } } list(beta=keepbeta[burn:runs,],keepers=keepers[burn:runs,], theta.mn=mntheta,theta.sd=sqrt(sdtheta-mntheta^2))} #Generate data from a truncated normal distribution: rtnorm<-function(Ybin,mu,sigma=1){ n<-length(Ybin) lp<-rep(-Inf,n);lp[Ybin==1]<-0 up<-rep(Inf,n); up[Ybin==0]<-0 lp<-pnorm(lp,mu,sigma) up<-pnorm(up,mu,sigma) qnorm(runif(n,lp,up),mu,sigma)} #GENERATE SIMULATED DATA TO DEMONSTRATE THE USE #OF THE MCMC CODE nsub<-100 #number of subjects nsites<-50 #number of observations per subject p<-2 #number of covariates beta<-c(0,.1) #true value of the regression parameters rho<-0.95 #spatial association parameter #SET UP AR(1) adjacency ADJ<-matrix(0,nsites,nsites) for(s in 2:nsites){ADJ[s,s-1]<-ADJ[s-1,s]<-1} COV<-diag(apply(ADJ,1,sum))-rho*ADJ COV<-solve(COV)/10 #GENERATE DATA library(mvtnorm) X<-matrix(rnorm(nsub*p),nsub,p) Y<-matrix(X%*%beta,nsites,nsub,byrow=T)+ t(rmvnorm(nsub,rep(0,nsites),COV)) cutoff<-quantile(na.omit(as.vector(Y)),.95) MISS<-rbinom(nsub*nsites,1,0.05+0.7*(Y>cutoff)) Y[MISS==1]<-NA #FIT THE MODEL fit<-info.miss.car(Y,X,ADJ) #DISPLAY RESULTS: print(round(apply(fit$beta,2,quantile,c(0.025,0.5,0.975)),3)) print(round(apply(fit$keepers,2,quantile,c(0.025,0.5,0.975)),3))