Let \(Y_i\) be the percentage of voters who selected Obama in county \(i=1,\ldots,n\). We model \[Y_i|\alpha,\beta,\sigma^2\sim \mbox{Normal}(\alpha+X_{1i}\beta_1+\ldots+X_{pi}\beta_p,\sigma^2),\] where \(X_{ji}\) is the \(j^{th}\) covariate for county \(i\).
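To complete the Bayesian specification, the JAGS code below places vague independent priors on the parameters. Recall that JAGS parameterizes the normal distribution by its precision, so dnorm(0,0.0001) corresponds to a normal prior with standard deviation \(1/\sqrt{0.0001}=100\): \[\alpha\sim\mbox{Normal}(0,10^2),\quad \beta_j\sim\mbox{Normal}(0,100^2),\quad 1/\sigma^2\sim\mbox{Gamma}(0.01,0.01).\]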
# Load the data
dat <- read.csv("http://www4.stat.ncsu.edu/~reich/ST590/assignments/Obama2012.csv")
Y <- dat[,2]
Y <- (Y-mean(Y))/sd(Y) # Standardize the response
X <- dat[,4:18]
X <- X[,-10] # Drop X10: X1 and X10 are perfectly correlated
X <- scale(X) # Center and scale the covariates
# Set aside 5 counties as a test set for prediction
test <- c(20,40,60,80,100)
Yo <- Y[-test] # Observed data
Xo <- X[-test,]
Yp <- Y[test] # Counties set aside for prediction
Xp <- X[test,]
no <- length(Yo)
np <- length(Yp)
p <- ncol(Xo)
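As a quick sanity check (not part of the original analysis), you can confirm the train/test split and the collinearity claim above before fitting; if the comment above is right, the correlation should be \(\pm 1\):
dim(Xo); length(Yo) # training design matrix and response
cor(dat[,4],dat[,13]) # X1 and X10 are columns 4 and 13 of dat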
library(rjags)
model_string <- "model{

   # Likelihood
   for(i in 1:no){
      Yo[i]  ~ dnorm(muo[i],inv.var)
      muo[i] <- alpha + inprod(Xo[i,],beta[])
   }

   # Prediction
   for(i in 1:np){
      Yp[i]  ~ dnorm(mup[i],inv.var)
      mup[i] <- alpha + inprod(Xp[i,],beta[])
   }

   # Priors
   for(j in 1:p){
      beta[j] ~ dnorm(0,0.0001)
   }
   alpha   ~ dnorm(0, 0.01)
   inv.var ~ dgamma(0.01, 0.01)
   sigma   <- 1/sqrt(inv.var)
}"
# NOTE: Yp is not passed to JAGS as data, so JAGS treats the held-out
# responses as unknown nodes and samples them from their posterior
# predictive distribution.
model <- jags.model(textConnection(model_string),
                    data = list(Yo=Yo,no=no,np=np,p=p,Xo=Xo,Xp=Xp))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 1826
##
## Initializing model
# Burn-in: discard the first 10,000 iterations
update(model, 10000, progress.bar="none")
samp <- coda.samples(model,
                     variable.names=c("beta","sigma","Yp","alpha"),
                     n.iter=20000, progress.bar="none")
summary(samp)
##
## Iterations = 10001:30000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 20000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## Yp[1] -0.9136022 0.49689 0.0035136 0.0047567
## Yp[2] 0.4227801 0.52124 0.0036857 0.0051239
## Yp[3] 1.4956660 0.63926 0.0045203 0.0058101
## Yp[4] -0.5697908 0.47450 0.0033552 0.0036524
## Yp[5] -1.1367092 0.48137 0.0034038 0.0035959
## alpha -0.0003303 0.04780 0.0003380 0.0003486
## beta[1] -0.1372551 0.11804 0.0008347 0.0020852
## beta[2] -0.0514684 0.06022 0.0004258 0.0007029
## beta[3] 0.1336259 0.12404 0.0008771 0.0022936
## beta[4] -0.5530310 0.99133 0.0070098 0.2050220
## beta[5] 0.7319574 0.90044 0.0063671 0.1825724
## beta[6] 0.1747891 0.28010 0.0019806 0.0572633
## beta[7] 0.2172308 0.08920 0.0006308 0.0084946
## beta[8] 0.1179216 0.15147 0.0010710 0.0208046
## beta[9] -0.4648639 0.18111 0.0012806 0.0065604
## beta[10] 0.0897669 0.06405 0.0004529 0.0012282
## beta[11] -0.0469901 0.08089 0.0005720 0.0012194
## beta[12] 0.0343414 0.05916 0.0004184 0.0005937
## beta[13] 0.2152934 0.08708 0.0006157 0.0017663
## beta[14] -0.1119907 0.06953 0.0004917 0.0012076
## sigma 0.4599528 0.03686 0.0002607 0.0003674
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## Yp[1] -1.90238 -1.244089 -0.9145996 -0.581604 0.06615
## Yp[2] -0.60653 0.076250 0.4234304 0.769634 1.44769
## Yp[3] 0.23336 1.070320 1.4972346 1.921019 2.74489
## Yp[4] -1.50058 -0.887855 -0.5719314 -0.253579 0.36211
## Yp[5] -2.08505 -1.461510 -1.1329064 -0.807812 -0.20084
## alpha -0.09444 -0.032359 -0.0001946 0.031933 0.09403
## beta[1] -0.37071 -0.215998 -0.1360272 -0.058618 0.09134
## beta[2] -0.16980 -0.091928 -0.0511394 -0.011343 0.06604
## beta[3] -0.10904 0.051521 0.1319619 0.215743 0.38046
## beta[4] -2.71195 -1.156525 -0.5309494 0.108000 1.27738
## beta[5] -1.25258 0.194394 0.7513626 1.318882 2.45772
## beta[6] -0.44016 0.008076 0.1803429 0.356036 0.71434
## beta[7] 0.03850 0.159003 0.2177323 0.276733 0.39137
## beta[8] -0.19935 0.023752 0.1218704 0.215659 0.41876
## beta[9] -0.82241 -0.585074 -0.4626599 -0.342815 -0.10954
## beta[10] -0.03501 0.046528 0.0899251 0.132719 0.21632
## beta[11] -0.20419 -0.101115 -0.0472861 0.007355 0.11309
## beta[12] -0.08057 -0.005340 0.0339851 0.074130 0.15047
## beta[13] 0.04368 0.157558 0.2149872 0.273992 0.38752
## beta[14] -0.24761 -0.158451 -0.1121980 -0.065626 0.02548
## sigma 0.39391 0.434247 0.4577187 0.483160 0.53902
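Before trusting these summaries, note that the time-series SEs for beta[4] and beta[5] are roughly 30 times their naive SEs, which signals strong autocorrelation in those chains. A quick diagnostic sketch (not in the original analysis) using standard coda functions:
# Effective sample sizes; values far below 20,000 indicate poor mixing
effectiveSize(samp)
# Lag-k autocorrelations for each monitored parameter
autocorr.diag(samp)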
# Extract the samples for each parameter
# (coda orders the columns alphabetically: Yp, alpha, beta, sigma)
samps <- samp[[1]]
Yp.samps <- samps[,1:5]
alpha.samps <- samps[,6]
beta.samps <- samps[,7:20]
sigma.samps <- samps[,21]
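Hard-coding the column indices is brittle if the set of monitored variables changes. A safer alternative (a small sketch using the variable names that coda attaches to the samples):
# Extract columns by name instead of position
Yp.samps <- samps[,grep("^Yp\\[",colnames(samps))]
alpha.samps <- samps[,"alpha"]
beta.samps <- samps[,grep("^beta\\[",colnames(samps))]
sigma.samps <- samps[,"sigma"]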
# Compute the posterior mean for the plug-in predictions
beta.mn <- colMeans(beta.samps)
sigma.mn <- mean(sigma.samps)
alpha.mn <- mean(alpha.samps)
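The difference between the two approaches: the plug-in density fixes the parameters at their posterior means, \[Y_p\sim\mbox{Normal}\left(\hat\alpha+\sum_{j=1}^p X_{jp}\hat\beta_j,\;\hat\sigma^2\right),\] while the PPD averages the predictive density over the posterior, \[f(y_p|Y_o)=\int f(y_p|\alpha,\beta,\sigma^2)\,p(\alpha,\beta,\sigma^2|Y_o)\,d\alpha\,d\beta\,d\sigma^2,\] so the PPD is wider whenever the parameters are not known exactly.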
# Plot the PPD, the plug-in density, and the true value for each test county
for(j in 1:np){
   # PPD: kernel density estimate of the posterior predictive draws
   plot(density(Yp.samps[,j]),xlab="Y",main=paste("PPD for test county",j))
   # Plug-in: predictive density with parameters fixed at their posterior means
   mu <- alpha.mn+sum(Xp[j,]*beta.mn)
   y <- rnorm(20000,mu,sigma.mn)
   lines(density(y),col=2)
   # Truth: the observed (standardized) value for the held-out county
   abline(v=Yp[j],col=3,lwd=2)
   legend("topright",c("PPD","Plug-in","Truth"),col=1:3,lty=1,inset=0.05)
}
Notice how the PPD densities are slightly wider than the plug-in densities; this is the effect of accounting for posterior uncertainty in \(\alpha\), \(\beta\), and \(\sigma\), which the plug-in approach ignores.
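One way to quantify this (a quick check, not part of the original analysis) is to compare the posterior predictive standard deviation for each held-out county with the plug-in value \(\hat\sigma\); each PPD standard deviation should be somewhat larger:
# PPD standard deviation for each held-out county
apply(Yp.samps,2,sd)
# versus the plug-in predictive standard deviation
sigma.mn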