Multiple linear regression prediction

Let \(Y_i\) be the percent of voters who selected Obama in county \(i=1,\ldots,n\). We model \[Y_i|\alpha,\beta,\sigma^2\sim \mbox{Normal}(\alpha+X_{1i}\beta_1+\ldots+X_{pi}\beta_p,\sigma^2),\] where \(\alpha\) is the intercept and \(X_{ji}\) is the \(j^{th}\) covariate for county \(i\).
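
For the held-out counties we will compare two predictive distributions: the posterior predictive distribution (PPD), which averages the likelihood over the posterior of the parameters, \[f(y_p|\mathbf{Y}) = \int f(y_p|\alpha,\beta,\sigma^2)\,p(\alpha,\beta,\sigma^2|\mathbf{Y})\,d\alpha\,d\beta\,d\sigma^2,\] and the plug-in distribution \(f(y_p|\hat{\alpha},\hat{\beta},\hat{\sigma})\), which fixes the parameters at point estimates and therefore understates predictive uncertainty.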

Load and standardize the 2012 election data

# Load the data

 dat   <- read.csv("http://www4.stat.ncsu.edu/~reich/ST590/assignments/Obama2012.csv")
 Y     <- dat[,2]            # Response: percent of the vote for Obama
 Y     <- (Y-mean(Y))/sd(Y)  # Standardize the response
 X     <- dat[,4:18]         # County-level covariates
 X     <- X[,-10]            # X1 and X10 are perfectly correlated
 X     <- scale(X)           # Standardize each covariate

# Remove 5 observations for model fitting

 test  <- c(20,40,60,80,100)

 Yo    <- Y[-test]    # Observed data
 Xo    <- X[-test,]

 Yp    <- Y[test]     # Counties set aside for prediction
 Xp    <- X[test,]

 no    <- length(Yo)   # Number of counties used for fitting
 np    <- length(Yp)   # Number of counties set aside for prediction
 p     <- ncol(Xo)     # Number of covariates
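
As a quick sanity check (not part of the original analysis), you can verify the perfect correlation that motivated dropping the tenth covariate, and confirm the split sizes:

 # Sketch: check the collinearity noted above and the train/test dimensions
 Xraw <- dat[,4:18]
 cor(Xraw[,1],Xraw[,10])   # should be +1 or -1 if the comment above holds
 c(no, np, p)              # training size, test size, number of covariates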

Fit the linear regression model with Gaussian priors

library(rjags)


model_string <- "model{

  # Likelihood
  for(i in 1:no){
    Yo[i]   ~ dnorm(muo[i],inv.var)
    muo[i] <- alpha + inprod(Xo[i,],beta[])
  }

  # Prediction
  for(i in 1:np){
    Yp[i]  ~ dnorm(mup[i],inv.var)
    mup[i] <- alpha + inprod(Xp[i,],beta[])
  }

  # Priors
  for(j in 1:p){
    beta[j] ~ dnorm(0,0.0001)
  }
  alpha     ~ dnorm(0, 0.01)
  inv.var   ~ dgamma(0.01, 0.01)
  sigma     <- 1/sqrt(inv.var)
}"

Compile the model in JAGS

# NOTE: Yp is not sent to JAGS! Because Yp is given no data, JAGS treats each
# Yp[i] as an unobserved stochastic node and samples it along with the
# parameters, so the saved Yp draws come from the posterior predictive distribution.
model <- jags.model(textConnection(model_string), 
                    data = list(Yo=Yo,no=no,np=np,p=p,Xo=Xo,Xp=Xp))
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 1826
## 
## Initializing model
update(model, 10000, progress.bar="none")  # Burn-in samples

samp <- coda.samples(model, 
        variable.names=c("beta","sigma","Yp","alpha"), 
        n.iter=20000, progress.bar="none")
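
Before summarizing, it is worth checking that the chain has mixed well; a minimal sketch using coda's built-in diagnostics (not part of the original code):

# Optional convergence checks on the samp object
effectiveSize(samp)    # Effective sample size for each parameter
autocorr.diag(samp)    # Autocorrelation at several lags
# plot(samp)           # Trace and density plots for visual inspection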

summary(samp)
## 
## Iterations = 10001:30000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 20000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                Mean      SD  Naive SE Time-series SE
## Yp[1]    -0.9136022 0.49689 0.0035136      0.0047567
## Yp[2]     0.4227801 0.52124 0.0036857      0.0051239
## Yp[3]     1.4956660 0.63926 0.0045203      0.0058101
## Yp[4]    -0.5697908 0.47450 0.0033552      0.0036524
## Yp[5]    -1.1367092 0.48137 0.0034038      0.0035959
## alpha    -0.0003303 0.04780 0.0003380      0.0003486
## beta[1]  -0.1372551 0.11804 0.0008347      0.0020852
## beta[2]  -0.0514684 0.06022 0.0004258      0.0007029
## beta[3]   0.1336259 0.12404 0.0008771      0.0022936
## beta[4]  -0.5530310 0.99133 0.0070098      0.2050220
## beta[5]   0.7319574 0.90044 0.0063671      0.1825724
## beta[6]   0.1747891 0.28010 0.0019806      0.0572633
## beta[7]   0.2172308 0.08920 0.0006308      0.0084946
## beta[8]   0.1179216 0.15147 0.0010710      0.0208046
## beta[9]  -0.4648639 0.18111 0.0012806      0.0065604
## beta[10]  0.0897669 0.06405 0.0004529      0.0012282
## beta[11] -0.0469901 0.08089 0.0005720      0.0012194
## beta[12]  0.0343414 0.05916 0.0004184      0.0005937
## beta[13]  0.2152934 0.08708 0.0006157      0.0017663
## beta[14] -0.1119907 0.06953 0.0004917      0.0012076
## sigma     0.4599528 0.03686 0.0002607      0.0003674
## 
## 2. Quantiles for each variable:
## 
##              2.5%       25%        50%       75%    97.5%
## Yp[1]    -1.90238 -1.244089 -0.9145996 -0.581604  0.06615
## Yp[2]    -0.60653  0.076250  0.4234304  0.769634  1.44769
## Yp[3]     0.23336  1.070320  1.4972346  1.921019  2.74489
## Yp[4]    -1.50058 -0.887855 -0.5719314 -0.253579  0.36211
## Yp[5]    -2.08505 -1.461510 -1.1329064 -0.807812 -0.20084
## alpha    -0.09444 -0.032359 -0.0001946  0.031933  0.09403
## beta[1]  -0.37071 -0.215998 -0.1360272 -0.058618  0.09134
## beta[2]  -0.16980 -0.091928 -0.0511394 -0.011343  0.06604
## beta[3]  -0.10904  0.051521  0.1319619  0.215743  0.38046
## beta[4]  -2.71195 -1.156525 -0.5309494  0.108000  1.27738
## beta[5]  -1.25258  0.194394  0.7513626  1.318882  2.45772
## beta[6]  -0.44016  0.008076  0.1803429  0.356036  0.71434
## beta[7]   0.03850  0.159003  0.2177323  0.276733  0.39137
## beta[8]  -0.19935  0.023752  0.1218704  0.215659  0.41876
## beta[9]  -0.82241 -0.585074 -0.4626599 -0.342815 -0.10954
## beta[10] -0.03501  0.046528  0.0899251  0.132719  0.21632
## beta[11] -0.20419 -0.101115 -0.0472861  0.007355  0.11309
## beta[12] -0.08057 -0.005340  0.0339851  0.074130  0.15047
## beta[13]  0.04368  0.157558  0.2149872  0.273992  0.38752
## beta[14] -0.24761 -0.158451 -0.1121980 -0.065626  0.02548
## sigma     0.39391  0.434247  0.4577187  0.483160  0.53902
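
Also note that the time-series SEs for beta[4] and beta[5] are far larger than their naive SEs (0.205 versus 0.007 for beta[4]), a sign of strong autocorrelation in those components; this is typical when covariates are highly correlated, and a longer run or thinning would tighten those estimates.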

Plot samples from the posterior predictive distribution (PPD) and the plug-in distribution

#Extract the samples for each parameter

 samps       <- samp[[1]]
 Yp.samps    <- samps[,1:5] 
 alpha.samps <- samps[,6]
 beta.samps  <- samps[,7:20]
 sigma.samps <- samps[,21]

# Compute the posterior mean for the plug-in predictions  

 beta.mn  <- colMeans(beta.samps)
 sigma.mn <- mean(sigma.samps)
 alpha.mn <- mean(alpha.samps) 
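
The positional indices above depend on the ordering of the monitored variables seen in the summary (Yp, alpha, beta, sigma); here is a sketch of a name-based extraction that is more robust if variable.names changes:

 # Sketch: extract columns by name rather than by position
 cn          <- colnames(samps)
 Yp.samps    <- samps[,grep("^Yp",cn)]
 alpha.samps <- samps[,"alpha"]
 beta.samps  <- samps[,grep("^beta",cn)]
 sigma.samps <- samps[,"sigma"]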


# Plot the PPD and plug-in

 for(j in 1:np){

   # PPD
   plot(density(Yp.samps[,j]),xlab="Y",main="PPD")

   # Plug-in
   mu <- alpha.mn+sum(Xp[j,]*beta.mn)
   y  <- rnorm(20000,mu,sigma.mn)
   lines(density(y),col=2)

   # Truth
   abline(v=Yp[j],col=3,lwd=2)

   legend("topright",c("PPD","Plug-in","Truth"),col=1:3,lty=1,inset=0.05)
}

[Figure: five density plots, one per held-out county, each comparing the PPD (black), the plug-in density (red), and the true value (green vertical line).]

Notice that the PPD densities are slightly wider than the plug-in densities; this extra width reflects the posterior uncertainty in \(\alpha\), \(\beta\), and \(\sigma\) that the plug-in approach ignores.
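
To see this numerically, a quick follow-up sketch using the objects above compares the PPD standard deviation for each test county with the plug-in standard deviation:

 # PPD spread vs. plug-in spread
 apply(Yp.samps,2,sd)   # PPD sds, roughly 0.47 to 0.64 in the summary above
 sigma.mn               # plug-in sd (posterior mean of sigma), about 0.46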