# Bayesian Statistical Methods

## Partial solutions

### Chapter 4: Linear models

(1) We assume the model $$Y_i\sim\mbox{Normal}(\mu,\sigma^2)$$ for placebo observations and $$Y_i\sim\mbox{Normal}(\mu+\delta,\sigma^2)$$ for treatment observations. The objective is to test whether $$\delta=0$$ and thus the two groups have the same population mean. To do this we use the two-sample t-test with Jeffreys' prior in Equation (4.7). The results are

   Y0    <- c(20,-31,-10,2,3,4)/10   # placebo observations
Y1    <- c(-35,-16,-46,-9,-51,1)/10  # treatment observations
n0    <- n1 <- 6

# Group means and variances (MLE form, divisor n as in Equation (4.7))
ybar0 <- mean(Y0)
ybar1 <- mean(Y1)
s20   <- mean((Y0-ybar0)^2)
s21   <- mean((Y1-ybar1)^2)
sp    <- sqrt((s20/2+s21/2))  # pooled scale estimate

# Posterior of delta under Jeffreys' prior: t distribution centered at the
# difference in sample means
post_mn  <- ybar1-ybar0
post_sd  <- sp*sqrt(1/n0+1/n1)
cred_set <- post_mn+post_sd*qt(c(0.025,0.975),df=n0+n1)

post_mn;post_sd;cred_set

## [1] -2.4

## [1] 1.012423

## [1] -4.6058799 -0.1941201


The credible set excludes zero and so there is some evidence that the mean differs by treatment group. To test for sensitivity to the prior we also fit the model using vague but proper priors using JAGS. The results are slightly different as zero is included in the interval.

  library(rjags)
# Sensitivity analysis: refit the two-sample model with vague but proper
# priors (Normal with precision 0.0001, i.e. sd = 100, on mu and delta;
# Gamma(0.1, 0.1) on the error precision tau)
data <- list(n=6,Y0=Y0,Y1=Y1)

model_string <- textConnection("model{

# Likelihood
for(i in 1:n){
Y0[i] ~ dnorm(mu,tau)
Y1[i] ~ dnorm(mu+delta,tau)
}

# Priors
mu    ~  dnorm(0, 0.0001)
delta ~  dnorm(0, 0.0001)
tau   ~  dgamma(0.1, 0.1)
sigma <- 1/sqrt(tau)
}")

# Two chains; 10,000 burn-in iterations, then 50,000 samples of delta
model <- jags.model(model_string,data = data, n.chains=2,quiet=TRUE)
update(model, 10000, progress.bar="none")
params  <- c("delta")
samples <- coda.samples(model,
variable.names=params,
n.iter=50000, progress.bar="none")
summary(samples)

##
## Iterations = 10001:60000
## Thinning interval = 1
## Number of chains = 2
## Sample size per chain = 50000
##
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
##
##           Mean             SD       Naive SE Time-series SE
##      -2.393159       1.230790       0.003892       0.006817
##
## 2. Quantiles for each variable:
##
##    2.5%     25%     50%     75%   97.5%
## -4.8425 -3.1573 -2.3922 -1.6250  0.0453


(3a)

  load("election_2008_2016.RData")

X     <- scale(X)   # standardize covariates
X     <- cbind(1,X) # add intercept

# Short covariate labels: one entry per column of X (11 total). The
# original listing dropped "Hispanic", "HS grad", and "Bachelor's",
# leaving 8 labels, so colnames(X) <- short would fail.
short <- c("Intercept", "Pop change", "65+","African American",
"Hispanic","HS grad","Bachelor's",
"Homeownership rate","Home value",
"Median income", "Poverty")
# NOTE(review): `names[1:11,2]` gives 11 labels plus "Intercept" = 12
# entries, one more than ncol(X) = 11 — verify this indexing against the
# data dictionary in the RData file.
names <- c("Intercept", as.character(names[1:11,2]))
colnames(X) <- short

library(rjags)
data <- list(n=length(Y),p=ncol(X),Y=Y,X=X)

# Bayesian linear regression: uninformative Normal priors (precision
# 0.0001, i.e. sd = 100) on the coefficients, Gamma(0.01, 0.01) on the
# error precision
model_string <- textConnection("model{

# Likelihood
for(i in 1:n){
Y[i] ~ dnorm(inprod(X[i,],beta[]),tau)
}

# Priors
for(j in 1:p){beta[j] ~  dnorm(0, 0.0001)}
tau ~ dgamma(0.01,0.01)
}")

# Two chains; 10,000 burn-in iterations, then 10,000 posterior samples
model <- jags.model(model_string,data = data, n.chains=2,quiet=TRUE)
update(model, 10000, progress.bar="none")
params  <- c("beta")
samples <- coda.samples(model,
variable.names=params,
n.iter=10000, progress.bar="none")
out     <- summary(samples)$statistics
rownames(out) <- short
out

##                            Mean        SD     Naive SE Time-series SE
## Intercept           6.686496397 0.1346170 0.0009518858   0.0009519064
## Pop change         -1.127759387 0.1650132 0.0011668194   0.0016105791
## 65+                 0.919267405 0.1991819 0.0014084286   0.0031709182
## African American   -1.581909094 0.1665072 0.0011773835   0.0017821903
## Hispanic           -2.058968437 0.1713594 0.0012116938   0.0021634813
## HS grad             1.800601208 0.2548684 0.0018021918   0.0043122793
## Bachelor's         -6.330162971 0.2689717 0.0019019171   0.0047499486
## Homeownership rate -0.002490939 0.2006266 0.0014186441   0.0028600396
## Home value         -1.356900866 0.2329022 0.0016468673   0.0040102929
## Median income       1.838621959 0.3778961 0.0026721288   0.0088978579
## Poverty             1.479849324 0.2891094 0.0020443124   0.0060089836

beta_hat <- out[,1]
beta_hat

##          Intercept         Pop change                65+
##        6.686496397       -1.127759387        0.919267405
##   African American           Hispanic            HS grad
##       -1.581909094       -2.058968437        1.800601208
##         Bachelor's Homeownership rate         Home value
##       -6.330162971       -0.002490939       -1.356900866
##      Median income            Poverty
##        1.838621959        1.479849324

(3b)

R <- Y-X%*%beta_hat
hist(R,breaks=25,xlab="Residuals")

county_plot(fips,R,main="Residuals",units="")

## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 29105, 2122, 2150, 2164, 2180, 2188, 2240, 2090,
## 2198, 15005, 2100, 2170, 51515, 2016, 2060, 2290, 2282, 2070, 2110, 2130,
## 2185, 2195, 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275


  # Flag the 10 smallest and 10 largest residuals. Tied ranks are averaged
  # by rank(), so a tie at the boundary can select fewer than 10 rows
  # (which is why only 9 counties appear in the "smallest" table below).
  smallest <- rank(R)<=10
largest  <- rank(-R)<=10
# Counties with the smallest residuals (largest swing toward the GOP
# relative to the model's prediction was avoided here)
all_dat[smallest,2:3]

##             area_name state_abbreviation
## 586   Franklin County                 ID
## 2825 Box Elder County                 UT
## 2826     Cache County                 UT
## 2829     Davis County                 UT
## 2835      Juab County                 UT
## 2841 Salt Lake County                 UT
## 2846    Tooele County                 UT
## 2848      Utah County                 UT
## 2852     Weber County                 UT

  # Counties with the 10 largest (most positive) residuals
  all_dat[largest,2:3]

##             area_name state_abbreviation
## 264   Costilla County                 CO
## 646  Henderson County                 IL
## 851     Howard County                 IA
## 1044   Elliott County                 KY
## 1879  Franklin County                 NY
## 2066   Rolette County                 ND
## 2634     Duval County                 TX
## 2782     Starr County                 TX
## 2822    Zavala County                 TX
## 3139 Menominee County                 WI


The histogram shows that the results are reasonably well approximated by a normal distribution but with a few large residuals in both tails. Counties with small (large) residuals suggest that there is some unobserved factor that explains why these counties had a smaller (larger) swing towards the GOP in 2016 than expected by the model.

(3c) Adding random effects might be needed because the residuals cluster by state and so observations within a state are correlated.

  state <- as.character(all_dat[,3])

# Drop Alaska, Hawaii, and DC (48 contiguous states remain)
AKHI  <- state=="AK" | state=="HI" | state=="DC"
fips  <- fips[!AKHI]
Y     <- Y[!AKHI]
X     <- X[!AKHI,]
state <- state[!AKHI]

# Assign a numeric id to the counties in each state. match() maps each
# county to the index of its state in st, replacing the hard-coded
# for(j in 1:48) loop and adapting automatically if the number of
# states changes.
st    <- unique(state)
id    <- match(state,st)
data  <- list(n=length(Y),p=ncol(X),Y=Y,X=X,id=id,ns=length(st))

# Random-effects model: a state-level intercept RE[id[i]] is added to the
# regression mean; tau1 is the residual precision, tau2 the precision of
# the state random effects (which shrink toward zero)
model_string <- textConnection("model{

# Likelihood
for(i in 1:n){
Y[i] ~ dnorm(inprod(X[i,],beta[]) + RE[id[i]],tau1)
}

# Priors
for(j in 1:p){beta[j] ~  dnorm(0, 0.0001)}
for(j in 1:ns){RE[j] ~  dnorm(0, tau2)}
tau1 ~ dgamma(0.01,0.01)
tau2 ~ dgamma(0.01,0.01)
}")

# Initial values: start beta at the fixed-effects posterior means and the
# 48 state effects at zero to speed convergence
init  <- list(beta=beta_hat,RE=rep(0,48),tau2=100,tau1=0.0001)
model <- jags.model(model_string,data = data, inits=init,n.chains=2,quiet=TRUE)
update(model, 10000, progress.bar="none")
params  <- c("beta","RE")
samples <- coda.samples(model,
variable.names=params,
n.iter=10000, progress.bar="none")

out                    <- summary(samples)$statistics
rownames(out)[1:48]    <- st
rownames(out)[1:11+48] <- short
out

##                            Mean        SD    Naive SE Time-series SE
## AL                  -2.35079529 1.3272448 0.009385038    0.125088389
## AZ                 -12.25937241 1.7634784 0.012469675    0.113056494
## AR                  -6.19988938 1.2804410 0.009054085    0.120605363
## CA                  -7.01577238 1.3743863 0.009718379    0.114943742
## CO                   1.13911571 1.3010095 0.009199526    0.113691113
## CT                  10.65611885 2.1429028 0.015152611    0.092518502
## DE                   3.36636993 3.0479055 0.021551946    0.085349765
## FL                  -2.89859419 1.2935789 0.009146984    0.122238546
## GA                  -4.08760224 1.2132655 0.008579083    0.126224194
## ID                 -11.57021614 1.3791753 0.009752242    0.116757427
## IL                   2.83426003 1.2250804 0.008662627    0.118956709
## IN                   2.91966227 1.2474766 0.008820992    0.123158774
## IA                  10.70430627 1.2328854 0.008717816    0.120934493
## KS                  -5.63502645 1.2251851 0.008663367    0.121337208
## KY                  -0.87044834 1.2342576 0.008727519    0.128604364
## LA                  -4.66966238 1.3344921 0.009436284    0.131482311
## ME                   7.67137033 1.7320247 0.012247264    0.114415338
## MD                   3.40137049 1.5652832 0.011068224    0.111106584
## MA                   1.78527355 1.8091718 0.012792777    0.099124808
## MI                   1.74925970 1.2549652 0.008873944    0.125124076
## MN                   7.19493339 1.2472408 0.008819324    0.129833590
## MS                  -2.54280017 1.3344793 0.009436193    0.115721496
## MO                   2.78700228 1.2160978 0.008599110    0.123494489
## MT                  -0.02537921 1.3082635 0.009250820    0.124042234
## NE                  -2.16787012 1.2459532 0.008810219    0.112591046
## NV                  -3.76786624 1.7005944 0.012025019    0.107632897
## NH                   7.63064120 1.9889148 0.014063751    0.106386253
## NJ                   9.16018769 1.6400819 0.011597130    0.113039518
## NM                  -6.19122354 1.5245950 0.010780515    0.107572503
## NY                   9.60135342 1.2976013 0.009175427    0.120149555
## NC                  -1.54679902 1.2434025 0.008792183    0.120693997
## ND                   6.23375694 1.3538141 0.009572911    0.111667277
## OH                   7.55504323 1.2458212 0.008809286    0.121851124
## OK                  -4.71305603 1.2637313 0.008935930    0.128179881
## OR                  -4.18068620 1.4143731 0.010001128    0.112828275
## PA                   3.25253815 1.2910600 0.009129173    0.122721026
## RI                  12.74206874 2.5527962 0.018050995    0.093929541
## SC                  -1.55590454 1.4026918 0.009918528    0.127091242
## SD                   4.64751051 1.2911442 0.009129768    0.119394353
## TN                   0.06181392 1.2469659 0.008817380    0.121071648
## TX                  -5.01587107 1.1868110 0.008392021    0.120149006
## UT                 -25.39516700 1.4958714 0.010577408    0.117364275
## VT                  10.56334166 1.7809509 0.012593224    0.111357685
## VA                   0.56606927 1.2215851 0.008637911    0.128362170
## WA                  -4.72693213 1.3953078 0.009866316    0.126095154
## WV                   1.48523363 1.3287092 0.009395393    0.119501065
## WI                   7.40288610 1.2740174 0.009008663    0.121882982
## WY                  -2.81045480 1.5726898 0.011120597    0.111697883
## Intercept            6.90825463 1.1035130 0.007803015    0.126524836
## Pop change          -0.31867891 0.1347395 0.000952752    0.001624652
## 65+                  0.70411462 0.1677062 0.001185862    0.003301143
## African American    -0.61846255 0.1656749 0.001171499    0.002504580
## Hispanic            -0.12978944 0.1890504 0.001336788    0.003599065
## HS grad              1.94538226 0.2263332 0.001600418    0.004628069
## Bachelor's          -6.31339344 0.2112819 0.001493989    0.004263014
## Homeownership rate   0.72198057 0.1684133 0.001190862    0.003062256
## Home value          -0.61425401 0.2313528 0.001635912    0.004921532
## Median income       -0.15655165 0.3159311 0.002233970    0.008984158
## Poverty              1.57812587 0.2186845 0.001546333    0.004662774


The states with small (large) random effects had a smaller (larger) swing towards the GOP than expected by our model. The state with the smallest posterior mean random effect is Utah; the state with the largest posterior mean random effect is Rhode Island.

(5) We fit the logistic regression model $$\mbox{logit}[\mbox{Prob}(Y_i=1)] = \sum_{j=1}^pX_{ij}\beta_j$$ with uninformative priors $$\beta_j\sim\mbox{Normal}(0,100)$$.
library("titanic")
dat    <- titanic_train
Y      <- dat[,2]
age    <- dat[,6]
gender <- dat[,5]
class  <- dat[,3]
X      <- cbind(1,scale(age),
                ifelse(gender=="male",1,0),
                ifelse(class==2,1,0),
                ifelse(class==3,1,0))
colnames(X) <- c("Intercept","Age","Gender","Class=2","Class=3")
miss <- is.na(rowSums(X))
X    <- X[!miss,]
Y    <- Y[!miss]

library(rjags)
data <- list(n=nrow(X),p=ncol(X),Y=Y,X=X)

model_string <- textConnection("model{

# Likelihood
for(i in 1:n){
Y[i] ~ dbern(prob[i])
logit(prob[i]) = inprod(X[i,],beta[])
}

# Priors
for(j in 1:p){beta[j] ~ dnorm(0, 0.01)}
}")

model <- jags.model(model_string,data = data, n.chains=2,quiet=TRUE)
update(model, 10000, progress.bar="none")
params  <- c("beta")
samples <- coda.samples(model,
variable.names=params,
n.iter=10000, progress.bar="none")
out <- summary(samples)$quantiles
plot(samples)


 rownames(out)<-colnames(X)
round(out,2)

##            2.5%   25%   50%   75% 97.5%
## Intercept  2.20  2.52  2.69  2.86  3.21
## Age       -0.76 -0.62 -0.54 -0.46 -0.32
## Gender    -2.95 -2.68 -2.54 -2.40 -2.14
## Class=2   -1.86 -1.50 -1.31 -1.13 -0.78
## Class=3   -3.15 -2.78 -2.59 -2.41 -2.06


The posterior medians are negative and the 95% intervals exclude zero for all of the covariates. Therefore, the profile of the passenger with the highest probability of survival is a young woman in first class.

(7) Gibbs sampling is a good choice because all of the full conditional distributions are conjugate. For initial values one might set $$\alpha_j$$ to the group mean $${\bar Y}_j=\sum_{i=1}^nY_{ij}/n$$, $$\tau^2$$ to the sample variance of the $${\bar Y}_j$$, and $$\sigma^2$$ to the variance of the $$Y_{ij}-{\bar Y}_j$$. At iteration $$t$$ the Gibbs sampler would update the parameters as \begin{align} \alpha_j|\mbox{rest} &\sim\mbox{Normal}\left(\frac{\sum_{i=1}^nY_{ij}}{n+\sigma^2/\tau^2},\frac{\sigma^2}{n+\sigma^2/\tau^2}\right)\\ \sigma^2|\mbox{rest} &\sim\mbox{InvGamma}\left(nm/2+a,\sum_{i=1}^n\sum_{j=1}^m(Y_{ij}-\alpha_j)^2/2+b\right)\\ \tau^2|\mbox{rest} &\sim\mbox{InvGamma}\left(m/2+a,\sum_{j=1}^m\alpha_j^2/2+b\right) \end{align}

(9a)

The mean trend has intercept $$\alpha$$ and slope $$\beta$$, so the average increase in log odds per year is $$\beta$$. The parameter $$\rho$$ controls autocorrelation, with $$\rho=0$$ giving independence across years and large $$\rho$$ giving strong dependence. Finally, $$\sigma^2$$ controls the variance of the process.

library(babynames)

# Annual proportion of female babies named Sophia after 1950.
# The original listing had two statements fused onto each of two lines
# (a pandoc/extraction artifact), which is a syntax error in R; they are
# restored to one statement per line here.
dat <- babynames
dat <- dat[dat$name=="Sophia" & dat$sex=="F" &
           dat$year>1950,]
yr  <- dat$year
t   <- dat$year - 1950        # years since 1950 (t = 1,...,n)
p   <- dat$prop
Y   <- log(p/(1-p))           # log odds of the name Sophia

plot(t+1950,Y,xlab="Year",ylab="Log odds of Sophia")


(9b)

  library(rjags)
data <- list(n=length(Y),Y=Y)

# AR(1)-around-a-trend model: the mean of Y[t] is the linear trend
# alpha + beta*t plus rho times the previous year's deviation from trend.
# The likelihood conditions on the first observation Y[1] (loop starts at
# t = 2). The dbeta(1,1) prior restricts rho to (0,1).
model_string <- textConnection("model{

# Likelihood
for(t in 2:n){
Y[t]     ~ dnorm(meanY[t],tau)
meanY[t] = alpha + beta*t +
rho*(Y[t-1] - alpha - beta*(t-1))
}

# Priors
alpha   ~ dnorm(0,0.00001)
beta    ~ dnorm(0,0.00001)
rho     ~ dbeta(1,1)
tau     ~ dgamma(0.01,0.01)
sigma  <- 1/sqrt(tau)
}")

model <- jags.model(model_string,data = data, n.chains=2,quiet=TRUE)
update(model, 10000, progress.bar="none")
params  <- c("alpha","beta","rho","sigma")
# Very long run (500,000 iterations thinned by 50) because mixing is slow
# when rho is near one
samples <- coda.samples(model,
variable.names=params,
n.iter=500000, thin=50,progress.bar="none")
plot(samples)


 summary(samples)

##
## Iterations = 11050:511000
## Thinning interval = 50
## Number of chains = 2
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
##
##            Mean        SD  Naive SE Time-series SE
## alpha -18.06255 208.09522 1.4714554      3.8526484
## beta    0.09474   0.26017 0.0018396      0.0056010
## rho     0.98997   0.02163 0.0001530      0.0015219
## sigma   0.17305   0.01561 0.0001104      0.0001118
##
## 2. Quantiles for each variable:
##
##            2.5%       25%       50%     75%    97.5%
## alpha -493.8244 -89.84806 -11.01476 48.2759 448.7726
## beta    -0.4707  -0.01635   0.08475  0.2076   0.6679
## rho      0.9161   0.99340   0.99764  0.9990   0.9999
## sigma    0.1457   0.16226   0.17179  0.1826   0.2075

 # Effective sample sizes: rho mixes poorly (ESS far below the 20,000 draws)
 effectiveSize(samples)

##      alpha       beta        rho      sigma
##  2920.3346  2156.7651   443.0641 19506.0950

 # Gelman-Rubin diagnostic; point estimates near 1 indicate convergence
 gelman.diag(samples)

## Potential scale reduction factors:
##
##       Point est. Upper C.I.
## alpha       1.00       1.00
## beta        1.00       1.01
## rho         1.13       1.27
## sigma       1.00       1.00
##
## Multivariate psrf
##
## 1.02


Convergence is slow (because $$\rho\approx 1$$ and there is strong correlation between observations) and requires extremely long chains.

(9c) The prediction for 2020 depends on the values in 2018 and 2019. So we first sample 2018, then 2019, and then 2020.

 # Extract the posterior samples

# Stack the two chains into one matrix of posterior draws
samps  <- rbind(samples[[1]],samples[[2]])
# Peek at the first two rows (columns are alpha, beta, rho, sigma)
samps[1:2,]

##          alpha       beta       rho     sigma
## [1,]  -9.88201 0.07777597 0.8273237 0.1744088
## [2,] -10.05306 0.08028947 0.8307580 0.1543123

 S      <- nrow(samps)
# Columns of samps are in coda's alphabetical order: alpha, beta, rho, sigma
alpha  <- samps[,1]
beta   <- samps[,2]
rho    <- samps[,3]
sigma  <- samps[,4]

# Make predictions
# Years are indexed from 1950, so 2017 is t = 67 (the last observation)
# and 2018-2020 are t = 68, 69, 70. Each year is drawn from the AR(1)
# model given the previous year, propagating both posterior uncertainty
# (one draw of the parameters per row) and sampling uncertainty (e1-e3).

e1     <- rnorm(S,0,sigma)
e2     <- rnorm(S,0,sigma)
e3     <- rnorm(S,0,sigma)
Y_2018 <- alpha + beta*68 + rho*( Y[67]-alpha - beta*67) + e1
Y_2019 <- alpha + beta*69 + rho*(Y_2018-alpha - beta*68) + e2
Y_2020 <- alpha + beta*70 + rho*(Y_2019-alpha - beta*69) + e3

# Plot the results
plot(Y,xlim=c(0,70),ylim=c(-10,-3.5))