Spatial modeling of gun-related homicide rates

Chapter 4.4.4: Linear models with correlated data

The data for this analysis come from “Firearm legislation and firearm mortality in the USA: a cross-sectional, state-level study” by Kalesan et. al. (2016). The response variable, \(Y_i\), is the log firearm-related death rate (i.e., the log of the number of deaths divided by the population) in 2010 in state \(i\). This is regressed onto five potential confounders,

  1. log 2009 firearm death rate per 10,000 people
  2. Firearm ownership rate quartile
  3. Unemployment rate quartile
  4. Non-firearm homicide rate quartile
  5. Firearm export rate quartile

The covariate of interest is the number of gun control laws in effect in the state. This gives \(p=6\) covariates.

We fit the linear model \[Y_i=\beta_0+\sum_{j=1}^pX_i\beta_j + \varepsilon_i.\] We compare the usual non-spatial model with \(\varepsilon_i\sim\mbox{Normal}(0,\sigma^2)\) with the spaital model \(\mbox{Cov}(\varepsilon_1,…,\varepsilon_n)\sim\mbox{Normal}(0,\Sigma)\). The covariance \[\Sigma=\tau^2S+ \sigma^2I_n\] is decomposed into a spatial covariance \(\tau^2S\) and a non-spatial covariance \(\sigma^2I_n\), where \(I_n\) is the \(n\times n\) identify matrix. The spatial covariance follows the conditionally-autoregressive model \(S = (M-\rho A)^{-1}\), where \(A\) is the adjacency matrix with \((i,j)\) element equal 1 if states \(i\) and \(j\) are neighbors and zero otherwise, and \(M\) is the diagonal matrix with \(i^{th}\) diagonal element equal to the number of states that neighbor state \(i\).

Load the data

 set.seed(0820)

 load("guns.RData")
 Y     <- log(10000*Y/N)
 Z[,1] <- log(Z[,1])
 X     <- cbind(1,Z,rowSums(X))

 # Remove AK and HI
 Y <- Y[-c(2,11)]
 X <- X[-c(2,11),]

 n <- length(Y)
 p <- ncol(X)

Fit the non-spatial model

 ns_model <- "model{

   # Likelihood
   for(i in 1:n){
      Y[i]   ~ dnorm(mu[i],taue)
      mu[i] <- inprod(X[i,],beta[])
   }
   # Priors
   for(j in 1:p){beta[j] ~ dnorm(0,0.01)}
   taue ~ dgamma(0.1,0.1)
   sig <- 1/sqrt(taue)

 }"

 library(rjags)
 dat    <- list(Y=Y,n=n,X=X,p=p)
 init   <- list(beta=rep(0,p))
 model1 <- jags.model(textConnection(ns_model),
                      inits=init,data = dat,quiet=TRUE)
 update(model1, 10000, progress.bar="none")
 samp1   <- coda.samples(model1, 
            variable.names=c("beta","sig"), 
            n.iter=20000, progress.bar="none")
 summary(samp1)
## 
## Iterations = 10001:30000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 20000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##              Mean       SD  Naive SE Time-series SE
## beta[1]  0.013944 0.086144 6.091e-04      0.0047820
## beta[2]  0.747707 0.081222 5.743e-04      0.0031505
## beta[3] -0.001578 0.020285 1.434e-04      0.0007276
## beta[4] -0.013208 0.016115 1.140e-04      0.0004328
## beta[5]  0.019918 0.018244 1.290e-04      0.0005871
## beta[6]  0.019271 0.018796 1.329e-04      0.0006771
## beta[7] -0.007685 0.004319 3.054e-05      0.0000967
## sig      0.101578 0.011526 8.150e-05      0.0001387
## 
## 2. Quantiles for each variable:
## 
##             2.5%       25%       50%       75%     97.5%
## beta[1] -0.15934 -0.042159  0.014508  0.070169 0.1866051
## beta[2]  0.58844  0.694445  0.748302  0.800498 0.9079541
## beta[3] -0.04249 -0.014814 -0.001272  0.011889 0.0381601
## beta[4] -0.04455 -0.024192 -0.013263 -0.002241 0.0184964
## beta[5] -0.01607  0.007886  0.020011  0.031872 0.0560053
## beta[6] -0.01791  0.007034  0.019323  0.031743 0.0563968
## beta[7] -0.01604 -0.010574 -0.007700 -0.004860 0.0009224
## sig      0.08216  0.093507  0.100491  0.108489 0.1274753

Create an adjacency matrix for the states in the US

 library(spdep)
 library(maptools)
 usa.state = map(database="state", fill=TRUE, plot=FALSE)
 state.ID <- sapply(strsplit(usa.state$names, ":"), function(x) x[1])
 usa.poly = map2SpatialPolygons(usa.state, IDs=state.ID)
 usa.nb = poly2nb(usa.poly)
 A = nb2mat(usa.nb, style="B")
 A <- A[-8,] # Take out DC
 A <- A[,-8]
 M <- diag(rowSums(A))

Fit the spatial model

 sp_model <- "model{

   # Likelihood
   for(i in 1:n){
      Y[i]  ~ dnorm(mu[i]+S[i],taue)
   }
   S[1:n] ~ dmnorm(zero[1:n],taus*Omega[1:n,1:n])
   for(i in 1:n){
      mu[i]   <- inprod(X[i,],beta[])
      zero[i] <- 0
   }
   Omega[1:n,1:n]<-M[1:n,1:n]-rho*A[1:n,1:n]

   # Priors
   for(j in 1:p){beta[j] ~ dnorm(0,0.01)}
   taue ~ dgamma(0.1,0.1)
   taus ~ dgamma(0.1,0.1)
   rho  ~ dunif(0,1)
   sig[1] <- 1/sqrt(taue)
   sig[2] <- 1/sqrt(taus)
  }"


  library(rjags)
  dat    <- list(Y=Y,n=n,X=X,A=A,M=M,p=p)
  init   <- list(rho=0.99,beta=lm(Y~X-1)$coef)
  model2 <- jags.model(textConnection(sp_model),
                       inits=init,data = dat,quiet=TRUE)
  update(model2, 10000, progress.bar="none")
  samp2  <- coda.samples(model2, 
            variable.names=c("beta","rho","sig"), 
            n.iter=20000, progress.bar="none")

  summary(samp2)
## 
## Iterations = 11001:31000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 20000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##              Mean       SD  Naive SE Time-series SE
## beta[1]  0.029355 0.121662 8.603e-04      0.0092254
## beta[2]  0.748870 0.109963 7.776e-04      0.0053884
## beta[3] -0.005812 0.027488 1.944e-04      0.0014090
## beta[4] -0.009143 0.020613 1.458e-04      0.0007147
## beta[5]  0.016025 0.024565 1.737e-04      0.0010367
## beta[6]  0.018046 0.025764 1.822e-04      0.0012171
## beta[7] -0.008311 0.005561 3.932e-05      0.0001470
## rho      0.376137 0.249532 1.764e-03      0.0036524
## sig[1]   0.104928 0.014109 9.976e-05      0.0002285
## sig[2]   0.149472 0.026697 1.888e-04      0.0005203
## 
## 2. Quantiles for each variable:
## 
##             2.5%        25%       50%       75%    97.5%
## beta[1] -0.20664 -0.0515372  0.031012  0.112671 0.258498
## beta[2]  0.52582  0.6769922  0.752191  0.823434 0.957389
## beta[3] -0.05682 -0.0246243 -0.006659  0.011867 0.051894
## beta[4] -0.05029 -0.0226582 -0.009069  0.004485 0.031515
## beta[5] -0.03192 -0.0002995  0.015881  0.032346 0.064015
## beta[6] -0.03029  0.0004163  0.017277  0.034839 0.070796
## beta[7] -0.01931 -0.0119155 -0.008261 -0.004604 0.002558
## rho      0.01582  0.1645056  0.344040  0.558963 0.894567
## sig[1]   0.08118  0.0948978  0.103549  0.113421 0.136312
## sig[2]   0.10519  0.1304824  0.146663  0.165348 0.209253
  rho <- samp2[[1]][,8]
  hist(rho,breaks=100)

plot of chunk sp

Summary: The spatial dependence parameter is estimated to be near one, indicating strong spatial dependence.

Compare the results across models

The objective is to determine if the coefficient corresponding to the number of gun laws, \(\beta_7\), is non-zero. Below we compare its posterior distribution for the spatial and non-spatial models.

 b1  <- samp1[[1]][,7]
 b2  <- samp2[[1]][,7]
 r   <- c(-0.035,0.015)
 d1  <- density(b1,from=r[1],to=r[2])
 d2  <- density(b2,from=r[1],to=r[2])

 plot(NA,xlim=r,ylim=c(0,max(d1$y)),
         xlab="Beta",ylab="Posterior density")
 lines(d1$x,d1$y)
 lines(d2$x,d2$y,lty=2)
 legend("topright",c("Non-spatial","Spatial"),lty=1:2,bty="n",cex=1.5)

plot of chunk comp

 mean(b1<0)
## [1] 0.96135
 mean(b2<0)
## [1] 0.93715

Summary: Both models provide evidence of a negative relationship between the number of gun laws and the firearm-related death rate. However, there is more uncertainty in the spatial model, which is likely more realistic.