DML inference using NN for gun ownership

Contents

19. DML inference using NN for gun ownership#

In this lab, we estimate the effect of gun ownership on the homicide rate using a neural network.

install.packages("librarian", quiet = T)
librarian::shelf(
  keras, 
  lfe, 
  tidyverse,
  tensorflow,
  quiet = T
)

also installing the dependency ‘BiocManager’

  These packages will be installed:

  'keras', 'lfe'

  It may take some time.

also installing the dependencies ‘Rcpp’, ‘RcppTOML’, ‘here’, ‘png’, ‘config’, ‘tfautograph’, ‘zoo’, ‘reticulate’, ‘tensorflow’, ‘tfruns’, ‘zeallot’, ‘Formula’, ‘xtable’, ‘sandwich’

Warning message in system("timedatectl", intern = TRUE):
“running command 'timedatectl' had status 1”

First, we need to load and preprocess the data.

# read in dataset
data <- read_csv("https://raw.githubusercontent.com/d2cml-ai/14.388_R/main/Data/gun_clean.csv", show_col_types = F) 


################## Find Variable Names from the Dataset ###################

varlist <- function (df=NULL,type=c("numeric","factor","character"), pattern="", exclude=NULL) {
  vars <- character(0)
  if (any(type %in% "numeric")) {
    vars <- c(vars,names(df)[sapply(df,is.numeric)])
  }
  if (any(type %in% "factor")) {
    vars <- c(vars,names(df)[sapply(df,is.factor)])
  }  
  if (any(type %in% "character")) {
    vars <- c(vars,names(df)[sapply(df,is.character)])
  }  
  vars[(!vars %in% exclude) & grepl(vars,pattern=pattern)]
}

########################### Create Variables ##############################

# dummy variables for year and county fixed effects
fixed  <- grep("X_Jfips", names(data), value=TRUE, fixed=TRUE)
year   <- varlist(data, pattern="X_Tyear")

# census control variables
census     <- NULL
census_var <- c("^AGE", "^BN", "^BP", "^BZ", "^ED", "^EL","^HI", "^HS", "^INC", "^LF", "^LN", "^PI", "^PO", "^PP", "^PV", "^SPR", "^VS")

for(i in 1:length(census_var)){
  census  <- append(census, varlist(data, pattern=census_var[i]))
}

############################### Variables #################################

# treatment variable
d     <- "logfssl"

# outcome variable
y     <- "logghomr"

# other control variables
X1    <- c("logrobr", "logburg", "burg_missing", "robrate_missing")
X2    <- c("newblack", "newfhh", "newmove", "newdens", "newmal")

######################  Partial-out Fixed Effects #########################

# new dataset for partialled-out variables
rdata <- as.data.frame(data$CountyCode) 
colnames(rdata) <- "CountyCode"

# variables to partial-out
varlist <- c(y, d,X1, X2, census)

# partial out year and county fixed effects from variables in varlist
for(i in 1:length(varlist)){
  form <- as.formula(paste(varlist[i], "~", paste(paste(year,collapse="+"),  paste(fixed,collapse="+"), sep="+")))
  rdata[, varlist[i]] <- lm(form, data)$residuals
}

New names:
• `` -> `...1`

19.1. DML for neural nets#

The following algorithm consumes \(Y\),\(D\) and \(Z\), and learns the residuals \(\tilde{Y}\) and \(\tilde{D}\) via a neural network, where the residuals are obtained by cross-validation (cross-fitting). Then, it prints the estimated coefficient \(\beta\) and the clustered standard error from the final OLS regression.

DML2.for.NN <- function(z, d, y, nfold=2, clu, num_epochs, batch_size) {
  nobs <- nrow(z) # number of observations
  foldid <- rep.int(1:nfold,times = ceiling(nobs/nfold))[sample.int(nobs)] # define folds indices
  I <- split(1:nobs, foldid)  # split observation indices into folds  
  ytil <- dtil <- rep(NA, nobs)
  cat("fold: ")
  for(b in 1:length(I)){
  # normalize the data
  mean <- apply(z[-I[[b]],], 2, mean)
  std <- apply(z[-I[[b]],], 2, sd)
  z[-I[[b]],] <- scale(z[-I[[b]],], center = mean, scale = std)
  z[I[[b]],] <- scale(z[I[[b]],], center = mean, scale = std)
  # building the model with 3 layers, the ReLU activation function, mse loss and rmsprop optimizer                  
  build_model <- function(){
  model <- keras_model_sequential() %>% 
    layer_dense(units = 16, activation = "relu", 
                input_shape = dim(z[-I[[b]],][2]))%>% 
    layer_dense(units = 16, activation = "relu") %>% 
    layer_dense(units = 1) 
  
    model %>% compile(
    optimizer = "rmsprop", 
    loss = "mse", 
    metrics = c("mae")
    )  
   }
  model.Y <- build_model()
  model.D <- build_model()                       
  # fitting the model                   
  model.D %>% fit(z[-I[[b]],], d[-I[[b]]],
                    epochs = num_epochs, batch_size = batch_size, verbose = 0)                       
  model.Y %>% fit(z[-I[[b]],], y[-I[[b]]],
                    epochs = num_epochs, batch_size = batch_size, verbose = 0)
  dhat <- model.D %>% predict(z[I[[b]],])
  yhat <- model.Y %>% predict(z[I[[b]],])   
  dtil[I[[b]]] <- (d[I[[b]]] - dhat) # record residual for the left-out fold
  ytil[I[[b]]] <- (y[I[[b]]] - yhat) # record residial for the left-out fold                  
  cat(b," ")
        }
  #rfit <- lm(ytil ~ dtil) # estimate the main parameter by regressing one residual on the other
  data <- data.frame(cbind(ytil, dtil, as.matrix(clu)))
  rfit <- felm(ytil ~ dtil|0|0|CountyCode,data=data)
  coef.est <- coef(rfit)[2] # extract the coefficient
  #HC <- vcovHC(rfit)
  se    <- summary(rfit,robust=T)$coefficients[2,2] # record robust standard error by county
  cat(sprintf("\ncoef (se) = %g (%g)\n", coef.est , se)) # print the output
  return( list(coef.est =coef.est , se=se, dtil=dtil, ytil=ytil, rfit=rfit) ) # save the output and residuals 
}

19.2. Estimating the effect with DML for neural nets#

# treatment variable
D <- rdata[which(colnames(rdata) == d)]
# outcome variable
Y <- rdata[which(colnames(rdata) == y)]
# construct matrix Z
Z <- rdata[which(colnames(rdata) %in% c(X1,X2,census))]

# inputs
y_nn <- as.matrix(Y)
d_nn <- as.matrix(D)
z_nn <- as.matrix(Z)
clu <- rdata[which(colnames(rdata) == "CountyCode")]

# DML with a NN
set.seed(123)
DML2.nn = DML2.for.NN(z_nn, d_nn, y_nn, nfold=2, clu, 100, 10)

fold:

Loaded Tensorflow version 2.8.2

1  2  
coef (se) = 0.351552 (0.180514)