DML inference for gun ownership

18. DML inference for gun ownership#

# Import relevant packages
import pandas as pd
import numpy as np
import pyreadr
import os
from urllib.request import urlopen
from sklearn import preprocessing
import patsy

from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense

import hdmpy
import numpy as np
import random
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, ElasticNetCV
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
import itertools
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_categorical_dtype
from itertools import compress
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.feature_selection import SelectFromModel

This notebook contains an example for teaching.

18.1. A Case Study: The Effect of Gun Ownership on Gun-Homicide Rates#

We consider the problem of estimating the effect of gun ownership on the homicide rate. For this purpose, we estimate the following partially linear model

\[ Y_{j,t} = \beta D_{j,(t-1)} + g(Z_{j,t}) + \epsilon_{j,t}. \]

18.2. Data#

\(Y_{j,t}\) is log homicide rate in county \(j\) at time \(t\), \(D_{j, t-1}\) is log fraction of suicides committed with a firearm in county \(j\) at time \(t-1\), which we use as a proxy for gun ownership, and \(Z_{j,t}\) is a set of demographic and economic characteristics of county \(j\) at time \(t\). The parameter \(\beta\) is the effect of gun ownership on the homicide rates, controlling for county-level demographic and economic characteristics.

The sample covers 195 large United States counties between the years 1980 through 1999, giving us 3900 observations.

data1 = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/14.388_py/main/data/gun_clean.csv")
data1.shape[1]

18.2.1. Preprocessing#

To account for heterogeneity across counties and time trends in all variables, we remove from them county-specific and time-specific effects in the following preprocessing.

import re

#################################  Find Variable Names from Dataset ########################

def varlist( df = None, type_dataframe = [ 'numeric' , 'categorical' , 'string' ],  pattern = "", exclude = None ):
    varrs = []
    
    if ('numeric' in type_dataframe):
        varrs = varrs + df.columns[ df.apply( is_numeric_dtype , axis = 0 ).to_list() ].tolist()
    
    if ('categorical' in type_dataframe):
        varrs = varrs + df.columns[ df.apply( is_categorical_dtype , axis = 0 ).to_list() ].tolist()
    
    if ('string' in type_dataframe):
        varrs = varrs + df.columns[ df.apply( is_string_dtype , axis = 0 ).to_list() ].tolist()
    
    grepl_result = np.array( [ re.search( pattern , variable ) is not None for variable in df.columns.to_list() ] )
    
    if exclude is None:
        result = list(compress( varrs, grepl_result ) )
    
    else:
        varrs_excluded = np.array( [var in exclude for var in varrs ] )
        and_filter = np.logical_and( ~varrs_excluded ,  grepl_result )
        result = list(compress( varrs, and_filter ) )
    
    return result   

################################# Create Variables ###############################


# Dummy Variables for Year and County Fixed Effects
r = re.compile("X_Jfips")
fixed = list( filter( r.match, data1.columns.to_list() ) )
year = varlist(data1, pattern="X_Tyear")

census = []
census_var = ["^AGE", "^BN", "^BP", "^BZ", "^ED", "^EL","^HI", "^HS", "^INC", "^LF", "^LN", "^PI", "^PO", "^PP", "^PV", "^SPR", "^VS"]

for variable in census_var:
    r = re.compile( variable )
    census = census + list( filter( r.match, data1.columns.to_list() ) )

    
################################ Variables ##################################
# Treatment Variable
d = "logfssl"

# Outcome Variable
y = "logghomr"

# Other Control Variables
X1 = ["logrobr", "logburg", "burg_missing", "robrate_missing"]
X2 = ["newblack", "newfhh", "newmove", "newdens", "newmal"]

#################################  Partial out Fixed Effects ########################

rdata = data1[['CountyCode']]

# Variables to be Partialled-out
varlist2 = [y] + [d] + X1 + X2 + census

# Partial out Variables in varlist from year and county fixed effect
for var_name in varlist2:
    form = var_name + " ~ "  + " + ".join( year ) + "+" + " + ".join( fixed )
    rdata[f'{var_name}'] = smf.ols( formula = form , data = data1 ).fit().resid

link="https://raw.githubusercontent.com/d2cml-ai/14.388_py/main/data/gun_clean.RData"
response = urlopen(link)
content = response.read()
fhandle = open( 'gun_clean.RData', 'wb')
fhandle.write(content)
fhandle.close()
result = pyreadr.read_r("gun_clean.RData")
os.remove("gun_clean.RData")

# Extracting the data frame from rdata_read
data = result[ 'data' ]
n = data.shape[0]

18.2.2. We check that our results are equal to R results at 6 decimals#

column_names = data.columns.to_list()

for col in column_names:
    result = (data[f'{col}'].round(6) == rdata[f'{col}'].round(6)).sum()

Now, we can construct the treatment variable, the outcome variable and the matrix \(Z\) that includes the control variables.

# Treatment Variable
D = rdata[ f'{d}']

# Outcome Variable
Y = rdata[ f'{y}']

# Construct matrix Z
Z = rdata[ X1 + X2 + census ]

Z.shape

(3900, 195)

We have in total 195 control variables. The control variables \(Z_{j,t}\) are from the U.S. Census Bureau and contain demographic and economic characteristics of the counties such as the age distribution, the income distribution, crime rates, federal spending, home ownership rates, house prices, educational attainment, voting paterns, employment statistics, and migration rates.

clu = rdata[['CountyCode']]
data = pd.concat([Y, D, Z, clu], axis=1)

data.to_csv( r"../data/gun_clean2.csv" , index = False )

18.3. The effect of gun ownership#

18.3.1. OLS#

After preprocessing the data, we first look at simple regression of \(Y_{j,t}\) on \(D_{j,t-1}\) without controls as a baseline model.

import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy

# # Run this line to avoid all the lines of code above
# data = pd.read_csv( r"../data/gun_clean2.csv"  )

# OLS clustering at the County level
model = "logghomr ~ logfssl"
baseline_ols = smf.ols(model , data=data).fit().get_robustcov_results(cov_type = "cluster", groups= data['CountyCode'])
baseline_ols_table = baseline_ols.summary2().tables[1]
print( baseline_ols_table.iloc[ 1 , 4:] )
baseline_ols_table.iloc[1, :]

[0.025    0.154480
0.975]    0.410129
Name: logfssl, dtype: float64

Coef.       0.282304
Std.Err.    0.064811
t           4.355825
P>|t|       0.000021
[0.025      0.154480
0.975]      0.410129
Name: logfssl, dtype: float64

The point estimate is \(0.282\) with the confidence interval ranging from 0.155 to 0.41. This suggests that increases in gun ownership rates are related to gun homicide rates - if gun ownership increases by 1% relative to a trend then the predicted gun homicide rate goes up by 0.28%, without controlling for counties’ characteristics.

Since our goal is to estimate the effect of gun ownership after controlling for a rich set county characteristics we next include the controls. First, we estimate the model by ols and then by an array of the modern regression methods using the double machine learning approach.

# define the variables
y = 'logghomr'

data_columns = list(data)
no_relev_col = ['logfssl', 'CountyCode', 'logghomr']

# This gives us: new_list = ['carrot' , 'lemon']
z = [col for col in data_columns if col not in no_relev_col]

control_formula = "logghomr" + ' ~ ' + 'logfssl + ' + ' + '.join( Z.columns.to_list() )

control_ols = smf.ols( control_formula , data=data).fit().get_robustcov_results(cov_type = "cluster", groups= data['CountyCode'])
control_ols_table = control_ols.summary2().tables[1]
print( control_ols_table.iloc[ 1 , 4:] )
control_ols_table.iloc[1, :]

[0.025    0.087545
0.975]    0.296430
Name: logfssl, dtype: float64

Coef.       0.191987
Std.Err.    0.052956
t           3.625435
P>|t|       0.000369
[0.025      0.087545
0.975]      0.296430
Name: logfssl, dtype: float64

In R, the coefficients of the bellow variables are Null. However, in python we got a very high value.

control_ols_table.loc[['PPQ110D', 'PPQ120D'], :]

	Coef.	Std.Err.	t	P>\|t\|	[0.025	0.975]
PPQ110D	1.590771e+11	7.334932e+11	0.216876	0.828533	-1.287568e+12	1.605722e+12
PPQ120D	-4.875189e+10	6.826842e+11	-0.071412	0.943143	-1.395188e+12	1.297684e+12

After controlling for a rich set of characteristics, the point estimate of gun ownership reduces to \(0.19\).

18.3.2. DML algorithm#

Here we perform inference of the predictive coefficient \(\beta\) in our partially linear statistical model,

\[ Y = D\beta + g(Z) + \epsilon, \quad E (\epsilon | D, Z) = 0, \]

using the double machine learning approach.

For \(\tilde Y = Y- E(Y|Z)\) and \(\tilde D= D- E(D|Z)\), we can write

\[ \begin{align} \tilde Y = \alpha \tilde D + \epsilon, \quad E (\epsilon |\tilde D) =0. \end{align} \]

Using cross-fitting, we employ modern regression methods to build estimators \(\hat \ell(Z)\) and \(\hat m(Z)\) of \(\ell(Z):=E(Y|Z)\) and \(m(Z):=E(D|Z)\) to obtain the estimates of the residualized quantities:

\[ \tilde Y_i = Y_i - \hat \ell (Z_i), \quad \tilde D_i = D_i - \hat m(Z_i), \quad \text{ for each } i = 1,\dots,n. \]

Finally, using ordinary least squares of \(\tilde Y_i\) on \(\tilde D_i\), we obtain the estimate of \(\beta\).

The following algorithm comsumes \(Y, D, Z\), and a machine learning method for learning the residuals \(\tilde Y\) and \(\tilde D\), where the residuals are obtained by cross-validation (cross-fitting). Then, it prints the estimated coefficient \(\beta\) and the corresponding standard error from the final OLS regression.

from sklearn.model_selection import KFold

def DML2_for_PLM(z, d, y, dreg, yreg, nfold, clu):
    
    kf = KFold(n_splits = nfold, shuffle=True) #Here we use kfold to generate kfolds
    I = np.arange(0, len(d)) #To have a id vector from data
    train_id, test_id =  [], [] #arrays to store kfold's ids

    #generate and store kfold's id
    for kfold_index in kf.split(I):
        train_id.append(kfold_index[0])
        test_id.append(kfold_index[1])
    
    # Create array to save errors 
    dtil = np.zeros( len(z) ).reshape( len(z) , 1 )
    ytil = np.zeros( len(z) ).reshape( len(z) , 1 )
    
    # loop to save results
    for b in range(0,len(train_id)):

        # Lasso regression, excluding folds selected 
        dfit = dreg(z[train_id[b],], d[train_id[b],])
        yfit = yreg(z[train_id[b],], y[train_id[b],])

        # predict estimates using the 
        dhat = dfit.predict( z[test_id[b],] )
        yhat = yfit.predict( z[test_id[b],] )

        # save errors  
        dtil[test_id[b]] =  d[test_id[b],] - dhat.reshape( -1 , 1 )
        ytil[test_id[b]] = y[test_id[b],] - yhat.reshape( -1 , 1 )
        print(b, " ")
    
    # Create dataframe 
    data_2 = pd.DataFrame(np.concatenate( ( ytil, dtil,clu ), axis = 1), columns = ['ytil','dtil','CountyCode'])
   
    # OLS clustering at the County level
    model = "ytil ~ dtil"
    baseline_ols = smf.ols(model , data = data_2 ).fit().get_robustcov_results(cov_type = "cluster", groups= data_2['CountyCode'])
    coef_est = baseline_ols.summary2().tables[1]['Coef.']['dtil']
    se = baseline_ols.summary2().tables[1]['Std.Err.']['dtil']
    
    Final_result = { 'coef_est' : coef_est , 'se' : se , 'dtil' : dtil , 'ytil' : ytil }

    print("Coefficient is {}, SE is equal to {}".format(coef_est, se))
    
    return Final_result

Now, we apply the Double Machine Learning (DML) approach with different machine learning methods. First, we load the relevant libraries.

Let us, construct the input matrices.

# Create main variables
Y = data['logghomr']
D = data['logfssl']
Z = data.drop(['logghomr', 'logfssl', 'CountyCode'], axis=1)
CLU = data['CountyCode']

# as matrix
y = Y.to_numpy().reshape( len(Y) , 1 )
d = D.to_numpy().reshape( len(Y) , 1 )
z = Z.to_numpy()
clu = clu.to_numpy().reshape( len(Y) , 1 )

18.3.3. Lasso#

def dreg(z,d):
    alpha=0.00000001
    result = linear_model.Lasso(alpha = alpha).fit(z, d)
    return result

def yreg(z,y):
    alpha=0.00000001
    result = linear_model.Lasso(alpha = alpha).fit(z, y)
    return result

DML2_lasso = DML2_for_PLM(z, d, y, dreg, yreg, 10, clu)











Coefficient is 0.2173388201108385, SE is equal to 0.05589284340861423

18.3.3.1. We use SelectFromModel to select variables which do not have a zero coefficient. It is done because we want to reduce dimensionality as rlasso( post = T ) does.#

class Lasso_post:
    
    def __init__(self, alpha ):
        self.alpha = alpha

        
    def fit( self, X, Y ):
        self.X = X
        self.Y = Y
        lasso = linear_model.Lasso( alpha = self.alpha ).fit( X , Y )
        model = SelectFromModel( lasso , prefit = True )
        X_new = model.transform( X )
        # Gettin indices from columns which has variance for regression
        index_X = model.get_support()
        
        self.index = index_X
        new_x = X[ : ,  index_X ]
        
        lasso2 = linear_model.Lasso( alpha = self.alpha ).fit( new_x , Y )
        self.model = lasso2
        
        return self
    
    def predict( self , X ):
        
        dropped_X = X[ : , self.index ]
        
        predictions = self.model.predict( dropped_X )
        
        return predictions

Run the below code to verify whether Lasso_post functions as it is expected.

def dreg(z,d):
    alpha=0.00000001
    result = Lasso_post( alpha = alpha ).fit( z , d )
    return result

def yreg( z , y ):
    alpha = 0.00000001
    result = Lasso_post( alpha = alpha ).fit( z , y )
    return result

DML2_lasso_post = DML2_for_PLM(z, d, y, dreg, yreg, 10, clu)











Coefficient is 0.2248307753410121, SE is equal to 0.05648034432412721

18.3.4. hdmy#

We are going to replicate the above regressions but using hmdpy library.

import hdmpy
from statsmodels.tools import add_constant

class rlasso_sklearn:
    
    def __init__(self, post ):
        self.post = post
       
    def fit( self, X, Y ):
        
        self.X = X
        self.Y = Y
        
        # Standarization of X and Y
        self.rlasso_model = hdmpy.rlasso( X , Y , post = self.post )                
        return self
    
    def predict( self , X_1 ):
        self.X_1 = X_1
        beta = self.rlasso_model.est['coefficients'].to_numpy()
        
        if beta.sum() == 0:
            prediction = np.repeat( self.rlasso_model.est['intercept'] , self.X_1.shape[0] )
        
        else:
            prediction = ( add_constant( self.X_1 , has_constant = 'add') @ beta ).flatten()
                
        return prediction

# Post = false
def dreg(x, d):
    result = rlasso_sklearn( post = False ).fit( x , d )
    return result

def yreg(x,y):
    result = rlasso_sklearn( post = False ).fit( x , y )
    return result

DML2_lasso_hdmpy = DML2_for_PLM(z, d, y, dreg, yreg, 10, clu)











Coefficient is 0.22355861068775723, SE is equal to 0.056959040205356325

# Post = True
def dreg(x, d):
    result = rlasso_sklearn( post = True ).fit( x , d )
    return result

def yreg(x,y):
    result = rlasso_sklearn( post = True ).fit( x , y )
    return result

DML2_lasso_post_hdmpy = DML2_for_PLM(z, d, y, dreg, yreg, 10, clu)











Coefficient is 0.22778980282980787, SE is equal to 0.055391060681506045

We found out that the results are close. However, there are not a big differences between the Sklearn and the Hdmpy lasso function.

18.3.5. cv.glmnet curiosities#

According to link1 and link2, It standardize X variables before estimation. In sklearn we have an option named normalize. It normalizes X by subtracting the mean and dividing by the l2-norm. Based on link3, cv.glmnet (standardize = TRUE) and sklearn (normlize = True) are not the same. We decided to use StandardScaler that meets suggestions made in link3. We proved this in the commented code below.

class standard_skl_model:
    
    def __init__(self, model ):
        self.model = model
       
    def fit( self, X, Y ):
        
        # Standarization of X and Y
        self.scaler_X = StandardScaler()
        self.scaler_X.fit( X )
        std_X = self.scaler_X.transform( X )
                
        self.model.fit( std_X , Y )
                
        return self
    
    def predict( self , X ):
        
        self.scaler_X = StandardScaler()
        self.scaler_X.fit( X )
        std_X = self.scaler_X.transform( X )
        
        prediction = self.model.predict( std_X )
        
        return prediction

Run the below code to verify whether standard_skl_model functions as it is expected.

#DML with cross-validated Lasso:
def dreg(z,d):
    result = standard_skl_model( LassoCV(cv = 10 , random_state = 0 ) ).fit( z, d )
    return result

def yreg(z,y):
    result = standard_skl_model( LassoCV(cv = 10 , random_state = 0  ) ).fit( z, y )
    return result

DML2_lasso_cv = DML2_for_PLM(z, d, y, dreg, yreg, 2 , clu)

0  
1  
Coefficient is 0.20674588093019308, SE is equal to 0.05530116123651258

# DML with cross-validated Lasso:
def dreg(z,d):
    result = standard_skl_model( ElasticNetCV( cv = 10 , random_state = 0 , l1_ratio = 0.5, max_iter = 100000 ) ).fit( z, d )
    return result

def yreg(z,y):
    result = standard_skl_model( ElasticNetCV( cv = 10 , random_state = 0 , l1_ratio = 0.5, max_iter = 100000 ) ).fit( z, y )
    return result

DML2_elnet = DML2_for_PLM(z, d, y, dreg, yreg, 2 , clu)

0  
1  
Coefficient is 0.22254221301343585, SE is equal to 0.053879833505691754

#DML with cross-validated Lasso:
def dreg(z,d):
    result = standard_skl_model( ElasticNetCV( cv = 10 ,  random_state = 0 , l1_ratio = 0.0001 ) ).fit( z, d )
    return result

def yreg(z,y):
    result = standard_skl_model( ElasticNetCV( cv = 10 , random_state = 0 , l1_ratio = 0.0001 ) ).fit( z, y )
    return result

DML2_ridge = DML2_for_PLM(z, d, y, dreg, yreg, 2, clu)

0  
1  
Coefficient is 0.19639693752730308, SE is equal to 0.05303210277187753

Here we also compute DML with OLS used as the ML method

#DML with cross-validated Lasso:
def dreg(z,d):
    result = LinearRegression().fit( z, d )
    return result

def yreg(z,y):
    result = LinearRegression().fit( z, y )
    return result

DML2_ols = DML2_for_PLM(z, d, y, dreg, yreg, 2, clu)

0  
1  
Coefficient is 0.1867889216454423, SE is equal to 0.05529603580186457

Next, we also apply Random Forest for comparison purposes.

18.3.6. Random Forest#

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

#DML with cross-validated Lasso:
def dreg(z,d):
    result = RandomForestRegressor( random_state = 0 , n_estimators = 500 , max_features = 65 , n_jobs = 4 , min_samples_leaf = 5 ).fit( z, d )
    return result

def yreg(z,y):
    result = RandomForestRegressor( random_state = 0 , n_estimators = 500 , max_features = 65 , n_jobs = 4 , min_samples_leaf = 5 ).fit( z, y )
    return result

DML2_RF = DML2_for_PLM(z, d, y, dreg, yreg, 2, clu)   # set to 2 due to computation time

0  
1  
Coefficient is 0.1714420140198645, SE is equal to 0.05677223302029048

We conclude that the gun ownership rates are related to gun homicide rates - if gun ownership increases by 1% relative to a trend then the predicted gun homicide rate goes up by about 0.20% controlling for counties’ characteristics.

Finally, let’s see which method is actually better. We compute RMSE for predicting D and Y, and see which of the methods works better.

mods = [DML2_ols, DML2_lasso, DML2_lasso_post , DML2_lasso_cv, DML2_ridge, DML2_elnet, DML2_RF]
mods_name = ["DML2_ols", "DML2_lasso", "DML2_lasso_post" , "DML2_lasso_cv", 'DML2_ridge', 'DML2_elnet', 'DML2_RF']

def mdl( model , model_name ):
    
    RMSEY = np.sqrt( np.mean( model[ 'ytil' ] ) ** 2 ) # I have some doubts about these equations...we have to recheck
    RMSED = np.sqrt( np.mean( model[ 'dtil' ] ) ** 2 ) # I have some doubts about these equations...we have to recheck
    
    result = pd.DataFrame( { model_name : [ RMSEY , RMSED ]} , index = [ 'RMSEY' , 'RMSED' ])
    return result

RES = [ mdl( model , name ) for model, name in zip( mods , mods_name ) ]
    
pr_Res = pd.concat( RES, axis = 1)

pr_Res

	DML2_ols	DML2_lasso	DML2_lasso_post	DML2_lasso_cv	DML2_ridge	DML2_elnet	DML2_RF
RMSEY	0.005623	0.000244	0.000479	1.764970e-18	7.401487e-19	2.391250e-18	0.002519
RMSED	0.000370	0.000527	0.000086	9.251859e-20	9.963540e-20	1.138690e-19	0.000603

18.3.6.1. This verfies that the function DML2_for_PLM has no errors#

np.where(DML2_lasso_post[ 'ytil' ] == 0)[0].size

It looks like the best method for predicting D is Lasso, and the best method for predicting Y is CV Ridge.

#DML with cross-validated Lasso:
def dreg(z,d):
    result = standard_skl_model( LassoCV(cv = 10 , random_state = 0 , alphas = [0]) ).fit( z, d )
    return result


def yreg(z,y):
    result = standard_skl_model( ElasticNetCV( cv = 10 ,  random_state = 0 , l1_ratio = 0.0001 ) ).fit( z, y )
    return result

DML2_best = DML2_for_PLM(z, d, y , dreg, yreg, 10, clu)











Coefficient is 0.20611237089029483, SE is equal to 0.052394355011270544

table = np.zeros( ( 9 , 2 ))
table[ 0 , 0] = baseline_ols_table.iloc[ 1 , 0 ]
table[ 1 , 0] = control_ols_table.iloc[ 1 , 0 ]
table[ 2 , 0] = DML2_lasso['coef_est']
table[ 3 , 0] = DML2_lasso_post['coef_est']
table[ 4 , 0] = DML2_lasso_cv['coef_est']
table[ 5 , 0] = DML2_ridge['coef_est']
table[ 6 , 0] = DML2_elnet['coef_est']
table[ 7 , 0] = DML2_RF['coef_est']
table[ 8 , 0] = DML2_best['coef_est']
table[ 0 , 1] = baseline_ols_table.iloc[ 1 , 1 ]
table[ 1 , 1] = control_ols_table.iloc[ 1 , 1 ]
table[ 2 , 1] = DML2_lasso['se']
table[ 3 , 1] = DML2_lasso_post['se']
table[ 4 , 1] = DML2_lasso_cv['se']
table[ 5 , 1] = DML2_ridge['se']
table[ 6 , 1] = DML2_elnet['se']
table[ 7 , 1] = DML2_RF['se']
table[ 8 , 1] = DML2_best['se']

table = pd.DataFrame( table , index = [ "Baseline OLS", "Least Squares with controls", "Lasso", \
             "Post-Lasso", "CV Lasso","CV Elnet", "CV Ridge", "Random Forest", "Best" ] , \
            columns = ["Estimate","Standard Error"] )
table.round( 3 )

	Estimate	Standard Error
Baseline OLS	0.282	0.065
Least Squares with controls	0.192	0.053
Lasso	0.217	0.056
Post-Lasso	0.225	0.056
CV Lasso	0.207	0.055
CV Elnet	0.196	0.053
CV Ridge	0.223	0.054
Random Forest	0.171	0.057
Best	0.206	0.052