# !wget https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
# !dpkg -i cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
# !apt-key add /var/cuda-repo-9-0-local/7fa2af80.pub
# !apt update -q
# !apt install cuda gcc-6 g++-6 -y -q
# !ln -s /usr/bin/gcc-6 /usr/local/cuda/bin/gcc
# !ln -s /usr/bin/g++-6 /usr/local/cuda/bin/g++
# !curl -sSL "https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz" -o julia.tar.gz
# !tar -xzf julia.tar.gz -C /usr --strip-components 1
# !rm -rf julia.tar.gz*
# !julia -e 'using Pkg; pkg"add IJulia; precompile"'

25. Weak IV Experiments#

# Import relevant packages
# Import relevant packages
# using Pkg
# Pkg.add("CSV")
# Pkg.add("DataFrames")
# Pkg.add("GLM")
# Pkg.add("FixedEffectModels")
# Pkg.add("DecisionTree")
# Pkg.add("PrettyTables")
# Pkg.add("CovarianceMatrices")
# Pkg.add("RegressionTables")
# Pkg.add("StatsFuns")
# Pkg.add("Plots")
# Pkg.add("RData")
# Pkg.add("MLBase")

# Standard library
using Random

# Third-party packages
using CSV, CovarianceMatrices, DataFrames, DecisionTree, FixedEffectModels, GLM,
      MLBase, Plots, PrettyTables, RData, RegressionTables, StatsFuns
"""
    foo1(a; rng=MersenneTwister(3))

Return `a` draws from the standard normal distribution using `rng`.
The default generator is seeded with 3, so repeated calls with the
default keyword produce the same vector (reproducible confounder draws).
"""
foo1(a; rng=MersenneTwister(3)) = randn(rng, a)
foo1 (generic function with 1 method)
"""
    foo2(a; rng=MersenneTwister(1))

Return `a` draws from the standard normal distribution using `rng`.
The default generator is seeded with 1 (a different seed than `foo1`),
so the default draws are reproducible and independent of `foo1`'s.
"""
foo2(a; rng=MersenneTwister(1)) = randn(rng, a)
foo2 (generic function with 1 method)
    # --- Simulate one draw of a weak-instrument design ----------------------
    # B replications are allocated here but only filled in the later loop cell.
    B = 1000
    IVEst = zeros( B )
    n = 100
    beta = 0.25  # first-stage coefficient; a small value makes the instrument weak

    U = foo1(n)  # unobserved confounder (seeded standard-normal draw)
    Z = foo2(n)  # instrument, drawn independently of U (different seed)
    D = beta*Z+U  # endogenous treatment: driven by the instrument AND the confounder
    Y = D + U;  # outcome; the true causal effect of D on Y is 1
    intercept = ones(length(U))
    data1 = DataFrame(intercept = intercept, U = U, Z = Z, D = D, Y = Y);

    # First-stage regression of D on Z (checks instrument strength via the F-stat).
    # NOTE(review): `mod` shadows Base.mod — harmless here since it is never
    # called as a function, but consider renaming (e.g. `firststage`).
    mod = reg(data1, @formula(D ~ Z))
                             Linear Model                             
=======================================================================
Number of obs:                 100   Degrees of freedom:              2
R2:                          0.067   R2 Adjusted:                 0.057
F-Stat:                    7.01262   p-value:                     0.009
=======================================================================
D           |  Estimate Std.Error  t value Pr(>|t|) Lower 95% Upper 95%
-----------------------------------------------------------------------
Z           |  0.250108  0.094447  2.64814    0.009 0.0626815  0.437535
(Intercept) | 0.0873342 0.0949433 0.919856    0.360 -0.101078  0.275746
=======================================================================
# 2SLS estimate of the effect of D on Y, instrumenting D with Z.
# `0 +` drops the intercept, so the fitted model has a single coefficient.
IV =  reg(data1, @formula(Y ~ 0 + (D ~ Z)))
IV
                          IV Model                          
============================================================
Number of obs:            100  Degrees of freedom:         1
R2:                     0.752  R2 Adjusted:            0.749
F-Stat:               6.79219  p-value:                0.011
F-Stat (First Stage): 7.01315  p-value (First Stage):  0.008
============================================================
Y  | Estimate Std.Error t value Pr(>|t|) Lower 95% Upper 95%
------------------------------------------------------------
D  | 0.994509  0.381596 2.60618    0.011   0.23734   1.75168
============================================================
# Extract the estimated coefficient vector (a single entry: the slope on D).
IV.coef
1-element Vector{Float64}:
 0.994509337024402

Note that the instrument is weak here (controlled by \(\beta\)) – the t-stat is less than 4.

25.1. Run 1000 trials to evaluate distribution of the IV estimator#

# dependent variable ~ exogenous variables + (endogenous variables ~ instrumental variables)
# Set seed so the Monte Carlo study is reproducible (the original comment
# promised a seed but never set one).
rng = MersenneTwister(12345)
B = 1000
IVEst = zeros(B)

# Monte Carlo: repeatedly simulate the weak-IV design (n and beta come from
# the setup cell above) and record the 2SLS estimate of the true unit effect.
for i in 1:B

    U = randn(rng, n)   # confounder
    Z = randn(rng, n)   # instrument, independent of U
    D = beta*Z+U        # endogenous treatment with a weak first stage
    Y = D + U
    intercept = ones(length(U))
    data2 = DataFrame(intercept = intercept, U = U, Z = Z, D = D, Y = Y);

    # With an intercept in the formula, coef[2] is the slope on D.
    IV =  reg(data2, @formula(Y ~ + (D ~  Z)))

    # Linear indexing: IVEst is a Vector, so the former `IVEst[i,1]`
    # trailing index was redundant.
    IVEst[i] = IV.coef[2]
end
# Report the range of the simulated IV estimates; the extreme values expose
# the heavy tails of the weak-instrument sampling distribution.
lo, hi = extrema(IVEst)
println(lo)
println(hi)
-40.368264648867964
57.83003936193075
# Display the full vector of simulated IV estimates.
IVEst
1000-element Vector{Float64}:
 -0.2633141021687235
  0.8948832260053269
 -0.20787697208091646
  0.6792079361265198
  1.052671004982911
 -0.7421608576444562
  1.1668318792400871
  0.933747261081341
  0.6850752909186767
  1.4639180207647025
 -0.9642817021998845
 -1.497309542643626
  1.2189323077695131
  ⋮
  5.460232661324924
  0.5478868710622211
  1.3717744154929605
  1.2644078444676423
  1.487865163308266
  0.7873831110483491
  0.7967961713080116
  0.9696312102644875
 -0.07785624411680934
  0.08917470799234822
  1.4232140851370487
  0.9943702695396225

25.2. Plot the Actual Distribution against the Normal Approximation (based on Strong Instrument Assumption)#

# NOTE(review): `val` is computed but never used in this cell — either use it
# as the kde evaluation grid or remove it.
val = collect(range( -5, 5.5, step = 0.05 ))
# NOTE(review): the hard-coded 1/100 is 1/n for n = 100 from the setup cell;
# prefer (1/beta^2)*(1/n) so it tracks the sample size.
var = (1/beta^2)*(1/100) # theoretical variance of IV
sd = sqrt(var)

# Reference distribution implied by the strong-instrument asymptotics.
# NOTE(review): `Normal` requires Distributions and `Seaborn` requires the
# Seaborn wrapper package — neither appears in the `using` line above; confirm
# they are loaded elsewhere in the notebook environment.
μ=0; σ=sd
d = Normal(μ, σ)
normal_dist = rand(d,1000)

# plotting both distibutions on the same figure
# (IVEst .- 1 centers the estimates at the true effect of 1)
Seaborn.kdeplot(x = IVEst.-1, shade = true, color = "red")
Seaborn.kdeplot(x = normal_dist, shade = true, color = "blue")
Seaborn.title("Actual Distribution vs Gaussian")
Seaborn.xlabel("IV Estimator -True Effect")
Seaborn.xlim(-5,5)
../_images/1fc59d4d0a138ed0f2588d8f44cb3db555ba83463eaf4fa0ad3970f62dd7fed9.png
(-5.0, 5.0)