6. Event study#

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings, patsy
warnings.filterwarnings('ignore')
from linearmodels.panel  import PanelOLS as fe
import statsmodels.api as sm

dpi = 600
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 6
      4 import warnings, patsy
      5 warnings.filterwarnings('ignore')
----> 6 from linearmodels.panel  import PanelOLS as fe
      7 import statsmodels.api as sm
      9 dpi = 600

ModuleNotFoundError: No module named 'linearmodels'

6.1. With difference-in-differences results#

# Load the Autor (JOLE 2003) state-year panel and keep only the columns
# needed for the event-study / DiD regressions.
url = "https://raw.githubusercontent.com/d2cml-ai/python_visual_library/main/data/autor-jole2003.csv"
data = pd.read_csv(url)
data1 = data[['lnths', 'year_since_adoption', 'state', 'year', 'after']]
# MultiIndex (entity, time) as required by linearmodels' PanelOLS.
data1 = data1.set_index(['state', 'year'])
data1.head(5)

# Two specifications, both without an intercept:
#  - fml:  one dummy per event-time year (event-study spec)
#  - fml1: a single post-adoption indicator (plain DiD spec)
fml = "lnths ~ C(year_since_adoption) - 1"
fml1 = 'lnths ~ after - 1'

y_data, x_data = patsy.dmatrices(fml, data=data1, return_type='dataframe')
y_data_1, x_data_1 = patsy.dmatrices(fml1, data=data1, return_type='dataframe')

# Keep the event-time dummies for t = -9..-2 and t = 0..15; the year just
# before adoption (t = -1) is the omitted baseline. Build the patsy column
# names programmatically instead of typing each one out.
x_data1 = x_data.filter(regex=r'0[^-1]')
event_times = [*range(-9, -1), *range(0, 16)]
nn = [f'C(year_since_adoption)[{t:.1f}]' for t in event_times]

x_data2 = x_data1[nn]

# Two-way (state and year) fixed-effects regressions with standard errors
# clustered by state: `mdl` is the event-study spec, `mdl1` the DiD spec.
mdl = fe(
    y_data, x_data2, time_effects=True, entity_effects=True
).fit(cov_type='clustered', cluster_entity=True)

mdl1 = fe(
    y_data_1, x_data_1['after[True]'], time_effects=True, entity_effects=True
).fit(cov_type='clustered', cluster_entity=True)

# Point estimates and 95% confidence bounds, rescaled to percent.
p = np.asarray(mdl.params) * 100
std = np.asarray(mdl.std_errors) * 100
u95 = p + 1.96 * std
l95 = p - 1.96 * std

df1 = pd.DataFrame({
    'poin': p,
    'u95': u95,
    'l95': l95,
    'x': np.concatenate([np.arange(-9, -1, 1), np.arange(0, 16, 1)]),
})

# Append the omitted baseline year (t = -1) at zero and order by event time.
df1.loc[len(df1.index)] = [0, 0, 0, -1]
df1.sort_values('x', inplace=True)
# Long format: one row per (event time, bound) pair, for the CI whiskers.
df12 = pd.melt(df1, id_vars=['poin', 'x'], value_vars=['u95', 'l95'])
poin x variable value
0 1.095201 -9 u95 67.670811
1 3.087418 -8 u95 52.649471
2 2.952648 -7 u95 38.896585
3 1.520740 -6 u95 29.696308
4 -0.936925 -5 u95 23.286290
# -- boundary lines: the flat DiD estimate (and its 95% CI) to overlay on
# the event-study plot. Use .iloc for positional access: `Series[0]` on a
# labeled Series is deprecated positional indexing in pandas.
dind = mdl1.params.iloc[0] * 100
didse = mdl1.std_errors.iloc[0] * 100

x_after = np.arange(0, 16, 1)   # post-adoption event times
x_befor = np.arange(-9, 0, 1)   # pre-adoption event times
y_bef = np.repeat(0, len(x_befor))
# NOTE(review): every x_after value is > -5, so these are constant arrays —
# the treatment-effect line and its CI band over the post period.
dind1 = np.where(x_after > -5, dind, 0)
upper = np.where(dind1 != 0, dind + 1.96 * didse, 0)
lower = np.where(dind1 != 0, dind - 1.96 * didse, 0)
# Event-study figure: per-year point estimates with 95% CI whiskers (blue),
# plus the flat DiD estimate and its CI band (red) after adoption.
fig = plt.figure(dpi=dpi)
ax = fig.add_axes([.1, 1, 1, 1])

# One vertical CI segment per event time (the two melted rows at each x
# carry the upper and lower bound).
for xi in np.unique(df12.x):
    seg = df12[df12.x == xi]
    ax.plot('x', 'value', data=seg, color="#2a437c", lw=.8)

ax.axhline(.0, lw=.5, color='#2a437c', linestyle="-.")

ax.scatter('x', 'poin', data=df12, color="#2a437c")
ax.plot(x_after, dind1, lw=1, color='red', linestyle="-")
ax.plot(x_befor, y_bef, lw=1, color='red', linestyle="-")
ax.plot(x_after, upper, lw=.8, color='red', linestyle="--")
ax.plot(x_after, lower, lw=.8, color='red', linestyle="--")

# Mark the adoption date between t = -1 and t = 0.
ax.axvline(-.5, lw=.5, linestyle="-.", color='black')

ax.spines[['top', 'right']].set_visible(False)

ax.set_xticks(np.arange(-10, 16, 5))
ax.set_yticks(np.arange(-40, 41, 40))
ax.set_xlabel("Year relative to adoption of implied contract exception")
ax.set_ylabel("Coefficient estimates & 95% CI")
plt.show()
Text(0, 0.5, 'Coefficient estimates & 95% CI')
../_images/cce9bde94222df79a4b7f8bb328186f935ff0edb7e0e73601822b23a16291eaa.png

6.2. Ribbon for Confidence Intervals#

# Reload the panel for the ribbon figure; the `after` dummy is not needed
# here, only the event-time variable.
url = "https://raw.githubusercontent.com/d2cml-ai/python_visual_library/main/data/autor-jole2003.csv"
data = pd.read_csv(url)
data1 = data[['lnths', 'year_since_adoption', 'state', 'year']]
# MultiIndex (entity, time) as required by linearmodels' PanelOLS.
data1 = data1.set_index(['state', 'year'])
data1.head(5)

# Event-time dummies without intercept; t = -1 is the omitted baseline.
# Build the patsy column names programmatically (t = -9..-2 and 0..15).
fml = "lnths ~ C(year_since_adoption) - 1"
y_data, x_data = patsy.dmatrices(fml, data=data1, return_type='dataframe')
x_data1 = x_data.filter(regex=r'0[^-1]')
event_times = [*range(-9, -1), *range(0, 16)]
nn = [f'C(year_since_adoption)[{t:.1f}]' for t in event_times]
x_data2 = x_data1[nn]

# Two-way fixed-effects event-study regression, state-clustered SEs.
mdl = fe(
    y_data, x_data2, time_effects=True, entity_effects=True
).fit(cov_type='clustered', cluster_entity=True)

# Point estimates plus 95% and 90% confidence bounds, in percent.
p = np.asarray(mdl.params) * 100
std = np.asarray(mdl.std_errors) * 100
u95, l95 = p + 1.96 * std, p - 1.96 * std
u90, l90 = p + 1.64 * std, p - 1.64 * std
len(p), len(std), len(u95), len(u90)

df1 = pd.DataFrame({
    'poin': p,
    'u95': u95,
    'l95': l95,
    'u90': u90,
    'l90': l90,
    'x': np.concatenate([np.arange(-9, -1, 1), np.arange(0, 16, 1)]),
})
# Append the omitted baseline year (t = -1) at zero.
df1.loc[len(df1.index)] = [0, 0, 0, 0, 0, -1]
# Long-format versions (NOTE(review): not used by the ribbon plot below,
# which draws directly from df1).
df11 = pd.melt(df1, id_vars=['poin', 'x', 'u95', 'l95'], value_vars=['u90', 'l90'])
df12 = pd.melt(df11, id_vars=['poin', 'x'], value_vars=['u95', 'l95'])
df1.sort_values('x', inplace=True)
# Ribbon figure: shaded 90% and 95% confidence bands around the
# event-study point estimates.
fig = plt.figure(figsize=(8, 6), dpi=dpi, facecolor='white')
ax = fig.add_axes([.1, 1, 1, 1])

alf = .5
omit = ['right', 'top']

ax.fill_between('x', 'l95', 'u95', data=df1, alpha=alf, label="95", color="#bbbcbf")
ax.fill_between('x', 'l90', 'u90', data=df1, alpha=alf, label="90", color="#4b5263")
ax.scatter("x", "poin", data=df1, color="black", label="")

ax.legend(title="Confidence Intervals", loc=(1.03, .5))
# Adoption date falls between t = -1 and t = 0.
ax.axvline(-.5, color="black", lw=.5, linestyle="-.")
ax.axhline(0, color="black", lw=1, linestyle="-.")
ax.set_yticks(np.arange(-40, 41, 40))
ax.spines[omit].set_visible(False)
# Fixed label typos: "exeption" -> "exception", "Conefficient" -> "Coefficient".
ax.set_xlabel("Year relative to adoption of implied contract exception", size=15)
ax.set_ylabel("Coefficient estimates & Confidence Intervals", size=15)
plt.show()
Text(0, 0.5, 'Conefficient estimates & Confidence Intervals')
../_images/4c55dfd4c04a7d8bea0b24bbd81284fa92f18df3e2b80e7b443412d95f060f13.png