7. Linear Penalized Regressions

7.1. Data Generating Process: Approximately Sparse
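We start by simulating data from an approximately sparse design. A sketch of the model implemented by the code below (our notation): Y = g(X) + e with e standard normal,

$$ g(X) = \exp(4Z) + W'\beta, \qquad \beta_j = 1/j^2, $$

where Z and the entries of W are independent uniforms on [-1/2, 1/2], n = 100 and p = 400. Because the coefficients on W decay quickly, only a handful of its columns carry most of the signal, and the nonlinear term exp(4*Z) can be approximated by the low-order polynomials in Z included in the regressor matrix X.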

install.packages("librarian", quiet = T)
librarian::shelf(glmnet, broom, tidyverse, hdm, quiet = T)
set.seed(1)

n <- 100
p <- 400

Z <- runif(n) - 1/2
W <- matrix(runif(n * p) - 1/2, n, p)

beta <- 1 / (1:p)^2       # approximately sparse beta
# beta <- rnorm(p) * .2   # dense beta
gX <- exp(4 * Z) + W %*% beta   # leading term is nonlinear
X <- cbind(Z, Z^2, Z^3, W)      # polynomials in Z will approximate exp(4*Z)

Y <- gX + rnorm(n)        # generate Y


plot(gX, Y, xlab = "g(X)", ylab = "Y")    # plot Y vs g(X)

print(c("theoretical R2:", var(gX) / var(Y)))
[1] "theoretical R2:"   "0.826881788964026"
A matrix: 1 × 1 of type dbl
0.8268818
../_images/a9b75249d076d150826f0781d32078a916fccfc2f0dde18be3108faf075359d7.png

We use the package glmnet to carry out predictions using cross-validated lasso, ridge, and elastic net.
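As a reminder of what the alpha argument does (standard glmnet notation, nothing computed in this notebook): for a given penalty level $\lambda$, glmnet penalizes the squared loss with

$$ \lambda\left[(1-\alpha)\tfrac{1}{2}\lVert\beta\rVert_2^2 + \alpha\lVert\beta\rVert_1\right], $$

so alpha = 1 is the lasso, alpha = 0 is ridge, and alpha = 0.5 an elastic net mixing the two; cv.glmnet then selects $\lambda$ by cross-validation.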

# library(glmnet)
fit.lasso.cv <- cv.glmnet(X, Y, family = "gaussian", alpha = 1)     # family "gaussian" means squared-error loss; alpha = 1 is the lasso
fit.ridge    <- cv.glmnet(X, Y, family = "gaussian", alpha = 0)     # alpha = 0 is ridge
fit.elnet    <- cv.glmnet(X, Y, family = "gaussian", alpha = 0.5)   # alpha = 0.5 is the elastic net

yhat.lasso.cv <- predict(fit.lasso.cv, newx = X)                    # predictions
yhat.ridge    <- predict(fit.ridge, newx = X)
yhat.elnet    <- predict(fit.elnet, newx = X)

MSE.lasso.cv <- summary(lm((gX - yhat.lasso.cv)^2 ~ 1))$coef[1:2]   # MSE for approximating g(X), with its standard error
MSE.ridge    <- summary(lm((gX - yhat.ridge)^2 ~ 1))$coef[1:2]
MSE.elnet    <- summary(lm((gX - yhat.elnet)^2 ~ 1))$coef[1:2]
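The intercept-only regression is just a compact way of reporting the mean of the squared approximation errors together with a standard error for that mean; an equivalent by-hand computation (illustrative only, shown here for the cross-validated lasso fit) is:

sq.err <- as.numeric((gX - yhat.lasso.cv)^2)
c(MSE = mean(sq.err), SE = sd(sq.err) / sqrt(length(sq.err)))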

Here we compute the lasso and post-lasso (OLS after lasso selection) using plug-in choices for the penalty level, as implemented in the package hdm.

# library(hdm)
fit.rlasso      <- rlasso(Y ~ X, post = FALSE)   # lasso with plug-in penalty level
fit.rlasso.post <- rlasso(Y ~ X, post = TRUE)    # post-lasso with plug-in penalty level

yhat.rlasso      <- predict(fit.rlasso)          # fitted values of g(X) at the observed X
yhat.rlasso.post <- predict(fit.rlasso.post)

MSE.lasso      <- summary(lm((gX - yhat.rlasso)^2 ~ 1))$coef[1:2]        # MSE for approximating g(X), with its standard error
MSE.lasso.post <- summary(lm((gX - yhat.rlasso.post)^2 ~ 1))$coef[1:2]
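For intuition, the plug-in rule behind rlasso sets the penalty level to roughly 2*c*sqrt(n)*qnorm(1 - gamma/(2*p)), scaled by data-driven penalty loadings; the snippet below is our sketch of that rule under assumed package defaults (c = 1.1, gamma = 0.1/log(n)), not an exact reproduction of hdm's internals:

c.pen     <- 1.1              # assumed hdm default constant
gamma.pen <- 0.1 / log(n)     # assumed hdm default confidence parameter
2 * c.pen * sqrt(n) * qnorm(1 - gamma.pen / (2 * p))   # rough plug-in penalty level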

Next we code up lava, which alternates between a lasso fit (for the sparse part of the signal) and a ridge fit (for the dense part).
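The idea, in our own notation rather than anything taken from the code: lava writes the coefficient vector as the sum of a sparse component $\beta$ and a dense component $\delta$ and solves

$$ \min_{\beta,\ \delta}\ \frac{1}{n}\sum_{i=1}^{n}\bigl(Y_i - X_i'(\beta+\delta)\bigr)^2 \;+\; \lambda_1 \lVert \beta \rVert_1 \;+\; \lambda_2 \lVert \delta \rVert_2^2 . $$

The loop below approximates this by alternating a lasso fit on Y minus the current ridge fit and a ridge fit (with a fixed penalty) on Y minus the current lasso fit.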

# library(glmnet)

lava.predict <- function(X, Y, iter = 5) {
  g1 <- predict(rlasso(X, Y, post = FALSE))   # lasso step fits the "sparse" part
  m1 <- predict(glmnet(X, as.vector(Y - g1), family = "gaussian", alpha = 0, lambda = 20),
                newx = X)                     # ridge step fits the "dense" part

  i <- 1
  while (i <= iter) {
    g1 <- predict(rlasso(X, as.vector(Y - m1), post = FALSE))  # lasso step fits the "sparse" part
    m1 <- predict(glmnet(X, as.vector(Y - g1), family = "gaussian", alpha = 0, lambda = 20),
                  newx = X)                                    # ridge step fits the "dense" part
    i <- i + 1
  }

  return(g1 + m1)
}


yhat.lava <- lava.predict(X, Y)
MSE.lava  <- summary(lm((gX - yhat.lava)^2 ~ 1))$coef[1:2]   # MSE for approximating g(X), with its standard error

MSE.lava
[1] 0.158968193469853 0.0256845068490924
# library(xtable)
table<- matrix(0, 6, 2)
table[1,1:2]   <- MSE.lasso.cv
table[2,1:2]   <- MSE.ridge
table[3,1:2]   <- MSE.elnet
table[4,1:2]   <- MSE.lasso
table[5,1:2]   <- MSE.lasso.post
table[6,1:2]   <- MSE.lava

colnames(table) <- c("MSE", "S.E. for MSE")
rownames(table)<- c("Cross-Validated Lasso", "Cross-Validated ridge","Cross-Validated elnet",
                    "Lasso","Post-Lasso","Lava")
# tab <- xtable(table, digits =3)

table
                        MSE        S.E. for MSE
Cross-Validated Lasso   0.3666009  0.061279916
Cross-Validated ridge   2.3842559  0.369909988
Cross-Validated elnet   0.3988608  0.063757962
Lasso                   0.1482190  0.026705771
Post-Lasso              0.0845236  0.009469643
Lava                    0.1589682  0.025684507
plot(gX, gX, pch=19, cex=1, ylab="predicted value", xlab="true g(X)")

points(gX, yhat.rlasso, col=2, pch=18, cex = 1.5 )
points(gX,  yhat.rlasso.post, col=3, pch=17,  cex = 1.2  )
points( gX, yhat.lasso.cv,col=4, pch=19,  cex = 1.2 )


legend("bottomright", 
  legend = c("rLasso", "Post-rLasso", "CV Lasso"), 
  col = c(2,3,4), 
  pch = c(18,17, 19), 
  bty = "n", 
  pt.cex = 1.3, 
  cex = 1.2, 
  text.col = "black", 
  horiz = F , 
  inset = c(0.1, 0.1))
[Figure: true g(X) against the fitted values from rLasso, Post-rLasso, and CV Lasso]

7.2. Data Generating Process: Approximately Sparse + Small Dense Part
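The second design keeps the nonlinear leading term, which the low-order polynomials in Z can still approximate well, but makes the coefficients on W dense: now

$$ g(X) = \exp(4Z) + W'\beta, \qquad \beta_j \sim 0.2 \cdot N(0,1), $$

so a small amount of signal is spread across all p = 400 columns of W instead of being concentrated in a few. This is the sparse-plus-dense situation that lava targets.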

set.seed(1)

n <- 100
p <- 400

Z <- runif(n) - 1/2
W <- matrix(runif(n * p) - 1/2, n, p)

beta <- rnorm(p) * .2           # dense beta
gX <- exp(4 * Z) + W %*% beta   # leading term is nonlinear
X <- cbind(Z, Z^2, Z^3, W)      # polynomials in Z will approximate exp(4*Z)

Y <- gX + rnorm(n)              # generate Y


plot(gX, Y, xlab = "g(X)", ylab = "Y")    # plot Y vs g(X)

print(c("theoretical R2:", var(gX) / var(Y)))
[1] "theoretical R2:"   "0.696687990094227"
A matrix: 1 × 1 of type dbl
0.696688
../_images/5ac8ab1ff5acacbe7725d2221e3b65487e90f59a1d6ee6d9d63efe2ec47d86c6.png
# library(glmnet)
fit.lasso.cv <- cv.glmnet(X, Y, family = "gaussian", alpha = 1)     # lasso
fit.ridge    <- cv.glmnet(X, Y, family = "gaussian", alpha = 0)     # ridge
fit.elnet    <- cv.glmnet(X, Y, family = "gaussian", alpha = 0.5)   # elastic net

yhat.lasso.cv <- predict(fit.lasso.cv, newx = X)                    # predictions
yhat.ridge    <- predict(fit.ridge, newx = X)
yhat.elnet    <- predict(fit.elnet, newx = X)

MSE.lasso.cv <- summary(lm((gX - yhat.lasso.cv)^2 ~ 1))$coef[1:2]   # MSE for approximating g(X), with its standard error
MSE.ridge    <- summary(lm((gX - yhat.ridge)^2 ~ 1))$coef[1:2]
MSE.elnet    <- summary(lm((gX - yhat.elnet)^2 ~ 1))$coef[1:2]
# library(hdm)
fit.rlasso      <- rlasso(Y ~ X, post = FALSE)   # lasso with plug-in penalty level
fit.rlasso.post <- rlasso(Y ~ X, post = TRUE)    # post-lasso with plug-in penalty level

yhat.rlasso      <- predict(fit.rlasso)          # fitted values of g(X) at the observed X
yhat.rlasso.post <- predict(fit.rlasso.post)

MSE.lasso      <- summary(lm((gX - yhat.rlasso)^2 ~ 1))$coef[1:2]        # MSE for approximating g(X), with its standard error
MSE.lasso.post <- summary(lm((gX - yhat.rlasso.post)^2 ~ 1))$coef[1:2]
# library(glmnet)

lava.predict <- function(X, Y, iter = 5) {
  g1 <- predict(rlasso(X, Y, post = FALSE))   # lasso step fits the "sparse" part
  m1 <- predict(glmnet(X, as.vector(Y - g1), family = "gaussian", alpha = 0, lambda = 20),
                newx = X)                     # ridge step fits the "dense" part

  i <- 1
  while (i <= iter) {
    g1 <- predict(rlasso(X, as.vector(Y - m1), post = FALSE))  # lasso step fits the "sparse" part
    m1 <- predict(glmnet(X, as.vector(Y - g1), family = "gaussian", alpha = 0, lambda = 20),
                  newx = X)                                    # ridge step fits the "dense" part
    i <- i + 1
  }

  return(g1 + m1)
}


yhat.lava <- lava.predict(X, Y)
MSE.lava  <- summary(lm((gX - yhat.lava)^2 ~ 1))$coef[1:2]   # MSE for approximating g(X), with its standard error

MSE.lava
[1] 0.679714730890029 0.0737496772656864
# library(xtable)
table<- matrix(0, 6, 2)
table[1,1:2]   <- MSE.lasso.cv
table[2,1:2]   <- MSE.ridge
table[3,1:2]   <- MSE.elnet
table[4,1:2]   <- MSE.lasso
table[5,1:2]   <- MSE.lasso.post
table[6,1:2]   <- MSE.lava

colnames(table) <- c("MSE", "S.E. for MSE")
rownames(table)<- c("Cross-Validated Lasso", "Cross-Validated ridge","Cross-Validated elnet",
                    "Lasso","Post-Lasso","Lava")
# tab <- xtable(table, digits =3)

table
                        MSE        S.E. for MSE
Cross-Validated Lasso   1.9318225  0.29130485
Cross-Validated ridge   3.4106685  0.49426500
Cross-Validated elnet   1.8251591  0.25189955
Lasso                   0.9641151  0.11978038
Post-Lasso              1.5966018  0.20707956
Lava                    0.6797147  0.07374968
plot(gX, gX, pch=19, cex=1, ylab="predicted value", xlab="true g(X)")

points(gX, yhat.rlasso,   col=2, pch=18, cex = 1.5 )
points(gX, yhat.elnet,  col=3, pch=17,  cex = 1.2  )
points(gX, yhat.lava,  col=4, pch=19,  cex = 1.2 )


legend("bottomright", 
  legend = c("rLasso", "Elnet", "Lava"), 
  col = c(2,3,4), 
  pch = c(18,17, 19), 
  bty = "n", 
  pt.cex = 1.3, 
  cex = 1.2, 
  text.col = "black", 
  horiz = F , 
  inset = c(0.1, 0.1))
[Figure: true g(X) against the fitted values from rLasso, Elnet, and Lava]