Lab Exercise: Comparing Lasso, Ridge, and Elastic Net

We will look at an example of sparse regression where the predictors are highly correlated, and compare Lasso, Elastic Net, and Ridge Regression in terms of their test-set errors. Recall that Ridge and Lasso are both special cases of the Elastic Net, which has the following penalty: \[ P_{\lambda,\alpha}(\beta) = \lambda \frac{(1-\alpha)}{2} {\lVert \beta \rVert}_2^2 + \lambda \alpha {\lVert \beta \rVert}_1 \] So \(\alpha = 0\) is Ridge, \(\alpha = 1\) is Lasso, and anything in between is Elastic Net.
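
To make the roles of \(\lambda\) and \(\alpha\) concrete, here is a tiny helper that evaluates this penalty directly (illustrative only: the name enet_penalty is ours, and glmnet applies the penalty internally, so you never need this in practice).

enet_penalty <- function(beta, lambda, alpha) {
  # lambda * [ (1 - alpha)/2 * ||beta||_2^2 + alpha * ||beta||_1 ]
  lambda * ((1 - alpha) / 2 * sum(beta^2) + alpha * sum(abs(beta)))
}
b <- c(1, -2, 0)
enet_penalty(b, lambda = 0.5, alpha = 0)    # Ridge penalty
enet_penalty(b, lambda = 0.5, alpha = 1)    # Lasso penalty
enet_penalty(b, lambda = 0.5, alpha = 0.5)  # Elastic Net penalty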

Data Generation

We fix a \(\beta\) with only a few non-zero elements and a design matrix (\(X\) matrix) with correlated columns, in a setting where \(p\) is comparable to \(n\) (here \(p = 50\) and \(n = 100\), so after the 50/50 train/test split below, the training sample size equals \(p\)).

\[ \beta = (\underbrace{5, \ldots, 5}_{10},\underbrace{0,\ldots,0}_{40})^T \\ p = 50, \quad n = 100 \\ \operatorname{Cov}(X) = \Sigma = \{ \rho^{|i-j|}\}_{1\le i,j \le p} \\ \text{and } y = X \beta + \epsilon, \quad \epsilon_i \overset{iid}{\sim} N(0, 0.1^2) \]

library(MASS)  # Package needed to generate correlated predictors
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
set.seed(1234)
n <- 100    # Number of observations
p <- 50     # Number of predictors included in model
p1 <- 10    # Number of non-zero beta coefficients
rho <- 0.7  # Correlation level for the AR(1)-type covariance
CovMatrix <- outer(1:p, 1:p, function(x,y) {rho^abs(x-y)})

x <- mvrnorm(n, rep(0,p), CovMatrix)
beta <- c(rep(5, p1), rep(0, p - p1))  # First p1 coefficients are 5, rest are 0
eps <- rnorm(n, mean = 0, sd = 0.1)    # Gaussian noise
fx <- x %*% beta                       # True regression function
y <- fx + eps

# Split data into train and test sets
train_rows <- sample(1:n, n/2)
x.train <- x[train_rows, ]
x.test <- x[-train_rows, ]

y.train <- y[train_rows]
y.test <- y[-train_rows]

Lasso vs. Elastic Net

Our goal is to fit the lasso, ridge, and elastic net with different values of \(\alpha\) ranging from \(0\) to \(1\) using the training data, and compare their mean squared errors on the test set. Here is sample code for fitting elastic net with \(\alpha = 0.5\) and lasso with \(\alpha = 1\) and comparing their test MSE. Note that predictions use lambda.1se, the largest \(\lambda\) whose cross-validated error is within one standard error of the minimum, rather than lambda.min.

fit.enet <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha = 0.5)
plot(fit.enet)

yhat.enet <- predict(fit.enet, s = fit.enet$lambda.1se, newx = x.test)

fit.lasso <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha = 1)
plot(fit.lasso)

yhat.lasso <- predict(fit.lasso, s = fit.lasso$lambda.1se, newx = x.test)

(mse.enet <- mean((y.test - yhat.enet)^2))
## [1] 1.844263
(mse.lasso <- mean((y.test - yhat.lasso)^2))
## [1] 2.255085

Here, Elastic Net is the clear winner. This happens because the predictors are highly correlated: Lasso does not work well with highly correlated predictors (can we think of a reason?). If you make \(\rho\) small, the situation reverses.
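
One quick way to see the difference (an optional check, not part of the exercise) is to extract the coefficients at lambda.1se and count how many of the \(10\) true signals each fit keeps:

# Extract fitted coefficients at lambda.1se and drop the intercept
b.lasso <- as.vector(coef(fit.lasso, s = fit.lasso$lambda.1se))[-1]
b.enet  <- as.vector(coef(fit.enet,  s = fit.enet$lambda.1se))[-1]

# Lasso tends to pick only some members of a correlated group,
# while the ridge component of Elastic Net spreads weight across the group
sum(b.lasso[1:p1] != 0)  # Non-zero estimates among the 10 true signals (Lasso)
sum(b.enet[1:p1]  != 0)  # Non-zero estimates among the 10 true signals (Elastic Net)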

Exercise: Repeat this for different values of \(\alpha\) and correlation \(\rho\). You can take \(10\) equispaced values of \(\alpha\) and \(3\) different values of \(\rho\) (small, medium, and high correlation). Please avoid copy-pasting the R code \(10 \times 3\) times; use a for loop or apply-type constructs instead. (The solution below uses a slightly finer grid: \(11\) values of \(\alpha\) and \(4\) values of \(\rho\).)

Solution

library(MASS)  # Package needed to generate correlated predictors
library(glmnet)
set.seed(1234)
n <- 100    # Number of observations
p <- 50     # Number of predictors included in model
p1 <- 10    # Number of non-zero beta coefficients

rho.set <- c(0, 0.3, 0.5, 0.9)
alpha.set <- seq(0, 1, length.out = 11)
printf <- function(...) invisible(print(sprintf(...)))

# Tables to hold the test MSE and non-zero coefficient count for each (rho, alpha) pair
mse.table <- matrix(0, length(rho.set), length(alpha.set))
nonzero.table <- matrix(0, length(rho.set), length(alpha.set))

for (i in 1:length(rho.set)){
  for (j in 1:length(alpha.set)){
    
    r <- rho.set[i]
    a <- alpha.set[j]
    CovMatrix <- outer(1:p, 1:p, function(x, y) r^abs(x - y))
    x <- mvrnorm(n, rep(0, p), CovMatrix)
    beta <- c(rep(5, p1), rep(0, p - p1))
    eps <- rnorm(n, mean = 0, sd = 0.1)
    fx <- x %*% beta
    y <- fx + eps
    
    # Split data into train and test sets
    train_rows <- sample(1:n, n/2)
    x.train <- x[train_rows, ]
    x.test <- x[-train_rows, ]
    
    y.train <- y[train_rows]
    y.test <- y[-train_rows]
    
    fit <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha = a)
    yhat <- predict(fit, s = fit$lambda.1se, newx = x.test)
    mse <- mean((y.test - yhat)^2)
    mse.table[i, j] <- mse
    # Count non-zero coefficients at lambda.1se (the count includes the intercept)
    coeff <- coef(fit, s = fit$lambda.1se)
    nonzero.table[i, j] <- sum(coeff != 0)
    # printf("For correlation %f MSE for GLMNET with alpha = %f is %f", r, a, mse)
  }
}
colnames(mse.table) <- as.character(alpha.set)
rownames(mse.table) <- as.character(rho.set)
mse.table
##            0       0.1      0.2       0.3       0.4      0.5       0.6
## 0   198.3370  6.433258 3.028248 1.5972283 0.5701973 1.081100 0.7998857
## 0.3 282.2767 11.652394 4.365928 1.0022945 1.2228335 1.121347 0.6923379
## 0.5 343.4524  4.763336 1.102608 0.7369811 1.3739258 1.240289 0.8596819
## 0.9 627.7559  2.557292 2.542435 2.2410127 2.8885615 3.320300 2.8868343
##           0.7       0.8       0.9         1
## 0   0.6290865 0.6683079 0.4334599 0.7747658
## 0.3 0.9115730 0.8024105 0.5669784 0.6056630
## 0.5 1.4708319 1.4119898 1.9468166 1.1135726
## 0.9 4.5985822 2.4236040 3.8443345 2.9828976
colnames(nonzero.table) <- as.character(alpha.set)
rownames(nonzero.table) <- as.character(rho.set)
nonzero.table
##      0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9  1
## 0   51  33  29  27  17  16  17  13  12  13 15
## 0.3 51  31  22  14  14  12  12  11  11  11 11
## 0.5 51  20  14  11  11  11  11  11  11  12 11
## 0.9 51  13  14  12  13  12  11  11  11  11 11
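
For reference, here is a sketch of an apply-style alternative to the nested for loop above (the helper name fit_mse is our own; it regenerates the data and returns the test MSE for a single \((\rho, \alpha)\) pair):

fit_mse <- function(r, a) {
  # Regenerate data for correlation r, then fit cv.glmnet at the given alpha
  CovMatrix <- outer(1:p, 1:p, function(x, y) r^abs(x - y))
  x <- mvrnorm(n, rep(0, p), CovMatrix)
  y <- x %*% c(rep(5, p1), rep(0, p - p1)) + rnorm(n, mean = 0, sd = 0.1)
  train <- sample(1:n, n / 2)
  fit <- cv.glmnet(x[train, ], y[train], type.measure = "mse", alpha = a)
  yhat <- predict(fit, s = fit$lambda.1se, newx = x[-train, ])
  mean((y[-train] - yhat)^2)
}
mse.table2 <- t(sapply(rho.set,
                       function(r) sapply(alpha.set, function(a) fit_mse(r, a))))
dimnames(mse.table2) <- list(rho.set, alpha.set)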
library(ggplot2)

# Reshape the MSE table into long format: one row per (alpha, rho) combination
mse.data <- rbind(data.frame(values = mse.table[1, ], x = alpha.set, rho = rho.set[1]),
                  data.frame(values = mse.table[2, ], x = alpha.set, rho = rho.set[2]),
                  data.frame(values = mse.table[3, ], x = alpha.set, rho = rho.set[3]),
                  data.frame(values = mse.table[4, ], x = alpha.set, rho = rho.set[4]))

# Plot test MSE (log scale) against alpha, one curve per correlation level
mse.plot <- ggplot(mse.data, aes(x = x, y = values, group = as.factor(rho),
                                 colour = as.factor(rho))) +
  geom_line() + ylab("M. S. E.") + xlab(expression(alpha)) + scale_y_log10() +
  theme(axis.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1.2)),
        legend.position = c(0.75, 0.75),
        legend.title = element_text(size = 15, face = "bold"),
        legend.text = element_text(size = 15),
        strip.text.x = element_text(size = 20, face = "bold"),
        strip.text.y = element_text(size = 15, face = "bold"))

print(mse.plot)