library(tidyverse)
library(psych)
library(sjPlot)
library(ggthemes)
library(ggplot2)
library(MASS)   # loaded after tidyverse, so MASS::select masks dplyr::select
library(purrr)
library(broom)
library(lm.beta)
library(RColorBrewer)
library(plotly)
library(glmnet)
library(glmnetUtils)
source("http://www.labape.com.br/rprimi/R/cria_quartis.R")
In multiple regression with \(X_1 ... X_n\) predicting \(Y\), each \(B_i\) or \(\beta_i\) coefficient represents the unique effect of that variable on \(Y\), controlling for all the other variables in the model. If \(\beta_1\) corresponds to the unique effect of \(X_1\), and \(X_1\) is correlated with \(X_2\), where does the shared effect go?
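A minimal sketch of the question (the names R and d are just for illustration), using the same MASS::mvrnorm() with empirical = TRUE approach as the simulation below: the coefficient of \(X_1\) shrinks once the correlated predictor \(X_2\) enters the model, because the shared part is split between them.
# X1 and X2 each correlate .50 with Y and .70 with each other
R <- matrix(c(1.0, 0.7, 0.5,
              0.7, 1.0, 0.5,
              0.5, 0.5, 1.0),
            nrow = 3, dimnames = list(c("X1", "X2", "Y"), c("X1", "X2", "Y")))
d <- MASS::mvrnorm(400, mu = rep(0, 3), Sigma = R, empirical = TRUE) %>% as.data.frame()
coef(lm(Y ~ X1, data = d))        # simple regression: b1 = .50
coef(lm(Y ~ X1 + X2, data = d))   # multiple regression: b1 = b2 ≈ .29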
This code simulates drawing 3 variables \(Y\), \(X_1\), and \(X_2\) from a multivariate normal distribution with a specified correlation matrix var_cor. It builds a grid of possible correlation parameters and then simulates data with the mvrnorm function.
# Grid of correlation parameters: r_Y1 = cor(X1, Y), r_Y2 = cor(X2, Y), r_12 = cor(X1, X2)
params <- expand.grid(
  r_Y1 = seq(0, .68, by = .05),
  r_Y2 = seq(0, .68, by = .05),
  r_12 = seq(0, .88, by = .05)
)
simulate_3vars <- function(r_Y1, r_Y2, r_12, n = 400) {
  # Correlation matrix among X1, X2 and Y
  var_cor <- matrix(
    c(1.00, r_12, r_Y1,
      r_12, 1.00, r_Y2,
      r_Y1, r_Y2, 1.00),
    byrow = TRUE, nrow = 3, ncol = 3,
    dimnames = list(
      c("X1", "X2", "Y"),
      c("X1", "X2", "Y")
    )
  )
  # try() returns a "try-error" when var_cor is not positive definite,
  # so those parameter combinations can be filtered out later
  df <- try(
    mvrnorm(n, mu = c(0, 0, 0),
            Sigma = var_cor, empirical = TRUE) %>% as.data.frame(),
    silent = TRUE
  )
  return(df)
}
# Simulate one data set per row of the parameter grid; cl flags which
# combinations produced a valid data.frame (vs. a "try-error")
simulated_data <- params %>%
  mutate(
    data = pmap(., simulate_3vars),
    cl   = map_chr(data, class)
  )
simulated_data <- simulated_data %>%
  filter(cl == "data.frame") %>%
  mutate(
    mult_regres  = map(data, ~lm(Y ~ X1 + X2, data = .)),
    # glmnetUtils supplies the formula interface for glmnet
    lasso_regres = map(data, ~glmnet(Y ~ X1 + X2, data = .)),
    beta   = map(mult_regres, ~lm.beta(.)),
    # the simulated variables have unit variance, so raw coefficients equal betas
    beta2  = map(beta, "coefficients"),
    glance = map(mult_regres, glance)
  )
# Turn each named coefficient vector into a one-row data frame so it can be unnested
simulated_data <- simulated_data %>%
  mutate(beta2 = map(beta2, ~ as.data.frame(t(.))))
# dplyr::select is called explicitly because MASS::select masks it
summar <- simulated_data %>% dplyr::select(glance) %>% unnest(cols = glance)
betas  <- simulated_data %>% dplyr::select(beta2)  %>% unnest(cols = beta2)
df <- simulated_data %>% dplyr::select(1:3) %>% bind_cols(betas, summar)
# Columns 5 and 6 hold the coefficients for X1 and X2
names(df)[5:6] <- c("b_x1", "b_x2")
As explained here: https://stats.stackexchange.com/questions/24827/where-is-the-shared-variance-between-all-ivs-in-a-linear-multiple-regression-equ, when \(X_1\) and \(X_2\) have the same correlation with \(Y\), as \(X_1\) and \(X_2\) “come closer and closer to being perfectly correlated, their b-values in the multiple regression come closer and closer to HALF of the b-value in the simple linear regression of either one of them. However, as \(X_1\) and \(X_2\) come closer and closer to being perfectly correlated, the STANDARD ERROR of b1 and b2 moves closer and closer to infinity, so the t-values converge on zero. So, the t-values will converge on zero (i.e., no UNIQUE linear relationship between either \(X_1\) and \(Y\) or \(X_2\) and \(Y\)), but the b-values converge to half the value of the b-values in the simple linear regression … so as the correlation between \(X_1\) and \(X_2\) approaches unity, EACH of the partial slope coefficients approaches contributing equally to the prediction of the \(Y\) value, even though neither independent variable offers any UNIQUE explanation of the dependent variable” (HTH // Phil, 2017).
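With standardized variables this behaviour follows from the normal equations \(\boldsymbol{\beta} = R_{XX}^{-1}\, r_{XY}\): when \(r_{YX_1} = r_{YX_2} = r\), each beta equals \(r/(1 + r_{12})\), which tends to \(r/2\) as \(r_{12} \rightarrow 1\). A small numeric check (the helper name beta_from_cor is purely illustrative):
beta_from_cor <- function(r_Y1, r_Y2, r_12) {
  R_xx <- matrix(c(1, r_12, r_12, 1), nrow = 2)   # predictor correlation matrix
  solve(R_xx, c(r_Y1, r_Y2))                      # standardized betas
}
beta_from_cor(.50, .50, .50)  # 0.333 0.333
beta_from_cor(.50, .50, .90)  # 0.263 0.263
beta_from_cor(.50, .50, .99)  # 0.251 0.251 -> approaching .25, half the simple-regression value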
This is shown in the simulation below.
df %>% filter(r_Y2 == .50 & r_Y1 == .50) %>%
ggplot(aes(y=b_x1, x=b_x2, color = r_12, size=r_12)) +
geom_point(alpha=1/2) +
scale_colour_gradientn(colours = brewer.pal(7, "Paired")) +
scale_y_continuous(breaks= seq(0.20, .55, by=.05), limits = c(.20,.55)) +
scale_x_continuous(breaks= seq(0.20, .55, by=.05), limits = c(.20,.55)) +
geom_vline(xintercept = .5, color = "orange") +
geom_hline(yintercept = .5, color = "orange") +
theme_minimal()
Now look how wildly the \(\beta\)'s behave when \(r_{YX_1} > r_{YX_2}\) as \(r_{X_1X_2}\) approaches 1. Here we are seeing Simpson's paradox.
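The same illustrative helper from above shows why: \(\beta_2 = (r_{YX_2} - r_{12}\, r_{YX_1}) / (1 - r_{12}^2)\), so \(\beta_2\) crosses zero and turns negative as soon as \(r_{12} > r_{YX_2} / r_{YX_1}\) (here .50):
beta_from_cor(.50, .25, .40)  #  0.476  0.060
beta_from_cor(.50, .25, .50)  #  0.500  0.000  -> X2 adds nothing unique
beta_from_cor(.50, .25, .80)  #  0.833 -0.417  -> sign flip once r_12 exceeds r_Y2 / r_Y1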
df %>% filter(r_Y1 == .50 & r_Y2 == .25 ) %>%
ggplot(aes(y=b_x1, x=b_x2, color = r_12, size=r_12)) +
geom_point(alpha=1/2) +
scale_colour_gradientn(colours = brewer.pal(7, "Paired")) +
geom_vline(xintercept = .25, color = "orange") +
geom_hline(yintercept = .50, color = "orange") +
scale_y_continuous(breaks= round(seq(.3, 1, by=.10), 2), limits = c(.3,1)) +
scale_x_continuous(breaks= round(seq(-.6, .40, by=.10), 2), limits = c(-.6,.40)) +
theme_minimal()
Now a 3D graph.
colors <- brewer.pal(7, "Paired")
df %>% filter(r_Y1 == .50 & r_Y2 == .25 ) %>%
plot_ly(y = ~b_x1, z = ~b_x2, x = ~r_12,
type="scatter3d",
mode="markers", color=~r_12,
colors = colors,
size = 3.5,
opacity = 1/1.5)
colors <- brewer.pal(7, "Paired")
df %>% filter(r_Y1 == .50 & r_Y2 == .50 ) %>%
plot_ly(y = ~b_x1, z = ~b_x2, x = ~r_12,
type="scatter3d",
mode="markers", color=~r_12,
colors = colors,
size = 3.5,
opacity = 1/1.5)
# http://varianceexplained.org/broom-gallery/snippets/broom-glmnet.html
tidy(simulated_data$lasso_regres[[1]])
tidied <- tidy(simulated_data$lasso_regres[[1]]) %>% filter(term != "(Intercept)")
ggplot(tidied, aes(step, estimate, group = term)) + geom_line()
coefficients <- simulated_data$lasso_regres[[1]] %>%
broom::tidy() %>%
mutate(log_lambda = log(lambda)) %>%
filter(term != "(Intercept)")
plot(simulated_data$lasso_regres[[1]], xvar = "lambda", label = TRUE)
ggplot(coefficients, aes(x = log_lambda, y = estimate, col = term)) +
geom_line() + coord_cartesian(xlim = range(coefficients$log_lambda))
tidied_cv <- tidy(simulated_data$lasso_regres[[1]])
glance_cv <- glance(simulated_data$lasso_regres[[1]])
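The `_cv` names in the broom gallery snippet refer to a cross-validated fit; a minimal sketch of that step, assuming glmnetUtils' formula interface for cv.glmnet (not fitted above), would be:
# Cross-validated lasso on the first simulated data set (illustrative only)
cv_fit <- cv.glmnet(Y ~ X1 + X2, data = simulated_data$data[[1]])
tidy(cv_fit)    # CV error estimate at each lambda
glance(cv_fit)  # lambda.min and lambda.1se
plot(cv_fit)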