# Switch to the project folder so the relative file name below resolves.
# NOTE(review): a hard-coded setwd() ties the script to this directory
# layout; consider a project-relative path instead.
setwd("~/Dropbox/R Stat")
# Restore the objects saved in senna.RData into the workspace.
# Presumably this is where the `sennav1` data frame used below comes
# from -- confirm.
 load("senna.RData")

Correlação entre as variáveis

library(sjPlot)

# Correlation analysis ----
# Columns entering the correlation table: m_notas, the six F1-F6 scores,
# plus ESCOLARIDADE, SEXO and IDADE.
vars <- c(
  "m_notas",
  "F1.Cons", "F2.Extr", "F3.EmSt", "F4.Agre", "F5.Opns", "F6.NVLoc",
  "ESCOLARIDADE", "SEXO", "IDADE"
)
# Disabled call that renders the lower-triangle correlation table.
# NOTE(review): newer sjPlot releases appear to expose this as tab_corr()
# -- confirm before re-enabling.
# sjt.corr(sennav1[, vars], triangle = "lower")
  m_notas F1.Cons F2.Extr F3.EmSt F4.Agre F5.Opns F6.NVLoc ESCOLARIDADE SEXO IDADE
m_notas                    
F1.Cons 0.495***                  
F2.Extr 0.084 0.123                
F3.EmSt 0.311* 0.675*** 0.096              
F4.Agre 0.424*** 0.490*** 0.367** 0.427***            
F5.Opns 0.186 0.449*** 0.187 0.160 0.424***          
F6.NVLoc -0.237 -0.388** 0.069 -0.415*** -0.122 0.166        
ESCOLARIDADE -0.384** -0.527*** -0.298* -0.307* -0.270* -0.222 0.064      
SEXO 0.310* 0.071 0.217 -0.089 0.198 0.178 -0.057 0.000    
IDADE -0.378** -0.504*** -0.313* -0.261* -0.277* -0.252* 0.010 0.977*** 0.011  
Computed correlation used spearman-method with listwise-deletion.

Regressão múltipla prevendo notas a partir das seis habilidades socioemocionais

# Multiple regression: m_notas regressed on all six factor scores (F1-F6).
fit1 <- lm(
  formula = m_notas ~ F1.Cons + F2.Extr + F3.EmSt +
    F4.Agre + F5.Opns + F6.NVLoc,
  data = sennav1
)
# Show the coefficient table and the overall fit statistics.
summary(fit1)
## 
## Call:
## lm(formula = m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre + 
##     F5.Opns + F6.NVLoc, data = sennav1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.40730 -0.53414  0.07799  0.61265  2.09773 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.72321    1.11784   4.225 8.54e-05 ***
## F1.Cons      0.85598    0.24183   3.540 0.000797 ***
## F2.Extr     -0.07189    0.25169  -0.286 0.776194    
## F3.EmSt     -0.44010    0.23712  -1.856 0.068537 .  
## F4.Agre      0.85238    0.26088   3.267 0.001826 ** 
## F5.Opns     -0.40424    0.25114  -1.610 0.112914    
## F6.NVLoc    -0.16230    0.25627  -0.633 0.529008    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9748 on 58 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3861, Adjusted R-squared:  0.3226 
## F-statistic:  6.08 on 6 and 58 DF,  p-value: 5.513e-05
# With only F1.Cons and F3.EmSt as predictors, the second factor still
# receives a negative weight.
fit2 <- lm(
  formula = m_notas ~ F1.Cons + F3.EmSt,
  data = sennav1
)
summary(fit2)
## 
## Call:
## lm(formula = m_notas ~ F1.Cons + F3.EmSt, data = sennav1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6504 -0.6763  0.0190  0.6489  3.2813 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1896     0.5936   8.743 2.05e-12 ***
## F1.Cons       0.8184     0.2238   3.658 0.000527 ***
## F3.EmSt      -0.2213     0.2418  -0.915 0.363644    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 62 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2452, Adjusted R-squared:  0.2209 
## F-statistic: 10.07 on 2 and 62 DF,  p-value: 0.000163
    m_notas
    B CI std. Beta CI p
(Intercept)   4.72 2.49 – 6.96     <.001
F1.Cons   0.86 0.37 – 1.34 0.62 0.28 – 0.96 .001
F2.Extr   -0.07 -0.58 – 0.43 -0.03 -0.26 – 0.20 .776
F3.EmSt   -0.44 -0.91 – 0.03 -0.30 -0.61 – 0.02 .069
F4.Agre   0.85 0.33 – 1.37 0.41 0.17 – 0.66 .002
F5.Opns   -0.40 -0.91 – 0.10 -0.22 -0.48 – 0.05 .113
F6.NVLoc   -0.16 -0.68 – 0.35 -0.08 -0.33 – 0.17 .529
Observations   65
R2 / adj. R2   .386 / .323
    m_notas
    B CI std. Beta CI p
(Intercept)   5.19 4.00 – 6.38     <.001
F1.Cons   0.82 0.37 – 1.27 0.59 0.28 – 0.91 .001
F3.EmSt   -0.22 -0.70 – 0.26 -0.15 -0.47 – 0.17 .364
Observations   65
R2 / adj. R2   .245 / .221

Como interpretar o coeficiente de F3.EmSt na regressão múltipla?

# Split F1.Cons into four groups using its quartiles ----

# Quartile cut points (0%, 25%, 50%, 75%, 100%).
# na.rm = TRUE keeps quantile() from stopping with an error when F1.Cons
# contains missing values.
quartis <- quantile(
  sennav1$F1.Cons,
  probs = c(0, 0.25, 0.50, 0.75, 1),
  na.rm = TRUE
)

# F1.ConsQ: ordered factor with one level per quartile interval.
# include.lowest = TRUE closes the first interval on the left, so the
# minimum value lands in the first group instead of becoming NA.
sennav1$F1.ConsQ <- cut(
  sennav1$F1.Cons,
  breaks = quartis,
  include.lowest = TRUE,
  ordered_result = TRUE
)

library(ggplot2)
# Pooled view: m_notas against F3.EmSt with a single least-squares line
# fitted through all points.
ggplot(sennav1, aes(F3.EmSt, m_notas)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()

# Same scatter coloured by F1.ConsQ; mapping colour to the group makes
# geom_smooth() fit one line per group.
ggplot(sennav1, aes(F3.EmSt, m_notas, colour = F1.ConsQ)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()

Por que ocorre o paradoxo de Simpson?

# Factor copies of SEXO and ESCOLARIDADE so they can serve as discrete
# grouping variables.
sennav1$sexo.f <- as.factor(sennav1$SEXO)
sennav1$ano.f <- as.factor(sennav1$ESCOLARIDADE)

# m_notas against F3.EmSt, coloured by sexo.f, with one panel row per
# F1.ConsQ level. Rows whose m_notas is missing are dropped beforehand.
ggplot(
  sennav1[!is.na(sennav1$m_notas), ],
  aes(F3.EmSt, m_notas, colour = sexo.f)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(F1.ConsQ ~ .) +
  theme_bw()