# Make the project folder the working directory so the data file can be
# referenced by its bare name.
setwd("~/Dropbox/R Stat")
# Restore the objects stored in senna.RData into the calling environment
# (the code further down expects a data frame named `sennav1` -- TODO confirm
# that this file is where it comes from).
load(file = "senna.RData")

Correlação entre as variáveis

library(sjPlot)

# Correlation analysis
# Names of the columns that enter the correlation matrix.
vars <- c(
  "m_notas", "F1.Cons", "F2.Extr", "F3.EmSt", "F4.Agre",
  "F5.Opns", "F6.NVLoc", "ESCOLARIDADE", "SEXO", "IDADE"
)
# sjt.corr(sennav1[ , vars], triangle = "lower")
  m_notas F1.Cons F2.Extr F3.EmSt F4.Agre F5.Opns F6.NVLoc ESCOLARIDADE SEXO IDADE
m_notas                    
F1.Cons 0.495***                  
F2.Extr 0.084 0.123                
F3.EmSt 0.311* 0.675*** 0.096              
F4.Agre 0.424*** 0.490*** 0.367** 0.427***            
F5.Opns 0.186 0.449*** 0.187 0.160 0.424***          
F6.NVLoc -0.237 -0.388** 0.069 -0.415*** -0.122 0.166        
ESCOLARIDADE -0.384** -0.527*** -0.298* -0.307* -0.270* -0.222 0.064      
SEXO 0.310* 0.071 0.217 -0.089 0.198 0.178 -0.057 0.000    
IDADE -0.378** -0.504*** -0.313* -0.261* -0.277* -0.252* 0.010 0.977*** 0.011  
Computed correlation used spearman-method with listwise-deletion.

Regressão múltipla prevendo notas a partir das seis habilidades socioemocionais

# Multiple regression: m_notas regressed on the six factor scores
# (F1.Cons ... F6.NVLoc).
fit1 <- lm(
  m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre + F5.Opns + F6.NVLoc,
  data = sennav1
)
# Print the coefficient table and the fit statistics.
summary(fit1)
## 
## Call:
## lm(formula = m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre + 
##     F5.Opns + F6.NVLoc, data = sennav1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.40730 -0.53414  0.07799  0.61265  2.09773 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.72321    1.11784   4.225 8.54e-05 ***
## F1.Cons      0.85598    0.24183   3.540 0.000797 ***
## F2.Extr     -0.07189    0.25169  -0.286 0.776194    
## F3.EmSt     -0.44010    0.23712  -1.856 0.068537 .  
## F4.Agre      0.85238    0.26088   3.267 0.001826 ** 
## F5.Opns     -0.40424    0.25114  -1.610 0.112914    
## F6.NVLoc    -0.16230    0.25627  -0.633 0.529008    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9748 on 58 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3861, Adjusted R-squared:  0.3226 
## F-statistic:  6.08 on 6 and 58 DF,  p-value: 5.513e-05
# With only F1.Cons and F3.EmSt as predictors, the second factor still gets
# a negative weight.
fit2 <- lm(
  m_notas ~ F1.Cons + F3.EmSt,
  data = sennav1
)
summary(fit2)
## 
## Call:
## lm(formula = m_notas ~ F1.Cons + F3.EmSt, data = sennav1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6504 -0.6763  0.0190  0.6489  3.2813 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1896     0.5936   8.743 2.05e-12 ***
## F1.Cons       0.8184     0.2238   3.658 0.000527 ***
## F3.EmSt      -0.2213     0.2418  -0.915 0.363644    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 62 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2452, Adjusted R-squared:  0.2209 
## F-statistic: 10.07 on 2 and 62 DF,  p-value: 0.000163
    m_notas
    B CI std. Beta CI p
(Intercept)   4.72 2.49 – 6.96     <.001
F1.Cons   0.86 0.37 – 1.34 0.62 0.28 – 0.96 .001
F2.Extr   -0.07 -0.58 – 0.43 -0.03 -0.26 – 0.20 .776
F3.EmSt   -0.44 -0.91 – 0.03 -0.30 -0.61 – 0.02 .069
F4.Agre   0.85 0.33 – 1.37 0.41 0.17 – 0.66 .002
F5.Opns   -0.40 -0.91 – 0.10 -0.22 -0.48 – 0.05 .113
F6.NVLoc   -0.16 -0.68 – 0.35 -0.08 -0.33 – 0.17 .529
Observations   65
R2 / adj. R2   .386 / .323
    m_notas
    B CI std. Beta CI p
(Intercept)   5.19 4.00 – 6.38     <.001
F1.Cons   0.82 0.37 – 1.27 0.59 0.28 – 0.91 .001
F3.EmSt   -0.22 -0.70 – 0.26 -0.15 -0.47 – 0.17 .364
Observations   65
R2 / adj. R2   .245 / .221

Como interpretar o coeficiente de F3.EmSt na regressão múltipla?

# Build F1.ConsQ: F1.Cons split into four ordered groups at its quartiles.

# Cut points: minimum, the three quartiles and the maximum of F1.Cons.
# na.rm = TRUE keeps quantile() from stopping with an error should F1.Cons
# contain missing values; when it has none the result is unchanged.
quartis <- quantile(sennav1$F1.Cons,
                    probs = c(0, 0.25, 0.50, 0.75, 1),
                    na.rm = TRUE)

# Ordered factor with one level per interval between consecutive cut points.
# include.lowest = TRUE closes the first interval on the left, so the
# observation(s) equal to the minimum are not turned into NA.
sennav1$F1.ConsQ <- cut(sennav1$F1.Cons,
                        breaks = quartis,
                        include.lowest = TRUE,
                        ordered_result = TRUE)

library(ggplot2)

# Scatter plot of m_notas against F3.EmSt with a single least-squares line
# through all points (confidence band switched off).
ggplot(sennav1, aes(x = F3.EmSt, y = m_notas)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()

# Same axes, with colour mapped to F1.ConsQ: points are coloured by group and
# geom_smooth() fits one line per F1.ConsQ level.
ggplot(sennav1, aes(x = F3.EmSt, y = m_notas, color = F1.ConsQ)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()

Por que ocorre o paradoxo de Simpson?

# Factor versions of SEXO and ESCOLARIDADE, so that they can be used as
# grouping (colour) variables in the plots and as categorical predictors.
sennav1[["sexo.f"]] <- as.factor(sennav1[["SEXO"]])
sennav1[["ano.f"]] <- as.factor(sennav1[["ESCOLARIDADE"]])

# m_notas against F3.EmSt, one panel row per F1.ConsQ level, using only the
# rows where m_notas is not missing. Points and fitted lines are split by
# sexo.f.
ggplot(
  sennav1[!is.na(sennav1[["m_notas"]]), ],
  aes(x = F3.EmSt, y = m_notas, color = sexo.f)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(F1.ConsQ ~ .) +
  theme_bw()

# Same layout, split by ano.f instead. The colour mapping is declared once at
# the plot level and inherited by both the point and the smooth layers.
ggplot(
  sennav1[!is.na(sennav1[["m_notas"]]), ],
  aes(x = F3.EmSt, y = m_notas, color = ano.f)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(F1.ConsQ ~ .) +
  theme_bw()

Modelo final incluindo gênero e ano escolar

# Multiple regression: the six factor scores plus the full three-way
# interaction sexo.f * ano.f * F3.EmSt (which also brings in its main effects
# and two-way interactions). F3.EmSt is listed both on its own and inside the
# interaction; the term order below determines the coefficient order and
# names in the summary, so it is kept as is.
fit3 <- lm(
  m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre + F5.Opns + F6.NVLoc +
    sexo.f * ano.f * F3.EmSt,
  data = sennav1
)
summary(fit3)
## 
## Call:
## lm(formula = m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre + 
##     F5.Opns + F6.NVLoc + sexo.f * ano.f * F3.EmSt, data = sennav1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.07182 -0.48856  0.01122  0.59381  1.56493 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             3.89086    1.63723   2.376   0.0215 *
## F1.Cons                 0.72846    0.27424   2.656   0.0107 *
## F2.Extr                -0.28167    0.24608  -1.145   0.2580  
## F3.EmSt                 0.26563    0.37279   0.713   0.4796  
## F4.Agre                 0.63096    0.26281   2.401   0.0203 *
## F5.Opns                -0.44448    0.25005  -1.778   0.0818 .
## F6.NVLoc               -0.01259    0.25520  -0.049   0.9609  
## sexo.f1                 0.27749    1.72734   0.161   0.8730  
## ano.f7                  2.61749    2.01282   1.300   0.1997  
## ano.f9                  1.04553    2.28980   0.457   0.6500  
## sexo.f1:ano.f7          0.35660    2.77279   0.129   0.8982  
## sexo.f1:ano.f9          2.83656    2.76465   1.026   0.3100  
## F3.EmSt:sexo.f1         0.05292    0.47175   0.112   0.9111  
## F3.EmSt:ano.f7         -0.74929    0.55436  -1.352   0.1828  
## F3.EmSt:ano.f9         -0.51139    0.70600  -0.724   0.4724  
## F3.EmSt:sexo.f1:ano.f7 -0.03218    0.79406  -0.041   0.9678  
## F3.EmSt:sexo.f1:ano.f9 -0.82360    0.84446  -0.975   0.3343  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8614 on 48 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6032, Adjusted R-squared:  0.471 
## F-statistic: 4.561 on 16 and 48 DF,  p-value: 2.151e-05
    m_notas
    B CI std. Beta CI p
(Intercept)   3.89 0.60 – 7.18     .022
F1.Cons   0.73 0.18 – 1.28 0.53 0.14 – 0.92 .011
F2.Extr   -0.28 -0.78 – 0.21 -0.13 -0.35 – 0.09 .258
F3.EmSt   0.27 -0.48 – 1.02 0.18 -0.31 – 0.67 .480
F4.Agre   0.63 0.10 – 1.16 0.31 0.06 – 0.55 .020
F5.Opns   -0.44 -0.95 – 0.06 -0.24 -0.50 – 0.02 .082
F6.NVLoc   -0.01 -0.53 – 0.50 -0.01 -0.25 – 0.24 .961
sexo.f1   0.28 -3.20 – 3.75 0.12 -1.32 – 1.56 .873
ano.f
ano.f7   2.62 -1.43 – 6.66 1.06 -0.54 – 2.67 .200
ano.f9   1.05 -3.56 – 5.65 0.42 -1.37 – 2.20 .650
sexo.f1:ano.f7   0.36 -5.22 – 5.93 0.10 -1.42 – 1.62 .898
sexo.f1:ano.f9   2.84 -2.72 – 8.40 0.94 -0.85 – 2.73 .310
F3.EmSt:sexo.f1   0.05 -0.90 – 1.00 0.08 -1.28 – 1.44 .911
F3.EmSt:ano.f7   -0.75 -1.86 – 0.37 -1.07 -2.62 – 0.48 .183
F3.EmSt:ano.f9   -0.51 -1.93 – 0.91 -0.62 -2.31 – 1.06 .472
F3.EmSt:sexo.f1:ano.f7   -0.03 -1.63 – 1.56 -0.03 -1.45 – 1.39 .968
F3.EmSt:sexo.f1:ano.f9   -0.82 -2.52 – 0.87 -0.82 -2.47 – 0.83 .334
Observations   65
R2 / adj. R2   .603 / .471