# Libraries ----
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.1
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(psych)
## Warning: package 'psych' was built under R version 3.5.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RColorBrewer)
library(corrplot)
## corrplot 0.84 loaded
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 3.5.2
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
library(sjPlot)
library(sjlabelled)
## Warning: package 'sjlabelled' was built under R version 3.5.2
##
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
##
## as_factor
# Data ----
# Load the analysis data set from its serialized RDS file (path is relative
# to the working directory).
df <- readRDS(file = "enem_enade_bio.rds")
#' Draw a colour-coded correlation plot with non-significant cells blanked
#'
#' Runs `corr.test()` (no p-value adjustment) on the selected columns and
#' passes the resulting correlation matrix and p-value matrix to `corrplot()`.
#' Cells whose p-value exceeds 0.05 are left blank.
#'
#' @param df Data frame holding the variables.
#' @param vars1 Column names (or indices) used as the `x` set.
#' @param vars2 Optional column names (or indices) used as the `y` set. When
#'   `NULL` (default) the `vars1` columns are correlated among themselves.
#' @return Whatever `corrplot()` returns; called mainly for the plot it draws.
graph_corplot <- function(df, vars1, vars2 = NULL) {
  palette_fn <- colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
  )

  # drop = FALSE keeps a single selected column as a data frame.
  x_data <- df[, vars1, drop = FALSE]

  # Run the test once: the result carries both the coefficients (r) and the
  # p-values (p), so there is no need to compute it separately for each.
  test_res <- if (is.null(vars2)) {
    corr.test(x = x_data, adjust = "none")
  } else {
    corr.test(x = x_data, y = df[, vars2, drop = FALSE], adjust = "none")
  }

  corrplot(
    test_res[["r"]],
    method = "color", col = palette_fn(50), number.cex = .7,
    addCoef.col = "black",          # print the coefficient in each cell
    tl.col = "black", tl.srt = 90,  # text label colour and rotation
    # combine with significance: blank out cells with p > sig.level
    p.mat = test_res[["p"]], sig.level = 0.05, insig = "blank",
    # keep the principal diagonal in the plot
    diag = TRUE
  )
}
# List every column name with its position in the data frame.
colnames(df)
## [1] "NU_ANO" "CO_GRUPO" "CD_ORGAC"
## [4] "CO_REGIAO" "SG_UF" "SQ_GRAD"
## [7] "IN_GRAD" "NU_IDADE" "TP_SEXO"
## [10] "ANO_FIM_2G" "ANO_IN_GRA" "TP_PR_GER"
## [13] "NT_OBJ_FG" "NT_DIS_FG" "NT_FG"
## [16] "NT_OBJ_CE" "NT_DIS_CE" "NT_GER"
## [19] "Tem_bolsa" "Privada" "escol_pais"
## [22] "Sus_trab" "branco" "negrind"
## [25] "mulato" "uso_comp" "bibl_acervo"
## [28] "cond_fis" "bibl_serv" "form_social_BR1"
## [31] "form_social_BR2" "plan_peda" "exigencia"
## [34] "co_rs_81c" "IC_1" "Monit_1"
## [37] "Extensao_1" "IC_incent" "co_rs_92c"
## [40] "compet_met" "compet_soc" "competencias"
## [43] "IDD" "ENEM98_TTO" "ENEM98_TTR"
## [46] "ENEM99_TTO" "ENEM99_TTR" "ENEM00_TTO"
## [49] "ENEM00_TTR" "ENEM01_TTO" "ENEM01_TTR"
## [52] "ENEM02_TTO" "ENEM02_TTR" "ENEM03_TTO"
## [55] "ENEM03_TTR" "ENEM04_TTO" "ENEM04_TTR"
## [58] "ENEM05_TTO" "ENEM05_TTR"
# Attach the codes stored in the "value.labels" attribute of TP_SEXO as
# value labels, then print the frequency table for that column.
df[["TP_SEXO"]] <- add_labels(
  df[["TP_SEXO"]],
  labels = attr(df[["TP_SEXO"]], "value.labels")
)
frq(select(df, TP_SEXO))
##
## # TP_SEXO <numeric>
## # total N=10933 valid N=10933 mean=1.72 sd=0.45
##
## val label frq raw.prc valid.prc cum.prc
## 1 Masculino 3114 28.48 28.48 28.48
## 2 Feminino 7819 71.52 71.52 100.00
## NA NA 0 0.00 NA NA
# Variables for the correlation plot, selected by name instead of by column
# position so the selection does not silently change if columns are reordered.
# These are the names at positions 13, 14, 16, 17, 18, 50, 51, 20, 21, 22, 9
# of names(df).
vars1 <- c(
  "NT_OBJ_FG", "NT_DIS_FG", "NT_OBJ_CE", "NT_DIS_CE", "NT_GER",
  "ENEM01_TTO", "ENEM01_TTR",
  "Privada", "escol_pais", "Sus_trab",
  "TP_SEXO"
)
# Stop early if any expected column is missing from the data set.
stopifnot(all(vars1 %in% names(df)))
graph_corplot(df = df, vars1 = vars1)

# Seven-colour "Paired" palette, also reused by the 3D scatter plot further
# down.
colors <- brewer.pal(n = 7, name = "Paired")

# 3D scatter of escol_pais (x) vs ENEM01_TTO (y) vs NT_GER (z), with points
# coloured by escol_pais.
# NOTE(review): a bare numeric `size` may be treated by plotly as a data
# mapping rather than a fixed marker size; confirm this is the intent.
plot_ly(
  data = df,
  x = ~escol_pais, y = ~ENEM01_TTO, z = ~NT_GER,
  type = "scatter3d", mode = "markers",
  color = ~escol_pais, colors = colors,
  size = 3.5,
  opacity = 1 / 1.5
)
## Warning: Ignoring 9359 observations
# Same 3D scatter as above (escol_pais, ENEM01_TTO, NT_GER; coloured by
# escol_pais), additionally using a different marker symbol per TP_SEXO value.
plot_ly(
  data = df,
  x = ~escol_pais, y = ~ENEM01_TTO, z = ~NT_GER,
  type = "scatter3d", mode = "markers",
  color = ~escol_pais, colors = colors,
  size = 3.5,
  symbol = ~TP_SEXO, symbols = c('circle', 'x'),
  opacity = 1 / 1.5
)
## Warning: Ignoring 9359 observations