Análise de cluster: análise de classificação por similaridade.
Exercício de análise de cluster: abrindo o banco de dados
        # Load the exercise data set. The .RData file is presumably what provides
        # `bd`, the object every later step operates on.
        # NOTE(review): setwd() is kept so the session's working directory stays
        # the same for any later code that relies on it; prefer a project-relative
        # path in new code.
        setwd("~/Dropbox/R Stat/")
        loaded_objects <- load("senna2.RData")

        # load() restores whatever the file contains without complaint, so make
        # sure `bd` really came from this file and is not a stale session object.
        stopifnot("bd" %in% loaded_objects)

        # List the column names to locate the variables used in the analysis.
        names(bd)
##   [1] "ID"                            "SEXO"                         
##   [3] "ano_esc"                       "IDADE"                        
##   [5] "MÊS"                           "COR"                          
##   [7] "MÃE"                           "AVÔ/Ó"                        
##   [9] "PAI"                           "TIOS"                         
##  [11] "IRMÃOS"                        "FILHOS"                       
##  [13] "MEIO IRMAO"                    "OUTROS PARENTES"              
##  [15] "MADRASTA"                      "OUTROS N PARENTES"            
##  [17] "PADRASTO"                      "SOZINHO"                      
##  [19] "QTD PESSOAS NA CASA"           "N IRMÃO MAIS NOVO"            
##  [21] "N IRMAO MAIS VELHO"            "N IRMAO MAIS VELHO MORA JUNTO"
##  [23] "E. Mae"                        "CALÇAMENTO"                   
##  [25] "ENERGIA"                       "ÁGUA"                         
##  [27] "COLETA LIXO"                   "BOLSA FAMILIA"                
##  [29] "EMPREGADA"                     "CARRO"                        
##  [31] "GELADEIRA"                     "LAVA ROUPA"                   
##  [33] "PC"                            "MICROONDAS"                   
##  [35] "TV"                            "QT TEMPO ATE ESCOLA"          
##  [37] "CRECHE"                        "PRÉ-ESCOLA"                   
##  [39] "FUNDAMENT."                    "MÉDIO"                        
##  [41] "REPROVADO"                     "LISTA EXER."                  
##  [43] "MATERIAL APOIO"                "FALTAS "                      
##  [45] "PAI.1"                         "MÃE.1"                        
##  [47] "IRMÃO"                         "SOZINHO.1"                    
##  [49] "OUTRO LUGAR"                   "NÃO ESTUDO"                   
##  [51] "NIVEL ESCOLARIDADE"            "A_0"                          
##  [53] "C_0"                           "E_0"                          
##  [55] "N_0"                           "O_0"                          
##  [57] "A_1"                           "C_1"                          
##  [59] "E_1"                           "N_1"                          
##  [61] "O_1"                           "A_Cmp_0"                      
##  [63] "A_Mod_0"                       "A_Resp_0"                     
##  [65] "A_Tru_0"                       "C_Achv_0"                     
##  [67] "C_Conc_0"                      "C_Ord_0"                      
##  [69] "C_SD_0"                        "C_SofR_0"                     
##  [71] "E_Act_0"                       "E_Assr_0"                     
##  [73] "E_Soc_0"                       "N_LAngrVol_0"                 
##  [75] "N_LAnx_0"                      "N_LDep_0"                     
##  [77] "O_Aes_0"                       "O_CrImg_0"                    
##  [79] "O_IntCur_0"                    "OvCl_1"                       
##  [81] "A_Cmp_1"                       "A_Mod_1"                      
##  [83] "A_Resp_1"                      "A_Tru_1"                      
##  [85] "C_Achv_1"                      "C_Conc_1"                     
##  [87] "C_Ord_1"                       "C_SD_1"                       
##  [89] "C_SofR_1"                      "E_Act_1"                      
##  [91] "E_Assr_1"                      "E_Soc_1"                      
##  [93] "N_LAngrVol_1"                  "N_LAnx_1"                     
##  [95] "N_LDep_1"                      "O_Aes_1"                      
##  [97] "O_CrImg_1"                     "O_IntCur_1"                   
##  [99] "id"                            "p1_i001"                      
## [101] "p1_i002"                       "p1_i003"                      
## [103] "p1_i004"                       "p1_i005"                      
## [105] "p1_i006"                       "p1_i007"                      
## [107] "p1_i008"                       "ano_esc2"
Rodando a análise de cluster hierárquica
        # Clustering variables: columns 52-61 of `bd` per the names(bd) listing,
        # selected by name so the analysis survives added or reordered columns.
        score_cols <- c("A_0", "C_0", "E_0", "N_0", "O_0",
                        "A_1", "C_1", "E_1", "N_1", "O_1")

        # Euclidean distances between the rows of `bd` on those ten variables.
        d <- dist(bd[, score_cols], method = "euclidean")

        # Agglomerative hierarchical clustering using the "ward.D2" method.
        cluster <- hclust(d, method = "ward.D2")

        # Dendrogram; the negative `hang` makes the labels hang down from 0.
        plot(cluster, hang = -1)

        # Dotted red reference line at height 4.3, the same height used for the
        # height-based cut (grp4.3).
        abline(h = 4.3, lty = 3, col = "red")

        # Outline the four-cluster solution so the rectangles agree with the
        # k = 4 partition (grp4) that the rest of the analysis describes; the
        # previous k = 5 matched neither the k = 4 nor the h = 4.3 cut.
        rect.hclust(cluster, k = 4, border = "red")

        # Cluster membership when the tree is cut into a fixed number of groups.
        grp4 <- cutree(tree = cluster, k = 4)

        # Cluster membership when the tree is cut at height 4.3 instead.
        grp4.3 <- cutree(tree = cluster, h = 4.3)

        # Group sizes for the height-based cut.
        table(grp4.3)
## grp4.3
##  1  2  3  4  5  6  7  8 
## 18  9  6 12  7  4  7  4
        # Group sizes for the k = 4 partition.
        table(grp4)
## grp4
##  1  2  3  4 
## 22 28 13  4
Quantos clusters?
# Merge heights in the order the merges happened; a sharp jump suggests a
# natural place to stop merging.
plot(cluster$height)

# Increase in merge height at each step (the first step is measured from 0),
# rounded to three decimals.
subs <- round(diff(c(0, cluster$height)), 3)
Descrevendo os clusters
# psych provides describeBy() for per-group descriptive statistics.
library(psych)

# Descriptive statistics of columns 52-61, reported separately for each of the
# four clusters in grp4.
describeBy(x = bd[, 52:61], group = grp4)
## group: 1
##     vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis
## A_0    1 22 4.09 0.41   4.21    4.10 0.34 3.33 4.91  1.58 -0.25    -0.80
## C_0    2 22 4.04 0.50   4.03    4.08 0.41 2.67 4.90  2.23 -0.70     0.67
## E_0    3 22 3.83 0.57   4.03    3.86 0.45 2.72 4.61  1.89 -0.60    -1.03
## N_0    4 22 3.24 0.51   3.31    3.27 0.55 2.29 3.95  1.67 -0.41    -1.09
## O_0    5 22 4.27 0.35   4.31    4.29 0.29 3.50 4.73  1.23 -0.66    -0.41
## A_1    6 22 3.84 0.47   3.92    3.86 0.56 3.00 4.50  1.50 -0.21    -1.38
## C_1    7 22 4.14 0.42   4.20    4.14 0.44 3.33 4.93  1.60 -0.01    -0.95
## E_1    8 22 3.94 0.59   4.11    3.99 0.41 2.78 4.78  2.00 -0.85    -0.64
## N_1    9 22 3.51 0.48   3.47    3.52 0.54 2.56 4.33  1.78  0.03    -0.93
## O_1   10 22 3.91 0.69   3.72    3.90 0.66 2.89 5.00  2.11  0.28    -1.43
##       se
## A_0 0.09
## C_0 0.11
## E_0 0.12
## N_0 0.11
## O_0 0.07
## A_1 0.10
## C_1 0.09
## E_1 0.13
## N_1 0.10
## O_1 0.15
## -------------------------------------------------------- 
## group: 2
##     vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis
## A_0    1 28 3.65 0.49   3.71    3.69 0.37 2.12 4.46  2.33 -1.00     1.31
## C_0    2 28 3.39 0.52   3.41    3.39 0.44 2.00 4.53  2.53 -0.13     0.62
## E_0    3 28 3.56 0.58   3.39    3.57 0.45 2.33 4.67  2.33  0.09    -0.49
## N_0    4 28 3.19 0.45   3.20    3.21 0.50 2.05 3.90  1.86 -0.46    -0.07
## O_0    5 28 3.71 0.57   3.86    3.74 0.62 2.28 4.50  2.22 -0.56    -0.62
## A_1    6 28 3.35 0.50   3.42    3.37 0.43 1.92 4.33  2.42 -0.56     0.85
## C_1    7 28 3.24 0.47   3.27    3.25 0.40 1.73 4.27  2.53 -0.70     1.95
## E_1    8 28 3.30 0.45   3.33    3.32 0.33 2.11 4.44  2.33 -0.34     1.00
## N_1    9 28 3.20 0.63   3.22    3.16 0.49 2.22 4.89  2.67  0.58     0.04
## O_1   10 28 3.26 0.62   3.28    3.26 0.74 2.22 4.44  2.22  0.04    -1.15
##       se
## A_0 0.09
## C_0 0.10
## E_0 0.11
## N_0 0.08
## O_0 0.11
## A_1 0.10
## C_1 0.09
## E_1 0.09
## N_1 0.12
## O_1 0.12
## -------------------------------------------------------- 
## group: 3
##     vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis
## A_0    1 13 3.39 0.57   3.25    3.35 0.56 2.67 4.67  2.00  0.75    -0.53
## C_0    2 13 2.86 0.57   2.67    2.80 0.40 2.13 4.30  2.17  1.03     0.43
## E_0    3 13 3.71 0.48   3.83    3.74 0.49 2.89 4.28  1.39 -0.45    -1.37
## N_0    4 13 2.30 0.53   2.24    2.27 0.56 1.57 3.29  1.71  0.38    -1.28
## O_0    5 13 3.50 0.45   3.56    3.55 0.41 2.38 4.06  1.68 -0.97     0.35
## A_1    6 13 3.12 0.52   3.00    3.04 0.25 2.50 4.58  2.08  1.49     2.26
## C_1    7 13 2.65 0.54   2.67    2.60 0.40 1.87 4.00  2.13  0.83     0.55
## E_1    8 13 3.14 0.61   3.22    3.16 0.33 1.89 4.11  2.22 -0.43    -0.58
## N_1    9 13 1.88 0.36   1.78    1.87 0.49 1.44 2.44  1.00  0.15    -1.66
## O_1   10 13 2.74 0.58   2.67    2.75 0.49 1.67 3.78  2.11  0.04    -0.96
##       se
## A_0 0.16
## C_0 0.16
## E_0 0.13
## N_0 0.15
## O_0 0.13
## A_1 0.14
## C_1 0.15
## E_1 0.17
## N_1 0.10
## O_1 0.16
## -------------------------------------------------------- 
## group: 4
##     vars n mean   sd median trimmed  mad  min  max range  skew kurtosis
## A_0    1 4 3.97 0.33   3.88    3.97 0.22 3.71 4.42  0.71  0.45    -1.95
## C_0    2 4 4.18 0.48   4.41    4.18 0.06 3.47 4.46  0.99 -0.74    -1.69
## E_0    3 4 4.46 0.22   4.39    4.46 0.12 4.28 4.78  0.50  0.55    -1.84
## N_0    4 4 3.85 0.64   3.69    3.85 0.37 3.27 4.76  1.50  0.51    -1.81
## O_0    5 4 3.23 0.26   3.12    3.23 0.08 3.06 3.61  0.56  0.68    -1.74
## A_1    6 4 4.02 0.55   4.00    4.02 0.62 3.42 4.67  1.25  0.06    -2.14
## C_1    7 4 4.22 0.75   4.35    4.22 0.54 3.20 5.00  1.80 -0.35    -1.85
## E_1    8 4 4.61 0.29   4.56    4.61 0.25 4.33 5.00  0.67  0.32    -2.01
## N_1    9 4 4.28 0.63   4.28    4.28 0.74 3.56 5.00  1.44  0.00    -2.13
## O_1   10 4 2.94 0.67   2.94    2.94 0.82 2.22 3.67  1.44  0.00    -2.27
##       se
## A_0 0.16
## C_0 0.24
## E_0 0.11
## N_0 0.32
## O_0 0.13
## A_1 0.28
## C_1 0.38
## E_1 0.15
## N_1 0.32
## O_1 0.33
# Plot columns 52-56 with points coloured by four-cluster membership
# (for a data frame this draws a scatterplot matrix).
plot(bd[, 52:56], col = grp4, pch = 19, cex = 1)

# NOTE(review): this runs code fetched over plain HTTP at execution time;
# consider keeping a reviewed local copy of cluster_fig.R instead.
# The sourced script presumably defines cluster_fig(), which is called next.
source(file = "http://www.labape.com.br/rprimi/R/cluster_fig.R")
cluster_fig(bd = bd[, 52:56], grp = grp4)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha